diff --git a/offloading-cacher/cache.hpp b/offloading-cacher/cache.hpp
index 842da8d..6856332 100644
--- a/offloading-cacher/cache.hpp
+++ b/offloading-cacher/cache.hpp
@@ -89,8 +89,6 @@ namespace dsacache {
         using dml_handler = dml::handler<dml::mem_copy_operation, std::allocator<uint8_t>>;
 
     private:
-        static constexpr uint64_t maxptr = 0xffff'ffff'ffff'ffff;
-
         // set to false if we do not own the cache pointer
         bool delete_ = false;
 
@@ -115,6 +113,10 @@ namespace dsacache {
         // to wait on caching task completion
         std::atomic<std::vector<dml_handler>*>* handlers_;
 
+        // marker that WaitOnCompletion swaps into handlers_ once the real
+        // handlers were claimed, assigned in SetTaskHandlersAndCache
+        std::vector<dml_handler>* invalid_handlers_ = nullptr;
+
         // deallocates the global cache-location
         // and invalidates it
         void Deallocate();
@@ -123,7 +125,7 @@ namespace dsacache {
         uint8_t* GetSource() const { return src_; }
         int32_t GetRefCount() const { return active_->load(); }
         void SetCacheToSource() { cache_->store(src_); delete_ = false; }
-        void SetTaskHandlersAndCache(uint8_t* cache, std::vector<dml_handler>* handlers);
+        void SetTaskHandlersAndCache(uint8_t* cache, std::vector<dml_handler>* handlers, std::vector<dml_handler>* invalid_handlers);
 
         // initializes the class after which it is thread safe
         // but may only be destroyed safely after setting handlers
@@ -241,6 +243,11 @@ namespace dsacache {
 
         uint64_t flags_ = 0;
 
+        // secondary invalid handlers vector
+        // needed due to wake-up issues in CacheData::WaitOnCompletion
+
+        std::vector<CacheData::dml_handler> invalid_handlers_;
+
         // map from [dst-numa-node,map2]
         // map2 from [data-ptr,cache-structure]
 
@@ -491,7 +498,7 @@ inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, con
         handlers->emplace_back(ExecuteCopy(local_src, local_dst, local_size, executing_nodes[i]));
     }
 
-    task->SetTaskHandlersAndCache(dst, handlers);
+    task->SetTaskHandlersAndCache(dst, handlers, &invalid_handlers_);
 }
 
 inline dml::handler<dml::mem_copy_operation, std::allocator<uint8_t>> dsacache::Cache::ExecuteCopy(
@@ -735,12 +742,12 @@ inline void dsacache::CacheData::WaitOnCompletion() {
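-    // set to maximum of 64-bit in order to prevent deadlocks from the above
-    // waiting construct
+    // set to the invalid handlers pointer in order to prevent deadlocks
+    // from the above waiting construct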
 
-    std::vector<dml_handler>* local_handlers = handlers_->exchange(reinterpret_cast<std::vector<dml_handler>*>(maxptr));
+    std::vector<dml_handler>* local_handlers = handlers_->exchange(invalid_handlers_);
 
     // ensure that no other thread snatched the handlers before us
     // and in case one did, wait again and then return
 
-    if (local_handlers == nullptr || local_handlers == reinterpret_cast<std::vector<dml_handler>*>(maxptr)) {
+    if (local_handlers == nullptr || local_handlers == invalid_handlers_) {
         cache_->wait(nullptr);
         return;
     }
@@ -788,10 +795,17 @@ inline void dsacache::CacheData::WaitOnCompletion() {
     handlers_->notify_all();
 }
 
-void dsacache::CacheData::SetTaskHandlersAndCache(uint8_t* cache, std::vector<dml_handler>* handlers) {
+// writes cache through incomplete_cache_, publishes the handlers of the
+// submitted tasks in handlers_ and wakes one thread waiting on handlers_
+void dsacache::CacheData::SetTaskHandlersAndCache(uint8_t* cache, std::vector<dml_handler>* handlers, std::vector<dml_handler>* invalid_handlers) {
+    // the invalid marker must be set before the handlers are published
+    // as a woken waiter exchanges handlers_ with invalid_handlers_ in
+    // WaitOnCompletion and would otherwise read a not yet assigned value
+    invalid_handlers_ = invalid_handlers;
+
     *incomplete_cache_ = cache;
     handlers_->store(handlers);
     handlers_->notify_one();
 }
 
 void dsacache::CacheData::Init() {