diff --git a/offloading-cacher/cache.hpp b/offloading-cacher/cache.hpp index c461f2f..a58bc0e 100644 --- a/offloading-cacher/cache.hpp +++ b/offloading-cacher/cache.hpp @@ -39,6 +39,8 @@ namespace dml { } namespace dsacache { + class Cache; + /* * Class Description: * Holds all required information on one cache entry and is used @@ -72,6 +74,11 @@ namespace dsacache { using dml_handler = dml::handler>; private: + static constexpr uint64_t maxptr = 0xffff'ffff'ffff'ffff; + + // set to false if we do not own the cache pointer + bool delete_ = false; + // data source and size of the block uint8_t* src_; size_t size_; @@ -84,7 +91,7 @@ namespace dsacache { // object-local incomplete cache location pointer // contract: only access when being in sole posession of handlers - uint8_t* incomplete_cache_; + uint8_t** incomplete_cache_; // dml handler vector pointer which is used // to wait on caching task completion @@ -94,6 +101,17 @@ namespace dsacache { // and invalidates it void Deallocate(); + size_t GetSize() const { return size_; } + uint8_t* GetSource() const { return src_; } + int32_t GetRefCount() const { return active_->load(); } + void SetTaskHandlersAndCache(uint8_t* cache, std::vector* handlers); + + // initializes the class after which it is thread safe + // but may only be destroyed safely after setting handlers + void Init(); + + friend Cache; + public: CacheData(uint8_t* data, const size_t size); CacheData(const CacheData& other); @@ -108,13 +126,7 @@ namespace dsacache { // instance which is valid as long as the // instance is alive - !!! this may also // yield a nullptr !!! - - void SetTaskHanldersAndCache(uint8_t* cache, std::vector* handlers); - uint8_t* GetDataLocation() const { return cache_->load(); } - size_t GetSize() const { return size_; } - uint8_t* GetSource() const { return src_; } - int32_t GetRefCount() const { return active_->load(); } }; /* @@ -341,6 +353,12 @@ inline std::unique_ptr dsacache::Cache::Access(uint8_t* dat std::cout << "[!] Found another cache instance for 0x" << std::hex << (uint64_t)task->GetSource() << std::dec << std::endl; return std::move(std::make_unique(state.first->second)); } + + // initialize the task now for thread safety + // as we are now sure that we will submit work + // to it and will not delete it beforehand + + task->Init(); } SubmitTask(task.get(), dst_node, src_node); @@ -426,7 +444,7 @@ inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, con handlers->emplace_back(ExecuteCopy(local_src, local_dst, local_size, executing_nodes[i])); } - task->SetTaskHanldersAndCache(dst, handlers); + task->SetTaskHandlersAndCache(dst, handlers); // restore the previous nodemask @@ -581,10 +599,11 @@ inline dsacache::Cache::~Cache() { inline dsacache::CacheData::CacheData(uint8_t* data, const size_t size) { src_ = data; size_ = size; + delete_ = false; active_ = new std::atomic(1); - cache_ = new std::atomic(); + cache_ = new std::atomic(data); handlers_ = new std::atomic*>(); - incomplete_cache_ = nullptr; + incomplete_cache_ = new uint8_t*(nullptr); } inline dsacache::CacheData::CacheData(const dsacache::CacheData& other) { @@ -612,7 +631,7 @@ inline dsacache::CacheData::~CacheData() { // then we must execute proper deletion // as this was the last reference - if (v <= 0) { + if (v == 0) { // on deletion we must ensure that all offloaded // operations have completed successfully @@ -625,6 +644,7 @@ inline dsacache::CacheData::~CacheData() { delete active_; delete cache_; delete handlers_; + delete incomplete_cache_; } } @@ -636,8 +656,8 @@ inline void dsacache::CacheData::Deallocate() { // takes place for the retrieved local cache uint8_t* cache_local = cache_->exchange(nullptr); - if (cache_local != nullptr) numa_free(cache_local, size_); - else if (incomplete_cache_ != nullptr) numa_free(incomplete_cache_, size_); + if (cache_local != nullptr && delete_) numa_free(cache_local, size_); + else if (*incomplete_cache_ != nullptr) numa_free(*incomplete_cache_, size_); else; } @@ -655,15 +675,17 @@ inline void dsacache::CacheData::WaitOnCompletion() { // exchange the global handlers pointer with nullptr to have a local // copy - this signals that this thread is the sole owner and therefore - // responsible for waiting for them + // responsible for waiting for them. we can not set to nullptr here but + // set to maximum of 64-bit in order to prevent deadlocks from the above + // waiting construct - std::vector* local_handlers = handlers_->exchange(nullptr); + std::vector* local_handlers = handlers_->exchange(reinterpret_cast*>(maxptr)); // ensure that no other thread snatched the handlers before us // and in case one did, wait again and then return - if (local_handlers == nullptr) { - WaitOnCompletion(); + if (local_handlers == nullptr || local_handlers == reinterpret_cast*>(maxptr)) { + cache_->wait(nullptr); return; } @@ -693,10 +715,12 @@ inline void dsacache::CacheData::WaitOnCompletion() { if (error) { cache_->store(src_); - numa_free(incomplete_cache_, size_); + numa_free(*incomplete_cache_, size_); + delete_ = false; + *incomplete_cache_ = nullptr; } else { - cache_->store(incomplete_cache_); + cache_->store(*incomplete_cache_); } // notify all waiting threads so they wake up quickly @@ -705,8 +729,13 @@ inline void dsacache::CacheData::WaitOnCompletion() { handlers_->notify_all(); } -void dsacache::CacheData::SetTaskHanldersAndCache(uint8_t* cache, std::vector* handlers) { - incomplete_cache_ = cache; +void dsacache::CacheData::SetTaskHandlersAndCache(uint8_t* cache, std::vector* handlers) { + *incomplete_cache_ = cache; handlers_->store(handlers); handlers_->notify_one(); } + +void dsacache::CacheData::Init() { + cache_->store(nullptr); + delete_ = true; +}