diff --git a/offloading-cacher/cache.hpp b/offloading-cacher/cache.hpp index 6b0e712..d96f02b 100644 --- a/offloading-cacher/cache.hpp +++ b/offloading-cacher/cache.hpp @@ -59,13 +59,33 @@ namespace dml { namespace dsacache { class Cache; - // cache data holds all required information on - // one cache entry and will both be stored - // internally by the cache and handed out - // as copies to the user - // this class uses its object lifetime and - // a global reference counter to allow - // thread-safe copies and resource management + /* + * Class Description: + * Holds all required information on one cache entry and is used + * both internally by the Cache and externally by the user. + * + * Important Usage Notes: + * The pointer is only updated in WaitOnCompletion() which + * therefore must be called by the user at some point in order + * to use the cached data. Using this class as T for + * std::shared_ptr is not recommended as references are + * already counted internally. + * + * Cache Lifetime: + * As long as the instance is referenced, the pointer it stores + * is guaranteed to be either nullptr or pointing to a valid copy. + * + * Implementation Detail: + * Performs self-reference counting with a shared atomic integer. + * Therefore on creating a copy the reference count is increased + * and with the destructor it is decreased. If the last copy is + * destroyed the actual underlying data is freed and all shared + * variables deleted. + * + * Notes on Thread Safety: + * Class is thread safe in any possible state and performs + * reference counting and deallocation itself entirely atomically. 
+ */ class CacheData { public: @@ -101,6 +121,7 @@ namespace dsacache { bool Active() const; friend Cache; + public: CacheData(uint8_t* data, const size_t size); CacheData(const CacheData& other); @@ -118,11 +139,62 @@ namespace dsacache { uint8_t* GetDataLocation() const; }; - // cache class will handle access to data through the cache - // by managing the cache through work submission, it sticks - // to user-defined caching and copy policies, is thread - // safe after initialization and returns copies of - // cache data class to the user + /* + * Class Description: + * Class will handle access to data through internal copies. + * These are obtained via work submission to the Intel DSA which takes + * care of asynchronously duplicating the data. The user will define + * where these copies lie and which system nodes will perform the copy. + * This is done through policy functions set during initialization. + * + * Placement Policy: + * The Placement Policy Function decides on which node a particular + * entry is to be placed, given the current executing node and the + * data source node and data size. This in turn means that for one + * datum, multiple cached copies may exist at one time. + * + * Cache Lifetime: + * When accessing the cache, a CacheData-object will be returned. + * As long as this object lives, the pointer which it holds is + * guaranteed to be either nullptr or a valid copy. When destroyed + * the entry is marked for deletion which is only carried out + * when system memory pressure drives an automated cache flush. + * + * Restrictions: + * - Overlapping Pointers may lead to undefined behaviour during + * manual cache invalidation which should not be used if you + * intend to have these types of pointers + * - Cache Invalidation may only be performed manually and gives + * no ordering guarantees. Therefore, it is the user's responsibility + * to ensure that results after invalidation have been generated + * using the latest state of data. 
The cache is best suited + * to static data. + * + * Notes on Thread Safety: + * - Cache is completely thread-safe after initialization + * - CacheData-class will handle deallocation of data itself by + * performing self-reference-counting atomically and only + * deallocating if the last reference is destroyed + * - The internal cache state has one lock which is either + * acquired shared for reading the state (upon accessing an already + * cached element) or unique (accessing a new element, flushing, invalidating) + * - Waiting on copy completion is done over an atomic-wait in copies + * of the original CacheData-instance + * - Overall this class may experience performance issues due to the use + * of locking (in any configuration), lock contention (worsens with higher + * core count, node count and utilization) and atomics (worse in the same + * situations as lock contention) + * + * Improving Performance: + * When data is never shared between threads or memory size for the cache is + * not an issue you may consider having one Cache-instance per thread and removing + * the lock in Cache and modifying the reference counting and waiting mechanisms + * of CacheData accordingly (although this is high effort and will yield little due + * to the atomics not being shared among cores/nodes). + * Otherwise, one Cache-instance per node could also be considered. This will allow + * the placement policy function to be barebones and reduces the lock contention and + * synchronization impact of the atomic variables. 
+ */ class Cache { public: @@ -202,6 +274,8 @@ namespace dsacache { // be properly deleted, but the cache // will be fresh - use for testing void Clear(); + + void Invalidate(uint8_t* data); }; } @@ -486,6 +560,28 @@ inline std::unique_ptr dsacache::Cache::GetFromCache(uint8_ return nullptr; } +inline void dsacache::Cache::Invalidate(uint8_t* data) { + // as the cache is modified we must obtain a unique writers lock + + std::unique_lock lock(cache_mutex_); + + // loop through all per-node-caches available + // note: iterate by reference - a by-value loop would copy each + // per-node map and erase from the copy, leaving the cache untouched + + for (auto& node : cache_state_) { + // search for an entry for the given data pointer + + auto search = node.second.find(data); + + if (search != node.second.end()) { + // if the data is represented in-cache + // then it will be erased to re-trigger + // caching on next access + + node.second.erase(search); + } + } +} + inline dsacache::CacheData::CacheData(uint8_t* data, const size_t size) { src_ = data; size_ = size; @@ -552,6 +648,11 @@ inline void dsacache::CacheData::Deallocate() { uint8_t* cache_local = cache_->exchange(nullptr); if (cache_local != nullptr) numa_free(cache_local, size_); + + // if the cache was never waited for then incomplete_cache_ + // may still contain a valid pointer which has to be freed + + if (incomplete_cache_ != nullptr) numa_free(incomplete_cache_, size_); } inline uint8_t* dsacache::CacheData::GetDataLocation() const {