improve the class-comments for Cache and CacheData, also free incomplete_cache_ if it has not been waited for (see comment on this)

11 months ago · 0fdf650fe4
1 changed files with 113 additions and 12 deletions
--- a/offloading-cacher/cache.hpp
+++ b/offloading-cacher/cache.hpp
@ -59,13 +59,33 @@ namespace dml {
 namespace dsacache {
    class Cache;

-    // cache data holds all required information on
-    // one cache entry and will both be stored
-    // internally by the cache and handed out
-    // as copies to the user
-    // this class uses its object lifetime and
-    // a global reference counter to allow
-    // thread-safe copies and resource management
+    /*
+     * Class Description:
+     * Holds all required information on one cache entry and is used
+     * both internally by the Cache and externally by the user.
+     *
+     * Important Usage Notes:
+     * The pointer is only updated in WaitOnCompletion() which
+     * therefore must be called by the user at some point in order
+     * to use the cached data. Using this class as T for
+     * std::shared_ptr<T> is not recommended as references are
+     * already counted internally.
+     *
+     * Cache Lifetime:
+     * As long as the instance is referenced, the pointer it stores
+     * is guaranteed to be either nullptr or pointing to a valid copy.
+     *
+     * Implementation Detail:
+     * Performs self-reference counting with a shared atomic integer.
+     * Therefore on creating a copy the reference count is increased
+     * and with the destructor it is decreased. If the last copy is
+     * destroyed the actual underlying data is freed and all shared
+     * variables deleted.
+     *
+     * Notes on Thread Safety:
+     * Class is thread safe in any possible state and performs
+     * reference counting and deallocation itself entirely atomically.
+     */

    class CacheData {
    public:
@ -101,6 +121,7 @@ namespace dsacache {
        bool Active() const;

        friend Cache;
+
    public:
        CacheData(uint8_t* data, const size_t size);
        CacheData(const CacheData& other);
@ -118,11 +139,62 @@ namespace dsacache {
        uint8_t* GetDataLocation() const;
    };

-    // cache class will handle access to data through the cache
-    // by managing the cache through work submission, it sticks
-    // to user-defined caching and copy policies, is thread
-    // safe after initialization and returns copies of
-    // cache data class to the user
+    /*
+     * Class Description:
+     * Class will handle access to data through internal copies.
+     * These are obtained via work submission to the Intel DSA which takes
+     * care of asynchronously duplicating the data. The user will define
+     * where these copies lie and which system nodes will perform the copy.
+     * This is done through policy functions set during initialization.
+     *
+     * Placement Policy:
+     * The Placement Policy Function decides on which node a particular
+     * entry is to be placed, given the current executing node and the
+     * data source node and data size. This in turn means that for one
+     * datum, multiple cached copies may exist at one time.
+     *
+     * Cache Lifetime:
+     * When accessing the cache, a CacheData-object will be returned.
+     * As long as this object lives, the pointer which it holds is
+     * guaranteed to be either nullptr or a valid copy. When destroyed
+     * the entry is marked for deletion which is only carried out
+     * when system memory pressure drives an automated cache flush.
+     *
+     * Restrictions:
+     * - Overlapping Pointers may lead to undefined behaviour during
+     *   manual cache invalidation which should not be used if you
+     *   intend to have these types of pointers
+     * - Cache Invalidation may only be performed manually and gives
+     *   no ordering guarantees. Therefore, it is the user's responsibility
+     *   to ensure that results after invalidation have been generated
+     *   using the latest state of data. The cache is best suited
+     *   to static data.
+     *
+     * Notes on Thread Safety:
+     * - Cache is completely thread-safe after initialization
+     * - CacheData-class will handle deallocation of data itself by
+     *   performing self-reference-counting atomically and only
+     *   deallocating if the last reference is destroyed
+     * - The internal cache state has one lock which is either
+     *   acquired shared for reading the state (upon accessing an already
+     *   cached element) or unique (accessing a new element, flushing, invalidating)
+     * - Waiting on copy completion is done over an atomic-wait in copies
+     *   of the original CacheData-instance
+     * - Overall this class may experience performance issues due to the use
+     *   of locking (in any configuration), lock contention (worsens with higher
+     *   core count, node count and utilization) and atomics (worse in the same
+     *   situations as lock contention)
+     *
+     * Improving Performance:
+     * When data is never shared between threads or memory size for the cache is
+     * not an issue you may consider having one Cache-instance per thread and removing
+     * the lock in Cache and modifying the reference counting and waiting mechanisms
+     * of CacheData accordingly (although this is high effort and will yield little due
+     * to the atomics not being shared among cores/nodes).
+     * Otherwise, one Cache-instance per node could also be considered. This will allow
+     * the placement policy function to be barebones and reduces the lock contention and
+     * synchronization impact of the atomic variables.
+     */

    class Cache {
    public:
@ -202,6 +274,8 @@ namespace dsacache {
        // be properly deleted, but the cache
        // will be fresh - use for testing
        void Clear();
+
+        void Invalidate(uint8_t* data);
    };
 }

@ -486,6 +560,28 @@ inline std::unique_ptr<dsacache::CacheData> dsacache::Cache::GetFromCache(uint8_
    return nullptr;
 }

+// Drops every cached entry for the given source pointer so that the
+// next access to it re-triggers caching. Holds the unique (writer)
+// lock on the cache state for the whole operation. A pointer that
+// has no entry in any per-node-cache is ignored.
+//
+// Defined in a header, therefore marked inline like the other
+// out-of-class member definitions in this file.
+inline void dsacache::Cache::Invalidate(uint8_t* data) {
+    // as the cache is modified we must obtain a unique writers lock
+
+    std::unique_lock<std::shared_mutex> lock(cache_mutex_);
+
+    // loop through all per-node-caches available - the loop variable
+    // must be a reference, a by-value variable would be a copy and
+    // erasing from it would leave cache_state_ itself untouched
+
+    for (auto& node : cache_state_) {
+        // search for an entry for the given data pointer
+
+        auto search = node.second.find(data);
+
+        if (search != node.second.end()) {
+            // if the data is represented in-cache
+            // then it will be erased to re-trigger
+            // caching on next access
+
+            node.second.erase(search);
+        }
+    }
+}
+
 inline dsacache::CacheData::CacheData(uint8_t* data, const size_t size) {
    src_ = data;
    size_ = size;
@ -552,6 +648,11 @@ inline void dsacache::CacheData::Deallocate() {

    uint8_t* cache_local = cache_->exchange(nullptr);
    if (cache_local != nullptr) numa_free(cache_local, size_);
+
+    // if the cache was never waited for then incomplete_cache_
+    // may still contain a valid pointer which has to be freed
+
+    if (incomplete_cache_ != nullptr) numa_free(incomplete_cache_, size_);
 }

 inline uint8_t* dsacache::CacheData::GetDataLocation() const {