// bachelor-thesis/offloading-cacher/offloading-cache.hpp


								#pragma once


								#include <atomic>

								#include <vector>

								#include <unordered_map>


								#include <numa.h>


								#include <dml/dml.hpp>


								namespace offcache {

								    // the cache task structure will be used to submit and

								    // control a cache element, while providing source pointer

								    // and size in bytes for submission

								    //

								    // then the submitting thread may wait on the atomic "result"

								    // which will be notified by the cache worker upon processing

								    // after which the atomic-bool-ptr active will also become valid

								    //

								    // the data pointed to by result and the bool-ptr are guaranteed

								    // to remain valid until the value pointed to by active is changed

								    // to false, after which the worker may clean up and delete the

								    // structure - careful, do not call delete on this, the worker does

								    struct CacheTask {
								        // source pointer of the data to be cached, set by the submitter;
								        // defaults to nullptr so a default-constructed task never
								        // carries an indeterminate pointer
								        uint8_t* data_ = nullptr;

								        // size in bytes of the region starting at data_
								        size_t size_ = 0;

								        // the submitting thread waits on this atomic, which is
								        // notified by the cache worker upon processing;
								        // stays nullptr until then
								        std::atomic<uint8_t*> result_ { nullptr };

								        // becomes valid once result_ has been notified; the data behind
								        // result_ and this flag remain valid until the pointed-to value
								        // is changed to false - explicitly nullptr before that point so
								        // "not yet valid" is distinguishable from a garbage pointer
								        std::atomic<bool>* active_ = nullptr;
								    };


								    // worker class, one for each numa node

								    // discovers its node configuration on startup

								    // and keeps track of available memory

								    class CacheWorker {
								        // numa node id stored for this worker, zero by default
								        // NOTE(review): presumably the node this worker serves - confirm in the implementation
								        uint8_t numa_node_{0};

								        // bookkeeping that maps a uint8_t* key to the CacheTask it belongs to
								        // NOTE(review): key is presumably the source data pointer - confirm in the implementation
								        std::unordered_map<uint8_t*, CacheTask*> cache_info_;

								    public:
								        // mailbox of the worker: a new task is handed over by exchanging
								        // the stored nullptr for a valid CacheTask pointer and then
								        // notifying on the atomic; from that point on the worker owns
								        // the CacheTask structure
								        std::atomic<CacheTask*>* task_slot_{nullptr};

								        // static entry point which receives the worker instance explicitly
								        // together with a numa node id
								        // NOTE(review): presumably the worker's processing loop - body not visible here
								        static void run(CacheWorker* this_, uint8_t numa_node);
								    };


								    // singleton which holds the cache workers

								    // and is the place where work will be submitted

								    class CacheCoordinator {
								    public:
								        // signature of a cache placement policy, kept as a type so the
								        // cacher can be used flexibly: it receives the numa node on which
								        // the data will be needed (dst), the numa node on which the data
								        // currently resides (src) and the data size, and returns the node
								        // on which the cached copy should be placed
								        // the returned node may differ from dst, for example when HBM that
								        // is directly accessible from node n is exposed under another id m
								        using CachePolicy = uint8_t(uint8_t numa_dst_node, uint8_t numa_src_node, size_t data_size);

								        // signature of a copy policy: names the nodes that execute the copy
								        // for a given dst/src pair, which leaves room to either maximize
								        // raw throughput or apply a conservative usage policy
								        using CopyPolicy = std::vector<uint8_t>(uint8_t numa_dst_node, uint8_t numa_src_node);

								        // execution mode selected per submission
								        // NOTE(review): exact semantics of each value live in the implementation, not visible here
								        enum class ExecutionPolicy {
								            Immediate,
								            Relaxed,
								            NoCache
								        };

								    private:
								        // policy callbacks, unset (nullptr) by default
								        CachePolicy* cache_policy_function_{nullptr};
								        CopyPolicy* copy_policy_function_{nullptr};

								    public:
								        // receives the cache policy and copy policy callbacks
								        void Init(CachePolicy* cache_policy_function, CopyPolicy* copy_policy_function);

								        // hands the task over for processing under the given execution
								        // policy; ownership of the pointer passes to the coordinator
								        void SubmitTask(CacheTask* task, ExecutionPolicy policy) const;

								        // NOTE(review): presumably blocks until the task's result is available - confirm in the implementation
								        static void WaitOnCompletion(CacheTask* task);

								        // NOTE(review): presumably marks the cached data of the task as no longer in use - confirm in the implementation
								        static void SignalDataUnused(CacheTask* task);
								    };

								}