|
|
@ -57,6 +57,28 @@ namespace dsacache { |
|
|
|
|
|
|
|
class Cache; |
|
|
|
|
|
|
|
// CachePolicy is the signature of the placement policy handed to the
// cacher; keeping it a type allows flexible usage of the cacher.
// The policy receives the numa destination node (where the data will
// be needed), the numa source node (current location of the data) and
// the data size, and should return the optimal cache placement.
// The returned node may differ from the destination node if the
// system, for example, has HBM attached that is directly accessible
// to node n but exposed under a different node id m.
using CachePolicy = int(const int numa_dst_node, const int numa_src_node, const size_t data_size);
|
|
|
|
|
|
|
// CopyPolicy is the signature of the policy that names the
// copy-executing nodes for a given task. Leaving this choice to the
// user allows flexibility in assignment, either for optimizing raw
// throughput or for choosing a conservative usage policy.
using CopyPolicy = std::vector<int>(const int numa_dst_node, const int numa_src_node, const size_t data_size);
|
|
|
|
|
|
|
// MemoryAllocator_Allocate is the signature of the user-supplied
// allocation function. Memory allocation is a complex topic that can
// have a big performance impact, which is why the cache does not
// handle it itself. The function must return a pointer to a block of
// at least the given size. The cache does not deallocate such a block
// either, it only signals that the block is free.
using MemoryAllocator_Allocate = uint8_t*(const int numa_node, const size_t size);
|
|
|
|
|
|
|
// MemoryAllocator_Free is the signature of the user-supplied function
// through which the cache signals that the given memory block will
// not be used by the cache anymore.
using MemoryAllocator_Free = void(uint8_t* pointer, const size_t size);
|
|
|
|
|
|
|
/*
|
|
|
|
* Class Description: |
|
|
|
* Holds all required information on one cache entry and is used |
|
|
@ -93,6 +115,8 @@ namespace dsacache { |
|
|
|
// set to false if we do not own the cache pointer
|
|
|
|
bool delete_ = false; |
|
|
|
|
|
|
|
MemoryAllocator_Free* memory_free_function_; |
|
|
|
|
|
|
|
// data source and size of the block
|
|
|
|
uint8_t* src_; |
|
|
|
size_t size_; |
|
|
@ -135,7 +159,7 @@ namespace dsacache { |
|
|
|
friend Cache; |
|
|
|
|
|
|
|
public: |
|
|
|
CacheData(uint8_t* data, const size_t size); |
|
|
|
CacheData(uint8_t* data, const size_t size, MemoryAllocator_Free* free); |
|
|
|
CacheData(const CacheData& other); |
|
|
|
~CacheData(); |
|
|
|
|
|
|
@ -225,20 +249,6 @@ namespace dsacache { |
|
|
|
*/ |
|
|
|
|
|
|
|
class Cache { |
|
|
|
public: |
|
|
|
// CachePolicy is the signature of the placement policy handed to the
// cacher; keeping it a type allows flexible usage of the cacher.
// The policy receives the numa destination node (where the data will
// be needed), the numa source node (current location of the data) and
// the data size, and should return the optimal cache placement.
// The returned node may differ from the destination node if the
// system, for example, has HBM attached that is directly accessible
// to node n but exposed under a different node id m.
using CachePolicy = int(const int numa_dst_node, const int numa_src_node, const size_t data_size);
|
|
|
|
|
|
|
// CopyPolicy is the signature of the policy that names the
// copy-executing nodes for a given task. Leaving this choice to the
// user allows flexibility in assignment, either for optimizing raw
// throughput or for choosing a conservative usage policy.
using CopyPolicy = std::vector<int>(const int numa_dst_node, const int numa_src_node, const size_t data_size);
|
|
|
|
|
|
|
private: |
|
|
|
// flags that store the configured cache options
|
|
|
|
|
|
|
@ -261,6 +271,8 @@ namespace dsacache { |
|
|
|
|
|
|
|
CachePolicy* cache_policy_function_ = nullptr; |
|
|
|
CopyPolicy* copy_policy_function_ = nullptr; |
|
|
|
MemoryAllocator_Allocate* memory_allocate_function_ = nullptr; |
|
|
|
MemoryAllocator_Free* memory_free_function_ = nullptr; |
|
|
|
|
|
|
|
// function used to submit a copy task on a specific node to the dml
|
|
|
|
// engine on that node - will change the current threads node assignment
|
|
|
@ -281,12 +293,6 @@ namespace dsacache { |
|
|
|
// as this is set as the "optimal placement" node
|
|
|
|
void GetCacheNode(uint8_t* src, const size_t size, int* OUT_DST_NODE, int* OUT_SRC_NODE) const; |
|
|
|
|
|
|
|
// allocates memory of size "size" on the numa node "node"
|
|
|
|
// and returns nullptr if this is not possible, also may
|
|
|
|
// try to flush the cache of the requested node to
|
|
|
|
// alleviate encountered shortage
|
|
|
|
uint8_t* AllocOnNode(const size_t size, const int node); |
|
|
|
|
|
|
|
// checks whether the cache contains an entry for
|
|
|
|
// the given data in the given memory node and
|
|
|
|
// returns it, otherwise returns nullptr
|
|
|
@ -299,7 +305,11 @@ namespace dsacache { |
|
|
|
|
|
|
|
// initializes the cache with the two policy functions
|
|
|
|
// only after this is it safe to use in a threaded environment
|
|
|
|
void Init(CachePolicy* cache_policy_function, CopyPolicy* copy_policy_function); |
|
|
|
void Init( |
|
|
|
CachePolicy* cache_policy_function, CopyPolicy* copy_policy_function, |
|
|
|
MemoryAllocator_Allocate* memory_allocate_function, |
|
|
|
MemoryAllocator_Free* memory_free_function |
|
|
|
); |
|
|
|
|
|
|
|
// function to perform data access through the cache, behaviour depends
|
|
|
|
// on flags, by default will also perform prefetch, otherwise with
|
|
|
@ -336,9 +346,15 @@ inline void dsacache::Cache::Clear() { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
inline void dsacache::Cache::Init(CachePolicy* cache_policy_function, CopyPolicy* copy_policy_function) { |
|
|
|
inline void dsacache::Cache::Init( |
|
|
|
CachePolicy* cache_policy_function, CopyPolicy* copy_policy_function, |
|
|
|
MemoryAllocator_Allocate* memory_allocate_function, |
|
|
|
MemoryAllocator_Free* memory_free_function |
|
|
|
) { |
|
|
|
cache_policy_function_ = cache_policy_function; |
|
|
|
copy_policy_function_ = copy_policy_function; |
|
|
|
memory_allocate_function_ = memory_allocate_function; |
|
|
|
memory_free_function_ = memory_free_function; |
|
|
|
|
|
|
|
// initialize numa library
|
|
|
|
|
|
|
@ -382,7 +398,7 @@ inline std::unique_ptr<dsacache::CacheData> dsacache::Cache::Access(uint8_t* dat |
|
|
|
// at this point the requested data is not present in cache
|
|
|
|
// and we create a caching task for it, copying our current flags
|
|
|
|
|
|
|
|
task = std::make_unique<CacheData>(data, size); |
|
|
|
task = std::make_unique<CacheData>(data, size, memory_free_function_); |
|
|
|
task->SetFlags(flags_); |
|
|
|
|
|
|
|
// when the ACCESS_WEAK flag is set for the flags parameter (!)
|
|
|
@ -434,51 +450,8 @@ inline std::unique_ptr<dsacache::CacheData> dsacache::Cache::Access(uint8_t* dat |
|
|
|
return std::move(task); |
|
|
|
} |
|
|
|
|
|
|
|
inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node) { |
|
|
|
// allocate data on this node and flush the unused parts of the
|
|
|
|
// cache if the operation fails and retry once
|
|
|
|
// TODO: smarter flush strategy could keep some stuff cached
|
|
|
|
|
|
|
|
// check currently free memory to see if the data fits
|
|
|
|
|
|
|
|
long long int free_space = 0; |
|
|
|
numa_node_size64(node, &free_space); |
|
|
|
|
|
|
|
if (free_space < size) { |
|
|
|
// dst node lacks memory space so we flush the cache for this
|
|
|
|
// node hoping to free enough currently unused entries to make
|
|
|
|
// the second allocation attempt successful
|
|
|
|
|
|
|
|
Flush(node); |
|
|
|
|
|
|
|
// re-test by getting the free space and checking again
|
|
|
|
|
|
|
|
numa_node_size64(node, &free_space); |
|
|
|
|
|
|
|
if (free_space < size) { |
|
|
|
return nullptr; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
uint8_t* dst = reinterpret_cast<uint8_t*>(numa_alloc_onnode(size, node)); |
|
|
|
|
|
|
|
if (dst == nullptr) { |
|
|
|
return nullptr; |
|
|
|
} |
|
|
|
|
|
|
|
if (CheckFlag(flags_, FLAG_FORCE_MAP_PAGES)) { |
|
|
|
static const size_t page_size_b = getpagesize(); |
|
|
|
|
|
|
|
for (size_t i = 0; i < size; i += page_size_b) { |
|
|
|
dst[i] = 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return dst; |
|
|
|
} |
|
|
|
|
|
|
|
inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, const int src_node) { |
|
|
|
uint8_t* dst = AllocOnNode(task->GetSize(), dst_node); |
|
|
|
uint8_t* dst = memory_allocate_function_(dst_node, task->GetSize()); |
|
|
|
|
|
|
|
if (dst == nullptr) { |
|
|
|
return; |
|
|
@ -667,10 +640,11 @@ inline dsacache::Cache::~Cache() { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
inline dsacache::CacheData::CacheData(uint8_t* data, const size_t size) { |
|
|
|
inline dsacache::CacheData::CacheData(uint8_t* data, const size_t size, MemoryAllocator_Free* free) { |
|
|
|
src_ = data; |
|
|
|
size_ = size; |
|
|
|
delete_ = false; |
|
|
|
memory_free_function_ = free; |
|
|
|
active_ = new std::atomic<int32_t>(1); |
|
|
|
cache_ = new std::atomic<uint8_t*>(data); |
|
|
|
handlers_ = new std::atomic<std::vector<dml_handler>*>(); |
|
|
@ -689,6 +663,8 @@ inline dsacache::CacheData::CacheData(const dsacache::CacheData& other) { |
|
|
|
cache_ = other.cache_; |
|
|
|
flags_ = other.flags_; |
|
|
|
|
|
|
|
memory_free_function_ = other.memory_free_function_; |
|
|
|
|
|
|
|
incomplete_cache_ = other.incomplete_cache_; |
|
|
|
handlers_ = other.handlers_; |
|
|
|
invalid_handlers_ = other.invalid_handlers_; |
|
|
@ -733,8 +709,8 @@ inline void dsacache::CacheData::Deallocate() { |
|
|
|
// takes place for the retrieved local cache
|
|
|
|
|
|
|
|
uint8_t* cache_local = cache_->exchange(nullptr); |
|
|
|
if (cache_local != nullptr && delete_) numa_free(cache_local, size_); |
|
|
|
else if (*incomplete_cache_ != nullptr) numa_free(*incomplete_cache_, size_); |
|
|
|
if (cache_local != nullptr && delete_) memory_free_function_(cache_local, size_); |
|
|
|
else if (*incomplete_cache_ != nullptr) memory_free_function_(*incomplete_cache_, size_); |
|
|
|
else; |
|
|
|
} |
|
|
|
|
|
|
|