#include #include #include #include #include #include "cache.hpp" static constexpr size_t SIZE_64_MIB = 64 * 1024 * 1024; dsacache::Cache CACHE; void InitCache(const std::string& device) { if (device == "default") { auto cache_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) { return numa_dst_node; }; auto copy_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) { return std::vector{ numa_dst_node }; }; CACHE.Init(cache_policy,copy_policy); } else if (device == "xeonmax") { auto cache_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) { // xeon max is configured to have hbm on node ids that are +8 return numa_dst_node < 8 ? numa_dst_node + 8 : numa_dst_node; }; auto copy_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) { if (data_size < SIZE_64_MIB) { // if the data size is small then the copy will just be carried // out by the destination node which does not require setting numa // thread affinity as the selected dsa engine is already the one // present on the calling thread return std::vector{ (numa_dst_node >= 8 ? numa_dst_node - 8 : numa_dst_node) }; } else { // for sufficiently large data, smart copy is used which will utilize // all four engines for intra-socket copy operations and cross copy on // the source and destination nodes for inter-socket copy const bool same_socket = ((numa_dst_node ^ numa_src_node) & 4) == 0; if (same_socket) { const bool socket_number = numa_dst_node >> 2; if (socket_number == 0) return std::vector{ 0, 1, 2, 3 }; else return std::vector{ 4, 5, 6, 7 }; } else { return std::vector{ (numa_src_node >= 8 ? numa_src_node - 8 : numa_src_node), (numa_dst_node >= 8 ? numa_dst_node - 8 : numa_dst_node) }; } } }; CACHE.Init(cache_policy,copy_policy); } else { std::cerr << "Given device '" << device << "' not supported!" << std::endl; exit(-1); } } uint8_t* GetRandomArray(const size_t size) { uint8_t* array = new uint8_t[size]; std::uniform_int_distribution unif(std::numeric_limits::min(), std::numeric_limits::max()); std::default_random_engine re; for (size_t i = 0; i < size; i++) { array[i] = unif(re); } return array; } bool IsEqual(const uint8_t* a, const uint8_t* b, const size_t size) { for (size_t i = 0; i < size; i++) { try { if (a[i] != b[i]) return false; } catch (...) { return false; } } return true; } std::unique_ptr PerformAccessAndTest(uint8_t* src, const size_t size, const int tid) { std::unique_ptr data_cache = CACHE.Access( reinterpret_cast(src), size * sizeof(uint8_t) ); data_cache->SetFlags(dsacache::FLAG_WAIT_WEAK); data_cache->WaitOnCompletion(); uint8_t* cached_imm = reinterpret_cast(data_cache->GetDataLocation()); // check the value immediately just to see if ram or cache was returned if (src == cached_imm) { std::cout << "[" << tid << "] Caching did not immediately yield different data location." << std::endl; } else if (cached_imm == nullptr) { std::cout << "[" << tid << "] Immediately got nullptr." << std::endl; } else { std::cout << "[" << tid << "] Immediately got different data location." << std::endl; } // waits for the completion of the asynchronous caching operation data_cache->SetFlags(dsacache::FLAG_DEFAULT); data_cache->WaitOnCompletion(); // gets the cache-data-location from the struct uint8_t* cached = reinterpret_cast(data_cache->GetDataLocation()); // tests on the resulting value if (src == cached) { std::cout << "[" << tid << "] Caching did not affect data location." << std::endl; } else if (cached == nullptr) { std::cerr << "[" << tid << "] Got nullptr from cache." << std::endl; } else { std::cout << "[" << tid << "] Got different data location from cache." << std::endl; } if (IsEqual(src,cached,size)) { std::cout << "[" << tid << "] Cached data is correct." << std::endl; } else { std::cerr << "[" << tid << "] Cached data is wrong." << std::endl; } return std::move(data_cache); } void RunTestST(const size_t size) { uint8_t* data = GetRandomArray(size); static constexpr int tid = 0; std::cout << "[" << tid << "] first access --- " << std::endl; PerformAccessAndTest(data, size, tid); std::cout << "[" << tid << "] second access --- " << std::endl; PerformAccessAndTest(data, size, tid); std::cout << "[" << tid << "] end of application --- " << std::endl; } void RunTestMT(const size_t size) { uint8_t* data = GetRandomArray(size); #pragma omp parallel { const int tid = omp_get_thread_num(); std::cout << "[" << tid << "] first access --- " << std::endl; PerformAccessAndTest(data, size, tid); std::cout << "[" << tid << "] second access --- " << std::endl; PerformAccessAndTest(data, size, tid); std::cout << "[" << tid << "] end of block --- " << std::endl; } } void RunTestFlush(const size_t size) { uint8_t* data1 = GetRandomArray(size); uint8_t* data2 = GetRandomArray(size); uint8_t* data3 = GetRandomArray(size); static constexpr int tid = 0; std::cout << "[" << tid << "] first access to data d1 and keepalive --- " << std::endl; const auto c1 = PerformAccessAndTest(data1, size, tid); std::cout << "[" << tid << "] second access to d2 lets d2 vanish --- " << std::endl; PerformAccessAndTest(data2, size, tid); std::cout << "[" << tid << "] third access to d3 should clear d2 --- " << std::endl; PerformAccessAndTest(data3, size, tid); std::cout << "[" << tid << "] end of block and test d1 == cache1 --- " << std::endl; if (IsEqual(data1, c1->GetDataLocation(), size)) { std::cout << "[" << tid << "] Cached d1 is still correct." << std::endl; } else { std::cerr << "[" << tid << "] Cached d1 is bad." << std::endl; } } int main(int argc, char **argv) { if (argc != 4) { std::cerr << "This application requires three parameters!" << std::endl; std::cout << "Please provide the following positional arguments: [device] [mode] [size]" << std::endl; std::cout << "[device] from { default, xeonmax } which influences cache and execution placement" << std::endl; std::cout << "[mode] from { st, mt, flt } or single and multi threaded and flushtest respectively" << std::endl; std::cout << "[size] positive integral number, amount of bytes in data array" << std::endl; std::cout << "for flushtest the given size should be 1/3 of the available cache size" << std::endl; exit(-1); } const std::string device = argv[1]; const std::string mode = argv[2]; const std::string size_s = argv[3]; uint32_t size = 0; try { size = std::stoul(size_s); } catch (...) { std::cerr << "Given Size '" << size_s << "' caused error during conversion to number!" << std::endl; } InitCache(device); if (mode == "st") { RunTestST(size); } else if (mode == "mt") { RunTestMT(size); } else if (mode == "flt") { RunTestFlush(size); } else { std::cerr << "Given Mode '" << mode << "' not supported!" << std::endl; exit(-1); } }