From e3e17cec7b0d6b6cecdbc740faf72f2635eac5e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Wed, 10 Jan 2024 17:44:52 +0100 Subject: [PATCH] extend the main method of the small supplied test program to allow running on multiple threads --- offloading-cacher/CMakeLists.txt | 5 +- offloading-cacher/main.cpp | 138 ++++++++++++++++++++++++------- 2 files changed, 110 insertions(+), 33 deletions(-) diff --git a/offloading-cacher/CMakeLists.txt b/offloading-cacher/CMakeLists.txt index 7b4844a..19ddbdd 100755 --- a/offloading-cacher/CMakeLists.txt +++ b/offloading-cacher/CMakeLists.txt @@ -1,12 +1,13 @@ cmake_minimum_required(VERSION 3.18) -project(offloading-cacher) +project(offloading-cacher LANGUAGES CXX) set(CMAKE_CXX_STANDARD 20) list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/modules") find_package(NUMA REQUIRED) +find_package(OpenMP REQUIRED) set(DML_SOURCE_DIR "../../DML/include/") set(SOURCES main.cpp) @@ -14,6 +15,6 @@ set(SOURCES main.cpp) add_executable(offloading-cacher ${SOURCES}) target_include_directories(offloading-cacher PRIVATE ${CMAKE_SOURCE_DIR} ${NUMA_INCLUDE_DIRS} ${DML_SOURCE_DIR}) -target_link_libraries(offloading-cacher PRIVATE libdml.a pthread ${CMAKE_DL_LIBS} ${NUMA_LIBRARY}) +target_link_libraries(offloading-cacher PRIVATE libdml.a pthread ${CMAKE_DL_LIBS} ${NUMA_LIBRARY} OpenMP::OpenMP_CXX) install(TARGETS offloading-cacher DESTINATION ${CMAKE_INSTALL_PREFIX}) diff --git a/offloading-cacher/main.cpp b/offloading-cacher/main.cpp index 4310d3d..08640dc 100644 --- a/offloading-cacher/main.cpp +++ b/offloading-cacher/main.cpp @@ -1,10 +1,49 @@ #include #include +#include +#include + +#include #include "cache.hpp" dsacache::Cache CACHE; +void InitCache(const std::string& device) { + if (device == "default") { + auto cache_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) { + return numa_dst_node; + }; + + auto copy_policy = [](const int numa_dst_node, const int numa_src_node) 
{ + return std::vector{ numa_src_node, numa_dst_node }; + }; + + CACHE.Init(cache_policy,copy_policy); + } + else if (device == "xeonmax") { + auto cache_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) { + return numa_dst_node < 8 ? numa_dst_node + 8 : numa_dst_node; + }; + + auto copy_policy = [](const int numa_dst_node, const int numa_src_node) { + const bool same_socket = ((numa_dst_node ^ numa_src_node) & 4) == 0; + if (same_socket) { + const bool socket_number = numa_dst_node >> 2; + if (socket_number == 0) return std::vector{ 0, 1, 2, 3 }; + else return std::vector{ 4, 5, 6, 7 }; + } + else return std::vector{ numa_src_node, numa_dst_node }; + }; + + CACHE.Init(cache_policy,copy_policy); + } + else { + std::cerr << "Given device '" << device << "' not supported!" << std::endl; + exit(-1); + } +} + double* GetRandomArray(const size_t size) { double* array = new double[size]; @@ -31,7 +70,7 @@ bool IsEqual(const double* a, const double* b, const size_t size) { return true; } -void PerformAccessAndTest(double* src, const size_t size) { +void PerformAccessAndTest(double* src, const size_t size, const int tid) { std::unique_ptr data_cache = CACHE.Access( reinterpret_cast(src), size * sizeof(double) @@ -42,13 +81,13 @@ void PerformAccessAndTest(double* src, const size_t size) { // check the value immediately just to see if ram or cache was returned if (src == cached_imm) { - std::cout << "Caching did not immediately yield different data location." << std::endl; + std::cout << "[" << tid << "] Caching did not immediately yield different data location." << std::endl; } else if (cached_imm == nullptr) { - std::cout << "Immediately got nullptr." << std::endl; + std::cout << "[" << tid << "] Immediately got nullptr." << std::endl; } else { - std::cout << "Immediately got different data location." << std::endl; + std::cout << "[" << tid << "] Immediately got different data location." 
<< std::endl; } // waits for the completion of the asynchronous caching operation @@ -62,56 +101,93 @@ void PerformAccessAndTest(double* src, const size_t size) { // tests on the resulting value if (src == cached) { - std::cout << "Caching did not affect data location." << std::endl; + std::cout << "[" << tid << "] Caching did not affect data location." << std::endl; } else if (cached == nullptr) { - std::cerr << "Got nullptr from cache." << std::endl; + std::cerr << "[" << tid << "] Got nullptr from cache." << std::endl; } else { - std::cout << "Got different data location from cache." << std::endl; + std::cout << "[" << tid << "] Got different data location from cache." << std::endl; } if (IsEqual(src,cached,size)) { - std::cout << "Cached data is correct." << std::endl; + std::cout << "[" << tid << "] Cached data is correct." << std::endl; } else { - std::cerr << "Cached data is wrong." << std::endl; + std::cerr << "[" << tid << "] Cached data is wrong." << std::endl; } } -int main(int argc, char **argv) { +void RunTestST(const size_t size) { + double* data = GetRandomArray(size); - // given numa destination and source node and the size of the data - // this function decides on which the data will be placed - // which is used to select the HBM-node for the dst-node if desired + static constexpr int tid = 0; - auto cache_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) { - return numa_dst_node; - }; + std::cout << "[" << tid << "] first access --- " << std::endl; - // this function receives the memory source and destination node - // and then decides, on which nodes the copy operation will be split + PerformAccessAndTest(data, size, tid); - auto copy_policy = [](const int numa_dst_node, const int numa_src_node) { - return std::vector{ numa_src_node, numa_dst_node }; - }; + std::cout << "[" << tid << "] second access --- " << std::endl; - // initializes the cache with the two policies + PerformAccessAndTest(data, size, tid); - 
CACHE.Init(cache_policy,copy_policy); + std::cout << "[" << tid << "] end of application --- " << std::endl; +} - // generate the test data +void RunTestMT(const size_t size) { + double* data = GetRandomArray(size); - static constexpr size_t data_size = 1024 * 1024; - double* data = GetRandomArray(data_size); + #pragma omp parallel + { + const int tid = omp_get_thread_num(); - std::cout << "--- first access --- " << std::endl; + std::cout << "[" << tid << "] first access --- " << std::endl; - PerformAccessAndTest(data, data_size); + PerformAccessAndTest(data, size, tid); - std::cout << "--- second access --- " << std::endl; + std::cout << "[" << tid << "] second access --- " << std::endl; - PerformAccessAndTest(data, data_size); + PerformAccessAndTest(data, size, tid); - std::cout << "--- end of application --- " << std::endl; + std::cout << "[" << tid << "] end of block --- " << std::endl; + } +} + +int main(int argc, char **argv) { + if (argc != 4) { + std::cerr << "This application requires three parameters!" << std::endl; + + std::cout << "Please provide the following positional arguments: [device] [mode] [size]" << std::endl; + std::cout << "[device] from { default, xeonmax } which influences cache and execution placement" << std::endl; + std::cout << "[mode] from { st, mt } for single and multi threaded respectively" << std::endl; + std::cout << "[size] positive integral number, amount of float64 in data array" << std::endl; + + exit(-1); + } + + const std::string device = argv[1]; + const std::string mode = argv[2]; + const std::string size_s = argv[3]; + + uint32_t size = 0; + + try { + size = std::stoul(size_s); + } + catch (...) { + std::cerr << "Given Size '" << size_s << "' caused error during conversion to number!" << std::endl; + } + + InitCache(device); + + if (mode == "st") { + RunTestST(size); + } + else if (mode == "mt") { + RunTestMT(size); + } + else { + std::cerr << "Given Mode '" << mode << "' not supported!" << std::endl; + exit(-1); + } }