From 2f17a38ef085da9a00101b76f61d1a9ed960c45e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Wed, 24 Jan 2024 05:14:54 +0100 Subject: [PATCH 1/3] select device via jobs node parameter so we can skip the numa affinity change, also properly delete jobs in destructor --- offloading-cacher/cache.hpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/offloading-cacher/cache.hpp b/offloading-cacher/cache.hpp index d392167..07253cc 100644 --- a/offloading-cacher/cache.hpp +++ b/offloading-cacher/cache.hpp @@ -400,12 +400,6 @@ inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, con const size_t size = task->GetSize() / task_count; const size_t last_size = size + task->GetSize() % task_count; - // save the current numa node mask to restore later - // as executing the copy task will place this thread - // on a different node - - bitmask* nodemask = numa_get_run_node_mask(); - auto handlers = new std::vector<dml_job_t*>(); for (uint32_t i = 0; i < task_count; i++) { @@ -418,18 +412,11 @@ inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, con } task->SetTaskHandlersAndCache(dst, handlers); - - // restore the previous nodemask - - numa_run_on_node_mask(nodemask); - numa_free_nodemask(nodemask); } inline dml_job_t* dsacache::Cache::ExecuteCopy( const uint8_t* src, uint8_t* dst, const size_t size, const int node ) const { - numa_run_on_node(node); - uint32_t job_size = 0; dml_status_t status = dml_get_job_size(DML_PATH_HW, &job_size); @@ -452,6 +439,7 @@ inline dml_job_t* dsacache::Cache::ExecuteCopy( job->destination_first_ptr = dst; job->source_length = size; job->flags |= DML_FLAG_BLOCK_ON_FAULT | DML_FLAG_COPY_ONLY; + job->numa_id = node; status = dml_submit_job(job); @@ -642,8 +630,12 @@ inline dsacache::CacheData::~CacheData() { Deallocate(); - for (dml_job_t* job : *handlers_->load()) { - if (job != nullptr) delete job; + std::vector<dml_job_t*>* handlers = handlers_->load(); + + if 
(handlers != nullptr && handlers != reinterpret_cast<std::vector<dml_job_t*>*>(maxptr)) { + for (dml_job_t* job : *handlers_->load()) { + if (job != nullptr) delete job; + } } delete active_; From 2fe9aa90e37afcde28d040f01bf8186f6c4acd2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Wed, 24 Jan 2024 05:15:11 +0100 Subject: [PATCH 2/3] latest changes to benchmarker --- qdp_project/src/Benchmark.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp index 7c2f76b..a7bc547 100644 --- a/qdp_project/src/Benchmark.cpp +++ b/qdp_project/src/Benchmark.cpp @@ -19,7 +19,7 @@ #include "BenchmarkHelpers.cpp" -#define MODE_PREFETCH +#define MODE_HBM //////////////////////////////// /// BENCHMARK SETUP @@ -29,7 +29,7 @@ constexpr uint32_t WARMUP_ITERATION_COUNT = 5; constexpr uint32_t ITERATION_COUNT = 5; #ifdef MODE_PREFETCH -constexpr size_t CHUNK_SIZE_B = 64_MiB; +constexpr size_t CHUNK_SIZE_B = 128_MiB; constexpr uint32_t GROUP_COUNT = 32; constexpr uint32_t TC_SCANA = 1; constexpr uint32_t TC_SCANB = 1; @@ -73,7 +73,6 @@ constexpr size_t MASK_STEP_SIZE = CHUNK_SIZE_ELEMENTS / MASK_ELEMENT_SIZE; static_assert(RUN_COUNT > 0); static_assert(TC_SCANB <= TC_AGGRJ); -static_assert(TC_AGGRJ % TC_SCANB == 0); static_assert(WL_SIZE_B % 16 == 0); static_assert(CHUNK_SIZE_B % 16 == 0); @@ -108,7 +107,7 @@ inline uint16_t* get_mask(uint16_t* base, const size_t chunk_index, const size_t } void scan_b(size_t gid, size_t tid) { - constexpr size_t split = TC_AGGRJ / (TC_SCANB == 0 ? 
1 : TC_SCANB); const size_t start = tid * split; const size_t end = start + split; From fb98c282cbe89dc36ba5f17bbb1d9ba1e257ebb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Wed, 24 Jan 2024 05:15:59 +0100 Subject: [PATCH 3/3] add test results for qdp --- ...leq-dram-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv | 6 ------ ...leq-dram-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv | 6 ++++++ ...pleq-hbm-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv | 6 ------ ...pleq-hbm-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv | 6 ++++++ ...fetch-tca1-tcb1-tcj1-tmul32-wl2147483648-cs134217728.csv | 6 ------ ...fetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs134217728.csv | 6 ++++++ 6 files changed, 18 insertions(+), 18 deletions(-) delete mode 100644 qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv create mode 100644 qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv delete mode 100644 qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv create mode 100644 qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv delete mode 100644 qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl2147483648-cs134217728.csv create mode 100644 qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs134217728.csv diff --git a/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv b/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv deleted file mode 100644 index 7e769cd..0000000 --- a/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;time;result[0]; -0;22199017;0; -1;16588422;0; -2;18267635;0; -3;17026004;0; -4;16958071;0; diff --git 
a/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv b/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv new file mode 100644 index 0000000..d6d3a6e --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +0;97400868;0; +1;97565944;0; +2;89098555;0; +3;93226925;0; +4;97550283;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv b/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv deleted file mode 100644 index b647ecb..0000000 --- a/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;time;result[0]; -0;14448722;0; -1;17734795;0; -2;19240141;0; -3;15579654;0; -4;14252101;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv b/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv new file mode 100644 index 0000000..146307f --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +0;67853704;0; +1;85513791;0; +2;66482278;0; +3;67492755;0; +4;68083298;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl2147483648-cs134217728.csv b/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl2147483648-cs134217728.csv deleted file mode 100644 index 87cc9a8..0000000 --- a/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl2147483648-cs134217728.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;time;result[0]; -0;916843;0; -1;1060229;0; -2;914006;0; -3;1217119;0; -4;1029607;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs134217728.csv 
b/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs134217728.csv new file mode 100644 index 0000000..e2c3e4f --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs134217728.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +0;6136464327;0; +1;6125809164;0; +2;6122382339;0; +3;6207611081;0; +4;6149495534;0;