diff --git a/offloading-cacher/cache.hpp b/offloading-cacher/cache.hpp index d392167..07253cc 100644 --- a/offloading-cacher/cache.hpp +++ b/offloading-cacher/cache.hpp @@ -400,12 +400,6 @@ inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, con const size_t size = task->GetSize() / task_count; const size_t last_size = size + task->GetSize() % task_count; - // save the current numa node mask to restore later - // as executing the copy task will place this thread - // on a different node - - bitmask* nodemask = numa_get_run_node_mask(); - auto handlers = new std::vector(); for (uint32_t i = 0; i < task_count; i++) { @@ -418,18 +412,11 @@ inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, con } task->SetTaskHandlersAndCache(dst, handlers); - - // restore the previous nodemask - - numa_run_on_node_mask(nodemask); - numa_free_nodemask(nodemask); } inline dml_job_t* dsacache::Cache::ExecuteCopy( const uint8_t* src, uint8_t* dst, const size_t size, const int node ) const { - numa_run_on_node(node); - uint32_t job_size = 0; dml_status_t status = dml_get_job_size(DML_PATH_HW, &job_size); @@ -452,6 +439,7 @@ inline dml_job_t* dsacache::Cache::ExecuteCopy( job->destination_first_ptr = dst; job->source_length = size; job->flags |= DML_FLAG_BLOCK_ON_FAULT | DML_FLAG_COPY_ONLY; + job->numa_id = node; status = dml_submit_job(job); @@ -642,8 +630,12 @@ inline dsacache::CacheData::~CacheData() { Deallocate(); - for (dml_job_t* job : *handlers_->load()) { - if (job != nullptr) delete job; + std::vector* handlers = handlers_->load(); + + if (handlers != nullptr && handlers != reinterpret_cast*>(maxptr)) { + for (dml_job_t* job : *handlers_->load()) { + if (job != nullptr) delete job; + } } delete active_; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv b/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv deleted file mode 100644 index 7e769cd..0000000 --- a/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;time;result[0]; -0;22199017;0; -1;16588422;0; -2;18267635;0; -3;17026004;0; -4;16958071;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv b/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv new file mode 100644 index 0000000..d6d3a6e --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-dram-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +0;97400868;0; +1;97565944;0; +2;89098555;0; +3;93226925;0; +4;97550283;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv b/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv deleted file mode 100644 index b647ecb..0000000 --- a/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl2147483648-cs2097152.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;time;result[0]; -0;14448722;0; -1;17734795;0; -2;19240141;0; -3;15579654;0; -4;14252101;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv b/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv new file mode 100644 index 0000000..146307f --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-hbm-tca4-tcb0-tcj2-tmul4-wl4294967296-cs2097152.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +0;67853704;0; +1;85513791;0; +2;66482278;0; +3;67492755;0; +4;68083298;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl2147483648-cs134217728.csv b/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl2147483648-cs134217728.csv deleted file mode 100644 index 87cc9a8..0000000 --- a/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl2147483648-cs134217728.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;time;result[0]; -0;916843;0; -1;1060229;0; -2;914006;0; -3;1217119;0; -4;1029607;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs134217728.csv b/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs134217728.csv new file mode 100644 index 0000000..e2c3e4f --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-prefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs134217728.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +0;6136464327;0; +1;6125809164;0; +2;6122382339;0; +3;6207611081;0; +4;6149495534;0; diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp index 84d63f1..07c7ebe 100644 --- a/qdp_project/src/Benchmark.cpp +++ b/qdp_project/src/Benchmark.cpp @@ -20,7 +20,7 @@ #include "BenchmarkHelpers.cpp" -#define MODE_PREFETCH +#define MODE_HBM //////////////////////////////// /// BENCHMARK SETUP @@ -30,8 +30,8 @@ constexpr uint32_t WARMUP_ITERATION_COUNT = 5; constexpr uint32_t ITERATION_COUNT = 5; #ifdef MODE_PREFETCH -constexpr size_t CHUNK_SIZE_B = 256_MiB; -constexpr uint32_t GROUP_COUNT = 8; +constexpr size_t CHUNK_SIZE_B = 128_MiB; +constexpr uint32_t GROUP_COUNT = 32; constexpr uint32_t TC_SCANA = 1; constexpr uint32_t TC_SCANB = 2; constexpr uint32_t TC_AGGRJ = 2; @@ -74,7 +74,6 @@ constexpr size_t MASK_STEP_SIZE = CHUNK_SIZE_ELEMENTS / MASK_ELEMENT_SIZE; static_assert(RUN_COUNT > 0); static_assert(TC_SCANB <= TC_AGGRJ); -static_assert(TC_AGGRJ % TC_SCANB == 0); static_assert(WL_SIZE_B % 16 == 0); static_assert(CHUNK_SIZE_B % 16 == 0); @@ -168,7 +167,7 @@ void process_timings( } void scan_b(size_t gid, size_t tid) { - constexpr size_t split = TC_AGGRJ / TC_SCANB; + constexpr size_t split = TC_AGGRJ / (TC_SCANB == 0 ? 1 : TC_SCANB); const size_t start = tid * split; const size_t end = start + split;