From 10a791dea1175d09cebfc2f902768e2e948e60e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Wed, 24 Jan 2024 23:02:34 +0100 Subject: [PATCH] remove the experimental code branches that turned out not to yield any benefit (sched_yield adds too much delay, and with the new load balancer, subchunking for aggr_j is no longer needed) --- qdp_project/src/Benchmark.cpp | 53 +++++++---------------------------- 1 file changed, 10 insertions(+), 43 deletions(-) diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp index ceabe9e..ecc1969 100644 --- a/qdp_project/src/Benchmark.cpp +++ b/qdp_project/src/Benchmark.cpp @@ -1,9 +1,5 @@ -#include -#include #include -#include #include -#include #include #include #include @@ -20,7 +16,7 @@ #include "BenchmarkHelpers.cpp" -#define MODE_HBM +#define MODE_PREFETCH //////////////////////////////// /// BENCHMARK SETUP @@ -30,15 +26,13 @@ constexpr uint32_t WARMUP_ITERATION_COUNT = 5; constexpr uint32_t ITERATION_COUNT = 5; #ifdef MODE_PREFETCH -constexpr uint32_t GROUP_COUNT = 8; -constexpr size_t CHUNK_SIZE_B = 16_MiB; -constexpr uint32_t TC_SCANA = 4; +constexpr uint32_t GROUP_COUNT = 16; +constexpr size_t CHUNK_SIZE_B = 8_MiB; +constexpr uint32_t TC_SCANA = 2; constexpr uint32_t TC_SCANB = 1; -constexpr uint32_t TC_AGGRJ = 4; +constexpr uint32_t TC_AGGRJ = 2; constexpr bool PERFORM_CACHING = true; -constexpr bool YIELD_ON_CACHE_MISS = false; constexpr bool DATA_IN_HBM = false; -constexpr bool AGGRJ_ITERATIVE = true; constexpr char MODE_STRING[] = "prefetch"; #endif #ifdef MODE_DRAM @@ -48,9 +42,7 @@ constexpr uint32_t TC_SCANA = 2; constexpr uint32_t TC_SCANB = 0; constexpr uint32_t TC_AGGRJ = 1; constexpr bool PERFORM_CACHING = false; -constexpr bool YIELD_ON_CACHE_MISS = false; constexpr bool DATA_IN_HBM = false; -constexpr bool AGGRJ_ITERATIVE = false; constexpr char MODE_STRING[] = "dram"; #endif #ifdef MODE_HBM @@ -60,9 +52,7 @@ constexpr uint32_t TC_SCANA = 2; constexpr uint32_t
TC_SCANB = 0; constexpr uint32_t TC_AGGRJ = 1; constexpr bool PERFORM_CACHING = false; -constexpr bool YIELD_ON_CACHE_MISS = false; constexpr bool DATA_IN_HBM = true; -constexpr bool AGGRJ_ITERATIVE = false; constexpr char MODE_STRING[] = "hbm"; #endif @@ -254,12 +244,7 @@ void aggr_j(size_t gid, size_t tid) { THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); - if constexpr (AGGRJ_ITERATIVE) { - if (tid == 0) BARRIERS_[gid]->arrive_and_wait(); - } - else { - BARRIERS_[gid]->arrive_and_wait(); - } + BARRIERS_[gid]->arrive_and_wait(); THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); @@ -278,7 +263,6 @@ void aggr_j(size_t gid, size_t tid) { if (data_ptr == nullptr) { data_ptr = chunk_ptr; - if constexpr (YIELD_ON_CACHE_MISS) sched_yield(); } else { CACHE_HITS_[gid * tid]++; @@ -294,16 +278,9 @@ void aggr_j(size_t gid, size_t tid) { THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); - if constexpr (!AGGRJ_ITERATIVE) { - BARRIERS_[gid]->arrive_and_drop(); - } + BARRIERS_[gid]->arrive_and_drop(); aggregation::happly(DATA_DST_ + (tid * GROUP_COUNT + gid), aggregator); - - if constexpr (AGGRJ_ITERATIVE) { - if (++tid < TC_AGGRJ) aggr_j(gid, tid); - else BARRIERS_[gid]->arrive_and_drop(); - } } int main() { @@ -349,12 +326,7 @@ int main() { std::vector agg_pool; for(uint32_t gid = 0; gid < GROUP_COUNT; ++gid) { - if constexpr (AGGRJ_ITERATIVE) { - BARRIERS_.emplace_back(new std::barrier(TC_SCANA + TC_SCANB + 1)); - } - else { - BARRIERS_.emplace_back(new std::barrier(TC_COMBINED)); - } + BARRIERS_.emplace_back(new std::barrier(TC_COMBINED)); for(uint32_t tid = 0; tid < TC_SCANA; ++tid) { filter_pool.emplace_back(scan_a, gid, tid); @@ -364,13 +336,8 @@ int main() { copy_pool.emplace_back(scan_b, gid, tid); } - if constexpr (AGGRJ_ITERATIVE) { - agg_pool.emplace_back(aggr_j, gid, 0); - } - else { - for(uint32_t tid = 0; 
tid < TC_AGGRJ; ++tid) { - agg_pool.emplace_back(aggr_j, gid, tid); - } + for(uint32_t tid = 0; tid < TC_AGGRJ; ++tid) { + agg_pool.emplace_back(aggr_j, gid, tid); } }