From 52132522a31cd6f68b380dbd88587edb8c9d991e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Tue, 23 Jan 2024 22:09:58 +0100 Subject: [PATCH] fix scanb not working with less than aggrj threads --- qdp_project/src/Benchmark.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp index 48f03ee..f68bbd5 100644 --- a/qdp_project/src/Benchmark.cpp +++ b/qdp_project/src/Benchmark.cpp @@ -28,10 +28,10 @@ constexpr uint64_t CMP_A = 50; constexpr uint32_t WARMUP_ITERATION_COUNT = 0; constexpr uint32_t ITERATION_COUNT = 2; constexpr uint32_t GROUP_COUNT = 2; -constexpr uint32_t TC_SCANA = 1; constexpr uint32_t TC_SCANB = 1; +constexpr uint32_t TC_SCANA = 1; constexpr uint32_t TC_AGGRJ = 1; -constexpr bool PERFORM_CACHING = false; +constexpr bool PERFORM_CACHING = true; constexpr bool DATA_IN_HBM = false; constexpr char MODE_STRING[] = "DramBase"; @@ -59,19 +59,19 @@ uint64_t* DATA_DST_; void scan_b(size_t gid, size_t tid) { LAUNCH_.wait(); - uint32_t runs = CHUNK_COUNT / GROUP_COUNT + (CHUNK_COUNT % GROUP_COUNT > gid); - - std::unique_ptr data; + if constexpr (PERFORM_CACHING) { + uint32_t runs = CHUNK_COUNT / GROUP_COUNT + (CHUNK_COUNT % GROUP_COUNT > gid); - for(uint32_t i = 0; i < runs; ++i) { - // calculate pointers - size_t chunk_id = gid + GROUP_COUNT * i; - uint64_t* chunk_ptr = get_sub_chunk_ptr(DATA_B_, chunk_id, CHUNK_SIZE_ELEMENTS, tid, TC_SCANB); + std::unique_ptr data; - if constexpr (PERFORM_CACHING) { - data = CACHE_.Access(reinterpret_cast(chunk_ptr), CHUNK_SIZE_B / TC_SCANB); - data->WaitOnCompletion(); + for(uint32_t i = 0; i < runs; ++i) { + // calculate pointers + size_t chunk_id = gid + GROUP_COUNT * i; + uint64_t* chunk_ptr = get_sub_chunk_ptr(DATA_B_, chunk_id, CHUNK_SIZE_ELEMENTS, tid, TC_AGGRJ); + data = CACHE_.Access(reinterpret_cast(chunk_ptr), CHUNK_SIZE_B / TC_AGGRJ); } + + data->WaitOnCompletion(); } 
BARRIERS_[gid]->arrive_and_drop(); @@ -204,7 +204,7 @@ int main() { for(std::thread& t : copy_pool) { t.join(); } for(std::thread& t : agg_pool) { t.join(); } - Aggregation::apply(DATA_DST_, DATA_DST_, sizeof(uint64_t) * TC_AGGRJ * GROUP_COUNT); + Aggregation::apply(DATA_DST_, DATA_DST_, sizeof(uint64_t) * TC_AGGRJ * GROUP_COUNT); const auto time_end = std::chrono::steady_clock::now();