From bb1d20924aed41d34fd970f8308cb5955a874138 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Thu, 25 Jan 2024 16:09:05 +0100 Subject: [PATCH] fix index clash for thread-and-group unique indexing --- qdp_project/src/Benchmark.cpp | 36 +++++++++++----------- qdp_project/src/utils/BenchmarkHelpers.cpp | 4 +++ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp index cabb8aa..d895635 100644 --- a/qdp_project/src/Benchmark.cpp +++ b/qdp_project/src/Benchmark.cpp @@ -42,12 +42,12 @@ void scan_b(size_t gid, size_t tid) { constexpr size_t SUBCHUNK_COUNT = TC_AGGRJ / (TC_SCANB == 0 ? 1 : TC_SCANB); constexpr size_t SUBCHUNK_SIZE_B = CHUNK_SIZE_B / SUBCHUNK_COUNT; - THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid].clear(); - THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid].resize(1); + THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)].clear(); + THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1); LAUNCH_.wait(); - THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); + THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); if constexpr (PERFORM_CACHING) { for (size_t i = 0; i < RUN_COUNT; i++) { @@ -71,19 +71,19 @@ void scan_b(size_t gid, size_t tid) { } } - THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); - THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); + THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); + THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); BARRIERS_[gid]->arrive_and_drop(); } void scan_a(size_t gid, size_t tid) { - THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid].clear(); - THREAD_TIMING_[SCANA_TIMING_INDEX][tid 
* gid].resize(1); + THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)].clear(); + THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1); LAUNCH_.wait(); - THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); + THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); for (size_t i = 0; i < RUN_COUNT; i++) { const size_t chunk_index = get_chunk_index(gid, i); @@ -93,27 +93,27 @@ void scan_a(size_t gid, size_t tid) { filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA); } - THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); - THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); + THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); + THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); BARRIERS_[gid]->arrive_and_drop(); } void aggr_j(size_t gid, size_t tid) { - CACHE_HITS_[gid * tid] = 0; + CACHE_HITS_[UniqueIndex(gid,tid)] = 0; - THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid].clear(); - THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid].resize(1); + THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)].clear(); + THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1); __m512i aggregator = aggregation::OP::zero(); LAUNCH_.wait(); - THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); + THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); BARRIERS_[gid]->arrive_and_wait(); - THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); + THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = 
std::chrono::steady_clock::now(); for (size_t i = 0; i < RUN_COUNT; i++) { const size_t chunk_index = get_chunk_index(gid, i); @@ -133,7 +133,7 @@ void aggr_j(size_t gid, size_t tid) { data_ptr = chunk_ptr; } else { - CACHE_HITS_[gid * tid]++; + CACHE_HITS_[UniqueIndex(gid,tid)]++; } } else { @@ -150,11 +150,11 @@ void aggr_j(size_t gid, size_t tid) { } } - THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); + THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); BARRIERS_[gid]->arrive_and_drop(); - aggregation::happly(&DATA_DST_[gid * tid], aggregator); + aggregation::happly(&DATA_DST_[UniqueIndex(gid,tid)], aggregator); } int main() { diff --git a/qdp_project/src/utils/BenchmarkHelpers.cpp b/qdp_project/src/utils/BenchmarkHelpers.cpp index 713b925..00727f5 100644 --- a/qdp_project/src/utils/BenchmarkHelpers.cpp +++ b/qdp_project/src/utils/BenchmarkHelpers.cpp @@ -13,6 +13,10 @@ std::array CACHE_HITS_; +inline size_t UniqueIndex(const size_t gid, const size_t tid) { /* Flat slot for a (group, thread) pair: distinct pairs give distinct slots as long as gid < GROUP_COUNT. Parameters are size_t to match the gid/tid type the callers pass, so no narrowing occurs at the call site. */ + return tid * GROUP_COUNT + gid; +} + uint64_t sum_check(uint64_t compare_value, uint64_t* row_A, uint64_t* row_B, size_t row_size) { uint64_t sum = 0; for(int i = 0; i < row_size / sizeof(uint64_t); ++i) {