diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp index ecc1969..0934f7d 100644 --- a/qdp_project/src/Benchmark.cpp +++ b/qdp_project/src/Benchmark.cpp @@ -32,27 +32,27 @@ constexpr uint32_t TC_SCANA = 2; constexpr uint32_t TC_SCANB = 1; constexpr uint32_t TC_AGGRJ = 2; constexpr bool PERFORM_CACHING = true; -constexpr bool DATA_IN_HBM = false; +constexpr bool DATA_B_IN_HBM = false; constexpr char MODE_STRING[] = "prefetch"; #endif #ifdef MODE_DRAM constexpr size_t CHUNK_SIZE_B = 2_MiB; -constexpr uint32_t GROUP_COUNT = 8; -constexpr uint32_t TC_SCANA = 2; +constexpr uint32_t GROUP_COUNT = 32; +constexpr uint32_t TC_SCANA = 1; constexpr uint32_t TC_SCANB = 0; constexpr uint32_t TC_AGGRJ = 1; constexpr bool PERFORM_CACHING = false; -constexpr bool DATA_IN_HBM = false; +constexpr bool DATA_B_IN_HBM = false; constexpr char MODE_STRING[] = "dram"; #endif #ifdef MODE_HBM constexpr size_t CHUNK_SIZE_B = 2_MiB; -constexpr uint32_t GROUP_COUNT = 8; -constexpr uint32_t TC_SCANA = 2; +constexpr uint32_t GROUP_COUNT = 32; +constexpr uint32_t TC_SCANA = 1; constexpr uint32_t TC_SCANB = 0; constexpr uint32_t TC_AGGRJ = 1; constexpr bool PERFORM_CACHING = false; -constexpr bool DATA_IN_HBM = true; +constexpr bool DATA_B_IN_HBM = true; constexpr char MODE_STRING[] = "hbm"; #endif @@ -297,25 +297,23 @@ int main() { fout << "run;rt-ns;rt-s;result[0];scana-run;scana-wait;scanb-run;scanb-wait;aggrj-run;aggrj-wait;cache-hr;" << std::endl; - if constexpr (DATA_IN_HBM) { - DATA_A_ = (uint64_t*) numa_alloc_onnode(WL_SIZE_B, cache_node); + if constexpr (DATA_B_IN_HBM) { DATA_B_ = (uint64_t*) numa_alloc_onnode(WL_SIZE_B, cache_node); - MASK_A_ = (uint16_t*) numa_alloc_onnode(WL_SIZE_ELEMENTS, cache_node); - DATA_DST_ = (uint64_t*) numa_alloc_onnode(TC_AGGRJ * GROUP_COUNT * sizeof(uint64_t), cache_node); } else { - DATA_A_ = (uint64_t*) numa_alloc_local(WL_SIZE_B); DATA_B_ = (uint64_t*) numa_alloc_local(WL_SIZE_B); - MASK_A_ = (uint16_t*) numa_alloc_local(WL_SIZE_ELEMENTS); - DATA_DST_ = (uint64_t*) numa_alloc_local(TC_AGGRJ * GROUP_COUNT * sizeof(uint64_t)); } + DATA_A_ = (uint64_t*) numa_alloc_local(WL_SIZE_B); + MASK_A_ = (uint16_t*) numa_alloc_local(WL_SIZE_ELEMENTS); + DATA_DST_ = (uint64_t*) numa_alloc_local(TC_AGGRJ * GROUP_COUNT * sizeof(uint64_t)); + if constexpr (PERFORM_CACHING) { CACHE_.Init(CachePlacementPolicy, CopyMethodPolicy); } fill_mt(DATA_A_, WL_SIZE_B, 0, 100, 42); - fill_mt(DATA_A_, WL_SIZE_B, 0, 100, 420); + fill_mt(DATA_B_, WL_SIZE_B, 0, 100, 420); for (uint32_t i = 0; i < ITERATION_COUNT + WARMUP_ITERATION_COUNT; i++) { std::promise launch_promise;