#pragma once #include "utils/memory_literals.h" #ifndef MODE_SET_BY_CMAKE #define MODE_DISTPREFETCH #endif constexpr size_t WL_SIZE_B = 4_GiB; constexpr uint32_t WARMUP_ITERATION_COUNT = 5; constexpr uint32_t ITERATION_COUNT = 5; constexpr int MEM_NODE_HBM = 8; constexpr int MEM_NODE_DRAM = 0; #ifdef MODE_PREFETCH constexpr uint32_t GROUP_COUNT = 32; constexpr size_t CHUNK_SIZE_B = 8_MiB; constexpr uint32_t TC_SCANA = 1; constexpr uint32_t TC_SCANB = 1; constexpr uint32_t TC_AGGRJ = 1; constexpr bool PERFORM_CACHING = true; constexpr int MEM_NODE_A = 0; constexpr int MEM_NODE_B = 0; constexpr char MODE_STRING[] = "prefetch"; #endif #ifdef MODE_DISTPREFETCH constexpr uint32_t GROUP_COUNT = 32; constexpr size_t CHUNK_SIZE_B = 8_MiB; constexpr uint32_t TC_SCANA = 1; constexpr uint32_t TC_SCANB = 1; constexpr uint32_t TC_AGGRJ = 1; constexpr bool PERFORM_CACHING = true; constexpr int MEM_NODE_A = 0; constexpr int MEM_NODE_B = 1; constexpr char MODE_STRING[] = "distprefetch"; #endif #ifdef MODE_DRAM constexpr size_t CHUNK_SIZE_B = 2_MiB; constexpr uint32_t GROUP_COUNT = 32; constexpr uint32_t TC_SCANA = 2; constexpr uint32_t TC_SCANB = 0; constexpr uint32_t TC_AGGRJ = 1; constexpr bool PERFORM_CACHING = false; constexpr int MEM_NODE_A = 0; constexpr int MEM_NODE_B = 0; constexpr char MODE_STRING[] = "dram"; #endif #ifdef MODE_HBM constexpr size_t CHUNK_SIZE_B = 2_MiB; constexpr uint32_t GROUP_COUNT = 32; constexpr uint32_t TC_SCANA = 4; constexpr uint32_t TC_SCANB = 0; constexpr uint32_t TC_AGGRJ = 1; constexpr bool PERFORM_CACHING = false; constexpr int MEM_NODE_A = 0; constexpr int MEM_NODE_B = 8; constexpr char MODE_STRING[] = "hbm"; #endif constexpr uint64_t CMP_A = 50; constexpr size_t WL_SIZE_ELEMENTS = WL_SIZE_B / sizeof(uint64_t); constexpr size_t CHUNK_COUNT = WL_SIZE_B / CHUNK_SIZE_B; constexpr size_t CHUNK_SIZE_ELEMENTS = CHUNK_SIZE_B / sizeof(uint64_t); constexpr size_t RUN_COUNT = CHUNK_COUNT / GROUP_COUNT; static_assert(RUN_COUNT > 0); 
// Compile-time sanity checks on the selected configuration.
static_assert(WL_SIZE_B % 16 == 0, "WL_SIZE_B must be a multiple of 16 bytes");
static_assert(CHUNK_SIZE_B % 16 == 0, "CHUNK_SIZE_B must be a multiple of 16 bytes");

// The TC_SCANB == TC_AGGRJ requirement only applies when caching is enabled.
static_assert(!PERFORM_CACHING || TC_SCANB == TC_AGGRJ,
              "With PERFORM_CACHING enabled, TC_SCANB must equal TC_AGGRJ");

// Both values are used as divisors below; reject zero here so the failure is
// reported with a clear message rather than as a non-constant expression.
static_assert(TC_SCANA > 0, "TC_SCANA must be non-zero: it divides CHUNK_SIZE_B");
static_assert(TC_AGGRJ > 0, "TC_AGGRJ must be non-zero: it divides CHUNK_SIZE_B");

// Chunk size in bytes divided by the respective TC_* value (integer division).
constexpr size_t SCANA_CHUNK_SIZE_B = CHUNK_SIZE_B / TC_SCANA;
constexpr size_t AGGRJ_CHUNK_SIZE_B = CHUNK_SIZE_B / TC_AGGRJ;