diff --git a/qdp_project/bench_max.sh b/qdp_project/bench_max.sh index f05aacd..6903863 100644 --- a/qdp_project/bench_max.sh +++ b/qdp_project/bench_max.sh @@ -3,7 +3,7 @@ current_date_time=$(date) echo "Benchmark start at: $current_date_time" -sudo numactl --cpunodebind=0 cmake-build-release/MAXBench +sudo numactl --cpunodebind=0 cmake-build-release/QDPBench current_date_time=$(date) echo "Benchmark end at: $current_date_time" diff --git a/qdp_project/results/qdp-xeonmax-simpleq-DramBase-tca4-tcb0-tcj2-tmul4-wl4294967296-cs1048576.csv b/qdp_project/results/qdp-xeonmax-simpleq-DramBase-tca4-tcb0-tcj2-tmul4-wl4294967296-cs1048576.csv new file mode 100644 index 0000000..0bb63a0 --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-DramBase-tca4-tcb0-tcj2-tmul4-wl4294967296-cs1048576.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +5;72414484;0; +6;72695538;0; +7;70226208;0; +8;64734077;0; +9;70125966;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-HbmPeak-tca4-tcb0-tcj2-tmul4-wl4294967296-cs1048576.csv b/qdp_project/results/qdp-xeonmax-simpleq-HbmPeak-tca4-tcb0-tcj2-tmul4-wl4294967296-cs1048576.csv new file mode 100644 index 0000000..10284fd --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-HbmPeak-tca4-tcb0-tcj2-tmul4-wl4294967296-cs1048576.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +5;67105366;0; +6;64812778;0; +7;64667008;0; +8;66072639;0; +9;66492940;0; diff --git a/qdp_project/results/qdp-xeonmax-simpleq-Prefetch-tca4-tcb1-tcj2-tmul4-wl4294967296-cs134217728.csv b/qdp_project/results/qdp-xeonmax-simpleq-Prefetch-tca4-tcb1-tcj2-tmul4-wl4294967296-cs134217728.csv new file mode 100644 index 0000000..8a97119 --- /dev/null +++ b/qdp_project/results/qdp-xeonmax-simpleq-Prefetch-tca4-tcb1-tcj2-tmul4-wl4294967296-cs134217728.csv @@ -0,0 +1,6 @@ +run;time;result[0]; +5;4700269615;0; +6;4780414439;0; +7;4596210467;0; +8;4679035690;0; +9;4687101402;0; diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp index 9ba1abb..4704240 100644 
--- a/qdp_project/src/Benchmark.cpp +++ b/qdp_project/src/Benchmark.cpp @@ -19,25 +19,50 @@ #include "BenchmarkHelpers.cpp" +#define MODE_PREFETCH + //////////////////////////////// /// BENCHMARK SETUP -constexpr size_t WL_SIZE_B = 64_MiB; -constexpr size_t CHUNK_SIZE_B = 1_MiB; -constexpr uint64_t CMP_A = 50; -constexpr uint32_t WARMUP_ITERATION_COUNT = 0; -constexpr uint32_t ITERATION_COUNT = 2; -constexpr uint32_t GROUP_COUNT = 2; +constexpr size_t WL_SIZE_B = 4_GiB; +constexpr uint32_t WARMUP_ITERATION_COUNT = 5; +constexpr uint32_t ITERATION_COUNT = 5; + +#ifdef MODE_PREFETCH +constexpr size_t CHUNK_SIZE_B = 128_MiB; +constexpr uint32_t GROUP_COUNT = 4; +constexpr uint32_t TC_SCANA = 4; constexpr uint32_t TC_SCANB = 1; -constexpr uint32_t TC_SCANA = 1; -constexpr uint32_t TC_AGGRJ = 1; +constexpr uint32_t TC_AGGRJ = 2; constexpr bool PERFORM_CACHING = true; constexpr bool DATA_IN_HBM = false; -constexpr char MODE_STRING[] = "DramBase"; +constexpr char MODE_STRING[] = "Prefetch"; /* label matches MODE_PREFETCH (PERFORM_CACHING = true) */ +#endif +#ifdef MODE_DRAM +constexpr size_t CHUNK_SIZE_B = 2_MiB; +constexpr uint32_t GROUP_COUNT = 4; +constexpr uint32_t TC_SCANA = 4; +constexpr uint32_t TC_SCANB = 0; +constexpr uint32_t TC_AGGRJ = 2; +constexpr bool PERFORM_CACHING = false; +constexpr bool DATA_IN_HBM = false; +constexpr char MODE_STRING[] = "Dram"; /* label matches MODE_DRAM (no caching, DATA_IN_HBM = false) */ +#endif +#ifdef MODE_HBM +constexpr size_t CHUNK_SIZE_B = 2_MiB; +constexpr uint32_t GROUP_COUNT = 4; +constexpr uint32_t TC_SCANA = 4; +constexpr uint32_t TC_SCANB = 0; +constexpr uint32_t TC_AGGRJ = 2; +constexpr bool PERFORM_CACHING = false; +constexpr bool DATA_IN_HBM = true; +constexpr char MODE_STRING[] = "Hbm"; +#endif /// DO NOT CONFIGURE BEYOND THIS //////////////////////////////// +constexpr uint64_t CMP_A = 50; constexpr uint32_t TC_COMBINED = TC_SCANA + TC_SCANB + TC_AGGRJ; constexpr size_t WL_SIZE_ELEMENTS = WL_SIZE_B / sizeof(uint64_t); constexpr size_t CHUNK_COUNT = WL_SIZE_B / CHUNK_SIZE_B; @@ -208,4 +233,4 @@ int main() { numa_free(DATA_DST_, TC_AGGRJ 
* GROUP_COUNT * sizeof(uint64_t)); return 0; -} \ No newline at end of file +}