|
@ -28,10 +28,10 @@ constexpr uint64_t CMP_A = 50; |
|
|
constexpr uint32_t WARMUP_ITERATION_COUNT = 0; |
|
|
constexpr uint32_t WARMUP_ITERATION_COUNT = 0; |
|
|
constexpr uint32_t ITERATION_COUNT = 2; |
|
|
constexpr uint32_t ITERATION_COUNT = 2; |
|
|
constexpr uint32_t GROUP_COUNT = 2; |
|
|
constexpr uint32_t GROUP_COUNT = 2; |
|
|
constexpr uint32_t TC_SCANA = 1; |
|
|
|
|
|
constexpr uint32_t TC_SCANB = 1; |
|
|
constexpr uint32_t TC_SCANB = 1; |
|
|
|
|
|
constexpr uint32_t TC_SCANA = 1; |
|
|
constexpr uint32_t TC_AGGRJ = 1; |
|
|
constexpr uint32_t TC_AGGRJ = 1; |
|
|
constexpr bool PERFORM_CACHING = false; |
|
|
|
|
|
|
|
|
constexpr bool PERFORM_CACHING = true; |
|
|
constexpr bool DATA_IN_HBM = false; |
|
|
constexpr bool DATA_IN_HBM = false; |
|
|
constexpr char MODE_STRING[] = "DramBase"; |
|
|
constexpr char MODE_STRING[] = "DramBase"; |
|
|
|
|
|
|
|
@ -59,19 +59,19 @@ uint64_t* DATA_DST_; |
|
|
void scan_b(size_t gid, size_t tid) { |
|
|
void scan_b(size_t gid, size_t tid) { |
|
|
LAUNCH_.wait(); |
|
|
LAUNCH_.wait(); |
|
|
|
|
|
|
|
|
uint32_t runs = CHUNK_COUNT / GROUP_COUNT + (CHUNK_COUNT % GROUP_COUNT > gid); |
|
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<dsacache::CacheData> data; |
|
|
|
|
|
|
|
|
if constexpr (PERFORM_CACHING) { |
|
|
|
|
|
uint32_t runs = CHUNK_COUNT / GROUP_COUNT + (CHUNK_COUNT % GROUP_COUNT > gid); |
|
|
|
|
|
|
|
|
for(uint32_t i = 0; i < runs; ++i) { |
|
|
|
|
|
// calculate pointers
|
|
|
|
|
|
size_t chunk_id = gid + GROUP_COUNT * i; |
|
|
|
|
|
uint64_t* chunk_ptr = get_sub_chunk_ptr(DATA_B_, chunk_id, CHUNK_SIZE_ELEMENTS, tid, TC_SCANB); |
|
|
|
|
|
|
|
|
std::unique_ptr<dsacache::CacheData> data; |
|
|
|
|
|
|
|
|
if constexpr (PERFORM_CACHING) { |
|
|
|
|
|
data = CACHE_.Access(reinterpret_cast<uint8_t *>(chunk_ptr), CHUNK_SIZE_B / TC_SCANB); |
|
|
|
|
|
data->WaitOnCompletion(); |
|
|
|
|
|
|
|
|
for(uint32_t i = 0; i < runs; ++i) { |
|
|
|
|
|
// calculate pointers
|
|
|
|
|
|
size_t chunk_id = gid + GROUP_COUNT * i; |
|
|
|
|
|
uint64_t* chunk_ptr = get_sub_chunk_ptr(DATA_B_, chunk_id, CHUNK_SIZE_ELEMENTS, tid, TC_AGGRJ); |
|
|
|
|
|
data = CACHE_.Access(reinterpret_cast<uint8_t *>(chunk_ptr), CHUNK_SIZE_B / TC_AGGRJ); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
data->WaitOnCompletion(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
@ -204,7 +204,7 @@ int main() { |
|
|
for(std::thread& t : copy_pool) { t.join(); } |
|
|
for(std::thread& t : copy_pool) { t.join(); } |
|
|
for(std::thread& t : agg_pool) { t.join(); } |
|
|
for(std::thread& t : agg_pool) { t.join(); } |
|
|
|
|
|
|
|
|
Aggregation<uint64_t, Sum, load_mode::Aligned>::apply(DATA_DST_, DATA_DST_, sizeof(uint64_t) * TC_AGGRJ * GROUP_COUNT); |
|
|
|
|
|
|
|
|
Aggregation<uint64_t, Sum, load_mode::Aligned>::apply(DATA_nDST_, DATA_DST_, sizeof(uint64_t) * TC_AGGRJ * GROUP_COUNT); |
|
|
|
|
|
|
|
|
const auto time_end = std::chrono::steady_clock::now(); |
|
|
const auto time_end = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|