|
@ -81,6 +81,9 @@ void scan_b(size_t gid, size_t tid) { |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@ -155,7 +158,6 @@ void aggr_j(size_t gid, size_t tid) { |
|
|
|
|
|
|
|
|
uint64_t tmp = _mm512_reduce_add_epi64(aggregator); |
|
|
uint64_t tmp = _mm512_reduce_add_epi64(aggregator); |
|
|
aggregator = aggregation::apply_masked(aggregator, data_ptr, mask_ptr_a, SUBCHUNK_SIZE_B); |
|
|
aggregator = aggregation::apply_masked(aggregator, data_ptr, mask_ptr_a, SUBCHUNK_SIZE_B); |
|
|
|
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
@ -209,7 +211,7 @@ int main() { |
|
|
std::vector<std::thread> agg_pool; |
|
|
std::vector<std::thread> agg_pool; |
|
|
|
|
|
|
|
|
for(uint32_t gid = 0; gid < GROUP_COUNT; ++gid) { |
|
|
for(uint32_t gid = 0; gid < GROUP_COUNT; ++gid) { |
|
|
BARRIERS_.emplace_back(new std::barrier<NopStruct>(TC_SCANA + TC_AGGRJ)); |
|
|
|
|
|
|
|
|
BARRIERS_.emplace_back(new std::barrier<NopStruct>(TC_SCANA + TC_AGGRJ + TC_SCANB)); |
|
|
|
|
|
|
|
|
for(uint32_t tid = 0; tid < TC_SCANA; ++tid) { |
|
|
for(uint32_t tid = 0; tid < TC_SCANA; ++tid) { |
|
|
filter_pool.emplace_back(scan_a, gid, tid); |
|
|
filter_pool.emplace_back(scan_a, gid, tid); |
|
|