|
@ -42,12 +42,12 @@ void scan_b(size_t gid, size_t tid) { |
|
|
constexpr size_t SUBCHUNK_COUNT = TC_AGGRJ / (TC_SCANB == 0 ? 1 : TC_SCANB); |
|
|
constexpr size_t SUBCHUNK_COUNT = TC_AGGRJ / (TC_SCANB == 0 ? 1 : TC_SCANB); |
|
|
constexpr size_t SUBCHUNK_SIZE_B = CHUNK_SIZE_B / SUBCHUNK_COUNT; |
|
|
constexpr size_t SUBCHUNK_SIZE_B = CHUNK_SIZE_B / SUBCHUNK_COUNT; |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid].clear(); |
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid].resize(1); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)].clear(); |
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1); |
|
|
|
|
|
|
|
|
LAUNCH_.wait(); |
|
|
LAUNCH_.wait(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
if constexpr (PERFORM_CACHING) { |
|
|
if constexpr (PERFORM_CACHING) { |
|
|
for (size_t i = 0; i < RUN_COUNT; i++) { |
|
|
for (size_t i = 0; i < RUN_COUNT; i++) { |
|
@ -71,19 +71,19 @@ void scan_b(size_t gid, size_t tid) { |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
void scan_a(size_t gid, size_t tid) { |
|
|
void scan_a(size_t gid, size_t tid) { |
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid].clear(); |
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid].resize(1); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)].clear(); |
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1); |
|
|
|
|
|
|
|
|
LAUNCH_.wait(); |
|
|
LAUNCH_.wait(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
for (size_t i = 0; i < RUN_COUNT; i++) { |
|
|
for (size_t i = 0; i < RUN_COUNT; i++) { |
|
|
const size_t chunk_index = get_chunk_index(gid, i); |
|
|
const size_t chunk_index = get_chunk_index(gid, i); |
|
@ -93,27 +93,27 @@ void scan_a(size_t gid, size_t tid) { |
|
|
filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA); |
|
|
filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
void aggr_j(size_t gid, size_t tid) { |
|
|
void aggr_j(size_t gid, size_t tid) { |
|
|
CACHE_HITS_[gid * tid] = 0; |
|
|
|
|
|
|
|
|
CACHE_HITS_[UniqueIndex(gid,tid)] = 0; |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid].clear(); |
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid].resize(1); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)].clear(); |
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1); |
|
|
|
|
|
|
|
|
__m512i aggregator = aggregation::OP::zero(); |
|
|
__m512i aggregator = aggregation::OP::zero(); |
|
|
|
|
|
|
|
|
LAUNCH_.wait(); |
|
|
LAUNCH_.wait(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_wait(); |
|
|
BARRIERS_[gid]->arrive_and_wait(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
for (size_t i = 0; i < RUN_COUNT; i++) { |
|
|
for (size_t i = 0; i < RUN_COUNT; i++) { |
|
|
const size_t chunk_index = get_chunk_index(gid, i); |
|
|
const size_t chunk_index = get_chunk_index(gid, i); |
|
@ -133,7 +133,7 @@ void aggr_j(size_t gid, size_t tid) { |
|
|
data_ptr = chunk_ptr; |
|
|
data_ptr = chunk_ptr; |
|
|
} |
|
|
} |
|
|
else { |
|
|
else { |
|
|
CACHE_HITS_[gid * tid]++; |
|
|
|
|
|
|
|
|
CACHE_HITS_[UniqueIndex(gid,tid)]++; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
else { |
|
|
else { |
|
@ -150,11 +150,11 @@ void aggr_j(size_t gid, size_t tid) { |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
|
|
|
|
|
|
aggregation::happly(&DATA_DST_[gid * tid], aggregator); |
|
|
|
|
|
|
|
|
aggregation::happly(&DATA_DST_[UniqueIndex(gid,tid)], aggregator); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
int main() { |
|
|
int main() { |
|
|