|
|
@ -179,20 +179,20 @@ void scan_b(size_t gid, size_t tid) { |
|
|
|
|
|
|
|
if constexpr (PERFORM_CACHING) { |
|
|
|
for (size_t i = start; i < end; i++) { |
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][i][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][i][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
const size_t chunk_index = get_chunk_index(gid, 0); |
|
|
|
uint64_t* chunk_ptr = get_chunk<TC_AGGRJ>(DATA_B_, chunk_index, i); |
|
|
|
|
|
|
|
const auto data = CACHE_.Access(reinterpret_cast<uint8_t*>(chunk_ptr), CHUNK_SIZE_B / TC_AGGRJ); |
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][i][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][i][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_wait(); |
|
|
|
|
|
|
|
data->WaitOnCompletion(); |
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][i][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][i][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
@ -206,7 +206,7 @@ void scan_a(size_t gid, size_t tid) { |
|
|
|
LAUNCH_.wait(); |
|
|
|
|
|
|
|
for (size_t i = 0; i < RUN_COUNT; i++) { |
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][i][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][i][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
const size_t chunk_index = get_chunk_index(gid, i); |
|
|
|
uint64_t* chunk_ptr = get_chunk<TC_SCANA>(DATA_A_, chunk_index, tid); |
|
|
@ -214,11 +214,11 @@ void scan_a(size_t gid, size_t tid) { |
|
|
|
|
|
|
|
filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA); |
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][i][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][i][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); |
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_wait(); |
|
|
|
|
|
|
|
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][i][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][i][TIME_STAMP_END] = std::chrono::steady_clock::now(); |
|
|
|
} |
|
|
|
|
|
|
|
BARRIERS_[gid]->arrive_and_drop(); |
|
|
@ -345,7 +345,7 @@ int main() { |
|
|
|
process_timings(&scana_run, &scana_wait, &scanb_run, &scanb_wait, &aggrj_run, &aggrj_wait); |
|
|
|
|
|
|
|
constexpr double nanos_per_second = ((double)1000) * 1000 * 1000; |
|
|
|
const uint64_t nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(time_end - time_end).count(); |
|
|
|
const uint64_t nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(time_end - time_start).count(); |
|
|
|
const double seconds = (double)(nanos) / nanos_per_second; |
|
|
|
|
|
|
|
fout |
|
|
|