Browse Source

fix index clash for thread-and-group unique indexing

master
Constantin Fürst 11 months ago
parent
commit
bb1d20924a
  1. 36
      qdp_project/src/Benchmark.cpp
  2. 4
      qdp_project/src/utils/BenchmarkHelpers.cpp

36
qdp_project/src/Benchmark.cpp

@ -42,12 +42,12 @@ void scan_b(size_t gid, size_t tid) {
constexpr size_t SUBCHUNK_COUNT = TC_AGGRJ / (TC_SCANB == 0 ? 1 : TC_SCANB); constexpr size_t SUBCHUNK_COUNT = TC_AGGRJ / (TC_SCANB == 0 ? 1 : TC_SCANB);
constexpr size_t SUBCHUNK_SIZE_B = CHUNK_SIZE_B / SUBCHUNK_COUNT; constexpr size_t SUBCHUNK_SIZE_B = CHUNK_SIZE_B / SUBCHUNK_COUNT;
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid].clear();
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid].resize(1);
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)].clear();
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1);
LAUNCH_.wait(); LAUNCH_.wait();
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now();
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now();
if constexpr (PERFORM_CACHING) { if constexpr (PERFORM_CACHING) {
for (size_t i = 0; i < RUN_COUNT; i++) { for (size_t i = 0; i < RUN_COUNT; i++) {
@ -71,19 +71,19 @@ void scan_b(size_t gid, size_t tid) {
} }
} }
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now();
THREAD_TIMING_[SCANB_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now();
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now();
THREAD_TIMING_[SCANB_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now();
BARRIERS_[gid]->arrive_and_drop(); BARRIERS_[gid]->arrive_and_drop();
} }
void scan_a(size_t gid, size_t tid) { void scan_a(size_t gid, size_t tid) {
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid].clear();
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid].resize(1);
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)].clear();
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1);
LAUNCH_.wait(); LAUNCH_.wait();
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now();
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now();
for (size_t i = 0; i < RUN_COUNT; i++) { for (size_t i = 0; i < RUN_COUNT; i++) {
const size_t chunk_index = get_chunk_index(gid, i); const size_t chunk_index = get_chunk_index(gid, i);
@ -93,27 +93,27 @@ void scan_a(size_t gid, size_t tid) {
filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA); filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA);
} }
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now();
THREAD_TIMING_[SCANA_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now();
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now();
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now();
BARRIERS_[gid]->arrive_and_drop(); BARRIERS_[gid]->arrive_and_drop();
} }
void aggr_j(size_t gid, size_t tid) { void aggr_j(size_t gid, size_t tid) {
CACHE_HITS_[gid * tid] = 0;
CACHE_HITS_[UniqueIndex(gid,tid)] = 0;
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid].clear();
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid].resize(1);
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)].clear();
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)].resize(1);
__m512i aggregator = aggregation::OP::zero(); __m512i aggregator = aggregation::OP::zero();
LAUNCH_.wait(); LAUNCH_.wait();
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now();
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_BEGIN] = std::chrono::steady_clock::now();
BARRIERS_[gid]->arrive_and_wait(); BARRIERS_[gid]->arrive_and_wait();
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now();
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now();
for (size_t i = 0; i < RUN_COUNT; i++) { for (size_t i = 0; i < RUN_COUNT; i++) {
const size_t chunk_index = get_chunk_index(gid, i); const size_t chunk_index = get_chunk_index(gid, i);
@ -133,7 +133,7 @@ void aggr_j(size_t gid, size_t tid) {
data_ptr = chunk_ptr; data_ptr = chunk_ptr;
} }
else { else {
CACHE_HITS_[gid * tid]++;
CACHE_HITS_[UniqueIndex(gid,tid)]++;
} }
} }
else { else {
@ -150,11 +150,11 @@ void aggr_j(size_t gid, size_t tid) {
} }
} }
THREAD_TIMING_[AGGRJ_TIMING_INDEX][tid * gid][0][TIME_STAMP_END] = std::chrono::steady_clock::now();
THREAD_TIMING_[AGGRJ_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_END] = std::chrono::steady_clock::now();
BARRIERS_[gid]->arrive_and_drop(); BARRIERS_[gid]->arrive_and_drop();
aggregation::happly(&DATA_DST_[gid * tid], aggregator);
aggregation::happly(&DATA_DST_[UniqueIndex(gid,tid)], aggregator);
} }
int main() { int main() {

4
qdp_project/src/utils/BenchmarkHelpers.cpp

@ -13,6 +13,10 @@ std::array<std::vector<std::vector<std::array<std::chrono::steady_clock::time_po
std::array<uint32_t, GROUP_COUNT * TC_AGGRJ> CACHE_HITS_; std::array<uint32_t, GROUP_COUNT * TC_AGGRJ> CACHE_HITS_;
/// Maps a (group id, thread id) pair onto a single flat index.
///
/// The index is laid out thread-major: all groups of thread 0 come first,
/// then all groups of thread 1, and so on. For gid < GROUP_COUNT every
/// distinct (gid, tid) pair yields a distinct index, unlike a plain product
/// of the two ids, which collapses to 0 whenever either id is 0.
///
/// @param gid group id; must be smaller than GROUP_COUNT for uniqueness
/// @param tid thread id within the group
/// @return tid * GROUP_COUNT + gid, computed in size_t
inline size_t UniqueIndex(const uint32_t gid, const uint32_t tid) {
    // Widen before multiplying so the arithmetic is carried out in size_t
    // regardless of the declared type of GROUP_COUNT.
    const size_t thread_base = static_cast<size_t>(tid) * static_cast<size_t>(GROUP_COUNT);
    return thread_base + static_cast<size_t>(gid);
}
uint64_t sum_check(uint64_t compare_value, uint64_t* row_A, uint64_t* row_B, size_t row_size) { uint64_t sum_check(uint64_t compare_value, uint64_t* row_A, uint64_t* row_B, size_t row_size) {
uint64_t sum = 0; uint64_t sum = 0;
for(int i = 0; i < row_size / sizeof(uint64_t); ++i) { for(int i = 0; i < row_size / sizeof(uint64_t); ++i) {

Loading…
Cancel
Save