Browse Source

make load balancing thread-local to reduce atomic cost

master
Constantin Fürst 11 months ago
parent
commit
5044b4419c
  1. 8
      qdp_project/src/utils/BenchmarkHelpers.cpp

8
qdp_project/src/utils/BenchmarkHelpers.cpp

@@ -38,13 +38,13 @@ std::vector<int> CopyMethodPolicy(const int numa_dst_node, const int numa_src_no
 // as submission cost matters for low transfer size
 if (data_size < 16_MiB) {
-static std::atomic<int> last_node = 0;
-const int node = last_node.fetch_add(1) % 4;
+static thread_local int last_node = 0;
+const int node = last_node++ % 4;
 return std::vector<int>{ node };
 }
 else {
-static std::atomic<int> last_group = 0;
-const int group = last_group.fetch_add(1) % 2;
+static thread_local int last_group = 0;
+const int group = last_group++ % 2;
 return group == 0 ? std::vector<int>{ 0, 1 } : std::vector<int>{ 2, 3 };
 }
 }

Loading…
Cancel
Save