Browse Source

improve load balancing node assignment

master
Constantin Fürst 11 months ago
parent
commit
c393b8eb88
  1. 24
      qdp_project/src/utils/BenchmarkHelpers.cpp

24
qdp_project/src/utils/BenchmarkHelpers.cpp

@ -32,30 +32,20 @@ int CachePlacementPolicy(const int numa_dst_node, const int numa_src_node, const
}
// Picks the list of node ids that a copy of data_size bytes from
// numa_src_node to numa_dst_node is handed to, returned as a vector<int>.
//
// As the statements are written here:
//  - data_size below 16_MiB: one entry, the source node (minus 8 if it is >= 8)
//  - source and destination agree in bit 2 of the node id: { 0, 1, 2, 3 } or
//    { 4, 5, 6, 7 }, chosen from the destination node
//  - otherwise: two entries, source and destination node (each minus 8 if >= 8)
//
// NOTE(review): this listing looks like a rendered diff hunk whose +/- markers
// were lost, so old and new statements are interleaved. The round-robin
// statements that follow the returns in both branches are unreachable as
// written and are presumably the lines the commit added, with the returns
// above them being the removed ones - confirm against the repository before
// relying on either reading.
std::vector<int> CopyMethodPolicy(const int numa_dst_node, const int numa_src_node, const size_t data_size) {
// we always run on n0 and can cut the amount of code here therefore
// for small data it is more efficient to run on only one node
// which causes less submissions and therefore completes faster
// as submission cost matters for low transfer size
if (data_size < 16_MiB) {
// node ids of 8 and above are shifted down by 8; numa_dst_node is not consulted here
// NOTE(review): presumably ids >= 8 alias nodes 0..7 - TODO confirm the node numbering
return std::vector<int>{ (numa_src_node >= 8 ? numa_src_node - 8 : numa_src_node) };
}
// for sufficiently large data, smart copy is used which will utilize
// all four engines for intra-socket copy operations and cross copy on
// the source and destination nodes for inter-socket copy
// "same socket" is decided purely by bit 2 (value 4) of the two node ids being equal
const bool same_socket = ((numa_dst_node ^ numa_src_node) & 4) == 0;
if (same_socket) {
// the shift result is converted to bool, so every destination id >= 4
// becomes true (1) and only ids 0..3 compare equal to 0 below
const bool socket_number = numa_dst_node >> 2;
if (socket_number == 0) return std::vector<int>{ 0, 1, 2, 3 };
else return std::vector<int>{ 4, 5, 6, 7 };
// unreachable as written: both arms of the if/else above return.
// These statements advance a function-local atomic counter shared by all
// callers and would yield a single id cycling through 0..3.
static std::atomic<int> last_node = 0;
const int node = last_node.fetch_add(1) % 4;
return std::vector<int>{ node };
}
else {
// source and destination node, each shifted down by 8 when >= 8
return std::vector<int>{
(numa_src_node >= 8 ? numa_src_node - 8 : numa_src_node),
(numa_dst_node >= 8 ? numa_dst_node - 8 : numa_dst_node)
};
// unreachable as written: the return above always executes first.
// These statements would alternate between the pairs { 0, 1 } and { 2, 3 }
// on successive calls via a function-local atomic counter.
static std::atomic<int> last_group = 0;
const int group = last_group.fetch_add(1) % 2;
return group == 0 ? std::vector<int>{ 0, 1 } : std::vector<int>{ 2, 3 };
}
}

Loading…
Cancel
Save