From c393b8eb886d85870d5b92ab3402f53914d538f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Constantin=20F=C3=BCrst?=
Date: Wed, 7 Feb 2024 15:54:02 +0100
Subject: [PATCH] improve load balancing node assignment

---
 qdp_project/src/utils/BenchmarkHelpers.cpp | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/qdp_project/src/utils/BenchmarkHelpers.cpp b/qdp_project/src/utils/BenchmarkHelpers.cpp
index 9cb3579..676f68c 100644
--- a/qdp_project/src/utils/BenchmarkHelpers.cpp
+++ b/qdp_project/src/utils/BenchmarkHelpers.cpp
@@ -32,30 +32,20 @@ int CachePlacementPolicy(const int numa_dst_node, const int numa_src_node, const
 }
 
 std::vector<int> CopyMethodPolicy(const int numa_dst_node, const int numa_src_node, const size_t data_size) {
+    // we always run on n0 and can cut the amount of code here therefore
     // for small data it is more efficient to run on only one node
     // which causes less submissions and therefore completes faster
     // as submission cost matters for low transfer size
 
     if (data_size < 16_MiB) {
-        return std::vector<int>{ (numa_src_node >= 8 ? numa_src_node - 8 : numa_src_node) };
-    }
-
-    // for sufficiently large data, smart copy is used which will utilize
-    // all four engines for intra-socket copy operations and cross copy on
-    // the source and destination nodes for inter-socket copy
-
-    const bool same_socket = ((numa_dst_node ^ numa_src_node) & 4) == 0;
-
-    if (same_socket) {
-        const bool socket_number = numa_dst_node >> 2;
-        if (socket_number == 0) return std::vector<int>{ 0, 1, 2, 3 };
-        else return std::vector<int>{ 4, 5, 6, 7 };
+        static std::atomic<int> last_node = 0;
+        const int node = last_node.fetch_add(1) % 4;
+        return std::vector<int>{ node };
     }
     else {
-        return std::vector<int>{
-            (numa_src_node >= 8 ? numa_src_node - 8 : numa_src_node),
-            (numa_dst_node >= 8 ? numa_dst_node - 8 : numa_dst_node)
-        };
+        static std::atomic<int> last_group = 0;
+        const int group = last_group.fetch_add(1) % 2;
+        return group == 0 ? std::vector<int>{ 0, 1 } : std::vector<int>{ 2, 3 };
     }
 }
 