improve load balancing node assignment

11 months ago · c393b8eb88
1 changed file with 7 additions and 17 deletions
--- a/qdp_project/src/utils/BenchmarkHelpers.cpp
+++ b/qdp_project/src/utils/BenchmarkHelpers.cpp
@@ -32,30 +32,20 @@ int CachePlacementPolicy(const int numa_dst_node, const int numa_src_node, const
 }

 std::vector<int> CopyMethodPolicy(const int numa_dst_node, const int numa_src_node, const size_t data_size) {
+    // we always run on n0, so the amount of code here can be reduced
    // for small data it is more efficient to run on only one node
    // which causes less submissions and therefore completes faster
    // as submission cost matters for low transfer size

    if (data_size < 16_MiB) {
-        return std::vector<int>{ (numa_src_node >= 8 ? numa_src_node - 8 : numa_src_node) };
-    }
-
-    // for sufficiently large data, smart copy is used which will utilize
-    // all four engines for intra-socket copy operations and cross copy on
-    // the source and destination nodes for inter-socket copy
-
-    const bool same_socket = ((numa_dst_node ^ numa_src_node) & 4) == 0;
-
-    if (same_socket) {
-        const bool socket_number = numa_dst_node >> 2;
-        if (socket_number == 0) return std::vector<int>{ 0, 1, 2, 3 };
-        else return std::vector<int>{ 4, 5, 6, 7 };
+        static std::atomic<int> last_node = 0;
+        const int node = last_node.fetch_add(1) % 4;
+        return std::vector<int>{ node };
    }
    else {
-        return std::vector<int>{
-                (numa_src_node >= 8 ? numa_src_node - 8 : numa_src_node),
-                (numa_dst_node >= 8 ? numa_dst_node - 8 : numa_dst_node)
-        };
+        static std::atomic<int> last_group = 0;
+        const int group = last_group.fetch_add(1) % 2;
+        return group == 0 ? std::vector<int>{ 0, 1 } : std::vector<int>{ 2, 3 };
    }
 }