From 5044b4419c4b7001638ee8d6f14c44b2d36482f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Constantin=20F=C3=BCrst?=
Date: Wed, 7 Feb 2024 18:19:30 +0100
Subject: [PATCH] make load balancing thread-local to reduce atomic cost

---
 qdp_project/src/utils/BenchmarkHelpers.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/qdp_project/src/utils/BenchmarkHelpers.cpp b/qdp_project/src/utils/BenchmarkHelpers.cpp
index 676f68c..903eba5 100644
--- a/qdp_project/src/utils/BenchmarkHelpers.cpp
+++ b/qdp_project/src/utils/BenchmarkHelpers.cpp
@@ -38,13 +38,13 @@ std::vector<int> CopyMethodPolicy(const int numa_dst_node, const int numa_src_no
     // as submission cost matters for low transfer size
 
     if (data_size < 16_MiB) {
-        static std::atomic<int> last_node = 0;
-        const int node = last_node.fetch_add(1) % 4;
+        static thread_local int last_node = 0;
+        const int node = last_node++ % 4;
         return std::vector<int>{ node };
     }
     else {
-        static std::atomic<int> last_group = 0;
-        const int group = last_group.fetch_add(1) % 2;
+        static thread_local int last_group = 0;
+        const int group = last_group++ % 2;
         return group == 0 ? std::vector<int>{ 0, 1 } : std::vector<int>{ 2, 3 };
     }
 }