diff --git a/benchmarks/benchmark-descriptors/copy-debug-n0ton0-cpu.json b/benchmarks/benchmark-descriptors/copy-debug-n0ton0-cpu.json
new file mode 100755
index 0000000..6f938bf
--- /dev/null
+++ b/benchmarks/benchmark-descriptors/copy-debug-n0ton0-cpu.json
@@ -0,0 +1,18 @@
+{
+  "count": 1,
+  "list": [
+    {
+      "affinity": {
+        "nnode_dst": 0,
+        "nnode_src": 0,
+        "node": 0
+      },
+      "task": {
+        "size": 1024,
+        "batch_size": 0
+      }
+    }
+  ],
+  "path": "sw",
+  "repetitions": 10
+}
\ No newline at end of file
diff --git a/benchmarks/benchmark.hpp b/benchmarks/benchmark.hpp
index 856491e..177d0ba 100644
--- a/benchmarks/benchmark.hpp
+++ b/benchmarks/benchmark.hpp
@@ -89,7 +89,6 @@ void* thread_function(void* argp) {
 
 template <typename path>
 std::vector<uint64_t> execute_dml_memcpy(std::vector<TaskData>& args, const uint64_t iterations) {
-    std::vector<std::thread> threads;
     std::vector<uint64_t> timing;
 
     // initialize numa library
@@ -98,7 +97,8 @@ std::vector<uint64_t> execute_dml_memcpy(std::vector<TaskData>& args, const uint
     // for each submitted task we link the semaphore
     // and create the thread, passing the argument
 
-    for (uint64_t i = 0; i < iterations; i++) {
+    for (uint64_t i = 0; i < iterations + 5; i++) {
+        std::vector<std::thread> threads;
         std::promise<void> launch_promise;
         LAUNCH_ = launch_promise.get_future();
 
@@ -117,7 +117,7 @@ std::vector<uint64_t> execute_dml_memcpy(std::vector<TaskData>& args, const uint
 
         const auto time_end = std::chrono::steady_clock::now();
 
-        timing.emplace_back(std::chrono::duration_cast<std::chrono::nanoseconds>(time_end - time_start).count());
+        if (i >= 5) timing.emplace_back(std::chrono::duration_cast<std::chrono::nanoseconds>(time_end - time_start).count());
     }
 
     return timing;