Small changes to execute-move: remove the unused repetition options, pass args by reference so that results can be written back to the caller, and don't use the dml submit call that sets the NUMA node

1 year ago · 9083ba834f
1 changed files with 8 additions and 7 deletions
--- a/benchmarks/execute-move.hpp
+++ b/benchmarks/execute-move.hpp
@ -3,8 +3,11 @@
 #include <iostream>
 #include <vector>
 #include <chrono>
-#include <pthread_np.h>
+
+#include <pthread.h>
 #include <semaphore.h>
+#include <numa.h>
+
 #include <dml/dml.hpp>

 struct ThreadArgs {
@ -15,9 +18,6 @@ struct ThreadArgs {
    size_t size;
    uint8_t nnode_src;
    uint8_t nnode_dst;
-    // repetition
-    uint8_t count; // TODO: unused
-    bool batched;  // TODO: unused
    // thread output
    dml::status_code status;
    std::chrono::microseconds duration;
@ -52,7 +52,7 @@ void* thread_function(void* argp) {

    // we use the asynchronous submit-routine even though this is not required
    // here, however the project later on will only use async operation
-    auto handler = dml::submit<path>(dml::mem_move, srcv, dstv, args->numa_node);
+    auto handler = dml::submit<path>(dml::mem_move, srcv, dstv);
    auto result = handler.get();

    const auto et = std::chrono::high_resolution_clock::now();
@ -63,12 +63,13 @@ void* thread_function(void* argp) {

    args->duration = std::chrono::duration_cast<std::chrono::microseconds>(et - st);
    args->status = result.status;
+    args->sig = nullptr;

    return nullptr;
 }

 template <typename path>
-void execute_mem_move(std::vector<ThreadArgs> args) {
+void execute_mem_move(std::vector<ThreadArgs>& args) {
    sem_t sem;
    std::vector<pthread_t> threads;

@ -78,7 +79,7 @@ void execute_mem_move(std::vector<ThreadArgs> args) {

    // for each submitted task we link the semaphore
    // and create the thread, passing the argument
-    for (auto arg : args) {
+    for (auto& arg : args) {
        arg.sig = &sem;
        threads.emplace_back();