@ -3,8 +3,11 @@
# include <iostream>
# include <vector>
# include <chrono>
# include <pthread_np.h>
# include <pthread.h>
# include <semaphore.h>
# include <numa.h>
# include <dml/dml.hpp>
struct ThreadArgs {
@ -15,9 +18,6 @@ struct ThreadArgs {
size_t size ;
uint8_t nnode_src ;
uint8_t nnode_dst ;
// repetition
uint8_t count ; // TODO: unused
bool batched ; // TODO: unused
// thread output
dml : : status_code status ;
std : : chrono : : microseconds duration ;
@ -52,7 +52,7 @@ void* thread_function(void* argp) {
// we use the asynchronous submit routine even though it is not required
// here; however, the project will later use only asynchronous operation
auto handler = dml : : submit < path > ( dml : : mem_move , srcv , dstv , args - > numa_node ) ;
auto handler = dml : : submit < path > ( dml : : mem_move , srcv , dstv ) ;
auto result = handler . get ( ) ;
const auto et = std : : chrono : : high_resolution_clock : : now ( ) ;
@ -63,12 +63,13 @@ void* thread_function(void* argp) {
args - > duration = std : : chrono : : duration_cast < std : : chrono : : microseconds > ( et - st ) ;
args - > status = result . status ;
args - > sig = nullptr ;
return nullptr ;
}
template < typename path >
void execute_mem_move ( std : : vector < ThreadArgs > args ) {
void execute_mem_move ( std : : vector < ThreadArgs > & args ) {
sem_t sem ;
std : : vector < pthread_t > threads ;
@ -78,7 +79,7 @@ void execute_mem_move(std::vector<ThreadArgs> args) {
// for each submitted task we link the semaphore
// and create the thread, passing the argument
for ( auto arg : args ) {
for ( auto & arg : args ) {
arg . sig = & sem ;
threads . emplace_back ( ) ;