diff --git a/benchmarks/benchmark.hpp b/benchmarks/benchmark.hpp index a535c65..c94c73a 100644 --- a/benchmarks/benchmark.hpp +++ b/benchmarks/benchmark.hpp @@ -60,13 +60,22 @@ void* thread_function(void* argp) { auto sequence = dml::sequence(opcount, std::allocator()); for (uint32_t j = 0; j < args->batch_size; j++) { - const auto status = sequence.add(dml::mem_copy, srcv, dstv); + // block_on_fault() submits the task so that the DSA engine, together + // with the IOMMU, can handle page faults itself; this requires the WQ + // to be configured to allow this as well + + const auto status = sequence.add(dml::mem_copy.block_on_fault(), srcv, dstv); + CHECK_STATUS(status, "Adding operation to batch failed!"); if (j % args->barrier_after_n_operations == 0) { sequence.add(dml::nop); } } + // we use the asynchronous submit-routine even though this is not required + // here, however the project later on will only use async operation and + // therefore this behaviour should be benchmarked + auto handler = dml::submit(dml::batch, sequence); const auto se = std::chrono::high_resolution_clock::now(); @@ -88,7 +97,10 @@ void* thread_function(void* argp) { // we use the asynchronous submit-routine even though this is not required // here, however the project later on will only use async operation and // therefore this behaviour should be benchmarked - auto handler = dml::submit(dml::mem_copy, srcv, dstv); + // block_on_fault() submits the task so that the DSA engine, together + // with the IOMMU, can handle page faults itself; this requires the WQ + // to be configured to allow this as well + auto handler = dml::submit(dml::mem_copy.block_on_fault(), srcv, dstv); const auto se = std::chrono::high_resolution_clock::now();