diff --git a/benchmarks/benchmark.hpp b/benchmarks/benchmark.hpp
index a535c65..c94c73a 100644
--- a/benchmarks/benchmark.hpp
+++ b/benchmarks/benchmark.hpp
@@ -60,13 +60,22 @@ void* thread_function(void* argp) {
             auto sequence = dml::sequence(opcount, std::allocator<dml::byte_t>());
 
             for (uint32_t j = 0; j < args->batch_size; j++) {
-                const auto status = sequence.add(dml::mem_copy, srcv, dstv);
+                // block_on_fault() is required so that the DSA engine, together with
+                // the IOMMU, can handle page faults itself; this also requires the WQ
+                // to be configured to allow it
+
+                const auto status = sequence.add(dml::mem_copy.block_on_fault(), srcv, dstv);
+                CHECK_STATUS(status, "Adding operation to batch failed!");
 
                 if (j % args->barrier_after_n_operations == 0) {
                     sequence.add(dml::nop);
                 }
             }
 
+            // we use the asynchronous submit routine even though it is not required
+            // here; the project will later use only async operation, so this is the
+            // behaviour that should be benchmarked
+            
             auto handler = dml::submit<path>(dml::batch, sequence);
 
             const auto se = std::chrono::high_resolution_clock::now();
@@ -88,7 +97,10 @@ void* thread_function(void* argp) {
             // we use the asynchronous submit-routine even though this is not required
             // here, however the project later on will only use async operation and
             // therefore this behaviour should be benchmarked
-            auto handler = dml::submit<path>(dml::mem_copy, srcv, dstv);
+            // block_on_fault() is required so that the DSA engine, together with
+            // the IOMMU, can handle page faults itself; this also requires the WQ
+            // to be configured to allow it
+            auto handler = dml::submit<path>(dml::mem_copy.block_on_fault(), srcv, dstv);
 
             const auto se = std::chrono::high_resolution_clock::now();