|
|
@ -60,13 +60,22 @@ void* thread_function(void* argp) { |
|
|
|
auto sequence = dml::sequence(opcount, std::allocator<dml::byte_t>()); |
|
|
|
|
|
|
|
for (uint32_t j = 0; j < args->batch_size; j++) { |
|
|
|
const auto status = sequence.add(dml::mem_copy, srcv, dstv); |
|
|
|
// block_on_fault() is required to submit the task in such a way that the
|
|
|
|
// DSA engine can handle page faults itself together with the IOMMU which
|
|
|
|
// requires the WQ to be configured to allow this too
|
|
|
|
|
|
|
|
const auto status = sequence.add(dml::mem_copy.block_on_fault(), srcv, dstv); |
|
|
|
CHECK_STATUS(status, "Adding operation to batch failed!"); |
|
|
|
|
|
|
|
if (j % args->barrier_after_n_operations == 0) { |
|
|
|
sequence.add(dml::nop); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// we use the asynchronous submit-routine even though this is not required
|
|
|
|
// here; however, the project will later on only use async operation, and
|
|
|
|
// therefore this behaviour should be benchmarked
|
|
|
|
|
|
|
|
auto handler = dml::submit<path>(dml::batch, sequence); |
|
|
|
|
|
|
|
const auto se = std::chrono::high_resolution_clock::now(); |
|
|
@ -88,7 +97,10 @@ void* thread_function(void* argp) { |
|
|
|
// we use the asynchronous submit-routine even though this is not required
|
|
|
|
// here; however, the project will later on only use async operation, and
|
|
|
|
// therefore this behaviour should be benchmarked
|
|
|
|
auto handler = dml::submit<path>(dml::mem_copy, srcv, dstv); |
|
|
|
// block_on_fault() is required to submit the task in such a way that the
|
|
|
|
// DSA engine can handle page faults itself together with the IOMMU which
|
|
|
|
// requires the WQ to be configured to allow this too
|
|
|
|
auto handler = dml::submit<path>(dml::mem_copy.block_on_fault(), srcv, dstv); |
|
|
|
|
|
|
|
const auto se = std::chrono::high_resolution_clock::now(); |
|
|
|
|
|
|
|