Browse Source

Add an option for internal repetitions to the benchmarks, which allows the small 1 KiB copies to run long enough for the timings to become usable (goal is about 1 s of runtime for each iteration).

master
Constantin Fürst 11 months ago
parent
commit
8ac601fc07
  1. 6
      benchmarks/benchmark-descriptors/copy-debug-n0ton0-cpu.json
  2. 3
      benchmarks/benchmark-descriptors/engine-location-bench/dstcopy-internode-n0ton1-1gib.json
  3. 18
      benchmarks/benchmark-descriptors/engine-location-bench/dstcopy-internode-n0ton1-1mib.json
  4. 3
      benchmarks/benchmark-descriptors/engine-location-bench/dstcopy-intersock-n0ton4-1gib.json
  5. 18
      benchmarks/benchmark-descriptors/engine-location-bench/dstcopy-intersock-n0ton4-1mib.json
  6. 3
      benchmarks/benchmark-descriptors/engine-location-bench/dstoutsidercopy-intersock-n0to4-1gib.json
  7. 18
      benchmarks/benchmark-descriptors/engine-location-bench/dstoutsidercopy-intersock-n0to4-1mib.json
  8. 3
      benchmarks/benchmark-descriptors/engine-location-bench/nodeoutsidercopy-internode-n0ton1-1gib.json
  9. 18
      benchmarks/benchmark-descriptors/engine-location-bench/nodeoutsidercopy-internode-n0ton1-1mib.json
  10. 3
      benchmarks/benchmark-descriptors/engine-location-bench/sockoutsidercopy-internode-n0ton1-1gib.json
  11. 18
      benchmarks/benchmark-descriptors/engine-location-bench/sockoutsidercopy-internode-n0ton1-1mib.json
  12. 3
      benchmarks/benchmark-descriptors/engine-location-bench/srccopy-internode-n0ton1-1gib.json
  13. 18
      benchmarks/benchmark-descriptors/engine-location-bench/srccopy-internode-n0ton1-1mib.json
  14. 3
      benchmarks/benchmark-descriptors/engine-location-bench/srccopy-intersock-n0ton4-1gib.json
  15. 18
      benchmarks/benchmark-descriptors/engine-location-bench/srccopy-intersock-n0ton4-1mib.json
  16. 3
      benchmarks/benchmark-descriptors/engine-location-bench/srcoutsidercopy-intersock-n0to4-1gib.json
  17. 18
      benchmarks/benchmark-descriptors/engine-location-bench/srcoutsidercopy-intersock-n0to4-1mib.json
  18. 6
      benchmarks/benchmark-descriptors/engine-location-bench/xcopy-internode-n0ton1-1gib.json
  19. 29
      benchmarks/benchmark-descriptors/engine-location-bench/xcopy-internode-n0ton1-1mib.json
  20. 6
      benchmarks/benchmark-descriptors/engine-location-bench/xcopy-intersock-n0ton4-1gib.json
  21. 29
      benchmarks/benchmark-descriptors/engine-location-bench/xcopy-intersock-n0ton4-1mib.json
  22. 36
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-12t-1gib.json
  23. 36
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-12t-1mib.json
  24. 3
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-1t-1gib.json
  25. 3
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-1t-1mib.json
  26. 6
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-2t-1gib.json
  27. 6
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-2t-1mib.json
  28. 12
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-4t-1gib.json
  29. 12
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-4t-1mib.json
  30. 24
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-8t-1gib.json
  31. 24
      benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-8t-1mib.json
  32. 24
      benchmarks/benchmark-descriptors/peak-perf-allnodes-cpu/copy-n0ton11-1gib-allnodes-cpu.json
  33. 24
      benchmarks/benchmark-descriptors/peak-perf-allnodes-cpu/copy-n0ton12-1gib-allnodes-cpu.json
  34. 24
      benchmarks/benchmark-descriptors/peak-perf-allnodes-cpu/copy-n0ton15-1gib-allnodes-cpu.json
  35. 24
      benchmarks/benchmark-descriptors/peak-perf-allnodes-cpu/copy-n0ton8-1gib-allnodes-cpu.json
  36. 24
      benchmarks/benchmark-descriptors/peak-perf-allnodes/copy-n0ton11-1gib-allnodes.json
  37. 24
      benchmarks/benchmark-descriptors/peak-perf-allnodes/copy-n0ton12-1gib-allnodes.json
  38. 24
      benchmarks/benchmark-descriptors/peak-perf-allnodes/copy-n0ton15-1gib-allnodes.json
  39. 24
      benchmarks/benchmark-descriptors/peak-perf-allnodes/copy-n0ton8-1gib-allnodes.json
  40. 36
      benchmarks/benchmark-descriptors/peak-perf-brute-cpu/copy-n0ton11-1gib-brute-cpu.json
  41. 36
      benchmarks/benchmark-descriptors/peak-perf-brute-cpu/copy-n0ton12-1gib-brute-cpu.json
  42. 36
      benchmarks/benchmark-descriptors/peak-perf-brute-cpu/copy-n0ton15-1gib-brute-cpu.json
  43. 36
      benchmarks/benchmark-descriptors/peak-perf-brute-cpu/copy-n0ton8-1gib-brute-cpu.json
  44. 12
      benchmarks/benchmark-descriptors/peak-perf-smart/copy-n0ton11-1gib-smart.json
  45. 6
      benchmarks/benchmark-descriptors/peak-perf-smart/copy-n0ton12-1gib-smart.json
  46. 6
      benchmarks/benchmark-descriptors/peak-perf-smart/copy-n0ton15-1gib-smart.json
  47. 12
      benchmarks/benchmark-descriptors/peak-perf-smart/copy-n0ton8-1gib-smart.json
  48. 3
      benchmarks/benchmark-descriptors/submit-bench/submit-bs10-128mib.json
  49. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-bs10-1kib.json
  50. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-bs10-1mib.json
  51. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-bs10-4kib.json
  52. 3
      benchmarks/benchmark-descriptors/submit-bench/submit-bs50-128mib.json
  53. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-bs50-1kib.json
  54. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-bs50-1mib.json
  55. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-bs50-4kib.json
  56. 3
      benchmarks/benchmark-descriptors/submit-bench/submit-ssaw-128mib.json
  57. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-ssaw-1kib.json
  58. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-ssaw-1mib.json
  59. 5
      benchmarks/benchmark-descriptors/submit-bench/submit-ssaw-4kib.json
  60. 140
      benchmarks/benchmark.cpp
  61. 7
      benchmarks/util/task-data.hpp

6
benchmarks/benchmark-descriptors/copy-debug-n0ton0-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1024,
"batch_size": 0
"batch_size": 0,
"reps": 1000
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1024,
"batch_size": 10
"batch_size": 10,
"reps": 100
}
}
],

3
benchmarks/benchmark-descriptors/engine-location-bench/dstcopy-internode-n0ton1-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

18
benchmarks/benchmark-descriptors/engine-location-bench/dstcopy-internode-n0ton1-1mib.json

@ -1,18 +0,0 @@
{
"count": 1,
"list": [
{
"affinity": {
"nnode_dst": 1,
"nnode_src": 0,
"node": 1
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

3
benchmarks/benchmark-descriptors/engine-location-bench/dstcopy-intersock-n0ton4-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

18
benchmarks/benchmark-descriptors/engine-location-bench/dstcopy-intersock-n0ton4-1mib.json

@ -1,18 +0,0 @@
{
"count": 1,
"list": [
{
"affinity": {
"nnode_dst": 4,
"nnode_src": 0,
"node": 4
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

3
benchmarks/benchmark-descriptors/engine-location-bench/dstoutsidercopy-intersock-n0to4-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

18
benchmarks/benchmark-descriptors/engine-location-bench/dstoutsidercopy-intersock-n0to4-1mib.json

@ -1,18 +0,0 @@
{
"count": 1,
"list": [
{
"affinity": {
"nnode_dst": 4,
"nnode_src": 0,
"node": 7
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

3
benchmarks/benchmark-descriptors/engine-location-bench/nodeoutsidercopy-internode-n0ton1-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

18
benchmarks/benchmark-descriptors/engine-location-bench/nodeoutsidercopy-internode-n0ton1-1mib.json

@ -1,18 +0,0 @@
{
"count": 1,
"list": [
{
"affinity": {
"nnode_dst": 1,
"nnode_src": 0,
"node": 3
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

3
benchmarks/benchmark-descriptors/engine-location-bench/sockoutsidercopy-internode-n0ton1-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

18
benchmarks/benchmark-descriptors/engine-location-bench/sockoutsidercopy-internode-n0ton1-1mib.json

@ -1,18 +0,0 @@
{
"count": 1,
"list": [
{
"affinity": {
"nnode_dst": 1,
"nnode_src": 0,
"node": 3
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

3
benchmarks/benchmark-descriptors/engine-location-bench/srccopy-internode-n0ton1-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

18
benchmarks/benchmark-descriptors/engine-location-bench/srccopy-internode-n0ton1-1mib.json

@ -1,18 +0,0 @@
{
"count": 1,
"list": [
{
"affinity": {
"nnode_dst": 1,
"nnode_src": 0,
"node": 0
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

3
benchmarks/benchmark-descriptors/engine-location-bench/srccopy-intersock-n0ton4-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

18
benchmarks/benchmark-descriptors/engine-location-bench/srccopy-intersock-n0ton4-1mib.json

@ -1,18 +0,0 @@
{
"count": 1,
"list": [
{
"affinity": {
"nnode_dst": 4,
"nnode_src": 0,
"node": 0
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

3
benchmarks/benchmark-descriptors/engine-location-bench/srcoutsidercopy-intersock-n0to4-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

18
benchmarks/benchmark-descriptors/engine-location-bench/srcoutsidercopy-intersock-n0to4-1mib.json

@ -1,18 +0,0 @@
{
"count": 1,
"list": [
{
"affinity": {
"nnode_dst": 4,
"nnode_src": 0,
"node": 3
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

6
benchmarks/benchmark-descriptors/engine-location-bench/xcopy-internode-n0ton1-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 536870912,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 536870912,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

29
benchmarks/benchmark-descriptors/engine-location-bench/xcopy-internode-n0ton1-1mib.json

@ -1,29 +0,0 @@
{
"count": 2,
"list": [
{
"affinity": {
"nnode_dst": 1,
"nnode_src": 0,
"node": 0
},
"task": {
"size": 1048576,
"batch_size": 0
}
},
{
"affinity": {
"nnode_dst": 1,
"nnode_src": 0,
"node": 1
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

6
benchmarks/benchmark-descriptors/engine-location-bench/xcopy-intersock-n0ton4-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 536870912,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 536870912,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

29
benchmarks/benchmark-descriptors/engine-location-bench/xcopy-intersock-n0ton4-1mib.json

@ -1,29 +0,0 @@
{
"count": 2,
"list": [
{
"affinity": {
"nnode_dst": 4,
"nnode_src": 0,
"node": 0
},
"task": {
"size": 1048576,
"batch_size": 0
}
},
{
"affinity": {
"nnode_dst": 4,
"nnode_src": 0,
"node": 4
},
"task": {
"size": 1048576,
"batch_size": 0
}
}
],
"path": "hw",
"repetitions": 10
}

36
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-12t-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -97,7 +105,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -108,7 +117,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -119,7 +129,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
},
{
@ -130,7 +141,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 10
"batch_size": 0,
"reps": 1
}
}
],

36
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-12t-1mib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -97,7 +105,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -108,7 +117,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -119,7 +129,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
},
{
@ -130,7 +141,8 @@
},
"task": {
"size": 1048576,
"batch_size": 10
"batch_size": 0,
"reps": 100
}
}
],

3
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-1t-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 120
"batch_size": 0,
"reps": 1
}
}
],

3
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-1t-1mib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1048576,
"batch_size": 120
"batch_size": 0,
"reps": 100
}
}
],

6
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-2t-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 60
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 60
"batch_size": 0,
"reps": 1
}
}
],

6
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-2t-1mib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1048576,
"batch_size": 60
"batch_size": 0,
"reps": 100
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1048576,
"batch_size": 60
"batch_size": 0,
"reps": 100
}
}
],

12
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-4t-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 30
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 30
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 30
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 30
"batch_size": 0,
"reps": 1
}
}
],

12
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-4t-1mib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1048576,
"batch_size": 30
"batch_size": 0,
"reps": 100
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1048576,
"batch_size": 30
"batch_size": 0,
"reps": 100
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1048576,
"batch_size": 30
"batch_size": 0,
"reps": 100
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1048576,
"batch_size": 30
"batch_size": 0,
"reps": 100
}
}
],

24
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-8t-1gib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 15
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 15
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 15
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 15
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 15
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 15
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 15
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 15
"batch_size": 0,
"reps": 1
}
}
],

24
benchmarks/benchmark-descriptors/mtsubmit-bench/mtsubmit-8t-1mib.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1048576,
"batch_size": 15
"batch_size": 0,
"reps": 100
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1048576,
"batch_size": 15
"batch_size": 0,
"reps": 100
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1048576,
"batch_size": 15
"batch_size": 0,
"reps": 100
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1048576,
"batch_size": 15
"batch_size": 0,
"reps": 100
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1048576,
"batch_size": 15
"batch_size": 0,
"reps": 100
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1048576,
"batch_size": 15
"batch_size": 0,
"reps": 100
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1048576,
"batch_size": 15
"batch_size": 0,
"reps": 100
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1048576,
"batch_size": 15
"batch_size": 0,
"reps": 100
}
}
],

24
benchmarks/benchmark-descriptors/peak-perf-allnodes-cpu/copy-n0ton11-1gib-allnodes-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

24
benchmarks/benchmark-descriptors/peak-perf-allnodes-cpu/copy-n0ton12-1gib-allnodes-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

24
benchmarks/benchmark-descriptors/peak-perf-allnodes-cpu/copy-n0ton15-1gib-allnodes-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

24
benchmarks/benchmark-descriptors/peak-perf-allnodes-cpu/copy-n0ton8-1gib-allnodes-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

24
benchmarks/benchmark-descriptors/peak-perf-allnodes/copy-n0ton11-1gib-allnodes.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

24
benchmarks/benchmark-descriptors/peak-perf-allnodes/copy-n0ton12-1gib-allnodes.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

24
benchmarks/benchmark-descriptors/peak-perf-allnodes/copy-n0ton15-1gib-allnodes.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

24
benchmarks/benchmark-descriptors/peak-perf-allnodes/copy-n0ton8-1gib-allnodes.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

36
benchmarks/benchmark-descriptors/peak-perf-brute-cpu/copy-n0ton11-1gib-brute-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -97,7 +105,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -108,7 +117,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -119,7 +129,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -130,7 +141,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

36
benchmarks/benchmark-descriptors/peak-perf-brute-cpu/copy-n0ton12-1gib-brute-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -97,7 +105,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -108,7 +117,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -119,7 +129,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -130,7 +141,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

36
benchmarks/benchmark-descriptors/peak-perf-brute-cpu/copy-n0ton15-1gib-brute-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -97,7 +105,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -108,7 +117,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -119,7 +129,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -130,7 +141,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

36
benchmarks/benchmark-descriptors/peak-perf-brute-cpu/copy-n0ton8-1gib-brute-cpu.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -53,7 +57,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -64,7 +69,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -75,7 +81,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -86,7 +93,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -97,7 +105,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -108,7 +117,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -119,7 +129,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -130,7 +141,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

12
benchmarks/benchmark-descriptors/peak-perf-smart/copy-n0ton11-1gib-smart.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

6
benchmarks/benchmark-descriptors/peak-perf-smart/copy-n0ton12-1gib-smart.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

6
benchmarks/benchmark-descriptors/peak-perf-smart/copy-n0ton15-1gib-smart.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

12
benchmarks/benchmark-descriptors/peak-perf-smart/copy-n0ton8-1gib-smart.json

@ -9,7 +9,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -20,7 +21,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -31,7 +33,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
},
{
@ -42,7 +45,8 @@
},
"task": {
"size": 1073741824,
"batch_size": 0
"batch_size": 0,
"reps": 1
}
}
],

3
benchmarks/benchmark-descriptors/submit-bench/submit-bs10-128mib.json

@ -9,7 +9,8 @@
},
"task": {
"batch_size": 10,
"size": 134217728
"size": 134217728,
"reps": 1
}
}
],

5
benchmarks/benchmark-descriptors/submit-bench/submit-bs10-1kib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 10,
"size": 1024
"size": 1024,
"reps": 1000
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

5
benchmarks/benchmark-descriptors/submit-bench/submit-bs10-1mib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 10,
"size": 1048576
"size": 1048576,
"reps": 100
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

5
benchmarks/benchmark-descriptors/submit-bench/submit-bs10-4kib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 10,
"size": 4096
"size": 4096,
"reps": 1000
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

3
benchmarks/benchmark-descriptors/submit-bench/submit-bs50-128mib.json

@ -9,7 +9,8 @@
},
"task": {
"batch_size": 50,
"size": 134217728
"size": 134217728,
"reps": 1
}
}
],

5
benchmarks/benchmark-descriptors/submit-bench/submit-bs50-1kib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 50,
"size": 1024
"size": 1024,
"reps": 100
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

5
benchmarks/benchmark-descriptors/submit-bench/submit-bs50-1mib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 50,
"size": 1048576
"size": 1048576,
"reps": 10
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

5
benchmarks/benchmark-descriptors/submit-bench/submit-bs50-4kib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 50,
"size": 4096
"size": 4096,
"reps": 100
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

3
benchmarks/benchmark-descriptors/submit-bench/submit-ssaw-128mib.json

@ -9,7 +9,8 @@
},
"task": {
"batch_size": 0,
"size": 134217728
"size": 134217728,
"reps": 1
}
}
],

5
benchmarks/benchmark-descriptors/submit-bench/submit-ssaw-1kib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 0,
"size": 1024
"size": 1024,
"reps": 1000
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

5
benchmarks/benchmark-descriptors/submit-bench/submit-ssaw-1mib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 0,
"size": 1048576
"size": 1048576,
"reps": 100
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

5
benchmarks/benchmark-descriptors/submit-bench/submit-ssaw-4kib.json

@ -9,10 +9,11 @@
},
"task": {
"batch_size": 0,
"size": 4096
"size": 4096,
"reps": 1000
}
}
],
"repetitions": 100,
"repetitions": 10,
"path": "hw"
}

140
benchmarks/benchmark.cpp

@ -14,55 +14,109 @@
#define LOG_CODE_INFO "Location: " << __FILE__ << "@" << __LINE__ << "::" << __FUNCTION__ << std::endl
#define LOG_ERR { std::cerr << "--- BEGIN ERROR MSG ---" << std::endl << "Physical: [Node " << task->numa_node << " | Thread " << tid << "]" << std::endl; } std::cerr << LOG_CODE_INFO
#define CHECK_STATUS(stat,msg) { if (stat != dml::status_code::ok) { LOG_ERR << "Status Code: " << StatusCodeToString(stat) << std::endl << msg << std::endl; task->status = stat; return; }}
#define CHECK_STATUS(stat,msg) { if (stat != dml::status_code::ok) { LOG_ERR << "Status Code: " << StatusCodeToString(stat) << std::endl << msg << std::endl; return; }}
std::shared_future<void> LAUNCH_;
std::vector<uint64_t> ITERATION_TIMING_;
std::vector<void*> SOURCE_;
std::vector<void*> DESTINATION_;
std::vector<std::vector<void*>> SOURCE_;
std::vector<std::vector<void*>> DESTINATION_;
template <typename path>
void thread_function(const uint32_t tid, TaskData* task) {
dml::data_view srcv = dml::make_view(reinterpret_cast<uint8_t*>(SOURCE_[tid]), task->size);
dml::data_view dstv = dml::make_view(reinterpret_cast<uint8_t*>(DESTINATION_[tid]), task->size);
LAUNCH_.wait();
task->status = dml::status_code::ok;
for (uint32_t i = 0; i < task->rep_count; i++) {
dml::data_view srcv = dml::make_view(reinterpret_cast<uint8_t*>(SOURCE_[tid][i]), task->size);
dml::data_view dstv = dml::make_view(reinterpret_cast<uint8_t*>(DESTINATION_[tid][i]), task->size);
LAUNCH_.wait();
if (task->batch_size > 1) {
auto sequence = dml::sequence(task->batch_size, std::allocator<dml::byte_t>());
if (task->batch_size > 1) {
auto sequence = dml::sequence(task->batch_size, std::allocator<dml::byte_t>());
for (uint32_t j = 0; j < task->batch_size; j++) {
const auto status = sequence.add(dml::mem_copy, srcv, dstv);
CHECK_STATUS(status, "Adding operation to batch failed!");
}
for (uint32_t j = 0; j < task->batch_size; j++) {
const auto status = sequence.add(dml::mem_copy, srcv, dstv);
CHECK_STATUS(status, "Adding operation to batch failed!");
}
// we use the asynchronous submit-routine even though this is not required
// here, however the project later on will only use async operation and
// therefore this behaviour should be benchmarked
// we use the asynchronous submit-routine even though this is not required
// here, however the project later on will only use async operation and
// therefore this behaviour should be benchmarked
auto handler = dml::submit<path>(dml::batch, sequence, dml::execution_interface<path, std::allocator<dml::byte_t>>(), task->numa_node);
auto handler = dml::submit<path>(dml::batch, sequence, dml::execution_interface<path, std::allocator<dml::byte_t>>(), task->numa_node);
auto result = handler.get();
const dml::status_code status = result.status;
CHECK_STATUS(status, "Batch completed with an Error!");
}
else {
// we use the asynchronous submit-routine even though this is not required
// here, however the project later on will only use async operation and
// therefore this behaviour should be benchmarked
auto handler = dml::submit<path>(dml::mem_copy, srcv, dstv, dml::execution_interface<path, std::allocator<dml::byte_t>>(), task->numa_node);
auto result = handler.get();
auto result = handler.get();
const dml::status_code status = result.status;
CHECK_STATUS(status, "Batch completed with an Error!");
const dml::status_code status = result.status;
CHECK_STATUS(status, "Operation completed with an Error!");
}
}
else {
// we use the asynchronous submit-routine even though this is not required
// here, however the project later on will only use async operation and
// therefore this behaviour should be benchmarked
auto handler = dml::submit<path>(dml::mem_copy, srcv, dstv, dml::execution_interface<path, std::allocator<dml::byte_t>>(), task->numa_node);
}
template <typename path>
void flush_cache(std::vector<TaskData>& args) {
auto flush_container = [&args](std::vector<std::vector<void*>>& container) {
if (container.size() != args.size()) {
std::cerr << LOG_CODE_INFO << "Failed Clearing Cache due to size missmatch between tasks and entries!";
exit(-1);
}
for (uint32_t i = 0; i < args.size(); i++) {
for (auto ptr : container[i]) {
dml::data_view view = dml::make_view(reinterpret_cast<uint8_t*>(ptr), args[i].size);
auto result = dml::execute<path>(dml::cache_flush, view);
auto result = handler.get();
if (result.status != dml::status_code::ok) {
std::cerr << LOG_CODE_INFO << "Failed Clearing Cache!";
exit(-1);
}
}
}
};
flush_container(DESTINATION_);
flush_container(SOURCE_);
}
const dml::status_code status = result.status;
CHECK_STATUS(status, "Operation completed with an Error!");
void alloc_data_fields(std::vector<TaskData>& args) {
SOURCE_.resize(args.size());
DESTINATION_.resize(args.size());
for (uint32_t tid = 0; tid < args.size(); tid++) {
DESTINATION_[tid].resize(args[tid].rep_count);
SOURCE_[tid].resize(args[tid].rep_count);
for (uint32_t r = 0; r < args[tid].rep_count; r++) {
SOURCE_[tid][r] = numa_alloc_onnode(args[tid].size, args[tid].nnode_src);
DESTINATION_[tid][r] = numa_alloc_onnode(args[tid].size, args[tid].nnode_dst);
std::memset(SOURCE_[tid][r], 0xAB, args[tid].size);
std::memset(DESTINATION_[tid][r], 0xAB, args[tid].size);
}
}
}
void dealloc_data_fields(std::vector<TaskData>& args) {
for (uint32_t tid = 0; tid < args.size(); tid++) {
for (uint32_t r = 0; r < args[tid].rep_count; r++) {
numa_free(SOURCE_[tid][r], args[tid].size);
numa_free(DESTINATION_[tid][r], args[tid].size);
}
}
SOURCE_.clear();
DESTINATION_.clear();
}
template <typename path>
void execute_dml_memcpy(std::vector<TaskData>& args, const uint64_t iterations) {
// initialize numa library
@ -71,15 +125,7 @@ void execute_dml_memcpy(std::vector<TaskData>& args, const uint64_t iterations)
// initialize data fields for use
SOURCE_.resize(args.size());
DESTINATION_.resize(args.size());
for (uint32_t tid = 0; tid < args.size(); tid++) {
SOURCE_[tid] = numa_alloc_onnode(args[tid].size, args[tid].nnode_src);
DESTINATION_[tid] = numa_alloc_onnode(args[tid].size, args[tid].nnode_dst);
std::memset(SOURCE_[tid], 0xAB, args[tid].size);
std::memset(DESTINATION_[tid], 0xAB, args[tid].size);
}
alloc_data_fields(args);
// for each requested iteration this is repeated, plus 5 iterations as warmup
@ -88,22 +134,16 @@ void execute_dml_memcpy(std::vector<TaskData>& args, const uint64_t iterations)
std::promise<void> launch_promise;
LAUNCH_ = launch_promise.get_future();
for (uint32_t tid = 0; tid < args.size(); tid++) {
// we flush the cache for the memory regions to avoid any caching effects
dml::data_view srcv = dml::make_view(reinterpret_cast<uint8_t*>(SOURCE_[tid]), args[tid].size);
dml::data_view dstv = dml::make_view(reinterpret_cast<uint8_t*>(DESTINATION_[tid]), args[tid].size);
auto rsrc = dml::execute<path>(dml::cache_flush, srcv);
auto rdst = dml::execute<path>(dml::cache_flush, dstv);
TaskData* task = &args[tid];
CHECK_STATUS(rsrc.status, "Flushing Cache for Source failed!");
CHECK_STATUS(rdst.status, "Flushing Cache for Destination failed!");
// then spawn the thread
// we flush the cache for the memory regions to avoid any caching effects
flush_cache<path>(args);
// for each requested task we spawn a thread and pass the task description
// and the thread id for accessing per-thread source and data pointers
for (uint32_t tid = 0; tid < args.size(); tid++) {
threads.emplace_back(thread_function<path>, tid, &args[tid]);
}
// sleep shortly, hopefully after this all threads have reached the barrier
using namespace std::chrono_literals;
std::this_thread::sleep_for(1ms);
@ -117,4 +157,6 @@ void execute_dml_memcpy(std::vector<TaskData>& args, const uint64_t iterations)
if (i >= 5) ITERATION_TIMING_.emplace_back(std::chrono::duration_cast<std::chrono::nanoseconds>(time_end - time_start).count());
}
dealloc_data_fields(args);
}

7
benchmarks/util/task-data.hpp

@ -16,24 +16,21 @@ struct TaskData {
// repetition
uint32_t rep_count;
uint32_t batch_size;
// thread output
dml::status_code status;
// completed iterations
uint32_t rep_completed;
};
inline void to_json(nlohmann::json& j, const TaskData& a) {
j["task"]["size"] = a.size;
j["task"]["batch_size"] = a.batch_size;
j["task"]["reps"] = a.rep_count;
j["affinity"]["node"] = a.numa_node;
j["affinity"]["nnode_src"] = a.nnode_src;
j["affinity"]["nnode_dst"] = a.nnode_dst;
j["report"]["status"] = StatusCodeToString(a.status);
}
inline void from_json(const nlohmann::json& j, TaskData& a) {
j["task"]["size"].get_to(a.size);
j["task"]["batch_size"].get_to(a.batch_size);
j["task"]["reps"].get_to(a.rep_count);
j["affinity"]["node"].get_to(a.numa_node);
j["affinity"]["nnode_src"].get_to(a.nnode_src);
j["affinity"]["nnode_dst"].get_to(a.nnode_dst);

Loading…
Cancel
Save