This repository contains my bachelor's thesis and the associated TeX files, code snippets, and possibly more. Topic: Data Movement in Heterogeneous Memories with Intel Data Streaming Accelerator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

252 lines
7.9 KiB

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <cstdlib>
  #include <exception>
  #include <iostream>
  #include <limits>
  #include <memory>
  #include <random>
  #include <string>
  #include <vector>

  #include <omp.h>

  #include "cache.hpp"
  // 64 MiB expressed in bytes (64 * 2^20).
  static constexpr size_t SIZE_64_MIB = static_cast<size_t>(64) << 20;
  // Single file-wide cache instance. It is configured through CACHE.Init(...)
  // in InitCache() and queried through CACHE.Access(...) in PerformAccessAndTest().
  8. dsacache::Cache CACHE;
  9. void InitCache(const std::string& device) {
  10. if (device == "default") {
  11. auto cache_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) {
  12. return numa_dst_node;
  13. };
  14. auto copy_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) {
  15. return std::vector<int>{ numa_dst_node };
  16. };
  17. CACHE.Init(cache_policy,copy_policy);
  18. }
  19. else if (device == "xeonmax") {
  20. auto cache_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) {
  21. // xeon max is configured to have hbm on node ids that are +8
  22. return numa_dst_node < 8 ? numa_dst_node + 8 : numa_dst_node;
  23. };
  24. auto copy_policy = [](const int numa_dst_node, const int numa_src_node, const size_t data_size) {
  25. if (data_size < SIZE_64_MIB) {
  26. // if the data size is small then the copy will just be carried
  27. // out by the destination node which does not require setting numa
  28. // thread affinity as the selected dsa engine is already the one
  29. // present on the calling thread
  30. return std::vector<int>{ (numa_dst_node >= 8 ? numa_dst_node - 8 : numa_dst_node) };
  31. }
  32. else {
  33. // for sufficiently large data, smart copy is used which will utilize
  34. // all four engines for intra-socket copy operations and cross copy on
  35. // the source and destination nodes for inter-socket copy
  36. const bool same_socket = ((numa_dst_node ^ numa_src_node) & 4) == 0;
  37. if (same_socket) {
  38. const bool socket_number = numa_dst_node >> 2;
  39. if (socket_number == 0) return std::vector<int>{ 0, 1, 2, 3 };
  40. else return std::vector<int>{ 4, 5, 6, 7 };
  41. }
  42. else {
  43. return std::vector<int>{
  44. (numa_src_node >= 8 ? numa_src_node - 8 : numa_src_node),
  45. (numa_dst_node >= 8 ? numa_dst_node - 8 : numa_dst_node)
  46. };
  47. }
  48. }
  49. };
  50. CACHE.Init(cache_policy,copy_policy);
  51. }
  52. else {
  53. std::cerr << "Given device '" << device << "' not supported!" << std::endl;
  54. exit(-1);
  55. }
  56. }
  // Allocates `size` bytes with new[] and fills them with pseudo-random values
  // covering the full uint8_t range.
  //
  // The engine is default-constructed on every call and never seeded, so each
  // call produces the same byte sequence.
  //
  // Returns an owning pointer; the caller must release it with delete[].
  uint8_t* GetRandomArray(const size_t size) {
      uint8_t* array = new uint8_t[size];

      // std::uniform_int_distribution is not specified for character types such
      // as uint8_t, therefore a wider type is drawn and narrowed afterwards
      std::uniform_int_distribution<unsigned int> unif(
          std::numeric_limits<uint8_t>::min(),
          std::numeric_limits<uint8_t>::max()
      );

      std::default_random_engine re;

      for (size_t i = 0; i < size; i++) {
          array[i] = static_cast<uint8_t>(unif(re));
      }

      return array;
  }
  // Compares the first `size` bytes of `a` and `b`.
  //
  // Returns true when all bytes match (trivially true for size == 0) and false
  // otherwise. A null pointer combined with a non-zero size is reported as
  // "not equal" instead of being dereferenced; reading invalid memory does not
  // raise a C++ exception, so a try/catch cannot provide that protection.
  bool IsEqual(const uint8_t* a, const uint8_t* b, const size_t size) {
      if (size == 0) {
          return true;
      }

      if (a == nullptr || b == nullptr) {
          return false;
      }

      return std::equal(a, a + size, b);
  }
  77. std::unique_ptr<dsacache::CacheData> PerformAccessAndTest(uint8_t* src, const size_t size, const int tid) {
  78. std::unique_ptr<dsacache::CacheData> data_cache = CACHE.Access(
  79. reinterpret_cast<uint8_t *>(src),
  80. size * sizeof(uint8_t)
  81. );
  82. uint8_t* cached_imm = reinterpret_cast<uint8_t *>(data_cache->GetDataLocation());
  83. // check the value immediately just to see if ram or cache was returned
  84. if (src == cached_imm) {
  85. std::cout << "[" << tid << "] Caching did not immediately yield different data location." << std::endl;
  86. }
  87. else if (cached_imm == nullptr) {
  88. std::cout << "[" << tid << "] Immediately got nullptr." << std::endl;
  89. }
  90. else {
  91. std::cout << "[" << tid << "] Immediately got different data location." << std::endl;
  92. }
  93. // waits for the completion of the asynchronous caching operation
  94. data_cache->WaitOnCompletion();
  95. // gets the cache-data-location from the struct
  96. uint8_t* cached = reinterpret_cast<uint8_t *>(data_cache->GetDataLocation());
  97. // tests on the resulting value
  98. if (src == cached) {
  99. std::cout << "[" << tid << "] Caching did not affect data location." << std::endl;
  100. }
  101. else if (cached == nullptr) {
  102. std::cerr << "[" << tid << "] Got nullptr from cache." << std::endl;
  103. }
  104. else {
  105. std::cout << "[" << tid << "] Got different data location from cache." << std::endl;
  106. }
  107. if (IsEqual(src,cached,size)) {
  108. std::cout << "[" << tid << "] Cached data is correct." << std::endl;
  109. }
  110. else {
  111. std::cerr << "[" << tid << "] Cached data is wrong." << std::endl;
  112. }
  113. return std::move(data_cache);
  114. }
  115. void RunTestST(const size_t size) {
  116. uint8_t* data = GetRandomArray(size);
  117. static constexpr int tid = 0;
  118. std::cout << "[" << tid << "] first access --- " << std::endl;
  119. PerformAccessAndTest(data, size, tid);
  120. std::cout << "[" << tid << "] second access --- " << std::endl;
  121. PerformAccessAndTest(data, size, tid);
  122. std::cout << "[" << tid << "] end of application --- " << std::endl;
  123. }
  124. void RunTestMT(const size_t size) {
  125. uint8_t* data = GetRandomArray(size);
  126. #pragma omp parallel
  127. {
  128. const int tid = omp_get_thread_num();
  129. std::cout << "[" << tid << "] first access --- " << std::endl;
  130. PerformAccessAndTest(data, size, tid);
  131. std::cout << "[" << tid << "] second access --- " << std::endl;
  132. PerformAccessAndTest(data, size, tid);
  133. std::cout << "[" << tid << "] end of block --- " << std::endl;
  134. }
  135. }
  136. void RunTestFlush(const size_t size) {
  137. uint8_t* data1 = GetRandomArray(size);
  138. uint8_t* data2 = GetRandomArray(size);
  139. uint8_t* data3 = GetRandomArray(size);
  140. static constexpr int tid = 0;
  141. std::cout << "[" << tid << "] first access to data d1 and keepalive --- " << std::endl;
  142. const auto c1 = PerformAccessAndTest(data1, size, tid);
  143. std::cout << "[" << tid << "] second access to d2 lets d2 vanish --- " << std::endl;
  144. PerformAccessAndTest(data2, size, tid);
  145. std::cout << "[" << tid << "] third access to d3 should clear d2 --- " << std::endl;
  146. PerformAccessAndTest(data3, size, tid);
  147. std::cout << "[" << tid << "] end of block and test d1 == cache1 --- " << std::endl;
  148. if (IsEqual(data1, c1->GetDataLocation(), size)) {
  149. std::cout << "[" << tid << "] Cached d1 is still correct." << std::endl;
  150. }
  151. else {
  152. std::cerr << "[" << tid << "] Cached d1 is bad." << std::endl;
  153. }
  154. }
  155. int main(int argc, char **argv) {
  156. if (argc != 4) {
  157. std::cerr << "This application requires three parameters!" << std::endl;
  158. std::cout << "Please provide the following positional arguments: [device] [mode] [size]" << std::endl;
  159. std::cout << "[device] from { default, xeonmax } which influences cache and execution placement" << std::endl;
  160. std::cout << "[mode] from { st, mt, flt } or single and multi threaded and flushtest respectively" << std::endl;
  161. std::cout << "[size] positive integral number, amount of bytes in data array" << std::endl;
  162. std::cout << "for flushtest the given size should be 1/3 of the available cache size" << std::endl;
  163. exit(-1);
  164. }
  165. const std::string device = argv[1];
  166. const std::string mode = argv[2];
  167. const std::string size_s = argv[3];
  168. uint32_t size = 0;
  169. try {
  170. size = std::stoul(size_s);
  171. }
  172. catch (...) {
  173. std::cerr << "Given Size '" << size_s << "' caused error during conversion to number!" << std::endl;
  174. }
  175. InitCache(device);
  176. if (mode == "st") {
  177. RunTestST(size);
  178. }
  179. else if (mode == "mt") {
  180. RunTestMT(size);
  181. }
  182. else if (mode == "flt") {
  183. RunTestFlush(size);
  184. }
  185. else {
  186. std::cerr << "Given Mode '" << mode << "' not supported!" << std::endl;
  187. exit(-1);
  188. }
  189. }