This repository contains my bachelor's thesis and the associated TeX files, code snippets, and possibly more. Topic: Data Movement in Heterogeneous Memories with Intel Data Streaming Accelerator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

254 lines
8.9 KiB

  % Intel DSA architecture specification, cited as a web document.
  @misc{intel:dsaspec,
    author       = {Intel},
    title        = {{Intel® Data Streaming Accelerator Architecture Specification}},
    publisher    = {Intel},
    date         = {2022-09-16},
    howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/671116/intel-data-streaming-accelerator-architecture-specification.html}},
    urldate      = {2023-11-15},
  }
  % Intel brief on the built-in accelerators of the Xeon platform (PDF).
  @misc{intel:xeonbrief,
    author       = {Intel},
    title        = {{New Intel® Xeon® Platform Includes Built-In Accelerators for Encryption, Compression, and Data Movement}},
    publisher    = {Intel},
    date         = {2022-12},
    howpublished = {\url{https://www.intel.com/content/dam/www/central-libraries/us/en/documents/2022-12/storage-engines-4th-gen-xeon-brief.pdf}},
    urldate      = {2023-11-15},
  }
  % Intel product brief for the Xeon CPU Max Series.
  @misc{intel:xeonmaxbrief,
    author       = {Intel},
    title        = {{Intel® Xeon® CPU Max Series Product Brief}},
    publisher    = {Intel},
    date         = {2023-01-06},
    howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/765259/intel-xeon-cpu-max-series-product-brief.html}},
    urldate      = {2024-01-18},
  }
  % Intel DSA user guide, cited as a web document.
  @misc{intel:dsaguide,
    author       = {Intel},
    title        = {{Intel® Data Streaming Accelerator User Guide}},
    publisher    = {Intel},
    date         = {2023-01-11},
    howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/759709/intel-data-streaming-accelerator-user-guide.html}},
    urldate      = {2023-11-15},
  }
  % GitHub repository of the Intel IDXD Linux kernel driver.
  @misc{intel:idxd-driver-repo,
    author       = {Intel},
    title        = {{Intel IDXD Driver for Linux Kernel}},
    publisher    = {GitHub},
    howpublished = {\url{https://github.com/intel/idxd-driver}},
    urldate      = {2024-01-07},
  }
  % GitHub repository of the Intel IDXD user-space application (idxd-config).
  @misc{intel:libaccel-config-repo,
    author       = {Intel},
    title        = {{Intel IDXD User Space Application}},
    publisher    = {GitHub},
    howpublished = {\url{https://github.com/intel/idxd-config}},
    urldate      = {2024-01-07},
  }
  % Online documentation of the Intel Data Mover Library (high-level API page).
  @misc{intel:dmldoc,
    author       = {Intel},
    title        = {{Intel Data Mover Library Documentation}},
    publisher    = {GitHub},
    howpublished = {\url{https://intel.github.io/DML/documentation/api_docs/high_level_api.html}},
    urldate      = {2024-01-07},
  }
  % arXiv preprint analysing Intel DSA. Typed as misc with eprint fields because
  % the entry has no journal; an article type would render an empty "In:" clause.
  @misc{intel:analysis,
    author     = {Kuper, Reese and Jeong, Ipoom and Yuan, Yifan and Hu, Jiayu and Wang, Ren and Ranganathan, Narayan and Kim, Nam Sung},
    title      = {{A Quantitative Analysis and Guideline of Data Streaming Accelerator in Intel® 4th Gen Xeon® Scalable Processors}},
    date       = {2023-05},
    eprint     = {2305.02480},
    eprinttype = {arXiv},
    doi        = {10.48550/arXiv.2305.02480},
  }
  % PACT 2015 conference paper on the cost of atomic operations.
  % The page range uses the double hyphen that BibTeX styles expect.
  @inproceedings{atomics-cost-analysis,
    author    = {Schweizer, Hermann and Besta, Maciej and Hoefler, Torsten},
    title     = {{Evaluating the Cost of Atomic Operations on Modern Architectures}},
    booktitle = {{2015 International Conference on Parallel Architecture and Compilation (PACT)}},
    year      = {2015},
    pages     = {445--456},
    doi       = {10.1109/PACT.2015.24},
  }
  % Journal article (Journal of Korea Game Society, vol. 21, no. 1), so it is
  % typed as an article with the venue in the journal field. The DOI is stored
  % bare without a trailing period, and only the full date is kept.
  @article{shared-ptr-perf,
    author  = {Ku, T. and Jung, N.},
    title   = {{Implementation of Lock-Free shared\_ptr and weak\_ptr for C++11 multi-thread programming}},
    journal = {{Journal of Korea Game Society}},
    volume  = {21},
    number  = {1},
    pages   = {55--65},
    date    = {2021-02-28},
    doi     = {10.7583/jkgs.2021.21.1.55},
  }
  % cppreference.com page for std::shared_ptr.
  @misc{cppreference:shared-ptr,
    author       = {cppreference.com},
    title        = {{CPP Reference Entry on std::shared\_ptr<T>}},
    publisher    = {cppreference},
    howpublished = {\url{https://en.cppreference.com/w/cpp/memory/shared_ptr}},
    urldate      = {2024-01-17},
  }
  % cppreference.com page for std::atomic wait.
  @misc{cppreference:atomic-wait,
    author       = {cppreference.com},
    title        = {{CPP Reference Entry on std::atomic<T>::wait}},
    publisher    = {cppreference},
    howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/wait}},
    urldate      = {2024-01-18},
  }
  % cppreference.com page for std::atomic notify_one.
  @misc{cppreference:atomic-notify-one,
    author       = {cppreference.com},
    title        = {{CPP Reference Entry on std::atomic<T>::notify\_one}},
    publisher    = {cppreference},
    howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/notify_one}},
    urldate      = {2024-01-18},
  }
  % cppreference.com page for std::atomic notify_all.
  @misc{cppreference:atomic-notify-all,
    author       = {cppreference.com},
    title        = {{CPP Reference Entry on std::atomic<T>::notify\_all}},
    publisher    = {cppreference},
    howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/notify_all}},
    urldate      = {2024-01-18},
  }
  % Red Hat Developer blog post on atomic waiting in libstdc++. Typed as misc
  % like the other web sources in this file, so the howpublished URL is printed;
  % an article type has no journal here and does not print howpublished.
  @misc{atomic-wait-details,
    author       = {Rodgers, Thomas},
    title        = {{Implementing C++20 atomic waiting in libstdc++}},
    publisher    = {Red Hat Developer Blog},
    date         = {2022-12-06},
    howpublished = {\url{https://developers.redhat.com/articles/2022/12/06/implementing-c20-atomic-waiting-libstdc}},
    urldate      = {2024-01-18},
  }
  % IEEE IMW 2017 paper on HBM DRAM technology and architecture.
  % The page range uses the double hyphen that BibTeX styles expect.
  @inproceedings{hbm-arch-paper,
    author    = {Jun, Hongshin and Cho, Jinhee and Lee, Kangseol and Son, Ho-Young and Kim, Kwiwook and Jin, Hanho and Kim, Keith},
    title     = {{HBM (High Bandwidth Memory) DRAM Technology and Architecture}},
    booktitle = {2017 IEEE International Memory Workshop (IMW)},
    year      = {2017},
    pages     = {1--4},
    doi       = {10.1109/IMW.2017.7939084},
  }
  % Debian manual page (section 3) for libnuma-dev.
  @misc{man-libnuma,
    author       = {Debian},
    title        = {{Debian manpage 3 for libnuma-dev}},
    publisher    = {Debian},
    howpublished = {\url{https://manpages.debian.org/bookworm/libnuma-dev/numa.3.en.html}},
    urldate      = {2024-01-21},
  }
  % Lenovo Press paper on enabling Intel DSA on ThinkSystem servers (PDF).
  @misc{lenovo:dsa,
    author       = {Adrian Huang},
    title        = {{Enabling Intel Data Streaming Accelerator on Lenovo ThinkSystem Servers}},
    publisher    = {Lenovo},
    howpublished = {\url{https://lenovopress.lenovo.com/lp1582.pdf}},
    urldate      = {2022-04-18},
  }
  % Git repository accompanying the thesis.
  @misc{thesis-repo,
    author       = {Anatol Constantin Fürst},
    title        = {{Accompanying Thesis Repository}},
    publisher    = {Anatol Constantin Fürst},
    howpublished = {\url{https://git.constantin-fuerst.com/constantin/bachelor-thesis}},
  }
  % Lenovo Press paper on HBM and Xeon Max on ThinkSystem servers (PDF).
  % The two authors are separated by "and"; a comma between names makes BibTeX
  % parse them as a single person in "Last, First" form.
  @misc{lenovo:hbm,
    author       = {Kuo, Sam and Cheng, Jimmy},
    title        = {{Implementing High Bandwidth Memory and Intel Xeon Processors Max Series on Lenovo ThinkSystem Servers}},
    publisher    = {Lenovo},
    date         = {2023-06-26},
    howpublished = {\url{https://lenovopress.lenovo.com/lp1738.pdf}},
    urldate      = {2024-01-21},
  }
  % Intel configuration and tuning guide for the Xeon CPU Max Series (PDF).
  @misc{intel:maxtuning,
    author       = {Intel},
    title        = {{Intel® Xeon® CPU Max Series Configuration and Tuning Guide}},
    publisher    = {Intel},
    date         = {2023-08},
    howpublished = {\url{https://cdrdv2-public.intel.com/787743/354227-intel-xeon-cpu-max-series-configuration-and-tuning-guide-rev3.pdf}},
    urldate      = {2024-01-21},
  }
  % Work on query-driven prefetching, recorded as unpublished.
  @misc{dimes-prefetching,
    author       = {André Berthold and Anna Bartuschka and Dirk Habich and Wolfgang Lehner and Horst Schirmeier},
    title        = {{Towards Query-Driven Prefetching to Optimize Data Pipelines in Heterogeneous Memory Systems}},
    date         = {2023},
    howpublished = {unpublished},
  }
  % Microsoft Learn page on NUMA-node memory allocation (Win32).
  % The corporate author is given explicitly, as in the other vendor entries of
  % this file, so the entry is labelled and sorted by author rather than title.
  @misc{microsoft:numa-malloc,
    author       = {Microsoft},
    title        = {{Allocating Memory from a NUMA Node}},
    publisher    = {Microsoft},
    date         = {2021-07-01},
    howpublished = {\url{https://learn.microsoft.com/en-us/windows/win32/memory/allocating-memory-from-a-numa-node}},
    urldate      = {2024-01-28},
  }
  % Kingston blog overview of the DDR5 memory standard.
  @misc{kingston:ddr5-spec-overview,
    author       = {Kingston},
    title        = {{DDR5 memory standard: An introduction to the next generation of DRAM module technology}},
    date         = {2024-01},
    howpublished = {\url{https://www.kingston.com/en/blog/pc-performance/ddr5-overview}},
    urldate      = {2024-02-04},
  }
  % IEEE TPDS article on modelling point-to-point MPI communication.
  % The page range uses the double hyphen that BibTeX styles expect.
  @article{bench:heterogeneous-communication,
    author   = {Thune, Andreas and Reinemo, Sven-Arne and Skeie, Tor and Cai, Xing},
    title    = {{Detailed Modeling of Heterogeneous and Contention-Constrained Point-to-Point MPI Communication}},
    journal  = {IEEE Transactions on Parallel and Distributed Systems},
    year     = {2023},
    volume   = {34},
    number   = {5},
    pages    = {1580--1593},
    keywords = {Bandwidth;Sockets;Benchmark testing;Size measurement;Protocols;Multicore processing;Computational modeling;Intra-node communication;performance modeling;point-to-point MPI communication},
    doi      = {10.1109/TPDS.2023.3253881},
  }
  % CPU throughput benchmarks, recorded as personal communication.
  @misc{xeonmax-peakthroughput,
    author       = {André Berthold and Anna Bartuschka},
    title        = {{Throughput Benchmarks for CPU}},
    date         = {2023},
    howpublished = {personal communication},
  }
  % Web article by Timur Doumler on DWCAS in C++.
  @misc{dwcas-cpp,
    author       = {Timur Doumler},
    title        = {{DWCAS in C++}},
    date         = {2022-03-31},
    howpublished = {\url{https://timur.audio/dwcas-in-c}},
    urldate      = {2024-02-07},
  }
  % AMD overview document on High-Bandwidth Memory (PDF).
  @misc{amd:hbmoverview,
    author       = {AMD},
    title        = {{High-Bandwidth Memory (HBM)}},
    howpublished = {\url{https://www.amd.com/system/files/documents/high-bandwidth-memory-hbm.pdf}},
    urldate      = {2024-02-14},
  }
  % ACM Computing Surveys article on virtual memory.
  % The page range is written with an ASCII double hyphen instead of a literal
  % Unicode en dash, so it is handled the same way by every BibTeX toolchain.
  @article{virtual-memory,
    author    = {Denning, Peter J.},
    title     = {{Virtual Memory}},
    journal   = {ACM Computing Surveys},
    publisher = {Association for Computing Machinery},
    date      = {1996-03},
    volume    = {28},
    number    = {1},
    pages     = {213--216},
    numpages  = {4},
    doi       = {10.1145/234313.234403},
  }
  % Intel ARK product page for the Xeon CPU Max 9468.
  @misc{intel:xeonmax-ark,
    author       = {Intel},
    title        = {{Intel® Xeon® CPU Max 9468 Processor}},
    howpublished = {\url{https://ark.intel.com/content/www/us/en/ark/products/232596/intel-xeon-cpu-max-9468-processor-105m-cache-2-10-ghz.html}},
    urldate      = {2024-02-14},
  }