% Bibliography database in biblatex/Biber conventions (ISO `date`/`urldate`,
% UTF-8 text, raw `url` fields). Citation keys are stable identifiers used by
% the document and must not be renamed.
%
% Conventions used throughout this file:
%   - one field per line, brace-delimited values
%   - personal names as "Family, Given", separated by " and "
%   - corporate authors wrapped in an extra brace pair so they are never
%     split into given/family parts
%   - web resources carry the bare address in `url` plus an access `urldate`
%   - page ranges use a double hyphen

% ---------------------------------------------------------------------------
% Intel documentation and software
% ---------------------------------------------------------------------------

@misc{intel:dsaspec,
  author    = {{Intel}},
  title     = {{Intel® Data Streaming Accelerator Architecture Specification}},
  publisher = {Intel},
  date      = {2022-09-16},
  url       = {https://www.intel.com/content/www/us/en/content-details/671116/intel-data-streaming-accelerator-architecture-specification.html},
  urldate   = {2023-11-15},
}

@misc{intel:xeonbrief,
  author    = {{Intel}},
  title     = {{New Intel® Xeon® Platform Includes Built-In Accelerators for Encryption, Compression, and Data Movement}},
  publisher = {Intel},
  date      = {2022-12},
  url       = {https://www.intel.com/content/dam/www/central-libraries/us/en/documents/2022-12/storage-engines-4th-gen-xeon-brief.pdf},
  urldate   = {2023-11-15},
}

@misc{intel:xeonmaxbrief,
  author    = {{Intel}},
  title     = {{Intel® Xeon® CPU Max Series Product Brief}},
  publisher = {Intel},
  date      = {2023-01-06},
  url       = {https://www.intel.com/content/www/us/en/content-details/765259/intel-xeon-cpu-max-series-product-brief.html},
  urldate   = {2024-01-18},
}

@misc{intel:dsaguide,
  author    = {{Intel}},
  title     = {{Intel® Data Streaming Accelerator User Guide}},
  publisher = {Intel},
  date      = {2023-01-11},
  url       = {https://www.intel.com/content/www/us/en/content-details/759709/intel-data-streaming-accelerator-user-guide.html},
  urldate   = {2023-11-15},
}

@misc{intel:idxd-driver-repo,
  author    = {{Intel}},
  title     = {{Intel IDXD Driver for Linux Kernel}},
  publisher = {GitHub},
  url       = {https://github.com/intel/idxd-driver},
  urldate   = {2024-01-07},
}

@misc{intel:libaccel-config-repo,
  author    = {{Intel}},
  title     = {{Intel IDXD User Space Application}},
  publisher = {GitHub},
  url       = {https://github.com/intel/idxd-config},
  urldate   = {2024-01-07},
}

@misc{intel:dmldoc,
  author    = {{Intel}},
  title     = {{Intel Data Mover Library Documentation}},
  publisher = {GitHub},
  url       = {https://intel.github.io/DML/documentation/api_docs/high_level_api.html},
  urldate   = {2024-01-07},
}

% arXiv preprint: there is no journal, so this is not an article entry.
% The eprint identifier is the one embedded in the DOI.
@misc{intel:analysis,
  author     = {Kuper, Reese and Jeong, Ipoom and Yuan, Yifan and Hu, Jiayu and Wang, Ren and Ranganathan, Narayan and Kim, Nam Sung},
  title      = {{A Quantitative Analysis and Guideline of Data Streaming Accelerator in Intel® 4th Gen Xeon® Scalable Processors}},
  date       = {2023-05},
  eprint     = {2305.02480},
  eprinttype = {arXiv},
  doi        = {10.48550/arXiv.2305.02480},
}

% ---------------------------------------------------------------------------
% Atomics and smart pointers
% ---------------------------------------------------------------------------

@inproceedings{atomics-cost-analysis,
  author    = {Schweizer, Hermann and Besta, Maciej and Hoefler, Torsten},
  title     = {{Evaluating the Cost of Atomic Operations on Modern Architectures}},
  booktitle = {{2015 International Conference on Parallel Architecture and Compilation (PACT)}},
  year      = {2015},
  pages     = {445--456},
  doi       = {10.1109/PACT.2015.24},
}

% Published in a journal (volume 21, number 1), hence an article entry.
@article{shared-ptr-perf,
  author  = {T. Ku and N. Jung},
  title   = {{Implementation of Lock-Free shared\_ptr and weak\_ptr for C++11 multi-thread programming}},
  journal = {{Journal of Korea Game Society}},
  volume  = {21},
  number  = {1},
  pages   = {55--65},
  date    = {2021-02-28},
  doi     = {10.7583/jkgs.2021.21.1.55},
}

@misc{cppreference:shared-ptr,
  author    = {{cppreference.com}},
  title     = {{CPP Reference Entry on std::shared\_ptr}},
  publisher = {cppreference},
  url       = {https://en.cppreference.com/w/cpp/memory/shared_ptr},
  urldate   = {2024-01-17},
}

@misc{cppreference:atomic-wait,
  author    = {{cppreference.com}},
  title     = {{CPP Reference Entry on std::atomic::wait}},
  publisher = {cppreference},
  url       = {https://en.cppreference.com/w/cpp/atomic/atomic/wait},
  urldate   = {2024-01-18},
}

@misc{cppreference:atomic-notify-one,
  author    = {{cppreference.com}},
  title     = {{CPP Reference Entry on std::atomic::notify\_one}},
  publisher = {cppreference},
  url       = {https://en.cppreference.com/w/cpp/atomic/atomic/notify_one},
  urldate   = {2024-01-18},
}

@misc{cppreference:atomic-notify-all,
  author    = {{cppreference.com}},
  title     = {{CPP Reference Entry on std::atomic::notify\_all}},
  publisher = {cppreference},
  url       = {https://en.cppreference.com/w/cpp/atomic/atomic/notify_all},
  urldate   = {2024-01-18},
}

@misc{cppreference:atomic-exchange,
  author    = {{cppreference.com}},
  title     = {{CPP Reference Entry on std::atomic::exchange}},
  publisher = {cppreference},
  url       = {https://en.cppreference.com/w/cpp/atomic/atomic/exchange},
  urldate   = {2024-01-18},
}

% Blog post: a web resource, so the address lives in `url` and the blog name
% describes the medium.
@misc{atomic-wait-details,
  author       = {Rodgers, Thomas},
  title        = {{Implementing C++20 atomic waiting in libstdc++}},
  howpublished = {Red Hat Developer Blog},
  date         = {2022-12-06},
  url          = {https://developers.redhat.com/articles/2022/12/06/implementing-c20-atomic-waiting-libstdc},
  urldate      = {2024-01-18},
}

% ---------------------------------------------------------------------------
% Memory technology, NUMA and platform documentation
% ---------------------------------------------------------------------------

@inproceedings{hbm-arch-paper,
  author    = {Jun, Hongshin and Cho, Jinhee and Lee, Kangseol and Son, Ho-Young and Kim, Kwiwook and Jin, Hanho and Kim, Keith},
  title     = {{HBM (High Bandwidth Memory) DRAM Technology and Architecture}},
  booktitle = {2017 IEEE International Memory Workshop (IMW)},
  year      = {2017},
  pages     = {1--4},
  doi       = {10.1109/IMW.2017.7939084},
}

@misc{man-libnuma,
  author    = {{Debian}},
  title     = {{Debian manpage 3 for libnuma-dev}},
  publisher = {Debian},
  url       = {https://manpages.debian.org/bookworm/libnuma-dev/numa.3.en.html},
  urldate   = {2024-01-21},
}

% NOTE(review): the access date below is earlier than every other access date
% in this file; it may actually be the publication date. Confirm against the
% source and move it to `date` if so.
@misc{lenovo:dsa,
  author    = {Huang, Adrian},
  title     = {{Enabling Intel Data Streaming Accelerator on Lenovo ThinkSystem Servers}},
  publisher = {Lenovo},
  url       = {https://lenovopress.lenovo.com/lp1582.pdf},
  urldate   = {2022-04-18},
}

@misc{thesis-repo,
  author    = {Fürst, Anatol Constantin},
  title     = {{Accompanying Thesis Repository}},
  publisher = {Anatol Constantin Fürst},
  url       = {https://git.constantin-fuerst.com/constantin/bachelor-thesis},
}

% Two authors: names must be joined with " and ", never with a comma.
@misc{lenovo:hbm,
  author    = {Kuo, Sam and Cheng, Jimmy},
  title     = {{Implementing High Bandwidth Memory and Intel Xeon Processors Max Series on Lenovo ThinkSystem Servers}},
  publisher = {Lenovo},
  date      = {2023-06-26},
  url       = {https://lenovopress.lenovo.com/lp1738.pdf},
  urldate   = {2024-01-21},
}

@misc{intel:maxtuning,
  author    = {{Intel}},
  title     = {{Intel® Xeon® CPU Max Series Configuration and Tuning Guide}},
  publisher = {Intel},
  date      = {2023-08},
  url       = {https://cdrdv2-public.intel.com/787743/354227-intel-xeon-cpu-max-series-configuration-and-tuning-guide-rev3.pdf},
  urldate   = {2024-01-21},
}

@misc{dimes-prefetching,
  author       = {Berthold, André and Bartuschka, Anna and Habich, Dirk and Lehner, Wolfgang and Schirmeier, Horst},
  title        = {{Towards Query-Driven Prefetching to Optimize Data Pipelines in Heterogeneous Memory Systems}},
  date         = {2023},
  howpublished = {unpublished},
}

@misc{microsoft:numa-malloc,
  author    = {{Microsoft}},
  title     = {{Allocating Memory from a NUMA Node}},
  publisher = {Microsoft},
  date      = {2021-07-01},
  url       = {https://learn.microsoft.com/en-us/windows/win32/memory/allocating-memory-from-a-numa-node},
  urldate   = {2024-01-28},
}

@misc{kingston:ddr5-spec-overview,
  author  = {{Kingston}},
  title   = {{DDR5 memory standard: An introduction to the next generation of DRAM module technology}},
  date    = {2024-01},
  url     = {https://www.kingston.com/en/blog/pc-performance/ddr5-overview},
  urldate = {2024-02-04},
}

@article{bench:heterogeneous-communication,
  author   = {Thune, Andreas and Reinemo, Sven-Arne and Skeie, Tor and Cai, Xing},
  title    = {{Detailed Modeling of Heterogeneous and Contention-Constrained Point-to-Point MPI Communication}},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  year     = {2023},
  volume   = {34},
  number   = {5},
  pages    = {1580--1593},
  keywords = {Bandwidth;Sockets;Benchmark testing;Size measurement;Protocols;Multicore processing;Computational modeling;Intra-node communication;performance modeling;point-to-point MPI communication},
  doi      = {10.1109/TPDS.2023.3253881},
}

@misc{xeonmax-peakthroughput,
  author       = {Berthold, André and Bartuschka, Anna},
  title        = {{Throughput Benchmarks for CPU}},
  date         = {2023},
  howpublished = {personal communication},
}

@misc{dwcas-cpp,
  author  = {Doumler, Timur},
  title   = {{DWCAS in C++}},
  date    = {2022-03-31},
  url     = {https://timur.audio/dwcas-in-c},
  urldate = {2024-02-07},
}

@misc{amd:hbmoverview,
  author  = {{AMD}},
  title   = {{High-Bandwidth Memory (HBM)}},
  url     = {https://www.amd.com/system/files/documents/high-bandwidth-memory-hbm.pdf},
  urldate = {2024-02-14},
}

@article{virtual-memory,
  author    = {Denning, Peter J.},
  title     = {{Virtual Memory}},
  journal   = {ACM Computing Surveys},
  publisher = {Association for Computing Machinery},
  volume    = {28},
  number    = {1},
  pages     = {213--216},
  numpages  = {4},
  date      = {1996-03},
  doi       = {10.1145/234313.234403},
}

@misc{intel:xeonmax-ark,
  author  = {{Intel}},
  title   = {{Intel® Xeon® CPU Max 9468 Processor}},
  url     = {https://ark.intel.com/content/www/us/en/ark/products/232596/intel-xeon-cpu-max-9468-processor-105m-cache-2-10-ghz.html},
  urldate = {2024-02-14},
}