You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
263 lines
9.2 KiB
263 lines
9.2 KiB
% Intel DSA architecture specification (vendor document, cited by URL).
@misc{intel:dsaspec,
  author       = {Intel},
  title        = {{Intel® Data Streaming Accelerator Architecture Specification}},
  date         = {2022-09-16},
  publisher    = {Intel},
  howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/671116/intel-data-streaming-accelerator-architecture-specification.html}},
  urldate      = {2023-11-15},
}
|
|
|
|
% Intel product brief on the built-in accelerators of the Xeon platform (PDF, cited by URL).
@misc{intel:xeonbrief,
  author       = {Intel},
  title        = {{New Intel® Xeon® Platform Includes Built-In Accelerators for Encryption, Compression, and Data Movement}},
  date         = {2022-12},
  publisher    = {Intel},
  howpublished = {\url{https://www.intel.com/content/dam/www/central-libraries/us/en/documents/2022-12/storage-engines-4th-gen-xeon-brief.pdf}},
  urldate      = {2023-11-15},
}
|
|
|
|
% Intel Xeon CPU Max Series product brief (vendor document, cited by URL).
@misc{intel:xeonmaxbrief,
  author       = {Intel},
  title        = {{Intel® Xeon® CPU Max Series Product Brief}},
  date         = {2023-01-06},
  publisher    = {Intel},
  howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/765259/intel-xeon-cpu-max-series-product-brief.html}},
  urldate      = {2024-01-18},
}
|
|
|
|
% Intel DSA user guide (vendor document, cited by URL).
@misc{intel:dsaguide,
  author       = {Intel},
  title        = {{Intel® Data Streaming Accelerator User Guide}},
  date         = {2023-01-11},
  publisher    = {Intel},
  howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/759709/intel-data-streaming-accelerator-user-guide.html}},
  urldate      = {2023-11-15},
}
|
|
|
|
% Source repository of the Intel IDXD Linux kernel driver (undated; access date recorded).
@misc{intel:idxd-driver-repo,
  author       = {Intel},
  title        = {{Intel IDXD Driver for Linux Kernel}},
  publisher    = {GitHub},
  howpublished = {\url{https://github.com/intel/idxd-driver}},
  urldate      = {2024-01-07},
}
|
|
|
|
% Source repository of the Intel IDXD user-space tooling (undated; access date recorded).
@misc{intel:libaccel-config-repo,
  author       = {Intel},
  title        = {{Intel IDXD User Space Application}},
  publisher    = {GitHub},
  howpublished = {\url{https://github.com/intel/idxd-config}},
  urldate      = {2024-01-07},
}
|
|
|
|
% Online documentation of the Intel Data Mover Library, high-level API page (undated).
@misc{intel:dmldoc,
  author       = {Intel},
  title        = {{Intel Data Mover Library Documentation}},
  publisher    = {GitHub},
  howpublished = {\url{https://intel.github.io/DML/documentation/api_docs/high_level_api.html}},
  urldate      = {2024-01-07},
}
|
|
|
|
% arXiv preprint: there is no journal, so the eprint fields carry the venue.
% The eprint id is the one embedded in the DOI below.
@article{intel:analysis,
  author     = {Reese Kuper and Ipoom Jeong and Yifan Yuan and Jiayu Hu and Ren Wang and Narayan Ranganathan and Nam Sung Kim},
  title      = {{A Quantitative Analysis and Guideline of Data Streaming Accelerator in Intel® 4th Gen Xeon® Scalable Processors}},
  date       = {2023-05},
  eprint     = {2305.02480},
  eprinttype = {arXiv},
  doi        = {10.48550/arXiv.2305.02480},
}
|
|
|
|
% Conference paper (PACT 2015) on the cost of atomic operations.
@inproceedings{atomics-cost-analysis,
  author    = {Schweizer, Hermann and Besta, Maciej and Hoefler, Torsten},
  title     = {{Evaluating the Cost of Atomic Operations on Modern Architectures}},
  booktitle = {{2015 International Conference on Parallel Architecture and Compilation (PACT)}},
  year      = {2015},
  pages     = {445--456},
  doi       = {10.1109/PACT.2015.24},
}
|
|
|
|
% Journal article (it has volume and issue number), so it is typed as an article
% with the venue in the journal field. The full date replaces the separate year.
@article{shared-ptr-perf,
  author  = {T. Ku and N. Jung},
  title   = {{Implementation of Lock-Free shared\_ptr and weak\_ptr for C++11 multi-thread programming}},
  journal = {Journal of Korea Game Society},
  volume  = {21},
  number  = {1},
  pages   = {55--65},
  date    = {2021-02-28},
  doi     = {10.7583/jkgs.2021.21.1.55},
}
|
|
|
|
% cppreference page for std::shared_ptr (undated web reference; access date recorded).
@misc{cppreference:shared-ptr,
  author       = {cppreference.com},
  title        = {{CPP Reference Entry on std::shared\_ptr<T>}},
  publisher    = {cppreference},
  howpublished = {\url{https://en.cppreference.com/w/cpp/memory/shared_ptr}},
  urldate      = {2024-01-17},
}
|
|
|
|
% cppreference page for std::atomic wait (undated web reference; access date recorded).
@misc{cppreference:atomic-wait,
  author       = {cppreference.com},
  title        = {{CPP Reference Entry on std::atomic<T>::wait}},
  publisher    = {cppreference},
  howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/wait}},
  urldate      = {2024-01-18},
}
|
|
|
|
% cppreference page for std::atomic notify_one (undated web reference; access date recorded).
@misc{cppreference:atomic-notify-one,
  author       = {cppreference.com},
  title        = {{CPP Reference Entry on std::atomic<T>::notify\_one}},
  publisher    = {cppreference},
  howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/notify_one}},
  urldate      = {2024-01-18},
}
|
|
|
|
% cppreference page for std::atomic notify_all (undated web reference; access date recorded).
@misc{cppreference:atomic-notify-all,
  author       = {cppreference.com},
  title        = {{CPP Reference Entry on std::atomic<T>::notify\_all}},
  publisher    = {cppreference},
  howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/notify_all}},
  urldate      = {2024-01-18},
}
|
|
|
|
% cppreference page for std::atomic exchange (undated web reference; access date recorded).
@misc{cppreference:atomic-exchange,
  author       = {cppreference.com},
  title        = {{CPP Reference Entry on std::atomic<T>::exchange}},
  publisher    = {cppreference},
  howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/exchange}},
  urldate      = {2024-01-18},
}
|
|
|
|
% Blog post, not a journal article. It is typed as misc like the other web sources
% in this file so that the howpublished URL is actually printed.
@misc{atomic-wait-details,
  author       = {Thomas Rodgers},
  title        = {{Implementing C++20 atomic waiting in libstdc++}},
  date         = {2022-12-06},
  publisher    = {Red Hat Developer Blog},
  howpublished = {\url{https://developers.redhat.com/articles/2022/12/06/implementing-c20-atomic-waiting-libstdc}},
  urldate      = {2024-01-18},
}
|
|
|
|
% Workshop paper (IEEE IMW 2017) on HBM DRAM technology and architecture.
@inproceedings{hbm-arch-paper,
  author    = {Jun, Hongshin and Cho, Jinhee and Lee, Kangseol and Son, Ho-Young and Kim, Kwiwook and Jin, Hanho and Kim, Keith},
  title     = {{HBM (High Bandwidth Memory) DRAM Technology and Architecture}},
  booktitle = {2017 IEEE International Memory Workshop (IMW)},
  year      = {2017},
  pages     = {1--4},
  doi       = {10.1109/IMW.2017.7939084},
}
|
|
|
|
% Debian manual page numa(3) from the libnuma-dev package (undated; access date recorded).
@misc{man-libnuma,
  author       = {Debian},
  title        = {{Debian manpage 3 for libnuma-dev}},
  publisher    = {Debian},
  howpublished = {\url{https://manpages.debian.org/bookworm/libnuma-dev/numa.3.en.html}},
  urldate      = {2024-01-21},
}
|
|
|
|
% Lenovo Press paper on enabling Intel DSA on ThinkSystem servers (PDF, cited by URL).
% NOTE(review): this urldate is much earlier than every other access date in the
% file; confirm it is the access date and not the publication date.
@misc{lenovo:dsa,
  author       = {Adrian Huang},
  title        = {{Enabling Intel Data Streaming Accelerator on Lenovo ThinkSystem Servers}},
  publisher    = {Lenovo},
  howpublished = {\url{https://lenovopress.lenovo.com/lp1582.pdf}},
  urldate      = {2022-04-18},
}
|
|
|
|
% Git repository accompanying the thesis (no date or access date given).
@misc{thesis-repo,
  author       = {Anatol Constantin Fürst},
  title        = {{Accompanying Thesis Repository}},
  publisher    = {Anatol Constantin Fürst},
  howpublished = {\url{https://git.constantin-fuerst.com/constantin/bachelor-thesis}},
}
|
|
|
|
% Lenovo Press paper on HBM with Xeon Max on ThinkSystem servers (PDF, cited by URL).
% The two authors are joined with the keyword and; a comma would make the parser
% read them as a single name in Last, First form.
@misc{lenovo:hbm,
  author       = {Sam Kuo and Jimmy Cheng},
  title        = {{Implementing High Bandwidth Memory and Intel Xeon Processors Max Series on Lenovo ThinkSystem Servers}},
  date         = {2023-06-26},
  publisher    = {Lenovo},
  howpublished = {\url{https://lenovopress.lenovo.com/lp1738.pdf}},
  urldate      = {2024-01-21},
}
|
|
|
|
% Intel configuration and tuning guide for the Xeon CPU Max Series (PDF, cited by URL).
@misc{intel:maxtuning,
  author       = {Intel},
  title        = {{Intel® Xeon® CPU Max Series Configuration and Tuning Guide}},
  date         = {2023-08},
  publisher    = {Intel},
  howpublished = {\url{https://cdrdv2-public.intel.com/787743/354227-intel-xeon-cpu-max-series-configuration-and-tuning-guide-rev3.pdf}},
  urldate      = {2024-01-21},
}
|
|
|
|
% Unpublished manuscript; howpublished carries the publication status rather than a URL.
@misc{dimes-prefetching,
  author       = {André Berthold and Anna Bartuschka and Dirk Habich and Wolfgang Lehner and Horst Schirmeier},
  title        = {{Towards Query-Driven Prefetching to Optimize Data Pipelines in Heterogeneous Memory Systems}},
  date         = {2023},
  howpublished = {unpublished},
}
|
|
|
|
% Microsoft Learn article on NUMA-aware allocation (cited by URL).
% The vendor is given as author, as in the other vendor entries of this file,
% because the publisher field alone is not printed for misc entries.
@misc{microsoft:numa-malloc,
  author       = {Microsoft},
  title        = {{Allocating Memory from a NUMA Node}},
  date         = {2021-07-01},
  publisher    = {Microsoft},
  howpublished = {\url{https://learn.microsoft.com/en-us/windows/win32/memory/allocating-memory-from-a-numa-node}},
  urldate      = {2024-01-28},
}
|
|
|
|
% Kingston blog overview of the DDR5 memory standard (cited by URL).
@misc{kingston:ddr5-spec-overview,
  author       = {Kingston},
  title        = {{DDR5 memory standard: An introduction to the next generation of DRAM module technology}},
  date         = {2024-01},
  howpublished = {\url{https://www.kingston.com/en/blog/pc-performance/ddr5-overview}},
  urldate      = {2024-02-04},
}
|
|
|
|
% Journal article (IEEE TPDS) on modelling point-to-point MPI communication.
@article{bench:heterogeneous-communication,
  author   = {Thune, Andreas and Reinemo, Sven-Arne and Skeie, Tor and Cai, Xing},
  title    = {{Detailed Modeling of Heterogeneous and Contention-Constrained Point-to-Point MPI Communication}},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  year     = {2023},
  volume   = {34},
  number   = {5},
  pages    = {1580--1593},
  keywords = {Bandwidth;Sockets;Benchmark testing;Size measurement;Protocols;Multicore processing;Computational modeling;Intra-node communication;performance modeling;point-to-point MPI communication},
  doi      = {10.1109/TPDS.2023.3253881},
}
|
|
|
|
% Benchmark results obtained through personal communication (no public source).
@misc{xeonmax-peakthroughput,
  author       = {André Berthold and Anna Bartuschka},
  title        = {{Throughput Benchmarks for CPU}},
  date         = {2023},
  howpublished = {personal communication},
}
|
|
|
|
% Blog post on double-width compare-and-swap in C++ (cited by URL).
@misc{dwcas-cpp,
  author       = {Timur Doumler},
  title        = {{DWCAS in C++}},
  date         = {2022-03-31},
  howpublished = {\url{https://timur.audio/dwcas-in-c}},
  urldate      = {2024-02-07},
}
|
|
|
|
% AMD overview document on High-Bandwidth Memory (undated PDF; access date recorded).
@misc{amd:hbmoverview,
  author       = {AMD},
  title        = {{High-Bandwidth Memory (HBM)}},
  howpublished = {\url{https://www.amd.com/system/files/documents/high-bandwidth-memory-hbm.pdf}},
  urldate      = {2024-02-14},
}
|
|
|
|
% Journal article (ACM Computing Surveys) on virtual memory.
% The page range uses the ASCII double hyphen so any style can typeset it.
@article{virtual-memory,
  author    = {Peter J. Denning},
  title     = {{Virtual Memory}},
  journal   = {ACM Computing Surveys},
  date      = {1996-03},
  volume    = {28},
  number    = {1},
  pages     = {213--216},
  numpages  = {4},
  publisher = {Association for Computing Machinery},
  doi       = {10.1145/234313.234403},
}
|
|
|
|
% Intel ARK product page for the Xeon CPU Max 9468 (undated; access date recorded).
@misc{intel:xeonmax-ark,
  author       = {Intel},
  title        = {{Intel® Xeon® CPU Max 9468 Processor}},
  howpublished = {\url{https://ark.intel.com/content/www/us/en/ark/products/232596/intel-xeon-cpu-max-9468-processor-105m-cache-2-10-ghz.html}},
  urldate      = {2024-02-14},
}
|