diff --git a/thesis/content/50_implementation.tex b/thesis/content/50_implementation.tex index 41186dd..fffbe25 100644 --- a/thesis/content/50_implementation.tex +++ b/thesis/content/50_implementation.tex @@ -42,7 +42,7 @@ As we aim to minimize the time spent in a locked region, only the task is added Using \texttt{std::shared\_ptr} also introduces uncertainty, relying on the implementation to be performant. The standard does not specify whether a lock-free algorithm is to be used, and \cite{shared-ptr-perf} suggests abysmal performance for some implementations, although the full article is in Korean. No further research was found on this topic. \par -Therefore, the decision was made to implement atomic reference counting for \texttt{CacheData}. This involves providing a custom constructor and destructor wherein a shared atomic integer is either incremented or decremented using atomic fetch sub and add operations \cite{cppreference:atomic-operations} to modify the reference count. In the case of a decrease to zero, the destructor was called for the last reference and then performs the actual destruction. \par +Therefore, the decision was made to implement atomic reference counting for \texttt{CacheData}. This involves providing a custom constructor and destructor wherein a shared atomic integer is either incremented or decremented using atomic fetch-add and fetch-sub operations to modify the reference count. If the count drops to zero, the destructor was invoked for the last remaining reference and therefore performs the actual destruction. \par \begin{figure}[h!tb] \centering @@ -107,6 +107,8 @@ After \ref{sec:design:accel-usage} the implementation of \texttt{Cache} provided Applying the \texttt{Cache} to \gls{qdp} is straightforward. We adapted the benchmarking code developed by Anna Bartuschka and André Berthold \cite{dimes-prefetching}, calling \texttt{Cache::Access} for both prefetching and cache access. 
\par +\todo{write about modifications} + %%% Local Variables: %%% TeX-master: "diplom" %%% End: diff --git a/thesis/own.bib b/thesis/own.bib index a9f026f..4457d6f 100644 --- a/thesis/own.bib +++ b/thesis/own.bib @@ -1,32 +1,36 @@ -@ONLINE{intel:dsaspec, +@misc{intel:dsaspec, author = {Intel}, title = {{Intel® Data Streaming Accelerator Architecture Specification}}, date = {2022-09-16}, - url = {https://www.intel.com/content/www/us/en/content-details/671116/intel-data-streaming-accelerator-architecture-specification.html}, + publisher = {Intel}, + howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/671116/intel-data-streaming-accelerator-architecture-specification.html}}, urldate = {2023-11-15} } -@ONLINE{intel:xeonbrief, +@misc{intel:xeonbrief, author = {Intel}, title = {{New Intel® Xeon® Platform Includes Built-In Accelerators for Encryption, Compression, and Data Movement}}, date = {2022-12}, - url = {https://www.intel.com/content/dam/www/central-libraries/us/en/documents/2022-12/storage-engines-4th-gen-xeon-brief.pdf}, + publisher = {Intel}, + howpublished = {\url{https://www.intel.com/content/dam/www/central-libraries/us/en/documents/2022-12/storage-engines-4th-gen-xeon-brief.pdf}}, urldate = {2023-11-15} } -@ONLINE{intel:xeonmaxbrief, +@misc{intel:xeonmaxbrief, author = {Intel}, title = {{Intel® Xeon® CPU Max Series Product Brief}}, date = {2023-01-06}, - url = {https://www.intel.com/content/www/us/en/content-details/765259/intel-xeon-cpu-max-series-product-brief.html}, + publisher = {Intel}, + howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/765259/intel-xeon-cpu-max-series-product-brief.html}}, urldate = {2024-01-18} } -@ONLINE{intel:dsaguide, +@misc{intel:dsaguide, author = {Intel}, title = {{Intel® Data Streaming Accelerator User Guide}}, date = {2023-01-11}, - url = {https://www.intel.com/content/www/us/en/content-details/759709/intel-data-streaming-accelerator-user-guide.html}, + publisher = {Intel}, 
+ howpublished = {\url{https://www.intel.com/content/www/us/en/content-details/759709/intel-data-streaming-accelerator-user-guide.html}}, urldate = {2023-11-15} } @@ -34,7 +38,6 @@ author = {Intel}, title = {{Intel IDXD Driver for Linux Kernel}}, publisher = {GitHub}, - journal = {GitHub repository}, howpublished = {\url{https://github.com/intel/idxd-driver}}, urldate = {2024-01-07} } @@ -43,7 +46,6 @@ author = {Intel}, title = {{Intel IDXD User Space Application}}, publisher = {GitHub}, - journal = {GitHub repository}, howpublished = {\url{https://github.com/intel/idxd-config}}, urldate = {2024-01-07} } @@ -56,160 +58,148 @@ urldate = {2024-01-07} } -@ONLINE{intel:analysis, - author = {Reese Kuper et al.}, +@ARTICLE{intel:analysis, + author = {Reese Kuper and Ipoom Jeong and Yifan Yuan and Jiayu Hu and Ren Wang and Narayan Ranganathan and Nam Sung Kim}, title = {{A Quantitative Analysis and Guideline of Data Streaming Accelerator in Intel® 4th Gen Xeon® Scalable Processors}}, date = {2023-05}, - url = {https://arxiv.org/pdf/2305.02480.pdf}, - urldate = {2024-01-07} + doi = {10.48550/arXiv.2305.02480} } @INPROCEEDINGS{atomics-cost-analysis, - author={Schweizer, Hermann and Besta, Maciej and Hoefler, Torsten}, - booktitle={{2015 International Conference on Parallel Architecture and Compilation (PACT)}}, - title={{Evaluating the Cost of Atomic Operations on Modern Architectures}}, - year={2015}, - volume={}, - number={}, - pages={445-456}, - doi={10.1109/PACT.2015.24} + author={Schweizer, Hermann and Besta, Maciej and Hoefler, Torsten}, + booktitle={{2015 International Conference on Parallel Architecture and Compilation (PACT)}}, + title={{Evaluating the Cost of Atomic Operations on Modern Architectures}}, + year={2015}, + pages={445-456}, + doi={10.1109/PACT.2015.24} } @INPROCEEDINGS{shared-ptr-perf, - author={T. Ku and N. 
Jung}, - booktitle={{Journal of Korea Game Society}}, - title={{Implementation of Lock-Free shared\_ptr and weak\_ptr for C++11 multi-thread programming}}, - year={2021}, - volume={21}, - number={1}, - pages={55-65}, - date = {2021-02-28}, - doi={10.7583/jkgs.2021.21.1.55.} -} - -@ONLINE{cppreference:shared-ptr, + author={T. Ku and N. Jung}, + booktitle={{Journal of Korea Game Society}}, + title={{Implementation of Lock-Free shared\_ptr and weak\_ptr for C++11 multi-thread programming}}, + year={2021}, + volume={21}, + number={1}, + pages={55-65}, + date = {2021-02-28}, + doi={10.7583/jkgs.2021.21.1.55} +} + +@misc{cppreference:shared-ptr, author = {cppreference.com}, title = {{CPP Reference Entry on std::shared\_ptr}}, publisher = {cppreference}, - date = {2023}, - url = {https://en.cppreference.com/w/cpp/memory/shared_ptr}, + howpublished = {\url{https://en.cppreference.com/w/cpp/memory/shared_ptr}}, urldate = {2024-01-17} } -@ONLINE{cppreference:atomic-operations, - author = {cppreference.com}, - title = {{CPP Reference List of Atomic Operations}}, - publisher = {cppreference}, - date = {2023}, - url = {https://en.cppreference.com/w/cpp/thread#Atomic_operations}, - urldate = {2024-01-18} -} - -@ONLINE{cppreference:atomic-wait, +@misc{cppreference:atomic-wait, author = {cppreference.com}, title = {{CPP Reference Entry on std::atomic::wait}}, publisher = {cppreference}, - date = {2023}, - url = {https://en.cppreference.com/w/cpp/atomic/atomic/wait}, + howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/wait}}, urldate = {2024-01-18} } -@ONLINE{cppreference:atomic-notify-one, +@misc{cppreference:atomic-notify-one, author = {cppreference.com}, title = {{CPP Reference Entry on std::atomic::notify\_one}}, publisher = {cppreference}, - date = {2023}, - url = {https://en.cppreference.com/w/cpp/atomic/atomic/notify_one}, + howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/notify_one}}, urldate = {2024-01-18} } 
-@ONLINE{cppreference:atomic-notify-all, +@misc{cppreference:atomic-notify-all, author = {cppreference.com}, title = {{CPP Reference Entry on std::atomic::notify\_all}}, publisher = {cppreference}, - date = {2023}, - url = {https://en.cppreference.com/w/cpp/atomic/atomic/notify_all}, + howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/notify_all}}, urldate = {2024-01-18} } -@ONLINE{cppreference:atomic-exchange, +@misc{cppreference:atomic-exchange, author = {cppreference.com}, title = {{CPP Reference Entry on std::atomic::exchange}}, publisher = {cppreference}, - date = {2023}, - url = {https://en.cppreference.com/w/cpp/atomic/atomic/exchange}, + howpublished = {\url{https://en.cppreference.com/w/cpp/atomic/atomic/exchange}}, urldate = {2024-01-18} } -@ONLINE{atomic-wait-details, +@misc{atomic-wait-details, author = {Thomas Rodgers}, title = {{Implementing C++20 atomic waiting in libstdc++}}, publisher = {Red Hat Developer Blog}, date = {2022-12-06}, urldate = {2024-01-18}, - url = {https://developers.redhat.com/articles/2022/12/06/implementing-c20-atomic-waiting-libstdc#how_can_we_implement_atomic_waiting_} + howpublished = {\url{https://developers.redhat.com/articles/2022/12/06/implementing-c20-atomic-waiting-libstdc}} } -@ONLINE{amd:programmers-manual, +@misc{amd:programmers-manual, author = {AMD}, + publisher = {AMD}, title = {{AMD64 Programmer's Manual Volume 2: System Programming}}, date = {2016-12}, urldate = {2024-01-18}, - url = {https://support.amd.com/TechDocs/24593.pdf} + howpublished = {\url{https://support.amd.com/TechDocs/24593.pdf}} } -@ONLINE{intel:programmers-manual, +@misc{intel:programmers-manual, author = {Intel}, + publisher = {Intel}, title = {{Intel 64 and IA-32 Architectures Software Developer's Manual Volume 3A: System Programming Guide, Part 1}}, date = {2016-12}, urldate = {2024-01-18}, - url = {https://support.amd.com/TechDocs/24593.pdf} + howpublished = {\url{https://support.amd.com/TechDocs/24593.pdf}} } 
@INPROCEEDINGS{hbm-arch-paper, - author={Jun, Hongshin and Cho, Jinhee and Lee, Kangseol and Son, Ho-Young and Kim, Kwiwook and Jin, Hanho and Kim, Keith}, - booktitle={2017 IEEE International Memory Workshop (IMW)}, - title={HBM (High Bandwidth Memory) DRAM Technology and Architecture}, - year={2017}, - volume={}, - number={}, - pages={1-4}, - doi={10.1109/IMW.2017.7939084} + author={Jun, Hongshin and Cho, Jinhee and Lee, Kangseol and Son, Ho-Young and Kim, Kwiwook and Jin, Hanho and Kim, Keith}, + booktitle={2017 IEEE International Memory Workshop (IMW)}, + title={HBM (High Bandwidth Memory) DRAM Technology and Architecture}, + year={2017}, + pages={1-4}, + doi={10.1109/IMW.2017.7939084} } @misc{man-libnuma, author = {Debian}, + publisher = {Debian}, title = {{Debian manpage 3 for libnuma-dev}}, urldate = {2024-01-21}, - url = {https://manpages.debian.org/bookworm/libnuma-dev/numa.3.en.html} + howpublished = {\url{https://manpages.debian.org/bookworm/libnuma-dev/numa.3.en.html}} } -@ONLINE{lenovo:dsa, +@misc{lenovo:dsa, author = {Adrian Huang}, + publisher = {Lenovo}, title = {{Enabling Intel Data Streaming Accelerator on Lenovo ThinkSystem Servers}}, urldate = {2022-04-18}, - url = {https://lenovopress.lenovo.com/lp1582.pdf} + howpublished = {\url{https://lenovopress.lenovo.com/lp1582.pdf}} } @misc{thesis-repo, author = {Anatol Constantin Fürst}, + publisher = {Anatol Constantin Fürst}, title = {{Accompanying Thesis Repository}}, - url = {https://git.constantin-fuerst.com/constantin/bachelor-thesis} + howpublished = {\url{https://git.constantin-fuerst.com/constantin/bachelor-thesis}} } -@ONLINE{lenovo:hbm, +@misc{lenovo:hbm, author = {Sam Kuo, Jimmy Cheng}, + publisher = {Lenovo}, title = {{Implementing High Bandwidth Memory and Intel Xeon Processors Max Series on Lenovo ThinkSystem Servers}}, date = {2023-06-26}, - url = {https://lenovopress.lenovo.com/lp1738.pdf}, + howpublished = {\url{https://lenovopress.lenovo.com/lp1738.pdf}}, urldate = {2024-01-21} } 
-@ONLINE{intel:maxtuning, +@misc{intel:maxtuning, author = {Intel}, + publisher = {Intel}, title = {{Intel® Xeon® CPU Max Series Configuration and Tuning Guide}}, date = {2023-08}, - url = {https://cdrdv2-public.intel.com/787743/354227-intel-xeon-cpu-max-series-configuration-and-tuning-guide-rev3.pdf}, + howpublished = {\url{https://cdrdv2-public.intel.com/787743/354227-intel-xeon-cpu-max-series-configuration-and-tuning-guide-rev3.pdf}}, urldate = {2024-01-21} } @@ -220,19 +210,19 @@ howpublished = "unpublished" } -@ONLINE{microsoft:numa-malloc, - author = {Microsoft}, +@misc{microsoft:numa-malloc, + publisher = {Microsoft}, title = {{Allocating Memory from a NUMA Node}}, date = {2021-07-01}, - url = {https://learn.microsoft.com/en-us/windows/win32/memory/allocating-memory-from-a-numa-node}, + howpublished = {\url{https://learn.microsoft.com/en-us/windows/win32/memory/allocating-memory-from-a-numa-node}}, urldate = {2024-01-28} } -@ONLINE{kingston:ddr5-spec-overview, +@misc{kingston:ddr5-spec-overview, author = {Kingston}, title = {{DDR5 memory standard: An introduction to the next generation of DRAM module technology}}, date = {2024-01}, - url = {https://www.kingston.com/en/blog/pc-performance/ddr5-overview}, + howpublished = {\url{https://www.kingston.com/en/blog/pc-performance/ddr5-overview}}, urldate = {2024-02-04} }