From ba9d18d8959949c6eeff6bf27f8104ac705bdcae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Sun, 21 Jan 2024 16:35:03 +0100 Subject: [PATCH] handle cleardoublepage in bachelor.tex and not the chapter files themselves --- thesis/bachelor.tex | 11 ++++++++++- thesis/content/00_title.tex | 4 +--- thesis/content/10_introduction.tex | 3 --- thesis/content/30_performance.tex | 8 +++----- thesis/content/40_design.tex | 2 -- thesis/content/50_implementation.tex | 2 -- thesis/content/60_evaluation.tex | 2 -- thesis/content/70_conclusion.tex | 2 -- 8 files changed, 14 insertions(+), 20 deletions(-) diff --git a/thesis/bachelor.tex b/thesis/bachelor.tex index c0710cb..7049b6c 100644 --- a/thesis/bachelor.tex +++ b/thesis/bachelor.tex @@ -47,6 +47,7 @@ plainpages=false,pdfpagelabels=true]{hyperref} \pagenumbering{Roman} \input{content/00_title.tex} +\cleardoublepage \includepdf{images/bachelor-aufgabe.pdf} \cleardoublepage @@ -60,6 +61,8 @@ plainpages=false,pdfpagelabels=true]{hyperref} \input{content/02_abstract.tex} \end{abstract} +\setcounter{figure}{0} + \cleardoublepage \tableofcontents @@ -73,17 +76,23 @@ plainpages=false,pdfpagelabels=true]{hyperref} \cleardoublepage \pagenumbering{arabic} -\setcounter{figure}{0} % use \input for small stuff (like a list you include twice or a tiks figure) % and \include for large latex compilation workloads (like a chapter) to get faster builds. \include{content/10_introduction} +\cleardoublepage \include{content/20_state} +\cleardoublepage \include{content/30_performance} +\cleardoublepage \include{content/40_design} +\cleardoublepage \include{content/50_implementation} +\cleardoublepage \include{content/60_evaluation} +\cleardoublepage \include{content/70_conclusion} +\cleardoublepage \appendix diff --git a/thesis/content/00_title.tex b/thesis/content/00_title.tex index 88eb683..560fc10 100644 --- a/thesis/content/00_title.tex +++ b/thesis/content/00_title.tex @@ -23,6 +23,4 @@ \end{minipage}} \maketitle -\end{singlespace} - -\cleardoublepage +\end{singlespace} \ No newline at end of file diff --git a/thesis/content/10_introduction.tex b/thesis/content/10_introduction.tex index 249b04b..706e43e 100644 --- a/thesis/content/10_introduction.tex +++ b/thesis/content/10_introduction.tex @@ -12,11 +12,8 @@ % den Rest der Arbeit. Meist braucht man mindestens 4 Seiten dafür, mehr % als 10 Seiten liest keiner. - \todo{write this chapter} -\cleardoublepage - %%% Local Variables: %%% TeX-master: "diplom" %%% End: diff --git a/thesis/content/30_performance.tex b/thesis/content/30_performance.tex index 195c87d..b93477d 100644 --- a/thesis/content/30_performance.tex +++ b/thesis/content/30_performance.tex @@ -9,9 +9,9 @@ mention article by reese cooper here \begin{figure}[h] \centering - \includegraphics[width=1.0\textwidth]{images/structo-benchmark.png} - \caption{Throughput for different Submission Methods and Sizes} - \label{fig:perf-submitmethod} + \includegraphics[width=0.9\textwidth]{images/structo-benchmark.png} + \caption{Benchmark Procedure Pseudo-Code} + \label{fig:benchmark-function} \end{figure} \todo{split graphic into multiple parts for the three submission types} @@ -74,8 +74,6 @@ Another limitation may be observed in this result, namely the inherent throughpu \item lower utilization of dsa is good when it will be shared between threads/processes \end{itemize} -\cleardoublepage - %%% Local Variables: %%% TeX-master: "diplom" %%% End: diff --git a/thesis/content/40_design.tex b/thesis/content/40_design.tex index ac7754d..aba8820 100644 --- a/thesis/content/40_design.tex +++ b/thesis/content/40_design.tex @@ -62,8 +62,6 @@ Due to its reliance on libnuma for memory allocation and thread pinning, \texttt Compared with the challenges of ensuring correct entry lifetime and thread safety, the application of \gls{dsa} for the task of duplicating data is simple, thanks partly to \gls{intel:dml} \cite{intel:dmldoc}. Upon a call to \texttt{Cache::Access} and determining that the given memory pointer is not present in cache, work will be submitted to the Accelerator. Before, however, the desired location must be determined which the user-defined cache placement policy function handles. With the desired placement obtained, the copy policy then determines, which nodes should take part in the copy operation which is equivalent to selecting the Accelerators following \ref{subsection:dsa-hwarch}. This causes the work to be split upon the available accelerators to which the work descriptors are submitted at this time. The handlers that \gls{intel:dml} \cite{intel:dmldoc} provides will then be moved to the \texttt{CacheData} instance to permit the callee to wait upon caching completion. As the choice of cache placement and copy policy is user-defined, one possibility will be discussed in \ref{chap:implementation}. \par -\cleardoublepage - %%% Local Variables: %%% TeX-master: "diplom" %%% End: diff --git a/thesis/content/50_implementation.tex b/thesis/content/50_implementation.tex index 0d73362..e6da028 100644 --- a/thesis/content/50_implementation.tex +++ b/thesis/content/50_implementation.tex @@ -72,8 +72,6 @@ With the distributed locking described in \ref{subsec:implementation:cache-state After \ref{subsec:implementation:accel-usage} the implementation of \texttt{Cache} provided leaves it up to the user to choose a caching and copy method policy which is accomplished through submitting function pointers at initialization of the \texttt{Cache}. In \ref{sec:state:setup-and-config} we configured our system to have separate \gls{numa:node}s for accessing \gls{hbm} which are assigned a \gls{numa:node}-ID by adding eight to the \gls{numa:node}s ID of the \gls{numa:node} that physically contains the \gls{hbm}. Therefore, given \gls{numa:node} 3 accesses some datum, the most efficient placement for the copy would be on \gls{numa:node} \(3 + 8 == 11\). As the \texttt{Cache} is intended for multithreaded usage, conserving accelerator resources is important, so that concurrent cache requests complete quickly. To get high per-copy performance while maintaining low usage, the smart-copy method is selected as described in \ref{sec:perf:datacopy} for larger copies, while small copies will be handled exclusively by the current node. This distinction is made due to the overhead of assigning the current thread to the selected nodes, which is required as \gls{intel:dml} assigns submissions only to the \gls{dsa} engine present on the node of the calling thread \cite[Section "NUMA support"]{intel:dmldoc}. No testing has taken place to evaluate this overhead and determine the most effective threshold. -\cleardoublepage - %%% Local Variables: %%% TeX-master: "diplom" %%% End: diff --git a/thesis/content/60_evaluation.tex b/thesis/content/60_evaluation.tex index 4670fcf..5265fa7 100644 --- a/thesis/content/60_evaluation.tex +++ b/thesis/content/60_evaluation.tex @@ -14,8 +14,6 @@ \todo{write this chapter} -\cleardoublepage - %%% Local Variables: %%% TeX-master: "diplom" %%% End: diff --git a/thesis/content/70_conclusion.tex b/thesis/content/70_conclusion.tex index fa38018..8524da6 100644 --- a/thesis/content/70_conclusion.tex +++ b/thesis/content/70_conclusion.tex @@ -33,8 +33,6 @@ \item extend the cache implementation use cases where data is not static \end{itemize} -\cleardoublepage - %%% Local Variables: %%% TeX-master: "diplom" %%% End: