Compare commits

...

2 Commits

Author SHA1 Message Date
Claudio Maggioni 64f3d38dc9 hw1: tweaks to figures 2022-10-05 11:14:40 +02:00
Claudio Maggioni b99792f558 hw1: submittable 2022-10-05 10:30:24 +02:00
2 changed files with 34 additions and 38 deletions

View File

@ -4,9 +4,8 @@
\usepackage{float}
\usepackage{subcaption}
\usepackage{graphicx}
\usepackage{fancyvrb}
\usepackage{tikz}
\usepackage{multirow}
\begin{document}
@ -40,42 +39,34 @@ L1 cache & 32 kB per core
All values are reported using base 2 IEC byte units. The cluster has 2 sockets
and a total of 20 cores (10 per socket). The cache topology diagram reported by
\texttt{likwid-topology -g} is the following:
\texttt{likwid-topology -g} is shown in Figure~\ref{fig:topo}.
\pagebreak[4]
% https://tex.stackexchange.com/a/171818
\begin{Verbatim}[fontsize=\tiny]
Socket 0:
+---------------------------------------------------------------------------------------------------------------+
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| | 0 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 | | 8 | | 9 | |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| +-----------------------------------------------------------------------------------------------------------+ |
| | 25 MB | |
| +-----------------------------------------------------------------------------------------------------------+ |
+---------------------------------------------------------------------------------------------------------------+
Socket 1:
+---------------------------------------------------------------------------------------------------------------+
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| | 10 | | 11 | | 12 | | 13 | | 14 | | 15 | | 16 | | 17 | | 18 | | 19 | |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | | 32 kB | |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | | 256 kB | |
| +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ +--------+ |
| +-----------------------------------------------------------------------------------------------------------+ |
| | 25 MB | |
| +-----------------------------------------------------------------------------------------------------------+ |
+---------------------------------------------------------------------------------------------------------------+
\end{Verbatim}
\begin{figure}[t]
\begin{center}
Socket 0:\vspace{0.3cm}
\begin{tabular}{|l|l|l|l|l|l|l|l|l|l|}
\hline 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 \\\hline
32 kB & 32 kB & 32 kB & 32 kB & 32 kB & 32 kB & 32 kB & 32 kB & 32
kB & 32 kB \\\hline
256 kB & 256 kB & 256 kB & 256 kB & 256 kB & 256 kB & 256 kB & 256
kB & 256 kB & 256 kB \\\hline
\multicolumn{10}{|c|}{25 MB} \\\hline
\end{tabular}\vspace{0.8cm}\\
Socket 1:\vspace{0.3cm}
\begin{tabular}{|l|l|l|l|l|l|l|l|l|l|}
\hline 10 & 11 & 12 & 13 & 14 & 15 & 16 & 17 & 18 & 19 \\\hline
32 kB & 32 kB & 32 kB & 32 kB & 32 kB & 32 kB & 32 kB & 32 kB & 32
kB & 32 kB \\\hline
256 kB & 256 kB & 256 kB & 256 kB & 256 kB & 256 kB & 256 kB & 256
kB & 256 kB & 256 kB \\\hline
\multicolumn{10}{|c|}{25 MB} \\\hline
\end{tabular}
\end{center}
\caption{Cache topology diagram as reported by \texttt{likwid-topology -g}.
All byte sizes are in IEC units.}
\label{fig:topo}
\end{figure}
\subsection{Memory Access Pattern of \texttt{membench.c}}
@ -221,7 +212,12 @@ implementing the pseudocode, my implementation:
\end{figure}
The results of the matrix multiplication benchmark for the naive, blocked, and
BLAS implementations are shown in Figure \ref{fig:bench}.
BLAS implementations are shown in Figure~\ref{fig:bench}. The blocked
implementation achieves approximately 50\% more FLOPS than the naive
implementation thanks to the optimisations in spatial and temporal cache
locality described above. However, the blocked implementation achieves less
than a tenth of the FLOPS of the Intel MKL BLAS based one, due to the
microarchitecture optimisations the latter is able to exploit.
\begin{figure}[t]
\includegraphics[width=\textwidth]{timing.pdf}