This repository contains my bachelor's thesis and the associated TeX files, code snippets, and possibly more. Topic: Data Movement in Heterogeneous Memories with Intel Data Streaming Accelerator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

158 lines
5.2 KiB

  1. import os
  2. import pandas as pd
  3. import seaborn as sns
  4. import matplotlib.pyplot as plt
  5. from common import calc_throughput, load_time_mesurements, get_task_count
# Directory the benchmark result files are read from (see main).
result_path = "benchmark-results/"
# Directory the generated plot PDFs are written to.
output_path = "benchmark-plots/"

# Column names of every record that is collected. x_label and y_label are
# also passed as the axis labels for the per-configuration bar plots.
runid = "Run ID"
x_label = "Destination Node"
y_label = "Throughput in GiB/s"

# Plot titles and longer descriptions. None of these strings is referenced
# by the code visible in this file.
title_allnodes = \
"""Copy Throughput in GiB/s tested for 1GiB Elements\n
Using all 8 DSA Chiplets available on the System"""
title_smartnodes = \
"""Copy Throughput in GiB/s tested for 1GiB Elements\n
Using Cross-Copy for Intersocket and all 4 Chiplets of Socket for Intrasocket"""
title_difference = \
"""Gain in Copy Throughput in GiB/s of All-DSA vs. Smart Assignment"""
description_smartnodes = \
"""Copy Throughput in GiB/s tested for 1GiB Elements\n
Nodes of {8...15} are HBM accessors for their counterparts (minus 8)\n
Using all 4 DSA Chiplets of a Socket for Intra-Socket Operation\n
And using only the Source and Destination Nodes DSA for Inter-Socket"""
description_allnodes = \
"""Copy Throughput in GiB/s tested for 1GiB Elements\n
Nodes of {8...15} are HBM accessors for their counterparts (minus 8)\n
Using all 8 DSA Chiplets available on the System"""

# Columns that are turned into the DataFrame index before plotting.
index = [ runid, x_label, y_label]
# Shared buffer of records; filled by process_file_to_dataset and emptied
# by main once it has been converted into a DataFrame.
data = []
# Maps a configuration name to its accumulated (and, after main has run,
# averaged) throughput.
data_avg = {}
  31. # loads the measurements from a given file and processes them
  32. # so that they are normalized, meaning that the timings returned
  33. # are nanoseconds per element transfered
  34. def get_timing(file_path):
  35. divisor = get_task_count(file_path)
  36. return [ x / divisor for x in load_time_mesurements(file_path)]
  37. # procceses a single file and appends the desired timings
  38. # to the global data-array, handles multiple runs with a runid
  39. # and ignores if the given file is not found as some
  40. # configurations may not be benchmarked
  41. def process_file_to_dataset(file_path, config, dst_node):
  42. size = 1024*1024*1024
  43. if config not in data_avg:
  44. data_avg[config] = 0
  45. timing = get_timing(file_path)
  46. run_idx = 0
  47. for t in timing:
  48. tp = calc_throughput(size, t)
  49. data_avg[config] += tp / len(timing)
  50. data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
  51. run_idx = run_idx + 1
  52. def plot_bar(table,node_config,display_x,display_y):
  53. plt.figure(figsize=(2, 3))
  54. sns.barplot(x=x_label, y=y_label, data=table, palette="mako", errorbar="sd")
  55. plt.ylim(0, 70)
  56. plt.yticks([15,30,45,60,65])
  57. plt.xlabel(display_x)
  58. plt.ylabel(display_y)
  59. plt.savefig(os.path.join(output_path, f"plot-{node_config}-throughput.pdf"), bbox_inches='tight')
  60. def PlotAndrePeakResults():
  61. data_peakbench_andre = [
  62. { runid : 0, x_label : 8, y_label : 64 },
  63. { runid : 0, x_label : 11, y_label : 63 },
  64. { runid : 0, x_label : 12, y_label : 40 },
  65. { runid : 0, x_label : 15, y_label : 54 }
  66. ]
  67. df = pd.DataFrame(data_peakbench_andre)
  68. df.set_index(index, inplace=True)
  69. plot_bar(df, "andrepeak", x_label, y_label)
  70. return df
  71. # loops over all possible configuration combinations and calls
  72. # process_file_to_dataset for them in order to build a dataframe
  73. # which is then displayed and saved
  74. def main(node_config):
  75. dst_nodes = {8,11,12,15}
  76. for dst_node in dst_nodes:
  77. file = os.path.join(result_path, f"copy-n0ton{dst_node}-1gib-{node_config}.json")
  78. process_file_to_dataset(file, node_config, dst_node)
  79. data_avg[node_config] = data_avg[node_config] / len(dst_nodes)
  80. df = pd.DataFrame(data)
  81. data.clear()
  82. df.set_index(index, inplace=True)
  83. plot_bar(df, node_config, x_label, y_label)
  84. return df
  85. def get_scaling_factor(baseline,topline,utilfactor):
  86. return (topline / baseline) * (1 / utilfactor)
  87. if __name__ == "__main__":
  88. dsa_df1 = main("1dsa")
  89. dsa_df2 = main("2dsa")
  90. dsa_df4 = main("4dsa")
  91. dsa_df8 = main("8dsa")
  92. cpu_df8 = main("8cpu")
  93. cpu_dfandre = PlotAndrePeakResults()
  94. x_dsacount = "Count of DSAs"
  95. y_avgtp = "Average Throughput in GiB/s"
  96. y_scaling = "Scaling Factor"
  97. data_average = [
  98. { runid : 0, x_label : 1, y_label : data_avg["1dsa"] },
  99. { runid : 0, x_label : 2, y_label : data_avg["2dsa"] },
  100. { runid : 0, x_label : 4, y_label : data_avg["4dsa"] },
  101. { runid : 0, x_label : 8, y_label : data_avg["8dsa"] }
  102. ]
  103. average_df = pd.DataFrame(data_average)
  104. average_df.set_index(index, inplace=True)
  105. plot_bar(average_df, "average", x_dsacount, y_avgtp)
  106. data_scaling = [
  107. { x_dsacount : 1, y_scaling : get_scaling_factor(data_avg["1dsa"], data_avg["1dsa"], 1) },
  108. { x_dsacount : 2, y_scaling : get_scaling_factor(data_avg["1dsa"], data_avg["2dsa"], 2) },
  109. { x_dsacount : 4, y_scaling : get_scaling_factor(data_avg["1dsa"], data_avg["4dsa"], 4) },
  110. { x_dsacount : 8, y_scaling : get_scaling_factor(data_avg["1dsa"], data_avg["8dsa"], 8) }
  111. ]
  112. scaling_df = pd.DataFrame(data_scaling)
  113. plt.figure(figsize=(2, 3))
  114. fig = sns.lineplot(x=x_dsacount, y=y_scaling, data=scaling_df, marker='o', linestyle='-', color='b', markersize=8)
  115. plt.xticks([1,2,4,8])
  116. plt.yticks([0.25,0.5,0.75,1.0])
  117. plt.xlim(0,10)
  118. plt.ylim(0.2,1.05)
  119. plt.savefig(os.path.join(output_path, f"plot-dsa-throughput-scaling.pdf"), bbox_inches='tight')