This repository contains my bachelor's thesis and the associated TeX files, code snippets, and possibly more. Topic: Data Movement in Heterogeneous Memories with Intel Data Streaming Accelerator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

80 lines
2.7 KiB

  1. import os
  2. import json
  3. import pandas as pd
  4. from itertools import chain
  5. from pandas.core.ops import methods
  6. from typing import List
  7. import seaborn as sns
  8. import matplotlib.pyplot as plt
  9. runid = "Run ID"
  10. x_label = "Destination Node"
  11. y_label = "Source Node"
  12. v_label = "Throughput"
  13. title = "Copy Throughput in GiB/s tested for 1GiB Elements using all 8 DSA Chiplets"
  14. index = [ runid, x_label, y_label]
  15. data = []
  16. def calc_throughput(size_bytes,time_ns):
  17. time_seconds = time_ns * 1e-9
  18. size_gib = size_bytes / (1024 ** 3)
  19. throughput_gibs = size_gib / time_seconds
  20. return throughput_gibs
  21. def index_from_element(value,array):
  22. for (idx,val) in enumerate(array):
  23. if val == value: return idx
  24. return 0
  25. def load_time_mesurements(file_path):
  26. with open(file_path, 'r') as file:
  27. data = json.load(file)
  28. count = data["count"]
  29. batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
  30. iterations = data["list"][0]["task"]["iterations"]
  31. return {
  32. "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
  33. "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
  34. "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
  35. "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
  36. }
  37. def process_file_to_dataset(file_path, src_node, dst_node):
  38. data_size = 1024*1024*1024
  39. try:
  40. time = [load_time_mesurements(file_path)["total"]]
  41. run_idx = 0
  42. for t in time:
  43. data.append({ runid : run_idx, x_label : dst_node, y_label : src_node, v_label: calc_throughput(data_size, t)})
  44. run_idx = run_idx + 1
  45. except FileNotFoundError:
  46. return
  47. def main():
  48. folder_path = "benchmark-results/"
  49. for src_node in range(16):
  50. for dst_node in range(16):
  51. file = os.path.join(folder_path, f"copy-n{src_node}ton{dst_node}-1gib-allnodes-1e.json")
  52. process_file_to_dataset(file, src_node, dst_node)
  53. df = pd.DataFrame(data)
  54. df.set_index(index, inplace=True)
  55. data_pivot = df.pivot_table(index=y_label, columns=x_label, values=v_label)
  56. sns.heatmap(data_pivot, annot=True, cmap="YlGn", fmt=".0f")
  57. plt.title(title)
  58. plt.savefig(os.path.join(folder_path, "plot-perf-allnodethroughput.png"), bbox_inches='tight')
  59. plt.show()
  60. if __name__ == "__main__":
  61. main()