This repository contains my bachelor's thesis and the associated TeX files, code snippets, and possibly more. Topic: Data Movement in Heterogeneous Memories with Intel Data Streaming Accelerator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

99 lines
3.3 KiB

  1. import os
  2. import json
  3. import pandas as pd
  4. from pandas.core.ops import methods
  5. from typing import List
  6. import seaborn as sns
  7. import matplotlib.pyplot as plt
# Column labels shared between the collected rows, the DataFrame index,
# and the plot axes.
runid = "Run ID"
x_label = "Size of Submitted Task"
y_label = "Throughput in GiB/s, LogScale"
var_label = "Submission Type"
# Benchmark size tokens as they appear in result file names, and their
# human-readable forms — parallel lists, kept in the same order so that
# index_from_element can map between them.
sizes = ["1kib", "4kib", "1mib", "32mib"]
sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "32 MiB"]
# Submission-method tokens (batch / multi-submit / single submit) and
# their display names — parallel lists, same ordering requirement.
types = ["bs10", "bs50", "ms10", "ms50", "ssaw"]
types_nice = ["Batch, Size 10", "Batch, Size 50", "Multi-Submit, Count 10", "Multi-Submit, Count 50", "Single Submit"]
title = "Optimal Submission Method - Copy Operation tested Intra-Node on DDR"
# Multi-level index applied to the DataFrame in main().
index = [runid, x_label, var_label]
# Row accumulator filled by process_file_to_dataset(); one dict per measurement.
data = []
  19. def calc_throughput(size_bytes,time_ns):
  20. time_seconds = time_ns * 1e-9
  21. size_gib = size_bytes / (1024 ** 3)
  22. throughput_gibs = size_gib / time_seconds
  23. return throughput_gibs
  24. def index_from_element(value,array):
  25. for (idx,val) in enumerate(array):
  26. if val == value: return idx
  27. return 0
  28. def load_time_mesurements(file_path,type_label):
  29. with open(file_path, 'r') as file:
  30. data = json.load(file)
  31. iterations = data["list"][0]["task"]["iterations"]
  32. divisor = 1
  33. # bs and ms types for submission process more than one
  34. # element per run and the results therefore must be
  35. # divided by this number
  36. if type_label in ["bs10", "ms10"]: divisor = 10
  37. elif type_label in ["ms50", "bs50"]: divisor = 50
  38. else: divisor = 1
  39. return {
  40. "total": data["list"][0]["report"]["time"]["total"] / (iterations * divisor),
  41. "combined": [ x / divisor for x in data["list"][0]["report"]["time"]["combined"]],
  42. "submission": [ x / divisor for x in data["list"][0]["report"]["time"]["submission"]],
  43. "completion": [ x / divisor for x in data["list"][0]["report"]["time"]["completion"]]
  44. }
  45. def process_file_to_dataset(file_path, type_label,size_label):
  46. type_index = index_from_element(type_label,types)
  47. type_nice = types_nice[type_index]
  48. size_index = index_from_element(size_label, sizes)
  49. size_nice = sizes_nice[size_index]
  50. data_size = 0
  51. if size_label == "1kib": data_size = 1024;
  52. elif size_label == "4kib": data_size = 4 * 1024;
  53. elif size_label == "1mib": data_size = 1024 * 1024;
  54. elif size_label == "32mib": data_size = 32 * 1024 * 1024;
  55. elif size_label == "1gib": data_size = 1024 * 1024 * 1024;
  56. else: data_size = 0
  57. try:
  58. time = [load_time_mesurements(file_path,type_label)["total"]]
  59. run_idx = 0
  60. for t in time:
  61. data.append({ runid : run_idx, x_label: type_nice, var_label : size_nice, y_label : calc_throughput(data_size, t)})
  62. run_idx = run_idx + 1
  63. except FileNotFoundError:
  64. return
  65. def main():
  66. folder_path = "benchmark-results/"
  67. for type_label in types:
  68. for size in sizes:
  69. file = os.path.join(folder_path, f"submit-{type_label}-{size}-1e.json")
  70. process_file_to_dataset(file, type_label, size)
  71. df = pd.DataFrame(data)
  72. df.set_index(index, inplace=True)
  73. df = df.sort_values(y_label)
  74. sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="rocket", errorbar="sd")
  75. plt.title(title)
  76. plt.savefig(os.path.join(folder_path, "plot-opt-submitmethod.png"), bbox_inches='tight')
  77. plt.show()
  78. if __name__ == "__main__":
  79. main()