This contains my bachelor's thesis and associated TeX files, code snippets, and maybe more. Topic: Data Movement in Heterogeneous Memories with Intel Data Streaming Accelerator
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

95 lines
3.5 KiB

import os
import json
import pandas as pd
from itertools import chain
import seaborn as sns
import matplotlib.pyplot as plt
# Column names of the long-form data set that is handed to the plot.
runid = "Run ID"
x_label = "Thread Count"
var_label = "Thread Counts"
y_label = "Throughput in GiB/s LogScale"
index = [runid, x_label, var_label]

# Thread-count tokens used in the result file names and their display labels.
thread_counts = [
    "1t",
    "2t",
    "4t",
    "8t",
    "12t",
]
thread_counts_nice = [
    "1 Thread",
    "2 Threads",
    "4 Threads",
    "8 Threads",
    "12 Threads",
]

# Task-size/engine tokens used in the result file names and their display labels.
engine_counts = [
    "1mib-1e",
    "1mib-4e",
    "1gib-1e",
    "1gib-4e",
]
engine_counts_nice = [
    "1 E/WQ and Tasksize 1 MiB",
    "4 E/WQ and Tasksize 1 MiB",
    "1 E/WQ and Tasksize 1 GiB",
    "4 E/WQ and Tasksize 1 GiB",
]

title = "Per-Thread Throughput - 120 Copy Operations split on Threads Intra-Node on DDR with Size 1 MiB"

# Rows accumulated across all result files; turned into a DataFrame for plotting.
data = []
def calc_throughput(size_bytes, time_nanosec):
    """Return the throughput in GiB/s for a transfer.

    Args:
        size_bytes: Amount of data moved, in bytes.
        time_nanosec: Duration of the transfer, in nanoseconds.

    Returns:
        ``size_bytes`` expressed in GiB divided by the duration in seconds.

    Raises:
        ZeroDivisionError: If ``time_nanosec`` is zero.
    """
    bytes_per_gib = 1024 ** 3
    elapsed_seconds = time_nanosec * 1e-9
    return (size_bytes / bytes_per_gib) / elapsed_seconds
def index_from_element(value, array):
    """Return the position of the first item in ``array`` equal to ``value``.

    Falls back to 0 when nothing matches, so a missing value cannot be told
    apart from a match at position 0.
    """
    matching_positions = (pos for pos, item in enumerate(array) if item == value)
    return next(matching_positions, 0)
def load_and_process_copy_json(file_path, total_operations=120):
    """Load one benchmark result file and scale its timings.

    The file must be a JSON object with a ``"count"`` entry and a ``"list"``
    of task entries. Each of the first ``count`` entries provides
    ``["report"]["time"][key]`` as a list of timings for the keys ``total``,
    ``combined``, ``submission`` and ``completion``. The iteration count is
    read from the first entry only (``["task"]["iterations"]``).

    Args:
        file_path: Path of the JSON result file.
        total_operations: Number of operations that were split across the
            ``count`` tasks. Defaults to 120, the value that was previously
            hard-coded. (Presumably ``total_operations / count`` is the number
            of operations per task - confirm against the benchmark.)

    Returns:
        A dict with
        ``"total"``: the *sum* over all flattened ``total`` timings, each
        divided by ``iterations * (total_operations / count)`` (a single
        number, not a list), and
        ``"combined"``, ``"submission"``, ``"completion"``: flat lists of the
        respective timings, each divided by ``total_operations / count``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError, IndexError: If the expected structure is missing.
        ZeroDivisionError: If ``count`` is zero while timings are present.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        # Named ``report`` so it does not shadow the module-level ``data`` list.
        report = json.load(file)

    count = report["count"]
    tasks = report["list"]
    iterations = tasks[0]["task"]["iterations"]

    def flattened(key):
        # Concatenate the timing lists of the first ``count`` tasks lazily.
        return chain.from_iterable(
            tasks[i]["report"]["time"][key] for i in range(count)
        )

    def scaled(key):
        # The division stays inside the loop so that an empty selection never
        # evaluates ``total_operations / count``.
        return [x / (total_operations / count) for x in flattened(key)]

    return {
        "total": sum(
            x / (iterations * (total_operations / count))
            for x in flattened("total")
        ),
        "combined": scaled("combined"),
        "submission": scaled("submission"),
        "completion": scaled("completion"),
    }
# Build the throughput rows of one engine configuration (no plotting happens here)
def create_mtsubmit_dataset(file_paths, engine_label):
    """Append throughput rows for one engine configuration to the global ``data``.

    Every file in ``file_paths`` is loaded with ``load_and_process_copy_json``
    and its ``"total"`` timing is converted to GiB/s with ``calc_throughput``.
    The n-th file is labelled with the n-th entry of ``thread_counts_nice``.

    Args:
        file_paths: Result files, ordered like ``thread_counts_nice``.
        engine_label: One of the tokens in ``engine_counts``. An unknown token
            is labelled like the first entry, because ``index_from_element``
            falls back to 0.

    Returns:
        None. One dict per run is appended to the module-level ``data`` list
        with the keys ``runid``, ``x_label``, ``var_label`` and ``y_label``.

    Raises:
        IndexError: If there are more files than entries in
            ``thread_counts_nice``.
    """
    engine_index = index_from_element(engine_label, engine_counts)
    engine_nice = engine_counts_nice[engine_index]

    # Load every file before touching ``data`` so that a failing file does not
    # leave a partially filled data set behind.
    totals = [load_and_process_copy_json(path)["total"] for path in file_paths]

    # The two 1 GiB configurations use a task size of 1 GiB, all others 1 MiB.
    if engine_label in ["1gib-1e", "1gib-4e"]:
        task_size_bytes = 1024 * 1024 * 1024
    else:
        task_size_bytes = 1024 * 1024

    for idx, total in enumerate(totals):
        # The loader reports "total" as one summed number. The previous code
        # iterated over it directly and failed with
        # "TypeError: 'float' object is not iterable". Treat a scalar as a
        # single run, and still accept a sequence of per-run timings.
        run_times = total if isinstance(total, (list, tuple)) else [total]
        for run_idx, run_time in enumerate(run_times):
            data.append({
                runid: run_idx,
                x_label: thread_counts_nice[idx],
                var_label: engine_nice,
                y_label: calc_throughput(task_size_bytes, run_time),
            })
# Entry point: collect the rows of every engine configuration and draw the bar plot
def main():
    """Build the data set from the result files, then plot, save and show it.

    For every entry of ``engine_counts`` and every entry of ``thread_counts``
    the file ``mtsubmit-<thread_count>-<engine_label>.json`` is looked up in
    the results folder. The figure is written to ``plot-perf-mtsubmit.png`` in
    that same folder and then displayed.
    """
    folder_path = "benchmark-results/"  # Replace with the actual path to your folder

    for engine_label in engine_counts:
        mt_file_paths = []
        for thread_count in thread_counts:
            file_name = f"mtsubmit-{thread_count}-{engine_label}.json"
            mt_file_paths.append(os.path.join(folder_path, file_name))
        create_mtsubmit_dataset(mt_file_paths, engine_label)

    frame = pd.DataFrame(data)
    frame.set_index(index, inplace=True)

    sns.barplot(
        data=frame,
        x=x_label,
        y=y_label,
        hue=var_label,
        palette="rocket",
        errorbar="sd",
    )
    plt.title(title)

    output_path = os.path.join(folder_path, "plot-perf-mtsubmit.png")
    plt.savefig(output_path, bbox_inches='tight')
    plt.show()


if __name__ == "__main__":
    main()