This repository contains my bachelor's thesis and the associated TeX files, code snippets, and possibly more. Topic: Data Movement in Heterogeneous Memories with the Intel Data Streaming Accelerator

import os
import json
import pandas as pd
from itertools import chain
import seaborn as sns
import matplotlib.pyplot as plt
runid = "Run ID"
x_label = "Destination Node"
y_label = "Source Node"
v_label = "Throughput"
title = "Copy Throughput in GiB/s tested for 1GiB Elements using all 8 DSA Chiplets"
index = [ runid, x_label, y_label]
data = []
def calc_throughput(size_bytes, time_ns):
    """Convert a copy of size_bytes completed in time_ns to GiB/s."""
    time_seconds = time_ns * 1e-9
    size_gib = size_bytes / (1024 ** 3)
    return size_gib / time_seconds
def index_from_element(value, array):
    """Return the index of the first occurrence of value in array, or 0 if absent."""
    for idx, val in enumerate(array):
        if val == value:
            return idx
    return 0
def load_time_measurements(file_path):
    """Read one benchmark result file and normalize the reported times
    by iteration, batch and task counts."""
    with open(file_path, 'r') as file:
        data = json.load(file)

    count = data["count"]
    batching = data["list"][0]["task"]["batching"]
    batch_size = batching["batch_size"] if batching["batch_size"] > 0 else 1
    iterations = data["list"][0]["task"]["iterations"]

    return {
        "total": sum(
            data["list"][i]["report"]["time"]["total"] / (iterations * batch_size * count * count)
            for i in range(count)
        ),
        "combined": [x / (count * batch_size) for x in chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)])],
        "submission": [x / (count * batch_size) for x in chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)])],
        "completion": [x / (count * batch_size) for x in chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)])],
    }
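# The accessors above imply roughly the following result-file layout. This is
# a hypothetical minimal example inferred from the code, not output from an
# actual benchmark run; all values are made up:
#
# {
#     "count": 1,
#     "list": [
#         {
#             "task": { "batching": { "batch_size": 0 }, "iterations": 1000 },
#             "report": {
#                 "time": {
#                     "total": 123456789,
#                     "combined": [120000, 119500],
#                     "submission": [1500, 1400],
#                     "completion": [118500, 118100]
#                 }
#             }
#         }
#     ]
# }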
def process_file_to_dataset(file_path, src_node, dst_node):
    """Append the throughput for one src->dst measurement to the global data list."""
    data_size = 1024 * 1024 * 1024  # the benchmarks copy 1 GiB elements
    try:
        times = [load_time_measurements(file_path)["total"]]
    except FileNotFoundError:
        return
    for run_idx, t in enumerate(times):
        data.append({
            runid: run_idx,
            x_label: dst_node,
            y_label: src_node,
            v_label: calc_throughput(data_size, t),
        })
def main():
    folder_path = "benchmark-results/"

    # Collect results for every source/destination node pair.
    for src_node in range(16):
        for dst_node in range(16):
            file = os.path.join(folder_path, f"copy-n{src_node}ton{dst_node}-1gib-allnodes-1e.json")
            process_file_to_dataset(file, src_node, dst_node)

    df = pd.DataFrame(data)
    df.set_index(index, inplace=True)

    # Pivot into a source-node x destination-node matrix and render a heatmap.
    data_pivot = df.pivot_table(index=y_label, columns=x_label, values=v_label)
    sns.heatmap(data_pivot, annot=True, cmap="YlGn", fmt=".0f")
    plt.title(title)
    plt.savefig(os.path.join(folder_path, "plot-perf-allnodethroughput.png"), bbox_inches='tight')
    plt.show()
if __name__ == "__main__":
    main()
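# Usage sketch (invocation and script name hypothetical):
#   python3 plot_throughput.py
# The script expects benchmark-results/copy-n{src}ton{dst}-1gib-allnodes-1e.json
# for each of the 16x16 node pairs; missing files are skipped silently, and the
# heatmap is written to benchmark-results/plot-perf-allnodethroughput.png.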