This repository contains my bachelor's thesis and the associated TeX files, code snippets, and possibly more. Topic: Data Movement in Heterogeneous Memories with Intel Data Streaming Accelerator

import os
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput, index_from_element
runid = "Run ID"
x_label = "Size of Submitted Task"
y_label = "Throughput in GiB/s"
var_label = "Submission Type"
sizes = ["1kib", "4kib", "1mib", "32mib"]
sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "32 MiB"]
types = ["bs10", "bs50", "ms10", "ms50", "ssaw"]
types_nice = ["Batch 10", "Batch 50", "Multi-Submit 10", "Multi-Submit 50", "Single-Submit"]
title = \
"""Throughput showing Optimal Submission Method and Size\n
Copy Operation tested Intra-Node on DDR with 1 Engine per WQ"""
description = \
"""Throughput showing Optimal Submission Method and Size\n
Batch uses a Batch Descriptor of given Size\n
Multi-Submit fills the Work Queue with n Single Descriptors\n
Single-Submit submits one Descriptor and immediately waits\n
Copy Operation tested Intra-Node on DDR with 1 Engine per WQ"""
index = [runid, x_label, var_label]
data = []

# Loads the measurements from a given file and normalizes them,
# so that the timings returned are nanoseconds per element transferred.
def load_time_measurements(file_path, type_label):
    with open(file_path, 'r') as file:
        data = json.load(file)

    iterations = data["list"][0]["task"]["iterations"]

    # The bs and ms submission types process more than one element
    # per run, so the results must be divided by that element count.
    if type_label in ["bs10", "ms10"]: divisor = 10
    elif type_label in ["ms50", "bs50"]: divisor = 50
    else: divisor = 1

    return {
        "total": data["list"][0]["report"]["time"]["total"] / (iterations * divisor),
        "combined": [x / divisor for x in data["list"][0]["report"]["time"]["combined"]],
        "submission": [x / divisor for x in data["list"][0]["report"]["time"]["submission"]],
        "completion": [x / divisor for x in data["list"][0]["report"]["time"]["completion"]]
    }

# Processes a single file and appends the desired timings to the
# global data array. Handles multiple runs with a run id and ignores
# missing files, as some configurations may not have been benchmarked.
def process_file_to_dataset(file_path, type_label, size_label):
    type_index = index_from_element(type_label, types)
    type_nice = types_nice[type_index]
    size_index = index_from_element(size_label, sizes)
    size_nice = sizes_nice[size_index]

    if size_label == "1kib": data_size = 1024
    elif size_label == "4kib": data_size = 4 * 1024
    elif size_label == "1mib": data_size = 1024 * 1024
    elif size_label == "32mib": data_size = 32 * 1024 * 1024
    elif size_label == "1gib": data_size = 1024 * 1024 * 1024
    else: data_size = 0

    try:
        time = load_time_measurements(file_path, type_label)["combined"]
        run_idx = 0
        for t in time:
            data.append({runid: run_idx, x_label: size_nice, var_label: type_nice, y_label: calc_throughput(data_size, t)})
            run_idx += 1
    except FileNotFoundError:
        return

# Loops over all possible configuration combinations and calls
# process_file_to_dataset for each of them to build a dataframe,
# which is then plotted and saved.
def main():
    folder_path = "benchmark-results/"

    for type_label in types:
        for size in sizes:
            file = os.path.join(folder_path, f"submit-{type_label}-{size}-1e.json")
            process_file_to_dataset(file, type_label, size)

    df = pd.DataFrame(data)
    df.set_index(index, inplace=True)
    df = df.sort_values(y_label)

    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="rocket", errorbar="sd")

    plt.title(title)
    plt.savefig(os.path.join(folder_path, "plot-opt-submitmethod.png"), bbox_inches='tight')
    plt.show()


if __name__ == "__main__":
    main()
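
The helpers calc_throughput and index_from_element are imported from the local common module, which is not part of this file. As a rough sketch of what they are assumed to do here (the signatures and the exact unit conversion are assumptions, not the actual common.py): index_from_element looks up a label's position in its label list, and calc_throughput converts a transfer size in bytes and a per-element time in nanoseconds into GiB/s.

# Hypothetical sketch of the helpers imported from common; the real
# module in the repository may differ.

def index_from_element(element, elements):
    # Position of a label within its list, used to map a raw label
    # such as "bs10" to the matching entry in types_nice / sizes_nice.
    return elements.index(element)

def calc_throughput(size_bytes, time_ns):
    # Assumed conversion: bytes over nanoseconds, expressed in GiB/s
    # (1 GiB = 2^30 bytes, 1 s = 10^9 ns).
    return (size_bytes / (1024 ** 3)) / (time_ns / 1e9)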