
finish adapting the plotters to the new result style, add division by thread count to the throughput plotters, and shrink the figure sizes (a smaller figure gives a larger effective font when scaled up in LaTeX)

master · Constantin Fürst · 11 months ago · commit 8ab5eb4902

Changed files:
  1. benchmarks/benchmark-plotters/common.py (9 lines changed)
  2. benchmarks/benchmark-plotters/plot-cost-mtsubmit.py (65 lines changed)
  3. benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py (33 lines changed)
  4. benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py (37 lines changed)
  5. benchmarks/benchmark-plotters/plot-perf-submitmethod.py (31 lines changed)

benchmarks/benchmark-plotters/common.py (9 lines changed)

@@ -23,7 +23,7 @@ def index_from_element(value,array):
 # loads the measurements from a given file
-def load_time_mesurements(file_path) -> List[float64]:
+def load_time_mesurements(file_path):
     with open(file_path, 'r') as file:
         data = json.load(file)
         count = data["count"]
@@ -39,4 +39,9 @@ def load_time_mesurements(file_path) -> List[float64]:
         print("Runcount mismatch between tasks. Check the comment above, aborting for now.")
         os.abort()

-    return [ x / runcount_divisor for x in data["timings"]]
+    return [ x / runcount_divisor for x in data["timings"]]
+
+
+def get_task_count(file_path):
+    with open(file_path, 'r') as file:
+        return json.load(file)["count"]

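The new get_task_count helper feeds the per-task normalization that the throughput plotters below apply. A minimal sketch of the intended usage (the result file name is hypothetical; the top-level "count" field in the JSON follows from the diff):

    from common import load_time_mesurements, get_task_count

    # per-task timings: each aggregate timing is divided by the number
    # of parallel tasks recorded in the result file's "count" field
    path = "benchmark-results/example.json"  # hypothetical file
    per_task = [t / get_task_count(path) for t in load_time_mesurements(path)]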
benchmarks/benchmark-plotters/plot-cost-mtsubmit.py (65 lines changed)

@@ -1,11 +1,9 @@
 import os
-import json
 import pandas as pd
-from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt

-from common import calc_throughput, index_from_element
+from common import calc_throughput, index_from_element, load_time_mesurements

 runid = "Run ID"
 x_label = "Thread Count"
@@ -13,8 +11,8 @@ y_label = "Throughput in GiB/s"
 var_label = "Transfer Size"

 thread_counts = ["1t", "2t", "12t"]
 thread_counts_nice = ["1 Thread", "2 Threads", "12 Threads"]
-engine_counts = ["1mib-1e", "1gib-1e"]
-engine_counts_nice = ["1 MiB", "1 GiB"]
+size_labels = ["1mib", "1gib"]
+size_labels_nice = ["1 MiB", "1 GiB"]

 title = \
     """Total Throughput showing cost of MT Submit\n
@@ -34,47 +32,36 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "total" : sum([x / (iterations * 120) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined" : [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission" : [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion" : [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path,thread_count):
+    divisor = 0
+
+    # work queue size is 120 which is split over all available threads
+    # therefore we divide the result by 120/n_threads to get the per-element speed
+    if thread_count == "1t": divisor = 1
+    elif thread_count == "2t" : divisor = 2
+    elif thread_count == "12t" : divisor = 12
+
+    return [ x / divisor for x in load_time_mesurements(file_path)]

 # processes a single file and appends the desired timings
 # to the global data-array, handles multiple runs with a runid
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
-def process_file_to_dataset(file_path, engine_label, thread_count):
-    engine_index = index_from_element(engine_label,engine_counts)
-    engine_nice = engine_counts_nice[engine_index]
+def process_file_to_dataset(file_path, size_label, thread_count):
+    size_index = index_from_element(size_label,size_labels)
+    size_nice = size_labels_nice[size_index]
     threadc_index = index_from_element(thread_count, thread_counts)
     thread_count_nice = thread_counts_nice[threadc_index]
     data_size = 0

-    if engine_label in ["1gib-1e", "1gib-4e"]: data_size = 1024*1024*1024
-    elif engine_label in ["1mib-1e", "1mib-4e"]: data_size = 1024*1024
-    else: data_size = 0
+    if size_label == "1gib" : data_size = 1024*1024*1024
+    elif size_label == "1mib" : data_size = 1024*1024

-    try:
-        time = load_time_mesurements(file_path)["combined"]
-        run_idx = 0
-        for t in time:
-            data.append({ runid : run_idx, x_label: thread_count_nice, var_label : engine_nice, y_label : calc_throughput(data_size, t)})
-            run_idx = run_idx + 1
-    except FileNotFoundError:
-        return
+    timing = get_timing(file_path, thread_count)
+    run_idx = 0
+    for t in timing:
+        data.append({ runid : run_idx, x_label: thread_count_nice, var_label : size_nice, y_label : calc_throughput(data_size, t)})
+        run_idx = run_idx + 1

 # loops over all possible configuration combinations and calls
@@ -84,17 +71,19 @@ def main():
     result_path = "benchmark-results/"
     output_path = "benchmark-plots/"

-    for engine_label in engine_counts:
+    for size in size_labels:
         for thread_count in thread_counts:
-            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{engine_label}.json")
-            process_file_to_dataset(file, engine_label, thread_count)
+            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{size}.json")
+            process_file_to_dataset(file, size, thread_count)

     df = pd.DataFrame(data)
     df.set_index(index, inplace=True)

-    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="rocket", errorbar="sd")
+    plt.figure(figsize=(4, 4))
+    plt.ylim(0, 30)
+    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="mako", errorbar="sd")

-    plt.savefig(os.path.join(output_path, "plot-perf-mtsubmit.pdf"), bbox_inches='tight')
+    plt.savefig(os.path.join(output_path, "plot-mtsubmit.pdf"), bbox_inches='tight')
     plt.show()

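The smaller figsize values throughout this commit exploit how LaTeX scales included PDFs: matplotlib renders fonts at a fixed point size on the canvas, so a small canvas stretched to a fixed column width shows proportionally larger text. A back-of-the-envelope sketch (the 10 pt default font and the 5 in column width are assumptions, not from the diff):

    # effective font size after \includegraphics[width=\columnwidth]
    font_pt = 10.0    # matplotlib's default font size (assumption)
    column_in = 5.0   # LaTeX column width in inches (assumption)

    for fig_width_in in (8.0, 4.0, 2.0):
        scale = column_in / fig_width_in
        print(f"{fig_width_in:.0f} in canvas -> {font_pt * scale:.1f} pt effective font")
    # 8 in -> 6.2 pt, 4 in -> 12.5 pt, 2 in -> 25.0 pt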
benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py (33 lines changed)

@@ -1,11 +1,9 @@
 import os
-import json
 import pandas as pd
-from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt

-from common import calc_throughput, load_time_mesurements
+from common import calc_throughput, load_time_mesurements, get_task_count

 result_path = "benchmark-results/"
 output_path = "benchmark-plots/"
@@ -41,7 +39,8 @@ data = []
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
 def get_timing(file_path):
-    return load_time_mesurements(file_path)
+    divisor = get_task_count(file_path)
+    return [ x / divisor for x in load_time_mesurements(file_path)]

 # processes a single file and appends the desired timings
@@ -50,25 +49,23 @@ def get_timing(file_path):
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
     size = 1024*1024*1024

-    try:
-        timing = get_timing(file_path)
-        run_idx = 0
-        for t in timing:
-            tp = calc_throughput(size, t)
-            data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
-            run_idx = run_idx + 1
-    except FileNotFoundError:
-        return
+    timing = get_timing(file_path)
+    run_idx = 0
+    for t in timing:
+        tp = calc_throughput(size, t)
+        data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
+        run_idx = run_idx + 1

 def plot_bar(table,title,node_config):
-    plt.figure(figsize=(8, 6))
+    plt.figure(figsize=(2, 3))

-    sns.barplot(x=x_label, y=y_label, data=table, palette="rocket")
+    sns.barplot(x=x_label, y=y_label, data=table, palette="mako", errorbar="sd")

-    plt.ylim(0, 100)
+    plt.ylim(0, 75)

-    plt.savefig(os.path.join(output_path, f"plot-perf-{node_config}-throughput-selectbarplot.png"), bbox_inches='tight')
+    plt.savefig(os.path.join(output_path, f"plot-{node_config}-throughput.pdf"), bbox_inches='tight')
     plt.show()

@@ -78,7 +75,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title):
     src_node = 0
     for dst_node in {8,11,12,15}:
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-1e.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}.json")
         process_file_to_dataset(file, src_node, dst_node)

     df = pd.DataFrame(data)

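Note that dropping the try/except means a missing result file now raises FileNotFoundError out of process_file_to_dataset, so the stale comment about ignoring absent files no longer holds. If skipping unbenchmarked configurations is still wanted, a guard at the call site would restore the old behaviour; a hedged sketch reusing the names from this diff:

    import os

    for dst_node in {8, 11, 12, 15}:
        file = os.path.join(result_path, f"copy-n0ton{dst_node}-1gib-{node_config}.json")
        if not os.path.isfile(file):  # optional stand-in for the removed except-clause
            continue
        process_file_to_dataset(file, 0, dst_node)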
benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py (37 lines changed)

@@ -1,11 +1,9 @@
 import os
-import json
 import pandas as pd
-from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt

-from common import calc_throughput, load_time_mesurements
+from common import calc_throughput, load_time_mesurements, get_task_count

 result_path = "benchmark-results/"
 output_path = "benchmark-plots/"
@@ -41,7 +39,8 @@ data = []
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
 def get_timing(file_path):
-    return load_time_mesurements(file_path)
+    divisor = get_task_count(file_path)
+    return [ x / divisor for x in load_time_mesurements(file_path)]

 # processes a single file and appends the desired timings
@@ -50,25 +49,23 @@ def get_timing(file_path):
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
     size = 1024*1024*1024

-    try:
-        timing = get_timing(file_path)
-        run_idx = 0
-        for t in timing:
-            tp = calc_throughput(size, t)
-            data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
-            run_idx = run_idx + 1
-    except FileNotFoundError:
-        return
+    timing = get_timing(file_path)
+    run_idx = 0
+    for t in timing:
+        tp = calc_throughput(size, t)
+        data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
+        run_idx = run_idx + 1

 def plot_bar(table,title,node_config):
-    plt.figure(figsize=(8, 6))
+    plt.figure(figsize=(2, 3))

-    sns.barplot(x=x_label, y=y_label, data=table, palette="rocket", errorbar=None)
+    sns.barplot(x=x_label, y=y_label, data=table, palette="mako", errorbar="sd")

-    plt.ylim(0, 100)
+    plt.ylim(0, 75)

-    plt.savefig(os.path.join(output_path, f"plot-perf-{node_config}-cpu-throughput-selectbarplot.pdf"), bbox_inches='tight')
+    plt.savefig(os.path.join(output_path, f"plot-{node_config}-cpu-throughput.pdf"), bbox_inches='tight')
     plt.show()

@@ -78,7 +75,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title,ext):
     src_node = 0
     for dst_node in {8,11,12,15}:
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-cpu-{ext}.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}{ext}.json")
         process_file_to_dataset(file, src_node, dst_node)

     df = pd.DataFrame(data)
@@ -93,5 +90,5 @@ def main(node_config,title,ext):

 if __name__ == "__main__":
-    dall = main("allnodes", title_allnodes, "1e")
-    dbrt = main("allnodes", title_allnodes, "brute")
+    dall = main("allnodes", title_allnodes, "-cpu")
+    dbrt = main("brute", title_allnodes, "-cpu")

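For reference, the conversion these plotters rely on: with a timing t in nanoseconds for a transfer of size bytes, throughput in GiB/s is size / (t · 10⁻⁹) / 2³⁰. calc_throughput lives in common.py and is not part of this diff, so the following is only a sketch of what it presumably computes:

    def calc_throughput(size_bytes, time_ns):
        # bytes per second, expressed in GiB/s
        seconds = time_ns / 1e9
        return (size_bytes / seconds) / (1024 ** 3)

    # e.g. 1 GiB transferred in 25 ms -> 40.0 GiB/s
    print(calc_throughput(1024 ** 3, 25_000_000))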
benchmarks/benchmark-plotters/plot-perf-submitmethod.py (31 lines changed)

@@ -1,7 +1,4 @@
 import os
-import json
-from numpy import float64, int64
-from typing import List
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
@@ -12,7 +9,7 @@ runid = "Run ID"
 x_label = "Size of Submitted Task"
 y_label = "Throughput in GiB/s"
 var_label = "Submission Type"
-sizes = ["1kib", "4kib", "1mib", "128mib"]
+sizes = ["1kib", "4kib", "1mib"]
 sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "128 MiB"]
 types = ["bs10", "bs50", "ssaw"]
 types_nice = ["Batch, Size 10", "Batch, Size 50", "Single Submit"]
@@ -35,7 +32,7 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def get_timing(file_path,type_label) -> List[float64]:
+def get_timing(file_path,type_label):
     divisor = 0

     if type_label == "bs10": divisor = 10
@@ -59,18 +56,13 @@ def process_file_to_dataset(file_path, type_label,size_label):
     if size_label == "1kib": data_size = 1024;
     elif size_label == "4kib": data_size = 4 * 1024;
     elif size_label == "1mib": data_size = 1024 * 1024;
-    elif size_label == "32mib": data_size = 32 * 1024 * 1024;
-    elif size_label == "1gib": data_size = 1024 * 1024 * 1024;
-    else: data_size = 0
+    elif size_label == "128mib": data_size = 128 * 1024 * 1024;

-    try:
-        time = get_timing(file_path,type_label)
-        run_idx = 0
-        for t in time:
-            data.append({ runid : run_idx, x_label: size_nice, var_label : type_nice, y_label : calc_throughput(data_size, t)})
-            run_idx = run_idx + 1
-    except FileNotFoundError:
-        return
+    time = get_timing(file_path,type_label)
+    run_idx = 0
+    for t in time:
+        data.append({ runid : run_idx, x_label: size_nice, var_label : type_nice, y_label : calc_throughput(data_size, t)})
+        run_idx = run_idx + 1

 # loops over all possible configuration combinations and calls
@@ -82,16 +74,17 @@ def main():
     for type_label in types:
         for size in sizes:
-            file = os.path.join(result_path, f"submit-{type_label}-{size}-1e.json")
+            file = os.path.join(result_path, f"submit-{type_label}-{size}.json")
             process_file_to_dataset(file, type_label, size)

     df = pd.DataFrame(data)
     df.set_index(index, inplace=True)
     df = df.sort_values(y_label)

-    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="rocket", errorbar="sd")
+    plt.figure(figsize=(4, 4))
+    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="mako", errorbar="sd")

-    plt.savefig(os.path.join(output_path, "plot-opt-submitmethod.pdf"), bbox_inches='tight')
+    plt.savefig(os.path.join(output_path, "plot-submitmethod.pdf"), bbox_inches='tight')
     plt.show()


 if __name__ == "__main__":
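
The divisor in get_timing reflects how many copies one submission covers: a batch of 10 or 50 is timed as a unit, so the per-task time divides by the batch size. Only the first branch appears in the hunk above; a sketch of the presumable full mapping (the ssaw value of 1 is inferred from its "Single Submit" label and is not shown in the diff):

    def batch_divisor(type_label):
        # one timing spans a whole batch; normalize to per-task time
        if type_label == "bs10": return 10
        if type_label == "bs50": return 50
        if type_label == "ssaw": return 1  # single submit (inferred)
        raise ValueError(f"unknown submission type: {type_label}")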