
update benchmark plotters for changes made to benchmark and result format

Constantin Fürst committed 11 months ago · branch master · commit 326cf92af3
  1. benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc (BIN)
  2. benchmarks/benchmark-plotters/common.py (29 lines changed)
  3. benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py (30 lines changed)
  4. benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py (28 lines changed)
  5. benchmarks/benchmark-plotters/plot-perf-submitmethod.py (34 lines changed)

benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc (binary file changed)

benchmarks/benchmark-plotters/common.py (29 lines changed)

@@ -1,3 +1,10 @@
+import os
+import json
+from typing import List
+from numpy import float64
+
 # calculates throughput in gib/s from the measured
 # transfer duration (in nanoseconds) for a given element
 # with the size of this given in bytes
@@ -12,4 +19,24 @@ def calc_throughput(size_bytes,time_ns):
 def index_from_element(value,array):
     for (idx,val) in enumerate(array):
         if val == value: return idx
     return 0
+
+
+# loads the measurements from a given file
+def load_time_mesurements(file_path) -> List[float64]:
+    with open(file_path, 'r') as file:
+        data = json.load(file)
+        count = data["count"]
+        runcount_divisor = data["list"][0]["task"]["reps"]
+
+        # if there's more than one thread, the internal repetition
+        # count should be the same; if you decide it shouldn't be,
+        # remove the check below
+        if count > 1:
+            for i in range(count):
+                if runcount_divisor != data["list"][i]["task"]["reps"]:
+                    print("Runcount mismatch between tasks. Check the comment above; aborting for now.")
+                    os.abort()
+
+        return [ x / runcount_divisor for x in data["timings"]]
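
The new helper assumes the reworked result format carries a flat "timings" array plus a per-task repetition count. A minimal sketch of the JSON layout it reads, with hypothetical values (the field names come from the code above):

    # what load_time_mesurements expects to find in a result file
    example = {
        "count": 2,                      # number of tasks in the run
        "timings": [2000.0, 2100.0],     # raw timings in nanoseconds
        "list": [
            {"task": {"reps": 10}},      # per-task repetition count
            {"task": {"reps": 10}},      # must match across all tasks
        ],
    }
    # with reps == 10, each timing is divided down to a per-repetition
    # value: load_time_mesurements(path) -> [200.0, 210.0]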

benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py (30 lines changed)

@ -5,7 +5,7 @@ from itertools import chain
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from common import calc_throughput
from common import calc_throughput, load_time_mesurements
result_path = "benchmark-results/" result_path = "benchmark-results/"
output_path = "benchmark-plots/" output_path = "benchmark-plots/"
@@ -40,20 +40,8 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "size": data["list"][0]["task"]["size"],
-            "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path):
+    return load_time_mesurements(file_path)
 
 # processes a single file and appends the desired timings
@@ -61,12 +49,13 @@ def load_time_mesurements(file_path):
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
+    size = 1024*1024*1024
     try:
-        file_data = load_time_mesurements(file_path)
-        time = file_data["combined"]
+        timing = get_timing(file_path)
         run_idx = 0
-        for t in time:
-            data.append({ runid : run_idx, x_label : dst_node, y_label : calc_throughput(file_data["size"], t)})
+        for t in timing:
+            tp = calc_throughput(size, t)
+            data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
             run_idx = run_idx + 1
     except FileNotFoundError:
         return
@@ -89,8 +78,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title):
     src_node = 0
     for dst_node in {8,11,12,15}:
-        size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib"
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-1e.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-1e.json")
         process_file_to_dataset(file, src_node, dst_node)
 
     df = pd.DataFrame(data)
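
For context, the conversion applied per timing: common.py's calc_throughput turns a size in bytes and a duration in nanoseconds into GiB/s. Its body is not shown in this diff, so the following is a sketch matching its documented behaviour, using the fixed 1 GiB size introduced above:

    # assumed implementation of calc_throughput (GiB/s from bytes and ns)
    def calc_throughput(size_bytes, time_ns):
        return (size_bytes / (1024 ** 3)) / (time_ns / (10 ** 9))

    size = 1024 * 1024 * 1024                   # 1 GiB, as hardcoded above
    print(calc_throughput(size, 250_000_000))   # 1 GiB in 0.25 s -> 4.0 GiB/s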

benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py (28 lines changed)

@ -5,7 +5,7 @@ from itertools import chain
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from common import calc_throughput
from common import calc_throughput, load_time_mesurements
result_path = "benchmark-results/" result_path = "benchmark-results/"
output_path = "benchmark-plots/" output_path = "benchmark-plots/"
@@ -40,20 +40,8 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "size": data["list"][0]["task"]["size"],
-            "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path):
+    return load_time_mesurements(file_path)
 
 # processes a single file and appends the desired timings
@@ -61,12 +49,11 @@ def load_time_mesurements(file_path):
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
+    size = 1024*1024*1024
     try:
-        file_data = load_time_mesurements(file_path)
-        time = file_data["combined"]
+        timing = get_timing(file_path)
         run_idx = 0
-        for t in time:
-            size = file_data["size"]
+        for t in timing:
             tp = calc_throughput(size, t)
             data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
             run_idx = run_idx + 1
@@ -91,8 +78,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title,ext):
     src_node = 0
     for dst_node in {8,11,12,15}:
-        size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib"
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-cpu-{ext}.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-cpu-{ext}.json")
         process_file_to_dataset(file, src_node, dst_node)
 
     df = pd.DataFrame(data)
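
The same simplification applies here: instead of picking "512mib" or "1gib" per configuration, every run now reads the fixed 1 GiB result file. A hypothetical expansion of the new pattern (the ext value "1e" is borrowed from the sibling plotter above and is an assumption for this variant):

    # src_node=0, dst_node=8, node_config="allnodes", ext="1e" yields:
    # benchmark-results/copy-n0ton8-1gib-allnodes-cpu-1e.json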

benchmarks/benchmark-plotters/plot-perf-submitmethod.py (34 lines changed)

@@ -1,17 +1,19 @@
 import os
 import json
+from numpy import float64, int64
+from typing import List
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput, index_from_element
+from common import calc_throughput, index_from_element, load_time_mesurements
 
 runid = "Run ID"
 x_label = "Size of Submitted Task"
 y_label = "Throughput in GiB/s"
 var_label = "Submission Type"
 
-sizes = ["1kib", "4kib", "1mib"]
-sizes_nice = ["1 KiB", "4 KiB", "1 MiB"]
+sizes = ["1kib", "4kib", "1mib", "128mib"]
+sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "128 MiB"]
 
 types = ["bs10", "bs50", "ssaw"]
 types_nice = ["Batch, Size 10", "Batch, Size 50", "Single Submit"]
@@ -33,26 +35,14 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path,type_label):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        iterations = data["list"][0]["task"]["iterations"]
-        divisor = 1
-
-        if type_label == "bs10": divisor = 10
-        elif type_label == "bs50" : divisor = 50
-        else: divisor = 1
-
-        return {
-            "total": data["list"][0]["report"]["time"]["total"] / (iterations * divisor),
-            "combined": [ x / divisor for x in data["list"][0]["report"]["time"]["combined"]],
-            "submission": [ x / divisor for x in data["list"][0]["report"]["time"]["submission"]],
-            "completion": [ x / divisor for x in data["list"][0]["report"]["time"]["completion"]]
-        }
+def get_timing(file_path,type_label) -> List[float64]:
+    divisor = 0
+
+    # the bs and ms submission types process more than one
+    # element per run, so the results must be divided by
+    # this number
+    if type_label in ["bs10", "ms10"]: divisor = 10
+    elif type_label in ["ms50", "bs50"]: divisor = 50
+    else: divisor = 1
+
+    return [ x / divisor for x in load_time_mesurements(file_path)]
 
 # processes a single file and appends the desired timings
@@ -74,7 +64,7 @@ def process_file_to_dataset(file_path, type_label,size_label):
     else: data_size = 0
 
     try:
-        time = load_time_mesurements(file_path,type_label)["combined"]
+        time = get_timing(file_path,type_label)
         run_idx = 0
        for t in time:
            data.append({ runid : run_idx, x_label: size_nice, var_label : type_nice, y_label : calc_throughput(data_size, t)})
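
The divisor step get_timing adds on top of load_time_mesurements is easy to verify by hand: a "bs10" run submits batches of 10 elements, so each measured timing covers 10 transfers and must be divided by 10 to become a per-element cost. A small sketch of just that step (timing values hypothetical):

    raw = [1000.0, 1200.0]          # per-run timings in nanoseconds
    divisor = 10                    # type_label "bs10" (or "ms10")
    per_element = [x / divisor for x in raw]
    # per_element == [100.0, 120.0], what get_timing would return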
