Browse Source

update benchmark plotters for changes made to benchmark and result format

master
Constantin Fürst 11 months ago
parent
commit
326cf92af3
  1. BIN
      benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc
  2. 29
      benchmarks/benchmark-plotters/common.py
  3. 30
      benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
  4. 28
      benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
  5. 34
      benchmarks/benchmark-plotters/plot-perf-submitmethod.py

BIN
benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc

29
benchmarks/benchmark-plotters/common.py

@ -1,3 +1,10 @@
import os
import json
from typing import List
from numpy import float64
# calculates throughput in GiB/s from the measured
# transfer duration (in nanoseconds) for a given element
# whose size is given in bytes
@ -12,4 +19,24 @@ def calc_throughput(size_bytes,time_ns):
def index_from_element(value,array):
for (idx,val) in enumerate(array):
if val == value: return idx
return 0
return 0
# loads the measurements from a given file
def load_time_mesurements(file_path) -> List[float64]:
    """Load the timings of a benchmark result file, scaled per repetition.

    Reads the JSON document at ``file_path`` and divides every entry of its
    top-level "timings" array by the "reps" value of the first task found
    in its "list" array.

    Args:
        file_path: path of the JSON result file to read.

    Returns:
        One value per entry of "timings", each divided by the repetition
        count of the first task.

    Raises:
        FileNotFoundError: propagated from ``open`` when ``file_path``
            does not exist.
    """
    with open(file_path, 'r', encoding="utf-8") as file:
        data = json.load(file)

    count = data["count"]
    tasks = data["list"]
    runcount_divisor = tasks[0]["task"]["reps"]

    # if there is more than one thread, the internal repetition
    # count should be the same. if you decide it shouldn't,
    # remove the check below
    if count > 1 and any(tasks[i]["task"]["reps"] != runcount_divisor for i in range(count)):
        # flush explicitly: os.abort() terminates the process right away,
        # so a message still sitting in a buffered stdout could be lost
        print("Runcount mismatch between tasks. Check the comment above, aborting for now.", flush=True)
        os.abort()

    return [x / runcount_divisor for x in data["timings"]]

30
benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py

@ -5,7 +5,7 @@ from itertools import chain
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput
from common import calc_throughput, load_time_mesurements
result_path = "benchmark-results/"
output_path = "benchmark-plots/"
@ -40,20 +40,8 @@ data = []
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transferred
def load_time_mesurements(file_path):
with open(file_path, 'r') as file:
data = json.load(file)
count = data["count"]
batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
iterations = data["list"][0]["task"]["iterations"]
return {
"size": data["list"][0]["task"]["size"],
"total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
"combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
"submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
"completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
}
def get_timing(file_path):
    """Return the timings that load_time_mesurements yields for file_path.

    Thin wrapper around the loader imported from common; any exception the
    loader raises is passed through unchanged.
    """
    timings = load_time_mesurements(file_path)
    return timings
# processes a single file and appends the desired timings
@ -61,12 +49,13 @@ def load_time_mesurements(file_path):
# and ignores if the given file is not found as some
# configurations may not be benchmarked
def process_file_to_dataset(file_path, src_node, dst_node):
size = 1024*1024*1024
try:
file_data = load_time_mesurements(file_path)
time = file_data["combined"]
timing = get_timing(file_path)
run_idx = 0
for t in time:
data.append({ runid : run_idx, x_label : dst_node, y_label : calc_throughput(file_data["size"], t)})
for t in timing:
tp = calc_throughput(size, t)
data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
run_idx = run_idx + 1
except FileNotFoundError:
return
@ -89,8 +78,7 @@ def plot_bar(table,title,node_config):
def main(node_config,title):
src_node = 0
for dst_node in {8,11,12,15}:
size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib"
file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-1e.json")
file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-1e.json")
process_file_to_dataset(file, src_node, dst_node)
df = pd.DataFrame(data)

28
benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py

@ -5,7 +5,7 @@ from itertools import chain
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput
from common import calc_throughput, load_time_mesurements
result_path = "benchmark-results/"
output_path = "benchmark-plots/"
@ -40,20 +40,8 @@ data = []
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transferred
def load_time_mesurements(file_path):
with open(file_path, 'r') as file:
data = json.load(file)
count = data["count"]
batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
iterations = data["list"][0]["task"]["iterations"]
return {
"size": data["list"][0]["task"]["size"],
"total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
"combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
"submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
"completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
}
def get_timing(file_path):
    """Return the timings that load_time_mesurements yields for file_path.

    Thin wrapper around the loader imported from common; any exception the
    loader raises is passed through unchanged.
    """
    timings = load_time_mesurements(file_path)
    return timings
# processes a single file and appends the desired timings
@ -61,12 +49,11 @@ def load_time_mesurements(file_path):
# and ignores if the given file is not found as some
# configurations may not be benchmarked
def process_file_to_dataset(file_path, src_node, dst_node):
size = 1024*1024*1024
try:
file_data = load_time_mesurements(file_path)
time = file_data["combined"]
timing = get_timing(file_path)
run_idx = 0
for t in time:
size = file_data["size"]
for t in timing:
tp = calc_throughput(size, t)
data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
run_idx = run_idx + 1
@ -91,8 +78,7 @@ def plot_bar(table,title,node_config):
def main(node_config,title,ext):
src_node = 0
for dst_node in {8,11,12,15}:
size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib"
file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-cpu-{ext}.json")
file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-cpu-{ext}.json")
process_file_to_dataset(file, src_node, dst_node)
df = pd.DataFrame(data)

34
benchmarks/benchmark-plotters/plot-perf-submitmethod.py

@ -1,17 +1,19 @@
import os
import json
from numpy import float64, int64
from typing import List
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput, index_from_element
from common import calc_throughput, index_from_element, load_time_mesurements
runid = "Run ID"
x_label = "Size of Submitted Task"
y_label = "Throughput in GiB/s"
var_label = "Submission Type"
sizes = ["1kib", "4kib", "1mib"]
sizes_nice = ["1 KiB", "4 KiB", "1 MiB"]
sizes = ["1kib", "4kib", "1mib", "128mib"]
sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "128 MiB"]
types = ["bs10", "bs50", "ssaw"]
types_nice = ["Batch, Size 10", "Batch, Size 50", "Single Submit"]
@ -33,26 +35,14 @@ data = []
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transferred
def load_time_mesurements(file_path,type_label):
with open(file_path, 'r') as file:
data = json.load(file)
iterations = data["list"][0]["task"]["iterations"]
divisor = 1
def get_timing(file_path,type_label) -> List[float64]:
divisor = 0
# bs and ms types for submission process more than one
# element per run and the results therefore must be
# divided by this number
if type_label == "bs10": divisor = 10
elif type_label == "bs50" : divisor = 50
else: divisor = 1
if type_label in ["bs10", "ms10"]: divisor = 10
elif type_label in ["ms50", "bs50"]: divisor = 50
else: divisor = 1
return {
"total": data["list"][0]["report"]["time"]["total"] / (iterations * divisor),
"combined": [ x / divisor for x in data["list"][0]["report"]["time"]["combined"]],
"submission": [ x / divisor for x in data["list"][0]["report"]["time"]["submission"]],
"completion": [ x / divisor for x in data["list"][0]["report"]["time"]["completion"]]
}
return [ x / divisor for x in load_time_mesurements(file_path)]
# processes a single file and appends the desired timings
@ -74,7 +64,7 @@ def process_file_to_dataset(file_path, type_label,size_label):
else: data_size = 0
try:
time = load_time_mesurements(file_path,type_label)["combined"]
time = get_timing(file_path,type_label)
run_idx = 0
for t in time:
data.append({ runid : run_idx, x_label: size_nice, var_label : type_nice, y_label : calc_throughput(data_size, t)})

Loading…
Cancel
Save