From 326cf92af33a4f6359dfa80dc847a34825a5bce9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Constantin=20F=C3=BCrst?=
Date: Wed, 31 Jan 2024 21:54:21 +0100
Subject: [PATCH] update benchmark plotters for changes made to benchmark and
 result format

---
 .../__pycache__/common.cpython-39.pyc         | Bin 579 -> 0 bytes
 benchmarks/benchmark-plotters/common.py       | 29 ++++++++++++++-
 .../plot-perf-peakthroughput-bar.py           | 30 +++++-----------
 .../plot-perf-peakthroughput-cpu-bar.py       | 28 ++++-----------
 .../plot-perf-submitmethod.py                 | 34 +++++++-----------
 5 files changed, 56 insertions(+), 65 deletions(-)
 delete mode 100644 benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc

diff --git a/benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc b/benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc
deleted file mode 100644
index 024860791d18f1a1db739cc91e1b1fa73ad0bea6..0000000000000000000000000000000000000000
GIT binary patch
(unreadable binary delta for the deleted bytecode cache omitted)

diff --git a/benchmarks/benchmark-plotters/common.py b/benchmarks/benchmark-plotters/common.py
index 4f44dd8..eeb2f90 100644
--- a/benchmarks/benchmark-plotters/common.py
+++ b/benchmarks/benchmark-plotters/common.py
@@ -1,3 +1,10 @@
+import os
+import json
+
+from typing import List
+from numpy import float64
+
+
 # calculates throughput in GiB/s from the measured
 # transfer duration (in nanoseconds) for a given element
 # whose size is given in bytes
@@ -12,4 +19,24 @@ def calc_throughput(size_bytes,time_ns):
 def index_from_element(value,array):
     for (idx,val) in enumerate(array):
         if val == value: return idx
-    return 0
\ No newline at end of file
+    return 0
+
+
+# loads the measurements from a given file
+def load_time_mesurements(file_path) -> List[float64]:
+    with open(file_path, 'r') as file:
+        data = json.load(file)
+        count = data["count"]
+        runcount_divisor = data["list"][0]["task"]["reps"]
+
+        # if there is more than one task, the internal repetition
+        # counts should all be the same; if you decide they need
+        # not be, remove the check below
+
+        if count > 1:
+            for i in range(count):
+                if runcount_divisor != data["list"][i]["task"]["reps"]:
+                    print("Run count mismatch between tasks. Check the comment above; aborting for now.")
+                    os.abort()
+
+        return [ x / runcount_divisor for x in data["timings"]]
\ No newline at end of file
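For reference, a minimal sketch of how the new `load_time_mesurements` helper consumes a result file. The JSON values below are invented for illustration; only the field layout (top-level `count`, per-task `reps`, flat `timings` array) is taken from the hunk above.

```python
import json
import tempfile

# invented result file in the format the new helper expects
result = {
    "count": 2,
    "list": [
        {"task": {"reps": 100}},
        {"task": {"reps": 100}},
    ],
    "timings": [12000000.0, 11500000.0],  # nanoseconds, summed over all reps
}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(result, f)
    path = f.name

# mirrors the helper: verify all tasks agree on "reps", then
# divide each timing by the shared repetition count
with open(path, "r") as file:
    data = json.load(file)
    reps = data["list"][0]["task"]["reps"]
    assert all(t["task"]["reps"] == reps for t in data["list"])
    per_run = [x / reps for x in data["timings"]]

print(per_run)  # [120000.0, 115000.0] -> nanoseconds per repetition
```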
diff --git a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
index 67fe99d..098825b 100644
--- a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
+++ b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
@@ -5,7 +5,7 @@ from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput
+from common import calc_throughput, load_time_mesurements
 
 result_path = "benchmark-results/"
 output_path = "benchmark-plots/"
@@ -40,20 +40,8 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "size": data["list"][0]["task"]["size"],
-            "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path):
+    return load_time_mesurements(file_path)
 
 
 # processes a single file and appends the desired timings
@@ -61,12 +49,13 @@ def load_time_mesurements(file_path):
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
+    size = 1024*1024*1024
     try:
-        file_data = load_time_mesurements(file_path)
-        time = file_data["combined"]
+        timing = get_timing(file_path)
         run_idx = 0
-        for t in time:
-            data.append({ runid : run_idx, x_label : dst_node, y_label : calc_throughput(file_data["size"], t)})
+        for t in timing:
+            tp = calc_throughput(size, t)
+            data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
             run_idx = run_idx + 1
     except FileNotFoundError:
         return
@@ -89,8 +78,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title):
     src_node = 0
     for dst_node in {8,11,12,15}:
-        size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib"
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-1e.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-1e.json")
         process_file_to_dataset(file, src_node, dst_node)
 
     df = pd.DataFrame(data)
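The plotter now hardcodes `size = 1024*1024*1024` (1 GiB) instead of reading the transfer size from the result file, since all remaining configurations copy 1 GiB. As a sanity check of the conversion, here is a stand-in for `common.calc_throughput`, assuming it implements the GiB-per-second formula its comment describes (the function body itself is not part of this patch):

```python
# assumed formula: (bytes / 2^30) GiB divided by (ns / 10^9) seconds
def calc_throughput(size_bytes: int, time_ns: float) -> float:
    return (size_bytes / (1024 ** 3)) / (time_ns / 1e9)

size = 1024 * 1024 * 1024  # the 1 GiB constant now hardcoded in the plotter
print(calc_throughput(size, 250000000))  # 1 GiB in 0.25 s -> 4.0 GiB/s
```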
diff --git a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
index 447851e..e4ff1bd 100644
--- a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
+++ b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
@@ -5,7 +5,7 @@ from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput
+from common import calc_throughput, load_time_mesurements
 
 result_path = "benchmark-results/"
 output_path = "benchmark-plots/"
@@ -40,20 +40,8 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "size": data["list"][0]["task"]["size"],
-            "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path):
+    return load_time_mesurements(file_path)
 
 
 # processes a single file and appends the desired timings
@@ -61,12 +49,11 @@ def load_time_mesurements(file_path):
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
+    size = 1024*1024*1024
     try:
-        file_data = load_time_mesurements(file_path)
-        time = file_data["combined"]
+        timing = get_timing(file_path)
         run_idx = 0
-        for t in time:
-            size = file_data["size"]
+        for t in timing:
             tp = calc_throughput(size, t)
             data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
             run_idx = run_idx + 1
@@ -91,8 +78,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title,ext):
     src_node = 0
     for dst_node in {8,11,12,15}:
-        size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib"
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-cpu-{ext}.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-cpu-{ext}.json")
        process_file_to_dataset(file, src_node, dst_node)
 
     df = pd.DataFrame(data)
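Both peak-throughput plotters append one row per run to the global `data` list and hand it to seaborn via a DataFrame. A self-contained sketch of that pattern follows; the sample rows and the x-axis column label are invented here, since `plot_bar` itself is not shown in this patch:

```python
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# column names: stand-ins for the plotters' runid/x_label/y_label constants
runid = "Run ID"
x_label = "Destination Node"
y_label = "Throughput in GiB/s"

# invented sample rows in the shape process_file_to_dataset appends
data = [
    {runid: 0, x_label: 8, y_label: 28.4},
    {runid: 1, x_label: 8, y_label: 28.1},
    {runid: 0, x_label: 11, y_label: 26.7},
    {runid: 1, x_label: 11, y_label: 26.9},
]

df = pd.DataFrame(data)
sns.barplot(data=df, x=x_label, y=y_label)  # aggregates the runs per node
plt.savefig("example-peakthroughput-bar.png", bbox_inches="tight")
```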
diff --git a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
index fd1cb13..05698c4 100644
--- a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
+++ b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
@@ -1,17 +1,19 @@
 import os
 import json
 
+from numpy import float64, int64
+from typing import List
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput, index_from_element
+from common import calc_throughput, index_from_element, load_time_mesurements
 
 runid = "Run ID"
 x_label = "Size of Submitted Task"
 y_label = "Throughput in GiB/s"
 var_label = "Submission Type"
-sizes = ["1kib", "4kib", "1mib"]
-sizes_nice = ["1 KiB", "4 KiB", "1 MiB"]
+sizes = ["1kib", "4kib", "1mib", "128mib"]
+sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "128 MiB"]
 types = ["bs10", "bs50", "ssaw"]
 types_nice = ["Batch, Size 10", "Batch, Size 50", "Single Submit"]
@@ -33,26 +35,14 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path,type_label):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        iterations = data["list"][0]["task"]["iterations"]
-        divisor = 1
+def get_timing(file_path,type_label) -> List[float64]:
+    divisor = 0
 
-    # bs and ms types for submission process more than one
-    # element per run and the results therefore must be
-    # divided by this number
+    if type_label == "bs10": divisor = 10
+    elif type_label == "bs50" : divisor = 50
+    else: divisor = 1
 
-    if type_label in ["bs10", "ms10"]: divisor = 10
-    elif type_label in ["ms50", "bs50"]: divisor = 50
-    else: divisor = 1
-
-    return {
-        "total": data["list"][0]["report"]["time"]["total"] / (iterations * divisor),
-        "combined": [ x / divisor for x in data["list"][0]["report"]["time"]["combined"]],
-        "submission": [ x / divisor for x in data["list"][0]["report"]["time"]["submission"]],
-        "completion": [ x / divisor for x in data["list"][0]["report"]["time"]["completion"]]
-    }
+    return [ x / divisor for x in load_time_mesurements(file_path)]
 
 
 # processes a single file and appends the desired timings
@@ -74,7 +64,7 @@ def process_file_to_dataset(file_path, type_label,size_label):
     else: data_size = 0
 
     try:
-        time = load_time_mesurements(file_path,type_label)["combined"]
+        time = get_timing(file_path,type_label)
         run_idx = 0
         for t in time:
             data.append({ runid : run_idx, x_label: size_nice, var_label : type_nice, y_label : calc_throughput(data_size, t)})
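The new `get_timing` wrapper only rescales the per-repetition timings from `load_time_mesurements` by how many elements one submission moves: 10 or 50 for the batched types, 1 for single submit (the former `ms10`/`ms50` labels were dropped along with the old loader). A small stand-alone mirror of that divisor logic:

```python
from typing import List

# divisor per submission type, mirroring get_timing: batched runs move
# 10 or 50 elements per timed repetition, single submit moves one
def normalize(timings: List[float], type_label: str) -> List[float]:
    divisor = {"bs10": 10, "bs50": 50}.get(type_label, 1)
    return [x / divisor for x in timings]

print(normalize([500.0, 520.0], "bs10"))  # -> [50.0, 52.0] ns per element
print(normalize([500.0, 520.0], "ssaw"))  # -> [500.0, 520.0]
```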