From 099f454f19941a2ef927a1cda7c1161032ad4b3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Constantin=20F=C3=BCrst?=
Date: Mon, 11 Dec 2023 18:45:05 +0100
Subject: [PATCH] modify plotters to a more streamlined state, all now use the
 file-loop in main and have a function that processes one file into the
 dataset, also adds the peakthroughput plotter and removes the defunct
 opt-submitmethod plotter

---
 .../benchmark-plotters/plot-cost-mtsubmit.py  |  47 ++++----
 .../plot-opt-submitmethod.py                  | 104 ------------------
 .../plot-perf-enginelocation.py               |  16 +--
 .../plot-perf-peakthroughput.py               |  80 ++++++++++++++
 .../plot-perf-submitmethod.py                 |  86 ++++++++-------
 5 files changed, 155 insertions(+), 178 deletions(-)
 delete mode 100644 benchmarks/benchmark-plotters/plot-opt-submitmethod.py
 create mode 100644 benchmarks/benchmark-plotters/plot-perf-peakthroughput.py
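Every plotter touched by this patch now follows the same pattern: main() loops over the expected result files and a process_file_to_dataset() helper appends one row per recorded run to a shared data list, silently skipping missing files. The following is a minimal sketch of that pattern; the column labels, file names and the JSON path to the timing list are simplified placeholders, not the real benchmark layout.

import os
import json
import pandas as pd

# placeholder column labels; each real plotter defines its own set
runid, x_label, var_label, y_label = "Run ID", "File", "Variant", "Throughput"
data = []

def calc_throughput(size_bytes, time_ns):
    # shared helper, identical to the one in the plotters below
    time_seconds = time_ns * 1e-9
    size_gib = size_bytes / (1024 ** 3)
    return size_gib / time_seconds

def process_file_to_dataset(file_path, variant_label):
    # one result file becomes several dataset rows, one per recorded run;
    # missing files are skipped so partial result sets still produce a plot
    try:
        with open(file_path, 'r') as file:
            times = json.load(file)["list"][0]["report"]["time"]["total"]
    except FileNotFoundError:
        return
    for run_idx, t in enumerate(times):
        data.append({runid: run_idx, x_label: os.path.basename(file_path),
                     var_label: variant_label, y_label: calc_throughput(1024, t)})

def main():
    folder_path = "benchmark-results/"
    for variant_label in ["a", "b"]:
        file = os.path.join(folder_path, f"example-{variant_label}.json")
        process_file_to_dataset(file, variant_label)
    return pd.DataFrame(data)

if __name__ == "__main__":
    main()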
diff --git a/benchmarks/benchmark-plotters/plot-cost-mtsubmit.py b/benchmarks/benchmark-plotters/plot-cost-mtsubmit.py
index c9c72ee..e1d4879 100644
--- a/benchmarks/benchmark-plotters/plot-cost-mtsubmit.py
+++ b/benchmarks/benchmark-plotters/plot-cost-mtsubmit.py
@@ -18,8 +18,8 @@ title = "Per-Thread Throughput - 120 Copy Operations split on Threads Intra-Node
 index = [runid, x_label, var_label]
 data = []
 
-def calc_throughput(size_bytes,time_nanosec):
-    time_seconds = time_nanosec * 1e-9
+def calc_throughput(size_bytes,time_ns):
+    time_seconds = time_ns * 1e-9
     size_gib = size_bytes / (1024 ** 3)
     throughput_gibs = size_gib / time_seconds
     return throughput_gibs
@@ -31,13 +31,16 @@ def index_from_element(value,array):
     return 0
 
 
-def load_and_process_copy_json(file_path):
+def load_time_mesurements(file_path):
     with open(file_path, 'r') as file:
         data = json.load(file)
        count = data["count"]
         iterations = data["list"][0]["task"]["iterations"]
 
+        # work queue size is 120 which is split over all available threads
+        # therefore we divide the result by 120/n_threads to get the per-element speed
+
         return {
             "total" : sum([x / (iterations * (120 / count)) for x in list(chain(*[data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
             "combined" : [x / (120 / count) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
@@ -45,41 +48,35 @@ def load_and_process_copy_json(file_path):
             "completion" : [x / (120 / count) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
         }
 
-# Function to plot the graph for the new benchmark
-def create_mtsubmit_dataset(file_paths, engine_label):
-    times = []
-
+def process_file_to_dataset(file_path, engine_label, thread_count):
     engine_index = index_from_element(engine_label,engine_counts)
     engine_nice = engine_counts_nice[engine_index]
+    threadc_index = index_from_element(thread_count, thread_counts)
+    thread_count_nice = thread_counts_nice[threadc_index]
+    data_size = 0
 
-    idx = 0
-    for file_path in file_paths:
-        time = load_and_process_copy_json(file_path)
-        times.append(time["total"])
-        idx = idx + 1
-
-    throughput = []
     if engine_label in ["1gib-1e", "1gib-4e"]:
-        throughput = [[calc_throughput(1024*1024*1024,time) for time in t] for t in times]
+        data_size = 1024*1024*1024
     else:
-        throughput = [[calc_throughput(1024*1024,time) for time in t] for t in times]
+        data_size = 1024*1024
 
-    idx = 0
-    for run_set in throughput:
+    try:
+        time = load_time_mesurements(file_path)["total"]
         run_idx = 0
-        for run in run_set:
-            data.append({ runid : run_idx, x_label: thread_counts_nice[idx], var_label : engine_nice, y_label : throughput[idx][run_idx]})
+        for t in time:
+            data.append({ runid : run_idx, x_label: thread_count_nice, var_label : engine_nice, y_label : calc_throughput(data_size, t)})
             run_idx = run_idx + 1
-        idx = idx + 1
+    except FileNotFoundError:
+        return
 
-# Main function to iterate over files and create plots for the new benchmark
 def main():
-    folder_path = "benchmark-results/" # Replace with the actual path to your folder
+    folder_path = "benchmark-results/"
 
     for engine_label in engine_counts:
-        mt_file_paths = [os.path.join(folder_path, f"mtsubmit-{thread_count}-{engine_label}.json") for thread_count in thread_counts]
-        create_mtsubmit_dataset(mt_file_paths, engine_label)
+        for thread_count in thread_counts:
+            file = os.path.join(folder_path, f"mtsubmit-{thread_count}-{engine_label}.json")
+            process_file_to_dataset(file, engine_label, thread_count)
 
     df = pd.DataFrame(data)
     df.set_index(index, inplace=True)
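The per-element normalisation explained in the comment above can be checked with a small worked example; the thread count and timing value below are made up, only the 120-element work queue comes from the benchmark.

# 120 copy operations are split evenly over the worker threads,
# so one thread's reported time covers 120 / n_threads elements
n_threads = 4
elements_per_thread = 120 / n_threads      # 30 copies handled by each thread

measured_thread_time_ns = 3_000_000        # made-up total time of one thread
per_element_time_ns = measured_thread_time_ns / elements_per_thread

print(per_element_time_ns)                 # 100000.0 ns per single copy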
diff --git a/benchmarks/benchmark-plotters/plot-opt-submitmethod.py b/benchmarks/benchmark-plotters/plot-opt-submitmethod.py
deleted file mode 100644
index ae1f5c4..0000000
--- a/benchmarks/benchmark-plotters/plot-opt-submitmethod.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import os
-import json
-import pandas as pd
-from pandas.core.ops import methods
-from typing import List
-import seaborn as sns
-import matplotlib.pyplot as plt
-
-runid = "Run ID"
-x_label = "Size of Submitted Task"
-y_label = "Throughput in GiB/s, LogScale"
-var_label = "Submission Type"
-sizes = ["1kib", "4kib", "1mib", "32mib"]
-sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "32 MiB"]
-types = ["bs10", "bs50", "ms10", "ms50", "ssaw"]
-types_nice = ["Batch, Size 10", "Batch, Size 50", "Multi-Submit, Count 10", "Multi-Submit, Count 50", "Single Submit"]
-title = "Optimal Submission Method - Copy Operation tested Intra-Node on DDR"
-
-index = [runid, x_label, var_label]
-data = []
-
-def calc_throughput(size_bytes,time_microseconds):
-    time_seconds = time_microseconds * 1e-9
-    size_gib = size_bytes / (1024 ** 3)
-    throughput_gibs = size_gib / time_seconds
-    return throughput_gibs
-
-
-def index_from_element(value,array):
-    for (idx,val) in enumerate(array):
-        if val == value: return idx
-    return 0
-
-
-def load_and_process_submit_json(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "total": data["list"][0]["report"]["total"] / iterations,
-            "combined": data["list"][0]["report"]["combined"],
-            "submission": data["list"][0]["report"]["submission"],
-            "completion": data["list"][0]["report"]["completion"]
-        }
-
-
-# Function to plot the graph for the new benchmark
-def create_submit_dataset(file_paths, type_label):
-    times = []
-
-    type_index = index_from_element(type_label,types)
-    type_nice = types_nice[type_index]
-
-    idx = 0
-    for file_path in file_paths:
-        time = load_and_process_submit_json(file_path)
-        times.append(time["total"])
-        idx = idx + 1
-
-    # Adjust time measurements based on type
-    # which can contain multiple submissions
-    if type_label in {"bs10", "ms10"}:
-        times = [[t / 10 for t in time] for time in times]
-    elif type_label in {"ms50", "bs50"}:
-        times = [[t / 50 for t in time] for time in times]
-
-    times[0] = [t / 1 for t in times[0]]
-    times[1] = [t / 4 for t in times[1]]
-    times[2] = [t / (1024) for t in times[2]]
-    times[3] = [t / (32*1024) for t in times[3]]
-
-    throughput = [[calc_throughput(1024,time) for time in t] for t in times]
-
-    idx = 0
-    for run_set in throughput:
-        run_idx = 0
-        for run in run_set:
-            data.append({ runid : run_idx, x_label: sizes_nice[idx], var_label : type_nice, y_label : throughput[idx][run_idx]})
-            run_idx = run_idx + 1
-        idx = idx + 1
-
-
-# Main function to iterate over files and create plots for the new benchmark
-def main():
-    folder_path = "benchmark-results/" # Replace with the actual path to your folder
-
-    for type_label in types:
-        file_paths = [os.path.join(folder_path, f"submit-{type_label}-{size}-1e.json") for size in sizes]
-        create_submit_dataset(file_paths, type_label)
-
-    df = pd.DataFrame(data)
-    df.set_index(index, inplace=True)
-    df = df.sort_values(y_label)
-
-    ax = sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="rocket", errorbar="sd")
-    ax.set(yscale="log")
-    sns.move_legend(ax, "lower right")
-    plt.title(title)
-    plt.savefig(os.path.join(folder_path, "plot-opt-submitmethod.png"), bbox_inches='tight')
-    plt.show()
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/benchmarks/benchmark-plotters/plot-perf-enginelocation.py b/benchmarks/benchmark-plotters/plot-perf-enginelocation.py
index c4df13c..2111cd7 100644
--- a/benchmarks/benchmark-plotters/plot-perf-enginelocation.py
+++ b/benchmarks/benchmark-plotters/plot-perf-enginelocation.py
@@ -18,8 +18,8 @@ title = "Performance of Engine Location - Copy Operation on DDR with 1 Engine pe
 index = [runid, x_label, var_label]
 data = []
 
-def calc_throughput(size_bytes,time_microseconds):
-    time_seconds = time_microseconds * 1e-9
+def calc_throughput(size_bytes,time_ns):
+    time_seconds = time_ns * 1e-9
     size_gib = size_bytes / (1024 ** 3)
     throughput_gibs = size_gib / time_seconds
     return throughput_gibs
@@ -31,14 +31,16 @@ def index_from_element(value,array):
     return 0
 
 
-def load_and_process_copy_json(file_path,method_label):
+def load_time_mesurements(file_path,method_label):
     with open(file_path, 'r') as file:
         data = json.load(file)
         iterations = data["list"][0]["task"]["iterations"]
 
-        # Extracting time from JSON structure
         if method_label == "xcopy":
-            # For xcopy method, add times from two entries and divide by 3
+            # xcopy runs on two engines that both copy 1/2 of the entire
+            # specified size of 1gib, therefore the maximum time between
+            # these two is going to be the total time for copy
+
             time0 = data["list"][0]["report"]["time"]
             time1 = data["list"][1]["report"]["time"]
 
@@ -52,7 +54,6 @@ def load_and_process_copy_json(file_path,method_label):
         else:
             return data["list"][0]["report"]["time"]
 
-# Function to plot the graph for the new benchmark
 def create_copy_dataset(file_path, method_label, type_label):
     method_index = index_from_element(method_label,copy_methods)
     method_nice = copy_methods_nice[method_index]
@@ -66,7 +67,7 @@ def create_copy_dataset(file_path, method_label, type_label):
         data_size = 1024*1024*1024
 
     try:
-        time = load_and_process_copy_json(file_path,method_label)["total"]
+        time = load_time_mesurements(file_path,method_label)["total"]
         run_idx = 0
         for t in time:
             data.append({ runid : run_idx, x_label: type_nice, var_label : method_nice, y_label : calc_throughput(data_size, t)})
@@ -74,7 +75,6 @@
     except FileNotFoundError:
         return
 
-# Main function to iterate over files and create plots for the new benchmark
 def main():
     folder_path = "benchmark-results/"
 
diff --git a/benchmarks/benchmark-plotters/plot-perf-peakthroughput.py b/benchmarks/benchmark-plotters/plot-perf-peakthroughput.py
new file mode 100644
index 0000000..fc65159
--- /dev/null
+++ b/benchmarks/benchmark-plotters/plot-perf-peakthroughput.py
@@ -0,0 +1,80 @@
+import os
+import json
+import pandas as pd
+from pandas.core.ops import methods
+from typing import List
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+runid = "Run ID"
+x_label = "Destination Node"
+y_label = "Source Node"
+v_label = "Throughput"
+title = "Copy Throughput for 1GiB Elements running on SRC Node"
+
+data = []
+
+
+def mean_without_outliers(x):
+    return x.sort_values()[2:-2].mean()
+
+
+def calc_throughput(size_bytes,time_ns):
+    time_seconds = time_ns * 1e-9
+    size_gib = size_bytes / (1024 ** 3)
+    throughput_gibs = size_gib / time_seconds
+    return throughput_gibs
+
+
+def index_from_element(value,array):
+    for (idx,val) in enumerate(array):
+        if val == value: return idx
+    return 0
+
+
+def load_time_mesurements(file_path):
+    with open(file_path, 'r') as file:
+        data = json.load(file)
+        iterations = data["list"][0]["task"]["iterations"]
+
+        return {
+            "total": data["list"][0]["report"]["total"] / iterations,
+            "combined": data["list"][0]["report"]["combined"],
+            "submission": data["list"][0]["report"]["submission"],
+            "completion": data["list"][0]["report"]["completion"]
+        }
+
+
+def process_file_to_dataset(file_path, src_node, dst_node):
+    data_size = 1024*1024*1024
+
+    try:
+        time = load_time_mesurements(file_path)["total"]
+        run_idx = 0
+        for t in time:
+            data.append({ runid : run_idx, x_label : dst_node, y_label : src_node, v_label: calc_throughput(data_size, t)})
+            run_idx = run_idx + 1
+    except FileNotFoundError:
+        return
+
+
+def main():
+    folder_path = "benchmark-results/"
+
+    for src_node in range(16):
+        for dst_node in range(16):
+            file = os.path.join(folder_path, f"copy-n{src_node}ton{dst_node}-1gib-1e.json")
+            process_file_to_dataset(file, src_node, dst_node)
+
+    df = pd.DataFrame(data)
+    data_pivot = df.pivot_table(index=y_label, columns=x_label, values=v_label, aggfunc=mean_without_outliers)
+
+    sns.heatmap(data_pivot, annot=True, cmap="rocket", fmt=".0f")
+
+    plt.title(title)
+    plt.savefig(os.path.join(folder_path, "plot-perf-peakthroughput.png"), bbox_inches='tight')
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
index c2381bd..22a5cc2 100644
--- a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
+++ b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
@@ -8,19 +8,19 @@ import matplotlib.pyplot as plt
 
 runid = "Run ID"
 x_label = "Size of Submitted Task"
-y_label = "Throughput in GiB/s"
+y_label = "Throughput in GiB/s, LogScale"
 var_label = "Submission Type"
 sizes = ["1kib", "4kib", "1mib", "32mib"]
 sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "32 MiB"]
 types = ["bs10", "bs50", "ms10", "ms50", "ssaw"]
 types_nice = ["Batch, Size 10", "Batch, Size 50", "Multi-Submit, Count 10", "Multi-Submit, Count 50", "Single Submit"]
-title = "Performance of Submission Methods - Copy Operation tested Intra-Node on DDR"
+title = "Optimal Submission Method - Copy Operation tested Intra-Node on DDR"
 
 index = [runid, x_label, var_label]
 data = []
 
-def calc_throughput(size_bytes,time_microseconds):
-    time_seconds = time_microseconds * 1e-9
+def calc_throughput(size_bytes,time_ns):
+    time_seconds = time_ns * 1e-9
     size_gib = size_bytes / (1024 ** 3)
     throughput_gibs = size_gib / time_seconds
     return throughput_gibs
@@ -32,64 +32,68 @@ def index_from_element(value,array):
     return 0
 
 
-def load_and_process_submit_json(file_path):
+def load_time_mesurements(file_path,type_label):
     with open(file_path, 'r') as file:
         data = json.load(file)
-        return data["list"][0]["report"]["time"]
+        iterations = data["list"][0]["task"]["iterations"]
+        divisor = 1
 
+        # bs and ms types for submission process more than one
+        # element per run and the results therefore must be
+        # divided by this number
 
-# Function to plot the graph for the new benchmark
-def create_submit_dataset(file_paths, type_label):
-    times = []
+        if type_label in ["bs10", "ms10"]: divisor = 10
+        elif type_label in ["ms50", "bs50"]: divisor = 50
+        else: divisor = 1
 
-    type_index = index_from_element(type_label,types)
-    type_nice = types_nice[type_index]
-
-    idx = 0
-    for file_path in file_paths:
-        time = load_and_process_submit_json(file_path)
-        times.append(time["combined"])
-        idx = idx + 1
-
-    # Adjust time measurements based on type
-    # which can contain multiple submissions
-    if type_label in {"bs10", "ms10"}:
-        times = [[t / 10 for t in time] for time in times]
-    elif type_label in {"ms50", "bs50"}:
-        times = [[t / 50 for t in time] for time in times]
-
-    times[0] = [t / 1 for t in times[0]]
-    times[1] = [t / 4 for t in times[1]]
-    times[2] = [t / (1024) for t in times[2]]
-    times[3] = [t / (32*1024) for t in times[3]]
+        return {
+            "total": data["list"][0]["report"]["total"] / (iterations * divisor),
+            "combined": [ x / divisor for x in data["list"][0]["report"]["combined"]],
+            "submission": [ x / divisor for x in data["list"][0]["report"]["submission"]],
+            "completion": [ x / divisor for x in data["list"][0]["report"]["completion"]]
+        }
 
-    throughput = [[calc_throughput(1024,time) for time in t] for t in times]
 
-    idx = 0
-    for run_set in throughput:
+def process_file_to_dataset(file_path, type_label,size_label):
+    type_index = index_from_element(type_label,types)
+    type_nice = types_nice[type_index]
+    size_index = index_from_element(size_label, sizes)
+    size_nice = sizes_nice[size_index]
+    data_size = 0
+
+    if size_label == "1kib": data_size = 1024;
+    elif size_label == "4kib": data_size = 4 * 1024;
+    elif size_label == "1mib": data_size = 1024 * 1024;
+    elif size_label == "32mib": data_size = 32 * 1024 * 1024;
+    elif size_label == "1gib": data_size = 1024 * 1024 * 1024;
+    else: data_size = 0
+
+    try:
+        time = load_time_mesurements(file_path,type_label)["total"]
         run_idx = 0
-        for run in run_set:
-            data.append({ runid : run_idx, x_label: sizes_nice[idx], var_label : type_nice, y_label : throughput[idx][run_idx]})
+        for t in time:
+            data.append({ runid : run_idx, x_label: size_nice, var_label : type_nice, y_label : calc_throughput(data_size, t)})
             run_idx = run_idx + 1
-        idx = idx + 1
+    except FileNotFoundError:
+        return
+
 
-# Main function to iterate over files and create plots for the new benchmark
 def main():
-    folder_path = "benchmark-results/" # Replace with the actual path to your folder
+    folder_path = "benchmark-results/"
 
     for type_label in types:
-        file_paths = [os.path.join(folder_path, f"submit-{type_label}-{size}-1e.json") for size in sizes]
-        create_submit_dataset(file_paths, type_label)
+        for size in sizes:
+            file = os.path.join(folder_path, f"submit-{type_label}-{size}-1e.json")
+            process_file_to_dataset(file, type_label, size)
 
     df = pd.DataFrame(data)
     df.set_index(index, inplace=True)
     df = df.sort_values(y_label)
 
     sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="rocket", errorbar="sd")
-    plt.title(title)
-    plt.savefig(os.path.join(folder_path, "plot-perf-submitmethod.png"), bbox_inches='tight')
+    plt.savefig(os.path.join(folder_path, "plot-opt-submitmethod.png"), bbox_inches='tight')
     plt.show()
 
 if __name__ == "__main__":
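A quick standalone sanity check of the shared throughput conversion and of the trimmed mean used by the peakthroughput heatmap above; the sample numbers are made up for illustration, while the two helpers mirror the ones defined in the plotters.

import pandas as pd

def calc_throughput(size_bytes, time_ns):
    # same conversion the plotters use: nanoseconds -> seconds, bytes -> GiB
    return (size_bytes / (1024 ** 3)) / (time_ns * 1e-9)

def mean_without_outliers(x):
    # drops the two smallest and two largest samples before averaging
    return x.sort_values()[2:-2].mean()

# copying 1 GiB in 0.5 s should come out as 2 GiB/s
print(calc_throughput(1024 ** 3, 0.5e9))                               # ~2.0

# the trimmed mean ignores the extreme samples 1 and 100
print(mean_without_outliers(pd.Series([100, 10, 10, 1, 10, 10, 10])))  # 10.0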