diff --git a/benchmarks/benchmark-plotters/__init__.py b/benchmarks/benchmark-plotters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc b/benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc
new file mode 100644
index 0000000..0b955d5
Binary files /dev/null and b/benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc differ
diff --git a/benchmarks/benchmark-plotters/common.py b/benchmarks/benchmark-plotters/common.py
new file mode 100644
index 0000000..4f44dd8
--- /dev/null
+++ b/benchmarks/benchmark-plotters/common.py
@@ -0,0 +1,15 @@
+# calculates throughput in GiB/s from the measured
+# transfer duration (in nanoseconds) for a given element
+# with the size of this element given in bytes
+def calc_throughput(size_bytes,time_ns):
+    time_seconds = time_ns * 1e-9
+    size_gib = size_bytes / (1024 ** 3)
+    throughput_gibs = size_gib / time_seconds
+    return throughput_gibs
+
+
+# reverse array search: return index of value in array
+def index_from_element(value,array):
+    for (idx,val) in enumerate(array):
+        if val == value: return idx
+    return 0
\ No newline at end of file
diff --git a/benchmarks/benchmark-plotters/plot-cost-mtsubmit.py b/benchmarks/benchmark-plotters/plot-cost-mtsubmit.py
index 7de358f..d9c9e50 100644
--- a/benchmarks/benchmark-plotters/plot-cost-mtsubmit.py
+++ b/benchmarks/benchmark-plotters/plot-cost-mtsubmit.py
@@ -5,32 +5,35 @@ from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt
 
+from common import calc_throughput, index_from_element
+
 runid = "Run ID"
 x_label = "Thread Count"
 y_label = "Throughput in GiB/s"
 var_label = "Thread Counts"
 thread_counts = ["1t", "2t", "4t", "8t", "12t"]
 thread_counts_nice = ["1 Thread", "2 Threads", "4 Threads", "8 Threads", "12 Threads"]
-engine_counts = ["1mib-1e_PREVENT_FROM_DISPLAYING", "1mib-4e_PREVENT_FROM_DISPLAYING", "1gib-1e", "1gib-4e"]
-engine_counts_nice = ["1 E/WQ and Tasksize 1 MiB", "4 E/WQ and Tasksize 1 MiB", "1 E/WQ and Tasksize 1 GiB", "4 E/WQ and Tasksize 1 GiB"]
-title = "Total Throughput - 120 Copy Operations split on Threads Intra-Node on DDR"
-
-index = [runid, x_label, var_label]
-data = []
+engine_counts = ["1mib-1e", "1mib-4e", "1gib-1e", "1gib-4e"]
+engine_counts_nice = ["1 E/WQ and 1 MiB", "4 E/WQ and 1 MiB", "1 E/WQ and 1 GiB", "4 E/WQ and 1 GiB"]
 
-def calc_throughput(size_bytes,time_ns):
-    time_seconds = time_ns * 1e-9
-    size_gib = size_bytes / (1024 ** 3)
-    throughput_gibs = size_gib / time_seconds
-    return throughput_gibs
+title = \
+    """Total Throughput showing cost of MT Submit\n
+    Copying 120x split on n Threads Intra-Node on DDR\n
+    """
+description = \
+    """Total Throughput showing cost of MT Submit\n
+    Running 120 Copy Operations split on n Threads\n
+    Copying Intra-Node on DDR performed for multiple Configurations\n
+    """
 
-def index_from_element(value,array):
-    for (idx,val) in enumerate(array):
-        if val == value: return idx
-    return 0
+index = [runid, x_label, var_label]
+data = []
 
+# loads the measurements from a given file and processes them
+# so that they are normalized, meaning that the timings returned
+# are nanoseconds per element transferred
 def load_time_mesurements(file_path):
     with open(file_path, 'r') as file:
         data = json.load(file)
@@ -48,6 +51,11 @@ def load_time_mesurements(file_path):
         "completion" : [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
     }
 
+
+# processes a single file and appends the desired timings
+# to the global data-array, handles multiple runs with a runid
+# and ignores if the given file is not found, as some
+# configurations may not be benchmarked
 def process_file_to_dataset(file_path, engine_label, thread_count):
     engine_index = index_from_element(engine_label,engine_counts)
     engine_nice = engine_counts_nice[engine_index]
@@ -69,6 +77,9 @@ def process_file_to_dataset(file_path, engine_label, thread_count):
     return
 
 
+# loops over all possible configuration combinations and calls
+# process_file_to_dataset for them in order to build a dataframe
+# which is then displayed and saved
 def main():
     folder_path = "benchmark-results/"
 
diff --git a/benchmarks/benchmark-plotters/plot-perf-enginelocation.py b/benchmarks/benchmark-plotters/plot-perf-enginelocation.py
index c878c95..3fc26dd 100644
--- a/benchmarks/benchmark-plotters/plot-perf-enginelocation.py
+++ b/benchmarks/benchmark-plotters/plot-perf-enginelocation.py
@@ -1,10 +1,11 @@
 import os
 import json
 import pandas as pd
-from pandas.core.ops import methods
 import seaborn as sns
 import matplotlib.pyplot as plt
 
+from common import calc_throughput, index_from_element
+
 runid = "Run ID"
 x_label = "Copy Type"
 y_label = "Throughput in GiB/s"
@@ -13,24 +14,23 @@ types = ["intersock-n0ton4-1mib", "internode-n0ton1-1mib", "intersock-n0ton4-1gi
 types_nice = ["Inter-Socket 1MiB", "Inter-Node 1MiB", "Inter-Socket 1GiB", "Inter-Node 1GiB"]
 copy_methods = ["dstcopy", "srccopy", "xcopy", "srcoutsidercopy", "dstoutsidercopy", "sockoutsidercopy", "nodeoutsidercopy"]
 copy_methods_nice = [ "Engine on DST-Node", "Engine on SRC-Node", "Cross-Copy / Both Engines", "Engine on SRC-Socket, not SRC-Node", "Engine on DST-Socket, not DST-Node", "Engine on different Socket", "Engine on same Socket"]
-title = "Performance of Engine Location - Copy Operation on DDR with 1 Engine per WQ"
-
-index = [runid, x_label, var_label]
-data = []
-def calc_throughput(size_bytes,time_ns):
-    time_seconds = time_ns * 1e-9
-    size_gib = size_bytes / (1024 ** 3)
-    throughput_gibs = size_gib / time_seconds
-    return throughput_gibs
+title = \
+    """Throughput showing impact of Engine Location\n
+    Copy Operation on DDR with 1 Engine per WQ"""
+description = \
+    """Throughput showing impact of Engine Location\n
+    Some Configurations missing as they are not feasible\n
+    Copy Operation on DDR with 1 Engine per WQ"""
 
-def index_from_element(value,array):
-    for (idx,val) in enumerate(array):
-        if val == value: return idx
-    return 0
+index = [runid, x_label, var_label]
+data = []
 
+# loads the measurements from a given file and processes them
+# so that they are normalized, meaning that the timings returned
+# are nanoseconds per element transferred
 def load_time_mesurements(file_path,method_label):
     with open(file_path, 'r') as file:
         data = json.load(file)
@@ -59,6 +59,11 @@ def load_time_mesurements(file_path,method_label):
         "completion": data["list"][0]["report"]["time"]["completion"]
     }
 
+
+# processes a single file and appends the desired timings
+# to the global data-array, handles multiple runs with a runid
+# and ignores if the given file is not found, as some
+# configurations may not be benchmarked
 def create_copy_dataset(file_path, method_label, type_label):
     method_index = index_from_element(method_label,copy_methods)
     method_nice = copy_methods_nice[method_index]
@@ -80,6 +85,10 @@ def create_copy_dataset(file_path, method_label, type_label):
     except FileNotFoundError:
         return
 
+
+# loops over all possible configuration combinations and calls
+# create_copy_dataset for them in order to build a dataframe
+# which is then displayed and saved
 def main():
     folder_path = "benchmark-results/"
 
diff --git a/benchmarks/benchmark-plotters/plot-perf-peakthroughput.py b/benchmarks/benchmark-plotters/plot-perf-peakthroughput.py
index 29957fd..302470b 100644
--- a/benchmarks/benchmark-plotters/plot-perf-peakthroughput.py
+++ b/benchmarks/benchmark-plotters/plot-perf-peakthroughput.py
@@ -2,34 +2,40 @@ import os
 import json
 import pandas as pd
 from itertools import chain
-from pandas.core.ops import methods
-from typing import List
 import seaborn as sns
 import matplotlib.pyplot as plt
 
+from common import calc_throughput
+
 runid = "Run ID"
 x_label = "Destination Node"
 y_label = "Source Node"
 v_label = "Throughput"
-title = "Copy Throughput in GiB/s tested for 1GiB Elements using all 8 DSA Chiplets"
+
+title_allnodes = \
+    """Copy Throughput in GiB/s tested for 1GiB Elements\n
+    Using all 8 DSA Chiplets available on the System"""
+title_smartnodes = \
+    """Copy Throughput in GiB/s tested for 1GiB Elements\n
+    Using Cross-Copy for Intersocket and all 4 Chiplets of Socket for Intrasocket"""
+
+description_smartnodes = \
+    """Copy Throughput in GiB/s tested for 1GiB Elements\n
+    Nodes of {8...15} are HBM accessors for their counterparts (minus 8)\n
+    Using all 4 DSA Chiplets of a Socket for Intra-Socket Operation\n
+    And using only the Source and Destination Nodes DSA for Inter-Socket"""
+description_allnodes = \
+    """Copy Throughput in GiB/s tested for 1GiB Elements\n
+    Nodes of {8...15} are HBM accessors for their counterparts (minus 8)\n
+    Using all 8 DSA Chiplets available on the System"""
 
 index = [ runid, x_label, y_label]
 data = []
 
-def calc_throughput(size_bytes,time_ns):
-    time_seconds = time_ns * 1e-9
-    size_gib = size_bytes / (1024 ** 3)
-    throughput_gibs = size_gib / time_seconds
-    return throughput_gibs
-
-
-def index_from_element(value,array):
-    for (idx,val) in enumerate(array):
-        if val == value: return idx
-    return 0
-
-
+# loads the measurements from a given file and processes them
+# so that they are normalized, meaning that the timings returned
+# are nanoseconds per element transferred
 def load_time_mesurements(file_path):
     with open(file_path, 'r') as file:
         data = json.load(file)
@@ -45,6 +51,10 @@ def load_time_mesurements(file_path):
     }
 
 
+# processes a single file and appends the desired timings
+# to the global data-array, handles multiple runs with a runid
+# and ignores if the given file is not found, as some
+# configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
     data_size = 1024*1024*1024
 
@@ -58,24 +68,31 @@ def process_file_to_dataset(file_path, src_node, dst_node):
     return
 
-def main():
+# loops over all possible configuration combinations and calls
+# process_file_to_dataset for them in order to build a dataframe
+# which is then displayed and saved
+def main(node_config,title):
     folder_path = "benchmark-results/"
 
     for src_node in range(16):
         for dst_node in range(16):
-            file = os.path.join(folder_path, f"copy-n{src_node}ton{dst_node}-1gib-allnodes-1e.json")
+            file = os.path.join(folder_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-1e.json")
             process_file_to_dataset(file, src_node, dst_node)
 
     df = pd.DataFrame(data)
+
+    data.clear()
     df.set_index(index, inplace=True)
     data_pivot = df.pivot_table(index=y_label, columns=x_label, values=v_label)
+
     plt.figure(figsize=(8, 6))
-    sns.heatmap(data_pivot, annot=True, cmap="YlGn", fmt=".0f")
+    sns.heatmap(data_pivot, annot=True, cmap="rocket_r", fmt=".0f")
     plt.title(title)
-    plt.savefig(os.path.join(folder_path, "plot-perf-allnodethroughput.png"), bbox_inches='tight')
+    plt.savefig(os.path.join(folder_path, f"plot-perf-{node_config}-throughput.png"), bbox_inches='tight')
     plt.show()
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main("allnodes", title_allnodes)
+    main("smart", title_smartnodes)
\ No newline at end of file
diff --git a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
index 4777ad8..672eb34 100644
--- a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
+++ b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
@@ -1,11 +1,11 @@
 import os
 import json
 import pandas as pd
-from pandas.core.ops import methods
-from typing import List
 import seaborn as sns
 import matplotlib.pyplot as plt
 
+from common import calc_throughput, index_from_element
+
 runid = "Run ID"
 x_label = "Size of Submitted Task"
 y_label = "Throughput in GiB/s"
@@ -13,25 +13,26 @@ var_label = "Submission Type"
 sizes = ["1kib", "4kib", "1mib", "32mib"]
 sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "32 MiB"]
 types = ["bs10", "bs50", "ms10", "ms50", "ssaw"]
-types_nice = ["Batch, Size 10", "Batch, Size 50", "Multi-Submit, Count 10", "Multi-Submit, Count 50", "Single Submit"]
-title = "Optimal Submission Method - Copy Operation tested Intra-Node on DDR"
-
-index = [runid, x_label, var_label]
-data = []
+types_nice = ["Batch 10", "Batch 50", "Multi-Submit 10", "Multi-Submit 50", "Single-Submit"]
 
-def calc_throughput(size_bytes,time_ns):
-    time_seconds = time_ns * 1e-9
-    size_gib = size_bytes / (1024 ** 3)
-    throughput_gibs = size_gib / time_seconds
-    return throughput_gibs
+title = \
+    """Throughput showing Optimal Submission Method and Size\n
+    Copy Operation tested Intra-Node on DDR with 1 Engine per WQ"""
+description = \
+    """Throughput showing Optimal Submission Method and Size\n
+    Batch uses a Batch Descriptor of given Size\n
+    Multi-Submit fills the Work Queue with n Single Descriptors\n
+    Single-Submit submits one Descriptor and immediately waits\n
+    Copy Operation tested Intra-Node on DDR with 1 Engine per WQ"""
 
-def index_from_element(value,array):
-    for (idx,val) in enumerate(array):
-        if val == value: return idx
-    return 0
+index = [runid, x_label, var_label]
+data = []
 
+# loads the measurements from a given file and processes them
+# so that they are normalized, meaning that the timings returned
+# are nanoseconds per element transferred
 def load_time_mesurements(file_path,type_label):
     with open(file_path, 'r') as file:
         data = json.load(file)
@@ -54,6 +55,10 @@ def load_time_mesurements(file_path,type_label):
     }
 
 
+# processes a single file and appends the desired timings
+# to the global data-array, handles multiple runs with a runid
+# and ignores if the given file is not found, as some
+# configurations may not be benchmarked
 def process_file_to_dataset(file_path, type_label,size_label):
     type_index = index_from_element(type_label,types)
     type_nice = types_nice[type_index]
@@ -78,7 +83,9 @@ def process_file_to_dataset(file_path, type_label,size_label):
     return
 
-
+# loops over all possible configuration combinations and calls
+# process_file_to_dataset for them in order to build a dataframe
+# which is then displayed and saved
 def main():
     folder_path = "benchmark-results/"
 
diff --git a/benchmarks/benchmark-results/plot-opt-submitmethod.png b/benchmarks/benchmark-results/plot-opt-submitmethod.png
index 001ae92..88d20dc 100644
Binary files a/benchmarks/benchmark-results/plot-opt-submitmethod.png and b/benchmarks/benchmark-results/plot-opt-submitmethod.png differ
diff --git a/benchmarks/benchmark-results/plot-perf-allnodes-throughput.png b/benchmarks/benchmark-results/plot-perf-allnodes-throughput.png
new file mode 100644
index 0000000..dc850f1
Binary files /dev/null and b/benchmarks/benchmark-results/plot-perf-allnodes-throughput.png differ
diff --git a/benchmarks/benchmark-results/plot-perf-allnodethroughput.png b/benchmarks/benchmark-results/plot-perf-allnodethroughput.png
deleted file mode 100644
index b00693c..0000000
Binary files a/benchmarks/benchmark-results/plot-perf-allnodethroughput.png and /dev/null differ
diff --git a/benchmarks/benchmark-results/plot-perf-enginelocation.png b/benchmarks/benchmark-results/plot-perf-enginelocation.png
index 0bd8d67..568a3bf 100644
Binary files a/benchmarks/benchmark-results/plot-perf-enginelocation.png and b/benchmarks/benchmark-results/plot-perf-enginelocation.png differ
diff --git a/benchmarks/benchmark-results/plot-perf-mtsubmit.png b/benchmarks/benchmark-results/plot-perf-mtsubmit.png
index 7658a9b..65f8069 100644
Binary files a/benchmarks/benchmark-results/plot-perf-mtsubmit.png and b/benchmarks/benchmark-results/plot-perf-mtsubmit.png differ
diff --git a/benchmarks/benchmark-results/plot-perf-smart-throughput.png b/benchmarks/benchmark-results/plot-perf-smart-throughput.png
new file mode 100644
index 0000000..409aa35
Binary files /dev/null and b/benchmarks/benchmark-results/plot-perf-smart-throughput.png differ
diff --git a/benchmarks/benchmark-results/plot-perf-smartthroughput.png b/benchmarks/benchmark-results/plot-perf-smartthroughput.png
deleted file mode 100644
index 43c893c..0000000
Binary files a/benchmarks/benchmark-results/plot-perf-smartthroughput.png and /dev/null differ
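As a quick sanity check for the shared helpers that common.py introduces above, the snippet below (illustrative only, not part of the patch; it assumes common.py is importable from the current working directory) exercises calc_throughput and index_from_element with the label lists used by the plotting scripts:

    from common import calc_throughput, index_from_element

    # 1 GiB copied in 0.25 s (250,000,000 ns) should come out as 4 GiB/s
    print(calc_throughput(1024 ** 3, 250_000_000))  # -> 4.0

    # map a raw engine label to its display name, as plot-cost-mtsubmit.py does
    engine_counts = ["1mib-1e", "1mib-4e", "1gib-1e", "1gib-4e"]
    engine_counts_nice = ["1 E/WQ and 1 MiB", "4 E/WQ and 1 MiB", "1 E/WQ and 1 GiB", "4 E/WQ and 1 GiB"]
    idx = index_from_element("1gib-4e", engine_counts)
    print(engine_counts_nice[idx])  # -> 4 E/WQ and 1 GiB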