Browse Source

refactor the benchmark plotters and submit newly plotted graphs

master
Constantin Fürst 1 year ago
parent
commit
60a5ba5120
  1. 0
      benchmarks/benchmark-plotters/__init__.py
  2. BIN
      benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc
  3. 15
      benchmarks/benchmark-plotters/common.py
  4. 41
      benchmarks/benchmark-plotters/plot-cost-mtsubmit.py
  5. 37
      benchmarks/benchmark-plotters/plot-perf-enginelocation.py
  6. 59
      benchmarks/benchmark-plotters/plot-perf-peakthroughput.py
  7. 41
      benchmarks/benchmark-plotters/plot-perf-submitmethod.py
  8. BIN
      benchmarks/benchmark-results/plot-opt-submitmethod.png
  9. BIN
      benchmarks/benchmark-results/plot-perf-allnodes-throughput.png
  10. BIN
      benchmarks/benchmark-results/plot-perf-allnodethroughput.png
  11. BIN
      benchmarks/benchmark-results/plot-perf-enginelocation.png
  12. BIN
      benchmarks/benchmark-results/plot-perf-mtsubmit.png
  13. BIN
      benchmarks/benchmark-results/plot-perf-smart-throughput.png
  14. BIN
      benchmarks/benchmark-results/plot-perf-smartthroughput.png

0
benchmarks/benchmark-plotters/__init__.py

BIN
benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc

15
benchmarks/benchmark-plotters/common.py

@ -0,0 +1,15 @@
def calc_throughput(size_bytes, time_ns):
    """Convert a measured transfer into throughput in GiB/s.

    size_bytes -- size of the transferred element in bytes
    time_ns    -- measured transfer duration in nanoseconds
    """
    elapsed_seconds = time_ns * 1e-9
    size_gib = size_bytes / (1024 ** 3)
    return size_gib / elapsed_seconds
def index_from_element(value, array):
    """Reverse search: return the index of value in array.

    Falls back to index 0 when the value is absent.
    NOTE(review): a miss is therefore indistinguishable from a match
    at position 0 -- callers appear to rely on this default.
    """
    return next((idx for idx, elem in enumerate(array) if elem == value), 0)

41
benchmarks/benchmark-plotters/plot-cost-mtsubmit.py

@ -5,32 +5,35 @@ from itertools import chain
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput, index_from_element
runid = "Run ID"
x_label = "Thread Count"
y_label = "Throughput in GiB/s"
var_label = "Thread Counts"
thread_counts = ["1t", "2t", "4t", "8t", "12t"]
thread_counts_nice = ["1 Thread", "2 Threads", "4 Threads", "8 Threads", "12 Threads"]
engine_counts = ["1mib-1e_PREVENT_FROM_DISPLAYING", "1mib-4e_PREVENT_FROM_DISPLAYING", "1gib-1e", "1gib-4e"]
engine_counts_nice = ["1 E/WQ and Tasksize 1 MiB", "4 E/WQ and Tasksize 1 MiB", "1 E/WQ and Tasksize 1 GiB", "4 E/WQ and Tasksize 1 GiB"]
title = "Total Throughput - 120 Copy Operations split on Threads Intra-Node on DDR"
index = [runid, x_label, var_label]
data = []
engine_counts = ["1mib-1e", "1mib-4e", "1gib-1e", "1gib-4e"]
engine_counts_nice = ["1 E/WQ and 1 MiB", "4 E/WQ and 1 MiB", "1 E/WQ and 1 GiB", "4 E/WQ and 1 GiB"]
def calc_throughput(size_bytes, time_ns):
    """Return throughput in GiB/s for size_bytes moved in time_ns nanoseconds.

    NOTE(review): duplicates the helper imported from common at the top
    of this file -- the refactor removes this local copy.
    """
    return (size_bytes / (1024 ** 3)) / (time_ns * 1e-9)
title = \
"""Total Throughput showing cost of MT Submit\n
Copying 120x split on n Threads Intra-Node on DDR\n
"""
description = \
"""Total Throughput showing cost of MT Submit\n
Running 120 Copy Operations split on n Threads\n
Copying Intra-Node on DDR performed for multiple Configurations\n
"""
def index_from_element(value, array):
    """Return the first index of value in array, or 0 when not present."""
    for position, candidate in enumerate(array):
        if candidate == value:
            return position
    return 0
index = [runid, x_label, var_label]
data = []
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transferred
def load_time_mesurements(file_path):
with open(file_path, 'r') as file:
data = json.load(file)
@ -48,6 +51,11 @@ def load_time_mesurements(file_path):
"completion" : [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
}
# processes a single file and appends the desired timings
# to the global data-array, handles multiple runs with a runid
# and ignores if the given file is not found, as some
# configurations may not be benchmarked
def process_file_to_dataset(file_path, engine_label, thread_count):
engine_index = index_from_element(engine_label,engine_counts)
engine_nice = engine_counts_nice[engine_index]
@ -69,6 +77,9 @@ def process_file_to_dataset(file_path, engine_label, thread_count):
return
# loops over all possible configuration combinations and calls
# process_file_to_dataset for them in order to build a dataframe
# which is then displayed and saved
def main():
folder_path = "benchmark-results/"

37
benchmarks/benchmark-plotters/plot-perf-enginelocation.py

@ -1,10 +1,11 @@
import os
import json
import pandas as pd
from pandas.core.ops import methods
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput, index_from_element
runid = "Run ID"
x_label = "Copy Type"
y_label = "Throughput in GiB/s"
@ -13,24 +14,23 @@ types = ["intersock-n0ton4-1mib", "internode-n0ton1-1mib", "intersock-n0ton4-1gi
types_nice = ["Inter-Socket 1MiB", "Inter-Node 1MiB", "Inter-Socket 1GiB", "Inter-Node 1GiB"]
copy_methods = ["dstcopy", "srccopy", "xcopy", "srcoutsidercopy", "dstoutsidercopy", "sockoutsidercopy", "nodeoutsidercopy"]
copy_methods_nice = [ "Engine on DST-Node", "Engine on SRC-Node", "Cross-Copy / Both Engines", "Engine on SRC-Socket, not SRC-Node", "Engine on DST-Socket, not DST-Node", "Engine on different Socket", "Engine on same Socket"]
title = "Performance of Engine Location - Copy Operation on DDR with 1 Engine per WQ"
index = [runid, x_label, var_label]
data = []
def calc_throughput(size_bytes, time_ns):
    """Throughput in GiB/s given an element size in bytes and a duration in ns."""
    duration_seconds = time_ns * 1e-9
    return size_bytes / (1024 ** 3) / duration_seconds
title = \
"""Throughput showing impact of Engine Location\n
Copy Operation on DDR with 1 Engine per WQ"""
description = \
"""Throughput showing impact of Engine Location\n
Some Configurations missing as they are not feesible\n
Copy Operation on DDR with 1 Engine per WQ"""
def index_from_element(value, array):
    """Locate value in array; returns 0 if no element matches."""
    hits = [i for i, v in enumerate(array) if v == value]
    return hits[0] if hits else 0
index = [runid, x_label, var_label]
data = []
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transferred
def load_time_mesurements(file_path,method_label):
with open(file_path, 'r') as file:
data = json.load(file)
@ -59,6 +59,11 @@ def load_time_mesurements(file_path,method_label):
"completion": data["list"][0]["report"]["time"]["completion"]
}
# processes a single file and appends the desired timings
# to the global data-array, handles multiple runs with a runid
# and ignores if the given file is not found, as some
# configurations may not be benchmarked
def create_copy_dataset(file_path, method_label, type_label):
method_index = index_from_element(method_label,copy_methods)
method_nice = copy_methods_nice[method_index]
@ -80,6 +85,10 @@ def create_copy_dataset(file_path, method_label, type_label):
except FileNotFoundError:
return
# loops over all possible configuration combinations and calls
# create_copy_dataset for them in order to build a dataframe
# which is then displayed and saved
def main():
folder_path = "benchmark-results/"

59
benchmarks/benchmark-plotters/plot-perf-peakthroughput.py

@ -2,34 +2,40 @@ import os
import json
import pandas as pd
from itertools import chain
from pandas.core.ops import methods
from typing import List
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput
runid = "Run ID"
x_label = "Destination Node"
y_label = "Source Node"
v_label = "Throughput"
title = "Copy Throughput in GiB/s tested for 1GiB Elements using all 8 DSA Chiplets"
title_allnodes = \
"""Copy Throughput in GiB/s tested for 1GiB Elements\n
Using all 8 DSA Chiplets available on the System"""
title_smartnodes = \
"""Copy Throughput in GiB/s tested for 1GiB Elements\n
Using Cross-Copy for Intersocket and all 4 Chiplets of Socket for Intrasocket"""
description_smartnodes = \
"""Copy Throughput in GiB/s tested for 1GiB Elements\n
Nodes of {8...15} are HBM accessors for their counterparts (minus 8)\n
Using all 4 DSA Chiplets of a Socket for Intra-Socket Operation\n
And using only the Source and Destination Nodes DSA for Inter-Socket"""
description_allnodes = \
"""Copy Throughput in GiB/s tested for 1GiB Elements\n
Nodes of {8...15} are HBM accessors for their counterparts (minus 8)\n
Using all 8 DSA Chiplets available on the System"""
index = [ runid, x_label, y_label]
data = []
def calc_throughput(size_bytes, time_ns):
    """Compute throughput in GiB/s from a byte count and a nanosecond duration."""
    seconds = time_ns * 1e-9
    gibibytes = size_bytes / (1024 ** 3)
    throughput = gibibytes / seconds
    return throughput
def index_from_element(value, array):
    """Reverse lookup of value in array; defaults to index 0 on a miss."""
    return next((i for i, entry in enumerate(array) if entry == value), 0)
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transferred
def load_time_mesurements(file_path):
with open(file_path, 'r') as file:
data = json.load(file)
@ -45,6 +51,10 @@ def load_time_mesurements(file_path):
}
# processes a single file and appends the desired timings
# to the global data-array, handles multiple runs with a runid
# and ignores if the given file is not found, as some
# configurations may not be benchmarked
def process_file_to_dataset(file_path, src_node, dst_node):
data_size = 1024*1024*1024
@ -58,24 +68,31 @@ def process_file_to_dataset(file_path, src_node, dst_node):
return
# loops over all possible configuration combinations and calls
# process_file_to_dataset for them in order to build a dataframe
# which is then displayed and saved
def main(node_config, title):
    """Build and plot the copy-throughput heatmap for one node configuration.

    node_config -- selector embedded in the result-file names and the
                   output-plot name (e.g. "allnodes" or "smart")
    title       -- plot title shown above the heatmap
    """
    folder_path = "benchmark-results/"

    for src_node in range(16):
        for dst_node in range(16):
            file = os.path.join(folder_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-1e.json")
            process_file_to_dataset(file, src_node, dst_node)

    df = pd.DataFrame(data)
    # clear the module-level accumulator so a second main() call starts fresh
    data.clear()
    df.set_index(index, inplace=True)
    data_pivot = df.pivot_table(index=y_label, columns=x_label, values=v_label)

    plt.figure(figsize=(8, 6))
    sns.heatmap(data_pivot, annot=True, cmap="rocket_r", fmt=".0f")
    plt.title(title)
    plt.savefig(os.path.join(folder_path, f"plot-perf-{node_config}-throughput.png"), bbox_inches='tight')
    plt.show()


if __name__ == "__main__":
    main("allnodes", title_allnodes)
    main("smart", title_smartnodes)

41
benchmarks/benchmark-plotters/plot-perf-submitmethod.py

@ -1,11 +1,11 @@
import os
import json
import pandas as pd
from pandas.core.ops import methods
from typing import List
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput, index_from_element
runid = "Run ID"
x_label = "Size of Submitted Task"
y_label = "Throughput in GiB/s"
@ -13,25 +13,26 @@ var_label = "Submission Type"
sizes = ["1kib", "4kib", "1mib", "32mib"]
sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "32 MiB"]
types = ["bs10", "bs50", "ms10", "ms50", "ssaw"]
types_nice = ["Batch, Size 10", "Batch, Size 50", "Multi-Submit, Count 10", "Multi-Submit, Count 50", "Single Submit"]
title = "Optimal Submission Method - Copy Operation tested Intra-Node on DDR"
index = [runid, x_label, var_label]
data = []
types_nice = ["Batch 10", "Batch 50", "Multi-Submit 10", "Multi-Submit 50", "Single-Submit"]
def calc_throughput(size_bytes, time_ns):
    """Return GiB/s for a transfer of size_bytes completed in time_ns ns."""
    wall_seconds = time_ns * 1e-9
    volume_gib = size_bytes / (1024 ** 3)
    return volume_gib / wall_seconds
title = \
"""Throughput showing Optimal Submission Method and Size\n
Copy Operation tested Intra-Node on DDR with 1 Engine per WQ"""
description = \
"""Throughput showing Optimal Submission Method and Size\n
Batch uses a Batch Descriptor of given Size\n
Multi-Submit fills the Work Queue with n Single Descriptors\n
Single-Submit submits one Descriptor and immediately waits\n
Copy Operation tested Intra-Node on DDR with 1 Engine per WQ"""
def index_from_element(value, array):
    """Find the position of value within array; 0 is returned when absent."""
    position = 0
    for i, item in enumerate(array):
        if item == value:
            position = i
            break
    return position
index = [runid, x_label, var_label]
data = []
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transferred
def load_time_mesurements(file_path,type_label):
with open(file_path, 'r') as file:
data = json.load(file)
@ -54,6 +55,10 @@ def load_time_mesurements(file_path,type_label):
}
# processes a single file and appends the desired timings
# to the global data-array, handles multiple runs with a runid
# and ignores if the given file is not found, as some
# configurations may not be benchmarked
def process_file_to_dataset(file_path, type_label,size_label):
type_index = index_from_element(type_label,types)
type_nice = types_nice[type_index]
@ -78,7 +83,9 @@ def process_file_to_dataset(file_path, type_label,size_label):
return
# loops over all possible configuration combinations and calls
# process_file_to_dataset for them in order to build a dataframe
# which is then displayed and saved
def main():
folder_path = "benchmark-results/"

BIN
benchmarks/benchmark-results/plot-opt-submitmethod.png

Before

Width: 622  |  Height: 453  |  Size: 34 KiB

After

Width: 594  |  Height: 488  |  Size: 39 KiB

BIN
benchmarks/benchmark-results/plot-perf-allnodes-throughput.png

After

Width: 649  |  Height: 580  |  Size: 105 KiB

BIN
benchmarks/benchmark-results/plot-perf-allnodethroughput.png

Before

Width: 652  |  Height: 453  |  Size: 98 KiB

BIN
benchmarks/benchmark-results/plot-perf-enginelocation.png

Before

Width: 687  |  Height: 453  |  Size: 39 KiB

After

Width: 563  |  Height: 488  |  Size: 40 KiB

BIN
benchmarks/benchmark-results/plot-perf-mtsubmit.png

Before

Width: 655  |  Height: 453  |  Size: 30 KiB

After

Width: 563  |  Height: 522  |  Size: 37 KiB

BIN
benchmarks/benchmark-results/plot-perf-smart-throughput.png

After

Width: 684  |  Height: 580  |  Size: 120 KiB

BIN
benchmarks/benchmark-results/plot-perf-smartthroughput.png

Before

Width: 943  |  Height: 453  |  Size: 116 KiB

Loading…
Cancel
Save