update benchmark plotters for changes made to benchmark and result format

11 months ago · 326cf92af3
5 changed files with 56 additions and 65 deletions
--- a/benchmarks/benchmark-plotters/pycache/common.cpython-39.pyc
+++ b/benchmarks/benchmark-plotters/pycache/common.cpython-39.pyc
--- a/benchmarks/benchmark-plotters/common.py
+++ b/benchmarks/benchmark-plotters/common.py
@ -1,3 +1,10 @@
+import os
+import json
+
+from typing import List
+from numpy import float64
+
+
 # calculates throughput in GiB/s from the measured
 # transfer duration (in nanoseconds) for a given element
 # whose size is given in bytes
@ -12,4 +19,24 @@ def calc_throughput(size_bytes,time_ns):
 def index_from_element(value,array):
    for (idx,val) in enumerate(array):
        if val == value: return idx
-    return 0
+    return 0
+
+
+# loads the measurements from a given file
+#
+# opens file_path, parses it as json and returns the entries of
+# data["timings"], each divided by the "reps" value of the first
+# task in data["list"]. errors raised by open() or json.load()
+# (e.g. FileNotFoundError for a missing file) are not handled
+# here and propagate to the caller.
+#
+# NOTE(review): the return annotation says List[float64], but the
+# values are produced by plain division of whatever json.load()
+# returned - presumably python floats, confirm before relying on
+# numpy-specific behaviour.
+def load_time_mesurements(file_path) -> List[float64]:
+    with open(file_path, 'r') as file:
+        data = json.load(file)
+        count = data["count"]
+        runcount_divisor = data["list"][0]["task"]["reps"]
+
+        # if there is more than one thread, the internal repetition
+        # count should be the same. if you decide it should not be,
+        # remove the check below
+
+        if count > 1:
+            for i in range(count):
+                if runcount_divisor != data["list"][i]["task"]["reps"]:
+                    print("Runcount missmatch between tasks. Check the commend above, aborting for now.")
+                    # NOTE(review): os.abort() terminates the process via
+                    # SIGABRT; buffered stdout may not be flushed first, so
+                    # the message above can get lost - consider raising an
+                    # exception instead.
+                    os.abort()
+
+        # normalize every recorded timing by the repetition count
+        return [ x / runcount_divisor for x in data["timings"]]  
--- a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
+++ b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
@ -5,7 +5,7 @@ from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt

-from common import calc_throughput
+from common import calc_throughput, load_time_mesurements

 result_path = "benchmark-results/"
 output_path = "benchmark-plots/"
@ -40,20 +40,8 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "size": data["list"][0]["task"]["size"],
-            "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path):
+    return load_time_mesurements(file_path)


 # processes a single file and appends the desired timings
@ -61,12 +49,13 @@ def load_time_mesurements(file_path):
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
+    size = 1024*1024*1024
    try:
-        file_data = load_time_mesurements(file_path)
-        time = file_data["combined"]
+        timing = get_timing(file_path)
        run_idx = 0
-        for t in time:
-            data.append({ runid : run_idx, x_label : dst_node, y_label : calc_throughput(file_data["size"], t)})
+        for t in timing:
+            tp = calc_throughput(size, t)
+            data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
            run_idx = run_idx + 1
    except FileNotFoundError:
        return
@ -89,8 +78,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title):
    src_node = 0
    for dst_node in {8,11,12,15}:
-        size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib" 
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-1e.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-1e.json")
        process_file_to_dataset(file, src_node, dst_node)

    df = pd.DataFrame(data)
--- a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
+++ b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
@ -5,7 +5,7 @@ from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt

-from common import calc_throughput
+from common import calc_throughput, load_time_mesurements

 result_path = "benchmark-results/"
 output_path = "benchmark-plots/"
@ -40,20 +40,8 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "size": data["list"][0]["task"]["size"],
-            "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path):
+    return load_time_mesurements(file_path)


 # processes a single file and appends the desired timings
@ -61,12 +49,11 @@ def load_time_mesurements(file_path):
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
+    size = 1024*1024*1024
    try:
-        file_data = load_time_mesurements(file_path)
-        time = file_data["combined"]
+        timing = get_timing(file_path)
        run_idx = 0
-        for t in time:
-            size = file_data["size"]
+        for t in timing:
            tp = calc_throughput(size, t)
            data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
            run_idx = run_idx + 1
@ -91,8 +78,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title,ext):
    src_node = 0
    for dst_node in {8,11,12,15}:
-        size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib" 
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-cpu-{ext}.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-cpu-{ext}.json")
        process_file_to_dataset(file, src_node, dst_node)

    df = pd.DataFrame(data)
--- a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
+++ b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
@ -1,17 +1,19 @@
 import os
 import json
+from numpy import float64, int64
+from typing import List
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt

-from common import calc_throughput, index_from_element
+from common import calc_throughput, index_from_element, load_time_mesurements

 runid = "Run ID"
 x_label = "Size of Submitted Task"
 y_label = "Throughput in GiB/s"
 var_label = "Submission Type"
-sizes = ["1kib", "4kib", "1mib"]
-sizes_nice = ["1 KiB", "4 KiB", "1 MiB"]
+sizes = ["1kib", "4kib", "1mib", "128mib"]
+sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "128 MiB"]
 types = ["bs10", "bs50", "ssaw"]
 types_nice = ["Batch, Size 10", "Batch, Size 50", "Single Submit"]

@ -33,26 +35,14 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path,type_label):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        iterations = data["list"][0]["task"]["iterations"]
-        divisor = 1
+def get_timing(file_path,type_label) -> List[float64]:
+    divisor = 0

-        # bs and ms types for submission process more than one
-        # element per run and the results therefore must be
-        # divided by this number
+    if type_label == "bs10": divisor = 10
+    elif type_label == "bs50" : divisor = 50
+    else: divisor = 1

-        if type_label in ["bs10", "ms10"]: divisor = 10
-        elif type_label in ["ms50", "bs50"]: divisor = 50
-        else: divisor = 1
-
-        return {
-            "total": data["list"][0]["report"]["time"]["total"] / (iterations * divisor),
-            "combined": [ x / divisor for x in data["list"][0]["report"]["time"]["combined"]],
-            "submission": [ x / divisor for x in data["list"][0]["report"]["time"]["submission"]],
-            "completion": [ x / divisor for x in data["list"][0]["report"]["time"]["completion"]]
-        }
+    return [ x / divisor for x in load_time_mesurements(file_path)]


 # processes a single file and appends the desired timings
@ -74,7 +64,7 @@ def process_file_to_dataset(file_path, type_label,size_label):
    else: data_size = 0

    try:
-        time = load_time_mesurements(file_path,type_label)["combined"]
+        time = get_timing(file_path,type_label)
        run_idx = 0
        for t in time:
            data.append({ runid : run_idx, x_label: size_nice, var_label : type_nice, y_label : calc_throughput(data_size, t)})