From 326cf92af33a4f6359dfa80dc847a34825a5bce9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Constantin=20F=C3=BCrst?=
Date: Wed, 31 Jan 2024 21:54:21 +0100
Subject: [PATCH] update benchmark plotters for changes made to benchmark and
 result format

---
 .../__pycache__/common.cpython-39.pyc         | Bin 579 -> 0 bytes
 benchmarks/benchmark-plotters/common.py       | 29 ++++++++++++++-
 .../plot-perf-peakthroughput-bar.py           | 30 +++++-----------
 .../plot-perf-peakthroughput-cpu-bar.py       | 28 ++++-----------
 .../plot-perf-submitmethod.py                 | 34 +++++++-----------
 5 files changed, 56 insertions(+), 65 deletions(-)
 delete mode 100644 benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc

diff --git a/benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc b/benchmarks/benchmark-plotters/__pycache__/common.cpython-39.pyc
deleted file mode 100644
index 024860791d18f1a1db739cc91e1b1fa73ad0bea6..0000000000000000000000000000000000000000
GIT binary patch
(unreadable binary delta for the deleted bytecode cache omitted)

diff --git a/benchmarks/benchmark-plotters/common.py b/benchmarks/benchmark-plotters/common.py
index 4f44dd8..eeb2f90 100644
--- a/benchmarks/benchmark-plotters/common.py
+++ b/benchmarks/benchmark-plotters/common.py
@@ -1,3 +1,10 @@
+import os
+import json
+
+from typing import List
+from numpy import float64
+
+
 # calculates throughput in GiB/s from the measured
 # transfer duration (in nanoseconds) for a given element
 # whose size is given in bytes
@@ -12,4 +19,24 @@ def calc_throughput(size_bytes,time_ns):
 def index_from_element(value,array):
     for (idx,val) in enumerate(array):
         if val == value: return idx
-    return 0
\ No newline at end of file
+    return 0
+
+
+# loads the measurements from a given file
+def load_time_mesurements(file_path) -> List[float64]:
+    with open(file_path, 'r') as file:
+        data = json.load(file)
+        count = data["count"]
+        runcount_divisor = data["list"][0]["task"]["reps"]
+
+        # if there is more than one task, the internal repetition
+        # counts should all be the same; if you decide they need
+        # not be, remove the check below
+
+        if count > 1:
+            for i in range(count):
+                if runcount_divisor != data["list"][i]["task"]["reps"]:
+                    print("Run count mismatch between tasks. Check the comment above; aborting for now.")
+                    os.abort()
+
+        return [ x / runcount_divisor for x in data["timings"]]
\ No newline at end of file
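For reference, a minimal sketch of how the new `load_time_mesurements` helper consumes a result file. The JSON values below are invented for illustration; only the field layout (top-level `count`, per-task `reps`, flat `timings` array) is taken from the hunk above.

```python
import json
import tempfile

# invented result file in the format the new helper expects
result = {
    "count": 2,
    "list": [
        {"task": {"reps": 100}},
        {"task": {"reps": 100}},
    ],
    "timings": [12000000.0, 11500000.0],  # nanoseconds, summed over all reps
}

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump(result, f)
    path = f.name

# mirrors the helper: verify all tasks agree on "reps", then
# divide each timing by the shared repetition count
with open(path, "r") as file:
    data = json.load(file)
    reps = data["list"][0]["task"]["reps"]
    assert all(t["task"]["reps"] == reps for t in data["list"])
    per_run = [x / reps for x in data["timings"]]

print(per_run)  # [120000.0, 115000.0] -> nanoseconds per repetition
```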
diff --git a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
index 67fe99d..098825b 100644
--- a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
+++ b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-bar.py
@@ -5,7 +5,7 @@ from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput
+from common import calc_throughput, load_time_mesurements
 
 result_path = "benchmark-results/"
 output_path = "benchmark-plots/"
@@ -40,20 +40,8 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "size": data["list"][0]["task"]["size"],
-            "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path):
+    return load_time_mesurements(file_path)
 
 
 # processes a single file and appends the desired timings
@@ -61,12 +49,13 @@ def load_time_mesurements(file_path):
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
+    size = 1024*1024*1024
     try:
-        file_data = load_time_mesurements(file_path)
-        time = file_data["combined"]
+        timing = get_timing(file_path)
         run_idx = 0
-        for t in time:
-            data.append({ runid : run_idx, x_label : dst_node, y_label : calc_throughput(file_data["size"], t)})
+        for t in timing:
+            tp = calc_throughput(size, t)
+            data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
             run_idx = run_idx + 1
     except FileNotFoundError:
         return
@@ -89,8 +78,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title):
     src_node = 0
     for dst_node in {8,11,12,15}:
-        size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib"
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-1e.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-1e.json")
         process_file_to_dataset(file, src_node, dst_node)
 
     df = pd.DataFrame(data)
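The plotter now hardcodes `size = 1024*1024*1024` (1 GiB) instead of reading the transfer size from the result file, since all remaining configurations copy 1 GiB. As a sanity check of the conversion, here is a stand-in for `common.calc_throughput`, assuming it implements the GiB-per-second formula its comment describes (the function body itself is not part of this patch):

```python
# assumed formula: (bytes / 2^30) GiB divided by (ns / 10^9) seconds
def calc_throughput(size_bytes: int, time_ns: float) -> float:
    return (size_bytes / (1024 ** 3)) / (time_ns / 1e9)

size = 1024 * 1024 * 1024  # the 1 GiB constant now hardcoded in the plotter
print(calc_throughput(size, 250000000))  # 1 GiB in 0.25 s -> 4.0 GiB/s
```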
diff --git a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
index 447851e..e4ff1bd 100644
--- a/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
+++ b/benchmarks/benchmark-plotters/plot-perf-peakthroughput-cpu-bar.py
@@ -5,7 +5,7 @@ from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput
+from common import calc_throughput, load_time_mesurements
 
 result_path = "benchmark-results/"
 output_path = "benchmark-plots/"
@@ -40,20 +40,8 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        count = data["count"]
-        batch_size = data["list"][0]["task"]["batching"]["batch_size"] if data["list"][0]["task"]["batching"]["batch_size"] > 0 else 1
-        iterations = data["list"][0]["task"]["iterations"]
-
-        return {
-            "size": data["list"][0]["task"]["size"],
-            "total": sum([x / (iterations * batch_size * count * count) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-            "combined": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-            "submission": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-            "completion": [ x / (count * batch_size) for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-        }
+def get_timing(file_path):
+    return load_time_mesurements(file_path)
 
 
 # processes a single file and appends the desired timings
@@ -61,12 +49,11 @@ def load_time_mesurements(file_path):
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
 def process_file_to_dataset(file_path, src_node, dst_node):
+    size = 1024*1024*1024
     try:
-        file_data = load_time_mesurements(file_path)
-        time = file_data["combined"]
+        timing = get_timing(file_path)
         run_idx = 0
-        for t in time:
-            size = file_data["size"]
+        for t in timing:
             tp = calc_throughput(size, t)
             data.append({ runid : run_idx, x_label : dst_node, y_label : tp})
             run_idx = run_idx + 1
@@ -91,8 +78,7 @@ def plot_bar(table,title,node_config):
 def main(node_config,title,ext):
     src_node = 0
     for dst_node in {8,11,12,15}:
-        size = "512mib" if node_config == "allnodes" and src_node == dst_node and src_node >= 8 else "1gib"
-        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-{size}-{node_config}-cpu-{ext}.json")
+        file = os.path.join(result_path, f"copy-n{src_node}ton{dst_node}-1gib-{node_config}-cpu-{ext}.json")
        process_file_to_dataset(file, src_node, dst_node)
 
     df = pd.DataFrame(data)
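Both peak-throughput plotters append one row per run to the global `data` list and hand it to seaborn via a DataFrame. A self-contained sketch of that pattern follows; the sample rows and the x-axis column label are invented here, since `plot_bar` itself is not shown in this patch:

```python
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# column names: stand-ins for the plotters' runid/x_label/y_label constants
runid = "Run ID"
x_label = "Destination Node"
y_label = "Throughput in GiB/s"

# invented sample rows in the shape process_file_to_dataset appends
data = [
    {runid: 0, x_label: 8, y_label: 28.4},
    {runid: 1, x_label: 8, y_label: 28.1},
    {runid: 0, x_label: 11, y_label: 26.7},
    {runid: 1, x_label: 11, y_label: 26.9},
]

df = pd.DataFrame(data)
sns.barplot(data=df, x=x_label, y=y_label)  # aggregates the runs per node
plt.savefig("example-peakthroughput-bar.png", bbox_inches="tight")
```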
diff --git a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
index fd1cb13..05698c4 100644
--- a/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
+++ b/benchmarks/benchmark-plotters/plot-perf-submitmethod.py
@@ -1,17 +1,19 @@
 import os
 import json
 
+from numpy import float64, int64
+from typing import List
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput, index_from_element
+from common import calc_throughput, index_from_element, load_time_mesurements
 
 runid = "Run ID"
 x_label = "Size of Submitted Task"
 y_label = "Throughput in GiB/s"
 var_label = "Submission Type"
-sizes = ["1kib", "4kib", "1mib"]
-sizes_nice = ["1 KiB", "4 KiB", "1 MiB"]
+sizes = ["1kib", "4kib", "1mib", "128mib"]
+sizes_nice = ["1 KiB", "4 KiB", "1 MiB", "128 MiB"]
 types = ["bs10", "bs50", "ssaw"]
 types_nice = ["Batch, Size 10", "Batch, Size 50", "Single Submit"]
@@ -33,26 +35,14 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path,type_label):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-        iterations = data["list"][0]["task"]["iterations"]
-        divisor = 1
+def get_timing(file_path,type_label) -> List[float64]:
+    divisor = 0
 
-    # bs and ms types for submission process more than one
-    # element per run and the results therefore must be
-    # divided by this number
+    if type_label == "bs10": divisor = 10
+    elif type_label == "bs50" : divisor = 50
+    else: divisor = 1
 
-    if type_label in ["bs10", "ms10"]: divisor = 10
-    elif type_label in ["ms50", "bs50"]: divisor = 50
-    else: divisor = 1
-
-    return {
-        "total": data["list"][0]["report"]["time"]["total"] / (iterations * divisor),
-        "combined": [ x / divisor for x in data["list"][0]["report"]["time"]["combined"]],
-        "submission": [ x / divisor for x in data["list"][0]["report"]["time"]["submission"]],
-        "completion": [ x / divisor for x in data["list"][0]["report"]["time"]["completion"]]
-    }
+    return [ x / divisor for x in load_time_mesurements(file_path)]
 
 
 # processes a single file and appends the desired timings
@@ -74,7 +64,7 @@ def process_file_to_dataset(file_path, type_label,size_label):
     else: data_size = 0
 
     try:
-        time = load_time_mesurements(file_path,type_label)["combined"]
+        time = get_timing(file_path,type_label)
         run_idx = 0
         for t in time:
             data.append({ runid : run_idx, x_label: size_nice, var_label : type_nice, y_label : calc_throughput(data_size, t)})
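The new `get_timing` wrapper only rescales the per-repetition timings from `load_time_mesurements` by how many elements one submission moves: 10 or 50 for the batched types, 1 for single submit (the former `ms10`/`ms50` labels were dropped along with the old loader). A small stand-alone mirror of that divisor logic:

```python
from typing import List

# divisor per submission type, mirroring get_timing: batched runs move
# 10 or 50 elements per timed repetition, single submit moves one
def normalize(timings: List[float], type_label: str) -> List[float]:
    divisor = {"bs10": 10, "bs50": 50}.get(type_label, 1)
    return [x / divisor for x in timings]

print(normalize([500.0, 520.0], "bs10"))  # -> [50.0, 52.0] ns per element
print(normalize([500.0, 520.0], "ssaw"))  # -> [500.0, 520.0]
```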