@@ -1,11 +1,9 @@
import os
import json
import pandas as pd
from itertools import chain
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput, index_from_element
from common import calc_throughput, index_from_element, load_time_mesurements
runid = " Run ID "
x_label = " Thread Count "
@@ -13,8 +11,8 @@ y_label = "Throughput in GiB/s"
var_label = " Transfer Size "
thread_counts = ["1t", "2t", "12t"]
thread_counts_nice = ["1 Thread", "2 Threads", "12 Threads"]
engine_counts = ["1mib-1e", "1gib-1e"]
engine_counts_nice = ["1 MiB", "1 GiB"]
size_labels = ["1mib", "1gib"]
size_labels_nice = ["1 MiB", "1 GiB"]
title = \
""" Total Throughput showing cost of MT Submit \n
@@ -34,47 +32,36 @@ data = []
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transfered
def load_time_mesurements ( file_path ) :
with open ( file_path , ' r ' ) as file :
data = json . load ( file )
count = data [ " count " ]
iterations = data [ " list " ] [ 0 ] [ " task " ] [ " iterations " ]
def get_timing ( file_path , thread_count ) :
divisor = 0
# work queue size is 120 which is split over all available threads
# therefore we divide the result by 120/n_threads to get the per-element speed
if thread_count == " 1t " : divisor = 1
elif thread_count == " 2t " : divisor = 2
elif thread_count == " 12t " : divisor = 12
return {
" total " : sum ( [ x / ( iterations * 120 ) for x in list ( chain ( [ data [ " list " ] [ i ] [ " report " ] [ " time " ] [ " total " ] for i in range ( count ) ] ) ) ] ) ,
" combined " : [ x / 120 for x in list ( chain ( * [ data [ " list " ] [ i ] [ " report " ] [ " time " ] [ " combined " ] for i in range ( count ) ] ) ) ] ,
" submission " : [ x / 120 for x in list ( chain ( * [ data [ " list " ] [ i ] [ " report " ] [ " time " ] [ " submission " ] for i in range ( count ) ] ) ) ] ,
" completion " : [ x / 120 for x in list ( chain ( * [ data [ " list " ] [ i ] [ " report " ] [ " time " ] [ " completion " ] for i in range ( count ) ] ) ) ]
}
return [ x / divisor for x in load_time_mesurements ( file_path ) ]
# procceses a single file and appends the desired timings
# to the global data-array, handles multiple runs with a runid
# and ignores if the given file is not found as some
# configurations may not be benchmarked
def process_file_to_dataset(file_path, engine_label, thread_count):
    engine_index = index_from_element(engine_label, engine_counts)
    engine_nice = engine_counts_nice[engine_index]
def process_file_to_dataset(file_path, size_label, thread_count):
    size_index = index_from_element(size_label, size_labels)
    size_nice = size_labels_nice[size_index]
threadc_index = index_from_element ( thread_count , thread_counts )
thread_count_nice = thread_counts_nice [ threadc_index ]
data_size = 0
    if engine_label in ["1gib-1e", "1gib-4e"]: data_size = 1024 * 1024 * 1024
    elif engine_label in ["1mib-1e", "1mib-4e"]: data_size = 1024 * 1024
    else: data_size = 0
    if size_label == "1gib": data_size = 1024 * 1024 * 1024
    elif size_label == "1mib": data_size = 1024 * 1024
timing = get_timing ( file_path , thread_count )
run_idx = 0
for t in timing :
data . append ( { runid : run_idx , x_label : thread_count_nice , var_label : size_nice , y_label : calc_throughput ( data_size , t ) } )
run_idx = run_idx + 1
try :
time = load_time_mesurements ( file_path ) [ " combined " ]
run_idx = 0
for t in time :
data . append ( { runid : run_idx , x_label : thread_count_nice , var_label : engine_nice , y_label : calc_throughput ( data_size , t ) } )
run_idx = run_idx + 1
except FileNotFoundError :
return
# loops over all possible configuration combinations and calls
@@ -84,17 +71,19 @@ def main():
result_path = " benchmark-results/ "
output_path = " benchmark-plots/ "
    for engine_label in engine_counts:
    for size in size_labels:
        for thread_count in thread_counts:
            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{engine_label}.json")
            process_file_to_dataset(file, engine_label, thread_count)
            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{size}.json")
            process_file_to_dataset(file, size, thread_count)
df = pd . DataFrame ( data )
df . set_index ( index , inplace = True )
sns . barplot ( x = x_label , y = y_label , hue = var_label , data = df , palette = " rocket " , errorbar = " sd " )
plt . figure ( figsize = ( 4 , 4 ) )
plt . ylim ( 0 , 30 )
sns . barplot ( x = x_label , y = y_label , hue = var_label , data = df , palette = " mako " , errorbar = " sd " )
plt . savefig ( os . path . join ( output_path , " plot-perf-mtsubmit.pdf " ) , bbox_inches = ' tight ' )
plt . savefig ( os . path . join ( output_path , " plot-mtsubmit.pdf " ) , bbox_inches = ' tight ' )
plt . show ( )