@@ -1,11 +1,9 @@
 import os
-import json
 import pandas as pd
-from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput, index_from_element
+from common import calc_throughput, index_from_element, load_time_mesurements
 
 runid = "Run ID"
 x_label = "Thread Count"
@@ -13,8 +11,8 @@ y_label = "Throughput in GiB/s"
 var_label = "Transfer Size"
 thread_counts = ["1t", "2t", "12t"]
 thread_counts_nice = ["1 Thread", "2 Threads", "12 Threads"]
-engine_counts = ["1mib-1e", "1gib-1e"]
-engine_counts_nice = ["1 MiB", "1 GiB"]
+size_labels = ["1mib", "1gib"]
+size_labels_nice = ["1 MiB", "1 GiB"]
 
 title = \
 """Total Throughput showing cost of MT Submit \n
@@ -34,47 +32,36 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-
-    count = data["count"]
-    iterations = data["list"][0]["task"]["iterations"]
-
-    return {
-        "total": sum([x / (iterations * 120) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-        "combined": [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-        "submission": [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-        "completion": [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-    }
+def get_timing(file_path, thread_count):
+    divisor = 0
+
+    if thread_count == "1t": divisor = 1
+    elif thread_count == "2t": divisor = 2
+    elif thread_count == "12t": divisor = 12
+
+    # work queue size is 120 which is split over all available threads
+    # therefore we divide the result by 120/n_threads to get the per-element speed
+    return [x / divisor for x in load_time_mesurements(file_path)]
 
 # processes a single file and appends the desired timings
 # to the global data-array, handles multiple runs with a runid
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
-def process_file_to_dataset(file_path, engine_label, thread_count):
-    engine_index = index_from_element(engine_label, engine_counts)
-    engine_nice = engine_counts_nice[engine_index]
+def process_file_to_dataset(file_path, size_label, thread_count):
+    size_index = index_from_element(size_label, size_labels)
+    size_nice = size_labels_nice[size_index]
     threadc_index = index_from_element(thread_count, thread_counts)
     thread_count_nice = thread_counts_nice[threadc_index]
 
     data_size = 0
-    if engine_label in ["1gib-1e", "1gib-4e"]: data_size = 1024 * 1024 * 1024
-    elif engine_label in ["1mib-1e", "1mib-4e"]: data_size = 1024 * 1024
-    else: data_size = 0
+    if size_label == "1gib": data_size = 1024 * 1024 * 1024
+    elif size_label == "1mib": data_size = 1024 * 1024
 
     try:
-        time = load_time_mesurements(file_path)["combined"]
+        timing = get_timing(file_path, thread_count)
         run_idx = 0
-        for t in time:
-            data.append({runid: run_idx, x_label: thread_count_nice, var_label: engine_nice, y_label: calc_throughput(data_size, t)})
+        for t in timing:
+            data.append({runid: run_idx, x_label: thread_count_nice, var_label: size_nice, y_label: calc_throughput(data_size, t)})
             run_idx = run_idx + 1
     except FileNotFoundError:
         return
 
 # loops over all possible configuration combinations and calls
@@ -84,17 +71,19 @@ def main():
     result_path = "benchmark-results/"
     output_path = "benchmark-plots/"
 
-    for engine_label in engine_counts:
+    for size in size_labels:
         for thread_count in thread_counts:
-            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{engine_label}.json")
-            process_file_to_dataset(file, engine_label, thread_count)
+            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{size}.json")
+            process_file_to_dataset(file, size, thread_count)
 
     df = pd.DataFrame(data)
     df.set_index(index, inplace=True)
-    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="rocket", errorbar="sd")
+    plt.figure(figsize=(4, 4))
+    plt.ylim(0, 30)
+    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="mako", errorbar="sd")
 
-    plt.savefig(os.path.join(output_path, "plot-perf-mtsubmit.pdf"), bbox_inches='tight')
+    plt.savefig(os.path.join(output_path, "plot-mtsubmit.pdf"), bbox_inches='tight')
     plt.show()
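
For reference, the helpers imported from common are not part of this diff. Judging from the removed in-file implementation and the call sites above, they could look roughly like the sketch below; the exact signatures, the GiB/s conversion inside calc_throughput, and the assumption that load_time_mesurements now returns the per-element "combined" timings are inferred, not taken from the repository.

# hypothetical sketch of common.py, inferred from the code above
import json
from itertools import chain

def index_from_element(element, elements):
    # assumed: position of a label within its label list
    return elements.index(element)

def load_time_mesurements(file_path):
    # assumed: per-element "combined" timings in nanoseconds, normalized by the
    # work queue size of 120, mirroring the old in-file implementation removed above
    with open(file_path, "r") as file:
        data = json.load(file)
    count = data["count"]
    return [x / 120 for x in chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)])]

def calc_throughput(data_size, time_ns):
    # assumed conversion from bytes and nanoseconds per element to GiB/s
    return (data_size / (1024 ** 3)) / (time_ns * 1e-9)

Used as in the script above, calc_throughput(1024 * 1024 * 1024, t) would yield the GiB/s value plotted on the y-axis for a 1 GiB transfer taking t nanoseconds per element.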