@@ -1,11 +1,9 @@
import os
import json
import pandas as pd
from itertools import chain
import seaborn as sns
import matplotlib.pyplot as plt
from common import calc_throughput, index_from_element
from common import calc_throughput, index_from_element, load_time_mesurements
runid = " Run ID "
x_label = " Thread Count "
@@ -13,8 +11,8 @@ y_label = "Throughput in GiB/s"
var_label = " Transfer Size "
thread_counts = ["1t", "2t", "12t"]
thread_counts_nice = ["1 Thread", "2 Threads", "12 Threads"]
engine_counts = ["1mib-1e", "1gib-1e"]
engine_counts_nice = ["1 MiB", "1 GiB"]
size_labels = ["1mib", "1gib"]
size_labels_nice = ["1 MiB", "1 GiB"]
title = \
""" Total Throughput showing cost of MT Submit \n
@@ -34,47 +32,36 @@ data = []
# loads the measurements from a given file and processes them
# so that they are normalized, meaning that the timings returned
# are nanoseconds per element transfered
def load_time_mesurements ( file_path ) :
with open ( file_path , ' r ' ) as file :
data = json . load ( file )
count = data [ " count " ]
iterations = data [ " list " ] [ 0 ] [ " task " ] [ " iterations " ]
def get_timing ( file_path , thread_count ) :
divisor = 0
# work queue size is 120 which is split over all available threads
# therefore we divide the result by 120/n_threads to get the per-element speed
if thread_count == " 1t " : divisor = 1
elif thread_count == " 2t " : divisor = 2
elif thread_count == " 12t " : divisor = 12
return {
" total " : sum ( [ x / ( iterations * 120 ) for x in list ( chain ( [ data [ " list " ] [ i ] [ " report " ] [ " time " ] [ " total " ] for i in range ( count ) ] ) ) ] ) ,
" combined " : [ x / 120 for x in list ( chain ( * [ data [ " list " ] [ i ] [ " report " ] [ " time " ] [ " combined " ] for i in range ( count ) ] ) ) ] ,
" submission " : [ x / 120 for x in list ( chain ( * [ data [ " list " ] [ i ] [ " report " ] [ " time " ] [ " submission " ] for i in range ( count ) ] ) ) ] ,
" completion " : [ x / 120 for x in list ( chain ( * [ data [ " list " ] [ i ] [ " report " ] [ " time " ] [ " completion " ] for i in range ( count ) ] ) ) ]
}
return [ x / divisor for x in load_time_mesurements ( file_path ) ]
# procceses a single file and appends the desired timings
# to the global data-array, handles multiple runs with a runid
# and ignores if the given file is not found as some
# configurations may not be benchmarked
def process_file_to_dataset(file_path, engine_label, thread_count):
    engine_index = index_from_element(engine_label, engine_counts)
    engine_nice = engine_counts_nice[engine_index]
def process_file_to_dataset(file_path, size_label, thread_count):
    size_index = index_from_element(size_label, size_labels)
    size_nice = size_labels_nice[size_index]
threadc_index = index_from_element ( thread_count , thread_counts )
thread_count_nice = thread_counts_nice [ threadc_index ]
data_size = 0
    if engine_label in ["1gib-1e", "1gib-4e"]: data_size = 1024 * 1024 * 1024
    elif engine_label in ["1mib-1e", "1mib-4e"]: data_size = 1024 * 1024
    else: data_size = 0
    if size_label == "1gib": data_size = 1024 * 1024 * 1024
    elif size_label == "1mib": data_size = 1024 * 1024
timing = get_timing ( file_path , thread_count )
run_idx = 0
for t in timing :
data . append ( { runid : run_idx , x_label : thread_count_nice , var_label : size_nice , y_label : calc_throughput ( data_size , t ) } )
run_idx = run_idx + 1
try :
time = load_time_mesurements ( file_path ) [ " combined " ]
run_idx = 0
for t in time :
data . append ( { runid : run_idx , x_label : thread_count_nice , var_label : engine_nice , y_label : calc_throughput ( data_size , t ) } )
run_idx = run_idx + 1
except FileNotFoundError :
return
# loops over all possible configuration combinations and calls
@@ -84,17 +71,19 @@ def main():
result_path = " benchmark-results/ "
output_path = " benchmark-plots/ "
    for engine_label in engine_counts:
    for size in size_labels:
        for thread_count in thread_counts:
            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{engine_label}.json")
            process_file_to_dataset(file, engine_label, thread_count)
            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{size}.json")
            process_file_to_dataset(file, size, thread_count)
df = pd . DataFrame ( data )
df . set_index ( index , inplace = True )
sns . barplot ( x = x_label , y = y_label , hue = var_label , data = df , palette = " rocket " , errorbar = " sd " )
plt . figure ( figsize = ( 4 , 4 ) )
plt . ylim ( 0 , 30 )
sns . barplot ( x = x_label , y = y_label , hue = var_label , data = df , palette = " mako " , errorbar = " sd " )
plt . savefig ( os . path . join ( output_path , " plot-perf-mtsubmit.pdf " ) , bbox_inches = ' tight ' )
plt . savefig ( os . path . join ( output_path , " plot-mtsubmit.pdf " ) , bbox_inches = ' tight ' )
plt . show ( )