@@ -1,11 +1,9 @@
 import os
-import json
 import pandas as pd
-from itertools import chain
 import seaborn as sns
 import matplotlib.pyplot as plt
 
-from common import calc_throughput, index_from_element
+from common import calc_throughput, index_from_element, load_time_mesurements
 
 runid = "Run ID"
 x_label = "Thread Count"
@@ -13,8 +11,8 @@ y_label = "Throughput in GiB/s"
 var_label = "Transfer Size"
 thread_counts = ["1t", "2t", "12t"]
 thread_counts_nice = ["1 Thread", "2 Threads", "12 Threads"]
-engine_counts = ["1mib-1e", "1gib-1e"]
-engine_counts_nice = ["1 MiB", "1 GiB"]
+size_labels = ["1mib", "1gib"]
+size_labels_nice = ["1 MiB", "1 GiB"]
 
 title = \
 """Total Throughput showing cost of MT Submit \n
@@ -34,47 +32,36 @@ data = []
 # loads the measurements from a given file and processes them
 # so that they are normalized, meaning that the timings returned
 # are nanoseconds per element transferred
-def load_time_mesurements(file_path):
-    with open(file_path, 'r') as file:
-        data = json.load(file)
-
-    count = data["count"]
-    iterations = data["list"][0]["task"]["iterations"]
-
-    return {
-        "total": sum([x / (iterations * 120) for x in list(chain([data["list"][i]["report"]["time"]["total"] for i in range(count)]))]),
-        "combined": [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)]))],
-        "submission": [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["submission"] for i in range(count)]))],
-        "completion": [x / 120 for x in list(chain(*[data["list"][i]["report"]["time"]["completion"] for i in range(count)]))]
-    }
+def get_timing(file_path, thread_count):
+    divisor = 0
+
+    if thread_count == "1t": divisor = 1
+    elif thread_count == "2t": divisor = 2
+    elif thread_count == "12t": divisor = 12
+
+    # work queue size is 120 which is split over all available threads
+    # therefore we divide the result by 120/n_threads to get the per-element speed
+    return [x / divisor for x in load_time_mesurements(file_path)]
 
 # processes a single file and appends the desired timings
 # to the global data-array, handles multiple runs with a runid
 # and ignores if the given file is not found as some
 # configurations may not be benchmarked
-def process_file_to_dataset(file_path, engine_label, thread_count):
-    engine_index = index_from_element(engine_label, engine_counts)
-    engine_nice = engine_counts_nice[engine_index]
+def process_file_to_dataset(file_path, size_label, thread_count):
+    size_index = index_from_element(size_label, size_labels)
+    size_nice = size_labels_nice[size_index]
     threadc_index = index_from_element(thread_count, thread_counts)
     thread_count_nice = thread_counts_nice[threadc_index]
 
     data_size = 0
-    if engine_label in ["1gib-1e", "1gib-4e"]: data_size = 1024 * 1024 * 1024
-    elif engine_label in ["1mib-1e", "1mib-4e"]: data_size = 1024 * 1024
-    else: data_size = 0
+    if size_label == "1gib": data_size = 1024 * 1024 * 1024
+    elif size_label == "1mib": data_size = 1024 * 1024
 
     try:
-        time = load_time_mesurements(file_path)["combined"]
+        timing = get_timing(file_path, thread_count)
         run_idx = 0
-        for t in time:
-            data.append({runid: run_idx, x_label: thread_count_nice, var_label: engine_nice, y_label: calc_throughput(data_size, t)})
+        for t in timing:
+            data.append({runid: run_idx, x_label: thread_count_nice, var_label: size_nice, y_label: calc_throughput(data_size, t)})
             run_idx = run_idx + 1
     except FileNotFoundError:
         return
 
 # loops over all possible configuration combinations and calls
@@ -84,17 +71,19 @@ def main():
     result_path = "benchmark-results/"
     output_path = "benchmark-plots/"
 
-    for engine_label in engine_counts:
+    for size in size_labels:
         for thread_count in thread_counts:
-            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{engine_label}.json")
-            process_file_to_dataset(file, engine_label, thread_count)
+            file = os.path.join(result_path, f"mtsubmit-{thread_count}-{size}.json")
+            process_file_to_dataset(file, size, thread_count)
 
     df = pd.DataFrame(data)
     df.set_index(index, inplace=True)
-    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="rocket", errorbar="sd")
+    plt.figure(figsize=(4, 4))
+    plt.ylim(0, 30)
+    sns.barplot(x=x_label, y=y_label, hue=var_label, data=df, palette="mako", errorbar="sd")
 
-    plt.savefig(os.path.join(output_path, "plot-perf-mtsubmit.pdf"), bbox_inches='tight')
+    plt.savefig(os.path.join(output_path, "plot-mtsubmit.pdf"), bbox_inches='tight')
     plt.show()
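
For reference, the helpers imported from common are not part of this diff. Judging from the removed in-file implementation and the call sites above, they could look roughly like the sketch below; the exact signatures, the GiB/s conversion inside calc_throughput, and the assumption that load_time_mesurements now returns the per-element "combined" timings are inferred, not taken from the repository.

# hypothetical sketch of common.py, inferred from the code above
import json
from itertools import chain

def index_from_element(element, elements):
    # assumed: position of a label within its label list
    return elements.index(element)

def load_time_mesurements(file_path):
    # assumed: per-element "combined" timings in nanoseconds, normalized by the
    # work queue size of 120, mirroring the old in-file implementation removed above
    with open(file_path, "r") as file:
        data = json.load(file)
    count = data["count"]
    return [x / 120 for x in chain(*[data["list"][i]["report"]["time"]["combined"] for i in range(count)])]

def calc_throughput(data_size, time_ns):
    # assumed conversion from bytes and nanoseconds per element to GiB/s
    return (data_size / (1024 ** 3)) / (time_ns * 1e-9)

Used as in the script above, calc_throughput(1024 * 1024 * 1024, t) would yield the GiB/s value plotted on the y-axis for a 1 GiB transfer taking t nanoseconds per element.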