# bachelor-thesis/qdp_project/plotter.py

import os
import csv
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

# Directory that receives every generated plot and LaTeX table.
output_path = "./plots"

# Benchmark result files (semicolon-separated CSV), one per evaluated
# configuration.
hbm_result = "./evaluation-results/current/qdp-xeonmax-hbm-tca4-tcb0-tcj1-tmul32-wl4294967296-cs2097152.csv"
dram_result = "./evaluation-results/current/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul32-wl4294967296-cs2097152.csv"
prefetch_result = "./evaluation-results/current/qdp-xeonmax-prefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs8388608.csv"
distprefetch_result = "./evaluation-results/current/qdp-xeonmax-distprefetch-tca1-tcb1-tcj1-tmul32-wl4294967296-cs8388608.csv"

# Name of the CSV column with the total runtime (presumably nanoseconds,
# going by the "-ns" suffix).
tt_name = "rt-ns"

# CSV columns with the per-phase timings, followed by the display labels used
# for them (position by position). The label "NULL" marks an entry that is
# left out of the plot legend.
function_names = ["aggrj-run", "scana-run", "scanb-run"]
fn_nice_prefetch = ["Aggregate", "Scan A", "Scan A and B (parallel)"]
fn_nice_normal = ["Aggregate", "Scan A", "NULL"]

def read_timings_from_csv(fname, fn_nice) -> tuple[list[float], list[str]]:
    """Average the per-phase timing columns of one result file.

    Each column named in the module-level ``function_names`` is summed over
    all rows of the semicolon-separated CSV ``fname`` and stored under the
    display label found at the same position in ``fn_nice``. Phases whose sum
    is zero are dropped; every remaining sum is divided by
    ``1000 * 1000 * <number of rows>`` (a per-row mean, scaled down by 1e6 -
    presumably nanoseconds to milliseconds).

    When exactly three phases remain, the third phase's value is subtracted
    from the second one.

    Returns:
        ``(values, labels)`` - two parallel lists in the order of ``fn_nice``.
    """
    totals = {}
    rows_seen = 0

    with open(fname, newline='') as csvfile:
        for row in csv.DictReader(csvfile, delimiter=';'):
            rows_seen += 1
            for position, column in enumerate(function_names):
                label = fn_nice[position]
                totals[label] = totals.get(label, 0) + int(row[column])

    # Only reached for non-empty totals, i.e. when at least one row was read.
    divisor = 1000 * 1000 * rows_seen
    averages = {}
    for label, total in totals.items():
        if total != 0:
            averages[label] = total / divisor

    if len(averages) == 3:
        second_label, third_label = fn_nice[1], fn_nice[2]
        averages[second_label] = averages[second_label] - averages[third_label]

    return list(averages.values()), list(averages.keys())


def read_total_time_from_csv(fname) -> float:
    """Return the mean of the ``rt-ns`` column of a result file, scaled by 1e-6.

    ``fname`` is read as a semicolon-separated CSV with a header row. The
    ``rt-ns`` values are parsed as integers, averaged over all rows and
    divided by ``1000 * 1000`` (presumably nanoseconds to milliseconds, going
    by the column name).

    Raises:
        ValueError: if the file contains no data rows, since no mean can be
            computed (previously this surfaced as a bare ZeroDivisionError).
        KeyError: if a row has no ``rt-ns`` column.
    """
    with open(fname, newline='') as csvfile:
        runtimes = [int(row["rt-ns"]) for row in csv.DictReader(csvfile, delimiter=';')]

    if not runtimes:
        raise ValueError(f"{fname}: no data rows, cannot compute the mean runtime")

    return sum(runtimes) / (1000 * 1000 * len(runtimes))


def read_cache_hitrate_from_csv(fname) -> float:
    """Return the mean of the ``cache-hr`` column of a result file, times 100.

    ``fname`` is read as a semicolon-separated CSV with a header row. The
    ``cache-hr`` values are parsed as floats and averaged over all rows; the
    factor 100 presumably turns a 0..1 fraction into a percentage.

    Raises:
        ValueError: if the file contains no data rows, since no mean can be
            computed (previously this surfaced as a bare ZeroDivisionError).
        KeyError: if a row has no ``cache-hr`` column.
    """
    with open(fname, newline='') as csvfile:
        hitrates = [float(row["cache-hr"]) for row in csv.DictReader(csvfile, delimiter=';')]

    if not hitrates:
        raise ValueError(f"{fname}: no data rows, cannot compute the mean cache hit rate")

    return (sum(hitrates) * 100) / len(hitrates)


def generate_speedup_table():
    """Build the speedup comparison table for all four configurations.

    Each row holds the configuration name, the speedup relative to the
    DDR-SDRAM result (``baseline / raw time``, so the baseline row is
    ``x1.00``), the cache hit rate and the raw time. The two non-prefetching
    configurations show a LaTeX em dash instead of a hit rate.

    The cells contain LaTeX markup (``\\textemdash``, ``\\%``), so the frame is
    meant to be written out without LaTeX escaping.

    Returns:
        A ``pandas.DataFrame`` of pre-formatted strings.
    """
    columns = ["Configuration", "Speedup", "Cache Hitrate", "Raw Time"]

    # (display name, result file, whether a cache hit rate is reported)
    configurations = [
        ("DDR-SDRAM (Baseline)", dram_result, False),
        ("HBM (Upper Limit)", hbm_result, False),
        ("Prefetching", prefetch_result, True),
        ("Prefetching, Distributed Columns", distprefetch_result, True),
    ]

    # Read every result file once; the first entry doubles as the baseline.
    rawtimes = [read_total_time_from_csv(result) for _, result, _ in configurations]
    baseline = rawtimes[0]

    data = []
    for (name, result, has_hitrate), rawtime in zip(configurations, rawtimes):
        if has_hitrate:
            # "\\%" spells the backslash explicitly; a bare "\%" is an invalid
            # escape sequence that newer Python versions warn about.
            hitrate_cell = f"{read_cache_hitrate_from_csv(result):2.2f} \\%"
        else:
            hitrate_cell = r" \textemdash "

        data.append([name, f"x{baseline / rawtime:1.2f}", hitrate_cell, f"{rawtime:.2f} ms"])

    return pd.DataFrame(data, columns=columns)


def generate_rawtime_base_table():
    """Build the raw-time table for the two non-prefetching configurations.

    Returns:
        A ``pandas.DataFrame`` with one row each for the DDR-SDRAM and the HBM
        result, holding the configuration name and the formatted raw time.
    """
    columns = ["Configuration", "Raw Time"]

    # (display name, result file)
    configurations = [
        ("DDR-SDRAM (Baseline)", dram_result),
        ("HBM (Upper Limit)", hbm_result),
    ]

    data = [
        [name, f"{read_total_time_from_csv(result):.2f} ms"]
        for name, result in configurations
    ]

    return pd.DataFrame(data, columns=columns)


def tex_table(df, fname):
    """Write ``df`` as a LaTeX table to ``fname`` inside ``output_path``.

    The index is omitted. Escaping is switched off explicitly because the
    tables built in this file already carry LaTeX markup in their cells;
    relying on the pandas default would make the result depend on the
    installed pandas version.
    """
    # Create the target directory on first use instead of failing on open().
    os.makedirs(output_path, exist_ok=True)

    with open(os.path.join(output_path, fname), "w", encoding="utf-8") as of:
        of.write(df.to_latex(index=False, escape=False))


def donut_plot(data: tuple[list[float], list[str]], maxtime, fname):
    """Render a timing breakdown as a half donut and save it to ``output_path``.

    Args:
        data: ``(values, labels)`` - two parallel lists. Entries labelled
            ``"NULL"`` are drawn but get no legend entry.
        maxtime: total that the visible half of the donut represents; the
            values are padded up to it so that several plots share one scale.
        fname: file name of the plot inside ``output_path``.

    The caller's lists are left untouched; padding is applied to copies.
    """
    values = list(data[0])
    labels = list(data[1])

    # pad to maxtime
    values.append(maxtime - sum(values))
    labels.append("NULL")

    # pad to only display semi-circle: the last wedge is as large as all the
    # others together and therefore occupies exactly half of the circle
    values.append(sum(values))
    labels.append("NULL")

    # Must happen before the figure and legend are created, otherwise the
    # first plot of a run is still rendered with the previous font size.
    plt.rcParams.update({'font.size': 16})

    fig, (ax, lax) = plt.subplots(nrows=2, gridspec_kw={"height_ratios": [4, 1]})

    palette_color = sns.color_palette('mako_r')
    wedges, _ = ax.pie(values, wedgeprops=dict(width=0.5), colors=palette_color)
    # hide both padding wedges and crop the axes to the upper half
    wedges[-1].set_visible(False)
    wedges[-2].set_visible(False)
    ax.set_ylim(-0.0, 1.0)

    # Pair each labelled wedge with its own legend text so that handles and
    # labels always have the same length and stay aligned.
    legend_entries = [
        (wedge, f"{value:3.2f} ms - {label}")
        for wedge, value, label in zip(wedges, values, labels)
        if label != "NULL"
    ]
    lax.legend(
        [wedge for wedge, _ in legend_entries],
        [text for _, text in legend_entries],
        borderaxespad=0,
        loc="upper center",
    )
    lax.set_ylim(0.0, 0.25)
    lax.axis("off")

    plt.tight_layout()

    os.makedirs(output_path, exist_ok=True)
    try:
        fig.savefig(os.path.join(output_path, fname), bbox_inches='tight')
    finally:
        # release the figure; pyplot otherwise keeps every figure alive
        plt.close(fig)

def main():
    """Generate all timing plots and LaTeX tables into ``output_path``.

    The four result files are read first so that all donut plots can share
    one scale (the largest summed timing), then each plot is rendered once
    and the two tables are written.
    """
    # Set the font size before the first figure is created so that every
    # plot, including the first one, is rendered with it. This replaces
    # rendering the prefetch plot a second time at the end.
    plt.rcParams.update({'font.size': 16})
    os.makedirs(output_path, exist_ok=True)

    # (result file, phase labels, plot file name)
    plots = [
        (prefetch_result, fn_nice_prefetch, "plot-timing-prefetch.pdf"),
        (distprefetch_result, fn_nice_prefetch, "plot-timing-distprefetch.pdf"),
        (dram_result, fn_nice_normal, "plot-timing-dram.pdf"),
        (hbm_result, fn_nice_normal, "plot-timing-hbm.pdf"),
    ]

    timings = [read_timings_from_csv(result, labels) for result, labels, _ in plots]

    # Computed before plotting: the common scale must be based on the
    # unpadded values.
    maxtime = max(sum(values) for values, _ in timings)

    for timing, (_, _, plot_name) in zip(timings, plots):
        donut_plot(timing, maxtime, plot_name)

    tex_table(generate_speedup_table(), "table-qdp-speedup.tex")
    tex_table(generate_rawtime_base_table(), "table-qdp-baseline.tex")


if __name__ == "__main__":
    main()