import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import plotly.express as px
import requests
data = []
def parse_file(path):
    """Parse a timing dump into a DataFrame with one row per record line.

    ``path`` is downloaded with ``requests`` when it starts with ``"http"``;
    otherwise it is opened as a local file.

    Only lines containing exactly six commas are treated as records; every
    other line is skipped.  The seven comma-separated fields become the
    columns ``Resource``, ``Task`` (lower-cased), ``Branch``, ``Basket``,
    ``Start``, ``Finish`` and ``Nevents``.  The first two fields are used
    verbatim; for the remaining five, the value is the second
    whitespace-separated token of the field (i.e. ``"<label> <value>"``).

    A line that is exactly ``CLEAR`` (ignoring surrounding whitespace)
    discards every record collected so far: the measurement loop was run
    twice with ``CLEAR`` in between, and only the second (warm) pass is kept.

    Records are accumulated in a list local to this call, so the result of
    one call never contains rows from a previously parsed file.

    Returns:
        pandas.DataFrame whose ``Start`` and ``Finish`` columns have been
        converted from float seconds to datetimes (``unit="s"``).  A file
        without any record lines yields an empty frame with the same columns.

    Raises:
        requests.HTTPError: the download returned an unsuccessful status.
        IndexError, ValueError: a record line has a field that lacks a
            second token or whose value is not numeric where required.
    """
    columns = ["Resource", "Task", "Branch", "Basket", "Start", "Finish", "Nevents"]

    if path.startswith("http"):
        response = requests.get(path)
        # Fail loudly here instead of silently parsing an HTML error page.
        response.raise_for_status()
        content = response.content.decode()
    else:
        with open(path) as handle:
            content = handle.read()

    records = []
    for line in content.splitlines():
        if line.strip() == "CLEAR":
            # ran the loop twice with "CLEAR" in between
            # so delete the first set of cold measurements
            records.clear()
            continue
        if line.count(",") != 6:
            continue
        resource, task, branch, basket, start, stop, nevents = line.split(",")
        records.append(dict(
            Resource=resource,
            Task=task.lower(),
            Branch=branch.split()[1],
            Basket=int(basket.split()[1]),
            Start=float(start.split()[1]),
            Finish=float(stop.split()[1]),
            Nevents=int(nevents.split()[1]),
        ))

    # Explicit columns keep the schema stable even when no records were found.
    df = pd.DataFrame(records, columns=columns)
    df["Start"] = pd.to_datetime(df["Start"], unit="s")
    df["Finish"] = pd.to_datetime(df["Finish"], unit="s")
    return df
# Timing dumps, keyed by "<sample>_<nthreads>t":
#   - outreach file uses gzip and has only Muon branches (~6)
#   - nanoaod file uses LZMA and has all branches (~1.5k)
TIMING_URLS = {
    "outreach_4t": "http://uaf-7.t2.ucsd.edu/~namin/dump/threading/outreach_timing_4threads.txt",
    "nanoaod_4t": "http://uaf-7.t2.ucsd.edu/~namin/dump/threading/nanoaod_timing_4threads.txt",
    "outreach_1t": "http://uaf-7.t2.ucsd.edu/~namin/dump/threading/outreach_timing_1threads.txt",
    "nanoaod_1t": "http://uaf-7.t2.ucsd.edu/~namin/dump/threading/nanoaod_timing_1threads.txt",
}

# Parse the dumps one after another, 4-thread runs first, then 1-thread runs.
df_outreach_4t = parse_file(TIMING_URLS["outreach_4t"])
df_nanoaod_4t = parse_file(TIMING_URLS["nanoaod_4t"])
df_outreach_1t = parse_file(TIMING_URLS["outreach_1t"])
df_nanoaod_1t = parse_file(TIMING_URLS["nanoaod_1t"])
def plot(df):
    """Display a Gantt chart of ``df``, titled with the total walltime.

    The walltime is the span in seconds between the earliest ``Start`` and
    the latest ``Finish`` in ``df``.  Bars are coloured by the ``Resource``
    column and tasks are grouped.  Returns whatever the figure's ``show()``
    returns.
    """
    last_finish = df["Finish"].max()
    first_start = df["Start"].min()
    duration = (last_finish - first_start).total_seconds()

    figure = ff.create_gantt(
        df,
        index_col='Resource',
        show_colorbar=True,
        group_tasks=True,
        title=f"total walltime = {duration:.3f}s",
    )
    return figure.show()
# Draw the Gantt charts for the 4-thread runs, outreach first.
for timing_df in (df_outreach_4t, df_nanoaod_4t):
    plot(timing_df)
# The 1-thread runs are left disabled:
# plot(df_outreach_1t)
# plot(df_nanoaod_1t)
# OUTREACH file
# many short baskets at the beginning of the file (which get assigned to thread 1)
# however, the number of events processed by each thread is roughly the same
#
# Keep only the 'interpretation' rows, keep the first row for each distinct
# Basket value, then group by the Task column.
gb = (
    df_outreach_4t.query("Resource == 'interpretation'")
    .drop_duplicates("Basket")
    .groupby("Task")
)
print("Baskets per thread")
print(gb.size())
print("Nevents per thread")
# Printed explicitly: a bare trailing expression is only echoed by a notebook
# cell and produces no output when this file is run as a script.
print(gb["Nevents"].sum())
# Output:
# Baskets per thread
# Task
# thread 1    466
# thread 2     19
# thread 3     19
# thread 4     19
# dtype: int64
# Nevents per thread
# Task
# thread 1    14790510
# thread 2    15612205
# thread 3    15612205
# thread 4    15525493
# Name: Nevents, dtype: int64
# NANOAOD file
#
# Keep only the 'interpretation' rows, keep the first row for each distinct
# Basket value, then group by the Task column.
gb = (
    df_nanoaod_4t.query("Resource == 'interpretation'")
    .drop_duplicates("Basket")
    .groupby("Task")
)
print("Baskets per thread")
print(gb.size())
print("Nevents per thread")
# Printed explicitly: a bare trailing expression is only echoed by a notebook
# cell and produces no output when this file is run as a script.
print(gb["Nevents"].sum())
# Output:
# Baskets per thread
# Task
# thread 1    48
# thread 2    50
# thread 3    49
# thread 4    49
# dtype: int64
# Nevents per thread
# Task
# thread 1    193704
# thread 2    201775
# thread 3    201775
# thread 4    199412
# Name: Nevents, dtype: int64