#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import pandas as pd
import plotly.figure_factory as ff
import plotly.express as px
import requests


# In[2]:


def parse_file(path):
    """Parse a timing log (local path or URL) into a DataFrame of per-basket measurements."""
    if path.startswith("http"):
        content = requests.get(path).content.decode()
    else:
        with open(path) as f:
            content = f.read()
    # per-call accumulator so each file is parsed independently
    data = []
    for line in content.splitlines():
        if line.strip() == "CLEAR":
            # ran the loop twice with "CLEAR" in between,
            # so delete the first (cold) set of measurements
            data.clear()
        if line.count(",") != 6:
            continue
        resource, task, branch, basket, start, stop, nevents = line.split(",")
        data.append(dict(
            Resource=resource,
            Task=task.lower(),
            Branch=branch.split()[1],
            Basket=int(basket.split()[1]),
            Start=float(start.split()[1]),
            Finish=float(stop.split()[1]),
            Nevents=int(nevents.split()[1]),
        ))
    df = pd.DataFrame(data)
    df["Start"] = pd.to_datetime(df["Start"], unit="s")
    df["Finish"] = pd.to_datetime(df["Finish"], unit="s")
    return df


# outreach file uses gzip and has only Muon branches (~6)
# nanoaod file uses LZMA and has all branches (~1.5k)
df_outreach_4t = parse_file("http://uaf-7.t2.ucsd.edu/~namin/dump/threading/outreach_timing_4threads.txt")
df_nanoaod_4t = parse_file("http://uaf-7.t2.ucsd.edu/~namin/dump/threading/nanoaod_timing_4threads.txt")
df_outreach_1t = parse_file("http://uaf-7.t2.ucsd.edu/~namin/dump/threading/outreach_timing_1threads.txt")
df_nanoaod_1t = parse_file("http://uaf-7.t2.ucsd.edu/~namin/dump/threading/nanoaod_timing_1threads.txt")


# In[3]:


def plot(df):
    """Show a Gantt chart of the measurements, grouped by Resource, with total walltime in the title."""
    duration = (df["Finish"].max() - df["Start"].min()).total_seconds()
    fig = ff.create_gantt(
        df,
        index_col="Resource",
        show_colorbar=True,
        group_tasks=True,
        title=f"total walltime = {duration:.3f}s",
    )
    fig.show()


# In[4]:


plot(df_outreach_4t)


# In[5]:


plot(df_nanoaod_4t)


# In[6]:


# plot(df_outreach_1t)


# In[7]:


# plot(df_nanoaod_1t)


# In[8]:


# OUTREACH file
# many short baskets at the beginning of the file (which get assigned to thread 1);
# however, the number of events processed by each thread is roughly the same
gb = df_outreach_4t.query("Resource == 'interpretation'").drop_duplicates("Basket").groupby("Task")
print("Baskets per thread")
print(gb.size())
print("Nevents per thread")
gb["Nevents"].sum()


# In[9]:


# NANOAOD file
gb = df_nanoaod_4t.query("Resource == 'interpretation'").drop_duplicates("Basket").groupby("Task")
print("Baskets per thread")
print(gb.size())
print("Nevents per thread")
gb["Nevents"].sum()


# In[ ]: