Notebook

In [ ]:

import json
import os
import boto3
from pprint import pprint
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
import ipywidgets as widgets
import math

In [ ]:

# Name of the S3 bucket that the cells below list and download objects from
BUCKET_NAME = 'modin-jenkins-result'

In [ ]:

# Connect to s3
s3 = boto3.resource('s3')
client = boto3.client('s3')

# Collect the key of every object in the bucket. A single list call returns
# at most one page of results, so walk all pages; a page for an empty bucket
# carries no 'Contents' entry, hence the .get() default.
paginator = client.get_paginator('list_objects_v2')
bucket_keys = []
for page in paginator.paginate(Bucket=BUCKET_NAME):
    bucket_keys.extend(obj['Key'] for obj in page.get('Contents', []))

# Filter for objects containing perf data for commits merged into master.
# TODO: narrow the filter to "-perf-COMMIT" eventually; "-perf" is used for now.
master_commit_keys = [key for key in bucket_keys if "-perf" in key]

In [ ]:

# Download every selected object from the bucket and keep its decoded JSON
perf_data = []
for object_key in master_commit_keys:
    s3_object = client.get_object(Bucket=BUCKET_NAME, Key=object_key)
    raw_body = s3_object['Body'].read()
    # Store the decoded JSON object rather than the raw payload
    perf_data.append(json.loads(raw_body))

In [ ]:

def json_parser(data):
    """
    Parse a single performance json file.

    Args:
        data: decoded json for one commit's perf run. It must provide
            data["commit_info"]["id"] and a data["benchmarks"] list whose
            entries each carry "name" and ["stats"]["mean"].

    Returns:
        commit_hash: hash of the commit for this file
        test_data: dict mapping each benchmark name (with its first five
            characters removed) to the benchmark's mean

    Raises:
        KeyError: if any of the fields listed above is missing.
    """
    # Read from the argument rather than from a notebook-level variable, so
    # the result always describes the file that was actually passed in.
    commit_hash = data["commit_info"]["id"]
    # TODO: also return data["commit_info"]["commit_number"] as the commit
    # order once the perf files provide it.
    test_data = {
        # [5:] drops a fixed five-character prefix -- presumably "test_";
        # verify against the benchmark naming.
        test["name"][5:]: test["stats"]["mean"]
        for test in data["benchmarks"]
    }
    return commit_hash, test_data

In [ ]:

all_commits_data = {}
commit_orders = {}

# Walk the perf files in the order they were fetched: keep each file's parsed
# test data under its commit hash, and remember the hash under its position.
# TODO: once json_parser also returns the commit order, use that value
# instead of the enumeration position.
for position, commit_data in enumerate(perf_data):
    parsed_hash, parsed_tests = json_parser(commit_data)
    all_commits_data[parsed_hash] = parsed_tests
    commit_orders[position] = parsed_hash

In [ ]:

def plot_function_perf(data, func_name):
    """
    Draw a bokeh line plot of one test's values across all recorded commits.

    Besides its arguments, this reads the notebook globals ``commit_orders``
    (position -> commit hash) and the text widgets ``s`` and ``e``, whose
    values select the first and last commit shown on the x axis.

    Args:
        data: mapping of commit hash -> {test name: value}
        func_name: name of the test to plot

    Raises:
        KeyError: if a commit has no entry for func_name, or if ``s.value`` /
            ``e.value`` is not one of the shortened commit hashes.
    """
    ordered_hashes = [commit_orders[idx] for idx in range(len(commit_orders))]
    timings = [data[full_hash][func_name] for full_hash in ordered_hashes]

    # Tick position -> 7-character hash label, and the reverse lookup used to
    # turn the widget values into x-axis bounds.
    tick_labels = {pos: full_hash[:7] for pos, full_hash in enumerate(ordered_hashes)}
    tick_positions = list(range(len(tick_labels)))
    position_of = {label: pos for pos, label in tick_labels.items()}

    plot_title = func_name + " performance"
    visible_range = (position_of[s.value], position_of[e.value])
    plot = figure(plot_width=800, plot_height=400, x_axis_label="commit hash",
                  y_axis_label="seconds", title=plot_title,
                  x_range=visible_range)
    plot.line(tick_positions, timings, line_width=2)

    # One tick per commit, labelled with the short hash and rotated vertically
    plot.xaxis.ticker = tick_positions
    plot.xaxis.major_label_overrides = tick_labels
    plot.xaxis.major_label_orientation = math.pi/2
    show(plot)

In [ ]:

style = {'description_width': 'initial'}

# The selectable test names are taken from the first commit's parsed results
first_commit_tests = list(all_commits_data.values())[0]

# 7-character hashes in commit_orders order; the first and last ones are the
# default bounds offered by the two text boxes.
short_hashes = [commit_orders[pos][:7] for pos in range(len(commit_orders))]
start_hash = short_hashes[0]
end_hash = short_hashes[-1]

# Dropdown for choosing which test to plot
d = widgets.Dropdown(
    options=list(first_commit_tests),
    value='fillna',
    description='Test',
    disabled=False,
    style=style,
)
# Text boxes holding the short hashes that bound the plotted commit range
s = widgets.Text(
    value=start_hash,
    description="Start Hash (Default = " + start_hash + ")",
    disabled=False,
    style=style,
)
e = widgets.Text(
    value=end_hash,
    description="End Hash (Default = " + end_hash + ")",
    disabled=False,
    style=style,
)
output_notebook()

In [ ]:

# Show the test dropdown and the start/end hash boxes, then plot the
# currently selected test
for widget in (d, s, e):
    display(widget)
plot_function_perf(all_commits_data, d.value)

In [ ]:

# Please ignore everything below this cell.

In [ ]:

json_dir = os.getcwd() + "/modin/.benchmarks/Darwin-CPython-3.6-64bit/"
master_hash = "ae9f397109620cf00243169654f2f4bec7809b72"

data = []
commit_order = {}
master_data = []

# File names are split on "_": the first piece is parsed as an integer order
# and the second piece is taken as the commit hash.
for filename in os.listdir(json_dir):
    if not filename.endswith(".json"):
        continue
    name_parts = filename.split("_")
    order = int(name_parts[0])
    commit = name_parts[1]
    with open(json_dir + filename) as f:
        loaded = json.load(f)
    if commit == master_hash:
        # Files for the master commit are kept apart from the others
        master_data.append(loaded)
    else:
        commit_order[order] = commit
        data.append(loaded)

# commit_order maps the numeric file-name prefix to the commit hash; it is
# displayed here as-is, without any sorting.
commit_order

In [ ]:

def json_parser(data):
    """
    Parse a list of performance json files.

    Args:
        data: iterable of decoded perf json objects, one per commit; each
            provides ["commit_info"]["id"] and a ["benchmarks"] list whose
            entries carry "name" and ["stats"]["mean"].

    Returns:
        dict mapping commit hash -> {benchmark name with its first five
        characters removed: mean}
    """
    by_commit = {}
    for entry in data:
        commit_id = entry["commit_info"]["id"]
        by_commit[commit_id] = {
            bench["name"][5:]: bench["stats"]["mean"]
            for bench in entry["benchmarks"]
        }
    return by_commit
        

# Pretty-print the parsed per-test means for the files collected in master_data
pprint(json_parser(master_data))
        
    

In [ ]:

def plot_function_perf(other_data, func_name):
    """
    Plot one test's values for every commit listed in ``commit_order``.

    Reads the notebook global ``commit_order`` and expects its keys to be the
    integers 1..len(commit_order). The commits are plotted in descending key
    order.

    Args:
        other_data: mapping of commit hash -> {test name: value}
        func_name: name of the test to plot
    """
    # NOTE(review): `plt` is not imported in the visible cells -- presumably
    # matplotlib.pyplot; confirm it is in scope before running.
    descending_hashes = [
        commit_order[rank] for rank in range(1, len(commit_order) + 1)
    ][::-1]
    timings = [other_data[full_hash][func_name] for full_hash in descending_hashes]
    short_labels = [full_hash[:7] for full_hash in descending_hashes]

    plt.plot(short_labels, timings)
    plt.ylabel('seconds')
    plt.xlabel('commit hash')
    plt.title(func_name + ' performance')
    plt.show()

def plot_against_master(other_data, master_data, hash_to_compare, func_name):
    """
    Plot one test's value for a chosen commit next to the value for master.

    The master entry is looked up with the notebook global ``master_hash``.
    The two x labels and the two values are printed before plotting.

    Args:
        other_data: mapping of commit hash -> {test name: value}
        master_data: mapping of commit hash -> {test name: value} that
            contains an entry for ``master_hash``
        hash_to_compare: full hash of the commit to compare with master
        func_name: name of the test to plot
    """
    # NOTE(review): `plt` is not imported in the visible cells -- presumably
    # matplotlib.pyplot; confirm it is in scope before running.
    labels = [hash_to_compare[:7], "master"]
    timings = [
        other_data[hash_to_compare][func_name],
        master_data[master_hash][func_name],
    ]
    print(labels)
    print(timings)

    plt.plot(labels, timings)
    plt.ylabel('seconds')
    plt.xlabel('commit hash')
    plt.title(func_name + ' performance')
    plt.show()

In [ ]:

# RUN: parse the non-master benchmark files collected in `data`, then plot
# the 'read_csv' values across the commits in commit_order
other = json_parser(data)
plot_function_perf(other, 'read_csv')

In [ ]:

# Full hash of the commit whose result is compared against master
hash_to_compare = 'a368735324669914efcd9020ac3c8ffffab9b641'
# Parse the master files, then plot the chosen commit's 'read_csv' value next
# to master's (`other` is the parsed non-master data from the previous cell)
master = json_parser(master_data)
plot_against_master(other, master, hash_to_compare, 'read_csv')