#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import json
import os
import math

import boto3
from pprint import pprint
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt  # used by the scratch cells below "Please ignore below this cell"


# In[ ]:


BUCKET_NAME = 'modin-jenkins-result'


# In[ ]:


# Connect to S3
s3 = boto3.resource('s3')
client = boto3.client('s3')

# Get the list of objects in the bucket
response = client.list_objects(
    Bucket=BUCKET_NAME,
)

# Filter for keys containing perf data for commits merged into master.
# Use the commented-out filter eventually; just use this one for now:
master_commit_keys = [a['Key'] for a in response['Contents'] if "-perf" in a['Key']]
# master_commit_keys = [a['Key'] for a in response['Contents'] if "-perf-COMMIT" in a['Key']]


# In[ ]:


# Fetch the perf data files from the S3 bucket
perf_data = []
for key in master_commit_keys:
    response = client.get_object(
        Bucket=BUCKET_NAME,
        Key=key
    )
    file_str = response['Body'].read()
    # Convert the data to a JSON object before storing
    file_json = json.loads(file_str)
    perf_data.append(file_json)


# In[ ]:


def json_parser(data):
    """
    Parse a single performance JSON file.

    Args:
        data: JSON object corresponding to the perf data for one commit

    Returns:
        commit_hash: hash of the commit for this file
        test_data: mean runtime for each test run in the suite, keyed by test name
    """
    commit_hash = data["commit_info"]["id"]
    # commit_order = data["commit_info"]["commit_number"]
    test_data = {}
    for test in data["benchmarks"]:
        # Strip the "test_" prefix from the benchmark name
        name = test["name"][5:]
        test_data[name] = test["stats"]["mean"]
    # return commit_hash, commit_order, test_data
    return commit_hash, test_data


# In[ ]:


all_commits_data = {}
commit_orders = {}
i = 0
# Loop through all the perf data files and collect the parsed data
for commit_data in perf_data:
    # Use the commented-out call later and get rid of the `i` lines
    # commit_hash, commit_order, data = json_parser(commit_data)
    commit_hash, data = json_parser(commit_data)
    all_commits_data[commit_hash] = data
    commit_orders[i] = commit_hash
    i += 1


# In[ ]:


def plot_function_perf(data, func_name):
    """Plot the mean runtime of one benchmark across all commits with Bokeh."""
    commits = [commit_orders[i] for i in range(len(commit_orders))]
    perf = [data[commit_hash][func_name] for commit_hash in commits]
    # Map tick positions to abbreviated (7-character) commit hashes
    commits = dict(enumerate([a[:7] for a in commits]))
    commits_n = list(range(len(commits)))
    search_commit = {val: key for (key, val) in commits.items()}
    p = figure(plot_width=800, plot_height=400,
               x_axis_label="commit hash", y_axis_label="seconds",
               title=func_name + " performance",
               x_range=(search_commit[s.value], search_commit[e.value]))
    p.line(commits_n, perf, line_width=2)
    p.xaxis.ticker = commits_n
    p.xaxis.major_label_overrides = commits
    p.xaxis.major_label_orientation = math.pi / 2
    show(p)


# In[ ]:


# Widgets for choosing the benchmark and the commit range to plot
style = {'description_width': 'initial'}
d = widgets.Dropdown(
    options=list((list(all_commits_data.values())[0]).keys()),
    value='fillna',
    description='Test',
    disabled=False,
    style=style
)
s = widgets.Text(
    disabled=False,
    style=style
)
e = widgets.Text(
    disabled=False,
    style=style
)
commits = [commit_orders[i] for i in range(len(commit_orders))]
commits = dict(enumerate([a[:7] for a in commits]))
start_hash = commits[0]
end_hash = commits[len(commits) - 1]
s.description = "Start Hash (Default = " + start_hash + ")"
e.description = "End Hash (Default = " + end_hash + ")"
s.value = start_hash
e.value = end_hash
output_notebook()


# In[ ]:


display(d)
display(s)
display(e)
plot_function_perf(all_commits_data, d.value)


# In[ ]:


# Please ignore below this cell


# In[ ]:


# Load perf data from local pytest-benchmark output instead of S3
json_dir = os.getcwd() + "/modin/.benchmarks/Darwin-CPython-3.6-64bit/"
+ "/modin/.benchmarks/Darwin-CPython-3.6-64bit/" master_hash = "ae9f397109620cf00243169654f2f4bec7809b72" data = [] commit_order = {} master_data = [] for filename in os.listdir(json_dir): if filename.endswith(".json"): order = int(filename.split("_")[0]) commit = filename.split("_")[1].split("_")[0] if commit == master_hash: with open(json_dir + filename) as f: master_data.append(json.load(f)) else: commit_order[order] = commit with open(json_dir + filename) as f: data.append(json.load(f)) #commit_order = OrderedDict(sorted(commit_order.items(), key=lambda t: t)) commit_order # In[ ]: def json_parser(data): parsed_data = {} for commit_data in data: commit_hash = commit_data["commit_info"]["id"] test_data = {} for test in commit_data["benchmarks"]: name = test["name"][5:] test_data[name] = test["stats"]["mean"] parsed_data[commit_hash] = test_data return parsed_data pprint(json_parser(master_data)) # In[ ]: def plot_function_perf(other_data, func_name): commits = [commit_order[i] for i in range(1,len(commit_order)+1)] perf = [other_data[commit_hash][func_name] for commit_hash in commits] commits = [a[:7] for a in commits] commits = commits[::-1] perf = perf[::-1] plt.plot(commits, perf) plt.ylabel('seconds') plt.xlabel('commit hash') plt.title(func_name + ' performance') plt.show() def plot_against_master(other_data, master_data, hash_to_compare, func_name): #commits = [a for a in commit_order] #perf = [other_data[commit_hash][func_name] for commit_hash in commits] #commits = [a[:7] for a in commits] #commits = commits[::-1] #perf = perf[::-1] commits = [hash_to_compare[:7], "master"] compare_perf = other_data[hash_to_compare][func_name] master_perf = master_data[master_hash][func_name] perf = [compare_perf, master_perf] #x = ["master"] #x.append(commits[0]) #y = [master_data[master_hash][func_name]] #y.append(perf[0]) print(commits) print(perf) plt.plot(commits, perf) plt.ylabel('seconds') plt.xlabel('commit hash') plt.title(func_name + ' performance') plt.show() # In[ ]: #RUN other = json_parser(data) plot_function_perf(other, 'read_csv') # In[ ]: hash_to_compare = 'a368735324669914efcd9020ac3c8ffffab9b641' master = json_parser(master_data) plot_against_master(other, master, hash_to_compare, 'read_csv')