In [1]:
%matplotlib inline

import numpy as np
import pylab as py
import pandas as pd
import matplotlib.pyplot as plt
import IPython

from thclient import PerfherderClient
from sklearn import mixture
from scipy.stats import norm

# Configure the figure size (32 x 14) through IPython's pylab helpers so the
# scatter plots drawn by the cells below are rendered large.
IPython.core.pylabtools.figsize(32, 14)
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/sklearn/utils/fixes.py:64: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() instead
  if 'order' in inspect.getargspec(np.copy)[0]:
In [2]:
def model(values, n_components=2, threshold_ratio=1.3):
    """Fit Gaussian mixtures of increasing size and select one by BIC ratio.

    One ``mixture.GMM`` is fitted to ``values`` for every component count
    from 1 up to ``n_components``. Walking from the smallest model upwards,
    the first model whose BIC divided by the BIC of the next larger model is
    below ``threshold_ratio`` is returned. If no adjacent pair satisfies
    that condition, the largest model is returned.

    Parameters
    ----------
    values
        Data handed unchanged to ``GMM.fit`` and ``GMM.bic``.
    n_components : int
        Largest number of mixture components to try.
    threshold_ratio : float
        Upper bound on ``bic(simpler) / bic(larger)`` under which the simpler
        model is preferred.

    Returns
    -------
    The selected fitted mixture model.
    """
    candidates = [mixture.GMM(n_components=k).fit(values)
                  for k in range(1, n_components + 1)]
    scores = [candidate.bic(values) for candidate in candidates]

    # Favor simpler models: stop at the first model that the next larger one
    # does not beat by at least the requested BIC ratio.
    # NOTE(review): the ratio comparison presumably expects positive BIC
    # values of the same sign -- confirm for the data this is used on.
    for simpler, bic_small, bic_large in zip(candidates, scores, scores[1:]):
        if bic_small / bic_large < threshold_ratio:
            return simpler

    return candidates[-1]


def ztest(mean_1, var_1, n_1, mean_2, var_2, n_2):
    """Upper-tail p-value of a two-sample z-test on the absolute mean difference.

    The statistic is ``|mean_1 - mean_2| / sqrt(var_1/n_1 + var_2/n_2)`` and
    the returned value is the probability that a standard normal variable
    exceeds it.

    Parameters
    ----------
    mean_1, mean_2
        Sample means of the two groups.
    var_1, var_2
        Sample variances of the two groups.
    n_1, n_2
        Sample sizes of the two groups (need not be integers).

    Returns
    -------
    The upper-tail probability, as returned by ``scipy.stats.norm.sf``
    (a NumPy scalar or array, following the inputs).
    """
    diff = abs(mean_1 - mean_2)
    stderr = np.sqrt(var_1/n_1 + var_2/n_2)
    z = diff/stderr
    # The survival function is mathematically 1 - cdf(z) but is evaluated
    # directly, so very small p-values keep their precision instead of
    # rounding to exactly 0.0 through cancellation.
    return norm.sf(z)


def sort_by(x, y):
    """Return the items of ``x`` ordered by their corresponding keys in ``y``.

    ``x`` and ``y`` are paired positionally (extra items of the longer one
    are ignored) and the pairs are sorted by ascending key only. The sort is
    stable: items whose keys compare equal keep their original relative
    order, and the items themselves are never compared, so they do not need
    to be orderable.

    Parameters
    ----------
    x : iterable
        Values to reorder.
    y : iterable
        Sort keys, one per value.

    Returns
    -------
    list
        The values of ``x`` in key order.
    """
    # Sort on the key alone; comparing whole (key, value) tuples would fall
    # back to comparing the values whenever two keys tie.
    pairs = sorted(zip(y, x), key=lambda pair: pair[0])
    return [item for _, item in pairs]


def sorted_params(model):
    """Return a fitted mixture's parameters ordered by component mean.

    Parameters
    ----------
    model
        Object exposing ``means_``, ``covars_`` and ``weights_``.

    Returns
    -------
    tuple
        ``(means, variances, weights)`` where ``means`` is ``means_`` sorted
        ascending and the other two are ``covars_`` and ``weights_``
        reordered with ``sort_by`` using ``means_`` as the key.
    """
    ordered_means = sorted(model.means_)
    ordered_variances, ordered_weights = (
        sort_by(attribute, model.means_)
        for attribute in (model.covars_, model.weights_)
    )
    return (ordered_means, ordered_variances, ordered_weights)


def compare_windows(a, b, pvalue=1e-8):
    """Compare two windows of samples component by component.

    Each window is turned into a single-column array and fitted with
    ``model``. When both fits have the same number of components, the
    components are paired in order of increasing mean (via
    ``sorted_params``) and each pair is compared with ``ztest``, using the
    component weight times the window length as the sample size.

    Parameters
    ----------
    a, b
        One-dimensional sequences of samples (pandas Series, NumPy arrays or
        plain lists).
    pvalue : float
        A component pair whose ``ztest`` result is below this value triggers
        an alert.

    Returns
    -------
    ``"ALERT"`` if any paired component differs significantly, otherwise
    ``None`` (also ``None`` when the two fits have different component
    counts).
    """
    # Convert first: ``reshape`` is an ndarray method and is not available
    # on lists or on pandas Series in current pandas releases.
    samples_a = np.asarray(a).reshape((len(a), 1))
    samples_b = np.asarray(b).reshape((len(b), 1))

    model_a = model(samples_a)
    model_b = model(samples_b)

    if model_a.n_components != model_b.n_components:
        return None  # TODO: Fallback to a simpler model for both windows

    mean_a, var_a, weight_a = sorted_params(model_a)
    mean_b, var_b, weight_b = sorted_params(model_b)

    for i in range(model_a.n_components):
        p = ztest(mean_a[i], var_a[i], weight_a[i]*len(a),
                  mean_b[i], var_b[i], weight_b[i]*len(b))

        if p < pvalue:
            return "ALERT"

    return None
        
        
def find_regressions(repository, signature, interval=8*604800, window_size=60):
    """Fetch a Perfherder series and plot it with detected change points in red.

    The series is retrieved with ``PerfherderClient.get_performance_data``.
    Every pair of adjacent, non-overlapping windows of ``window_size`` points
    is passed to ``compare_windows``; when that returns ``"ALERT"`` the first
    point of the second window is coloured red. All other points are white.
    The result is drawn as a scatter plot; nothing is returned.

    Parameters
    ----------
    repository
        Repository name forwarded to the Perfherder client.
    signature
        Series signature; also used to index the client's response.
    interval
        Forwarded to the client as ``interval``.
        NOTE(review): presumably seconds (604800 s = 1 week) -- confirm
        against the PerfherderClient API.
    window_size : int
        Number of points in each of the two compared windows.
    """
    series_data = PerfherderClient().get_performance_data(
        repository, signatures=signature, interval=interval)
    values = pd.Series(series_data[signature]["value"])  # TODO: Standardize values?

    n_points = len(values)
    colors = ["white"] * n_points

    # Slide a pair of back-to-back windows over the series one point at a time.
    last_start = n_points - 2 * window_size
    for start in range(last_start + 1):
        split = start + window_size
        before = values[start:split]
        after = values[split:split + window_size]

        if compare_windows(before, after) == "ALERT":
            colors[split] = "red"

    plt.scatter(range(n_points), values, c=colors)
In [3]:
# Scan this mozilla-inbound series with the default interval and window size.
find_regressions("mozilla-inbound", "5c6e37c52e0bd0abf37c52adf565068fad37bc03")
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):
In [4]:
# Scan this mozilla-inbound series over a longer interval (12*604800) than the default.
find_regressions("mozilla-inbound", "ff4ce004871790022f413b1abb21632e7c788f65", interval=12*604800)
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):
In [5]:
# Scan this mozilla-inbound series with the default interval and window size.
find_regressions("mozilla-inbound", "93bb72a460cf2b6ce7f9d7912f0a2ab49c5cf9d8")
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):
In [6]:
# Scan this mozilla-inbound series over a longer interval (12*604800) than the default.
find_regressions("mozilla-inbound", "dbaade2dc703221d3287f7b3611302b4d34e8e7f", interval=12*604800)
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == str('face'):