%matplotlib inline
import numpy as np
import pylab as py
import pandas as pd
import matplotlib.pyplot as plt
import IPython
from thclient import PerfherderClient
from sklearn import mixture
from scipy.stats import norm
# Set the default figure size used for the plots below to 32 x 14
# (presumably inches, as in matplotlib's figsize -- confirm).
IPython.core.pylabtools.figsize(32, 14)
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/sklearn/utils/fixes.py:64: DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() instead if 'order' in inspect.getargspec(np.copy)[0]:
def model(values, n_components=2, threshold_ratio=1.3):
    """Fit Gaussian mixtures to *values* and return the simplest adequate one.

    One mixture is fitted for every component count from 1 up to
    ``n_components`` and each is scored with its BIC on the same data.

    Args:
        values: Samples in the layout expected by ``mixture.GMM.fit``
            (callers in this file pass a column of shape ``(n, 1)``).
        n_components: Largest number of mixture components to try.
        threshold_ratio: A simpler model is accepted as soon as its BIC
            divided by the BIC of the next larger model is below this value.

    Returns:
        The fitted mixture that was selected; the largest one when no
        simpler model passes the ratio test.
    """
    candidates = [
        mixture.GMM(n_components=k).fit(values)
        for k in range(1, n_components + 1)
    ]
    scores = [candidate.bic(values) for candidate in candidates]

    # Evaluated up front so the fallback is resolved before any comparison.
    fallback = candidates[-1]

    # Favor simpler models to more complicated ones.
    # NOTE(review): the ratio test presumably assumes positive BIC values;
    # confirm this holds for the data being modelled.
    for simpler, score, next_score in zip(candidates, scores, scores[1:]):
        if score / next_score < threshold_ratio:
            return simpler
    return fallback
def ztest(mean_1, var_1, n_1, mean_2, var_2, n_2):
    """Return the upper-tail p-value of a two-sample z-test.

    The statistic is the absolute difference of the two means divided by
    the standard error ``sqrt(var_1/n_1 + var_2/n_2)``. The returned value
    is the one-sided tail probability ``P(Z > z)`` of a standard normal.

    Args:
        mean_1, var_1, n_1: Mean, variance and sample count of sample 1.
        mean_2, var_2, n_2: Mean, variance and sample count of sample 2.

    Returns:
        The tail probability; 0.5 when the means are equal.
    """
    diff = abs(mean_1 - mean_2)
    stderr = np.sqrt(var_1 / n_1 + var_2 / n_2)
    z = diff / stderr
    # norm.sf is the survival function (1 - cdf) computed directly; unlike
    # ``1 - norm.cdf(z)`` it does not cancel to exactly 0.0 for large z.
    return norm.sf(z)
def sort_by(x, y):
    """Return the items of *x* reordered by ascending values of *y*.

    Only the keys in *y* are compared, so the items of *x* do not need to
    be orderable. The sort is stable: items whose keys compare equal keep
    their original relative order.

    Args:
        x: Items to reorder.
        y: Sort keys, paired positionally with *x*.

    Returns:
        A new list with the items of *x* in key order.
    """
    pairs = sorted(zip(y, x), key=lambda pair: pair[0])
    return [item for _, item in pairs]
def sorted_params(model):
    """Return the mixture parameters ordered by ascending component mean.

    A single ordering is derived from ``model.means_`` and applied to the
    means, ``model.covars_`` and ``model.weights_`` alike, so position *i*
    of every returned list refers to the same component even when two
    components have equal means.

    Args:
        model: Fitted mixture exposing ``means_``, ``covars_`` and
            ``weights_`` with one entry per component.

    Returns:
        A ``(mean, variance, weight)`` tuple of lists.
    """
    means = model.means_
    # One permutation for all three attributes keeps components aligned.
    order = sorted(range(len(means)), key=lambda idx: means[idx])
    mean = [means[idx] for idx in order]
    variance = [model.covars_[idx] for idx in order]
    weight = [model.weights_[idx] for idx in order]
    return (mean, variance, weight)
def compare_windows(a, b, pvalue=1e-8):
    """Compare two windows of samples component by component.

    Each window is modelled with ``model``; when both mixtures have the
    same number of components, the components are paired in order of
    ascending mean and each pair is compared with ``ztest``. The sample
    count given to the test for a component is its mixture weight times
    the window length.

    Args:
        a: One-dimensional samples of the first window.
        b: One-dimensional samples of the second window.
        pvalue: A component pair whose test result is below this value
            triggers an alert.

    Returns:
        ``"ALERT"`` if any component pair differs significantly, otherwise
        ``None`` (also when the two mixtures have different sizes).
    """
    # Convert through numpy first: callers pass pandas Series, and
    # Series.reshape is not available in recent pandas releases.
    column_a = np.asarray(a).reshape((len(a), 1))
    column_b = np.asarray(b).reshape((len(b), 1))
    model_a = model(column_a)
    model_b = model(column_b)

    if model_a.n_components != model_b.n_components:
        return None  # TODO: Fallback to a simpler model for both windows

    mean_a, var_a, weight_a = sorted_params(model_a)
    mean_b, var_b, weight_b = sorted_params(model_b)

    for i in range(model_a.n_components):
        p = ztest(mean_a[i], var_a[i], weight_a[i] * len(a),
                  mean_b[i], var_b[i], weight_b[i] * len(b))
        if p < pvalue:
            return "ALERT"
    return None
def find_regressions(repository, signature, interval=8*604800, window_size=60):
    """Fetch a Perfherder series, flag suspicious points and plot it.

    Two adjacent windows of ``window_size`` samples are slid over the
    series one sample at a time. Whenever ``compare_windows`` reports
    ``"ALERT"`` for a pair, the first sample of the second window is
    coloured red; every other sample stays white. The result is drawn as
    a scatter plot.

    Args:
        repository: Repository name passed to the Perfherder client.
        signature: Signature of the series to fetch.
        interval: Forwarded to ``get_performance_data`` (presumably a time
            span in seconds, 604800 being one week -- confirm).
        window_size: Number of samples in each of the two windows.
    """
    perfherder = PerfherderClient()
    series = perfherder.get_performance_data(
        repository, signatures=signature, interval=interval)
    values = pd.Series(series[signature]["value"])  # TODO: Standardize values?

    total = len(values)
    colors = ["white" for _ in range(total)]

    # The last valid start leaves room for two complete windows.
    last_start = total - 2 * window_size
    for start in range(last_start + 1):
        split = start + window_size
        before = values[start:split]
        after = values[split:split + window_size]
        if compare_windows(before, after) == "ALERT":
            colors[split] = "red"

    plt.scatter(range(total), values, c=colors)
# Scan and plot this mozilla-inbound series with the default interval and window size.
find_regressions("mozilla-inbound", "5c6e37c52e0bd0abf37c52adf565068fad37bc03")
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison if self._edgecolors == str('face'):
# Scan and plot this mozilla-inbound series with a longer interval (12 * 604800).
find_regressions("mozilla-inbound", "ff4ce004871790022f413b1abb21632e7c788f65", interval=12*604800)
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison if self._edgecolors == str('face'):
# Scan and plot this mozilla-inbound series with the default interval and window size.
find_regressions("mozilla-inbound", "93bb72a460cf2b6ce7f9d7912f0a2ab49c5cf9d8")
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison if self._edgecolors == str('face'):
# Scan and plot this mozilla-inbound series with a longer interval (12 * 604800).
find_regressions("mozilla-inbound", "dbaade2dc703221d3287f7b3611302b4d34e8e7f", interval=12*604800)
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison if self._edgecolors == str('face'):
# Scan and plot this mozilla-inbound series with a longer interval (12 * 604800).
find_regressions("mozilla-inbound", "f6fb7dcb89c26e9c7e18722fa9846f60583d2ef2", interval=12*604800)
/Users/vitillo/.pyenv/versions/anaconda3-2.4.0/lib/python3.5/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison if self._edgecolors == str('face'):