import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import IPython
import functools
from __future__ import division
from moztelemetry.spark import get_pings, get_one_ping_per_client, get_pings_properties
# IPython magic (not valid in a plain .py module): render plots inline and
# populate the interactive namespace from numpy and matplotlib.
%pylab inline
# Default figure size (width 16, height 7) for the plots in this notebook.
IPython.core.pylabtools.figsize(16, 7)
Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable. Populating the interactive namespace from numpy and matplotlib
# Load the nightly-channel pings submitted on 20160127 whose build ID falls
# between the two given bounds (i.e. builds from that same day).
# NOTE(review): `sc` is not defined in this file -- presumably the SparkContext
# injected by the notebook environment; confirm before running standalone.
pings = get_pings(sc, channel="nightly", submission_date="20160127", build_id=("20160127000000", "20160127999999"))
def _histogram_series(values):
    """Turn a ``{bucket_string: count}`` mapping into an int-indexed Series."""
    # Iterating the dict and calling .values() yield matching orders; both are
    # materialised as lists so the call behaves the same on Python 2 and 3.
    return pd.Series(list(values.values()), index=[int(key) for key in values])


def gecko_activity(ping):
    """Extract the "Gecko" thread's BHR histograms from a single ping.

    Reads ``ping["payload"]["threadHangStats"]`` and uses the first thread
    whose ``"name"`` is ``"Gecko"``.

    Args:
        ping: mapping with a ``"payload"`` entry; ``threadHangStats`` is
            optional and treated as empty when absent.

    Returns:
        dict: empty when no "Gecko" thread is present, otherwise

        * ``"gecko_activity"`` -- the thread's activity histogram as a Series
          indexed by integer bucket, sorted by bucket.
        * ``"gecko_hangs"`` -- bucket-wise sum of all the thread's hang
          histograms as a float64 Series sorted by bucket (empty when the
          thread reports no hangs).

    Raises:
        KeyError: if the "Gecko" entry lacks the expected ``activity`` /
            ``hangs`` / ``histogram`` / ``values`` fields.
    """
    threads = ping["payload"].get("threadHangStats", [])
    for thread in threads:
        if thread["name"] != "Gecko":
            continue

        activity = _histogram_series(thread["activity"]["values"]).sort_index()

        # Explicit dtype: a bare pd.Series() is object-typed (and warns) on
        # newer pandas, whereas the accumulated sum is meant to be float64.
        hangs = pd.Series(dtype="float64")
        for hang in thread["hangs"]:
            # fill_value=0 keeps buckets that only some hang histograms have.
            hangs = hangs.add(_histogram_series(hang["histogram"]["values"]),
                              fill_value=0)

        # Only the first "Gecko" entry is considered.
        return {"gecko_activity": activity, "gecko_hangs": hangs.sort_index()}

    return {}
# Reduce every ping to its Gecko activity/hang histograms. This RDD feeds
# several separate actions in this notebook (take, count, filter), so cache it
# rather than re-running the extraction over all pings for each one.
bhr_summary = pings.map(gecko_activity).cache()
# Bring a handful of summaries to the driver for inspection.
sample = bhr_summary.take(5)
Let's have a look at a few individual submissions:
# Print the activity and hang histograms of each sampled submission.
# Single-argument print(...) calls produce the same output under the Python 2
# print statement and the Python 3 print function.
for idx, x in enumerate(sample):
    print("Sample {} activity:".format(idx))
    print(x["gecko_activity"])
    print("Sample {} hangs:".format(idx))
    print(x["gecko_hangs"])
    # Blank separator line; print("") rather than print() because the latter
    # would print "()" under the Python 2 print statement.
    print("")
Sample 0 activity: 0 0 1 825 3 2 7 1 15 3 31 13 63 58 127 10 255 2 511 3 1023 0 dtype: int64 Sample 0 hangs: 127 0 255 2 511 3 1023 0 dtype: float64 Sample 1 activity: 0 0 1 8114 3 207 7 182 15 199 31 116 63 38 127 21 255 6 511 2 1023 4 2047 0 dtype: int64 Sample 1 hangs: 127 0 255 6 511 2 1023 4 2047 0 dtype: float64 Sample 2 activity: 0 0 1 7390 3 207 7 181 15 199 31 114 63 35 127 21 255 6 511 2 1023 3 2047 0 dtype: int64 Sample 2 hangs: 127 0 255 6 511 2 1023 3 2047 0 dtype: float64 Sample 3 activity: 0 0 1 422 3 5 7 3 15 5 31 11 63 9 127 6 511 1 1023 1 2047 0 dtype: int64 Sample 3 hangs: 255 0 511 1 1023 1 2047 0 dtype: float64 Sample 4 activity: 0 0 1 5244 3 22 7 34 15 92 31 246 63 152 127 75 255 36 511 11 1023 10 2047 5 8191 1 16383 0 dtype: int64 Sample 4 hangs: 127 0 255 34 511 13 1023 10 2047 5 4095 0 8191 1 16383 1 32767 0 dtype: float64
How many submissions have mismatching BHR reports?
def compare(ping, min_bucket=255):
    """Tell whether a summary's activity and hang histograms agree.

    Only buckets greater than or equal to ``min_bucket`` are compared. A
    bucket present in just one of the two histograms counts as 0 in the
    other, so a missing bucket never differs from an explicit zero count.

    Args:
        ping: dict as produced by ``gecko_activity`` -- either empty or
            holding the ``"gecko_activity"`` and ``"gecko_hangs"`` Series.
        min_bucket: lowest histogram bucket taken into account (default 255,
            the threshold this analysis has always used).

    Returns:
        bool: True when every compared bucket has the same count in both
        histograms. A summary without BHR data (missing keys) has nothing
        that can mismatch and therefore also yields True.
    """
    activity = ping.get("gecko_activity")
    hangs = ping.get("gecko_hangs")
    if activity is None or hangs is None:
        return True

    buckets = sorted(
        bucket
        for bucket in set(activity.index).union(hangs.index)
        if bucket >= min_bucket
    )
    # Align both histograms on the same buckets. Both sides are zero-filled;
    # leaving NaN on one side would make it compare unequal to a zero count.
    expected = activity.reindex(buckets).fillna(0)
    reported = hangs.reindex(buckets).fillna(0)
    return bool(np.all(expected.values == reported.values))
# Fraction of submissions whose activity and hang histograms agree.
total = bhr_summary.count()
equal = bhr_summary.filter(compare).count()
# float() makes this a true division on its own, without depending on
# `from __future__ import division` being in effect.
print(float(equal) / total)
0.527938517179
Let's have a look at a few individual submissions with mismatching BHR reports:
# Take a few summaries for which compare() reports a mismatch and print their
# activity and hang histograms.
sample = bhr_summary.filter(lambda x: not compare(x)).take(5)
# Single-argument print(...) calls produce the same output under the Python 2
# print statement and the Python 3 print function.
for idx, x in enumerate(sample):
    print("Sample {} activity:".format(idx))
    print(x["gecko_activity"])
    print("Sample {} hangs:".format(idx))
    print(x["gecko_hangs"])
    # Blank separator line; print("") rather than print() because the latter
    # would print "()" under the Python 2 print statement.
    print("")
Sample 0 activity: 0 0 1 5244 3 22 7 34 15 92 31 246 63 152 127 75 255 36 511 11 1023 10 2047 5 8191 1 16383 0 dtype: int64 Sample 0 hangs: 127 0 255 34 511 13 1023 10 2047 5 4095 0 8191 1 16383 1 32767 0 dtype: float64 Sample 1 activity: 0 0 1 68162 3 2223 7 609 15 667 31 1620 63 3040 127 364 255 232 511 85 1023 70 2047 5 4095 4 8191 2 16383 0 dtype: int64 Sample 1 hangs: 127 0 255 229 511 87 1023 71 2047 5 4095 4 8191 2 16383 0 dtype: float64 Sample 2 activity: 0 0 1 66004 3 2201 7 585 15 643 31 1595 63 3009 127 358 255 227 511 85 1023 69 2047 5 4095 3 8191 2 16383 0 dtype: int64 Sample 2 hangs: 127 0 255 224 511 87 1023 70 2047 5 4095 3 8191 2 16383 0 dtype: float64 Sample 3 activity: 0 0 1 33775 3 945 7 691 15 1026 31 879 63 398 127 106 255 46 511 24 1023 8 2047 0 dtype: int64 Sample 3 hangs: 127 0 255 46 511 21 1023 9 2047 1 4095 0 dtype: float64 Sample 4 activity: 0 0 1 68201 3 1648 7 1456 15 1707 31 2454 63 1016 127 232 255 83 511 46 1023 15 2047 1 4095 0 dtype: int64 Sample 4 hangs: 127 0 255 79 511 46 1023 16 2047 2 4095 0 dtype: float64