from moztelemetry import get_pings
import datetime
import time
import re
# --- Query parameters -------------------------------------------------------
channel = None # None, "nightly", etc
# Matched against ping["application"]["version"] below (e.g. "68.0a1").
versions = re.compile("6[8]\..*") # ".*" for all
fraction = 0.1 # fraction of pings to sample
ndays = 90 # number of days in the past
# Window (epoch seconds) that each sync's "when" timestamp must fall inside.
earliest = time.time() - ndays * 24 * 60 * 60
latest = time.time() # - 1 * 24 * 60 * 60 # one day back
# for efficiency, we only ask for ping submitted within these dates.
# (earliest is padded by 2 days, presumably to catch late-submitted pings.)
earliest_ping = (datetime.date.fromtimestamp(earliest) - datetime.timedelta(days=2)).strftime("%Y%m%d")
latest_ping = datetime.date.fromtimestamp(latest).strftime("%Y%m%d")
# Keys checked, in preference order, for a failureReason's detail value.
valueKeys = ["error", "from", "code"]
# Used to collapse failure strings that embed varying detail (codes, filenames).
reOSError = re.compile("(Win|Unix) error (\\d+) .*", re.DOTALL)
reInvalidProperty = re.compile("(Invalid value for property '.*'): .*$")
def getPlatform(ping):
    """Classify which product sent this ping from its "application" block.

    Returns one of "android", "iOS" or "desktop".
    """
    app = ping["application"]
    # No name, or a Fennec name, is taken to mean Android.
    if "name" not in app or app["name"] == "Fennec":
        return "android" # ??
    if app.get("architecture") == "arm":
        return "iOS"
    return "desktop"
def normalizeFailure(failure):
    """Turn one raw failureReason record into a stable "name/value" string.

    Messages embedding varying details (URLs, byte counts, timestamps, guids,
    filenames) are collapsed to a canonical form so identical failures
    aggregate together.
    """
    name = failure["name"]
    # Pick the first detail key present, in the preference order of valueKeys.
    value = "<none>"
    for key in valueKeys:
        if key in failure:
            value = unicode(failure[key]).strip()
            break
    # Messages whose whole tail varies: prefix -> canonical replacement.
    canonicalByPrefix = (
        ("Error: Payload too big",
         "Error: Payload too big"),
        ("Error: X-Last-Modified changed in the middle of a download batch",
         "Error: X-Last-Modified changed in the middle of a download batch"),
        ("Error: no salt available for",
         "Error: no salt available for <extension-id> - how did this happen?"),
        ("Error: you cannot set the clients.lastRecordUpload pref to the number",
         "Error: you cannot set the clients.lastRecordUpload pref to the number <some large value> as number pref values must be in the signed 32-bit integer range"),
    )
    if value.endswith("is not a valid URL."): # has the URL (since fixed for newer pings)
        value = "<URL> is not a valid URL."
    elif value.startswith("Error: Invalid value for property"):
        value = ":".join(value.split(":")[:2])
    elif value.startswith("TypeError: date:") and "cannot be a future date" in value:
        value = "TypeError: date: <some date> cannot be a future date"
    elif value.startswith("Error: Duplicate entry for") and value.endswith("in changeset"):
        value = "Error: Duplicate entry for <guid> in changeset"
    else:
        for prefix, canonical in canonicalByPrefix:
            if value.startswith(prefix):
                value = canonical
                break
        else:
            # this kinda sucks as we lose the filename being operated on, but in many cases it doesn't actually
            # matter (eg, "disk is full" error isn't really related to a specific file)
            value = reOSError.sub("\\1 Error \\2", value)
            value = reInvalidProperty.sub("\\1", value)
    return name + "/" + value
def mapFailures(ping):
    """Flat-map one sync ping into ((day, major-version), {engine: {failure: 1}}) tuples.

    Only desktop pings whose application version matches `versions` and whose
    individual syncs fall inside the [earliest, latest] window are emitted.
    A failure of None marks a successful engine sync.
    """
    try:
        syncs = ping["payload"]["syncs"]
        if getPlatform(ping) != "desktop":
            return []
        if versions.match(ping["application"]["version"]) is None:
            return []
    except KeyError:
        return []
    emitted = []
    major = ping["application"]["version"].split(".")[0]
    for sync in syncs:
        try:
            day = datetime.date.fromtimestamp(sync["when"] / 1000.0)
            when = sync["when"] / 1000
        except (TypeError, ValueError, KeyError):
            continue # ignore bad timestamps
        if not (earliest <= when <= latest):
            continue
        for engine in sync.get("engines", []):
            if type(engine) != dict: # seeing a float here too in one ping!
                continue
            if "failureReason" in engine:
                failure = normalizeFailure(engine["failureReason"])
            elif "status" in engine:
                failure = "status/%s" % engine.get("status")
            else:
                failure = None
            # hrm - seeing records with no name, and apparently non-strings (a float!?) in others
            if type(engine.get("name")) == unicode:
                emitted.append(((day, major), { engine["name"]: { failure: 1 } }))
    return emitted
# Kick off the Spark job: fetch sync pings for the channel/date-range/sample
# configured above and flat-map each into per-engine failure records.
# (SparkContext is provided by the notebook runtime.)
sc = SparkContext.getOrCreate()
# NOTE(review): get_pings is deprecated upstream, and its s3-file based
# "sampling" is not representative (see the warning in the cell output below).
s = get_pings(sc, doc_type='sync', submission_date=(earliest_ping, latest_ping), channel=channel, fraction=fraction).flatMap(mapFailures)
Call to deprecated function get_pings. WARNING: THIS IS NOT A REPRESENTATIVE SAMPLE. This 'sampling' is based on s3 files and is highly susceptible to skew. Use only for quicker performance while prototyping. fetching 99485.05580MB in 1061452 files...
def reduceCounts(a, b):
    """Merge nested count-dict b into a copy of a and return the result.

    Leaf values are numbers and get summed; any other value is treated as a
    nested count dict and merged recursively. Neither input is mutated.
    """
    merged = a.copy()
    for key, val in b.iteritems():
        if type(val) in (int, long):
            merged[key] = merged.get(key, 0) + val
        else:
            merged[key] = reduceCounts(merged.get(key, {}), val)
    return merged
# Merge all records sharing a (date, version) key into one nested count dict,
# then collect into a regular list, sorted by date.
summaries = s.reduceByKey(reduceCounts)
ssummaries = sorted(summaries.collect(), key=lambda x: x[0])
# This really is quite a mess - but it's quite complicated what we are trying to do...
# Get the number of errors per engine by version, simply so we can guess at what the
# "top" errors are for that engine/version.
# successByEngineByVersion: {engine: {version: {date: {"total": n, "failures": n}}}}
#   ("All Versions" is a synthetic version holding the per-date sums).
# errorCountsByEngineByVersion: {engine: {version: {error-string: count}}}
successByEngineByVersion = {}
errorCountsByEngineByVersion = {}
for ((date, version), engineSummaries) in ssummaries:
    # NOTE(review): the loop variable `s` shadows the RDD `s` above; nothing
    # reads the RDD afterwards, but a rename would be safer.
    for name, s in engineSummaries.iteritems():
        engineEntry = successByEngineByVersion.setdefault(name, {})
        verEntry = engineEntry.setdefault(version, {})
        # BUG FIX: "total" used to be seeded with s.get(None, 0) - the success
        # bucket - and the loop below then added *every* bucket (including
        # None) again, double-counting successes and inflating the success
        # percentage. Seed with zero; the loop alone accumulates the true
        # total (successes + failures).
        this = verEntry[date] = {"total": 0, "failures": 0}
        for error, count in s.iteritems():
            this["total"] += count
            if error is not None:
                this["failures"] += count
        allEntry = engineEntry.setdefault("All Versions", {})
        thisAll = allEntry.setdefault(date, {})
        thisAll["total"] = thisAll.get("total", 0) + this["total"]
        thisAll["failures"] = thisAll.get("failures", 0) + this["failures"]
        engineEntry = errorCountsByEngineByVersion.setdefault(name, {})
        verEntry = engineEntry.setdefault(version, {})
        for error, count in s.iteritems():
            if error is not None:
                verEntry[error] = verEntry.get(error, 0) + count
# The data structures we use to graph.
def buildErrorCountsByVersionByDate():
    """Roll ssummaries up into {engine: {version: {date: total-error-count}}}.

    The success bucket (error is None) is excluded - only real failures count.
    """
    rolled = {}
    for ((day, ver), engineSummaries) in ssummaries:
        for engineName, summary in engineSummaries.iteritems():
            perDate = rolled.setdefault(engineName, {}).setdefault(ver, {})
            for error, n in summary.iteritems():
                if error is None:
                    continue
                perDate[day] = perDate.get(day, 0) + n
    return rolled
# Build a map of total error counts per day, so we can work out the perc of each.
errorCountsByEngineByVersionByDate = buildErrorCountsByVersionByDate()
# failuresByEngineByVersionByError: {engine: {version: {error: [(date, perc)]}}}
# where perc is that error's share (0-100) of all errors for the engine/version
# on that date, restricted to the version's overall top-10 errors (everything
# else is lumped into "other").
failuresByEngineByVersionByError = {}
for ((date, version), engineSummaries) in ssummaries:
    #date = date2num(date)
    for name, s in engineSummaries.iteritems():
        engineEntry = failuresByEngineByVersionByError.setdefault(name, {})
        verEntry = engineEntry.setdefault(version, {})
        # total error count for this engine/version/date.
        total = errorCountsByEngineByVersionByDate.get(name, {}).get(version, {}).get(date, 0)
        # Take the top-n errors for this engine in this version across all dates,
        # and put the rest in "other"
        countsDict = errorCountsByEngineByVersion[name][version]
        topCounts = set(map(lambda d: d[0], sorted(countsDict.items(), key=lambda d: -d[1])[:10]))
        ourDict = {"other": 0}
        for error, count in s.iteritems():
            if error is None:
                # Success bucket: carried along so it participates in the sort
                # below, but skipped when emitting (the `is not None` check).
                ourDict[None] = count
            else:
                if error in topCounts:
                    ourDict[error] = count
                else:
                    ourDict["other"] += count
        # Emit errors in descending-count order as (date, percentage) points.
        for error, count in sorted(ourDict.iteritems(), key=lambda d: -d[1]):
            if error is not None:
                perc = (100.0 * count / total) if total else 0
                errorEntries = verEntry.setdefault(error, [])
                errorEntries.append((date, perc))
# --- Plotting setup ----------------------------------------------------------
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.dates import date2num
from pylab import rcParams
import itertools
# IPython/Jupyter magic: render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'
rcParams['figure.figsize'] = 12, 10
rcParams.update({'font.size': 6})
# magic to support many items, each with different
# styles, but same "key" is always the same style.
class StyleGetter:
    """Hands out matplotlib line-style/marker pairs, always returning the
    same pair for the same key."""
    colors=('k','y','m','c','b','g','r')
    linestyles=('-','--','-.',':')
    def __init__(self):
        # Every color is exhausted for one linestyle before moving to the next.
        combos = [color + linestyle for linestyle in self.linestyles for color in self.colors]
        self.styles = itertools.cycle(combos)
        self.markers = itertools.cycle(('+', 'o', '*', 'v', '^', '<', '>', 'x', 'D'))
        self.map = {}
    def get(self, key):
        """Return (and remember) the {"style", "marker"} dict for key."""
        if key not in self.map:
            self.map[key] = {"style": self.styles.next(),
                             "marker": self.markers.next()}
        return self.map[key]
styleGetter = StyleGetter();
# One figure per engine: left axis = percentage of successful syncs per
# version, right (twin) axis = total syncs per version in millions. Then one
# figure per engine/version showing each top error's share of failures.
for engineName in sorted(successByEngineByVersion.keys()):
    print engineName
    colorCycle = itertools.cycle(('k','y','m','c','r','g','b'))
    fig, ax = plt.subplots()
    # We use a second axis for total syncs for the version.
    ax2 = ax.twinx()
    byVersion = successByEngineByVersion[engineName]
    # NOTE(review): `totals` is accumulated below but apparently never read
    # afterwards (at least within this cell) - looks like dead code.
    totals = {}
    for version in sorted(byVersion.keys()):
        verTotals = {}
        ticks = []
        vals = []
        byDate = byVersion[version]
        for date in sorted(byDate.keys()):
            ticks.append(date2num(date))
            this = byDate[date]
            # Success percentage for this engine/version/date.
            nsuccess = this["total"] - this.get("failures", 0)
            perc = (100.0 * nsuccess / this["total"]) if this["total"] else 0
            vals.append(perc)
            verTotals[date] = byDate[date]["total"]
            totals[date] = totals.get(date, 0) + byDate[date]["total"]
        color = colorCycle.next()
        ax.plot_date(ticks,
                     vals,
                     color=color,
                     ls='-',
                     marker=None,
                     label=engineName + " - " + version + " - Percentage of Successful Syncs")
        ax.legend(loc='lower left')
        ax.get_yaxis().set_major_formatter(ticker.FuncFormatter(lambda x, p: format(x, ".3f")))
        # Zoom right in - success rates cluster very close to 100%.
        ax.set_ylim([99.5, 100])
        # and plot the total syncs for this version.
        verTotals = sorted(verTotals.items())
        ax2.plot_date(map(lambda a: date2num(a[0]), verTotals), # ticks
                      map(lambda a: a[1], verTotals), # vals,
                      ls="--",
                      color=color,
                      fillstyle="full",
                      marker=None,
                      label="Total Syncs " + version + " (millions)")
        ax2.set_ylabel('Total Syncs')
        #ax2.tick_params('y', colors='r')
        ax2.legend(loc='lower right')
        # Label the right axis in millions (Python 2 integer division).
        ax2.get_yaxis().set_major_formatter(ticker.FuncFormatter(lambda x, p: format(int(x)/1000000, ',')))
    # One graph per version of failure rates for this engine.
    byVersion = failuresByEngineByVersionByError[engineName]
    for version in sorted(byVersion.keys()):
        fig, ax = plt.subplots()
        ax.set_title(engineName + " - " + version)
        byError = byVersion[version]
        for error, details in byError.items():
            # Same error always gets the same style/marker across figures.
            style = styleGetter.get(error)
            ax.plot_date(map(lambda d: d[0], details),
                         map(lambda d: d[1], details),
                         style["style"],
                         marker=style["marker"],
                         label=version + " - " + error)
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, mode="expand", handlelength=10) # magic to get the legend outside the graph.
plt.show()
addons
addresses
bookmarks
bookmarks-buffered
clients
creditcards
extension-storage
forms
history
passwords
prefs
tabs