import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records
%pylab inline
/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment. warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable. Populating the interactive namespace from numpy and matplotlib
from datetime import datetime, timedelta
# Select Nightly pings whose build ID lies between <yesterday>000000 and
# <yesterday>999999, where <yesterday> is yesterday's UTC date as YYYYMMDD.
yesterday_utc = (datetime.utcnow() - timedelta(days=1)).strftime("%Y%m%d")
build_id_range = (yesterday_utc + "000000", yesterday_utc + "999999")
pings = get_pings(
    sc,
    app="Firefox",
    channel="nightly",
    build_id=build_id_range,
    fraction=1.0
)

# Keep only the fields this analysis reads.
wanted_properties = [
    "clientId",
    "environment/system/os/windowsBuildNumber",
    "environment/system/os/windowsUBR",
]
subset = get_pings_properties(pings, wanted_properties)
# NOTE(review): presumably reduces the data set to a single ping per clientId,
# as the helper's name suggests -- confirm against moztelemetry.
subset = get_one_ping_per_client(subset)

# Drop pings that carry no Windows build number, and cache the result because
# several aggregations below reuse it.
cached = subset.filter(
    lambda p: p["environment/system/os/windowsBuildNumber"] is not None
).cache()
cached.count()
4629
# Tally the cached pings per (Windows build number, UBR) pair; countByValue()
# yields a mapping from each pair to its number of occurrences.
values = cached.map(lambda p: (p['environment/system/os/windowsBuildNumber'], p["environment/system/os/windowsUBR"])).countByValue()

# Order the bars by (build, UBR) and separate the keys from their counts.
versions, counts = zip(*sorted(values.items()))
# Build the tick labels as a concrete list: map() returns a lazy iterator on
# Python 3, which would make the len() call below raise TypeError.
versions = ["build {}, {}".format(*build_key) for build_key in versions]
positions = range(len(versions))

fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# align="center" keeps every bar centred on its tick label regardless of the
# matplotlib default (releases before 2.0 default to edge alignment).
ax.bar(positions, counts, align="center")
ax.set_xticks(positions)
ax.set_xticklabels(versions, rotation=45)
ax.set_xlabel("Windows build")
ax.set_ylabel("Number of profiles that are submitting Windows build data")
plt.show()
# Pie chart of the (build, UBR) tallies held in `values`, largest share first.
# reverse=True keeps tied entries in their original relative order, exactly
# like sorting ascending on the negated count.
by_count_desc = sorted(values.items(), key=lambda item: item[1], reverse=True)
versions, counts = zip(*by_count_desc)
versions = ["build {}, {}".format(*build_key) for build_key in versions]

plt.figure(figsize=(20, 20))
pie = plt.pie(counts, autopct="%1.1f%%")
wedges = pie[0]  # first element returned by plt.pie(): the wedge patches
plt.legend(wedges, versions, loc="best")
plt.show()

# Echo the raw tallies for reference.
values
defaultdict(int, {(10240, 16384): 29, (10240, 16397): 14, (10240, 16405): 3, (10240, 16430): 1, (10240, 16433): 1, (10240, 16445): 1, (10240, 16463): 1, (10240, 16487): 3, (10240, 16500): 1, (10240, 16520): 3, (10240, 16549): 2, (10240, 16644): 2, (10240, 16683): 3, (10240, 16725): 4, (10240, 16771): 1, (10240, 16854): 2, (10240, 16942): 2, (10240, 17024): 4, (10240, 17071): 6, (10240, 17113): 3, (10240, 17146): 11, (10240, 17190): 81, (10586, 0): 8, (10586, 3): 5, (10586, 14): 1, (10586, 36): 1, (10586, 63): 4, (10586, 71): 1, (10586, 104): 9, (10586, 122): 1, (10586, 164): 13, (10586, 218): 7, (10586, 318): 21, (10586, 338): 1, (10586, 420): 13, (10586, 456): 1, (10586, 494): 17, (10586, 545): 16, (10586, 589): 21, (10586, 633): 53, (10586, 679): 339, (14393, 0): 54, (14393, 5): 1, (14393, 10): 4, (14393, 51): 6, (14393, 82): 3, (14393, 105): 3, (14393, 187): 16, (14393, 222): 11, (14393, 321): 48, (14393, 351): 60, (14393, 447): 3379, (14393, 571): 1, (14931, 1000): 2, (14931, 1002): 18, (14936, 1000): 3, (14942, 1000): 3, (14955, 1000): 4, (14959, 1000): 7, (14965, 1001): 53, (14971, 1000): 241, (14977, 1001): 2})
# Tally the cached pings per Windows build number only (UBR ignored);
# countByValue() yields a mapping from each build number to its occurrences.
values = cached.map(lambda p: p['environment/system/os/windowsBuildNumber']).countByValue()

# Order the bars by build number and separate the keys from their counts.
versions, counts = zip(*sorted(values.items()))
# Build the tick labels as a concrete list: map() returns a lazy iterator on
# Python 3, which would make the len() call below raise TypeError.
versions = ["build {}".format(build_number) for build_number in versions]
positions = range(len(versions))

fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111)
# align="center" keeps every bar centred on its tick label regardless of the
# matplotlib default (releases before 2.0 default to edge alignment).
ax.bar(positions, counts, align="center")
ax.set_xticks(positions)
ax.set_xticklabels(versions, rotation=45)
ax.set_xlabel("Windows build (major only)")
ax.set_ylabel("Number of profiles that are submitting Windows build data")
plt.show()
# Pie chart of the per-build tallies held in `values`, largest share first.
# reverse=True keeps tied entries in their original relative order, exactly
# like sorting ascending on the negated count.
by_count_desc = sorted(values.items(), key=lambda item: item[1], reverse=True)
versions, counts = zip(*by_count_desc)
versions = ["build {}".format(build_number) for build_number in versions]

plt.figure(figsize=(20, 20))
pie = plt.pie(counts, autopct="%1.1f%%")
wedges = pie[0]  # first element returned by plt.pie(): the wedge patches
plt.legend(wedges, versions, loc="best")
plt.show()

# Echo the raw tallies for reference.
values
defaultdict(int, {10240: 178, 10586: 532, 14393: 3586, 14931: 20, 14936: 3, 14942: 3, 14955: 4, 14959: 7, 14965: 53, 14971: 241, 14977: 2})