In [1]:

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history, get_records

# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (see the message printed below this cell), so unqualified names
# used later may come from that bulk import rather than from the explicit
# imports above.
%pylab inline

/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.
Populating the interactive namespace from numpy and matplotlib

In [2]:

from datetime import datetime, timedelta

# Select Firefox Nightly pings whose build_id lies between
# "<yesterday>000000" and "<yesterday>999999", where <yesterday> is the
# previous UTC calendar day formatted as YYYYMMDD.
# `sc` is not defined in this notebook; presumably the SparkContext supplied
# by the hosting environment -- TODO confirm.
one_day_ago = datetime.utcnow() - timedelta(days=1)
yesterday_utc = one_day_ago.strftime("%Y%m%d")
build_id_range = (yesterday_utc + "000000", yesterday_utc + "999999")
pings = get_pings(
    sc,
    app="Firefox",
    channel="nightly",
    build_id=build_id_range,
    fraction=1.0,
)

In [3]:

# Only these ping fields are needed downstream: the client identifier plus the
# two Windows build fields that the later cells count and plot.
wanted_properties = [
    "clientId",
    "environment/system/os/windowsBuildNumber",
    "environment/system/os/windowsUBR",
]
subset = get_pings_properties(pings, wanted_properties)

In [4]:

# Presumably reduces the data to a single ping per clientId (behavior of the
# moztelemetry helper -- verify against its documentation).
# NOTE(review): this rebinds `subset` to a value derived from itself, so
# re-running this cell on its own feeds the already-reduced data back in.
subset = get_one_ping_per_client(subset)

In [5]:

# Keep only the entries whose Windows build number is not None, and cache the
# filtered result because several later cells read from it.
with_build_number = subset.filter(
    lambda ping: ping["environment/system/os/windowsBuildNumber"] is not None
)
cached = with_build_number.cache()

In [6]:

# Display how many entries passed the build-number filter.
cached.count()

Out[6]:

In [7]:

# Tally how many entries of `cached` share each
# (windowsBuildNumber, windowsUBR) pair.
build_and_ubr = cached.map(
    lambda ping: (
        ping["environment/system/os/windowsBuildNumber"],
        ping["environment/system/os/windowsUBR"],
    )
)
values = build_and_ubr.countByValue()

In [8]:

# Bar chart of the number of profiles per (Windows build number, UBR) pair.
# `values` maps (build_number, ubr) tuples to counts; sorting its items orders
# the bars by build number, then by UBR.
versions, counts = zip(*sorted(values.items()))

# Build the tick labels as a real list: len() is taken of it below, which only
# works with map() while map() returns a list (Python 2). A list comprehension
# behaves the same on Python 2 and Python 3.
versions = ["build {}, {}".format(*pair) for pair in versions]
positions = range(len(versions))

fig, ax = plt.subplots(figsize=(20, 10))
ax.bar(positions, counts)
ax.set_xticks(positions)
ax.set_xticklabels(versions, rotation=45)
ax.set_xlabel("Windows build")
ax.set_ylabel("Number of profiles that are submitting Windows build data")
plt.show()

In [9]:

# Pie chart of the same (build number, UBR) counts, ordered from the most
# common pair to the least common.
by_count_desc = sorted(values.items(), key=lambda item: -item[1])
versions, counts = zip(*by_count_desc)
versions = ["build {}, {}".format(*pair) for pair in versions]

plt.figure(figsize=(20, 20))
pie = plt.pie(counts, autopct="%1.1f%%")
# The first element of pie()'s return value is used as the legend handles,
# paired with the labels in the same order as the counts.
slice_handles = pie[0]
plt.legend(slice_handles, versions, loc="best")
plt.show()

In [10]:

# Display the raw (build number, UBR) -> count mapping.
values

Out[10]:

defaultdict(int,
            {(10240, 16384): 29,
             (10240, 16397): 14,
             (10240, 16405): 3,
             (10240, 16430): 1,
             (10240, 16433): 1,
             (10240, 16445): 1,
             (10240, 16463): 1,
             (10240, 16487): 3,
             (10240, 16500): 1,
             (10240, 16520): 3,
             (10240, 16549): 2,
             (10240, 16644): 2,
             (10240, 16683): 3,
             (10240, 16725): 4,
             (10240, 16771): 1,
             (10240, 16854): 2,
             (10240, 16942): 2,
             (10240, 17024): 4,
             (10240, 17071): 6,
             (10240, 17113): 3,
             (10240, 17146): 11,
             (10240, 17190): 81,
             (10586, 0): 8,
             (10586, 3): 5,
             (10586, 14): 1,
             (10586, 36): 1,
             (10586, 63): 4,
             (10586, 71): 1,
             (10586, 104): 9,
             (10586, 122): 1,
             (10586, 164): 13,
             (10586, 218): 7,
             (10586, 318): 21,
             (10586, 338): 1,
             (10586, 420): 13,
             (10586, 456): 1,
             (10586, 494): 17,
             (10586, 545): 16,
             (10586, 589): 21,
             (10586, 633): 53,
             (10586, 679): 339,
             (14393, 0): 54,
             (14393, 5): 1,
             (14393, 10): 4,
             (14393, 51): 6,
             (14393, 82): 3,
             (14393, 105): 3,
             (14393, 187): 16,
             (14393, 222): 11,
             (14393, 321): 48,
             (14393, 351): 60,
             (14393, 447): 3379,
             (14393, 571): 1,
             (14931, 1000): 2,
             (14931, 1002): 18,
             (14936, 1000): 3,
             (14942, 1000): 3,
             (14955, 1000): 4,
             (14959, 1000): 7,
             (14965, 1001): 53,
             (14971, 1000): 241,
             (14977, 1001): 2})

In [11]:

# Tally entries of `cached` by Windows build number alone.
# NOTE: this rebinds `values`, replacing the (build number, UBR) counts that
# the name held before this cell ran.
build_numbers = cached.map(
    lambda ping: ping["environment/system/os/windowsBuildNumber"]
)
values = build_numbers.countByValue()

In [12]:

# Bar chart of the number of profiles per Windows build number.
# `values` maps build numbers to counts; sorting its items orders the bars by
# ascending build number.
versions, counts = zip(*sorted(values.items()))

# Build the tick labels as a real list: len() is taken of it below, which only
# works with map() while map() returns a list (Python 2). A list comprehension
# behaves the same on Python 2 and Python 3.
versions = ["build {}".format(build) for build in versions]
positions = range(len(versions))

fig, ax = plt.subplots(figsize=(20, 10))
ax.bar(positions, counts)
ax.set_xticks(positions)
ax.set_xticklabels(versions, rotation=45)
ax.set_xlabel("Windows build (major only)")
ax.set_ylabel("Number of profiles that are submitting Windows build data")
plt.show()

In [13]:

# Pie chart of the per-build-number counts, ordered from the most common build
# to the least common.
by_count_desc = sorted(values.items(), key=lambda item: -item[1])
versions, counts = zip(*by_count_desc)
versions = ["build {}".format(build) for build in versions]

plt.figure(figsize=(20, 20))
pie = plt.pie(counts, autopct="%1.1f%%")
# The first element of pie()'s return value is used as the legend handles,
# paired with the labels in the same order as the counts.
slice_handles = pie[0]
plt.legend(slice_handles, versions, loc="best")
plt.show()

In [14]:

# Display the raw build number -> count mapping.
values

Out[14]:

defaultdict(int,
            {10240: 178,
             10586: 532,
             14393: 3586,
             14931: 20,
             14936: 3,
             14942: 3,
             14955: 4,
             14959: 7,
             14965: 53,
             14971: 241,
             14977: 2})

In [15]: