Fun with Overcast Network Player Count Data


This is a follow up to this post.

Strategy:

  • Get data from mongodb
  • Put data in postgres
  • Get data from postgres
  • Make plots

You might be asking: Why take data from one database and put it in another only to get it out again? idk, for fun?

In [1]:
from pymongo import MongoClient

mg_client = MongoClient()
mg_db = mg_client.ocn_player_count
data = mg_db.server_stats

# This is what one entry looks like in mongodb:
data.find_one()
Out[1]:
{u'_id': ObjectId('53bf4b591d41c8195948465b'),
 u'stats': {u'eu-blitz': {u'chaos': 0, u'rage1': 0},
  u'eu-ghost-squadron': {u'gs1': 0, u'gs2': 0, u'gs3': 0, u'gs4': 0},
  u'eu-project-ares': {u'alpha': 7,
   u'capture': 28,
   u'control': 0,
   u'deathmatch': 0,
   u'destroy': 7,
   u'mini01': 14,
   u'mini02': 0,
   u'mini03': 25,
   u'mini04': 10,
   u'mini05': 9,
   u'mini06': 3,
   u'nostalgia': 5},
  u'us-blitz': {u'chaos': 2, u'cronus': 3, u'rage1': 13, u'rage2': 4},
  u'us-ghost-squadron': {u'gs1': 0,
   u'gs2': 5,
   u'gs3': 2,
   u'gs4': 0,
   u'gs5': 11,
   u'gs6': 0},
  u'us-project-ares': {u'alpha': 29,
   u'beta': 17,
   u'capture1': 45,
   u'capture2': 64,
   u'control': 20,
   u'deathmatch': 23,
   u'destroy': 39,
   u'gear': 19,
   u'mini01': 23,
   u'mini02': 30,
   u'mini03': 14,
   u'mini04': 33,
   u'mini05': 25,
   u'mini06': 22,
   u'mini07': 17,
   u'mini08': 28,
   u'mini09': 34,
   u'mini10': 19,
   u'mini11': 11,
   u'nostalgia': 21,
   u'primed': 64}},
 u'time': datetime.datetime(2014, 7, 10, 22, 26, 33, 677000)}
In [2]:
import psycopg2

# connect to postgres db
pg_conn = psycopg2.connect("dbname='ocnserverstats' user='ben'")
cur = pg_conn.cursor()

# make table for data
cur.execute("CREATE TABLE playercounts (id serial PRIMARY KEY, region varchar, section varchar, server varchar, timestamp timestamp, count int);")

# Now we need to loop over all of the entries and convert them into SQL rows
for d in data.find():
    timestamp = d['time']
    
    for key, counts in d['stats'].iteritems():
        region = key[:2] # 'us' or 'eu'
        section = key[3:] # 'project-ares', 'blitz', etc.
        
        for server, count in counts.iteritems():
            # insert into db
            cur.execute("INSERT INTO playercounts (region, section, server, timestamp, count) VALUES (%s, %s, %s, %s, %s)",
                        (region, section, server, timestamp, count))
            
pg_conn.commit()
mg_client.close()

# and here's how the data looks in postgres
print "playercount table:"
cur.execute("""SELECT * from playercounts""")
for row in cur.fetchmany(5):
    print row
    
print "...\n\nNumber of entries in table: %s" % cur.rowcount

pg_conn.close()
playercount table:
(1, 'us', 'ghost-squadron', 'gs1', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 0)
(2, 'us', 'ghost-squadron', 'gs3', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 2)
(3, 'us', 'ghost-squadron', 'gs2', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 5)
(4, 'us', 'ghost-squadron', 'gs5', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 11)
(5, 'us', 'ghost-squadron', 'gs4', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 0)
...

Number of entries in table: 187180
In [3]:
# now onto the analysis!
import pandas as pd
import matplotlib as mpl

# for displaying inline graphs in notebook
%matplotlib inline

def plot_counts(servers=('alpha',), region='us', errorbars=True):
    """ Get data from postgres and make some simple plots! """
    
    plt = mpl.pyplot
    mpl.rcParams['savefig.dpi'] = 110 # make the graph display nicer
    
    # get data
    pg_conn = psycopg2.connect("dbname='ocnserverstats' user='ben'")
    cur = pg_conn.cursor()
    cur.execute("""SELECT timestamp, count, server from playercounts WHERE region=(%s) AND server IN %s""", (region, servers,))

    d = cur.fetchall()
    pg_conn.close()

    df = pd.DataFrame(d, columns=['timestamp', 'count', 'server'])

    # replace datetime field with hour of the day
    df['hour'] = df['timestamp'].apply(lambda x: x.hour + x.minute / 60.0)
    df = df.drop('timestamp', 1)

    server_group = df.groupby('server')

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel('hour of the day (EDT)')
    ax.set_ylabel('average player count')
    ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(2.0))
    ax.set_xlim(0,23)
    ax.yaxis.grid(b=True, which='major', color='black', alpha='0.1')

    plot_filename = []
    for sg in server_group:
        name = region + '-' + sg[0]
        plot_filename.append(name)
        gp = sg[1].groupby('hour')

        means = gp.mean() # plot line

        y = means['count'].tolist()
        x = means.index.tolist()

        if errorbars:
            errors = gp.std() # one standard deviation error bar
            yerr = errors['count'].tolist()
            ax.errorbar(x,y,yerr=yerr ,ls='None')
    
        ax.plot(x,y, label=name)
    ax.legend(loc='upper left')

    plot_filename = '--'.join(plot_filename) + '.png'
    fig.savefig(plot_filename, dpi=120)
    
 
# make some plots!
plot_counts(servers=('alpha','beta'))
plot_counts(servers=('alpha','mini01'))
plot_counts(servers=('capture1','capture2','destroy'))
plot_counts(servers=('capture','destroy'), region='eu')
In [4]:
# the error bars are helpful, but they crowd the graph when you have multiple series
plot_counts(servers=('capture1','capture2','destroy'),errorbars=False)
plot_counts(servers=('mini01','mini02','mini03','mini04','mini05'), region='eu',errorbars=False)