This is a follow up to this post.
You might be asking: Why take data from one database and put it in another only to get it out again? idk, for fun?
from pymongo import MongoClient
mg_client = MongoClient()
mg_db = mg_client.ocn_player_count
data = mg_db.server_stats
# This is what one entry looks like in mongodb:
data.find_one()
{u'_id': ObjectId('53bf4b591d41c8195948465b'), u'stats': {u'eu-blitz': {u'chaos': 0, u'rage1': 0}, u'eu-ghost-squadron': {u'gs1': 0, u'gs2': 0, u'gs3': 0, u'gs4': 0}, u'eu-project-ares': {u'alpha': 7, u'capture': 28, u'control': 0, u'deathmatch': 0, u'destroy': 7, u'mini01': 14, u'mini02': 0, u'mini03': 25, u'mini04': 10, u'mini05': 9, u'mini06': 3, u'nostalgia': 5}, u'us-blitz': {u'chaos': 2, u'cronus': 3, u'rage1': 13, u'rage2': 4}, u'us-ghost-squadron': {u'gs1': 0, u'gs2': 5, u'gs3': 2, u'gs4': 0, u'gs5': 11, u'gs6': 0}, u'us-project-ares': {u'alpha': 29, u'beta': 17, u'capture1': 45, u'capture2': 64, u'control': 20, u'deathmatch': 23, u'destroy': 39, u'gear': 19, u'mini01': 23, u'mini02': 30, u'mini03': 14, u'mini04': 33, u'mini05': 25, u'mini06': 22, u'mini07': 17, u'mini08': 28, u'mini09': 34, u'mini10': 19, u'mini11': 11, u'nostalgia': 21, u'primed': 64}}, u'time': datetime.datetime(2014, 7, 10, 22, 26, 33, 677000)}
import psycopg2
# connect to postgres db
pg_conn = psycopg2.connect("dbname='ocnserverstats' user='ben'")
cur = pg_conn.cursor()
# make table for data
cur.execute("CREATE TABLE playercounts (id serial PRIMARY KEY, region varchar, section varchar, server varchar, timestamp timestamp, count int);")
# Now we need to loop over all of the entries and convert them into SQL rows
for d in data.find():
timestamp = d['time']
for key, counts in d['stats'].iteritems():
region = key[:2] # 'us' or 'eu'
section = key[3:] # 'project-ares', 'blitz', etc.
for server, count in counts.iteritems():
# insert into db
cur.execute("INSERT INTO playercounts (region, section, server, timestamp, count) VALUES (%s, %s, %s, %s, %s)",
(region, section, server, timestamp, count))
pg_conn.commit()
mg_client.close()
# and here's how the data looks in postgres
print "playercount table:"
cur.execute("""SELECT * from playercounts""")
for row in cur.fetchmany(5):
print row
print "...\n\nNumber of entries in table: %s" % cur.rowcount
pg_conn.close()
playercount table: (1, 'us', 'ghost-squadron', 'gs1', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 0) (2, 'us', 'ghost-squadron', 'gs3', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 2) (3, 'us', 'ghost-squadron', 'gs2', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 5) (4, 'us', 'ghost-squadron', 'gs5', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 11) (5, 'us', 'ghost-squadron', 'gs4', datetime.datetime(2014, 7, 10, 22, 26, 33, 677000), 0) ... Number of entries in table: 187180
# now onto the analysis!
import pandas as pd
import matplotlib as mpl
# for displaying inline graphs in notebook
%matplotlib inline
def plot_counts(servers=('alpha',), region='us', errorbars=True):
""" Get data from postgres and make some simple plots! """
plt = mpl.pyplot
mpl.rcParams['savefig.dpi'] = 110 # make the graph display nicer
# get data
pg_conn = psycopg2.connect("dbname='ocnserverstats' user='ben'")
cur = pg_conn.cursor()
cur.execute("""SELECT timestamp, count, server from playercounts WHERE region=(%s) AND server IN %s""", (region, servers,))
d = cur.fetchall()
pg_conn.close()
df = pd.DataFrame(d, columns=['timestamp', 'count', 'server'])
# replace datetime field with hour of the day
df['hour'] = df['timestamp'].apply(lambda x: x.hour + x.minute / 60.0)
df = df.drop('timestamp', 1)
server_group = df.groupby('server')
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlabel('hour of the day (EDT)')
ax.set_ylabel('average player count')
ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(2.0))
ax.set_xlim(0,23)
ax.yaxis.grid(b=True, which='major', color='black', alpha='0.1')
plot_filename = []
for sg in server_group:
name = region + '-' + sg[0]
plot_filename.append(name)
gp = sg[1].groupby('hour')
means = gp.mean() # plot line
y = means['count'].tolist()
x = means.index.tolist()
if errorbars:
errors = gp.std() # one standard deviation error bar
yerr = errors['count'].tolist()
ax.errorbar(x,y,yerr=yerr ,ls='None')
ax.plot(x,y, label=name)
ax.legend(loc='upper left')
plot_filename = '--'.join(plot_filename) + '.png'
fig.savefig(plot_filename, dpi=120)
# make some plots!
plot_counts(servers=('alpha','beta'))
plot_counts(servers=('alpha','mini01'))
plot_counts(servers=('capture1','capture2','destroy'))
plot_counts(servers=('capture','destroy'), region='eu')
# the error bars are helpful, but they crowd the graph when you have multiple series
plot_counts(servers=('capture1','capture2','destroy'),errorbars=False)
plot_counts(servers=('mini01','mini02','mini03','mini04','mini05'), region='eu',errorbars=False)