from __future__ import print_function
# Setup, connect to database, fetch data
%pylab inline
from pylab import *
from prettyplotlib import *
import numpy as np
import datetime
from prettyplotlib import brewer2mpl
# Sequential blue-green colormap (9-class 'BuGn'); passed as cmap to the
# imshow heat maps further down.
blue_green = brewer2mpl.get_map('BuGn', 'Sequential', 9).mpl_colormap
# presumably sets the default inline figure size to 8x6 inches -- figsize
# comes from the %pylab namespace; TODO confirm
figsize(8,6)
# connect to the player database
import sqlite3
# 'games.sql' is opened as an SQLite database file; the `games` table is read
# through this cursor below.
db = sqlite3.connect('games.sql')
cursor = db.cursor()
# useful constants
# Number of seconds in one day, as a float; used as the cap for move_times.
ONE_DAY = 3600 * 24.0
# Output: Populating the interactive namespace from numpy and matplotlib
# Read every game in a single query, then transpose the rows into one tuple
# per column.
game_rows = cursor.execute(
    'SELECT rating_white, rating_black, start_time, end_time, size, move_time FROM games'
).fetchall()
(ratings_white, ratings_black, start_times,
 end_times, board_sizes, move_times) = zip(*game_rows)

# Turn each column into a numpy array.
ratings_white = np.array(ratings_white)
ratings_black = np.array(ratings_black)
board_sizes = np.array(board_sizes)
move_times = np.array(move_times, dtype=float)

start_times = np.array(start_times, dtype='datetime64')
last_start_time = start_times.max()
# Rows with no end time are given the latest start time seen in the data.
end_times = np.array(
    [end if end else last_start_time for end in end_times],
    dtype='datetime64')

# Games without time control have a move_time of 0; treat them as one move
# per day like most correspondence games.  Anything slower than one move per
# day is clamped to that same value to keep the analysis below simple.
move_times[(move_times == 0) | (move_times > ONE_DAY)] = ONE_DAY
# Some basics
# How many days of data do we have?  Computed as max - min rather than with
# the ndarray.ptp() method, which NumPy 2.0 removed.
number_of_days = (start_times.max() - start_times.min()) / np.timedelta64(1, 'D')
print("{:.3} days of data".format(number_of_days))

# Call anything under 5 minutes per move "live" and anything over 6 hours per
# move correspondence (move_times is in seconds).  Games between the two
# thresholds belong to neither group.
live_games_mask = move_times < 60 * 5
corr_games_mask = move_times > 60 * 60 * 6
print("{} total games".format(len(board_sizes)))
print("{} live".format(live_games_mask.sum()))
print("{} correspondence".format(corr_games_mask.sum()))
print("")

# Tally live games per board size and list the sizes most-played first.
counts = np.bincount(board_sizes[live_games_mask])
order = np.argsort(counts)[::-1]
print("Game count by board size")
for sz in order:
    if counts[sz] > 0:
        # NOTE(review): the count covers live games only, while the percentage
        # is taken over ALL games, so the column does not add up to 100% --
        # confirm this is intended.
        print("{}x{}\t\t{}\t\t{:.2%}".format(sz, sz, counts[sz], counts[sz] / float(len(board_sizes))))
print("\n")

# Show game starts per day: bucket each start time by whole days elapsed
# since the earliest start.
plot(np.bincount(((start_times - start_times.min()) / np.timedelta64(1, 'D')).astype(int)));
title('# of game started by day')
ylabel('# of games started')
xlabel('day')
# Tick labels are hard-coded: every fifth day, counted from 2014-09-09.
xticks(np.arange(0, 31, 5), np.datetime64('2014-09-09') + np.arange(0, 31, 5) * np.timedelta64(1, 'D'), rotation=45);
# Output:
#   31.7 days of data
#   91340 total games
#   79872 live
#   10208 correspondence
#
#   Game count by board size
#   9x9      37355    40.90%
#   19x19    31229    34.19%
#   13x13    10521    11.52%
#   5x5        570     0.62%
#   25x25       76     0.08%
#   3x3         66     0.07%
#   21x21       16     0.02%
#   11x11       11     0.01%
#   7x7         10     0.01%
#   8x8          9     0.01%
#   10x10        3     0.00%
#   12x12        3     0.00%
#   15x15        1     0.00%
#   4x4          1     0.00%
#   23x23        1     0.00%
# Histogram of the per-move time allowance for live games, converted from
# seconds to minutes, in 30 bins.
live_minutes_per_move = move_times[live_games_mask] / 60.0
hist(live_minutes_per_move, 30)
xlabel('minutes per move')
ylabel('# of games')
title('Live games only - # of games by time per move');
def rating_to_rank(r):
    """Convert a numeric rating into a rank label such as "3k" or "2d".

    Ratings of 2100 and above get a "d" label: "1d" at 2100, plus one for
    each further full 100 points.  Lower ratings get a "k" label counting the
    full hundreds below 2100 (2000 -> "1k", 100 -> "20k").

    Floor division plus an int cast keeps the number in the label integral
    under true division (Python 3) and for float or numpy ratings, where a
    plain `/` would yield labels like "2.0d".

    NOTE(review): ratings strictly between 2000 and 2100 come out as "0k";
    confirm whether they should be reported as "1k" instead.
    """
    if r >= 2100:
        return "{}d".format(int(1 + (r - 2100) // 100))
    return "{}k".format(int((2100 - r) // 100))
# Ratings at which the rank axis is labelled, and the matching rank names.
rank_ticks = np.array([100, 500, 1000, 1500, 1800, 2000, 2200, 2400])
rank_names = list(map(rating_to_rank, rank_ticks))

# Ranks of players: each game is represented by the mean of its two players'
# ratings.
average_ratings = (ratings_white + ratings_black) * 0.5

# One 25-bin histogram over all games, then a second one in a fresh figure
# restricted to live games.
rank_histograms = (
    (average_ratings, 'Average rank of players for all games'),
    (average_ratings[live_games_mask], 'Average rank of players for live games only'),
)
for plot_number, (plotted_ratings, plot_title) in enumerate(rank_histograms):
    if plot_number > 0:
        figure()
    hist(plotted_ratings, 25)
    title(plot_title)
    axis('tight')
    xticks(rank_ticks, rank_names)
    ylabel('# of games')
# Hour of the day at which every game started; the stored dates are in UTC.
hours_utc = np.array([stamp.item().hour for stamp in start_times])
# Keep the live games only and move to EDT (UTC minus 4 hours, wrapped to
# 0..23), since most OGS players are from the US.
start_hours = (hours_utc[live_games_mask] - 4) % 24

hist(start_hours, 24)
axis('tight')
xlabel('hour in EDT')
ylabel('# of games')
title('game starts by hour EDT')
# Open a fresh figure for whatever is plotted next.
figure();
# Group ranks into 2-stone bands (200 rating points each), and put everything
# above 4d (band 12, i.e. rating 2400) into one group.
ranks = np.round(average_ratings / 200)[live_games_mask]
ranks[ranks > 12] = 12
rank_count = ranks.max() - ranks.min() + 1
print(rank_count, ranks.max())

# 2-D histogram of (start hour, rank band), transposed so that rows are rank
# bands and columns are hours.  np.round yields floats, so rank_count is a
# float; histogram2d requires an integer bin count, hence the int().
D = np.histogram2d(start_hours, ranks, (24, int(rank_count)))[0].T

# Apply some smoothing across ranks: every row becomes 0.5 * itself plus
# 0.25 * each neighbouring row (the first and last rows have one neighbour).
Dsmooth = 0.5 * D
Dsmooth[1:, :] += 0.25 * D[:-1, :]
Dsmooth[:-1, :] += 0.25 * D[1:, :]
D = Dsmooth

# 'lower' puts row 0 (the lowest rank band) at the bottom; it is the valid
# matplotlib spelling of what used to be written as 'lower-left'.
imshow(D, cmap=blue_green, interpolation='nearest', origin='lower')
axis('tight')
title('Game starts by EDT hour (x) and rank (y)')
xlabel('hour EDT')
ylabel('rank')
# Convert tick ratings to band numbers, offset so the lowest band is row 0.
yticks(rank_ticks / 200 - ranks.min(), rank_names)
figure()

# Apply a normalization to smooth out some of the spikes in the less active
# rank/hour combinations: add a constant 25 to every cell, then scale each
# rank row so that it sums to 1.
D = D + 25
D = D / D.sum(axis=1).reshape((-1, 1))
imshow(D, cmap=blue_green, interpolation='nearest', origin='lower')
axis('tight')
title('Game starts by EDT hour (x) and rank (y), Normalized within rank')
xlabel('hour EDT')
ylabel('rank')
yticks(rank_ticks / 200 - ranks.min(), rank_names);
# Output: 16.0 12.0
# We don't have the number of moves made in a game, but we can use a per-game
# rate (moves / minute) and a cumulative sum of rates.
# Each game is assumed to proceed at one move per move_time seconds.
rates = 60 * 1.0 / move_times

# Build one event list: every game adds its rate at its start time and
# removes it again at its end time.
starts_ends = np.hstack((start_times, end_times))
se_rates = np.hstack((rates, -rates))

# Sort the events by time; the running sum is then the total move rate of all
# games in progress after each event.
order = np.argsort(starts_ends)
starts_ends = starts_ends[order]
cummulative_rates = np.cumsum(se_rates[order])

# Sample every 1 minute between the first and the last event, interpolating
# linearly between events.
sample_times = np.arange(starts_ends[0], starts_ends[-1], np.timedelta64(1, 'm'))
sample_rates = np.interp(sample_times.astype(float), starts_ends.astype(float), cummulative_rates)
from scipy.signal import medfilt
def smooth(v, order):
for i in range(order):
tmp = 0.5 * v
tmp[1:] += v[:-1] * 0.25
tmp[:-1] += v[1:] * 0.25
tmp[0] += 0.25 * v[0]
tmp[-1] += 0.25 * v[-1]
v = tmp
return tmp
# Add together all the days, then sample across minutes.
start_day = np.datetime64('2014-09-09T00:00')
# Position of every sample within its day, as a fraction in [0, 1).  np.mod
# wraps correctly even for samples that fall before start_day, where
# subtracting the truncated integer part would leave a negative fraction and
# make bincount fail.
time_from_midnight = (sample_times - start_day) / np.timedelta64(1, 'D')
time_from_midnight = np.mod(time_from_midnight, 1.0)
minutes_from_midnight = (time_from_midnight * 24 * 60).astype(int)

# Sum the sampled move rates falling into each minute of the day, then smooth
# the profile with 31 passes.
counts = np.bincount(minutes_from_midnight, sample_rates)
counts = smooth(counts, 31)
plot(counts / number_of_days) # normalize by number of days we are summing together

# Label by hour: a tick every two hours along the minute axis.
xticks(np.arange(0, 25, 2) * 60, np.arange(0, 25, 2));
# NOTE(review): the earlier start-hour histogram shifts UTC hours by four to
# get EDT, but no such shift is applied here; confirm that measuring from
# start_day really yields EDT in the environment this runs in.
xlabel('Hour EDT')
ylabel('Moves per minute')
title('Total moves on server per minute by time of day');
!gist -p --update https://gist.github.com/thouis/be68b0d138e70b43769c "OGS Games.ipynb"
# Output: https://gist.github.com/be68b0d138e70b43769c