from __future__ import print_function # Setup, connect to database, fetch data %pylab inline from pylab import * from prettyplotlib import * import numpy as np import datetime from prettyplotlib import brewer2mpl blue_green = brewer2mpl.get_map('BuGn', 'Sequential', 9).mpl_colormap figsize(8,6) # connect to the player database import sqlite3 db = sqlite3.connect('games.sql') cursor = db.cursor() # useful constants ONE_DAY = 3600 * 24.0 # fetch all the data, convert to numpy arrays ratings_white, ratings_black, start_times, end_times, board_sizes, move_times = \ zip(*cursor.execute('SELECT rating_white, rating_black, start_time, end_time, size, move_time FROM games').fetchall()) # Convert to numpy arrays ratings_white = np.array(ratings_white) ratings_black = np.array(ratings_black) start_times = np.array(start_times, dtype='datetime64') last_start_time = start_times.max() end_times = np.array([t or last_start_time for t in end_times], dtype='datetime64') board_sizes = np.array(board_sizes) move_times = np.array(move_times, dtype=float) # games without time control have move_time of 0 - set it to a move per day like most correspondence move_times[move_times == 0] = ONE_DAY # ...and clamp correspondence games at one move per day for simplicity below move_times[move_times > ONE_DAY] = ONE_DAY # Some basics # how many days of data do we have? 
number_of_days = start_times.ptp() / np.timedelta64(1, 'D')
print("{:.3} days of data".format(number_of_days))

# call anything less than 5 minutes / move "live", and more than 6 hours correspondence
live_games_mask = move_times < 60 * 5
corr_games_mask = move_times > 60 * 60 * 6

print("{} total games".format(len(board_sizes)))
print("{} live".format(sum(live_games_mask)))
print("{} correspondence".format(sum(corr_games_mask)))
print("")

# tally live games by board size, most common first
counts = np.bincount(board_sizes[live_games_mask])
order = np.argsort(counts)[::-1]
print("Game count by board size")
for sz in order:
    if counts[sz] > 0:
        print("{}x{}\t\t{}\t\t{:.2%}".format(sz, sz, counts[sz],
                                             counts[sz] / float(len(board_sizes))))
print("\n")

# show game starts over the last N days
plot(np.bincount(((start_times - start_times.min()) / np.timedelta64(1, 'D')).astype(int)));
title('# of game started by day')
ylabel('# of games started')
xlabel('day')
# label ticks with real dates derived from the data (was hard-coded to 2014-09-09)
first_day = start_times.min().astype('datetime64[D]')
xticks(np.arange(0, 31, 5),
       first_day + np.arange(0, 31, 5) * np.timedelta64(1, 'D'),
       rotation=45);

hist(move_times[live_games_mask] / 60.0, 30)
title('Live games only - # of games by time per move')
xlabel('minutes per move')
ylabel('# of games');

def rating_to_rank(r):
    """Convert a numeric rating to a kyu/dan rank string (rating 2100 == 1 dan)."""
    if r >= 2100:
        # floor division: '/' would yield floats ("1.5d") under Python 3
        return "{}d".format(1 + (r - 2100) // 100)
    else:
        return "{}k".format((2100 - r) // 100)

rank_ticks = np.array([100, 500, 1000, 1500, 1800, 2000, 2200, 2400])
rank_names = [rating_to_rank(r) for r in rank_ticks]

# Ranks of players
average_ratings = (ratings_white + ratings_black) / 2.0
hist(average_ratings, 25)
title('Average rank of players for all games')
axis('tight')
xticks(rank_ticks, rank_names)
ylabel('# of games')

figure()
hist(average_ratings[live_games_mask], 25)
title('Average rank of players for live games only')
axis('tight')
xticks(rank_ticks, rank_names)
ylabel('# of games');

# find the time of day - dates are in UTC
start_hours = np.array([t.item().hour for t in start_times])[live_games_mask]
# most OGS players are from the US, so move to EDT by subtracting 4
# (mod 24)
start_hours = (start_hours + 20) % 24
hist(start_hours, 24)
axis('tight')
title('game starts by hour EDT')
xlabel('hour in EDT')
ylabel('# of games')
figure();

# group ranks into 2-stone bands, and put everything above 4d into one group
ranks = np.round(average_ratings / 200)[live_games_mask]
ranks[ranks > 12] = 12
rank_count = ranks.max() - ranks.min() + 1
print(rank_count, ranks.max())

# 2D histogram of hour-of-day vs. rank band; bin counts must be integers
D = np.histogram2d(start_hours, ranks, (24, int(rank_count)))[0].T

# apply some smoothing across ranks
Dsmooth = 0.5 * D
Dsmooth[1:, :] += 0.25 * D[:-1, :]
Dsmooth[:-1, :] += 0.25 * D[1:, :]
D = Dsmooth

# origin must be 'lower' ('lower-left' is not a valid matplotlib origin)
imshow(D, cmap=blue_green, interpolation='nearest', origin='lower')
axis('tight')
title('Game starts by EDT hour (x) and rank (y)')
xlabel('hour EDT')
ylabel('rank')
yticks(rank_ticks / 200 - ranks.min(), rank_names)

figure()
# apply a normalization to smooth out some of the spikes in the less active rank/hour combinations
D = D + 25
D = D / D.sum(axis=1).reshape((-1, 1))
imshow(D, cmap=blue_green, interpolation='nearest', origin='lower')
axis('tight')
title('Game starts by EDT hour (x) and rank (y), Normalized within rank')
xlabel('hour EDT')
ylabel('rank')
yticks(rank_ticks / 200 - ranks.min(), rank_names);

# We don't have the number of moves made in a game, but we can use (moves / minute)
# and a cummulative sum of rates
rates = 60 * 1.0 / move_times
starts_ends = np.hstack((start_times, end_times))
se_rates = np.hstack((rates, -rates))
order = np.argsort(starts_ends)
starts_ends = starts_ends[order]
cummulative_rates = np.cumsum(se_rates[order])

# sample every 1 minute
sample_times = np.arange(starts_ends[0], starts_ends[-1], np.timedelta64(1, 'm'))
sample_rates = np.interp(sample_times.astype(float), starts_ends.astype(float), cummulative_rates)

from scipy.signal import medfilt  # NOTE(review): imported but never used below

def smooth(v, order):
    """Apply `order` passes of a [0.25, 0.5, 0.25] kernel to 1-D array `v`.

    Edge samples keep their own weight at the boundary (no wrap-around).
    Returns a new array; `v` itself is never modified in place.
    """
    for i in range(order):
        tmp = 0.5 * v
        tmp[1:] += v[:-1] * 0.25
        tmp[:-1] += v[1:] * 0.25
        tmp[0] += 0.25 * v[0]
        tmp[-1] += 0.25 * v[-1]
        v = tmp
    # return v, not tmp: tmp is unbound (NameError) when order == 0
    return v

# add together
all the days, then sample across minutes start_day = np.datetime64('2014-09-09T00:00') time_from_midnight = (sample_times - start_day) / np.timedelta64(1, 'D') time_from_midnight -= time_from_midnight.astype(int) minutes_from_midnight = (time_from_midnight * 24 * 60).astype(int) counts = np.bincount(minutes_from_midnight, sample_rates) counts = smooth(counts, 31) plot(counts / number_of_days) # normalize by number of days we are summing together # label by hour xticks(np.arange(0, 25, 2) * 60, np.arange(0, 25, 2)); xlabel('Hour EDT') ylabel('Moves per minute') title('Total moves on server per minute by time of day'); !gist -p --update https://gist.github.com/thouis/be68b0d138e70b43769c "OGS Games.ipynb"