#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('pylab', 'inline')
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm


# Notebook-wide figure defaults: 10x6 inch canvas rendered at 300 dpi.
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams.update({'figure.dpi': 300})

# ggplot theme with a softer grey for all text elements.
plt.style.use('ggplot')
#plt.rcParams['font.family'] = 'Myriad Pro'
plt.rcParams.update({'text.color': '#555555'})


# In[2]:


# Load data: read the games table and index it by the `id` column.

df = pd.read_csv('games_Jan2018.csv')
df.set_index('id', inplace=True)

df.type.value_counts()

# Let's filter out expansions, and focus our analysis on base games.
# .copy() makes the result an independent frame, so the column assignments
# below (and in later cells) do not write into a slice of the original.
df = df[df.type == 'boardgame'].copy()

# A little bit of data cleaning to set zeros to NAs where they should be NAs
# (np.nan instead of pd.np.nan: the pandas.np alias no longer exists in pandas >= 2.0)
zero_means_missing = ['yearpublished', 'minplayers', 'maxplayers', 'playingtime',
                      'minplaytime', 'maxplaytime', 'minage']
df[zero_means_missing] = df[zero_means_missing].replace(0, np.nan)

# Boolean masks for games with at least 150 ratings / 150 weight votes,
# followed by how many games satisfy each.
enough_ratings = df.users_rated >= 150
enough_weights = df.total_weights >= 150
enough_ratings.sum(), enough_weights.sum()


# In[ ]:


# In[4]:


# Re-apply the ggplot theme and the grey text colour.
plt.style.use('ggplot')
#plt.rcParams['font.family'] = 'Myriad Pro'
plt.rcParams.update({'text.color': '#555555'})


# # Load data

# In[5]:


# Read the games table and use the `id` column as the row index.
df = pd.read_csv('games_Jan2018.csv').set_index('id')


# In[6]:


# How many rows there are of each entry type.
df['type'].value_counts()


# In[7]:


# Let's filter out expansions, and focus our analysis on base games.
# .copy() detaches the result from the unfiltered frame so that the column
# assignments in later cells do not trigger SettingWithCopyWarning.
df = df[df.type == 'boardgame'].copy()


# In[8]:


# A little bit of data cleaning to set zeros to NAs where they should be NAs.
# Uses np.nan directly: the pandas.np alias was deprecated in pandas 1.0 and
# no longer exists in pandas >= 2.0.
zero_means_missing = ['yearpublished', 'minplayers', 'maxplayers', 'playingtime',
                      'minplaytime', 'maxplaytime', 'minage']
df[zero_means_missing] = df[zero_means_missing].replace(0, np.nan)


# In[9]:


# Boolean masks: games with at least 150 user ratings / 150 weight votes.
enough_ratings = df['users_rated'].ge(150)
enough_weights = df['total_weights'].ge(150)
# Number of games passing each threshold.
(enough_ratings.sum(), enough_weights.sum())


# # Complexity / weight

# BGG allows users to rate the "weight" or complexity of a game. 
# This is a single measure for each game on a scale from 1 to 5, and it doesn't decouple the different types of complexity 
# (e.g. Chess, which is relatively easy in terms of rules but can be quite complex in terms of how to use those rules to play, as opposed to Terra Mystica, where it takes a while to learn all the rules but, once you've got the hang of them, the gameplay
# is relatively straightforward).
# 
# N.B. There's probably elegant terminology for these different types of complexity. Colm may know the answer.
# 
# 
# Let's find out how reliable this information is by running some rudimentary tests against some simple intuition-based proxies for complexity

# In[10]:


# Testing weight rating against minimum age, restricted to games with more
# than 100 weight votes; x positions are jittered.
games_with_weight_votes = df[df.total_weights > 100]

sns.regplot(data=games_with_weight_votes,
            x='minage',
            y='average_weight',
            x_jitter=.3,
            scatter_kws={'s': 6})


# Min age is a loose proxy for complexity, but it can also be set because of content type,
# such as very explicit material (e.g. Cards Against Humanity).

# In[12]:


# Scatter of average rating against average weight, with a dotted black
# regression line, for games that pass the weight-vote threshold.
scatter_style = {'alpha': 0.8,
                 's': 10,
                 'color': None,
                 'cmap': 'seismic_r'}
trend_style = {'lw': 1,
               'ls': ':',
               'color': 'k'}
cax = sns.regplot(x='average_weight',
                  y='average_rating',
                  data=df[enough_weights],
                  scatter_kws=scatter_style,
                  line_kws=trend_style)

# Hand-placed call-outs: (label, (weight, rating) of the point, (x, y) of the text).
callouts = [
    ('Monopoly', (1.685, 4.42057), (1.5, 3)),
    ("Sid Meier's Civilization: The Boardgame", (3.6454, 5.59183), (3, 4.5)),
    ("Risk", (2.1072, 5.57929), (2, 4)),
    ("Gloomhaven", (3.78, 9.00657), (3, 9.1)),
    ("Kingdom Death: Monster", (4.186, 8.97231), (3.2, 8.7)),
    ("Pandemic Legacy", (2.8026, 8.66878), (2.2, 9.0)),
    ("Codenames", (1.3535, 7.90691), (1.0, 8.5)),
    ("7 Wonders: Duel", (2.2463, 8.19443), (1.5, 9.1)),
    ("Patchwork", (1.7131, 7.83136), (1.2, 8.8)),
    ("Blackbeard", (3.3218, 6.09174), (3, 5)),
    ("Rise and Decline of the Third Reich", (4.2945, 6.79503), (3.5, 6)),
]
for callout_text, point_xy, text_xy in callouts:
    plt.gca().annotate(callout_text,
                       xy=point_xy,
                       xytext=text_xy,
                       arrowprops=dict(facecolor='black', arrowstyle="->", edgecolor='k'))

plt.ylabel('Average rating')
plt.xlabel('Average weight score (complexity)')
plt.title('Rating vs complexity')


# Disabled colour bar experiment, kept for reference:
#ax1 = plt.gcf().add_axes([0.91, 0.125, 0.01, 0.75])
#norm = mpl.colors.Normalize(vmin=3, vmax=5)
#cb1 = mpl.colorbar.ColorbarBase(ax1, cmap='seismic_r',
#                                norm=norm,
#                                orientation='vertical')
#cb1.set_ticks([pd.np.arange(3,6,1)])
#cb1.set_ticklabels(['1,000','10,000','100,000'])
#cb1.set_label('Number of owners')

plt.gcf().set_size_inches(10,6)


# In[13]:


# Degree-1 least-squares fit of average rating on average weight, using only
# games that pass the weight-vote threshold and have average_weight > 1.5.
# The cell output is the array [slope, intercept].
# (np.polyfit instead of pd.np.polyfit: the pandas.np alias no longer exists
# in pandas >= 2.0; the filter is now evaluated once instead of twice.)
fit_sample = df[enough_weights & (df.average_weight > 1.5)]
np.polyfit(fit_sample.average_weight, fit_sample.average_rating, 1)


# In[14]:


# Slope and intercept of the linear rating-vs-weight trend.
# NOTE(review): these look like the output of the polyfit cell above pasted in
# by hand -- re-check them whenever the input data or the fit filter changes.
COMPLEXITY_SLOPE = 0.48932139
COMPLEXITY_INTERCEPT = 5.80326323

# Shrinkage prior: every game is blended with PRIOR_VOTES dummy ratings of PRIOR_RATING.
PRIOR_RATING = 5.5
PRIOR_VOTES = 1000

# Residual of each game's rating from the linear trend in weight.
df['rating_residual'] = df.average_rating - (df.average_weight * COMPLEXITY_SLOPE + COMPLEXITY_INTERCEPT)
# Re-centre the residual on the mean rating of games with enough ratings.
df['corrected_rating'] = df['rating_residual'] + df[enough_ratings].average_rating.mean()
# Pull the corrected rating towards the prior; games with few ratings move most.
df['bayes_corrected_rating'] = (df.users_rated * df['corrected_rating'] + PRIOR_RATING * PRIOR_VOTES) / (df.users_rated + PRIOR_VOTES)
# Ranks (1 = best). A stored value of 0 is treated as missing and left unranked.
# np.nan replaces pd.np.nan, which no longer exists in pandas >= 2.0.
df['BGG_rank'] = df.bayes_average_rating.replace(0.000, np.nan).rank(method='min', ascending=False)
# The corrected rank is only computed for games with at least 30 ratings; others get NaN.
df['corrected_BGG_rank'] = df[df.users_rated >= 30].bayes_corrected_rating.replace(0.00, np.nan).rank(method='min', ascending=False)
# Positive rank_change means the game moved up (to a smaller rank number) after correction.
df['rating_change'] = df['bayes_corrected_rating'] - df['bayes_average_rating']
df['rank_change'] = df['BGG_rank'] - df['corrected_BGG_rank']


# In[63]:


# Write the full frame, including the index, to a CSV file in the working directory.
df.to_csv(path_or_buf='complexity_corrected_data.csv')


# In[15]:


# Top 100 games by shrunk, complexity-corrected rating (highest first).
df.sort_values(by='bayes_corrected_rating', ascending=False).iloc[:100]#.to_clipboard()


# In[16]:


# Biggest winners: games in the top 100 of either ranking, ordered by how many
# places they gained (largest rank_change first).
in_either_top_100 = (df.BGG_rank <= 100) | (df.corrected_BGG_rank <= 100)
df[in_either_top_100].sort_values(by='rank_change', ascending=False)


# In[17]:


# Biggest losers: same top-100-of-either-ranking selection, ordered by
# rank_change ascending (largest drops first), first 50 rows.
top_100_before_or_after = (df.BGG_rank <= 100) | (df.corrected_BGG_rank <= 100)
df[top_100_before_or_after].sort_values(by='rank_change', ascending=True).iloc[:50]#.to_clipboard()


# In[ ]:


# In[ ]:


# ### Rendering animated graph

# In[18]:


# Issue the `%matplotlib inline` line magic before the animation section.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[19]:


# Default resolution for newly created figures.
plt.rcParams.update({'figure.dpi': 300})


# In[20]:


import matplotlib.animation as manimation

# Frame rate shared by the movie writer and the frame loop.
fps = 25

# Tags embedded in the output movie file.
metadata = {'title': 'Movie Test',
            'artist': 'Matplotlib',
            'comment': 'Movie support!'}

# Look up the ffmpeg-backed writer class in matplotlib's writer registry.
FFMpegWriter = manimation.writers['ffmpeg']
writer = FFMpegWriter(fps=fps, metadata=metadata)

# Canvas that every animation frame is drawn on.
fig = plt.figure(dpi=100, figsize=(10, 6))

def make_frame(t):
    """Draw one frame of the before/after correction animation on the current axes.

    The y values are a blend of the original and the complexity-corrected
    ratings: ``(1 - w) * average_rating + w * corrected_rating``, where the
    blend weight ``w`` is 0 for ``t < 1``, rises linearly to 1 between
    ``t = 1`` and ``t = 3``, and stays at 1 afterwards.

    Parameters
    ----------
    t : float
        Animation time of the frame.

    Returns
    -------
    None
        The frame is produced as a side effect on the current matplotlib axes.
    """
    # Same axes that plt.cla()/plt.xlabel() would act on, made explicit.
    ax = plt.gca()
    ax.cla()

    # Piecewise-linear blend weight.
    w = 0.0 if t < 1 else ((t - 1) / 2.0 if t < 3 else 1.0)

    # Apply the vote-count mask once per frame instead of three times.
    games = df[enough_weights]
    blended_rating = (1 - w) * games.average_rating + w * games.corrected_rating

    sns.regplot(x=games.average_weight,
                y=blended_rating,
                ci=None,
                scatter_kws={'alpha': 0.8,
                             's': 10},
                ax=ax)

    # Set the labels explicitly: the blended y series is a computed expression.
    ax.set_ylabel('Average rating')
    ax.set_xlabel('Average weight score (complexity)')

    return None

# Render t = 0 .. 4 inclusive, one frame every 1/fps.
# np.linspace with an explicit frame count replaces pd.np.arange(0, 4+1./fps, 1./fps):
# the pandas.np alias no longer exists in pandas >= 2.0, and arange with a
# fractional step can gain or lose the last element through floating-point rounding.
n_frames = int(4 * fps) + 1
with writer.saving(fig, "manual_animtion.mp4", 150):
    for t in np.linspace(0, 4, n_frames):
        make_frame(t)
        # make_frame clears the axes, so the y range is re-applied on every frame.
        plt.ylim(2, 9.5)

        writer.grab_frame()


# In[21]:


# Convert the mp4 to gif using ffmpeg
# the code below does that, including a gif palette optimization

# #!/bin/sh
# 
# palette="/tmp/palette.png"
# 
# filters="fps=25,scale=750:-1:flags=lanczos"
# 
# ffmpeg -v warning -i manual_animtion.mp4 -vf "$filters,palettegen" -y $palette
# ffmpeg -v warning -i manual_animtion.mp4 -i $palette -lavfi "$filters [x]; [x][1:v] paletteuse" -y manual_animtion.gif


# In[ ]:


# ### Rendering interactive complexity vs rating plot

# In[22]:


# Interactive plot using Altair

import altair as alt
#from vega_datasets import data
alt.renderers.enable('notebook')

#iris = data.iris()
relevant_columns=['average_weight', 'average_rating', 'corrected_rating', 'name']

# Select the rows and columns once; both charts share the same data.
chart_data = df[enough_weights][relevant_columns]


def _complexity_scatter(data, y_field, y_title, chart_title):
    """Return a filled-point scatter of `y_field` against `average_weight`.

    `y_title` labels the y axis and `chart_title` the chart; hovering a point
    shows the `name` column.
    """
    return alt.Chart(data).mark_point(filled=True).encode(
        alt.X('average_weight', axis=alt.Axis(title='Complexity score'), scale=alt.Scale(domain=(.8, 5))),
        alt.Y(y_field, axis=alt.Axis(title=y_title)),
        color=alt.value("#E24A33"),
        tooltip='name'
    ).properties(width=500, title=chart_title)


before_correction = _complexity_scatter(chart_data,
                                        'average_rating',
                                        'Game rating',
                                        'Before complexity-bias correction')

after_correction = _complexity_scatter(chart_data,
                                       'corrected_rating',
                                       'Complexity-bias-corrected game rating',
                                       'After complexity-bias correction')

# alt.hconcat is the public helper for side-by-side layout. Passing the list
# positionally to alt.HConcatChart is fragile: on Altair versions whose
# constructor takes `data` first, the list is bound to `data`, not `hconcat`.
alt.hconcat(before_correction, after_correction)#.save('interactive_chart.html')


# In[29]:


# Altair's API is pretty slick, but it doesn't seem flexible enough to accommodate what I'm trying to do
# There's no intuitive way to get multiline hover tooltips


# In[30]:


# Let's generate it using Bokeh instead...


# In[27]:


from bokeh.plotting import figure, output_file, output_notebook, show, ColumnDataSource
from bokeh.models import HoverTool

#output_file("toolbar.html")
output_notebook()

# Columns needed for the glyphs and for the hover tooltip.
hover_columns = ['name', 'average_weight', 'average_rating', 'corrected_rating', 'BGG_rank', 'corrected_BGG_rank']
source = ColumnDataSource(data=df[enough_weights][hover_columns])

# Multi-line tooltip: one (label, "@column") pair per row.
hover = HoverTool(tooltips=[
    ("Name", "@name"),
    ("Average weight (complexity)", "@average_weight"),
    ("Original rating", "@average_rating"),
    ("Corrected rating", "@corrected_rating"),
    ("Original rank", "@BGG_rank"),
    ("Corrected rank", "@corrected_BGG_rank"),
])

# width/height instead of plot_width/plot_height: the plot_* names were
# removed in Bokeh 3.0, while width/height are accepted by figure() there.
# NOTE(review): older Bokeh releases are believed to accept width/height as
# aliases too -- confirm against the Bokeh version pinned for this notebook.
p = figure(width=800, height=450, tools=[hover],
           title="Game ratings after complexity-bias correction")

p.circle('average_weight',
         'corrected_rating',
         size=4,
         source=source,
         color='#E24A33')

p.title.align = 'center'
p.title.text_font = 'Open Sans'

# Same label styling for both axes.
for axis, axis_label in ((p.xaxis, 'Average weight score (Complexity)'),
                         (p.yaxis, 'Average rating')):
    axis.axis_label = axis_label
    axis.axis_label_text_font_style = "normal"
    axis.axis_label_text_font = 'Open Sans'

show(p)


# In[ ]: