In [1]:

%pylab inline

Populating the interactive namespace from numpy and matplotlib

In [2]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

In [7]:

# Seed NumPy's global generator so the stochastic cells reproduce on a fresh
# "Restart & Run All"; scipy.stats draws from this generator by default.
SEED = 42
np.random.seed(SEED)

n = 50000  # number of Monte Carlo draws per simulated series
k = 60     # number of binomial trials behind each simulated count

In [8]:

# n draws of San Miguel's per-trial success probability from Beta(6+1, 14+1).
# NOTE(review): presumably 6 successes / 14 failures on a uniform prior - confirm.
pSM = pd.Series(stats.beta(6+1, 14+1).rvs(n))
# One binomial count (k trials) per sampled probability, drawn in a single
# vectorized call instead of building a frozen distribution for every row.
sm = pd.Series(stats.binom.rvs(k, pSM.values, size=n), name='San Miguel')

In [9]:

# Histogram (20 bins) of the simulated San Miguel counts.
sm.hist(bins=20)

Out[9]:

<matplotlib.axes._subplots.AxesSubplot at 0x1148b3a90>

In [11]:

# n draws of Argentinos' per-trial success probability from Beta(10+1, 10+1).
# NOTE(review): presumably 10 successes / 10 failures on a uniform prior - confirm.
pArg = pd.Series(stats.beta(10+1, 10+1).rvs(n))
# One binomial count (k trials) per sampled probability, drawn in a single
# vectorized call instead of building a frozen distribution for every row.
arg = pd.Series(stats.binom.rvs(k, pArg.values, size=n), name='Argentinos')

In [12]:

# Histogram (20 bins) of the simulated Argentinos counts.
arg.hist(bins=20)

Out[12]:

<matplotlib.axes._subplots.AxesSubplot at 0x114e7a150>

In [72]:

# Overlay both simulated distributions on one figure. Each legend label now
# matches the series it describes (sm is San Miguel, arg is Argentinos); the
# labels were previously swapped.
sm.hist(bins=20, alpha=0.5, label='San Miguel')
arg.hist(bins=20, alpha=0.5, label='Argentinos')
plt.legend()
plt.title('Beta-Binomial')
plt.savefig('resultsBetBinomial.png')

In [73]:

# Plain binomial baseline: the success probability is fixed at the point
# estimates 6/20 and 10/20 instead of being drawn from a Beta distribution.
# The trial count reuses k (rather than a repeated literal 60) so this baseline
# always matches the beta-binomial simulation it is compared against.
oldSM = pd.Series(stats.binom(k, 6.0/20).rvs(n))
oldArg = pd.Series(stats.binom(k, 10.0/20).rvs(n))

In [74]:

# Overlay the two plain-binomial simulations. Each legend label now matches
# the series it describes (oldSM is San Miguel, oldArg is Argentinos); the
# labels were previously swapped.
oldSM.hist(bins=20, alpha=0.5, label='San Miguel')
oldArg.hist(bins=20, alpha=0.5, label='Argentinos')
plt.legend()
plt.title('Binomial')
plt.savefig('Binomial.png')

In [75]:

# Share of simulated pairs in which the plain-binomial San Miguel count is
# strictly greater than the Argentinos count (ties are not counted).
(oldSM > oldArg).mean()

Out[75]:

0.0089800000000000001

In [15]:

# Average simulated count for Argentinos under the beta-binomial model.
arg.mean()

Out[15]:

30.0253

In [16]:

# Average simulated count for San Miguel under the beta-binomial model.
sm.mean()

Out[16]:

19.0511

In [17]:

# Share of simulated pairs in which the San Miguel count is strictly greater
# than the Argentinos count (ties are not counted).
(sm > arg).mean()

Out[17]:

0.12916

In [19]:

# Share of San Miguel simulations with a count of at least 21.
# NOTE(review): the threshold 21 is not explained in the visible cells - confirm its meaning.
(sm >= 21).mean()

Out[19]:

0.39300000000000002

In [20]:

# NOTE(review): z is not referenced by any other cell visible here - confirm it is still needed.
z = 1000

In [31]:

def bootsrap(n, comparison=None):
    """Bootstrap the mean of a boolean outcome series.

    Each replicate resamples the outcomes with replacement, keeping the
    original sample size, and records the mean of the resample.

    Parameters
    ----------
    n : int
        Number of bootstrap replicates to draw.
    comparison : array-like of bool, optional
        Outcomes to resample. When omitted, the notebook-level series are
        used and the outcomes are ``sm > arg``.

    Returns
    -------
    pandas.Series
        ``n`` floats, one resampled mean per replicate.

    Raises
    ------
    ValueError
        If there are no outcomes to resample.
    """
    if comparison is None:
        # Default: the simulated San Miguel vs. Argentinos comparison built
        # in the earlier notebook cells.
        comparison = sm > arg

    # Resample the values directly rather than looking labels up with the
    # removed ``.ix`` indexer; this also works for non-integer or duplicated
    # indexes.
    outcomes = np.asarray(comparison, dtype=float)
    if outcomes.size == 0:
        raise ValueError("cannot bootstrap an empty comparison")

    results = [
        np.random.choice(outcomes, size=outcomes.size, replace=True).mean()
        for _ in range(n)
    ]
    return pd.Series(results, dtype=float)
    

In [32]:

# 500 bootstrap replicates of the mean of (sm > arg).
bootsrapped = bootsrap(500)

In [84]:

# Distribution of the bootstrap replicates; the figure is also written to disk.
bootsrapped.hist(bins=20, alpha=0.5)
# Title typo fixed ("proability" -> "probability").
plt.title('Bootstrapping the probability of SM winning the match')
plt.savefig('bootstrap.png')

In [34]:

# Summary statistics (count, mean, std, quartiles, extremes) of the bootstrap replicates.
bootsrapped.describe()

Out[34]:

count    500.000000
mean       0.129136
std        0.001469
min        0.124100
25%        0.128175
50%        0.129110
75%        0.130125
max        0.133680
dtype: float64

In [79]:

# 2.5% and 97.5% quantiles of the replicates: the bounds of the central 95%
# percentile interval.
bootsrapped.quantile(0.025), bootsrapped.quantile(0.975)

Out[79]:

(0.12609900000000002, 0.1318)

In [82]:

# NOTE(review): unexplained scratch arithmetic. 12.9 looks like the ~12.9%
# beta-binomial share computed above and .8 may be the ~0.8% (0.008) binomial
# figure expressed in percent - confirm, and name the values if so.
12.9/.8

Out[82]:

16.125

In [39]:

def betaBinomial(alpha, beta, n, k):
    """Draw ``n`` beta-binomial samples.

    A success probability is drawn from Beta(``alpha``, ``beta``) for each
    sample, then a Binomial(``k``, p) count is drawn for that probability.

    Parameters
    ----------
    alpha, beta : float
        Shape parameters of the Beta distribution.
    n : int
        Number of samples to draw.
    k : int
        Number of binomial trials per sample.

    Returns
    -------
    pandas.Series
        ``n`` integer counts, each between 0 and ``k``.
    """
    p = stats.beta(alpha, beta).rvs(n)
    # A single vectorized draw: one binomial count per sampled probability,
    # instead of constructing a frozen distribution for every element.
    successes = stats.binom.rvs(k, p, size=p.shape)
    return pd.Series(successes)

In [57]:

def resultsForXTimesBigger(x=1):
    """Estimate how often San Miguel out-scores Argentinos at evidence scale ``x``.

    The Beta shape parameters are built from counts scaled by ``x``
    (6 and 14 for San Miguel, 10 and 10 for Argentinos, each plus one), and
    10000 beta-binomial counts are simulated per team using the
    notebook-level trial count ``k``.

    Returns the fraction of simulated pairs in which the San Miguel count is
    strictly greater than the Argentinos count.
    """
    n = 10000
    smDraws = betaBinomial(6*x + 1, 14*x + 1, n, k)
    argDraws = betaBinomial(10*x + 1, 10*x + 1, n, k)
    smAhead = smDraws > argDraws
    return smAhead.mean()

In [85]:

# Ten scale factors, evenly spaced on a log axis from 10**1 to 10**4.
space = np.logspace(1, 4, 10)
# Simulated share of San Miguel wins at each scale factor, indexed by the factor.
resultsByIncrement = pd.Series(list(map(resultsForXTimesBigger, space)), index=space)

In [94]:

# Reference level for the plain binomial model, taken from the simulation
# itself instead of a hand-rounded literal (0.008) that drifted from the
# simulated value.
binomialReference = (oldSM > oldArg).mean()

resultsByIncrement.plot(label='Beta-Binomial')
# Flat line at the binomial reference, drawn over the same x values.
pd.Series(binomialReference, index=resultsByIncrement.index).plot(label='Binomial')
plt.yscale('log')
plt.xscale('log')
plt.xlabel('x times original data size')
plt.ylabel('probability of SM winning the match')
plt.legend()
plt.savefig('convergence.png')

In [91]:

# Same comparison on linear axes. The binomial reference is taken from the
# simulation (share of oldSM > oldArg) instead of the hard-coded 0.008.
resultsByIncrement.plot()
pd.Series((oldSM > oldArg).mean(), index=resultsByIncrement.index).plot()

Out[91]:

<matplotlib.axes._subplots.AxesSubplot at 0x11544c390>

In [ ]: