%pylab inline
Populating the interactive namespace from numpy and matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
n = 50000
k = 60
pSM = pd.Series(stats.beta(6+1, 14+1).rvs(n))
sm = pSM.apply(lambda p: stats.binom(k, p).rvs())
sm.name = 'San Miguel'
sm.hist(bins=20)
<matplotlib.axes._subplots.AxesSubplot at 0x1148b3a90>
pArg = pd.Series(stats.beta(10+1, 10+1).rvs(n))
arg = pArg.apply(lambda p: stats.binom(k, p).rvs())
arg.name = 'Argentinos'
arg.hist(bins=20)
<matplotlib.axes._subplots.AxesSubplot at 0x114e7a150>
sm.hist(bins=20, alpha=0.5, label='Argentinos')
arg.hist(bins=20, alpha=0.5, label='San Miguel')
plt.legend()
plt.title('Beta-Binomial')
plt.savefig('resultsBetBinomial.png')
oldSM = pd.Series(stats.binom(60, 6.0/20).rvs(n))
oldArg = pd.Series(stats.binom(60, 10.0/20).rvs(n))
oldSM.hist(bins=20, alpha=0.5, label='Argentinos')
oldArg.hist(bins=20, alpha=0.5, label='San Miguel')
plt.legend()
plt.title('Binomial')
plt.savefig('Binomial.png')
(oldSM > oldArg).mean()
0.0089800000000000001
arg.mean()
30.0253
sm.mean()
19.0511
(sm > arg).mean()
0.12916
(sm >= 21).mean()
0.39300000000000002
z = 1000
def bootsrap(n):
comparison = sm > arg
results = []
for i in range(n):
index = random.choice(comparison.index, sm.count(), replace=True)
results.append(comparison.ix[index].mean())
return pd.Series(results)
bootsrapped = bootsrap(500)
bootsrapped.hist(bins=20, alpha=0.5)
plt.title('Bootstrapping the proability of SM winning the match')
plt.savefig('bootstrap.png')
bootsrapped.describe()
count 500.000000 mean 0.129136 std 0.001469 min 0.124100 25% 0.128175 50% 0.129110 75% 0.130125 max 0.133680 dtype: float64
bootsrapped.quantile(0.025), bootsrapped.quantile(0.975)
(0.12609900000000002, 0.1318)
12.9/.8
16.125
def betaBinomial(alpha, beta, n, k):
p = pd.Series(stats.beta(alpha, beta).rvs(n))
return p.apply(lambda p: stats.binom(k, p).rvs())
def resultsForXTimesBigger(x=1):
n = 10000
sm = betaBinomial(6*x + 1, 14*x +1, n, k)
arg = betaBinomial(10*x + 1, 10*x +1, n, k)
return (sm > arg).mean()
space = np.logspace(1, 4, 10)
resultsByIncrement = pd.Series([resultsForXTimesBigger(x) for x in space], index=space)
resultsByIncrement.plot(label='Beta-Binomial')
resultsByIncrement.apply(lambda x: 0.008).plot(label='Binomial')
plt.yscale('log')
plt.xscale('log')
plt.xlabel('x times original data size')
plt.ylabel('probability of SM winning the match')
plt.legend()
plt.savefig('convergence.png')
resultsByIncrement.plot()
resultsByIncrement.apply(lambda x: 0.008).plot()
<matplotlib.axes._subplots.AxesSubplot at 0x11544c390>