In [51]:
import numpy as np
import scipy.stats as st
thetas = st.beta(8,2).rvs(15)
total = np.array([3, 5, 10, 15, 20, 100, 200, 300, 500, 1000, 1200, 1300, 1500, 2000, 3000])
data = np.random.binomial(n=total, p=thetas)
In [52]:
data[0]/3
Out[52]:
1.0
In [53]:
thetas
Out[53]:
array([ 0.90234666,  0.77770652,  0.52304307,  0.73381546,  0.72889516,
        0.74627051,  0.57213476,  0.89690436,  0.92879218,  0.93841433,
        0.65561338,  0.61210462,  0.80802887,  0.69834349,  0.77299452])
In [69]:
import pymc3 as pm
import scipy.optimize as opt
with pm.Model() as model:
    mu = pm.Beta('mu', alpha=1, beta=1)
    sd = pm.HalfCauchy('sd', 1)
    p = pm.Beta('p', mu=mu, sd=sd, shape=15)
    like = pm.Binomial('like', n=total, p=p, observed=data)

    start = pm.find_MAP(fmin=opt.fmin_l_bfgs_b)
    step = pm.NUTS(scaling=start)
    tr = pm.sample(50000, step, start=start)
 [-----------------100%-----------------] 50000 of 50000 complete in 222.4 sec
In [70]:
pm.forestplot(tr[-10000:], vars=['p'])
Out[70]:
<matplotlib.gridspec.GridSpec at 0x1186d8400>
In [34]:
list(zip(tr['p'][-30000:].mean(axis=0), thetas, data, total))
Out[34]:
[(0.55961789598272638, 0.9048688449760327, 2, 3),
 (0.59743442511005618, 0.83703358495407454, 5, 5),
 (0.53587298845288611, 0.80082260744167066, 6, 10),
 (0.68179456178057607, 0.8943780067546685, 15, 15),
 (0.59995848759635884, 0.68379053483514951, 14, 20),
 (0.66827784888577924, 0.69657560428601351, 72, 100),
 (0.69001276048125215, 0.66860369029641487, 143, 200),
 (0.65467404552646546, 0.64741839146731395, 201, 300),
 (0.76976130691899136, 0.77045778535120635, 393, 500),
 (0.80446415637545554, 0.80695101187645635, 813, 1000),
 (0.95655570925941291, 0.96521909913351922, 1163, 1200),
 (0.82009969956183992, 0.82442652301704245, 1075, 1300),
 (0.46184460144948092, 0.48128043566213058, 690, 1500),
 (0.48730549488449937, 0.48482839460458582, 973, 2000),
 (0.87786523415434448, 0.86963912480896821, 2646, 3000)]
In [16]:
import pymc3 as pm
import scipy.optimize as opt
with pm.Model() as model2:
    alpha = pm.Gamma('alpha', alpha=1, beta=0.01)
    beta = pm.Gamma('beta', alpha=1, beta=0.01)
    p = pm.Beta('p', alpha, beta, shape=15)
    like = pm.Binomial('like', n=total, p=p, observed=data)

    start = pm.find_MAP(fmin=opt.fmin_l_bfgs_b)
    step = pm.NUTS(scaling=start)
    tr2 = pm.sample(20000, step, start=start)
 [-----------------100%-----------------] 20000 of 20000 complete in 65.9 sec
In [17]:
list(zip(tr2['p'].mean(axis=0), thetas, data, total))
Out[17]:
[(0.5995903822003994, 0.95732362362529921, 3, 3),
 (0.61665212443105855, 0.91781376687347549, 5, 5),
 (0.59482954583217384, 0.55017073059165544, 7, 10),
 (0.63274929156549808, 0.76285667729292361, 12, 15),
 (0.68015078734715018, 0.90380923113960754, 18, 20),
 (0.79972788800301109, 0.92437581366448396, 89, 100),
 (0.86659334378600883, 0.90748605495980261, 185, 200),
 (0.6758747745749355, 0.70974350810077791, 207, 300),
 (0.54404561663565509, 0.5555296239391142, 271, 500),
 (0.65543727956274489, 0.62922569658005001, 659, 1000),
 (0.90654867058680899, 0.90583557098755352, 1101, 1200),
 (0.81327883126700495, 0.83853767882007768, 1067, 1300),
 (0.8857385857915826, 0.90719813894152856, 1341, 1500),
 (0.79257519670653165, 0.79097531560881429, 1594, 2000),
 (0.94311971519281379, 0.9500718906290827, 2844, 3000)]
In [54]:
import pymc

def beta_binomial():
    
    alpha = pymc.Gamma('alpha', alpha=1, beta=0.01)
    beta = pymc.Gamma('beta', alpha=1, beta=0.01)
    p = pymc.Beta('p',alpha, beta, value=[0.5]*15)
    like = pymc.Binomial('like', n=total, p=p, observed=True, value=data)
    
    return locals()
In [60]:
M = pymc.MCMC(beta_binomial())
In [61]:
M.sample(50000, 40000)
M.sample(50000, 40000)
 [-----------------100%-----------------] 50000 of 50000 complete in 6.1 sec
In [62]:
pymc.Matplot.summary_plot(M.p)