import numpy as np
import scipy.stats as st
thetas = st.beta(8,2).rvs(15)
total = np.array([3, 5, 10, 15, 20, 100, 200, 300, 500, 1000, 1200, 1300, 1500, 2000, 3000])
data = np.random.binomial(n=total, p=thetas)
data[0]/3
1.0
thetas
array([ 0.90234666, 0.77770652, 0.52304307, 0.73381546, 0.72889516, 0.74627051, 0.57213476, 0.89690436, 0.92879218, 0.93841433, 0.65561338, 0.61210462, 0.80802887, 0.69834349, 0.77299452])
import pymc3 as pm
import scipy.optimize as opt
with pm.Model() as model:
mu = pm.Beta('mu', alpha=1, beta=1)
sd = pm.HalfCauchy('sd', 1)
p = pm.Beta('p', mu=mu, sd=sd, shape=15)
like = pm.Binomial('like', n=total, p=p, observed=data)
start = pm.find_MAP(fmin=opt.fmin_l_bfgs_b)
step = pm.NUTS(scaling=start)
tr = pm.sample(50000, step, start=start)
[-----------------100%-----------------] 50000 of 50000 complete in 222.4 sec
pm.forestplot(tr[-10000:], vars=['p'])
<matplotlib.gridspec.GridSpec at 0x1186d8400>
list(zip(tr['p'][-30000:].mean(axis=0), thetas, data, total))
[(0.55961789598272638, 0.9048688449760327, 2, 3), (0.59743442511005618, 0.83703358495407454, 5, 5), (0.53587298845288611, 0.80082260744167066, 6, 10), (0.68179456178057607, 0.8943780067546685, 15, 15), (0.59995848759635884, 0.68379053483514951, 14, 20), (0.66827784888577924, 0.69657560428601351, 72, 100), (0.69001276048125215, 0.66860369029641487, 143, 200), (0.65467404552646546, 0.64741839146731395, 201, 300), (0.76976130691899136, 0.77045778535120635, 393, 500), (0.80446415637545554, 0.80695101187645635, 813, 1000), (0.95655570925941291, 0.96521909913351922, 1163, 1200), (0.82009969956183992, 0.82442652301704245, 1075, 1300), (0.46184460144948092, 0.48128043566213058, 690, 1500), (0.48730549488449937, 0.48482839460458582, 973, 2000), (0.87786523415434448, 0.86963912480896821, 2646, 3000)]
import pymc3 as pm
import scipy.optimize as opt
with pm.Model() as model2:
alpha = pm.Gamma('alpha', alpha=1, beta=0.01)
beta = pm.Gamma('beta', alpha=1, beta=0.01)
p = pm.Beta('p', alpha, beta, shape=15)
like = pm.Binomial('like', n=total, p=p, observed=data)
start = pm.find_MAP(fmin=opt.fmin_l_bfgs_b)
step = pm.NUTS(scaling=start)
tr2 = pm.sample(20000, step, start=start)
[-----------------100%-----------------] 20000 of 20000 complete in 65.9 sec
list(zip(tr2['p'].mean(axis=0), thetas, data, total))
[(0.5995903822003994, 0.95732362362529921, 3, 3), (0.61665212443105855, 0.91781376687347549, 5, 5), (0.59482954583217384, 0.55017073059165544, 7, 10), (0.63274929156549808, 0.76285667729292361, 12, 15), (0.68015078734715018, 0.90380923113960754, 18, 20), (0.79972788800301109, 0.92437581366448396, 89, 100), (0.86659334378600883, 0.90748605495980261, 185, 200), (0.6758747745749355, 0.70974350810077791, 207, 300), (0.54404561663565509, 0.5555296239391142, 271, 500), (0.65543727956274489, 0.62922569658005001, 659, 1000), (0.90654867058680899, 0.90583557098755352, 1101, 1200), (0.81327883126700495, 0.83853767882007768, 1067, 1300), (0.8857385857915826, 0.90719813894152856, 1341, 1500), (0.79257519670653165, 0.79097531560881429, 1594, 2000), (0.94311971519281379, 0.9500718906290827, 2844, 3000)]
import pymc
def beta_binomial():
alpha = pymc.Gamma('alpha', alpha=1, beta=0.01)
beta = pymc.Gamma('beta', alpha=1, beta=0.01)
p = pymc.Beta('p',alpha, beta, value=[0.5]*15)
like = pymc.Binomial('like', n=total, p=p, observed=True, value=data)
return locals()
M = pymc.MCMC(beta_binomial())
M.sample(50000, 40000)
M.sample(50000, 40000)
[-----------------100%-----------------] 50000 of 50000 complete in 6.1 sec
pymc.Matplot.summary_plot(M.p)