import numpy as np
import pandas as pd
from lets_plot import *
LetsPlot.setup_html()
from scipy.stats import binom, beta
$\displaystyle \boxed{\begin{array}{l} X|\theta\sim B_n(\theta) \text{ - likelihood} \\ \theta \sim B(\alpha,\beta) \text{ - prior} \end{array} \Rightarrow \theta|(X=x) \sim B(\alpha+x,\beta+n-x) \text{ - posterior}}$
The model:
class BetaBinomial(object):
def __init__(self, prior_params=(1., 1.), dist=beta, x=np.linspace(0., 1., 100)):
self.prior_params = prior_params
self.dist = dist
self.x = x
def posterior(self, trials = None, observations = None):
a_prior, b_prior = self.prior_params
self.posterior_prob =[]
self.ci = []
for i, n in enumerate (trials):
y = observations[i]
post = self.dist(a_prior + y, b_prior + n - y)
self.posterior_prob.append(post.pdf(self.x))
self.ci.append(post.interval(0.95))
return self.posterior_prob
a_prior = 1.
b_prior = 1.
n = 50
prior_params = (a_prior, b_prior)
bb = BetaBinomial(prior_params)
true_theta = 0.25
trials = np.arange(n)
observations = np.array([binom(n=trials[i], p=true_theta).rvs(size=1)[0] for i in range(n)])
posterior = bb.posterior(trials, observations)
columns = ['x']
columns.extend(['p_' + str(i) for i in np.arange(n)])
post = columns[-1]
prior = columns[1]
data = pd.DataFrame(np.hstack((bb.x[:, np.newaxis], np.array(posterior).T)), columns=columns)
data_melt = data.melt(id_vars='x')
p = ggplot(data_melt) \
+ geom_area(aes(x='x', y='value', group='variable'),
position='identity',
size=0, alpha=0.1)
p
p += geom_vline(xintercept=true_theta, color='blue', linetype='dashed')
p += geom_text(x=true_theta, y=14, label='Actual', color='blue', hjust=1)
p += scale_y_continuous(limits=[0, 15], expand=[0, 0])
p
where_mode = bb.x[np.where(posterior[n - 1] == np.max(posterior[n - 1]))[0][0]]
p += geom_vline(xintercept=where_mode, color='red', linetype='dashed')
p += geom_text(x=where_mode, y=13, label='Posterior', color='red', hjust=1)
p
p += geom_line(aes(x='x', y=post), data=data, color='red')
p
p += geom_path(aes(x='x', y=prior), data=data, color='black')
p
p += geom_segment(x=bb.ci[n-1][0],
y=10,
xend=bb.ci[n - 1][1],
yend=10,
color='black',
size=2)
p += geom_rect(xmin=bb.ci[n-1][0],
xmax=bb.ci[n-1][1],
ymin=0,
ymax=10,
linetype='blank',
alpha=0.2)
p += geom_text(x=where_mode+0.1,
y=11,
label='Conf Int 0.95',
color='black')
p