Florent Leclercq,
Imperial Centre for Inference and Cosmology, Imperial College London,
florent.leclercq@polytechnique.org
import numpy as np
from scipy.stats import norm, binom, beta
from matplotlib import pyplot as plt
from cycler import cycler
np.random.seed(123456)
%matplotlib inline
plt.rcParams.update({'lines.linewidth': 2})
In this notebook we consider a Bernoulli experiment (a series of $N_{\mathrm{trials}}$ independent experiments, each with its own boolean-valued outcome). The target parameter is the success rate $\theta$. The likelihood for this problem is a binomial distribution and the beta distribution provides a family of conjuguate priors for Bayesian inference.
groundtruth = 0.38
Ntrials = 100
Nsuccesses = binom(Ntrials, groundtruth).rvs()
Nsuccesses
33
If the likelihood is a binomial distribution with $N_{\mathrm{successes}}$ for $N_{\mathrm{trials}}$, and if the prior is a beta distribution with parameters ($\alpha$, $\beta$), then the posterior is a beta distribution with parameters \begin{equation} \alpha' = \alpha+N_{\mathrm{successes}}, \quad \beta' = \beta + N_{\mathrm{trials}} - N_{\mathrm{successes}} \end{equation} (see for example https://en.wikipedia.org/wiki/Conjugate_prior#When_likelihood_function_is_a_discrete_distribution).
def target_pdf(theta,Ntrials,Nsuccesses,lh,prior):
if theta < 0 or theta > 1:
return 0
else:
return lh(Ntrials,theta).pmf(Nsuccesses)*prior.pdf(theta)
a = 10
b = 10
lh = binom
prior = beta(a,b)
posterior = beta(a+Nsuccesses, b+Ntrials-Nsuccesses)
thetas = np.linspace(0, 1, 200)
plt.figure(figsize=(10,6))
plt.xlim([0,1])
plt.ylim([0,1.05])
plt.xlabel("$\\theta$")
plt.plot(thetas, prior.pdf(thetas)/prior.pdf(thetas).max(), label="prior")
plt.plot(thetas, lh(Ntrials,thetas).pmf(Nsuccesses)/lh(Ntrials,thetas).pmf(Nsuccesses).max(), label="likelihood")
plt.plot(thetas, posterior.pdf(thetas)/posterior.pdf(thetas).max(), label="posterior")
plt.plot([groundtruth,groundtruth],[0,1.05],color='black',linestyle='--',label="groundtruth")
plt.title("Analytic solution")
plt.legend(loc='best')
plt.show()
def proposal_pdf(sigma):
return norm(0,sigma)
def MH_sampler(Ntries,theta_start,Ntrials,Nsuccesses,lh,prior,proposal_sigma):
Naccepted=0
samples=np.zeros(Ntries+1)
samples[0]=theta_start
theta=theta_start
for i in range(Ntries):
theta_p = theta + proposal_pdf(proposal_sigma).rvs()
# the Gaussian proposal pdf satisfies the detailed balance equation, so the
# acceptance ratio simplifies to the Metropolis ratio
a = min(1, target_pdf(theta_p,Ntrials,Nsuccesses,lh,prior)/target_pdf(theta,Ntrials,Nsuccesses,lh,prior))
u = np.random.uniform()
if u < a:
Naccepted+=1
theta=theta_p
samples[i+1] = theta
return Naccepted, samples
Ntries1=10000
Nburnin=1000
proposal_sigma=0.1
theta_start=0.6
Naccepted, samples = MH_sampler(Ntries1,theta_start,Ntrials,Nsuccesses,lh,prior,proposal_sigma)
fraction_accepted=float(Naccepted)/Ntries1
fig, ax = plt.subplots(figsize=(10,6))
ax.set_xlim(0,Ntries1)
ax.set_xlabel("sample number")
ax.set_title("Trace plot")
ax.set_ylabel("$\\theta$")
ax.plot(np.arange(Ntries1+1),samples,marker='.',color='C4')
ymin, ymax = ax.get_ylim()
ax.set_ylim([ymin,ymax])
ax.plot([Nburnin,Nburnin],[ymin,ymax],color='black',linestyle=':')
plt.show()
fraction_accepted
0.4524
burnedin_samples=samples[Nburnin:]
plt.figure(figsize=(10,6))
plt.xlim([0,1])
plt.xlabel("$\\theta$")
plt.plot(thetas, prior.pdf(thetas), label="prior")
plt.hist(burnedin_samples, 40, histtype='step', density=True, color='C2', linewidth=2, label="MH posterior")
plt.plot(thetas, posterior.pdf(thetas), color='C2', linestyle='--', label="analytic posterior")
plt.title("Metropolis-Hastings sampling, acceptance ratio={:.3f}".format(fraction_accepted))
plt.legend(loc='best')
plt.show()
Ntries2=1000
theta_start=0.6
# Suitable step size
proposal_sigma_1=0.1
Naccepted_1, samples_1 = MH_sampler(Ntries2,theta_start,Ntrials,Nsuccesses,lh,prior,proposal_sigma_1)
fraction_accepted_1=float(Naccepted_1)/Ntries2
# Step size too large
proposal_sigma_2=4
Naccepted_2, samples_2 = MH_sampler(Ntries2,theta_start,Ntrials,Nsuccesses,lh,prior,proposal_sigma_2)
fraction_accepted_2=float(Naccepted_2)/Ntries2
# Step size too small
proposal_sigma_3=0.003
Naccepted_3, samples_3 = MH_sampler(Ntries2,theta_start,Ntrials,Nsuccesses,lh,prior,proposal_sigma_3)
fraction_accepted_3=float(Naccepted_3)/Ntries2
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, figsize=(15,5))
ax1.set_xlim(0,Ntries2)
ax1.set_xlabel("sample number")
ax1.set_ylabel("$\\theta$")
ax1.plot(np.arange(Ntries2+1),samples_1,marker='.',color='C4')
ax1.set_title("sigma={}, acceptance ratio={:.3f}".format(proposal_sigma_1,fraction_accepted_1))
ax2.set_xlim(0,Ntries2)
ax2.set_xlabel("sample number")
ax2.plot(np.arange(Ntries2+1),samples_2,marker='.',color='C4')
ax2.set_title("sigma={}, acceptance ratio={:.3f}".format(proposal_sigma_2,fraction_accepted_2))
ax3.set_xlim(0,Ntries2)
ax3.set_xlabel("sample number")
ax3.plot(np.arange(Ntries2+1),samples_3,marker='.',color='C4')
ax3.set_title("sigma={}, acceptance ratio={:.3f}".format(proposal_sigma_3,fraction_accepted_3))
ymin, ymax = ax1.get_ylim()
ax1.text(200,ymax-(ymax-ymin)/10., "adequate step size",fontsize=14)
ax2.text(200,ymax-(ymax-ymin)/10., "step size too large",fontsize=14)
ax3.text(200,ymax-(ymax-ymin)/10., "step size too small",fontsize=14)
f.subplots_adjust(wspace=0.1)
plt.show()
Ntries3=100
proposal_sigma=0.05
Nchains=5
# Run Nchains different chains starting at different positions in parameter space
chains = [MH_sampler(Ntries3,theta_start,Ntrials,Nsuccesses,lh,prior,proposal_sigma)
for theta_start in np.linspace(0.1,0.9,Nchains)]
fig, ax = plt.subplots(figsize=(10,6))
ax.set_xlim(0,Ntries3)
ax.set_xlabel("sample number")
ax.set_ylabel("$\\theta$")
ax.set_prop_cycle(cycler('color', [plt.cm.Set2(i) for i in np.linspace(0, 1., 8)]))
for samples in chains:
ax.plot(np.arange(Ntries3+1),samples[1],marker='.')
plt.title("Trace plot for different chains")
plt.show()
Gelman et al., "Bayesian Data Analysis" (third edition), p. 284-285
Parameters:
Definitions:
Estimators: Estimators of the marginal posterior variance of the estimand:
Test:
psi_j = np.array([np.mean(chains[j][1]) for j in range(Nchains)])
psi = np.mean(psi_j)
B = float(Nchains)/(Ntries3-1)*np.sum((psi_j-psi*np.ones(Nchains))**2)
B
0.00040276881731590047
s_j = np.array([1./(Ntries3-1)*np.sum((chains[j][1]-psi_j[j])**2) for j in range(Nchains)])
W = np.mean(s_j)
W
0.01067472484604469
var_minus=W
var_plus=float(Ntries3)/(Ntries3-1)*W+1./Ntries3*B
var_minus, var_plus
(0.01067472484604469, 0.01078657803771325)
R=np.sqrt(var_plus/var_minus)
R
1.0052255074490637