Answer all questions and submit them as an IPython notebook, LaTeX document, or Markdown document. Each question is worth 25 points.
%matplotlib inline
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt
import pandas as pd
# Set seed
np.random.seed(10011)
The goal of this problem is to investigate the role of the proposal distribution in a Metropolis-Hastings algorithm designed to simulate from the posterior distribution of the mixture parameter $\delta$.
In part (1), you are asked to simulate data from a distribution with $\delta$ known. For parts (2)–(4), assume $\delta$ is unknown with prior $\delta \sim \text{Uniform}(0,1)$, and provide an appropriate plot and a table summarizing the output of the algorithm. To facilitate comparisons, use the same number of iterations, random seed, starting values, and burn-in period for all implementations of the algorithm.
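For reference, the sampling model used throughout is the two-component normal mixture implied by the simulation code in Part 1:

$$f(y \mid \delta) = \delta\, N(y \mid 7, 0.5^2) + (1 - \delta)\, N(y \mid 10, 0.5^2)$$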
Part 1
# Sample data: each observation comes from N(7, 0.5) with probability
# delta, and from N(10, 0.5) otherwise
n = 200
delta = 0.7
y = np.where(np.random.random(n) < delta, np.random.normal(7, 0.5, n), np.random.normal(10, 0.5, n))
plt.hist(y)
[Histogram of y: clearly bimodal, with modes near 7 and 10]
Part 2
The observed mean and standard deviation serve as summary statistics for the rejection sampler below.

np.mean(y), np.std(y)
(7.9231652131648911, 1.5150267222378637)
Part 2 uses simple rejection sampling from the prior (an ABC-style scheme): draw $\delta$ from its Uniform(0,1) prior, simulate a dataset of the same size, and accept the draw when the simulated mean and standard deviation are each within a tolerance of the observed values.

n_samples = 100
epsilons = [0.05, 0.01]

# List for holding accepted samples
samples = []

while len(samples) < n_samples:
    # Simulate from prior
    d = np.random.random()
    # Simulate data
    y_sim = np.where(np.random.random(n) < d, np.random.normal(7, 0.5, n), np.random.normal(10, 0.5, n))
    # Accept if both summary statistics are within tolerance
    if (np.abs(np.mean(y_sim) - np.mean(y)) < epsilons[0]) & (np.abs(np.std(y_sim) - np.std(y)) < epsilons[1]):
        samples.append(d)
plt.hist(samples)
[Histogram of the accepted draws of $\delta$, roughly bell-shaped and centered near 0.68]
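A quick numerical summary of the accepted sample (a minimal sketch; the interval is a simple percentile interval):

print('ABC posterior mean: %.3f' % np.mean(samples))
print('95%% interval: (%.3f, %.3f)' % tuple(np.percentile(samples, [2.5, 97.5])))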
Part 3
There are several ways of solving this; here is the easiest (though not the most general):
from scipy.stats import norm

dnorm = norm.pdf

def calc_posterior(d):
    # Log posterior of delta up to a constant: the mixture log-likelihood
    # plus a flat Uniform(0,1) prior
    return np.log(d*dnorm(y, 7, 0.5) + (1-d)*dnorm(y, 10, 0.5)).sum()
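As a quick sanity check (the two input values here are arbitrary), the log posterior should be higher near the true value $\delta = 0.7$ than far from it:

calc_posterior(0.7), calc_posterior(0.2)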
n_iterations = 10000

# Initialize trace for parameters
trace = np.empty(n_iterations+1)

# Set initial values
trace[0] = 0.5

# Calculate joint posterior for initial values
current_log_prob = calc_posterior(trace[0])

# Initialize acceptance counts
accepted = 0

for i in range(n_iterations):

    if not i%1000: print('Iteration %i' % i)

    # Grab current parameter values
    d_current = trace[i]

    # Propose new value for d, resampling until it lies in [0, 1].
    # With a width-1 uniform step, the resampled proposal is uniform on
    # [0, 1] for every current value, so it remains symmetric and no
    # Hastings correction is needed.
    d_prop = d_current + np.random.uniform(-1, 1)
    while (d_prop < 0) or (d_prop > 1):
        d_prop = d_current + np.random.uniform(-1, 1)

    # Calculate log posterior with proposed value
    proposed_log_prob = calc_posterior(d_prop)

    # Log-acceptance ratio
    alpha = proposed_log_prob - current_log_prob

    # Test proposed value
    if np.log(np.random.random()) < alpha:
        # Accept
        trace[i+1] = d_prop
        current_log_prob = proposed_log_prob
        accepted += 1
    else:
        # Reject
        trace[i+1] = trace[i]
plt.hist(trace)
[Histogram of the trace: posterior mass for $\delta$ concentrated between roughly 0.6 and 0.75]
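The acceptance rate is worth recording for the proposal comparison the problem asks for (the float conversion guards against integer division on Python 2):

print('Acceptance rate: %.3f' % (accepted / float(n_iterations)))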
logit = lambda p: np.log(p/(1-p))
invlogit = lambda x: 1./(1 + np.exp(-x))
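These map the unit interval to the real line and back, so the random walk below can operate on an unconstrained scale. A quick round-trip check:

invlogit(logit(0.7))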
n_iterations = 10000

# Initialize trace for parameters
trace = np.empty(n_iterations+1)

# Set initial values
trace[0] = 0.5

# Calculate joint posterior for initial values
current_log_prob = calc_posterior(trace[0])

# Initialize acceptance counts
accepted = 0

for i in range(n_iterations):

    if not i%1000: print('Iteration %i' % i)

    # Grab current parameter values
    d_current = trace[i]

    # Propose new value for d via a random walk on the logit scale
    d_prop = invlogit(logit(d_current) + np.random.uniform(-1, 1))

    # Calculate log posterior with proposed value
    proposed_log_prob = calc_posterior(d_prop)

    # Log-acceptance ratio. The random walk is symmetric on the logit
    # scale, not on the unit interval, so the ratio needs the Jacobian
    # of the transformation: log[d'(1-d')] - log[d(1-d)].
    alpha = (proposed_log_prob - current_log_prob
             + np.log(d_prop*(1 - d_prop)) - np.log(d_current*(1 - d_current)))

    # Test proposed value
    if np.log(np.random.random()) < alpha:
        # Accept
        trace[i+1] = d_prop
        current_log_prob = proposed_log_prob
        accepted += 1
    else:
        # Reject
        trace[i+1] = trace[i]
plt.plot(trace)
[Trace plot of $\delta$ under the logit-scale proposal]
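To produce the summary table the problem asks for, something like the following sketch can be run after each chain (the burn-in value and row label are placeholders; use the same settings for every proposal):

burn = 1000  # assumed burn-in period
summary = pd.DataFrame({'mean': [trace[burn:].mean()],
                        'sd': [trace[burn:].std()],
                        'acceptance rate': [accepted / float(n_iterations)]},
                       index=['logit-scale proposal'])
summary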
Carlin (1992) considers a Bayesian approach to meta-analysis, and includes the following example of 22 trials of beta-blockers to prevent mortality after myocardial infarction. These data are given below.

In one possible random effects model, we assume the true baseline mean (on a log-odds scale) $\mu_i$ in trial $i$ is drawn from some population distribution. Let $r^C_i$ denote the number of events in the control group in trial $i$, and $r^T_i$ the number of events under active treatment in trial $i$. Our model is:
$$\begin{aligned}
r^C_i &\sim \text{Binomial}\left(n^C_i, p^C_i\right) \\
r^T_i &\sim \text{Binomial}\left(n^T_i, p^T_i\right) \\
\text{logit}\left(p^C_i\right) &= \mu_i \\
\text{logit}\left(p^T_i\right) &= \mu_i + \delta \\
\mu_i &\sim \text{Normal}(m, s)
\end{aligned}$$

In this case, we want to make inferences about the population effect $m$, and the predictive distribution for the effect $\delta_{\text{new}}$ in a new trial.
This particular model uses a random effect for the baseline mean and a fixed effect for the treatment. There are 3 other models you could fit, corresponding to all possible combinations of fixed or random effects for these two parameters: fixed baseline and fixed treatment, random baseline and fixed treatment, fixed baseline and random treatment, and random baseline and random treatment.

Build all 4 models to estimate the treatment effect in PyMC. Which model would you select, and why?
r_t_obs = [3, 7, 5, 102, 28, 4, 98, 60, 25, 138, 64, 45, 9, 57, 25, 33, 28, 8, 6, 32, 27, 22]
n_t_obs = [38, 114, 69, 1533, 355, 59, 945, 632, 278,1916, 873, 263, 291, 858, 154, 207, 251, 151, 174, 209, 391, 680]
r_c_obs = [3, 14, 11, 127, 27, 6, 152, 48, 37, 188, 52, 47, 16, 45, 31, 38, 12, 6, 3, 40, 43, 39]
n_c_obs = [39, 116, 93, 1520, 365, 52, 939, 471, 282, 1921, 583, 266, 293, 883, 147, 213, 122, 154, 134, 218, 364, 674]
N = len(n_c_obs)
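Before fitting anything, crude per-trial empirical log-odds ratios give a sense of the effect (purely exploratory, not part of any model; the float dtype avoids integer division):

r_t = np.array(r_t_obs, dtype=float); n_t = np.array(n_t_obs, dtype=float)
r_c = np.array(r_c_obs, dtype=float); n_c = np.array(n_c_obs, dtype=float)
np.log((r_t/(n_t - r_t)) / (r_c/(n_c - r_c))).round(2)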
def fixed():

    # Fixed effects: vague normal priors (precision 1e-5) on the pooled
    # baseline log-odds and the treatment effect
    delta = pm.Normal('delta', 0, 1e-5, value=0)
    mu = pm.Normal('mu', 0, 1e-5, value=1)

    # Event probabilities, broadcast to all N trials
    theta_t = pm.Lambda('theta_t', lambda mu=mu, d=delta: pm.invlogit(mu + d)*np.ones(N))
    theta_c = pm.Lambda('theta_c', lambda mu=mu: pm.invlogit(mu)*np.ones(N))

    # Binomial likelihoods
    treat = pm.Binomial('treat', n_t_obs, theta_t, value=r_t_obs, observed=True)
    control = pm.Binomial('control', n_c_obs, theta_c, value=r_c_obs, observed=True)

    # Posterior predictive counts
    treat_pred = pm.Binomial('treat_pred', n_t_obs, theta_t, value=r_t_obs)
    control_pred = pm.Binomial('control_pred', n_c_obs, theta_c, value=r_c_obs)

    return locals()
M_fixed = pm.MCMC(fixed())
M_fixed.sample(10000, 5000)
pm.Matplot.summary_plot([M_fixed.delta, M_fixed.mu])
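Since $\delta$ is on the log-odds scale, exponentiating its trace (via PyMC's trace() accessor) gives a posterior for the odds ratio, which is easier to interpret:

np.exp(M_fixed.delta.trace()).mean()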
def random_mean():

    delta = pm.Normal('delta', 0, 1e-5, value=0)

    m_mu = pm.Normal('m_mu', 0, 1e-5, value=0)
    tau_mu = pm.Gamma('tau_mu', 1, 0.01, value=1)
    mu = pm.Normal('mu', m_mu, tau_mu, value=[1]*N)

    theta_t = pm.Lambda('theta_t', lambda mu=mu, d=delta: pm.invlogit(mu + d))
    theta_c = pm.Lambda('theta_c', lambda mu=mu: pm.invlogit(mu))

    treat = pm.Binomial('treat', n_t_obs, theta_t, value=r_t_obs, observed=True)
    control = pm.Binomial('control', n_c_obs, theta_c, value=r_c_obs, observed=True)

    treat_pred = pm.Binomial('treat_pred', n_t_obs, theta_t, value=r_t_obs)
    control_pred = pm.Binomial('control_pred', n_c_obs, theta_c, value=r_c_obs)

    return locals()
M_random_mean = pm.MCMC(random_mean())
M_random_mean.sample(10000, 5000)
pm.Matplot.summary_plot([M_random_mean.delta, M_random_mean.m_mu, M_random_mean.tau_mu, M_random_mean.mu])
def random_treat():

    mu = pm.Normal('mu', 0, 1e-5, value=1)

    m_delta = pm.Normal('m_delta', 0, 1e-5, value=0.5)
    tau_delta = pm.Gamma('tau_delta', 1, 0.01, value=1)
    delta = pm.Normal('delta', m_delta, tau_delta, size=N)

    theta_t = pm.Lambda('theta_t', lambda mu=mu, d=delta: pm.invlogit(mu + d))
    theta_c = pm.Lambda('theta_c', lambda mu=mu: pm.invlogit(mu)*np.ones(N))

    treat = pm.Binomial('treat', n_t_obs, theta_t, value=r_t_obs, observed=True)
    control = pm.Binomial('control', n_c_obs, theta_c, value=r_c_obs, observed=True)

    treat_pred = pm.Binomial('treat_pred', n_t_obs, theta_t, value=r_t_obs)
    control_pred = pm.Binomial('control_pred', n_c_obs, theta_c, value=r_c_obs)

    return locals()
M_random_treat = pm.MCMC(random_treat())
M_random_treat.sample(10000, 5000)
pm.Matplot.summary_plot([M_random_treat.m_delta, M_random_treat.delta, M_random_treat.tau_delta, M_random_treat.mu])
def random():

    m_mu = pm.Normal('m_mu', 0, 1e-5, value=0)
    tau_mu = pm.Gamma('tau_mu', 1, 0.01, value=1)
    mu = pm.Normal('mu', m_mu, tau_mu, value=[0]*N)

    m_delta = pm.Normal('m_delta', 0, 1e-5, value=0)
    tau_delta = pm.Gamma('tau_delta', 1, 0.01, value=1)
    delta = pm.Normal('delta', m_delta, tau_delta, value=[0]*N)

    theta_t = pm.Lambda('theta_t', lambda mu=mu, d=delta: pm.invlogit(mu + d))
    theta_c = pm.Lambda('theta_c', lambda mu=mu: pm.invlogit(mu))

    treat = pm.Binomial('treat', n_t_obs, theta_t, value=r_t_obs, observed=True)
    control = pm.Binomial('control', n_c_obs, theta_c, value=r_c_obs, observed=True)

    treat_pred = pm.Binomial('treat_pred', n_t_obs, theta_t, value=r_t_obs)
    control_pred = pm.Binomial('control_pred', n_c_obs, theta_c, value=r_c_obs)

    return locals()
M_random = pm.MCMC(random())
M_random.sample(10000, 5000)
pm.Matplot.summary_plot([M_random.m_delta, M_random.delta, M_random.tau_delta, M_random.mu, M_random.m_mu, M_random.tau_mu])
DIC
dic = np.array([M_fixed.dic, M_random_mean.dic, M_random_treat.dic, M_random.dic])
dic
array([ 527.29959017, 281.75127105, 439.03270863, 285.24870453])
delta_dic = dic - dic.min()
np.round(np.exp(-0.5*delta_dic) / np.exp(-0.5*delta_dic).sum(), 3)
array([ 0. , 0.852, 0. , 0.148])
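The calculation above converts DIC differences into Akaike-style weights: with $\Delta_i = \text{DIC}_i - \text{DIC}_{\min}$,

$$w_i = \frac{\exp(-\Delta_i/2)}{\sum_j \exp(-\Delta_j/2)}.$$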
Hence, about 85% of the model weight is on the random-mean model and about 15% on the fully random model. The random-mean model (random baseline, fixed treatment effect) would be selected: it has the lowest DIC by a wide margin, and the additional variance component for the treatment effect in the fully random model adds complexity without a compensating improvement in fit.