#!/usr/bin/env python
# coding: utf-8

# Answer all questions and submit them either as an IPython notebook, LaTeX document, or Markdown document. Each question is worth 25 points.

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import pymc as pm
import pylab as plt
import pandas as pd

# Set seed
np.random.seed(10011)


# ## Question 1
#
# The goal of this problem is to investigate the role of the proposal distribution in a Metropolis-Hastings algorithm designed to simulate from the posterior distribution of the mixture parameter $\delta$.
#
# 1. Simulate 200 realizations from the mixture distribution:
# $$y_i \sim \delta N(7, 0.5^2) + (1-\delta) N(10, 0.5^2)$$
# with $\delta = 0.7$. Plot a histogram of these data.
# 2. Implement an ABC procedure to simulate from the posterior distribution of $\delta$, using your data from part (1).
# 3. Implement a random walk M-H algorithm with proposal $\delta^{\prime} = \delta^{(i)} + \epsilon$, where $\epsilon \sim \text{Unif}(-1, 1)$.
# 4. Reparameterize the problem letting $U = \log\left[\frac{\delta}{1 - \delta}\right]$ and $u^{\prime} = u^{(i)} + \epsilon$. Implement a random walk chain in U-space.
# 5. Compare the estimates and convergence behavior of the three algorithms.
#
# In part (1), you are asked to simulate data from a distribution with $\delta$ known. For parts (2)–(4), assume $\delta$ is unknown with prior $\delta \sim \text{Unif}(0, 1)$. For parts (2)–(4), provide an appropriate plot and a table summarizing the output of the algorithm. To facilitate comparisons, use the same number of iterations, random seed, starting values, and burn-in period for all implementations of the algorithm.

# **Part 1**

# In[2]:

# Sample data: for each observation, draw from N(7, 0.5) with probability delta,
# otherwise from N(10, 0.5)
n = 200
delta = 0.7

y = np.where(np.random.random(n) < delta,
             np.random.normal(7, 0.5, n),
             np.random.normal(10, 0.5, n))


# In[3]:

plt.hist(y)


# **Part 2**

# In[4]:

np.mean(y), np.std(y)


# In[5]:

n_samples = 100

# Tolerances for the two summary statistics (sample mean and standard deviation)
epsilons = [0.05, 0.01]

# List for holding accepted samples
samples = []

while len(samples) < n_samples:

    # Simulate from prior
    d = np.random.random()

    # Simulate data
    y_sim = np.where(np.random.random(n) < d,
                     np.random.normal(7, 0.5, n),
                     np.random.normal(10, 0.5, n))

    # Accept the draw if both summary statistics are within tolerance of the observed values
    if (np.abs(np.mean(y_sim) - np.mean(y)) < epsilons[0]) & (np.abs(np.std(y_sim) - np.std(y)) < epsilons[1]):
        samples.append(d)


# In[6]:

plt.hist(samples)


# **Part 3**

# In[7]:

plt.hist(y)


# There are several ways of solving this; here is the easiest (though not the most general).

# In[8]:

from scipy.stats import norm
dnorm = norm.pdf

def calc_posterior(d):
    # Log-posterior of delta (up to a constant): with a Unif(0, 1) prior,
    # this is just the mixture log-likelihood
    return np.log(d*dnorm(y, 7, 0.5) + (1-d)*dnorm(y, 10, 0.5)).sum()


# In[9]:

n_iterations = 10000

# Initialize trace for parameters
trace = np.empty(n_iterations+1)

# Set initial values
trace[0] = 0.5

# Calculate joint posterior for initial values
current_log_prob = calc_posterior(trace[0])

# Initialize acceptance counts
accepted = 0

for i in range(n_iterations):

    if not i%1000:
        print('Iteration %i' % i)

    # Grab current parameter values
    d_current = trace[i]

    # Propose new value for d, resampling until the proposal lies in (0, 1)
    d_prop = d_current + np.random.uniform(-1, 1)
    while (d_prop < 0) or (d_prop > 1):
        d_prop = d_current + np.random.uniform(-1, 1)

    # Calculate log posterior with proposed value
    proposed_log_prob = calc_posterior(d_prop)

    # Log-acceptance rate
    alpha = proposed_log_prob - current_log_prob

    # Test proposed value
    if np.log(np.random.random()) < alpha:
        # Accept
        trace[i+1] = d_prop
        current_log_prob = proposed_log_prob
        accepted += 1
    else:
        # Reject
        trace[i+1] = trace[i]


# In[10]:

plt.hist(trace)
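# The assignment asks for a table summarizing the output of the sampler. As a minimal sketch, the cell below reports the acceptance rate of the random-walk chain and a pandas description of the retained draws; the burn-in of 1000 iterations is an arbitrary choice made here for illustration.

# In[ ]:

# Summary of the delta-space random-walk chain (sketch; burn-in of 1000 assumed)
burn = 1000
print('Acceptance rate: %.3f' % (accepted / float(n_iterations)))
pd.Series(trace[burn:], name='delta').describe()
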
# **Part 4**

# In[11]:

# Logit and inverse-logit transformations
logit = lambda p: np.log(p/(1-p))
invlogit = lambda x: 1./(1 + np.exp(-x))

n_iterations = 10000

# Initialize trace for parameters
trace = np.empty(n_iterations+1)

# Set initial values
trace[0] = 0.5

# Calculate joint posterior for initial values
current_log_prob = calc_posterior(trace[0])

# Initialize acceptance counts
accepted = 0

for i in range(n_iterations):

    if not i%1000:
        print('Iteration %i' % i)

    # Grab current parameter values
    d_current = trace[i]

    # Propose new value for d via a random walk on u = logit(d)
    d_prop = invlogit(logit(d_current) + np.random.uniform(-1, 1))

    # Calculate log posterior with proposed value
    proposed_log_prob = calc_posterior(d_prop)

    # Log-acceptance rate; the log-Jacobian terms log[d(1-d)] account for the
    # change of variables from delta to u = logit(delta), so that the chain
    # still targets the posterior under the Unif(0, 1) prior on delta
    alpha = (proposed_log_prob + np.log(d_prop*(1-d_prop))) - (current_log_prob + np.log(d_current*(1-d_current)))

    # Test proposed value
    if np.log(np.random.random()) < alpha:
        # Accept
        trace[i+1] = d_prop
        current_log_prob = proposed_log_prob
        accepted += 1
    else:
        # Reject
        trace[i+1] = trace[i]


# In[12]:

plt.plot(trace)
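# **Part 5**

# For part (5), a minimal side-by-side comparison of the estimates is sketched below. Since both random-walk implementations reuse the name `trace`, only the U-space chain is still in memory at this point, so the table compares the ABC draws with the U-space chain; storing each chain under its own name would allow all three samplers to be tabulated together. A burn-in of 1000 iterations is again assumed.

# In[ ]:

# Side-by-side posterior summaries for delta (sketch)
burn = 1000
comparison = pd.DataFrame({'ABC': pd.Series(samples).describe(),
                           'RW M-H (u-space)': pd.Series(trace[burn:]).describe()})
comparison
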
# ## Question 2
#
# Carlin (1992) considers a Bayesian approach to meta-analysis, and includes the following example of 22 trials of beta-blockers to prevent mortality after myocardial infarction. These data are given below.
#
# In one possible random effects model we assume the true baseline mean (on a log-odds scale) $\mu_i$ in trial $i$ is drawn from some population distribution. Let $r^C_i$ denote the number of events in the control group in trial $i$, and $r^T_i$ the number of events under active treatment in trial $i$. Our model is:
#
# $$\begin{aligned}
# r^C_i &\sim \text{Binomial}\left(p^C_i, n^C_i\right) \\
# r^T_i &\sim \text{Binomial}\left(p^T_i, n^T_i\right) \\
# \text{logit}\left(p^C_i\right) &= \mu_i \\
# \text{logit}\left(p^T_i\right) &= \mu_i + \delta \\
# \mu_i &\sim \text{Normal}(m, s).
# \end{aligned}$$
#
# In this case, we want to make inferences about the population effect $m$, and the predictive distribution for the effect $\delta_{\text{new}}$ in a new trial.
#
# This particular model uses a random effect for the population mean, and a fixed effect for the treatment effect. There are 3 other models you could fit to represent all possible combinations of fixed or random effects for these two parameters.
#
# Build all 4 models to estimate the treatment effect in PyMC and
#
# 1. use convergence diagnostics to check for convergence in each model
# 2. use posterior predictive checks to compare the fit of the models
# 3. use DIC to compare the models as approximations of the true generating model
#
# Which model would you select and why?

# In[13]:

r_t_obs = [3, 7, 5, 102, 28, 4, 98, 60, 25, 138, 64, 45, 9, 57, 25, 33, 28, 8, 6, 32, 27, 22]
n_t_obs = [38, 114, 69, 1533, 355, 59, 945, 632, 278, 1916, 873, 263, 291, 858, 154, 207, 251, 151, 174, 209, 391, 680]
r_c_obs = [3, 14, 11, 127, 27, 6, 152, 48, 37, 188, 52, 47, 16, 45, 31, 38, 12, 6, 3, 40, 43, 39]
n_c_obs = [39, 116, 93, 1520, 365, 52, 939, 471, 282, 1921, 583, 266, 293, 883, 147, 213, 122, 154, 134, 218, 364, 674]

N = len(n_c_obs)


# In[14]:

def fixed():
    # Model 1: fixed baseline (mu) and fixed treatment effect (delta)
    delta = pm.Normal('delta', 0, 1e-5, value=0)
    mu = pm.Normal('mu', 0, 1e-5, value=1)

    theta_t = pm.Lambda('theta_t', lambda mu=mu, d=delta: pm.invlogit(mu + d)*np.ones(N))
    theta_c = pm.Lambda('theta_c', lambda mu=mu: pm.invlogit(mu)*np.ones(N))

    treat = pm.Binomial('treat', n_t_obs, theta_t, value=r_t_obs, observed=True)
    control = pm.Binomial('control', n_c_obs, theta_c, value=r_c_obs, observed=True)

    # Unobserved copies for posterior predictive checks
    treat_pred = pm.Binomial('treat_pred', n_t_obs, theta_t, value=r_t_obs)
    control_pred = pm.Binomial('control_pred', n_c_obs, theta_c, value=r_c_obs)

    return locals()


# In[15]:

M_fixed = pm.MCMC(fixed())
M_fixed.sample(10000, 5000)
M_fixed.sample(10000, 5000)


# In[16]:

pm.Matplot.summary_plot([M_fixed.delta, M_fixed.mu])


# In[17]:

def random_mean():
    # Model 2: random baseline (mu_i), fixed treatment effect (delta)
    delta = pm.Normal('delta', 0, 1e-5, value=0)

    m_mu = pm.Normal('m_mu', 0, 1e-5, value=0)
    tau_mu = pm.Gamma('tau_m', 1, 0.01, value=1)
    mu = pm.Normal('mu', m_mu, tau_mu, value=[1]*N)

    theta_t = pm.Lambda('theta_t', lambda mu=mu, d=delta: pm.invlogit(mu + d))
    theta_c = pm.Lambda('theta_c', lambda mu=mu: pm.invlogit(mu))

    treat = pm.Binomial('treat', n_t_obs, theta_t, value=r_t_obs, observed=True)
    control = pm.Binomial('control', n_c_obs, theta_c, value=r_c_obs, observed=True)

    # Unobserved copies for posterior predictive checks
    treat_pred = pm.Binomial('treat_pred', n_t_obs, theta_t, value=r_t_obs)
    control_pred = pm.Binomial('control_pred', n_c_obs, theta_c, value=r_c_obs)

    return locals()


# In[18]:

M_random_mean = pm.MCMC(random_mean())
M_random_mean.sample(10000, 5000)
M_random_mean.sample(10000, 5000)


# In[20]:

pm.Matplot.summary_plot([M_random_mean.delta, M_random_mean.m_mu, M_random_mean.tau_mu, M_random_mean.mu])


# In[21]:

def random_treat():
    # Model 3: fixed baseline (mu), random treatment effect (delta_i)
    mu = pm.Normal('mu', 0, 1e-5, value=1)

    m_delta = pm.Normal('m_delta', 0, 1e-5, value=0.5)
    tau_delta = pm.Gamma('tau_delta', 1, 0.01, value=1)
    delta = pm.Normal('delta', m_delta, tau_delta, size=N)

    theta_t = pm.Lambda('theta_t', lambda mu=mu, d=delta: pm.invlogit(mu + d))
    theta_c = pm.Lambda('theta_c', lambda mu=mu: pm.invlogit(mu)*np.ones(N))

    treat = pm.Binomial('treat', n_t_obs, theta_t, value=r_t_obs, observed=True)
    control = pm.Binomial('control', n_c_obs, theta_c, value=r_c_obs, observed=True)

    # Unobserved copies for posterior predictive checks
    treat_pred = pm.Binomial('treat_pred', n_t_obs, theta_t, value=r_t_obs)
    control_pred = pm.Binomial('control_pred', n_c_obs, theta_c, value=r_c_obs)

    return locals()


# In[22]:

M_random_treat = pm.MCMC(random_treat())
M_random_treat.sample(10000, 5000)
M_random_treat.sample(10000, 5000)


# In[23]:

pm.Matplot.summary_plot([M_random_treat.m_delta, M_random_treat.delta, M_random_treat.tau_delta, M_random_treat.mu])
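# As a numerical convergence check to complement the summary plots, the sketch below runs the Geweke diagnostic (PyMC 2.x's `pm.geweke`, assumed to accept a 1-D trace array) on a scalar treatment-effect node of each model fitted so far; the same call can be applied to the fourth model once it is fit below. As a rough rule of thumb, most z-scores should fall within (-2, 2) if the chain has converged.

# In[ ]:

# Geweke diagnostic (sketch): fraction of z-scores outside (-2, 2) per model
for label, model, node in [('fixed', M_fixed, 'delta'),
                           ('random mean', M_random_mean, 'delta'),
                           ('random treatment', M_random_treat, 'm_delta')]:
    scores = np.array(pm.geweke(model.trace(node)[:]))
    n_extreme = (np.abs(scores[:, 1]) > 2).sum()
    print('%s model, %s: %i of %i Geweke z-scores outside (-2, 2)'
          % (label, node, n_extreme, len(scores)))
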
# In[24]:

def random():
    # Model 4: random baseline (mu_i) and random treatment effect (delta_i)
    m_mu = pm.Normal('m_mu', 0, 1e-5, value=0)
    tau_mu = pm.Gamma('tau_m', 1, 0.01, value=1)
    mu = pm.Normal('mu', m_mu, tau_mu, value=[0]*N)

    m_delta = pm.Normal('m_delta', 0, 1e-5, value=0)
    tau_delta = pm.Gamma('tau_delta', 1, 0.01, value=1)
    delta = pm.Normal('delta', m_delta, tau_delta, value=[0]*N)

    theta_t = pm.Lambda('theta_t', lambda mu=mu, d=delta: pm.invlogit(mu + d))
    theta_c = pm.Lambda('theta_c', lambda mu=mu, d=delta: pm.invlogit(mu))

    treat = pm.Binomial('treat', n_t_obs, theta_t, value=r_t_obs, observed=True)
    control = pm.Binomial('control', n_c_obs, theta_c, value=r_c_obs, observed=True)

    # Unobserved copies for posterior predictive checks
    treat_pred = pm.Binomial('treat_pred', n_t_obs, theta_t, value=r_t_obs)
    control_pred = pm.Binomial('control_pred', n_c_obs, theta_c, value=r_c_obs)

    return locals()


# In[25]:

M_random = pm.MCMC(random())
M_random.sample(10000, 5000)
M_random.sample(10000, 5000)


# In[26]:

pm.Matplot.summary_plot([M_random.m_delta, M_random.delta, M_random.tau_delta, M_random.mu, M_random.m_mu, M_random.tau_mu])


# DIC

# In[28]:

dic = np.array([M_fixed.dic, M_random_mean.dic, M_random_treat.dic, M_random.dic])
dic


# In[31]:

# Convert DIC differences into approximate model weights (analogous to Akaike weights)
delta_dic = dic - dic.min()
np.round(np.exp(-0.5*delta_dic) / np.exp(-0.5*delta_dic).sum(), 3)


# Hence, 85% of the model weight is on the random mean model, while 15% is on the fully random model, so the random mean model would be selected on this basis.
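# For the posterior predictive checks (task 2), each model already carries unobserved `treat_pred` and `control_pred` nodes. One simple check, sketched below, computes per-trial Bayesian p-values $P(r^T_{\text{pred}} \geq r^T_{\text{obs}})$ for the treatment arms from the traced `treat_pred` values; p-values near 0 or 1 flag trials that a model reproduces poorly. This is only one of many possible discrepancy measures.

# In[ ]:

# Posterior predictive check (sketch): per-trial Bayesian p-values for the
# treatment arms, one column per model
ppc = {}
for label, model in [('fixed', M_fixed),
                     ('random mean', M_random_mean),
                     ('random treatment', M_random_treat),
                     ('random', M_random)]:
    pred = model.trace('treat_pred')[:]          # shape: (n samples, 22 trials)
    ppc[label] = (pred >= np.array(r_t_obs)).mean(axis=0)

pd.DataFrame(ppc)
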