#!/usr/bin/env python
# coding: utf-8
#
# Jupyter-notebook export: joint Bayesian analysis of a simulated clinical trial.
# An R cell (via rpy2's %%R magic) simulates a two-arm trial with a continuous
# outcome (follow-up systolic blood pressure, `sbp`) and a binary outcome
# (event indicator `ds`); a Stan model then fits a linear and a logistic
# regression jointly, tying the two treatment effects together with a
# bivariate-normal prior whose correlation gets an LKJ prior.
#
# Requires an IPython kernel (`get_ipython`), pystan 2.x, rpy2, seaborn.

# In[1]: imports and global setup

get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import pystan
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import logistic, distributions as dst
import rpy2

np.random.seed(1)

# In[2]: record the PyStan version for reproducibility

pystan.__version__

# In[3]: enable the %%R cell magic

get_ipython().run_line_magic('load_ext', 'rpy2.ipython')

# In[4]: simulate one n=1500 two-arm trial in R; `-o data` exports the
# resulting data.frame back into Python as a pandas DataFrame named `data`.

get_ipython().run_cell_magic('R', '-o data', "\n# Simulate a single trial with sample size n\nsim <- function(n) {\n trt <- c(rep('A', n / 2), rep('B', n / 2))\n sbp0 <- rnorm(n, 140, 7)\n sbp <- sbp0 - 5 - 3 * (trt == 'B') + rnorm(n, sd=7)\n logit <- -2.6 + log(0.8) * (trt == 'B') + 0.05 * (sbp0 - 140) +\n 0.05 * (sbp - 130)\n ds <- ifelse(runif(n) <= plogis(logit), 1, 0)\n data.frame(trt, sbp0, sbp, ds)\n}\n\nset.seed(7)\ndata <- sim(n=1500)\n")

# In[5]:

data.head()

# In[6]: baseline vs. follow-up SBP, coloured by treatment arm

data.assign(treat=(data.trt=='B').astype(int)).plot.scatter('sbp0', 'sbp', c='treat', cmap='plasma', alpha=0.4);

# In[7]: Stan model — linear regression for y1 (SBP) and logistic regression
# for y2 (event) with shared covariate x (baseline SBP) and treatment
# indicator; the two treatment effects `beta` share a bivariate-normal prior
# with fixed scales sigma_b and an LKJ(1) prior on their correlation.
#
# Fixes vs. the earlier draft: `sigma_y` now carries `<lower=0>` (a residual
# SD must be positive; unconstrained it causes rejected proposals), `y2` is
# bounded to {0,1} so Stan validates the Bernoulli data, and the deprecated
# `<-` assignments in `generated quantities` use `=`, matching the
# `transformed parameters` block.

model = """
data {
  int n;
  vector[n] x;                   // baseline SBP
  real y1[n];                    // follow-up SBP (continuous outcome)
  int<lower=0, upper=1> y2[n];   // event indicator (binary outcome)
  vector[n] treat;               // treatment arm (0 = A, 1 = B)
  vector[2] Zero;                // prior mean for beta
  vector[2] sigma_b;             // prior scales for beta
}
parameters {
  vector[2] alpha;
  vector[2] beta;                // treatment effects: [1] on SBP, [2] on logit(event)
  vector[2] mu;
  real<lower=0> sigma_y;         // residual SD of the SBP regression
  cholesky_factor_corr[2] L_b;
}
transformed parameters {
  vector[n] theta1;
  vector[n] theta2;
  theta1 = mu[1] + alpha[1]*x + beta[1]*treat;
  theta2 = mu[2] + alpha[2]*x + beta[2]*treat;
}
model {
  // correlated prior on the two treatment effects
  beta ~ multi_normal_cholesky(Zero, diag_pre_multiply(sigma_b, L_b));
  L_b ~ lkj_corr_cholesky(1);    // LKJ prior on the correlation matrix
  y1 ~ normal(theta1, sigma_y);
  y2 ~ bernoulli_logit(theta2);
}
generated quantities {
  matrix[2,2] Omega;             // correlation matrix of beta
  matrix[2,2] Sigma;             // covariance matrix of beta
  Omega = multiply_lower_tri_self_transpose(L_b);
  Sigma = quad_form_diag(Omega, sigma_b);
}
"""

# In[8]: compile and sample. The sigma_b entries back-solve tail statements
# about the N(0, s) priors on beta: s = -10/ppf(0.1) puts 10% prior mass on
# an SBP effect below -10, and s = log(0.5)/ppf(0.05) puts 5% prior mass on
# an odds ratio below 0.5.

fit = pystan.stan(
    model_code=model, seed=7, n_jobs=1,
    data=dict(
        x=data.sbp0.values,
        y1=data.sbp.values,
        y2=data.ds.astype(int).values,
        treat=(data.trt=='B').astype(int).values,
        n=data.shape[0],
        sigma_b=(-10/dst.norm.ppf(0.1), np.log(0.5)/dst.norm.ppf(0.05)),
        Zero=np.zeros(2)))

# In[10]: traceplots / densities for the treatment effects

fit.plot(pars=['beta']);

# In[15]: pull all posterior draws as a dict of arrays (draws x ...)

output = fit.extract(permuted=True)

# Look at the correlations of the posterior means of the thetas!

# In[23]:

sns.jointplot(output['theta1'].mean(0), output['theta2'].mean(0), kind='kde')

# Meanwhile, here are the posteriors of the regression parameters: `beta`, `mu` and `alpha`

# In[16]:

sns.jointplot(*output['beta'].T, kind='kde')

# In[17]:

sns.jointplot(*output['alpha'].T, kind='kde')

# In[18]:

sns.jointplot(*output['mu'].T, kind='kde')

# In[30]: posterior mean of the correlation matrix of the treatment effects

output['Omega'].mean(0)

# In[31]: ...and its posterior SD

output['Omega'].std(0)

# In[19]: posterior mean of the covariance matrix

output['Sigma'].mean(0)