#!/usr/bin/env python # coding: utf-8 # In[85]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns sns.set() import pystan import statsmodels.api as sm import statsmodels.formula.api as smf np.random.seed(1) # In[86]: n = 200 treat = np.r_[np.ones(int(n/2)), np.zeros(int(n/2))] x = np.random.normal(100, 15, size=n) y1 = 5*(treat==1) + 0.1*x + np.random.normal(0, 5, size=n) data = pd.DataFrame(dict(x=x, y1=y1, treat=treat)) data.plot.scatter('x', 'y1', c='treat', cmap='plasma') # In[87]: L2 = 0.7 * (treat == 1) + .05 * (x - 100) y2 = (np.random.rand(n) <= 1 / (1 + np.exp(-L2))).astype(int) # In[88]: fit1 = smf.ols(formula='y1 ~ x + treat', data=data).fit() fit1.summary() # In[89]: fit2 = smf.glm(formula='y2 ~ x + treat', data=data, family=sm.families.Binomial()).fit() fit2.summary() # Specify Stan model # In[90]: model = """ data { int n; vector[n] x; real y1[n]; int y2[n]; vector[n] treat; vector[2] sigma_b; vector[2] Zero; } parameters { vector[2] alpha; vector[2] beta; vector[2] mu; real sigma; corr_matrix[2] Omega_b; } transformed parameters { vector[n] theta1; vector[n] theta2; cov_matrix[2] Sigma; Sigma <- quad_form_diag(Omega_b, sigma_b); theta1 <- mu[1] + alpha[1]*x + beta[1]*treat; theta2 <- mu[2] + alpha[2]*x + beta[2]*treat; } model { beta ~ multi_normal(Zero, Sigma); Omega_b ~ lkj_corr(1); sigma ~ cauchy(0, 5); y1 ~ normal(theta1, sigma); y2 ~ bernoulli_logit(theta2); } generated quantities { real A; real B; real A_or_B; real A_and_B; A <- if_else(beta[1] > 2, 1, 0); B <- if_else(exp(beta[2]) > 1.25, 1, 0); A_or_B <- if_else(A || B, 1, 0); A_and_B <- if_else(A && B, 1, 0); } """ # In[91]: fit = pystan.stan(model_code=model, data=dict(x=x, y1=y1, y2=y2, treat=treat, n=n, sigma_b=(2, 1.5), Zero=(0,0))) # In[92]: fit['beta'].mean(0) # In[93]: np.exp(fit['beta'][:,1]).mean() # In[94]: fit.plot(pars=['beta']); # Probability of A or B being true is essentially one, while the probability of both is 65%. # In[95]: sample = fit.extract(pars=['A_or_B', 'A_and_B']) # In[96]: [sample[s].mean(0) for s in sample] # Correlation between betas # In[97]: fit['Omega_b'].mean(0)