#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import seaborn as sns sns.set() from pymc3 import * np.random.seed(42) # Extract data # In[6]: data = np.reshape([2005, 52, 30, 21, 1, 40280, 1.29, 0.74, 0.52, 2006, 55, 33, 19, 3, 42615, 1.29, 0.77, 0.45, 2007, 46, 25, 21, 0, 43199, 1.06, 0.58, 0.49, 2008, 52, 21, 22, 9, 43237, 1.20, 0.49, 0.51, 2009, 52, 19, 29, 4, 41476, 1.25, 0.46, 0.70, 2010, 35, 14, 20, 1, 40147, 0.87, 0.35, 0.50, 2011, 32, 14, 18, 0, 40366, 0.79, 0.35, 0.45, 2012, 40, 11, 23, 6, 40571, 0.99, 0.27, 0.57, 2013, 33, 14, 17, 2, 40792, 0.81, 0.34, 0.42, 2014, 34, 12, 18, 4, 41788, 0.81, 0.29, 0.43], (10,9)) # In[13]: year, total, early, late, late_late, births, *rates = data.T.astype(int) # Build binomial model for GBS # In[33]: with Model() as simple_model: # Probabilities for 2005-2009 p_early_0509 = Beta('p_early_0509', 1, 1) p_late_0509 = Beta('p_late_0509', 1, 1) # Difference between early and late in 2005-2009 diff_early_late_0509 = Deterministic('diff_early_late_0509', 1000*(p_late_0509 - p_early_0509)) # Probabilities for 2010-2014 p_early_1014 = Beta('p_early_1014', 1, 1) p_late_1014 = Beta('p_late_1014', 1, 1) # Difference between early and late in 2010-2014 diff_early_late_1014 = Deterministic('diff_early_late_1014', 1000*(p_late_1014 - p_early_1014)) # Differences between periods within early group diff_early_0509_1014 = Deterministic('diff_early_0509_1014', 1000*(p_early_1014 - p_early_0509)) # Differences between periods within late group diff_late_0509_1014 = Deterministic('diff_late_0509_1014', 1000*(p_late_1014 - p_late_0509)) pre_2010 = year < 2010 # Binomial likelihoods early_0509 = Binomial('early_0509', births[pre_2010], p_early_0509, observed=early[pre_2010]) late_0509 = Binomial('late_0509', births[pre_2010], p_late_0509, observed=late[pre_2010]) early_1014 = Binomial('early_1014', births[~pre_2010], p_early_1014, observed=early[~pre_2010]) late_1014 = Binomial('late_1014', births[~pre_2010], p_late_1014, observed=late[~pre_2010]) # Run the model using Hamiltonian MC # In[34]: with simple_model: start = find_MAP() step = NUTS(scaling=start) trace = sample(5000, step, start) # Estimates of binomial probabilities # In[35]: forestplot(trace, vars=['p_early_0509', 'p_late_0509', 'p_early_1014', 'p_late_1014']); # Estimates of differences between probabilities # In[36]: summary(trace, vars=[diff_early_late_0509, diff_early_late_1014, diff_early_0509_1014, diff_late_0509_1014]) # ### Conclusions # # There is a 95% probability the true difference between early rates in 2010-2014 versus 2005-2009 is between -0.421 and -0.155, with an estimated median difference of -0.288. This interval does not contain zero, so we are sure that the rate in 2010-14 is smaller than the rate in 2005-09. # # There is a 95% probability the true difference between late rates in 2010-2014 versus 2005-2009 is between -0.217 and 0.072, with an estimated median difference of -0.060. This interval contains zero, so we are not confident that the estimated difference is real. # # There is a 95% probability the true difference between early rate versus late rate in 2005-2009 is between -0.227 and 0.067, with an estimated median difference of -0.077. This interval contains zero, so we are not confident that the estimated difference is real. # # There is a 95% probability the true difference between early rate versus late rate in 2010-2014 is between 0.032 and 0.278, with an estimated median difference of 0.151. This interval does not contain zero, so we are sure that the late rate is larger than the early rate in 2010-14. # In[38]: traceplot(trace, vars=['diff_early_0509_1014', 'diff_late_0509_1014', 'diff_early_late_0509', 'diff_early_late_1014']); # In[39]: n_white = 327017.2 n_black = 70874.54 eos_white = 148 los_white = 96 eos_black = 221 los_black = 132 # In[40]: with Model() as race_model: p_eos_white = Beta('p_eos_white', 1, 1) p_los_white = Beta('p_los_white', 1, 1) p_eos_black = Beta('p_eos_black', 1, 1) p_los_black = Beta('p_los_black', 1, 1) eos_diff = Deterministic('eos_diff', 1000*(p_eos_white - p_eos_black)) los_diff = Deterministic('los_diff', 1000*(p_los_white - p_los_black)) y_eos_white = Binomial('y_eos_white', n_white, p_eos_white, observed=eos_white) y_los_white = Binomial('y_los_white', n_white, p_los_white, observed=eos_white) y_eos_black = Binomial('y_eos_black', n_black, p_eos_black, observed=eos_black) y_los_black = Binomial('y_los_black', n_black, p_los_black, observed=eos_black) # In[41]: with race_model: start = find_MAP() step = NUTS(scaling=start) race_trace = sample(5000, step, start) # In[42]: summary(race_trace, vars=['eos_diff', 'los_diff'])