!pip install --user engarde
Collecting engarde Using cached engarde-0.3.1-py2.py3-none-any.whl Requirement already satisfied (use --upgrade to upgrade): numpy in /usr/local/lib/python3.4/dist-packages (from engarde) Requirement already satisfied (use --upgrade to upgrade): pandas in /tmp/src/pandas (from engarde) Requirement already satisfied (use --upgrade to upgrade): python-dateutil>=2 in /usr/local/lib/python3.4/dist-packages (from pandas->engarde) Requirement already satisfied (use --upgrade to upgrade): pytz>=2011k in /usr/local/lib/python3.4/dist-packages (from pandas->engarde) Requirement already satisfied (use --upgrade to upgrade): six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2->pandas->engarde) Installing collected packages: engarde Successfully installed engarde-0.3.1
%matplotlib inline
import pandas as pd
import numpy as np
import numpy.ma as ma
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context("notebook")
import engarde.checks as check
np.random.seed(20090425)
data_dir = "data/"
Import outbreak data
measles_data = pd.read_csv(data_dir+"measles.csv", index_col=0, encoding='latin-1')
measles_data.NOTIFICATION = pd.to_datetime(measles_data.NOTIFICATION)
measles_data.BIRTH = pd.to_datetime(measles_data.BIRTH)
measles_data.ONSET = pd.to_datetime(measles_data.ONSET)
measles_data = (measles_data.replace({'DISTRICT': {'BRASILANDIA':'BRAZILANDIA'}})
.drop('AGE', axis=1))
Sao Paulo population by district
sp_pop = pd.read_csv(data_dir+'sp_pop.csv', index_col=0)
_names = sp_pop.index.values
_names[_names=='BRASILANDIA'] = 'BRAZILANDIA'
sp_pop.set_index(_names, inplace = True)
sp_pop.head(3)
0 a 4 anos | 5 a 9 anos | 10 a 14 anos | 15 a 19 anos | 20 a 24 anos | 25 a 29 anos | 30 a 34 anos | 35 a 39 anos | 40 a 44 anos | 45 a 49 anos | 50 a 54 anos | 55 a 59 anos | 60 a 64 anos | 65 a 69 anos | 70 a 74 anos | 75 anos e + | Total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
AGUA RASA | 5411 | 5750 | 6450 | 7122 | 7621 | 7340 | 6999 | 6984 | 6346 | 5608 | 4987 | 4212 | 4152 | 3595 | 2937 | 3637 | 89151 |
ALTO DE PINHEIROS | 2070 | 2369 | 2953 | 3661 | 4612 | 4190 | 3539 | 3633 | 3448 | 3289 | 3040 | 2533 | 2298 | 1732 | 1305 | 1823 | 46495 |
ANHANGUERA | 3068 | 3006 | 2755 | 2431 | 2426 | 2636 | 2695 | 2308 | 1653 | 1107 | 753 | 509 | 352 | 217 | 162 | 171 | 26249 |
Plot of cumulative cases by district
measles_onset_dist = measles_data.groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0)
measles_onset_dist.cumsum().plot(legend=False, grid=False)
<matplotlib.axes._subplots.AxesSubplot at 0x7fb9cff1b978>
measles_data[measles_data.ONSET<'1997-06-15'].shape
(3698, 14)
measles_data[measles_data.ONSET<'1997-07-15'].shape
(11982, 14)
Age distribution of cases, by confirmation status
by_conclusion = measles_data.groupby(["YEAR_AGE", "CONCLUSION"])
counts_by_cause = by_conclusion.size().unstack().fillna(0)
ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5))
vaccination_data = pd.read_csv('data/BrazilVaxRecords.csv', index_col=0)
vaccination_data.head()
BIRTHS | VAX | POP | SIA | beta_alpha | beta_beta | |
---|---|---|---|---|---|---|
YEAR | ||||||
1980 | 3896442 | 0.57 | 121740438 | 0.0 | 55.3128 | 41.7272 |
1981 | 3933136 | 0.73 | 124610790 | 0.0 | 56.8232 | 21.0168 |
1982 | 3952137 | 0.66 | 127525420 | 0.0 | 58.5816 | 30.1784 |
1983 | 3952735 | 0.68 | 130455659 | 0.0 | 58.5072 | 27.5328 |
1984 | 3935224 | 0.73 | 133364277 | 0.0 | 56.8232 | 21.0168 |
alphas, betas = vaccination_data[['beta_alpha', 'beta_beta']].values.T
Calculate residual susceptibility from routine vaccination
vax_97 = np.r_[[0]*(1979-1921+1), vaccination_data.VAX[:17]]
n = len(vax_97)
FOI_mat = np.resize((1 - vax_97*0.9), (n,n)).T
vacc_susc = (1 - vax_97*0.9)[::-1]
vacc_susc[0] = 0.5
Susceptiblity accounting for SIAs
sia_susc = np.ones(len(vax_97))
birth_year = np.arange(1922, 1998)[::-1]
by_mask = (birth_year > 1983) & (birth_year < 1992)
sia_susc[by_mask] *= 0.2
Age classes are defined in 5-year intervals. We will combine 40+ ages into a single class.
age_classes = [0,5,10,15,20,25,30,35,40,100]
measles_data.dropna(subset=['YEAR_AGE'], inplace=True)
measles_data['YEAR_AGE'] = measles_data.YEAR_AGE.astype(int)
measles_data['AGE_GROUP'] = pd.cut(measles_data.YEAR_AGE, age_classes, right=False)
Lab-checked observations are extracted for use in estimating lab confirmation probability.
CONFIRMED = measles_data.CONCLUSION == 'CONFIRMED'
CLINICAL = measles_data.CONCLUSION == 'CLINICAL'
DISCARDED = measles_data.CONCLUSION == 'DISCARDED'
Extract lab-confirmed and clinical-confirmed subsets, with no missing county information.
lab_subset = measles_data[(CONFIRMED | DISCARDED) & measles_data.COUNTY.notnull()].copy()
age = lab_subset.YEAR_AGE.values
ages = lab_subset.YEAR_AGE.unique()
counties = lab_subset.COUNTY.unique()
confirmed = (lab_subset.CONCLUSION=='CONFIRMED').values
clinic_subset = measles_data[CLINICAL & measles_data.COUNTY.notnull()].copy()
Histogram of lab subset, by outcome.
_lab_subset = lab_subset.replace({"CONCLUSION": {"CLINICAL": "UNCONFIRMED"}})
by_conclusion = _lab_subset.groupby(["YEAR_AGE", "CONCLUSION"])
counts_by_cause = by_conclusion.size().unstack().fillna(0)
ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5), grid=False)
lab_subset.shape
(41547, 15)
Empirical confirmation over time
ax = (_lab_subset.groupby(['CONCLUSION', 'ONSET'])
.size()
.unstack().T
.fillna(0)
.resample('2W').sum()
.apply(lambda x: x.CONFIRMED/x.sum(), axis=1)).plot()
ax.set_xlabel('Time')
ax.set_ylabel('Confirmation rate')
<matplotlib.text.Text at 0x7fb9ce8e41d0>
By age group
(_lab_subset.groupby(['CONCLUSION', 'AGE_GROUP', 'ONSET'])
.size()
.unstack().T
.fillna(0)
.resample('M').sum()
.apply(lambda x: x.CONFIRMED/(x.CONFIRMED + x.DISCARDED), axis=1).fillna(0)
.plot(colormap='Reds').legend(loc='center left', bbox_to_anchor=(1, 0.5)))
<matplotlib.legend.Legend at 0x7fb9cecd30b8>
lab_subset['notification_gap'] = (lab_subset.NOTIFICATION - lab_subset.ONSET).dt.days
Drop negative values for gap
lab_subset.loc[lab_subset.notification_gap<0, 'notification_gap'] = np.nan
lab_subset.notification_gap.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7fb9cecf1a90>
Define age groups
age_group = pd.cut(age, age_classes, right=False)
age_index = np.array([age_group.categories.tolist().index(i) for i in age_group])
age_groups = age_group.categories
age_groups
Index(['[0, 5)', '[5, 10)', '[10, 15)', '[15, 20)', '[20, 25)', '[25, 30)', '[30, 35)', '[35, 40)', '[40, 100)'], dtype='object')
age_slice_endpoints = [g[1:-1].split(',') for g in age_groups]
age_slices = [slice(int(i[0]), int(i[1])) for i in age_slice_endpoints]
Get index from full cross-tabulation to use as index for each district
dates_index = measles_data.groupby(['ONSET', 'AGE_GROUP']).size().unstack().index
(lab_subset.YEAR_AGE<2).mean()
0.22454088141141357
Match age groupings, exclude invalid districts.
unique_districts = measles_data.DISTRICT.dropna().unique()
excludes = ['BOM RETIRO']
N = sp_pop.drop(excludes).ix[unique_districts].sum().drop('Total')
N_age = N.iloc[:8]
N_age.index = age_groups[:-1]
N_age[age_groups[-1]] = N.iloc[8:].sum()
N_age.plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x7fb9cec4a2e8>
Compile bi-weekly confirmed and unconfirmed data by Sao Paulo district
# All confirmed cases, by district
confirmed_data = lab_subset[lab_subset.CONCLUSION=='CONFIRMED']
confirmed_counts = (confirmed_data.groupby(['ONSET', 'AGE_GROUP'])
.size()
.unstack()
.reindex(dates_index)
.fillna(0)
.sum())
all_confirmed_cases = (confirmed_counts.reindex_axis(measles_data['AGE_GROUP'].unique())
.fillna(0).values.astype(int))
confirmed_data.shape
(22097, 16)
confirmed_counts_2w = (confirmed_data
.groupby(['ONSET', 'AGE_GROUP'])
.size()
.unstack()
.reindex(dates_index)
.fillna(0)
.resample('2W')
.sum()
.pipe(check.is_shape,
(28, len(age_groups))))
# All clinical cases, by district
clinical_counts = (clinic_subset.groupby(['ONSET', 'AGE_GROUP'])
.size()
.unstack()
.reindex(dates_index)
.fillna(0)
.sum())
all_clinical_cases = (clinical_counts.reindex_axis(measles_data['AGE_GROUP'].unique())
.fillna(0).values.astype(int))
clinical_counts_2w = (clinic_subset
.groupby(['ONSET', 'AGE_GROUP'])
.size()
.unstack()
.reindex(dates_index)
.fillna(0)
.resample('2W')
.sum()
.pipe(check.is_shape,
(28, len(age_groups))))
confirmed_counts_2w.head()
AGE_GROUP | [0, 5) | [5, 10) | [10, 15) | [15, 20) | [20, 25) | [25, 30) | [30, 35) | [35, 40) | [40, 100) |
---|---|---|---|---|---|---|---|---|---|
ONSET | |||||||||
1997-01-05 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1997-01-19 | 0.0 | 1.0 | 0.0 | 0.0 | 3.0 | 4.0 | 0.0 | 0.0 | 0.0 |
1997-02-02 | 4.0 | 1.0 | 0.0 | 0.0 | 2.0 | 1.0 | 0.0 | 0.0 | 0.0 |
1997-02-16 | 4.0 | 0.0 | 0.0 | 0.0 | 2.0 | 1.0 | 1.0 | 0.0 | 0.0 |
1997-03-02 | 9.0 | 0.0 | 0.0 | 2.0 | 4.0 | 5.0 | 1.0 | 0.0 | 1.0 |
clinical_counts_2w.head()
AGE_GROUP | [0, 5) | [5, 10) | [10, 15) | [15, 20) | [20, 25) | [25, 30) | [30, 35) | [35, 40) | [40, 100) |
---|---|---|---|---|---|---|---|---|---|
ONSET | |||||||||
1997-01-05 | 3.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1997-01-19 | 30.0 | 3.0 | 1.0 | 1.0 | 1.0 | 3.0 | 2.0 | 1.0 | 0.0 |
1997-02-02 | 22.0 | 4.0 | 0.0 | 2.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 |
1997-02-16 | 21.0 | 2.0 | 2.0 | 2.0 | 2.0 | 1.0 | 1.0 | 0.0 | 2.0 |
1997-03-02 | 24.0 | 5.0 | 2.0 | 5.0 | 2.0 | 2.0 | 2.0 | 1.0 | 0.0 |
We will extend a simple SIR disease model, to account for confirmation status, which will be fit using MCMC.
This model fits the series of 2-week infection totals for each age group $a$ as a set of Poisson random variables:
$$Pr(I_{a}(t) | \lambda_a(t)) = \text{Poisson}(\lambda_a(t)) $$Where the age-specific outbreak intensity at time $t$ is modeled as:
$$\lambda_a(t) = S_a(t-1) \frac{I(t-1)\mathbf{B}}{N_a} $$where $S_a(t-1)$ is the number of susceptibles in age group $a$ in the previous time period, $I(t-1)$ an age-specific vector of the number of infected individuals in the previous time period, $\mathbf{B}$ a matrix of transmission coefficients (both within- and between-ages), and $N_a$ an estimate of the population of age-$a$ people in Sao Paulo.
The matrix $B$ was constructed from a scalar transmission parameter $\beta$, which was given a vague half-Cauchy prior (scale=25). This was used to represent within-age-group transmission, and hence placed on the diagonal of a square transmission matrix of size $A$. Off-diagonal elements, representing transmission between age groups were scaled by a decay parameter $\delta$ which was used to scale the transmission to adjacent groups according to:
$$\beta \delta^{|a-b|}$$where a and b are indices of two age group. The resulting transmission matrix is parameterized as follows:
$$\begin{aligned} \mathbf{B} = \left[{ \begin{array}{c} {\beta} & {\beta \delta} & {\beta \delta^2}& \ldots & {\beta \delta^{A-2}} & {\beta \delta^{A-1}} \\ {\beta \delta} & {\beta} & \beta \delta & \ldots & {\beta \delta^{A-3}} & {\beta \delta^{A-2}} \\ {\beta \delta^2} & \beta \delta & {\beta} & \ldots & {\beta \delta^{A-4}} & {\beta \delta^{A-3}} \\ \vdots & \vdots & \vdots & & \vdots & \vdots\\ {\beta \delta^{A-2}} & {\beta \delta^{A-3}} & {\beta \delta^{A-4}} & \ldots & {\beta} & \beta \delta \\ {\beta \delta^{A-1}} & {\beta \delta^{A-2}} & \beta \delta^{A-3} & \ldots & \beta \delta & {\beta} \end{array} }\right] \end{aligned}$$The basic reproductive number $R_0$ was calculated as the largest real-valued eigenvalue of the matrix $\mathbf{B}$. To impose a mild constraint on $R_0$, we applied a Gaussian prior distribution whose 1st and 99th quantiles are 8 and 24, respectively, a reasonable range for a measles outbreak:
from pymc import MCMC, Matplot, AdaptiveMetropolis, MAP, Slicer
from pymc import (Uniform, DiscreteUniform, Beta, Binomial, Normal,
CompletedDirichlet, Pareto,
Poisson, NegativeBinomial, negative_binomial_like, poisson_like,
Lognormal, Exponential, binomial_like,
TruncatedNormal, Binomial, Gamma, HalfCauchy, normal_like,
MvNormalCov, Bernoulli, Uninformative,
Multinomial, rmultinomial, rbinomial, runiform,
Dirichlet, multinomial_like, uniform_like)
from pymc import (Lambda, observed, invlogit, deterministic, potential, stochastic, logit)
def measles_model(obs_date, confirmation=True, migrant=False, constrain_R=True):
'''
Truncate data at observation period
'''
obs_index = clinical_counts_2w.index <= obs_date
confirmed_obs_t = confirmed_counts_2w[obs_index].values.astype(int)
clinical_obs_t = clinical_counts_2w[obs_index].values.astype(int)
n_periods, n_age_groups = confirmed_obs_t.shape
# Index for observation date, used to index out values of interest
# from the model.
t_obs = obs_index.sum() - 1
lab_index = (lab_subset.NOTIFICATION > obs_date).values
confirmed_t = confirmed[lab_index]
age_index_t = age_index[lab_index]
'''
Confirmation sub-model
'''
if confirmation:
# Specify priors on age-specific means
age_classes = np.unique(age_index)
μ = Normal("μ", mu=0, tau=0.0001, value=[0]*len(age_classes))
σ = HalfCauchy('σ', 0, 25, value=1)
var = σ**2
ρ = Uniform('ρ', -1, 1, value=0)
# Build variance-covariance matrix with first-order correlation
# among age classes
@deterministic
def Σ(var=var, cor=ρ):
I = np.eye(len(age_classes))*var
E = np.diag(np.ones(len(age_classes)-1), k=-1)*var*cor
return I + E + E.T
# Age-specific probabilities of confirmation as multivariate normal
# random variables
β_age = MvNormalCov("β_age", mu=μ, C=Σ, value=[1]*len(age_classes))
p_age = Lambda('p_age', lambda b=β_age: invlogit(b))
@deterministic(trace=False)
def p_confirm(b=β_age):
return invlogit(b[age_index_t])
# Confirmation likelihood
lab_confirmed = Bernoulli('lab_confirmed', p=p_confirm, value=confirmed_t,
observed=True)
if confirmation:
@stochastic(dtype=int)
def clinical_cases(value=(clinical_obs_t*0.5).astype(int),
n=clinical_obs_t, p=p_age):
# Binomial confirmation process
return np.sum([binomial_like(xi, ni, p) for xi,ni in zip(value,n)])
I = Lambda('I', lambda clinical=clinical_cases:
clinical + confirmed_obs_t.astype(int))
assert I.value.shape == (t_obs +1, n_age_groups)
age_dist_init = np.sum(I.value, 0)/ float(I.value.sum())
else:
I = confirmed_obs_t + clinical_obs_t
assert I.shape == (t_obs +1, n_age_groups)
age_dist_init = np.sum(I, 0) / float(I.sum())
# Calcuate age distribution from observed distribution of infecteds to date
_age_dist = Dirichlet('_age_dist', np.ones(n_age_groups),
value=age_dist_init[:-1]/age_dist_init.sum())
age_dist = CompletedDirichlet('age_dist', _age_dist)
@potential
def age_dist_like(p=age_dist, I=I):
return multinomial_like(I.sum(0), I.sum(), p)
'''
Disease transmission model
'''
# Transmission parameter
β = HalfCauchy('β', 0, 25, value=5) #[1]*n_age_groups)
decay = Beta('decay', 1, 5, value=0.8)
@deterministic
def B(b=β, d=decay):
b = np.ones(n_age_groups)*b
B = b*np.eye(n_age_groups)
for i in range(1, n_age_groups):
B += np.diag(np.ones(n_age_groups-i)*b[i:]*d**i, k=-i)
B += np.diag(np.ones(n_age_groups-i)*b[:-i]*d**i, k=i)
return B
# Downsample annual series to observed age groups
downsample = lambda x: np.array([x[s].mean() for s in age_slices])
@deterministic
def R0(B=B):
evs = np.linalg.eigvals(B)
return max(evs[np.isreal(evs)])
if constrain_R:
@potential
def constrain_R0(R0=R0):
# Weakly-informative prior to constrain R0 to be within the
# typical measles range
return normal_like(R0, 16, 3.4**-2)
vax = Beta('vax', alphas[:17], betas[:17], value=vaccination_data.VAX[:17])
vax_97 = Lambda('vax_97', lambda vax=vax: np.r_[[0]*(1979-1921+1), vax])
n = len(vax_97.value)
FOI_mat = Lambda('FOI_mat', lambda vax_97=vax_97: np.resize((1 - vax_97*0.9), (n,n)).T)
@deterministic
def vacc_susc(vax_97=vax_97):
v = (1 - vax_97*0.9)[::-1]
v[0] = 0.5
return v
coverage_residual = Uniform('coverage_residual', 0.2, 0.325)
@deterministic
def sia_susc(r=coverage_residual):
s = np.ones(n)
birth_year = np.arange(1922, 1998)[::-1]
by_mask = (birth_year > 1983) & (birth_year < 1992)
s[by_mask] *= r
return s
A = Lambda('A', lambda R0=R0: 75./(R0 - 1))
lt_sum = Lambda('lt_sum', lambda FOI_mat=FOI_mat: downsample(np.tril(FOI_mat).sum(0)[::-1]))
natural_susc = Lambda('natural_susc', lambda A=A, lt_sum=lt_sum: np.exp((-1/A) * lt_sum))
@deterministic
def p_μ(v=vacc_susc, n=natural_susc, s=sia_susc):
return downsample(s) * downsample(v) * n
# Following Stan manual chapter 16.2
λ_p = Pareto('λ_p', 1.5, 0.1, value=0.5)
a = Lambda('a', lambda mu=p_μ, lam=λ_p: mu*lam, trace=False)
b = Lambda('b', lambda mu=p_μ, lam=λ_p: (1-mu)*lam, trace=False)
p_susceptible = Beta('p_susceptible', a, b, value=p_μ.value)
# Estimated total initial susceptibles
if not migrant:
S_0_init = (N_age.values*p_μ.value).astype(int) + 30
S_0 = Binomial('S_0', n=N_age.values.astype(int), p=p_susceptible, value=S_0_init)
else:
S_0 = Binomial('S_0', n=N_age.values.astype(int), p=p_susceptible)
'''
Model of migrant influx of susceptibles
'''
if migrant:
# Data augmentation for migrant susceptibles
imaginary_migrants = 1000000
N_migrant = DiscreteUniform('N_migrant', 0, imaginary_migrants, value=100000)
μ_age = Uniform('μ_age', 15, 35, value=25)
σ_age = Uniform('σ_age', 1, 10, value=5)
M_age = Normal('M_age', μ_age, σ_age**-2,
size=imaginary_migrants, trace=False)
@deterministic
def M_0(M=M_age, N=N_migrant):
# Take first N augmented susceptibles
M_real = M[:N]
# Drop into age groups
M_group = pd.cut(M_real,
[0, 5, 10, 15, 20, 25, 30, 35, 40, 100],
right=False)
return M_group.value_counts().values
p_migrant = Lambda('p_migrant', lambda M_0=M_0, S_0=S_0: M_0/(M_0 + S_0))
I_migrant = [Binomial('I_migrant_%i' % i, I[i], p_migrant)
for i in range(t_obs + 1)]
I_local = Lambda('I_local',
lambda I=I, I_m=I_migrant:
np.array([Ii - Imi for Ii,Imi in zip(I,I_m)]))
S = Lambda('S', lambda I=I, S_0=S_0, M_0=M_0: S_0 + M_0 - I.cumsum(0))
S_local = Lambda('S_local', lambda I=I_local, S_0=S_0: S_0 - I.cumsum(0))
else:
# Remaining susceptibles at each 2-week period
S = Lambda('S', lambda I=I, S_0=S_0: S_0 - I.cumsum(axis=0))
# Check shape
assert S.value.shape == (t_obs+1., n_age_groups)
# Susceptibles at time t, by age
S_age = Lambda('S_age', lambda S=S: S[-1].astype(int))
# Force of infection
@deterministic
def λ(B=B, I=I, S=S):
return S * (I.dot(B) / N_age.values)
# Check shape
assert λ.value.shape == (t_obs+1, n_age_groups)
# FOI in observation period
λ_t = Lambda('λ_t', lambda lam=λ: lam[-1])
# Effective reproductive number
R_t = Lambda('R_t', lambda S=S, R0=R0: S.sum(1) * R0 / N_age.sum())
if migrant:
R_t_local = Lambda('R_t_local', lambda S=S_local, R0=R0: S.sum(1) * R0 / N_age.sum())
# Poisson likelihood for observed cases
@potential
def new_cases(I=I, lam=λ):
return poisson_like(I[1:], lam[:-1])
'''
Vaccination targets
'''
# Probability of vaccine efficacy
p_efficacy = 0.95
# Reach of campaign
p_reach = runiform(0.75, 0.95)
@deterministic
def vacc_5(S=S_age):
# Vaccination of 5 and under
p = [p_reach] + [0]*(n_age_groups - 1)
return rbinomial(rbinomial(S, p), p_efficacy)
# Proportion of susceptibles vaccinated
pct_5 = Lambda('pct_5',
lambda V=vacc_5, S=S_age: V.sum()/S.sum())
@deterministic
def vacc_15(S=S_age):
# Vaccination of 15 and under
p = [p_reach]*3 + [0]*(n_age_groups - 3)
return rbinomial(rbinomial(S, p), p_efficacy)
# Proportion of susceptibles vaccinated
pct_15 = Lambda('pct_15',
lambda V=vacc_15, S=S_age: V.sum()/S.sum())
@deterministic
def vacc_30(S=S_age):
# Vaccination of 30 and under
p = [p_reach]*6 + [0]*(n_age_groups - 6)
return rbinomial(rbinomial(S, p), p_efficacy)
# Proportion of 30 and under susceptibles vaccinated
pct_30 = Lambda('pct_30',
lambda V=vacc_30, S=S_age: V.sum()/S.sum())
@deterministic
def vacc_adult(S=S_age):
# Vaccination of adults under 30 (and young kids)
p = [p_reach, 0, 0, 0, p_reach, p_reach] + [0]*(n_age_groups - 6)
return rbinomial(rbinomial(S, p), p_efficacy)
# Proportion of adults under 30 (and young kids)
pct_adult = Lambda('pct_adult',
lambda V=vacc_adult, S=S_age: V.sum()/S.sum())
return locals()
Run models for June 15 and July 15 observation points, both with and without clinical confirmation.
n_iterations = 50000
n_burn = 40000
migrant = True
Use backgroundjobs
to run the models each in their own thread:
from IPython.lib import backgroundjobs as bg
jobs = bg.BackgroundJobManager()
Instantiate models
model_june = MCMC(measles_model('1997-06-15', confirmation=True, migrant=migrant))
model_july = MCMC(measles_model('1997-07-15', confirmation=True, migrant=migrant))
model_june_noconf = MCMC(measles_model('1997-06-15', confirmation=False, migrant=migrant))
model_july_noconf = MCMC(measles_model('1997-07-15', confirmation=False, migrant=migrant))
Run models
for model in model_june, model_july, model_june_noconf, model_july_noconf:
jobs.new(model.sample, n_iterations, n_burn, kw=dict(progress_bar=False))
Starting job # 0 in a separate thread. Starting job # 2 in a separate thread. Starting job # 3 in a separate thread. Starting job # 4 in a separate thread.
jobs.status()
Completed jobs: 0 : <bound method MCMC.sample of <pymc.MCMC.MCMC object at 0x7fb9ceb3ce48>> 2 : <bound method MCMC.sample of <pymc.MCMC.MCMC object at 0x7fb9d00ab438>> 3 : <bound method MCMC.sample of <pymc.MCMC.MCMC object at 0x7fb9d0039400>> 4 : <bound method MCMC.sample of <pymc.MCMC.MCMC object at 0x7fb9d0045550>>
Estimate of R0 for june (with confirmation submodel)
if model_june.R0.value.shape:
Matplot.summary_plot(model_june.R0, custom_labels=age_groups)
else:
Matplot.plot(model_june.R0)
Plotting R0
Estimate of R0 for june (no confirmation submodel)
if model_june.R0.value.shape:
Matplot.summary_plot(model_june_noconf.R0, custom_labels=age_groups)
else:
Matplot.plot(model_june_noconf.R0)
Plotting R0
Estimate of R0 for july (with confirmation submodel)
if model_july.β.shape:
Matplot.summary_plot(model_july.R0, custom_labels=age_groups)
else:
Matplot.plot(model_july.R0)
Plotting R0
Estimate of R0 for july (no confirmation submodel)
if model_july_noconf.β.shape:
Matplot.summary_plot(model_july_noconf.R0, custom_labels=age_groups)
else:
Matplot.plot(model_july_noconf.R0)
Plotting R0
Lab confirmation rates, June model
with sns.axes_style("ticks"):
p_age = pd.DataFrame(model_june.p_age.trace(), columns=age_groups)
f, axes = plt.subplots(figsize=(14,6))
sns.boxplot(data=p_age, linewidth=0.3, fliersize=0, ax=axes,
color=sns.color_palette("coolwarm", 5)[0],
order=age_group.categories)
axes.set_ylabel('Proportion lab positive')
axes.set_xlabel('Age interval (years)')
Proportion of local population susceptible, June model.
Matplot.summary_plot(model_june.p_susceptible, custom_labels=age_groups)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
Proportion of local population susceptible, June model with no confirmation correction
Matplot.summary_plot(model_june_noconf.p_susceptible, custom_labels=age_groups)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
Epidemic intensity estimates at June or July observation time, by age group.
Matplot.summary_plot(model_june.λ_t, custom_labels=age_groups)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
Matplot.summary_plot(model_july.λ_t, custom_labels=age_groups)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
Time series of epidemic intensities for lab- versus clinical-confirmation models, for each age group.
model_june.λ_t.trace().std()
90.823122374204431
lam_june = model_june.λ_t.stats()
lam_june = model_june.λ.stats()
fig, axes = plt.subplots(2, 1, sharey=True)
axes[0].plot(lam_june['quantiles'][50], 'b-', alpha=0.4)
axes[0].set_ylabel('Epidemic intensity')
axes[0].set_xlabel('time (2-week periods)')
axes[0].set_title('Lab confirmation')
lam_june_noconf = model_june_noconf.λ.stats()
axes[1].plot(lam_june_noconf['quantiles'][50], 'b-', alpha=0.4)
axes[1].set_ylabel('Epidemic intensity')
axes[1].set_xlabel('time (2-week periods)')
axes[1].set_title('Clinical confirmation')
plt.tight_layout()
Plots of susceptibility parameters for June model with confirmation.
Matplot.summary_plot(model_june.vax, main='Vaccination coverage by year')
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
# SIA susceptibility
Matplot.summary_plot(model_june.sia_susc)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
# Vaccination susceptibility
Matplot.summary_plot(model_june.vacc_susc)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
Matplot.summary_plot(model_june.natural_susc)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
Rt_values = pd.DataFrame(np.c_[model_june.R_t.trace()[:, -1],
model_june.R_t_local.trace()[:, -1]],
columns=['June, total', 'June, local'])
ax = Rt_values.boxplot(return_type='axes');
ax.set_ylabel('R(t)')
<matplotlib.text.Text at 0x7fb9cc1cb898>
with sns.axes_style("ticks"):
Rt_values = pd.DataFrame(np.c_[model_june.R_t.trace()[:, -1],
model_june_noconf.R_t.trace()[:, -1],
model_july.R_t.trace()[:, -1],
model_july_noconf.R_t.trace()[:, -1]],
columns=['June\nage confirmation', 'June\nclinical only',
'July\nage confirmation', 'July\nclinical only'])
ax = Rt_values.boxplot(return_type='axes', figsize=(14,6), grid=False);
ax.set_ylabel('R(t)')
S_age_june = pd.DataFrame(model_june.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index()
S_age_june.columns = 'Age', 'Iteration', 'S'
S_age_june['Confirmation'] = 'Lab'
S_age_june['Month'] = 'June'
S_age_june_noconf = pd.DataFrame(model_june_noconf.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index()
S_age_june_noconf.columns = 'Age', 'Iteration', 'S'
S_age_june_noconf['Confirmation'] = 'Clinical'
S_age_june_noconf['Month'] = 'June'
S_age_june = pd.concat([S_age_june, S_age_june_noconf], ignore_index=True)
S_age_june.to_csv('output/S_age_june.csv')
S_age_june_local = pd.DataFrame(model_june.S_local.trace()[:,-1,:].squeeze(), columns=age_groups).unstack().reset_index()
S_age_june_local.columns = 'Age', 'Iteration', 'S'
S_age_june_local['Confirmation'] = 'Lab'
S_age_june_local['Month'] = 'June'
S_age_june_local_noconf = pd.DataFrame(model_june_noconf.S_local.trace()[:,-1,:].squeeze(), columns=age_groups).unstack().reset_index()
S_age_june_local_noconf.columns = 'Age', 'Iteration', 'S'
S_age_june_local_noconf['Confirmation'] = 'Clinical'
S_age_june_local_noconf['Month'] = 'June'
S_age_june_local = pd.concat([S_age_june_local, S_age_june_local_noconf], ignore_index=True)
S_age_june_local.to_csv('output/S_age_june_local.csv')
S_age_july = pd.DataFrame(model_july.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index()
S_age_july.columns = 'Age', 'Iteration', 'S'
S_age_july['Confirmation'] = 'Lab'
S_age_july['Month'] = 'July'
S_age_july_noconf = pd.DataFrame(model_july_noconf.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index()
S_age_july_noconf.columns = 'Age', 'Iteration', 'S'
S_age_july_noconf['Confirmation'] = 'Clinical'
S_age_july_noconf['Month'] = 'July'
S_age_july = pd.concat([S_age_july, S_age_july_noconf], ignore_index=True)
S_age_july.to_csv('output/S_age_july.csv')
S_age = pd.concat([S_age_june.assign(susceptibles='Migrant + Local'),
S_age_june_local.assign(susceptibles='Local only')], ignore_index=True)
S_age.to_csv('output/S_age.csv')
Numbers of suscepibles in each age group, under lab vs clinical confirmation
sns.despine()
with sns.axes_style("ticks"):
g = sns.factorplot("Age", "S1000", "Confirmation", data=S_age.assign(S1000=S_age.S/1000),
kind="box", palette="hls", row='susceptibles', size=6, aspect=2, linewidth=0.3, fliersize=0,
order=age_group.categories, legend=False, facet_kws={'ylim':(0, 250)})
g.despine(offset=10, trim=True)
g.set_axis_labels("Age Interval (years)", "Susceptibles (1000's)")
plt.tight_layout();
<matplotlib.figure.Figure at 0x7fb9109fb7f0>
model_june.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])
pct_5: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.166 0.006 0.0 [ 0.156 0.175] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.156 0.164 0.165 0.172 0.181 pct_15: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.256 0.009 0.0 [ 0.241 0.268] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.241 0.252 0.254 0.264 0.277 pct_30: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.732 0.002 0.0 [ 0.729 0.736] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.729 0.731 0.732 0.734 0.737 pct_adult: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.516 0.001 0.0 [ 0.514 0.519] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.513 0.515 0.517 0.517 0.519
june_coverage = pd.DataFrame({name: model_june.trace(name)[:9999] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})
june_coverage['Month'] = 'June'
june_coverage['Confirmation'] = 'Lab'
june_noconf_coverage = pd.DataFrame({name: model_june_noconf.trace(name)[:9999] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})
june_noconf_coverage['Month'] = 'June'
june_noconf_coverage['Confirmation'] = 'Clinical'
july_coverage = pd.DataFrame({name: model_july.trace(name)[:9999] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})
july_coverage['Month'] = 'July'
july_coverage['Confirmation'] = 'Lab'
july_noconf_coverage = pd.DataFrame({name: model_july_noconf.trace(name)[:9999] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})
july_noconf_coverage['Month'] = 'July'
july_noconf_coverage['Confirmation'] = 'Clinical'
coverage = pd.concat([june_coverage, june_noconf_coverage, july_coverage, july_noconf_coverage],
ignore_index=True)
coverage.to_csv('output/coverage.csv')
sns.factorplot(row="Month", col="Confirmation", data=coverage, kind='box',
row_order=['June', 'July'],
order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'],
palette="YlGnBu_d", linewidth=0.7, fliersize=0, aspect=1.25).despine(left=True)
<seaborn.axisgrid.FacetGrid at 0x7fb958e1f400>
axes = sns.boxplot(data=june_coverage, order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'],
color=sns.color_palette("coolwarm", 5)[0])
axes.set_xticklabels(['Under 5', 'Under 15', 'Under 30', 'Under 5 + 20-30'])
axes.set_ylabel('% susceptibles vaccinated')
sns.despine(offset=10, trim=True)
with sns.axes_style("ticks"):
pc_values = np.maximum(0, 1- 1/Rt_values)
ax = pc_values.boxplot(return_type='axes', figsize=(14,6), grid=False)
ax.set_ylabel('$p_c$')
ax.semilogy();
qcoverage = coverage.groupby(['Month','Confirmation'], sort=False).quantile([0.025, 0.975])
qcoverage
pct_15 | pct_30 | pct_5 | pct_adult | |||
---|---|---|---|---|---|---|
Month | Confirmation | |||||
June | Lab | 0.025 | 0.241274 | 0.729057 | 0.156162 | 0.513108 |
0.975 | 0.277205 | 0.736786 | 0.181394 | 0.518855 | ||
Clinical | 0.025 | 0.312788 | 0.791499 | 0.222198 | 0.618730 | |
0.975 | 0.358310 | 0.799303 | 0.258587 | 0.627796 | ||
July | Lab | 0.025 | 0.238358 | 0.700166 | 0.147069 | 0.485921 |
0.975 | 0.255405 | 0.704341 | 0.158505 | 0.489398 | ||
Clinical | 0.025 | 0.256499 | 0.671180 | 0.196884 | 0.544044 | |
0.975 | 0.275787 | 0.674649 | 0.212795 | 0.547529 |
mean_coverage = coverage.groupby(['Month','Confirmation'], sort=False).mean()
mean_coverage
pct_15 | pct_30 | pct_5 | pct_adult | ||
---|---|---|---|---|---|
Month | Confirmation | ||||
June | Lab | 0.255801 | 0.732447 | 0.166347 | 0.516359 |
Clinical | 0.335176 | 0.795189 | 0.240441 | 0.624041 | |
July | Lab | 0.246933 | 0.702224 | 0.152858 | 0.487683 |
Clinical | 0.265873 | 0.672903 | 0.204333 | 0.545750 |
label_lookup = dict(zip(['pct_5', 'pct_15', 'pct_30', 'pct_adult'],
['Under 5', 'Under 15', 'Under 30', 'Adults & young kids']))
from matplotlib.patches import Rectangle
plot_interval = True
with sns.axes_style("ticks"):
colors = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c"]
fig = plt.figure(figsize=(14, 6))
ax1 = fig.add_axes([0.1, 0.1, 0.4, 0.8])
ax1.semilogy()
ax2 = fig.add_axes([0.5, 0.1, 0.4, 0.8])
ax2.semilogy()
q = pc_values.quantile([0.025, 0.975]).values + 1e-8
m = pc_values.mean().values
months = ['June', 'July']
for i,ax in enumerate([ax1, ax2]):
ax.set_yticks([0.01, 0.1, 0.2, 0.4, 0.7])
ax.set_yticklabels([0.01, 0.1, 0.2, 0.4, 0.7])
ax.set_ybound(0.0001, .999)
ax.set_xticks([0.08, 0.27, 0.4])
ax.set_xticklabels(['age confirmation', 'clinical only'])
ax.tick_params(length=0)
ax.set_title(months[i])
ax.add_patch(Rectangle((0.0, q[0, 2*i]), 0.15, q[1, 2*i]-q[0, 2*i], alpha=0.1))
ax.add_patch(Rectangle((0.2, q[0, 1+2*i]), 0.15, q[1, 1+2*i]-q[0, 1+2*i], alpha=0.1))
# Add estimates
conf = ('Lab', 'Clinical')
handles = []
for j, pct in enumerate(['pct_5', 'pct_15', 'pct_30', 'pct_adult']):
interval = qcoverage.ix[(months[i], conf[0])]
ax.add_patch(Rectangle((0.04*j, interval.loc[0.025, pct]),
0.01, interval.loc[0.975, pct]-interval.loc[0.025, pct],
lw=2,
color=colors[j]))
interval = qcoverage.ix[(months[i], conf[1])]
rect = Rectangle((0.04*j + 0.2, interval.loc[0.025, pct]),
0.01, interval.loc[0.975, pct]-interval.loc[0.025, pct],
lw=2, label=label_lookup[pct],
color=colors[j])
ax.add_patch(rect)
handles.append(rect)
ax.axhline(m[2*i], 0.13, 0.45, color='black', lw=0.7)
ax.axhline(m[1+2*i], 0.57, 0.88, color='black', lw=0.7)
ax2.set_yticks([])
ax1.set_ylabel('Estimated vaccination threshold required')
ax2.legend(handles=handles, loc=4)
sns.despine(bottom=True)
model_june_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])
pct_5: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.24 0.009 0.0 [ 0.22 0.255] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.222 0.234 0.24 0.247 0.259 pct_15: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.335 0.011 0.0 [ 0.312 0.357] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.313 0.326 0.336 0.343 0.358 pct_30: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.795 0.002 0.0 [ 0.792 0.799] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.791 0.794 0.795 0.797 0.799 pct_adult: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.624 0.003 0.0 [ 0.619 0.628] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.619 0.622 0.625 0.626 0.628
model_july.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])
pct_5: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.153 0.003 0.0 [ 0.147 0.158] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.147 0.151 0.154 0.155 0.159 pct_15: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.247 0.005 0.0 [ 0.238 0.255] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.238 0.244 0.248 0.249 0.255 pct_30: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.702 0.001 0.0 [ 0.7 0.704] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.7 0.701 0.702 0.703 0.704 pct_adult: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.488 0.001 0.0 [ 0.486 0.489] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.486 0.487 0.488 0.488 0.489
model_july_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])
pct_5: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.204 0.004 0.0 [ 0.197 0.213] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.197 0.202 0.205 0.206 0.213 pct_15: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.266 0.005 0.0 [ 0.257 0.276] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.256 0.263 0.266 0.269 0.276 pct_30: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.673 0.001 0.0 [ 0.671 0.675] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.671 0.672 0.673 0.673 0.675 pct_adult: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 0.546 0.001 0.0 [ 0.544 0.547] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 0.544 0.545 0.546 0.546 0.548
Initial migrant susceptibles (June model, with confirmation)
model_june.summary(['N_migrant'])
N_migrant: Mean SD MC Error 95% HPD interval ------------------------------------------------------------------ 550455.475 34950.451 1331.743 [ 505830. 609750.] Posterior quantiles: 2.5 25 50 75 97.5 |---------------|===============|===============|---------------| 472729.0 517245.0 556334.0 563299.0 608547.0
By age group:
Matplot.summary_plot(model_june.M_0, custom_labels=age_group.categories)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.
june_r = pd.DataFrame({'local': model_june.trace('R_t_local')[:, -1],
'total': model_june.trace('R_t')[:, -1]})
june_r['Month'] = 'June'
june_r['Confirmation'] = 'Lab'
june_noconf_r = pd.DataFrame({'local': model_june_noconf.trace('R_t_local')[:, -1],
'total': model_june_noconf.trace('R_t')[:, -1]})
june_noconf_r['Month'] = 'June'
june_noconf_r['Confirmation'] = 'Clinical'
july_r = pd.DataFrame({'local': model_july.trace('R_t_local')[:, -1],
'total': model_july.trace('R_t')[:, -1]})
july_r['Month'] = 'July'
july_r['Confirmation'] = 'Lab'
july_noconf_r = pd.DataFrame({'local': model_july_noconf.trace('R_t_local')[:, -1],
'total': model_july_noconf.trace('R_t')[:, -1]})
july_noconf_r['Month'] = 'July'
july_noconf_r['Confirmation'] = 'Clinical'
r_estimates = pd.concat([june_r, june_noconf_r, july_r, july_noconf_r],
ignore_index=True)
r_estimates.to_csv('output/r_estimates.csv')
g = sns.factorplot(row="Month", col="Confirmation", data=r_estimates, kind='box',
row_order=['June', 'July'],
order=['local', 'total'], margin_titles=True,
palette="YlGnBu_d", linewidth=0.7, fliersize=0, aspect=1.25).despine(left=True)
g.set_ylabels(r"$R_t$")
for ax in g.axes.ravel():
ax.hlines(1, -1, 2, linestyles='dashed')
model_june_nomigrant = MCMC(measles_model('1997-06-15', migrant=False))
model_june_nomigrant.sample(n_iterations, n_burn)
Matplot.plot(model_june_nomigrant.R0)
Plotting R0
Matplot.summary_plot(model_june_nomigrant.R_t)
Could not calculate Gelman-Rubin statistics. Requires multiple chains of equal length.