%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
import pymc3 as pm
import matplotlib.pylab as plt
import theano.tensor as tt
from theano import shared, function
from theano.tensor.shared_randomstreams import RandomStreams
sns.set_style('white')
DATA = '../data/clean/'
Check for the number of available compute cores
import multiprocessing
multiprocessing.cpu_count()
4
Line list to extract cases
line_list = pd.read_csv(DATA + 'line_list.csv')
line_list.head()
id | date_onset | provincecity | dob | age_years | died | confirmed | labconfirmed | date_lab | vaccine_ever | male | month_onset | year_onset | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 389.0 | 2015-03-30 | UB | 2014-09-01 | 0.577686 | 0.0 | 1.0 | 1.0 | 2015-04-06 | 0.0 | True | 3.0 | 2015.0 |
1 | 707.0 | 2015-04-03 | UB | 2013-02-02 | 2.162902 | 1.0 | 1.0 | 1.0 | 2015-04-04 | 0.0 | True | 4.0 | 2015.0 |
2 | 1762.0 | 2015-04-14 | KhU | 2015-01-04 | 0.279261 | 1.0 | 1.0 | 0.0 | NaN | 0.0 | True | 4.0 | 2015.0 |
3 | 2061.0 | 2015-04-22 | UB | 2014-09-23 | 0.577686 | 0.0 | 1.0 | 1.0 | 2015-04-24 | 0.0 | True | 4.0 | 2015.0 |
4 | 6465.0 | 2015-05-12 | UB | 2013-12-18 | 1.314168 | 1.0 | 0.0 | 0.0 | NaN | 0.0 | False | 5.0 | 2015.0 |
Extract and plot confirmed counts
confirmed_counts = pd.read_csv(DATA + 'confirmed_counts.csv', index_col=0)
confirmed_counts.tail()
[0, 1) | [1, 5) | [5, 10) | [10, 15) | [15, 20) | [20, 25) | [25, 30) | [30, 35) | [35, 40) | [40, 100) | |
---|---|---|---|---|---|---|---|---|---|---|
date_onset | ||||||||||
2016-05-08 | 318.0 | 97.0 | 83.0 | 119.0 | 180.0 | 155.0 | 152.0 | 109.0 | 63.0 | 41.0 |
2016-05-22 | 293.0 | 79.0 | 58.0 | 81.0 | 131.0 | 141.0 | 105.0 | 87.0 | 40.0 | 36.0 |
2016-06-05 | 179.0 | 71.0 | 39.0 | 53.0 | 70.0 | 62.0 | 40.0 | 45.0 | 20.0 | 21.0 |
2016-06-19 | 77.0 | 21.0 | 7.0 | 21.0 | 17.0 | 13.0 | 3.0 | 15.0 | 12.0 | 7.0 |
2016-07-03 | 12.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 3.0 | 0.0 | 0.0 | 2.0 |
Plot of confirmed counts by age group.
confirmed_counts.plot(cmap='viridis');
In order to account for change in the underlying population, we need the number of births in 2015 and 2016 (Source: Wikipedia)
births = 82130, 79920
We will employ the contact matrix for Mongolia, as estimated by Prem et al. 2017
B_prem = pd.read_csv(DATA + 'mongolia_prem.csv').values[:9, :9]
B_prem
array([[ 4.40123661, 1.76393655, 0.91943738, 0.62449763, 1.04620724, 1.48232365, 1.59726926, 1.27263358, 0.69913779], [ 1.51664206, 5.57982055, 1.63679975, 0.52656607, 0.37649241, 0.83552993, 1.0698524 , 1.04200163, 0.77666399], [ 0.56530473, 2.43013564, 9.17372545, 1.33873141, 0.67890573, 0.56166434, 0.75227034, 0.97884946, 0.97725743], [ 0.36238309, 0.73595028, 3.88089151, 12.32915786, 2.48410474, 1.11449654, 0.79194079, 1.04438411, 1.08448104], [ 0.69060422, 0.49460207, 0.65441906, 4.38870495, 6.8744335 , 2.87783706, 1.69684851, 1.35746918, 1.11479572], [ 1.13165827, 0.59517343, 0.33830222, 1.24692768, 3.31035503, 4.03747616, 2.29742657, 1.66957878, 1.29603167], [ 1.11906529, 1.42978486, 1.12904181, 0.68832499, 1.51871499, 2.32775238, 2.87889204, 2.1302899 , 1.51514744], [ 1.0230335 , 1.46318255, 1.28889523, 0.93261004, 0.98836616, 1.65427852, 2.02270357, 2.65928253, 2.02562053], [ 0.63654838, 1.05024748, 1.28670258, 1.37093328, 1.13235301, 1.33195969, 1.699263 , 1.87162165, 2.21015242]])
B_prem.shape
(9, 9)
Expand contact matrix to include infants; treat them the same as 1-4 year-olds.
B_prem_exp = np.zeros((10, 10))
B_prem_exp[1:, 1:] = B_prem.copy()
B_prem_exp[:2, :2] = B_prem_exp[1, 1]
B_prem_exp[0, 2:] = B_prem[0, 1:]
B_prem_exp[2:, 0] = B_prem[1:, 0]
B_prem_exp
array([[ 4.40123661, 4.40123661, 1.76393655, 0.91943738, 0.62449763, 1.04620724, 1.48232365, 1.59726926, 1.27263358, 0.69913779], [ 4.40123661, 4.40123661, 1.76393655, 0.91943738, 0.62449763, 1.04620724, 1.48232365, 1.59726926, 1.27263358, 0.69913779], [ 1.51664206, 1.51664206, 5.57982055, 1.63679975, 0.52656607, 0.37649241, 0.83552993, 1.0698524 , 1.04200163, 0.77666399], [ 0.56530473, 0.56530473, 2.43013564, 9.17372545, 1.33873141, 0.67890573, 0.56166434, 0.75227034, 0.97884946, 0.97725743], [ 0.36238309, 0.36238309, 0.73595028, 3.88089151, 12.32915786, 2.48410474, 1.11449654, 0.79194079, 1.04438411, 1.08448104], [ 0.69060422, 0.69060422, 0.49460207, 0.65441906, 4.38870495, 6.8744335 , 2.87783706, 1.69684851, 1.35746918, 1.11479572], [ 1.13165827, 1.13165827, 0.59517343, 0.33830222, 1.24692768, 3.31035503, 4.03747616, 2.29742657, 1.66957878, 1.29603167], [ 1.11906529, 1.11906529, 1.42978486, 1.12904181, 0.68832499, 1.51871499, 2.32775238, 2.87889204, 2.1302899 , 1.51514744], [ 1.0230335 , 1.0230335 , 1.46318255, 1.28889523, 0.93261004, 0.98836616, 1.65427852, 2.02270357, 2.65928253, 2.02562053], [ 0.63654838, 0.63654838, 1.05024748, 1.28670258, 1.37093328, 1.13235301, 1.33195969, 1.699263 , 1.87162165, 2.21015242]])
$R_0$ as estimated from this contact matrix
evs = np.linalg.eigvals(B_prem).real
max(evs[np.isreal(evs)])
17.686930739035084
Import the underlying population sizes, by province:
population = pd.read_csv(DATA + '../clean/denominators.csv', index_col=0)
population.head()
Total | 0-4 | 5-9 | 10-14 | 15-19 | 20-24 | 25-29 | 30-34 | 35-39 | 40-44 | 45-49 | 50-54 | 55-59 | 60-64 | 65-69 | 70+ | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Province | ||||||||||||||||
Arkhangai | 92896 | 10440 | 8360 | 7883 | 9249 | 9469 | 8207 | 6856 | 6824 | 6294 | 5498 | 4643 | 3266 | 2202 | 1237 | 2468 |
Bayan-Ulgii | 93165 | 11655 | 10219 | 9929 | 8794 | 8872 | 8531 | 7207 | 6199 | 5713 | 4633 | 3842 | 2747 | 1665 | 1006 | 2153 |
Bayankhongor | 79310 | 9760 | 7429 | 6675 | 7812 | 7992 | 7436 | 6280 | 5806 | 5165 | 4383 | 3714 | 2436 | 1480 | 1008 | 1934 |
Bulgan | 60324 | 6415 | 5351 | 4556 | 5423 | 5587 | 4977 | 4499 | 4592 | 4426 | 4066 | 3494 | 2619 | 1666 | 877 | 1776 |
Gobi-Altai | 56698 | 5953 | 5392 | 5046 | 5778 | 5631 | 4837 | 4401 | 4345 | 3994 | 3389 | 2730 | 1974 | 1064 | 706 | 1458 |
Age classes above 40 will be aggregated into a single group.
pop_classes = population.iloc[:, 1:9].copy()
pop_classes['40+'] = population.iloc[:, 9:].sum(1)
pop_classes.head()
0-4 | 5-9 | 10-14 | 15-19 | 20-24 | 25-29 | 30-34 | 35-39 | 40+ | |
---|---|---|---|---|---|---|---|---|---|
Province | |||||||||
Arkhangai | 10440 | 8360 | 7883 | 9249 | 9469 | 8207 | 6856 | 6824 | 25608 |
Bayan-Ulgii | 11655 | 10219 | 9929 | 8794 | 8872 | 8531 | 7207 | 6199 | 21759 |
Bayankhongor | 9760 | 7429 | 6675 | 7812 | 7992 | 7436 | 6280 | 5806 | 20120 |
Bulgan | 6415 | 5351 | 4556 | 5423 | 5587 | 4977 | 4499 | 4592 | 18924 |
Gobi-Altai | 5953 | 5392 | 5046 | 5778 | 5631 | 4837 | 4401 | 4345 | 15315 |
age_group_index = pop_classes.columns.str.strip()
age_group_index
Index(['0-4', '5-9', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+'], dtype='object')
pop_classes_total = pop_classes.sum().values
Break out infants from 0-4 group. Will use average of 2015 and 2016 births, and subtract them from 0-4's.
mean_births = int(np.mean(births))
N = [mean_births, pop_classes_total[0] - mean_births]
N.extend(pop_classes_total[1:])
N = np.array(N)
Here is the assumed population size by age group, with infants as their own group.
N
array([ 81025, 441650, 418444, 352776, 386296, 427388, 448107, 379846, 348235, 1264181])
Try to obtain susceptible proportion from coverage history
coverage = pd.read_csv(DATA + 'coverage.csv', index_col=0)
coverage.tail(20)
MCV1 | MCV2 | SIA 1994 (M) | SIA 1996 (M) | SIA 2000 (M) | SIA 2007 (M) | SIA 2012 (MR) | |
---|---|---|---|---|---|---|---|
Age | |||||||
19 | 0.89 | 0.00 | 0.0 | 0.98 | 0.96 | 0.00 | 0.00 |
18 | 0.91 | 0.00 | 0.0 | 0.00 | 0.96 | 0.97 | 0.00 |
17 | 0.93 | 0.00 | 0.0 | 0.00 | 0.96 | 0.97 | 0.87 |
16 | 0.93 | 0.75 | 0.0 | 0.00 | 0.96 | 0.97 | 0.89 |
15 | 0.92 | 0.95 | 0.0 | 0.00 | 0.00 | 0.97 | 0.91 |
14 | 0.95 | 0.96 | 0.0 | 0.00 | 0.00 | 0.97 | 0.88 |
13 | 0.98 | 0.98 | 0.0 | 0.00 | 0.00 | 0.97 | 0.95 |
12 | 0.98 | 0.98 | 0.0 | 0.00 | 0.00 | 0.97 | 0.95 |
11 | 0.99 | 0.98 | 0.0 | 0.00 | 0.00 | 0.97 | 0.96 |
10 | 0.97 | 0.96 | 0.0 | 0.00 | 0.00 | 0.97 | 0.95 |
9 | 0.99 | 0.97 | 0.0 | 0.00 | 0.00 | 0.00 | 0.97 |
8 | 0.98 | 0.96 | 0.0 | 0.00 | 0.00 | 0.00 | 0.96 |
7 | 0.97 | 0.97 | 0.0 | 0.00 | 0.00 | 0.00 | 0.96 |
6 | 0.94 | 0.93 | 0.0 | 0.00 | 0.00 | 0.00 | 0.90 |
5 | 0.98 | 0.97 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 |
4 | 0.98 | 0.99 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 |
3 | 0.99 | 0.99 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 |
2 | 0.99 | 0.99 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 |
1 | 0.99 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 |
0 | 0.00 | 0.00 | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 |
age_classes = [0,1,5,10,15,20,25,30,35,40,100]
age_slices = [slice(age_classes[i], age_classes[i+1]) for i in range(len(age_classes)-1)]
n_age_classes = len(age_classes) - 1
efficacy = 0.85
Dependent MCV immunization
immune_MCV1 = coverage[::-1].MCV1 * efficacy
immune_MCV2 = (1 - immune_MCV1) * coverage[::-1].MCV2 * efficacy
MCV_immunity = immune_MCV2 + immune_MCV1
downsample_mean = lambda x: np.array([x[s].mean() for s in age_slices])
downsample_sum = lambda x: np.array([x[s].sum() for s in age_slices])
group_coverage = downsample_mean(coverage[::-1])
downsample_mean(immune_MCV1)
array([0. , 0.839375, 0.8262 , 0.8279 , 0.7786 , 0.7055 , 0.544 , 0.0969 , 0.0289 , 0. ])
susceptibility = (1 - coverage*efficacy).prod(axis=1).round(3).sort_index()
susceptibility.plot()
plt.ylabel('Susceptibility')
Text(0, 0.5, 'Susceptibility')
p_susc = susceptibility.groupby(pd.cut(susceptibility.index, age_classes)).mean()
Down-sample susceptibility to age groups.
residual_susceptibility = downsample_mean(susceptibility)
age_labels = ['0', '1-4', '5-9', '10-14', '15-19', '20-24', '25-29',
'30-34', '35-39', '40+']
plt.bar(range(len(age_slices)), downsample_mean(susceptibility),
tick_label=age_labels);
These susceptibility rates can't be true given the distribution of cases that were observed!
Create a time x age matrix of SIA vaccinations to account for intervention
sia_2015 = pd.read_csv(DATA + 'sia_2015.csv', index_col=0)
sia_2016 = pd.read_csv(DATA + 'sia_2016.csv', index_col=0)
sia_2015.head()
0 | 1-4 | 5-9 | |
---|---|---|---|
Arkhangai | 860 | 7364 | 1837 |
Bayan-Ulgii | 1361 | 9069 | 1872 |
Bayankhongor | 928 | 6992 | 2019 |
Bulgan | 574 | 4494 | 1430 |
Gobi-Altai | 494 | 4490 | 1276 |
sia_2015_total = (sia_2015.sum()
.reindex(pd.Index(age_labels))
.fillna(0)
.astype(int))
sia_2015_total
0 34302 1-4 247851 5-9 65532 10-14 0 15-19 0 20-24 0 25-29 0 30-34 0 35-39 0 40+ 0 dtype: int64
sia_2016.sum()
15-19 78105 20-24 186022 25-29 230733 30-34 54439 dtype: int64
sia_2016_total = (sia_2016.sum()
.reindex(pd.Index(age_labels))
.fillna(0)
.astype(int))
sia_2016_total
0 0 1-4 0 5-9 0 10-14 0 15-19 78105 20-24 186022 25-29 230733 30-34 54439 35-39 0 40+ 0 dtype: int64
sia = pd.DataFrame(index=confirmed_counts.index, columns=pd.Index(age_labels)).fillna(0)
sia.loc['2016-05-22'] = sia_2016_total
sia.loc[['2015-05-24', '2015-06-07', '2015-06-21']] = [sia_2015_total/4,
sia_2015_total/2,
sia_2015_total/4]
sia
0 | 1-4 | 5-9 | 10-14 | 15-19 | 20-24 | 25-29 | 30-34 | 35-39 | 40+ | |
---|---|---|---|---|---|---|---|---|---|---|
date_onset | ||||||||||
2015-01-18 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-02-01 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-02-15 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-03-01 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-03-15 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-03-29 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-04-12 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-04-26 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-05-10 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-05-24 | 8575.5 | 61962.75 | 16383.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-06-07 | 17151.0 | 123925.50 | 32766.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-06-21 | 8575.5 | 61962.75 | 16383.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-07-05 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-07-19 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-08-02 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-08-16 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-08-30 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-09-13 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-09-27 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-10-11 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-10-25 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-11-08 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-11-22 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-12-06 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2015-12-20 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-01-03 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-01-17 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-01-31 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-02-14 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-02-28 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-03-13 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-03-27 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-04-10 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-04-24 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-05-08 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-05-22 | 0.0 | 0.00 | 0.0 | 0.0 | 78105.0 | 186022.0 | 230733.0 | 54439.0 | 0.0 | 0.0 |
2016-06-05 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-06-19 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2016-07-03 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
sia_vacc = sia.cumsum(0).values
We will extend a simple SIR disease model, to account for confirmation status, which will be fit using MCMC.
This model fits the series of 2-week infection totals for each age group $a$ as a set of Poisson random variables:
$$Pr(I_{a}(t) | \lambda_a(t)) = \text{Poisson}(\lambda_a(t)) $$Where the age-specific outbreak intensity at time $t$ is modeled as:
$$\lambda_a(t) = S_a(t-1) \frac{I(t-1)\mathbf{B}}{N_a} $$where $S_a(t-1)$ is the number of susceptibles in age group $a$ in the previous time period, $I(t-1)$ an age-specific vector of the number of infected individuals in the previous time period, $\mathbf{B}$ a matrix of transmission coefficients (from Prem et al., specified above), and $N_a$ an estimate of the population of age-$a$ people in Mongolia.
The basic reproductive number $R_0$ was calculated as the largest real-valued eigenvalue of the matrix $\mathbf{B}$. To impose a mild constraint on $R_0$, we applied a Gaussian prior distribution whose 1st and 99th quantiles are 8 and 24, respectively, a reasonable range for a measles outbreak:
Cases
I = confirmed_counts.values.astype(int)
I.shape
(39, 10)
I_age = I.sum(0)
lab_samples = line_list.dropna(subset=['date_lab']).copy()
lab_samples.head()
id | date_onset | provincecity | dob | age_years | died | confirmed | labconfirmed | date_lab | vaccine_ever | male | month_onset | year_onset | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 389.0 | 2015-03-30 | UB | 2014-09-01 | 0.577686 | 0.0 | 1.0 | 1.0 | 2015-04-06 | 0.0 | True | 3.0 | 2015.0 |
1 | 707.0 | 2015-04-03 | UB | 2013-02-02 | 2.162902 | 1.0 | 1.0 | 1.0 | 2015-04-04 | 0.0 | True | 4.0 | 2015.0 |
3 | 2061.0 | 2015-04-22 | UB | 2014-09-23 | 0.577686 | 0.0 | 1.0 | 1.0 | 2015-04-24 | 0.0 | True | 4.0 | 2015.0 |
8 | 20835.0 | 2015-11-25 | UB | 2015-09-01 | 0.238193 | 1.0 | 1.0 | 1.0 | 2015-12-01 | 0.0 | True | 11.0 | 2015.0 |
9 | 21053.0 | 2015-12-02 | UB | 2014-11-19 | 1.062286 | 0.0 | 1.0 | 1.0 | 2015-12-14 | 0.0 | False | 12.0 | 2015.0 |
clinic_confirmed = lab_samples.query('confirmed==1').dropna(subset=['age_years']).copy()
clinic_confirmed.shape
(5846, 13)
clinic_confirmed['age_group'] = pd.cut(clinic_confirmed.age_years, age_classes, labels=np.arange(len(age_slices)))
clinic_confirmed.dropna(subset=['age_years']).groupby('age_group')['labconfirmed'].mean()
age_group 0 0.768957 1 0.657143 2 0.791489 3 0.503497 4 0.528487 5 0.539967 6 0.533007 7 0.752212 8 0.727273 9 0.372093 Name: labconfirmed, dtype: float64
age_midpoints = np.array([0.5, 3, 7.5, 12.5, 17.5, 22.5, 27.5, 32.5, 37.5, 70])
confirmed, age, age_group = clinic_confirmed[['labconfirmed', 'age_years', 'age_group']].values.T
age_group = age_group.astype(int)
age = age.astype(float)
We use a Gaussian random walk to model the variation in confirmation rate across age groups. These functions support the random walk model.
n_knots = 20
knots = np.linspace(age.min(), age.max(), n_knots)
def interpolate(x0, y0, x):
idx = np.searchsorted(x0, x)
dl = np.array(x - x0[idx - 1])
dr = np.array(x0[idx] - x)
d = dl + dr
wl = dr / d
return wl * y0[idx - 1] + (1 - wl) * y0[idx]
Generates mcv2 effective coverage under assumption that second dose is given only to those children who received the first dose
routine_coverage_bias <- 0.95
mcv1 <- read.csv("input-mcv1.csv",row.names = 1) # read in first dose coverage
m1 <- unlist(mcv1[which(rownames(mcv1)=="MNG"),1:35]) # select out MNG
mcv2 <- read.csv("input-mcv2.csv",row.names = 1) # read in second dose coverage
m2 <- unlist(mcv2[which(rownames(mcv2)=="MNG"),1:35]) # select out MNG
m1 <- c(rep(0,15),m1) * routine_coverage_bias
m2 <- c(rep(0,15),m2) * routine_coverage_bias
L <- 60 # life expectancy
R <- 25 # R0 -- in practice this is fit, this is just a test value for illustration
# Expected force of natural infection, discounted by vaccination, per SP paper
foi <- rev(1-m1*.85) * rev(1-mcv2_dep(m1,m2,0.85)) *(R/L) # here scaling effective MCV2 by mcv2_dep function above
#########################################
#SIAs
SIA.coverage <- 0.5 # immunization due to sias
sia1 <- rep(0,35) # 2012 SIA just illustrating ages 1-35 here, since historical coverage above only goes that far back. Reasonable to assume no vaccination before 1980 -- if there was any it was trivial
sia1[7:18] <- SIA.coverage # current age classes that were targeted by SIA
sia2 <- rep(0,35) # same for 2007 SIA
sia2[11:19] <- SIA.coverage
sia3 <- rep(0,35) # same for 2000 SIA
sia3[15:29] <- SIA.coverage
# note that in the calculation below, I assume that these are all independent, which is the most optimistic interpretation of impact
sia1 <- c(sia1,rep(0,15))
sia2 <- c(sia2,rep(0,15))
sia3 <- c(sia3,rep(0,15))
#########################################
# The 4hree terms here are:
# susceptibles after the first dose
# suceptibles after the second dose
# susceptibles after natural infection
# susceptibles after sias
plot((1-rev(m1)*.85) * rev(1-mcv2_dep(m1,m2,0.85)) * exp(-cumsum(foi)) *(1-sia1)*(1-sia2)*(1-sia3),ylim=c(0,.25),type="l",
xlab="current age (years)",
ylab="fraction susceptible")
lines((1-rev(m1)*.85) * 1 * exp(-cumsum(foi)) *(1-sia1)*(1-sia2)*(1-sia3),ylim=c(0,.05),col=2)
legend(0,.25,legend=c("with mcv2","set mcv2 to 0"),lty=1,col=1:2,bty="n")
# for the first case (black line)
initial_susceptible_profile <- (1-rev(m1)*.85) * rev(1-mcv2_dep(m1,m2,0.85)) * exp(-cumsum(foi)) *(1-sia1)*(1-sia2)*(1-sia3)
##############################################################
# Initial susceptibles in numbers -- assume that that population in each age class is equal to the birth cohort in that year of birth
# then the estiamte of the absolute number of susceptible in each age class is initial_susceptible_profile (above) times the rev()
# of the birth cohorts. This ignores any mortality, but that isn't terrible, since it presumes that you're likely to move from S->R
# prior to death
births <- read.csv("input-births.csv",row.names = 1)
births <- unlist(births[which(rownames(births)=="MNG"),1:35]) # select out MNG
# This only gives the birth cohorts through 35 years. It would be reasonable to assume that the prior birth cohorts
# were the same size; e.g. c() births[1] as many times as needed to births.
births <- c(rep(births[1],15),births)
# The first dose of measles vaccine is delivered between 9-12m of age. So most of the first age cohort is not yet eligible.
# So we model the 0-1 class separately and assume that 25% are susceptible (older than 6m, when they lose maternal immunity on average,
# and younger than 9m). The 25% between 9m-12m are vaccinated at mcv1 level
#
barplot(rev(births)*c(.25 * ( 1 + (1-rev(m1)*.85)[1]), initial_susceptible_profile[-1]),
xlab = "birth year",
ylab="number susceptible")
expected_susceptibles <- rev(births)*c(.25 * ( 1 + (1-rev(m1)*.85)[1]), initial_susceptible_profile[-1])
def mcv2_dep(mcv1, mcv2, ve):
return (1 - ve) * mcv2 / (1 - mcv1 * ve)
routine_coverage_bias = 0.95
MCV1 = pd.read_csv(DATA + '../raw/input-mcv1.csv', index_col=0).loc['MNG', :'2014'].values
MCV2 = pd.read_csv(DATA + '../raw/input-mcv2.csv', index_col=0).loc['MNG', :'2014'].values
MCV1 = np.concatenate([np.zeros(15), MCV1]) * routine_coverage_bias
#MCV2 = np.concatenate([np.zeros(15), MCV2]) * routine_coverage_bias
MCV2 = np.zeros_like(MCV1)
foi = (1 - MCV1 * .85)[::-1] * (1 - mcv2_dep(MCV1, MCV2, 0.85))[::-1] * (16 / 60)
sia_coverage = 0.3
sia1 = np.zeros(50)
sia1[5:14] = sia_coverage
sia2 = np.zeros(50)
sia2[10:19] = sia_coverage
sia3 = np.zeros(50)
sia3[15:23] = sia_coverage
sia4 = np.zeros(50)
sia4[19:29] = sia_coverage
initial_susceptible_profile = ((1 - MCV1 * .85)[::-1]
* (1 - mcv2_dep(MCV1, MCV2, 0.85))[::-1]
* np.exp(-np.cumsum(foi))
* (1 - sia1) * (1 - sia2) * (1 - sia3) * (1 - sia4))
plt.plot(initial_susceptible_profile)
[<matplotlib.lines.Line2D at 0x7f954415fdd8>]
birth_counts = pd.read_csv(DATA + '../raw/input-births.csv', index_col=0).loc['MNG', :'2014'].values
birth_counts = np.concatenate([np.ones(15)*birth_counts[0], birth_counts])
initial_susceptible_profile[0] += 1
initial_susceptible_profile[0] *= 0.25
expected_susceptibles = birth_counts[::-1] * initial_susceptible_profile
S_exp = downsample_sum(expected_susceptibles)
Difference between expected susceptibles and total cases by age group
S_exp - I.cumsum(axis=0)[-1]
array([14931.76459601, 46680.81115632, 26527.17050497, 11339.19591366, 4382.42317616, 6763.38867463, 16847.03789221, 11458.07611094, 2861.71300339, 614.54847146])
plt.bar(np.arange(len(expected_susceptibles)), expected_susceptibles)
<BarContainer object of 50 artists>
def tdownsample(x):
return tt.stack([x[s].sum() for s in age_slices])
with pm.Model() as uncertain_vacc_model:
# Confirmation sub-model
σ = pm.HalfNormal('σ', 5)
y = pm.GaussianRandomWalk('y', sd=σ, shape=n_knots)
α = interpolate(knots, y, age)
π = pm.invlogit(α)
confirmation = pm.Bernoulli('confirmation', π, observed=confirmed)
# Calculate age group probabilities of confirmation
α_group = interpolate(knots, y, age_midpoints)
p_confirm = pm.Deterministic('p_confirm', pm.invlogit(α_group))
# Basic reproduction number centered on the dominant eigenvector of the contact matrix
R_0 = pm.Normal('R_0', max(evs[np.isreal(evs)]), sd=3.5)
# Force of infection based on dependent MCV efficacy
FOI = (1 - MCV1)[::-1] * (1 - mcv2_dep(MCV1, MCV2, efficacy))[::-1] * R_0 / 60
# Susceptibility prior to outbreaks
susceptible = ((1 - MCV1 * .85)[::-1]
* (1 - mcv2_dep(MCV1, MCV2, efficacy))[::-1]
* tt.exp(-tt.cumsum(FOI))
* (1 - sia1) * (1 - sia2) * (1 - sia3) * (1 - sia3))
susceptible = tt.concatenate([tt.stack(0.25*(1 + susceptible[0])), susceptible[1:]])
# Initial susceptibles
S_0 = pm.Deterministic('S_0', tdownsample(birth_counts[::-1] * susceptible))
# Susceptibles over time, removing individuals vaccinated by SIA
# S = S_0 - sia_vacc*efficacy*0.5 - shared(I.cumsum(axis=0))
S = S_0 - shared(I.cumsum(axis=0))
# Force of infection
λ = S * (I.dot(B_prem_exp) / N.sum()) + 0.001
# Effective reproductive number
R_t = pm.Deterministic('R_t', S.sum(1) * R_0 / N.sum())
# Adjust for confirmation bias
λ_apparent = λ / p_confirm
# Infections likelihood
infections = pm.Poisson('infections', λ_apparent[:-1], observed=I[1:])
uncertain_vacc_model.check_test_point()
σ_log__ -0.77 y -43.75 R_0 -2.17 confirmation -4052.14 infections -40827.80 Name: Log-probability of test_point, dtype: float64
with uncertain_vacc_model:
trace = pm.sample(1000, tune=2000, cores=2)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [R_0, y, σ] Sampling 2 chains: 100%|██████████| 6000/6000 [02:40<00:00, 37.33draws/s]
pm.energyplot(trace);
if 'p_vacc_interval__' in [v.name for v in uncertain_vacc_model.free_RVs]:
pm.forestplot(trace, varnames=['p_vacc'], ylabels=age_labels, rhat=False)
pm.traceplot(trace, varnames=['R_0'])
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f9512c4ab38>, <matplotlib.axes._subplots.AxesSubplot object at 0x7f9512ac4dd8>]], dtype=object)
However, this model estimates $R_e$ values that are too high.
pm.forestplot(trace, varnames=['R_t'])
GridSpec(1, 2, width_ratios=[3, 1])