In [1]:

%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
import pymc3 as pm
import matplotlib.pylab as plt
import theano.tensor as tt
from theano import shared, function
from theano.tensor.shared_randomstreams import RandomStreams
sns.set_style('white')

DATA = '../data/clean/'

Check for the number of available compute cores

In [2]:

import multiprocessing

multiprocessing.cpu_count()

Out[2]:

Data import¶

Line list to extract cases

In [3]:

line_list = pd.read_csv(DATA + 'line_list.csv')
line_list.head()

Out[3]:

	id	date_onset	provincecity	dob	age_years	died	confirmed	labconfirmed	date_lab	male	month_onset	year_onset
0	389.0	2015-03-30	UB	2014-09-01	0.577686	0.0	1.0	1.0	2015-04-06	True	3.0	2015.0
1	707.0	2015-04-03	UB	2013-02-02	2.162902	1.0	1.0	1.0	2015-04-04	True	4.0	2015.0
2	1762.0	2015-04-14	KhU	2015-01-04	0.279261	1.0	1.0	0.0	NaN	True	4.0	2015.0
3	2061.0	2015-04-22	UB	2014-09-23	0.577686	0.0	1.0	1.0	2015-04-24	True	4.0	2015.0
4	6465.0	2015-05-12	UB	2013-12-18	1.314168	1.0	0.0	0.0	NaN	False	5.0	2015.0

Extract and plot confirmed counts

In [4]:

confirmed_counts = pd.read_csv(DATA + 'confirmed_counts.csv', index_col=0)
confirmed_counts.tail()

Out[4]:

	[0, 1)	[1, 5)	[5, 10)	[10, 15)	[15, 20)	[20, 25)	[25, 30)	[30, 35)	[35, 40)	[40, 100)
date_onset
2016-05-08	318.0	97.0	83.0	119.0	180.0	155.0	152.0	109.0	63.0	41.0
2016-05-22	293.0	79.0	58.0	81.0	131.0	141.0	105.0	87.0	40.0	36.0
2016-06-05	179.0	71.0	39.0	53.0	70.0	62.0	40.0	45.0	20.0	21.0
2016-06-19	77.0	21.0	7.0	21.0	17.0	13.0	3.0	15.0	12.0	7.0
2016-07-03	12.0	0.0	1.0	0.0	0.0	0.0	3.0	0.0	0.0	2.0

Plot of confirmed counts by age group.

In [5]:

confirmed_counts.plot(cmap='viridis');

In order to account for change in the underlying population, we need the number of births in 2015 and 2016 (Source: Wikipedia)

In [6]:

births = 82130, 79920

We will employ the contact matrix for Mongolia, as estimated by Prem et al. 2017

In [7]:

B_prem = pd.read_csv(DATA + 'mongolia_prem.csv').values[:9, :9]
B_prem

Out[7]:

array([[ 4.40123661,  1.76393655,  0.91943738,  0.62449763,  1.04620724,
         1.48232365,  1.59726926,  1.27263358,  0.69913779],
       [ 1.51664206,  5.57982055,  1.63679975,  0.52656607,  0.37649241,
         0.83552993,  1.0698524 ,  1.04200163,  0.77666399],
       [ 0.56530473,  2.43013564,  9.17372545,  1.33873141,  0.67890573,
         0.56166434,  0.75227034,  0.97884946,  0.97725743],
       [ 0.36238309,  0.73595028,  3.88089151, 12.32915786,  2.48410474,
         1.11449654,  0.79194079,  1.04438411,  1.08448104],
       [ 0.69060422,  0.49460207,  0.65441906,  4.38870495,  6.8744335 ,
         2.87783706,  1.69684851,  1.35746918,  1.11479572],
       [ 1.13165827,  0.59517343,  0.33830222,  1.24692768,  3.31035503,
         4.03747616,  2.29742657,  1.66957878,  1.29603167],
       [ 1.11906529,  1.42978486,  1.12904181,  0.68832499,  1.51871499,
         2.32775238,  2.87889204,  2.1302899 ,  1.51514744],
       [ 1.0230335 ,  1.46318255,  1.28889523,  0.93261004,  0.98836616,
         1.65427852,  2.02270357,  2.65928253,  2.02562053],
       [ 0.63654838,  1.05024748,  1.28670258,  1.37093328,  1.13235301,
         1.33195969,  1.699263  ,  1.87162165,  2.21015242]])

In [8]:

B_prem.shape

Out[8]:

(9, 9)

Expand contact matrix to include infants; treat them the same as 1-4 year-olds.

In [9]:

B_prem_exp = np.zeros((10, 10))
B_prem_exp[1:, 1:] = B_prem.copy()
B_prem_exp[:2, :2] = B_prem_exp[1, 1]
B_prem_exp[0, 2:] = B_prem[0, 1:]
B_prem_exp[2:, 0] = B_prem[1:, 0]
B_prem_exp

Out[9]:

array([[ 4.40123661,  4.40123661,  1.76393655,  0.91943738,  0.62449763,
         1.04620724,  1.48232365,  1.59726926,  1.27263358,  0.69913779],
       [ 4.40123661,  4.40123661,  1.76393655,  0.91943738,  0.62449763,
         1.04620724,  1.48232365,  1.59726926,  1.27263358,  0.69913779],
       [ 1.51664206,  1.51664206,  5.57982055,  1.63679975,  0.52656607,
         0.37649241,  0.83552993,  1.0698524 ,  1.04200163,  0.77666399],
       [ 0.56530473,  0.56530473,  2.43013564,  9.17372545,  1.33873141,
         0.67890573,  0.56166434,  0.75227034,  0.97884946,  0.97725743],
       [ 0.36238309,  0.36238309,  0.73595028,  3.88089151, 12.32915786,
         2.48410474,  1.11449654,  0.79194079,  1.04438411,  1.08448104],
       [ 0.69060422,  0.69060422,  0.49460207,  0.65441906,  4.38870495,
         6.8744335 ,  2.87783706,  1.69684851,  1.35746918,  1.11479572],
       [ 1.13165827,  1.13165827,  0.59517343,  0.33830222,  1.24692768,
         3.31035503,  4.03747616,  2.29742657,  1.66957878,  1.29603167],
       [ 1.11906529,  1.11906529,  1.42978486,  1.12904181,  0.68832499,
         1.51871499,  2.32775238,  2.87889204,  2.1302899 ,  1.51514744],
       [ 1.0230335 ,  1.0230335 ,  1.46318255,  1.28889523,  0.93261004,
         0.98836616,  1.65427852,  2.02270357,  2.65928253,  2.02562053],
       [ 0.63654838,  0.63654838,  1.05024748,  1.28670258,  1.37093328,
         1.13235301,  1.33195969,  1.699263  ,  1.87162165,  2.21015242]])

$R_0$ as estimated from this contact matrix

In [10]:

evs = np.linalg.eigvals(B_prem).real
max(evs[np.isreal(evs)])

Out[10]:

17.686930739035084

Import the underlying population sizes, by province:

In [11]:

population = pd.read_csv(DATA + '../clean/denominators.csv', index_col=0)
population.head()

Out[11]:

	Total	0-4	5-9	10-14	15-19	20-24	25-29	30-34	35-39	40-44	45-49	50-54	55-59	60-64	65-69	70+
Province
Arkhangai	92896	10440	8360	7883	9249	9469	8207	6856	6824	6294	5498	4643	3266	2202	1237	2468
Bayan-Ulgii	93165	11655	10219	9929	8794	8872	8531	7207	6199	5713	4633	3842	2747	1665	1006	2153
Bayankhongor	79310	9760	7429	6675	7812	7992	7436	6280	5806	5165	4383	3714	2436	1480	1008	1934
Bulgan	60324	6415	5351	4556	5423	5587	4977	4499	4592	4426	4066	3494	2619	1666	877	1776
Gobi-Altai	56698	5953	5392	5046	5778	5631	4837	4401	4345	3994	3389	2730	1974	1064	706	1458

Age classes above 40 will be aggregated into a single group.

In [12]:

pop_classes = population.iloc[:, 1:9].copy()
pop_classes['40+'] = population.iloc[:, 9:].sum(1)
pop_classes.head()

Out[12]:

	0-4	5-9	10-14	15-19	20-24	25-29	30-34	35-39	40+
Province
Arkhangai	10440	8360	7883	9249	9469	8207	6856	6824	25608
Bayan-Ulgii	11655	10219	9929	8794	8872	8531	7207	6199	21759
Bayankhongor	9760	7429	6675	7812	7992	7436	6280	5806	20120
Bulgan	6415	5351	4556	5423	5587	4977	4499	4592	18924
Gobi-Altai	5953	5392	5046	5778	5631	4837	4401	4345	15315

In [13]:

age_group_index = pop_classes.columns.str.strip()
age_group_index

Out[13]:

Index(['0-4', '5-9', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39',
       '40+'],
      dtype='object')

In [14]:

pop_classes_total = pop_classes.sum().values

Break out infants from 0-4 group. Will use average of 2015 and 2016 births, and subtract them from 0-4's.

In [15]:

mean_births = int(np.mean(births))
N = [mean_births, pop_classes_total[0] - mean_births]
N.extend(pop_classes_total[1:])
N = np.array(N)

Here is the assumed population size by age group, with infants as their own group.

In [16]:

Out[16]:

array([  81025,  441650,  418444,  352776,  386296,  427388,  448107,
        379846,  348235, 1264181])

Try to obtain susceptible proportion from coverage history

In [17]:

coverage = pd.read_csv(DATA + 'coverage.csv', index_col=0)

In [18]:

coverage.tail(20)

Out[18]:

	MCV1	MCV2	SIA 1994 (M)	SIA 1996 (M)	SIA 2000 (M)	SIA 2007 (M)	SIA 2012 (MR)
Age
19	0.89	0.00	0.0	0.98	0.96	0.00	0.00
18	0.91	0.00	0.0	0.00	0.96	0.97	0.00
17	0.93	0.00	0.0	0.00	0.96	0.97	0.87
16	0.93	0.75	0.0	0.00	0.96	0.97	0.89
15	0.92	0.95	0.0	0.00	0.00	0.97	0.91
14	0.95	0.96	0.0	0.00	0.00	0.97	0.88
13	0.98	0.98	0.0	0.00	0.00	0.97	0.95
12	0.98	0.98	0.0	0.00	0.00	0.97	0.95
11	0.99	0.98	0.0	0.00	0.00	0.97	0.96
10	0.97	0.96	0.0	0.00	0.00	0.97	0.95
9	0.99	0.97	0.0	0.00	0.00	0.00	0.97
8	0.98	0.96	0.0	0.00	0.00	0.00	0.96
7	0.97	0.97	0.0	0.00	0.00	0.00	0.96
6	0.94	0.93	0.0	0.00	0.00	0.00	0.90
5	0.98	0.97	0.0	0.00	0.00	0.00	0.00
4	0.98	0.99	0.0	0.00	0.00	0.00	0.00
3	0.99	0.99	0.0	0.00	0.00	0.00	0.00
2	0.99	0.99	0.0	0.00	0.00	0.00	0.00
1	0.99	0.00	0.0	0.00	0.00	0.00	0.00
0	0.00	0.00	0.0	0.00	0.00	0.00	0.00

In [19]:

age_classes = [0,1,5,10,15,20,25,30,35,40,100]
age_slices = [slice(age_classes[i], age_classes[i+1]) for i in range(len(age_classes)-1)]

In [20]:

n_age_classes = len(age_classes) - 1

In [21]:

efficacy = 0.85

Dependent MCV immunization

In [22]:

immune_MCV1 = coverage[::-1].MCV1 * efficacy
immune_MCV2 = (1 - immune_MCV1) * coverage[::-1].MCV2 * efficacy

In [23]:

MCV_immunity = immune_MCV2 + immune_MCV1

In [24]:

downsample_mean = lambda x: np.array([x[s].mean() for s in age_slices])
downsample_sum = lambda x: np.array([x[s].sum() for s in age_slices])

In [25]:

group_coverage = downsample_mean(coverage[::-1])

In [26]:

downsample_mean(immune_MCV1)

Out[26]:

array([0.      , 0.839375, 0.8262  , 0.8279  , 0.7786  , 0.7055  ,
       0.544   , 0.0969  , 0.0289  , 0.      ])

In [27]:

susceptibility = (1 - coverage*efficacy).prod(axis=1).round(3).sort_index()
susceptibility.plot()
plt.ylabel('Susceptibility')

Out[27]:

Text(0, 0.5, 'Susceptibility')

In [28]:

p_susc = susceptibility.groupby(pd.cut(susceptibility.index, age_classes)).mean()

Down-sample susceptibility to age groups.

In [30]:

residual_susceptibility = downsample_mean(susceptibility)

In [32]:

age_labels = ['0', '1-4', '5-9', '10-14', '15-19', '20-24', '25-29', 
                    '30-34', '35-39', '40+']
plt.bar(range(len(age_slices)), downsample_mean(susceptibility), 
        tick_label=age_labels);

These susceptibility rates can't be true given the distribution of cases that were observed!

SIA¶

Create a time x age matrix of SIA vaccinations to account for intervention

In [33]:

sia_2015 = pd.read_csv(DATA + 'sia_2015.csv', index_col=0)
sia_2016 = pd.read_csv(DATA + 'sia_2016.csv', index_col=0)
sia_2015.head()

Out[33]:

	0	1-4	5-9
Arkhangai	860	7364	1837
Bayan-Ulgii	1361	9069	1872
Bayankhongor	928	6992	2019
Bulgan	574	4494	1430
Gobi-Altai	494	4490	1276

In [34]:

sia_2015_total = (sia_2015.sum()
                          .reindex(pd.Index(age_labels))
                          .fillna(0)
                          .astype(int))
sia_2015_total

Out[34]:

0         34302
1-4      247851
5-9       65532
10-14         0
15-19         0
20-24         0
25-29         0
30-34         0
35-39         0
40+           0
dtype: int64

In [35]:

sia_2016.sum()

Out[35]:

15-19     78105
20-24    186022
25-29    230733
30-34     54439
dtype: int64

In [36]:

sia_2016_total = (sia_2016.sum()
                          .reindex(pd.Index(age_labels))
                          .fillna(0)
                          .astype(int))
sia_2016_total

Out[36]:

0             0
1-4           0
5-9           0
10-14         0
15-19     78105
20-24    186022
25-29    230733
30-34     54439
35-39         0
40+           0
dtype: int64

In [37]:

sia = pd.DataFrame(index=confirmed_counts.index, columns=pd.Index(age_labels)).fillna(0)

In [38]:

sia.loc['2016-05-22'] = sia_2016_total
sia.loc[['2015-05-24', '2015-06-07', '2015-06-21']] = [sia_2015_total/4,
                                                       sia_2015_total/2,
                                                       sia_2015_total/4]

In [39]:

sia

Out[39]:

	0	1-4	5-9	10-14	15-19	20-24	25-29	30-34	35-39	40+
date_onset
2015-01-18	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-02-01	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-02-15	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-03-01	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-03-15	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-03-29	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-04-12	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-04-26	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-05-10	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-05-24	8575.5	61962.75	16383.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-06-07	17151.0	123925.50	32766.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-06-21	8575.5	61962.75	16383.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-07-05	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-07-19	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-08-02	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-08-16	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-08-30	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-09-13	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-09-27	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-10-11	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-10-25	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-11-08	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-11-22	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-12-06	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2015-12-20	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-01-03	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-01-17	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-01-31	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-02-14	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-02-28	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-03-13	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-03-27	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-04-10	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-04-24	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-05-08	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-05-22	0.0	0.00	0.0	0.0	78105.0	186022.0	230733.0	54439.0	0.0	0.0
2016-06-05	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-06-19	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
2016-07-03	0.0	0.00	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0

In [40]:

sia_vacc = sia.cumsum(0).values

Stochastic Disease Transmission Model¶

We will extend a simple SIR disease model, to account for confirmation status, which will be fit using MCMC.

This model fits the series of 2-week infection totals for each age group $a$ as a set of Poisson random variables:

$$Pr(I_{a}(t) | \lambda_a(t)) = \text{Poisson}(\lambda_a(t)) $$

Where the age-specific outbreak intensity at time $t$ is modeled as:

$$\lambda_a(t) = S_a(t-1) \frac{I(t-1)\mathbf{B}}{N_a} $$

where $S_a(t-1)$ is the number of susceptibles in age group $a$ in the previous time period, $I(t-1)$ an age-specific vector of the number of infected individuals in the previous time period, $\mathbf{B}$ a matrix of transmission coefficients (from Prem et al., specified above), and $N_a$ an estimate of the population of age-$a$ people in Mongolia.

The basic reproductive number $R_0$ was calculated as the largest real-valued eigenvalue of the matrix $\mathbf{B}$. To impose a mild constraint on $R_0$, we applied a Gaussian prior distribution whose 1st and 99th quantiles are 8 and 24, respectively, a reasonable range for a measles outbreak:

Input data¶

Cases

In [41]:

I = confirmed_counts.values.astype(int)
I.shape

Out[41]:

(39, 10)

In [42]:

I_age = I.sum(0)

In [43]:

lab_samples = line_list.dropna(subset=['date_lab']).copy()
lab_samples.head()

Out[43]:

	id	date_onset	provincecity	dob	age_years	died	confirmed	labconfirmed	date_lab	male	month_onset	year_onset
0	389.0	2015-03-30	UB	2014-09-01	0.577686	0.0	1.0	1.0	2015-04-06	True	3.0	2015.0
1	707.0	2015-04-03	UB	2013-02-02	2.162902	1.0	1.0	1.0	2015-04-04	True	4.0	2015.0
3	2061.0	2015-04-22	UB	2014-09-23	0.577686	0.0	1.0	1.0	2015-04-24	True	4.0	2015.0
8	20835.0	2015-11-25	UB	2015-09-01	0.238193	1.0	1.0	1.0	2015-12-01	True	11.0	2015.0
9	21053.0	2015-12-02	UB	2014-11-19	1.062286	0.0	1.0	1.0	2015-12-14	False	12.0	2015.0

In [44]:

clinic_confirmed = lab_samples.query('confirmed==1').dropna(subset=['age_years']).copy()
clinic_confirmed.shape

Out[44]:

(5846, 13)

In [45]:

clinic_confirmed['age_group'] = pd.cut(clinic_confirmed.age_years, age_classes, labels=np.arange(len(age_slices)))

In [46]:

clinic_confirmed.dropna(subset=['age_years']).groupby('age_group')['labconfirmed'].mean()

Out[46]:

age_group
0    0.768957
1    0.657143
2    0.791489
3    0.503497
4    0.528487
5    0.539967
6    0.533007
7    0.752212
8    0.727273
9    0.372093
Name: labconfirmed, dtype: float64

In [47]:

age_midpoints = np.array([0.5, 3, 7.5, 12.5, 17.5, 22.5, 27.5, 32.5, 37.5, 70])

In [48]:

confirmed, age, age_group = clinic_confirmed[['labconfirmed', 'age_years', 'age_group']].values.T

In [49]:

age_group = age_group.astype(int)
age = age.astype(float)

We use a Gaussian random walk to model the variation in confirmation rate across age groups. These functions support the random walk model.

In [50]:

n_knots = 20
knots = np.linspace(age.min(), age.max(), n_knots)

In [51]:

def interpolate(x0, y0, x):
    
    idx = np.searchsorted(x0, x)
    dl = np.array(x - x0[idx - 1])
    dr = np.array(x0[idx] - x)
    d = dl + dr
    wl = dr / d

    return wl * y0[idx - 1] + (1 - wl) * y0[idx]

Generates mcv2 effective coverage under assumption that second dose is given only to those children who received the first dose

routine_coverage_bias <- 0.95
mcv1 <- read.csv("input-mcv1.csv",row.names = 1)  # read in first dose coverage
m1 <- unlist(mcv1[which(rownames(mcv1)=="MNG"),1:35]) # select out MNG
mcv2 <- read.csv("input-mcv2.csv",row.names = 1)  # read in second dose coverage
m2 <- unlist(mcv2[which(rownames(mcv2)=="MNG"),1:35]) # select out MNG
m1 <- c(rep(0,15),m1) * routine_coverage_bias
m2 <- c(rep(0,15),m2) * routine_coverage_bias

L <- 60                                           # life expectancy
R <- 25                                           # R0 -- in practice this is fit, this is just a test value for illustration

# Expected force of natural infection, discounted by vaccination, per SP paper
foi <- rev(1-m1*.85) * rev(1-mcv2_dep(m1,m2,0.85)) *(R/L)  # here scaling effective MCV2 by mcv2_dep function above

#########################################
#SIAs
SIA.coverage <- 0.5                              # immunization due to sias
sia1 <- rep(0,35)                                # 2012 SIA just illustrating ages 1-35 here, since historical coverage above only goes that far back.  Reasonable to assume no vaccination before 1980 -- if there was any it was trivial
sia1[7:18] <- SIA.coverage                                 # current age classes that were targeted by SIA
sia2 <- rep(0,35)                                # same for 2007 SIA 
sia2[11:19] <- SIA.coverage
sia3 <- rep(0,35)                               # same for 2000 SIA 
sia3[15:29] <- SIA.coverage
# note that in the calculation below, I assume that these are all independent, which is the most optimistic interpretation of impact

sia1 <- c(sia1,rep(0,15))
sia2 <- c(sia2,rep(0,15))
sia3 <- c(sia3,rep(0,15))
#########################################
# The 4hree terms here are:
# susceptibles after the first dose
# suceptibles after the second dose
# susceptibles after natural infection
# susceptibles after sias

plot((1-rev(m1)*.85) * rev(1-mcv2_dep(m1,m2,0.85)) * exp(-cumsum(foi)) *(1-sia1)*(1-sia2)*(1-sia3),ylim=c(0,.25),type="l",
     xlab="current age (years)",
     ylab="fraction susceptible")
lines((1-rev(m1)*.85) * 1 * exp(-cumsum(foi)) *(1-sia1)*(1-sia2)*(1-sia3),ylim=c(0,.05),col=2)
legend(0,.25,legend=c("with mcv2","set mcv2 to 0"),lty=1,col=1:2,bty="n")

# for the first case (black line)
initial_susceptible_profile <- (1-rev(m1)*.85) * rev(1-mcv2_dep(m1,m2,0.85)) * exp(-cumsum(foi)) *(1-sia1)*(1-sia2)*(1-sia3)

##############################################################
# Initial susceptibles in numbers -- assume that that population in each age class is equal to the birth cohort in that year of birth
# then the estiamte of the absolute number of susceptible in each age class is initial_susceptible_profile (above) times the rev()
# of the birth cohorts.  This ignores any mortality, but that isn't terrible, since it presumes that you're likely to move from S->R 
# prior to death
births <- read.csv("input-births.csv",row.names = 1)
births <- unlist(births[which(rownames(births)=="MNG"),1:35]) # select out MNG
# This only gives the birth cohorts through 35 years.  It would be reasonable to assume that the prior birth cohorts
# were the same size; e.g. c() births[1] as many times as needed to births.
births <- c(rep(births[1],15),births)

# The first dose of measles vaccine is delivered between 9-12m of age. So most of the first age cohort is not yet eligible.
# So we model the 0-1 class separately and assume that 25% are susceptible (older than 6m, when they lose maternal immunity on average, 
# and younger than 9m). The 25% between 9m-12m are vaccinated at mcv1 level
# 
barplot(rev(births)*c(.25 * ( 1 + (1-rev(m1)*.85)[1]), initial_susceptible_profile[-1]), 
        xlab = "birth year",
        ylab="number susceptible")

expected_susceptibles <- rev(births)*c(.25 * ( 1 + (1-rev(m1)*.85)[1]), initial_susceptible_profile[-1])

In [52]:

def mcv2_dep(mcv1, mcv2, ve):
    return (1 - ve) * mcv2 / (1 - mcv1 * ve)

In [53]:

routine_coverage_bias = 0.95

In [72]:

MCV1 = pd.read_csv(DATA + '../raw/input-mcv1.csv', index_col=0).loc['MNG', :'2014'].values
MCV2 = pd.read_csv(DATA + '../raw/input-mcv2.csv', index_col=0).loc['MNG', :'2014'].values

MCV1 = np.concatenate([np.zeros(15), MCV1]) * routine_coverage_bias
#MCV2 = np.concatenate([np.zeros(15), MCV2]) * routine_coverage_bias
MCV2 = np.zeros_like(MCV1)

In [107]:

foi = (1 - MCV1 * .85)[::-1] * (1 - mcv2_dep(MCV1, MCV2, 0.85))[::-1] * (16 / 60)

In [108]:

sia_coverage = 0.3
sia1 = np.zeros(50)
sia1[5:14] = sia_coverage
sia2 = np.zeros(50)
sia2[10:19] = sia_coverage
sia3 = np.zeros(50)
sia3[15:23] = sia_coverage
sia4 = np.zeros(50)
sia4[19:29] = sia_coverage

In [109]:

initial_susceptible_profile = ((1 - MCV1 * .85)[::-1] 
                         * (1 - mcv2_dep(MCV1, MCV2, 0.85))[::-1] 
                         * np.exp(-np.cumsum(foi)) 
                         * (1 - sia1) * (1 - sia2) * (1 - sia3) * (1 - sia4))

In [110]:

plt.plot(initial_susceptible_profile)

Out[110]:

[<matplotlib.lines.Line2D at 0x7f954415fdd8>]

In [111]:

birth_counts = pd.read_csv(DATA + '../raw/input-births.csv', index_col=0).loc['MNG', :'2014'].values
birth_counts = np.concatenate([np.ones(15)*birth_counts[0], birth_counts])

In [112]:

initial_susceptible_profile[0] += 1
initial_susceptible_profile[0] *= 0.25

In [113]:

expected_susceptibles = birth_counts[::-1] * initial_susceptible_profile

In [114]:

S_exp = downsample_sum(expected_susceptibles)

Difference between expected susceptibles and total cases by age group

In [115]:

S_exp - I.cumsum(axis=0)[-1]

Out[115]:

array([14931.76459601, 46680.81115632, 26527.17050497, 11339.19591366,
        4382.42317616,  6763.38867463, 16847.03789221, 11458.07611094,
        2861.71300339,   614.54847146])

In [116]:

plt.bar(np.arange(len(expected_susceptibles)), expected_susceptibles)

Out[116]:

<BarContainer object of 50 artists>

In [117]:

def tdownsample(x): 
    return tt.stack([x[s].sum() for s in age_slices])

In [120]:

with pm.Model() as uncertain_vacc_model:
    
    # Confirmation sub-model
    σ = pm.HalfNormal('σ', 5)
    y = pm.GaussianRandomWalk('y', sd=σ, shape=n_knots)
    α = interpolate(knots, y, age)
    π = pm.invlogit(α)
        
    confirmation = pm.Bernoulli('confirmation', π, observed=confirmed)
    
    # Calculate age group probabilities of confirmation
    α_group = interpolate(knots, y, age_midpoints)
    p_confirm = pm.Deterministic('p_confirm', pm.invlogit(α_group))
    
    # Basic reproduction number centered on the dominant eigenvector of the contact matrix
    R_0 = pm.Normal('R_0', max(evs[np.isreal(evs)]), sd=3.5)
    
    # Force of infection based on dependent MCV efficacy
    FOI = (1 - MCV1)[::-1] * (1 - mcv2_dep(MCV1, MCV2, efficacy))[::-1] * R_0 / 60
    # Susceptibility prior to outbreaks
    susceptible = ((1 - MCV1 * .85)[::-1] 
                         * (1 - mcv2_dep(MCV1, MCV2, efficacy))[::-1] 
                         * tt.exp(-tt.cumsum(FOI)) 
                         * (1 - sia1) * (1 - sia2) * (1 - sia3) * (1 - sia3))
    susceptible = tt.concatenate([tt.stack(0.25*(1 + susceptible[0])), susceptible[1:]])
    # Initial susceptibles
    S_0 = pm.Deterministic('S_0', tdownsample(birth_counts[::-1] * susceptible))
    
    # Susceptibles over time, removing individuals vaccinated by SIA
#     S = S_0 - sia_vacc*efficacy*0.5 - shared(I.cumsum(axis=0))
    S = S_0 - shared(I.cumsum(axis=0))

    # Force of infection
    λ = S * (I.dot(B_prem_exp) / N.sum()) + 0.001
    
    # Effective reproductive number
    R_t = pm.Deterministic('R_t', S.sum(1) * R_0 / N.sum())
    
    # Adjust for confirmation bias
    λ_apparent = λ / p_confirm
    
    # Infections likelihood
    infections = pm.Poisson('infections', λ_apparent[:-1], observed=I[1:])

In [121]:

uncertain_vacc_model.check_test_point()

Out[121]:

σ_log__            -0.77
y                 -43.75
R_0                -2.17
confirmation    -4052.14
infections     -40827.80
Name: Log-probability of test_point, dtype: float64

In [122]:

with uncertain_vacc_model:
    
    trace = pm.sample(1000, tune=2000, cores=2)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [R_0, y, σ]
Sampling 2 chains: 100%|██████████| 6000/6000 [02:40<00:00, 37.33draws/s]

In [123]:

pm.energyplot(trace);

In [124]:

if 'p_vacc_interval__' in [v.name for v in uncertain_vacc_model.free_RVs]:
    pm.forestplot(trace, varnames=['p_vacc'], ylabels=age_labels, rhat=False)

In [125]:

pm.traceplot(trace, varnames=['R_0'])

Out[125]:

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f9512c4ab38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f9512ac4dd8>]],
      dtype=object)

However, this model estimates $R_e$ values that are too high.

In [126]:

pm.forestplot(trace, varnames=['R_t'])

Out[126]:

GridSpec(1, 2, width_ratios=[3, 1])