In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import itertools
sns.set_context('notebook')

seed_numbers = 20090425, 19700903

from datetime import timedelta, datetime

Data import and cleanup

In [2]:
# rotavirus_data = pd.read_csv('data/AGE Year 2-4 with Final Rota.csv', low_memory=False)
cases_file = 'data/AGE Cases and Healthy Controls Merged with Lab Results Year 2 to 4 v05.23.2017.csv'
rotavirus_data = pd.read_csv(cases_file, low_memory=False, parse_dates=['scrdate', 'dob'])
rotavirus_data.index.is_unique
Out[2]:
True
In [3]:
rotavirus_data.head()
Out[3]:
caseid case settingnew rota_od vaxstrain rota_genotype noro_type noroseq_gii_c noroseq_gii_d noroseq_gi_c ... ddxdioten1 ddxdioten2 ddxdioten3 ddxdioten4 ddxdioten5 ddxdioten6 ddxdioten7 ddxdioten8 ddxdioten9 ddxdioten10
0 EN1C0001 1 3 0.088 NaN NEG NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 EN1C0002 1 3 NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 EN1C0003 1 3 NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 EN1C0004 1 3 NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 EN1C0006 1 3 NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 698 columns

In [4]:
rotavirus_data.query('year==3').sort_values('scrdate').scrdate
Out[4]:
2046   2013-12-01
2048   2013-12-01
2049   2013-12-01
2050   2013-12-01
2051   2013-12-01
2052   2013-12-01
2054   2013-12-01
2047   2013-12-01
2053   2013-12-02
460    2013-12-02
4264   2013-12-02
4263   2013-12-02
4262   2013-12-02
4261   2013-12-02
4260   2013-12-02
4257   2013-12-02
4256   2013-12-02
4255   2013-12-02
2055   2013-12-02
4254   2013-12-02
462    2013-12-03
2058   2013-12-04
461    2013-12-04
2056   2013-12-04
2057   2013-12-04
2059   2013-12-04
463    2013-12-05
464    2013-12-05
465    2013-12-05
2060   2013-12-05
          ...    
882    2014-11-24
881    2014-11-24
880    2014-11-24
878    2014-11-24
4713   2014-11-24
885    2014-11-24
2829   2014-11-25
2827   2014-11-25
2828   2014-11-25
2830   2014-11-25
5220   2014-11-25
888    2014-11-25
889    2014-11-25
890    2014-11-25
892    2014-11-25
4715   2014-11-25
891    2014-11-26
5217   2014-11-26
2837   2014-11-30
2842   2014-11-30
2841   2014-11-30
2840   2014-11-30
2831   2014-11-30
2832   2014-11-30
2833   2014-11-30
2834   2014-11-30
2835   2014-11-30
2836   2014-11-30
2838   2014-11-30
2839   2014-11-30
Name: scrdate, Length: 1740, dtype: datetime64[ns]

Import eligibles

In [5]:
eligible_file = 'data/AGE Outpatient Eligibility Data Year 2-4.csv'
eligible_data = pd.read_csv(eligible_file, low_memory=False, index_col=0, parse_dates=['scrdate'])
eligible_data.index.is_unique
Out[5]:
True
In [6]:
eligible_data.head()
Out[6]:
scrdate admitdate dob agedays agemonths ageyears insurance sex race race2_8 adxd1 adxd2 transfer noninfect immcomp prevenroll elig
caseid
EN1C0001 2012-12-03 12/3/12 6/11/06 2366.08 77.74 6.478 1 1 6 1.0 6.0 7.0 0 0 0 0 1
EN1C0002 2012-12-03 12/3/12 8/20/05 2661.45 87.44 7.287 1 1 2 NaN 6.0 7.0 0 0 0 0 1
EN1C0003 2012-12-03 12/3/12 8/14/07 1936.96 63.64 5.303 1 2 2 NaN 1.0 4.0 0 0 0 0 1
EN1C0004 2012-12-04 12/4/12 11/6/09 1124.16 36.93 3.078 1 2 6 1.0 7.0 NaN 0 0 0 0 1
EN1C0006 2012-12-04 12/4/12 7/8/07 1974.40 64.87 5.406 1 2 2 NaN 7.0 1.0 0 0 0 0 1
In [7]:
eligible_data.scrdate.min(), eligible_data.scrdate.max()
Out[7]:
(Timestamp('2012-12-03 00:00:00'), Timestamp('2015-11-30 00:00:00'))
In [8]:
eligible_data['year'] = 1
eligible_data.loc[eligible_data.scrdate < datetime(2013,12,1), 'year'] = 0
eligible_data.loc[eligible_data.scrdate >= datetime(2014,12,1), 'year'] = 2
In [9]:
eligible_data['under_5'] = ((eligible_data.agemonths) < 60).astype(int)
eligible_data['under_2'] = ((eligible_data.agemonths) < 24).astype(int)
eligible_data['under_1'] = ((eligible_data.agemonths) < 12).astype(int)
In [10]:
eligible_data['age_group'] = 3
eligible_data.loc[eligible_data.under_5==1, 'age_group'] = 2
eligible_data.loc[eligible_data.under_2==1, 'age_group'] = 1
eligible_data.loc[eligible_data.under_1==1, 'age_group'] = 0
In [11]:
eligible_counts = eligible_data.groupby(['age_group', 'year']).size()
eligible_counts.name = 'eligible'
eligible_counts
Out[11]:
age_group  year
0          0       149
           1       150
           2       153
1          0       149
           1       129
           2       139
2          0       204
           1       187
           2       192
3          0       263
           1       211
           2       182
Name: eligible, dtype: int64
In [12]:
eligible_data.shape
Out[12]:
(2108, 22)

Rename columns and recode data

In [13]:
rotavirus_data.race.value_counts()
Out[13]:
1    2766
2    1968
6     411
4     105
7      10
5       3
3       1
8       1
Name: race, dtype: int64
In [14]:
rotavirus_data['white'] = rotavirus_data['race'] == 1
rotavirus_data['black'] = rotavirus_data['race'] == 2
In [15]:
rotavirus_data['astro'].value_counts().index
Out[15]:
Float64Index([0.0, 1.0], dtype='float64')
In [16]:
rotavirus_data=rotavirus_data.rename(columns={'rotacdc':'rotavirus',
                       'sapo':'sapovirus',
                       'astro':'astrovirus',
                       'noro':'norovirus'})
In [17]:
rotavirus_data.rotavirus.value_counts()
Out[17]:
0.0    3722
1.0     290
Name: rotavirus, dtype: int64
In [18]:
rotavirus_data.rotavu.value_counts()
Out[18]:
0.0    3574
1.0     441
Name: rotavu, dtype: int64
In [19]:
virus_cols = ['rotavirus','sapovirus','astrovirus','norovirus']

Proportion of test results missing

In [20]:
rotavirus_data[virus_cols].isnull().mean()
Out[20]:
rotavirus     0.237987
sapovirus     0.237417
astrovirus    0.237417
norovirus     0.237417
dtype: float64
In [21]:
astro_data = rotavirus_data[rotavirus_data.astrovirus==1].dropna(subset=['astro_ct'])
astro_data.shape
Out[21]:
(159, 700)
In [22]:
noro_data = rotavirus_data[rotavirus_data.norovirus==1].dropna(subset=['noro_ct'])
noro_data.shape
Out[22]:
(729, 700)
In [23]:
noro_data['coinfect'] = noro_data[['rotavirus', 'sapovirus', 'astrovirus']].sum(1)
In [24]:
astro_data['coinfect'] = astro_data[['rotavirus', 'sapovirus', 'norovirus']].sum(1)
In [25]:
with_coinf = noro_data.noro_ct[noro_data.coinfect==1].astype(float)
no_coinf = noro_data.noro_ct[(noro_data.coinfect==0)].astype(float)

# with_coinf = astro_data.astro_ct[astro_data.coinfect==1].astype(float)
# no_coinf = astro_data.astro_ct[(astro_data.coinfect==0)].astype(float)
In [26]:
from scipy.stats import ranksums

ranksums(with_coinf, no_coinf)
Out[26]:
RanksumsResult(statistic=3.6398985691965766, pvalue=0.00027274545863325173)
In [27]:
rotavirus_data.sapo_ct.dropna().astype(float).hist()
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x11d60c668>

Calculate age, and compare to given age.

In [28]:
rotavirus_data['age_months'] = (rotavirus_data['scrdate']
                              - rotavirus_data['dob'] + timedelta(1)).astype('timedelta64[M]') 
In [29]:
rotavirus_data.age_months.hist();

Group into ages under 5 and 5 or over

In [30]:
rotavirus_data['under_5'] = ((rotavirus_data.age_months) < 60).astype(int)
In [31]:
rotavirus_data['under_2'] = ((rotavirus_data.age_months) < 24).astype(int)
In [32]:
rotavirus_data['under_1'] = ((rotavirus_data.age_months) < 12).astype(int)
In [33]:
rotavirus_data['age_group'] = 3
rotavirus_data.loc[rotavirus_data.under_5==1, 'age_group'] = 2
rotavirus_data.loc[rotavirus_data.under_2==1, 'age_group'] = 1
rotavirus_data.loc[rotavirus_data.under_1==1, 'age_group'] = 0
In [34]:
rotavirus_data.age_group.value_counts()
Out[34]:
3    1459
0    1365
2    1344
1    1097
Name: age_group, dtype: int64
In [35]:
rotavirus_data.year.value_counts()
Out[35]:
2    1989
3    1740
4    1536
Name: year, dtype: int64

Calculate natural year

In [36]:
rotavirus_data['year'] = rotavirus_data.year - 2

Rename setting variable

In [37]:
assert not rotavirus_data.settingnew.isnull().sum()
In [38]:
rotavirus_data['setting'] = rotavirus_data.settingnew - 1
In [39]:
# Indices to each value
INPATIENT, ED, OUTPATIENT, HC = 0, 1, 2, 3

Recode sex and stool

In [40]:
assert not rotavirus_data['specimencol'].isnull().sum()
In [41]:
rotavirus_data['male'] = rotavirus_data['sex']==1
rotavirus_data['stool_collected'] = rotavirus_data['specimencol']==1

Recode insurance

In [42]:
rotavirus_data.insurance.value_counts()
Out[42]:
1    4599
2     500
3      84
8      44
4      38
Name: insurance, dtype: int64
In [43]:
rotavirus_data['public_ins'] = rotavirus_data.insurance.isin({1, 3})
rotavirus_data['private_ins'] = rotavirus_data.insurance.isin({2, 3})
rotavirus_data.loc[rotavirus_data.insurance==8, ['public_ins', 'private_ins']] = np.nan
In [44]:
# Drop extraneous columns
rotavirus_data = rotavirus_data.drop(['intvwdate', 'dob', 'provider', 'coldat', 'tdat', 
       'insurance', 'sex', 'hispcr', 'mdegree_7', 'rvacver_7',
       'rvacver_7r', 'race', 'race2_8'], axis=1)

Summary of missing values

In [45]:
rotavirus_data.isnull().sum()
Out[45]:
caseid                0
case                  0
settingnew            0
rota_od            5085
vaxstrain          5264
rota_genotype      4826
noro_type          4535
noroseq_gii_c      4772
noroseq_gii_d      4772
noroseq_gi_c       5022
noroseq_gi_d       5037
year                  0
rotavirus          1253
rotavu             1250
sapovirus          1250
sapo_ct            4917
astrovirus         1250
astro_ct           5106
norovirus          1250
noro_ct            4536
studysite             0
hospital              0
abiniscr              0
deiniscr              0
scrdate               0
admitdate             0
admittime            13
agedays               0
agemonths             0
ageyears              0
                   ... 
ddxdcten3          5235
ddxdcten4          5254
ddxdcten5          5259
ddxdcten6          5263
ddxdcten7          5264
ddxdcten8          5264
ddxdcten9          5265
ddxdcten10         5265
ddxdioten1         5258
ddxdioten2         5259
ddxdioten3         5261
ddxdioten4         5261
ddxdioten5         5263
ddxdioten6         5263
ddxdioten7         5263
ddxdioten8         5264
ddxdioten9         5264
ddxdioten10        5264
white                 0
black                 0
age_months            0
under_5               0
under_2               0
under_1               0
age_group             0
setting               0
male                  0
stool_collected       0
public_ins           44
private_ins          44
Length: 697, dtype: int64

Aggregate data by age group and setting

In [46]:
rotavirus_summary = (rotavirus_data.drop('scrdate', axis=1)
             .groupby(['age_group', 'year', 'setting', 'black'])['caseid'].count()
             .reset_index()
             .rename(columns={'caseid':'cases'}))

rotavirus_summary.head()
Out[46]:
age_group year setting black cases
0 0 0 0 False 25
1 0 0 0 True 10
2 0 0 1 False 94
3 0 0 1 True 72
4 0 0 2 False 56

Enrollment days/week for inpatient, outpatient, ED

In [47]:
monitoring_days = 7, 4, 4

# Only 5.5 days of intake on outpatient
monitoring_rate = np.array(monitoring_days)/np.array([7, 7, 5.5])
monitoring_rate
Out[47]:
array([ 1.        ,  0.57142857,  0.72727273])

Outpatient AGE Model

In [155]:
from pymc3 import Model, Deterministic, sample, NUTS, find_MAP, Potential, Metropolis
from pymc3 import Binomial, Beta, Poisson, Normal, DiscreteUniform, Uniform, Flat, Gamma
from pymc3 import traceplot, forestplot, summary, df_summary, energyplot

import theano.tensor as tt
In [49]:
iterations = 1000
tune = 1000
In [50]:
label_map = {'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'5+'},
             'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'},
             'black':{0:'white', 1:'black'},
             'setting':{0:'inpatient', 1:'ED', 2:'outpatient'}}

Rate factor for population

In [51]:
rate_factor = 1000
In [52]:
from pymc3 import hpd
def _hpd_df(x, alpha):
    cnames = ['hpd_{0:g}'.format(100 * alpha/2),
              'hpd_{0:g}'.format(100 * (1 - alpha/2))]
    return pd.DataFrame(hpd(x, alpha), columns=cnames)
In [53]:
def generate_table(trace, labels, index, columns, varnames=['λ']):
    rate_df_index = pd.MultiIndex.from_tuples(labels.values.tolist(), 
                                          names=labels.columns)
    rate_df = (df_summary(trace, varnames=varnames) * rate_factor)[['mean', 'hpd_2.5', 'hpd_97.5']]
    rate_df.index = rate_df_index
    pivot_all = pd.pivot_table(rate_df
                     .reset_index(), 
                               index=index, columns=columns)
    value_iterator = zip(pivot_all['mean'].values.flatten(), 
                     pivot_all['hpd_2.5'].values.flatten(), 
                     pivot_all['hpd_97.5'].values.flatten())
    rate_strings = np.reshape(['{0:.2f} ({1:.2f}, {2:.2f})'.format(*vals) for vals in value_iterator], 
                                  pivot_all['mean'].shape)
    
    return pd.DataFrame(rate_strings, index=pivot_all['mean'].index, columns=pivot_all['mean'].columns)
In [54]:
import redcap
import getpass

local_auth = True

api_url = 'https://redcap.vanderbilt.edu/api/'

if local_auth:
    api_key = open("/Users/fonnescj/Dropbox/Collaborations/Halasa/Tokens/outpatient_token.txt").read()

else:
    api_key = getpass.getpass()
In [55]:
outpatient_project = redcap.Project(api_url, api_key)
outpatients = outpatient_project.export_records(format='df')
In [56]:
outpatients.groupby('year').year.count()
Out[56]:
year
2012    15969
2015    17562
Name: year, dtype: int64
In [57]:
outpatients.shape
Out[57]:
(33531, 15)
In [58]:
outpatients[['ageinjan', 'ageinnov']].hist(bins=20)
Out[58]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x126a23240>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11dcea2e8>]], dtype=object)

Going to assume that the outlier ages above were mistakenly entered as months.

In [59]:
outliers = outpatients.ageinjan > 20
outpatients.loc[outliers, ['ageinjan', 'ageinnov']] = outpatients.loc[outliers, ['ageinjan', 'ageinnov']] / 12
In [60]:
outpatients[['ageinjan', 'ageinnov']].hist(bins=20)
Out[60]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1207c8be0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x126294208>]], dtype=object)
In [61]:
outpatients['age'] = np.maximum(outpatients[['ageinjan', 'ageinnov']].mean(1), 0.1)
In [62]:
outpatients_kids = outpatients[outpatients.age<18].copy()
In [63]:
outpatients_kids['5_and_older'] = (outpatients_kids.age>=5).astype(int)
outpatients_kids['under_5'] = (outpatients_kids.age<5).astype(int)
outpatients_kids['under_2'] = (outpatients_kids.age<2).astype(int)
outpatients_kids['under_1'] = (outpatients_kids.age<1).astype(int)
In [64]:
outpatients_kids['age_group'] = 3
outpatients_kids.loc[outpatients_kids.under_5==1, 'age_group'] = 2
outpatients_kids.loc[outpatients_kids.under_2==1, 'age_group'] = 1
outpatients_kids.loc[outpatients_kids.under_1==1, 'age_group'] = 0
In [65]:
outpatients_kids.groupby('year').age.hist(alpha=0.3)
Out[65]:
year
2012    Axes(0.125,0.125;0.775x0.755)
2015    Axes(0.125,0.125;0.775x0.755)
Name: age, dtype: object
In [66]:
outpatient_n = (outpatients_kids.rename(columns={'ptname':'n'})
                        .groupby(['year', 'age_group'])
                        .n.count())
outpatient_n
Out[66]:
year  age_group
2012  0            1237
      1            1652
      2            4446
      3            8605
2015  0            1618
      1            1436
      2            4307
      3            9870
Name: n, dtype: int64
In [67]:
rotavirus_data.head()
Out[67]:
caseid case settingnew rota_od vaxstrain rota_genotype noro_type noroseq_gii_c noroseq_gii_d noroseq_gi_c ... age_months under_5 under_2 under_1 age_group setting male stool_collected public_ins private_ins
0 EN1C0001 1 3 0.088 NaN NEG NaN NaN NaN NaN ... 77.0 0 0 0 3 2 True True 1.0 0.0
1 EN1C0002 1 3 NaN NaN NaN NaN NaN NaN NaN ... 87.0 0 0 0 3 2 True False 1.0 0.0
2 EN1C0003 1 3 NaN NaN NaN NaN NaN NaN NaN ... 63.0 0 0 0 3 2 False False 1.0 0.0
3 EN1C0004 1 3 NaN NaN NaN NaN NaN NaN NaN ... 36.0 1 0 0 2 2 False True 1.0 0.0
4 EN1C0006 1 3 NaN NaN NaN NaN NaN NaN NaN ... 64.0 0 0 0 3 2 False False 1.0 0.0

5 rows × 697 columns

In [68]:
eligible_data.head()
Out[68]:
scrdate admitdate dob agedays agemonths ageyears insurance sex race race2_8 ... transfer noninfect immcomp prevenroll elig year under_5 under_2 under_1 age_group
caseid
EN1C0001 2012-12-03 12/3/12 6/11/06 2366.08 77.74 6.478 1 1 6 1.0 ... 0 0 0 0 1 0 0 0 0 3
EN1C0002 2012-12-03 12/3/12 8/20/05 2661.45 87.44 7.287 1 1 2 NaN ... 0 0 0 0 1 0 0 0 0 3
EN1C0003 2012-12-03 12/3/12 8/14/07 1936.96 63.64 5.303 1 2 2 NaN ... 0 0 0 0 1 0 0 0 0 3
EN1C0004 2012-12-04 12/4/12 11/6/09 1124.16 36.93 3.078 1 2 6 1.0 ... 0 0 0 0 1 0 1 0 0 2
EN1C0006 2012-12-04 12/4/12 7/8/07 1974.40 64.87 5.406 1 2 2 NaN ... 0 0 0 0 1 0 0 0 0 3

5 rows × 22 columns

In [69]:
eligible_counts.sum()
Out[69]:
2108

Average denominators

In [70]:
outpatient_n.head()
Out[70]:
year  age_group
2012  0            1237
      1            1652
      2            4446
      3            8605
2015  0            1618
Name: n, dtype: int64
In [71]:
outpatient_n_mean = outpatient_n.groupby('age_group').mean().astype(int)
In [72]:
outpatient_data_full = (rotavirus_summary[rotavirus_summary.setting==OUTPATIENT].groupby(['age_group','year'])
                        .cases.sum().reset_index())
In [73]:
outpatient_data_full
Out[73]:
age_group year cases
0 0 0 86
1 0 1 95
2 0 2 94
3 1 0 101
4 1 1 90
5 1 2 85
6 2 0 120
7 2 1 119
8 2 2 116
9 3 0 152
10 3 1 124
11 3 2 92
In [74]:
outpatient_merged = (outpatient_data_full.merge(outpatient_n_mean.reset_index(), on=['age_group'])
                                         .merge(pd.DataFrame(eligible_counts).reset_index(), on=['age_group', 'year']))
In [78]:
outpatient_merged
Out[78]:
age_group year cases n eligible
0 0 0 86 1427 149
1 0 1 95 1427 150
2 0 2 94 1427 153
3 1 0 101 1544 149
4 1 1 90 1544 129
5 1 2 85 1544 139
6 2 0 120 4376 204
7 2 1 119 4376 187
8 2 2 116 4376 192
9 3 0 152 9237 263
10 3 1 124 9237 211
11 3 2 92 9237 182
In [76]:
outpatient_under_5 = outpatient_merged[outpatient_merged.age_group<3]
In [79]:
def outpatient_model(dataset):
    
    age_group, cases, n, eligible = dataset[['age_group','cases','n', 'eligible']].values.T

    shape = dataset.shape[0]
    n_groups = len(set(age_group))
    
    # Cases weighted by monitoring effort
    weights = monitoring_rate[OUTPATIENT]
    
    with Model() as mod:

        # Uniform prior on weighted cases
        weighted_cases = Uniform('weighted_cases', eligible, n, shape=shape)
        
        # Adjust for enrollment
        enrollment_rate = Beta('p_enrolled', 1, 1, shape=shape)
        eligible_cases = Binomial('eligible_cases', eligible, enrollment_rate, observed=cases)

        # Likelihood of eligible cases
        weighting = Binomial('weighting', n=weighted_cases, p=weights, observed=eligible)

        θ = Normal('θ', 0, sd=1000, shape=n_groups)

        λ = Deterministic('λ', tt.exp(θ))

        AGE_obs = Potential('AGE_obs', Poisson.dist(λ[age_group]*n).logp(weighted_cases))

    return mod
In [80]:
outpatient_age = outpatient_model(outpatient_under_5)
In [81]:
with outpatient_age:
    outpatient_age_trace = sample(iterations, tune=tune, njobs=2, random_seed=seed_numbers)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:05<00:00, 382.62it/s]
In [139]:
age_labels = ['<1', '1', '2-4']
forestplot(outpatient_age_trace, varnames=['λ'], transform=lambda x: x*rate_factor, ylabels=age_labels)
Out[139]:
<matplotlib.gridspec.GridSpec at 0x13a087908>

Pooled AGE model

In [83]:
outpatient_pooled = outpatient_merged.assign(age_group=(outpatient_merged.age_group==3).astype(int))
outpatient_pooled.head()
Out[83]:
age_group year cases n eligible
0 0 0 86 1427 149
1 0 1 95 1427 150
2 0 2 94 1427 153
3 0 0 101 1544 149
4 0 1 90 1544 129
In [84]:
outpatient_age_pooled = outpatient_model(outpatient_pooled)
In [85]:
with outpatient_age_pooled:
    outpatient_age_pooled_trace = sample(iterations, tune=tune, njobs=2, random_seed=seed_numbers)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:07<00:00, 267.22it/s]
In [86]:
age_labels_pooled = ['<5', '5+']
forestplot(outpatient_age_pooled_trace, varnames=['λ'], transform=lambda x: x*rate_factor, ylabels=age_labels_pooled)
Out[86]:
<matplotlib.gridspec.GridSpec at 0x131f07fd0>

Outpatient AGE estimates

In [90]:
outpatient_age_estimates
Out[90]:
mean hpd_2.5 hpd_97.5
λ__0 145.1 132.2 158.6
λ__1 123.7 112.1 135.6
λ__2 61.0 55.9 65.7
In [92]:
outpatient_age_estimates = (df_summary(outpatient_age_trace, varnames=['λ'], transform=lambda x: x*rate_factor)
                            .drop(['sd', 'mc_error', 'n_eff', 'Rhat'], axis=1)
                            .round(1))
outpatient_age_estimates.index = ['<1', '1', '2-4']
outpatient_age_estimates.columns = 'rate esimate', 'lower 95%', 'upper 95%'
outpatient_age_estimates
Out[92]:
rate esimate lower 95% upper 95%
<1 145.1 132.2 158.6
1 123.7 112.1 135.6
2-4 61.0 55.9 65.7
In [94]:
pooled_estimate = (df_summary(outpatient_age_pooled_trace, varnames=['λ'], transform=lambda x: x*rate_factor)
                            .drop(['sd', 'mc_error', 'n_eff', 'Rhat'], axis=1)
                            .round(1))
pooled_estimate.columns = outpatient_age_estimates.columns
pooled_estimate.index = ['<5', '5+']
outpatient_age_estimates.append(pooled_estimate)
Out[94]:
rate esimate lower 95% upper 95%
<1 145.1 132.2 158.6
1 123.7 112.1 135.6
2-4 61.0 55.9 65.7
<5 90.6 86.0 95.2
5+ 32.6 30.1 35.0
In [95]:
outpatient_age_estimates.to_excel('results/outpatient_age.xlsx')
In [96]:
mean_data = (outpatient_under_5.groupby('age_group')[['cases', 'n']].mean()
                 .rename(columns={'cases':'mean cases', 'n':'mean n'})
                 .round(1)
                .set_index(outpatient_age_estimates.index))

mean_data.join(outpatient_age_estimates)
mean_data.to_excel('results/outpatient_mean_data.xlsx')

Virus Rate Models

In [97]:
collected_stools = (rotavirus_data[rotavirus_data.setting==OUTPATIENT].groupby(['age_group', 'year'])
                        .agg({'stool_collected':['sum', 'count']}))

levels = collected_stools.columns.levels
labels = collected_stools.columns.labels
collected_stools.columns = ['stool_collected', 'enrolled']

collected_stools = collected_stools.reset_index()
collected_stools
Out[97]:
age_group year stool_collected enrolled
0 0 0 79.0 86
1 0 1 79.0 95
2 0 2 85.0 94
3 1 0 93.0 101
4 1 1 78.0 90
5 1 2 73.0 85
6 2 0 92.0 120
7 2 1 82.0 119
8 2 2 76.0 116
9 3 0 115.0 152
10 3 1 84.0 124
11 3 2 66.0 92
In [98]:
outpatient_n_mean.reset_index()
Out[98]:
age_group n
0 0 1427
1 1 1544
2 2 4376
3 3 9237

All possible combinations of age, year and setting for index.

In [99]:
astro_cases = (rotavirus_data[(rotavirus_data.astrovirus==1) & (rotavirus_data.setting==OUTPATIENT)]
                         .groupby(['age_group', 'year'])[['stool_collected']].count())
astro_cases.columns = ['cases']
astro_cases = astro_cases.reset_index()
In [100]:
eligible_counts
Out[100]:
age_group  year
0          0       149
           1       150
           2       153
1          0       149
           1       129
           2       139
2          0       204
           1       187
           2       192
3          0       263
           1       211
           2       182
Name: eligible, dtype: int64
In [102]:
astro_data = (astro_cases.merge(collected_stools, 
                                         on=['age_group', 'year'])
                                  .merge(outpatient_n_mean.reset_index(), 
                                         on=['age_group'], 
                                        how='left')
                                  .merge(pd.DataFrame(eligible_counts).reset_index(), 
                                         on=['age_group', 'year']))
astro_data
Out[102]:
age_group year cases stool_collected enrolled n eligible
0 0 0 2 79.0 86 1427 149
1 0 1 4 79.0 95 1427 150
2 0 2 1 85.0 94 1427 153
3 1 0 8 93.0 101 1544 149
4 1 1 8 78.0 90 1544 129
5 1 2 2 73.0 85 1544 139
6 2 0 12 92.0 120 4376 204
7 2 1 8 82.0 119 4376 187
8 2 2 4 76.0 116 4376 192
9 3 0 6 115.0 152 9237 263
10 3 1 2 84.0 124 9237 211
11 3 2 1 66.0 92 9237 182
In [103]:
sapo_cases = (rotavirus_data[(rotavirus_data.sapovirus==1) & (rotavirus_data.setting==OUTPATIENT)]
                         .groupby(['age_group', 'year'])[['stool_collected']].count())
sapo_cases.columns = ['cases']
sapo_cases = sapo_cases.reset_index()
In [104]:
sapo_data = (sapo_cases.merge(collected_stools, 
                                         on=['age_group', 'year'])
                                  .merge(outpatient_n_mean.reset_index(), 
                                         on=['age_group'], 
                                        how='left')
                                  .merge(pd.DataFrame(eligible_counts).reset_index(), 
                                         on=['age_group', 'year']))
sapo_data.head(10)
Out[104]:
age_group year cases stool_collected enrolled n eligible
0 0 0 4 79.0 86 1427 149
1 0 1 12 79.0 95 1427 150
2 0 2 5 85.0 94 1427 153
3 1 0 17 93.0 101 1544 149
4 1 1 13 78.0 90 1544 129
5 1 2 8 73.0 85 1544 139
6 2 0 10 92.0 120 4376 204
7 2 1 12 82.0 119 4376 187
8 2 2 4 76.0 116 4376 192
9 3 0 10 115.0 152 9237 263
In [105]:
rota_cases = (rotavirus_data[(rotavirus_data.rotavirus==1) & (rotavirus_data.setting==OUTPATIENT)]
                         .groupby(['age_group', 'year'])[['stool_collected']].count())
rota_cases.columns = ['cases']
rota_cases = rota_cases.reset_index()
In [106]:
rota_data = (rota_cases.merge(collected_stools, 
                                         on=['age_group', 'year'])
                                  .merge(outpatient_n_mean.reset_index(), 
                                         on=['age_group'], 
                                        how='left')
                                  .merge(pd.DataFrame(eligible_counts).reset_index(), 
                                         on=['age_group', 'year']))
rota_data.head(10)
Out[106]:
age_group year cases stool_collected enrolled n eligible
0 0 0 9 79.0 86 1427 149
1 0 1 4 79.0 95 1427 150
2 0 2 3 85.0 94 1427 153
3 1 0 11 93.0 101 1544 149
4 1 1 9 78.0 90 1544 129
5 1 2 10 73.0 85 1544 139
6 2 0 13 92.0 120 4376 204
7 2 1 3 82.0 119 4376 187
8 2 2 8 76.0 116 4376 192
9 3 0 6 115.0 152 9237 263
In [108]:
noro_cases = (rotavirus_data[(rotavirus_data.norovirus==1) & (rotavirus_data.setting==OUTPATIENT)]
                         .groupby(['age_group', 'year'])[['stool_collected']].count())
noro_cases.columns = ['cases']
noro_cases = noro_cases.reset_index()
In [109]:
noro_data = (noro_cases.merge(collected_stools, 
                                         on=['age_group', 'year'])
                                  .merge(outpatient_n_mean.reset_index(), 
                                         on=['age_group'], 
                                        how='left')
                                  .merge(pd.DataFrame(eligible_counts).reset_index(), 
                                         on=['age_group', 'year']))
noro_data.head(10)
Out[109]:
age_group year cases stool_collected enrolled n eligible
0 0 0 16 79.0 86 1427 149
1 0 1 22 79.0 95 1427 150
2 0 2 29 85.0 94 1427 153
3 1 0 18 93.0 101 1544 149
4 1 1 21 78.0 90 1544 129
5 1 2 26 73.0 85 1544 139
6 2 0 16 92.0 120 4376 204
7 2 1 18 82.0 119 4376 187
8 2 2 20 76.0 116 4376 192
9 3 0 16 115.0 152 9237 263
In [110]:
virus_totals = pd.concat([astro_data.assign(virus='astrovirus'),
           noro_data.assign(virus='norovirus'),
           rota_data.assign(virus='rotavirus'),
           sapo_data.assign(virus='sapovirus')]).reset_index()
virus_totals['cases'] = virus_totals.cases.astype(int)
virus_totals['n'] = virus_totals.n.astype(int)

Outpatient virus rate model

In [111]:
virus_lookup = {0:'norovirus',
                1:'rotavirus',
                2:'sapovirus',
                3:'astrovirus'}
In [112]:
virus_outpatient_data = (virus_totals.replace({'virus': virus_lookup}))
In [113]:
cases, enrolled, n = virus_outpatient_data[['cases', 'enrolled', 'n']].values.T
In [114]:
virus_outpatient_under_5 = virus_outpatient_data[virus_outpatient_data.age_group<3].drop('index', axis=1)
In [117]:
virus_outpatient_pooled = virus_outpatient_under_5.groupby(['virus', 'year']).agg({'cases':sum, 'stool_collected':sum,
                                                                     'enrolled':sum, 'n':sum, 'eligible':sum}).reset_index()
In [118]:
virus_outpatient_pooled['age_group'] = 3
virus_outpatient_pooled[virus_outpatient_under_5.columns]
Out[118]:
age_group year cases stool_collected enrolled n eligible virus
0 3 0 22 264.0 307 7347 502 astrovirus
1 3 1 20 239.0 304 7347 466 astrovirus
2 3 2 7 234.0 295 7347 484 astrovirus
3 3 0 50 264.0 307 7347 502 norovirus
4 3 1 61 239.0 304 7347 466 norovirus
5 3 2 75 234.0 295 7347 484 norovirus
6 3 0 33 264.0 307 7347 502 rotavirus
7 3 1 16 239.0 304 7347 466 rotavirus
8 3 2 21 234.0 295 7347 484 rotavirus
9 3 0 31 264.0 307 7347 502 sapovirus
10 3 1 37 239.0 304 7347 466 sapovirus
11 3 2 17 234.0 295 7347 484 sapovirus
In [119]:
virus_outpatient_5_over = (virus_outpatient_data[virus_outpatient_data.age_group==3]
                               .drop('index', axis=1)
                               .groupby(['virus', 'year'])
                               .agg({'cases':sum, 'stool_collected':sum,
                                        'enrolled':sum, 'n':sum, 'eligible':sum})
                               .reset_index())
In [120]:
virus_outpatient_5_over['age_group'] = 4
virus_outpatient_5_over[virus_outpatient_under_5.columns]
Out[120]:
age_group year cases stool_collected enrolled n eligible virus
0 4 0 6 115.0 152 9237 263 astrovirus
1 4 1 2 84.0 124 9237 211 astrovirus
2 4 2 1 66.0 92 9237 182 astrovirus
3 4 0 16 115.0 152 9237 263 norovirus
4 4 1 15 84.0 124 9237 211 norovirus
5 4 2 20 66.0 92 9237 182 norovirus
6 4 0 6 115.0 152 9237 263 rotavirus
7 4 1 3 84.0 124 9237 211 rotavirus
8 4 2 4 66.0 92 9237 182 rotavirus
9 4 0 10 115.0 152 9237 263 sapovirus
10 4 1 8 84.0 124 9237 211 sapovirus
11 4 2 2 66.0 92 9237 182 sapovirus
In [121]:
virus_outpatient_merged = pd.concat([virus_outpatient_under_5, 
                                     virus_outpatient_pooled[virus_outpatient_under_5.columns],
                                    virus_outpatient_5_over[virus_outpatient_under_5.columns]])
In [124]:
with Model() as virus_outpatient_model:
    
    groups = virus_outpatient_merged.shape[0]
    
    # Probability of stool collection
    ψ = Beta('ψ', 1, 1)
    outpatient_enrolled = (rotavirus_data.setting==OUTPATIENT).sum()
    collected = rotavirus_data[rotavirus_data.setting==OUTPATIENT].stool_collected.sum()
    stool = Binomial('stool', n=outpatient_enrolled, p=ψ, observed=collected)

    cases, enrolled, n, eligible = virus_outpatient_merged[['cases', 'enrolled', 'n', 'eligible']].values.T
    
    # Adjust for enrollment
    enrollment_rate = Beta('p_enrolled', 1, 1, shape=groups)
    enrolled_cases = Binomial('enrolled_cases', eligible, enrollment_rate, observed=enrolled)
    
    eligible_cases = Uniform('eligible_cases', lower=cases, upper=eligible, 
                                     shape=groups)
    
    p = monitoring_rate[OUTPATIENT]*ψ*enrollment_rate

    # Estimate total VUMC cases in setting
    eligible_cases_likelihood = Binomial('eligible_cases_likelihood', 
                                         n=eligible_cases, 
                                         p=p,
                                         observed=cases)
    

    π = Beta('π', 1, 10, shape=groups)

    # Data likeihood
    virus_out_obs = Potential('virus_out_obs', 
              Binomial.dist(n=n, p=π).logp(eligible_cases).sum())
        
In [125]:
with virus_outpatient_model:
    virus_outpatient_trace = sample(iterations, tune=tune,
                                    njobs=2, random_seed=seed_numbers)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:17<00:00, 116.75it/s]

MCMC diagnostic plot

In [157]:
energyplot(virus_outpatient_trace)
Out[157]:
<matplotlib.axes._subplots.AxesSubplot at 0x13ad75668>

Posterior distribution of stool sampling rate

In [126]:
traceplot(virus_outpatient_trace, varnames=['ψ'])
Out[126]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1379f87b8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x137a310b8>]], dtype=object)

Outpatient virus rates, plotted and tabulated

In [127]:
year_labels = ['2012/13','2013/14','2014/15']
pd.DataFrame(list(itertools.product(age_labels + ['<5', '5+'], year_labels)), columns=['age', 'year'])
Out[127]:
age year
0 <5 2012/13
1 <5 2013/14
2 <5 2014/15
3 5+ 2012/13
4 5+ 2013/14
5 5+ 2014/15
6 <5 2012/13
7 <5 2013/14
8 <5 2014/15
9 5+ 2012/13
10 5+ 2013/14
11 5+ 2014/15
In [128]:
outpatient_sample_melted = pd.melt(pd.DataFrame(virus_outpatient_trace['π']))
In [129]:
outpatient_virus_samples = (virus_outpatient_merged.drop(['enrolled', 'stool_collected', 'cases'],
                                          axis=1).reset_index(drop=True)
                 .merge(outpatient_sample_melted, left_index=True, right_on='variable')
                 .replace({'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'<5', 4:'5+'},
                            'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'},
                          'virus':virus_lookup})
                .rename(columns={'age_group':'age group'}))
In [130]:
outpatient_rateplot = sns.factorplot(x="year", y="rate", hue="age group", col='virus', 
        data=outpatient_virus_samples.assign(rate=outpatient_virus_samples.value*rate_factor),
        palette="Blues", size=5, width=0.5, kind='box', linewidth=0.6, col_wrap=2,
        fliersize=0)

outpatient_rateplot.despine(left=True)
Out[130]:
<seaborn.axisgrid.FacetGrid at 0x12f2a7b38>
In [134]:
outpatient_virus_labels = (virus_outpatient_merged.drop(['enrolled', 'n', 'stool_collected', 'cases', 'eligible'],
                                          axis=1).reset_index(drop=True)
                          .replace({'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'},
                          'virus':virus_lookup})
                .rename(columns={'age_group':'age group'}))
In [135]:
virus_outpatient_table = generate_table(virus_outpatient_trace, outpatient_virus_labels, 
                                        index=['age group', 'year'], varnames=['π'],
               columns=['virus']).sort_index()
virus_outpatient_table.to_excel('results/virus_outpatient_table.xlsx')
In [138]:
virus_outpatient_table
Out[138]:
virus astrovirus norovirus rotavirus sapovirus
age group year
0 2012/13 6.38 (0.83, 13.40) 36.13 (20.62, 54.60) 21.27 (8.97, 34.35) 10.66 (3.35, 20.62)
2013/14 9.72 (2.46, 18.49) 44.44 (27.15, 62.63) 9.85 (2.31, 18.54) 25.30 (12.59, 39.93)
2014/15 4.16 (0.33, 10.01) 60.31 (37.80, 83.68) 8.08 (1.78, 16.27) 11.92 (3.68, 21.96)
1 2012/13 15.23 (5.91, 25.60) 32.15 (17.22, 47.45) 20.13 (9.33, 31.95) 30.39 (16.16, 44.69)
2013/14 14.82 (5.95, 24.78) 36.00 (22.01, 52.57) 16.42 (7.33, 26.62) 22.93 (11.78, 36.72)
2014/15 5.65 (0.50, 11.85) 50.22 (32.06, 71.74) 20.44 (9.05, 32.96) 16.83 (6.13, 28.55)
2 2012/13 8.96 (4.64, 13.77) 11.72 (6.35, 17.79) 9.63 (5.06, 14.90) 7.56 (3.05, 12.01)
2013/14 5.76 (2.34, 9.58) 12.10 (7.25, 18.22) 2.55 (0.49, 5.14) 8.26 (3.85, 12.47)
2014/15 3.37 (0.70, 6.20) 14.15 (8.65, 20.97) 6.05 (2.39, 10.17) 3.38 (0.91, 6.52)
3 2012/13 8.99 (5.32, 12.84) 19.97 (14.90, 25.69) 13.43 (9.05, 18.24) 12.60 (8.29, 16.97)
2013/14 7.76 (4.71, 11.20) 23.02 (17.17, 29.29) 6.28 (3.47, 9.33) 14.02 (9.72, 18.40)
2014/15 3.18 (1.09, 5.39) 29.89 (23.62, 37.16) 8.71 (5.36, 12.62) 7.15 (4.24, 10.96)
4 2012/13 2.36 (0.74, 4.03) 5.66 (2.94, 8.39) 2.34 (0.93, 4.16) 3.66 (1.55, 5.80)
2013/14 1.00 (0.14, 2.13) 5.20 (2.93, 8.00) 1.32 (0.29, 2.59) 2.94 (1.29, 5.04)
2014/15 0.79 (0.05, 1.88) 8.04 (4.49, 11.54) 1.89 (0.58, 3.66) 1.17 (0.15, 2.49)
In [140]:
virus_outpatient_table.index = virus_outpatient_table.index.set_levels(age_labels + ['<5', '5+'], level=0)
In [141]:
virus_outpatient_table
Out[141]:
virus astrovirus norovirus rotavirus sapovirus
age group year
<1 2012/13 6.38 (0.83, 13.40) 36.13 (20.62, 54.60) 21.27 (8.97, 34.35) 10.66 (3.35, 20.62)
2013/14 9.72 (2.46, 18.49) 44.44 (27.15, 62.63) 9.85 (2.31, 18.54) 25.30 (12.59, 39.93)
2014/15 4.16 (0.33, 10.01) 60.31 (37.80, 83.68) 8.08 (1.78, 16.27) 11.92 (3.68, 21.96)
1 2012/13 15.23 (5.91, 25.60) 32.15 (17.22, 47.45) 20.13 (9.33, 31.95) 30.39 (16.16, 44.69)
2013/14 14.82 (5.95, 24.78) 36.00 (22.01, 52.57) 16.42 (7.33, 26.62) 22.93 (11.78, 36.72)
2014/15 5.65 (0.50, 11.85) 50.22 (32.06, 71.74) 20.44 (9.05, 32.96) 16.83 (6.13, 28.55)
2-4 2012/13 8.96 (4.64, 13.77) 11.72 (6.35, 17.79) 9.63 (5.06, 14.90) 7.56 (3.05, 12.01)
2013/14 5.76 (2.34, 9.58) 12.10 (7.25, 18.22) 2.55 (0.49, 5.14) 8.26 (3.85, 12.47)
2014/15 3.37 (0.70, 6.20) 14.15 (8.65, 20.97) 6.05 (2.39, 10.17) 3.38 (0.91, 6.52)
<5 2012/13 8.99 (5.32, 12.84) 19.97 (14.90, 25.69) 13.43 (9.05, 18.24) 12.60 (8.29, 16.97)
2013/14 7.76 (4.71, 11.20) 23.02 (17.17, 29.29) 6.28 (3.47, 9.33) 14.02 (9.72, 18.40)
2014/15 3.18 (1.09, 5.39) 29.89 (23.62, 37.16) 8.71 (5.36, 12.62) 7.15 (4.24, 10.96)
5+ 2012/13 2.36 (0.74, 4.03) 5.66 (2.94, 8.39) 2.34 (0.93, 4.16) 3.66 (1.55, 5.80)
2013/14 1.00 (0.14, 2.13) 5.20 (2.93, 8.00) 1.32 (0.29, 2.59) 2.94 (1.29, 5.04)
2014/15 0.79 (0.05, 1.88) 8.04 (4.49, 11.54) 1.89 (0.58, 3.66) 1.17 (0.15, 2.49)
In [142]:
virus_outpatient_table[['astrovirus']]
Out[142]:
virus astrovirus
age group year
<1 2012/13 6.38 (0.83, 13.40)
2013/14 9.72 (2.46, 18.49)
2014/15 4.16 (0.33, 10.01)
1 2012/13 15.23 (5.91, 25.60)
2013/14 14.82 (5.95, 24.78)
2014/15 5.65 (0.50, 11.85)
2-4 2012/13 8.96 (4.64, 13.77)
2013/14 5.76 (2.34, 9.58)
2014/15 3.37 (0.70, 6.20)
<5 2012/13 8.99 (5.32, 12.84)
2013/14 7.76 (4.71, 11.20)
2014/15 3.18 (1.09, 5.39)
5+ 2012/13 2.36 (0.74, 4.03)
2013/14 1.00 (0.14, 2.13)
2014/15 0.79 (0.05, 1.88)
In [143]:
def create_virus_table(virus_name):
    estimates = virus_outpatient_table[[virus_name]].rename(columns={virus_name:'estimate'})
    virus_data = (virus_outpatient_merged[virus_outpatient_merged.virus==virus_name]
                          .set_index(['age_group', 'year']) [['cases', 'stool_collected', 'enrolled', 'n']]
                          .set_index(estimates.index))
    return estimates.join(virus_data)
In [144]:
astro_table = create_virus_table('astrovirus')
astro_table.to_excel('results/outpatient_astro.xlsx')
astro_table
Out[144]:
estimate cases stool_collected enrolled n
age group year
<1 2012/13 6.38 (0.83, 13.40) 2 79.0 86 1427
2013/14 9.72 (2.46, 18.49) 4 79.0 95 1427
2014/15 4.16 (0.33, 10.01) 1 85.0 94 1427
1 2012/13 15.23 (5.91, 25.60) 8 93.0 101 1544
2013/14 14.82 (5.95, 24.78) 8 78.0 90 1544
2014/15 5.65 (0.50, 11.85) 2 73.0 85 1544
2-4 2012/13 8.96 (4.64, 13.77) 12 92.0 120 4376
2013/14 5.76 (2.34, 9.58) 8 82.0 119 4376
2014/15 3.37 (0.70, 6.20) 4 76.0 116 4376
<5 2012/13 8.99 (5.32, 12.84) 22 264.0 307 7347
2013/14 7.76 (4.71, 11.20) 20 239.0 304 7347
2014/15 3.18 (1.09, 5.39) 7 234.0 295 7347
5+ 2012/13 2.36 (0.74, 4.03) 6 115.0 152 9237
2013/14 1.00 (0.14, 2.13) 2 84.0 124 9237
2014/15 0.79 (0.05, 1.88) 1 66.0 92 9237
In [145]:
noro_table = create_virus_table('norovirus')
noro_table.to_excel('results/outpatient_noro.xlsx')
noro_table
Out[145]:
estimate cases stool_collected enrolled n
age group year
<1 2012/13 36.13 (20.62, 54.60) 16 79.0 86 1427
2013/14 44.44 (27.15, 62.63) 22 79.0 95 1427
2014/15 60.31 (37.80, 83.68) 29 85.0 94 1427
1 2012/13 32.15 (17.22, 47.45) 18 93.0 101 1544
2013/14 36.00 (22.01, 52.57) 21 78.0 90 1544
2014/15 50.22 (32.06, 71.74) 26 73.0 85 1544
2-4 2012/13 11.72 (6.35, 17.79) 16 92.0 120 4376
2013/14 12.10 (7.25, 18.22) 18 82.0 119 4376
2014/15 14.15 (8.65, 20.97) 20 76.0 116 4376
<5 2012/13 19.97 (14.90, 25.69) 50 264.0 307 7347
2013/14 23.02 (17.17, 29.29) 61 239.0 304 7347
2014/15 29.89 (23.62, 37.16) 75 234.0 295 7347
5+ 2012/13 5.66 (2.94, 8.39) 16 115.0 152 9237
2013/14 5.20 (2.93, 8.00) 15 84.0 124 9237
2014/15 8.04 (4.49, 11.54) 20 66.0 92 9237
In [146]:
sapo_table = create_virus_table('sapovirus')
sapo_table.to_excel('results/outpatient_sapo.xlsx')
sapo_table
Out[146]:
estimate cases stool_collected enrolled n
age group year
<1 2012/13 10.66 (3.35, 20.62) 4 79.0 86 1427
2013/14 25.30 (12.59, 39.93) 12 79.0 95 1427
2014/15 11.92 (3.68, 21.96) 5 85.0 94 1427
1 2012/13 30.39 (16.16, 44.69) 17 93.0 101 1544
2013/14 22.93 (11.78, 36.72) 13 78.0 90 1544
2014/15 16.83 (6.13, 28.55) 8 73.0 85 1544
2-4 2012/13 7.56 (3.05, 12.01) 10 92.0 120 4376
2013/14 8.26 (3.85, 12.47) 12 82.0 119 4376
2014/15 3.38 (0.91, 6.52) 4 76.0 116 4376
<5 2012/13 12.60 (8.29, 16.97) 31 264.0 307 7347
2013/14 14.02 (9.72, 18.40) 37 239.0 304 7347
2014/15 7.15 (4.24, 10.96) 17 234.0 295 7347
5+ 2012/13 3.66 (1.55, 5.80) 10 115.0 152 9237
2013/14 2.94 (1.29, 5.04) 8 84.0 124 9237
2014/15 1.17 (0.15, 2.49) 2 66.0 92 9237
In [147]:
rota_table = create_virus_table('rotavirus')
rota_table.to_excel('results/outpatient_rota.xlsx')
rota_table
Out[147]:
estimate cases stool_collected enrolled n
age group year
<1 2012/13 21.27 (8.97, 34.35) 9 79.0 86 1427
2013/14 9.85 (2.31, 18.54) 4 79.0 95 1427
2014/15 8.08 (1.78, 16.27) 3 85.0 94 1427
1 2012/13 20.13 (9.33, 31.95) 11 93.0 101 1544
2013/14 16.42 (7.33, 26.62) 9 78.0 90 1544
2014/15 20.44 (9.05, 32.96) 10 73.0 85 1544
2-4 2012/13 9.63 (5.06, 14.90) 13 92.0 120 4376
2013/14 2.55 (0.49, 5.14) 3 82.0 119 4376
2014/15 6.05 (2.39, 10.17) 8 76.0 116 4376
<5 2012/13 13.43 (9.05, 18.24) 33 264.0 307 7347
2013/14 6.28 (3.47, 9.33) 16 239.0 304 7347
2014/15 8.71 (5.36, 12.62) 21 234.0 295 7347
5+ 2012/13 2.34 (0.93, 4.16) 6 115.0 152 9237
2013/14 1.32 (0.29, 2.59) 3 84.0 124 9237
2014/15 1.89 (0.58, 3.66) 4 66.0 92 9237

Pooled (over years) virus rate model

In [148]:
virus_outpatient_merged
Out[148]:
age_group year cases stool_collected enrolled n eligible virus
0 0 0 2 79.0 86 1427 149 astrovirus
1 0 1 4 79.0 95 1427 150 astrovirus
2 0 2 1 85.0 94 1427 153 astrovirus
3 1 0 8 93.0 101 1544 149 astrovirus
4 1 1 8 78.0 90 1544 129 astrovirus
5 1 2 2 73.0 85 1544 139 astrovirus
6 2 0 12 92.0 120 4376 204 astrovirus
7 2 1 8 82.0 119 4376 187 astrovirus
8 2 2 4 76.0 116 4376 192 astrovirus
12 0 0 16 79.0 86 1427 149 norovirus
13 0 1 22 79.0 95 1427 150 norovirus
14 0 2 29 85.0 94 1427 153 norovirus
15 1 0 18 93.0 101 1544 149 norovirus
16 1 1 21 78.0 90 1544 129 norovirus
17 1 2 26 73.0 85 1544 139 norovirus
18 2 0 16 92.0 120 4376 204 norovirus
19 2 1 18 82.0 119 4376 187 norovirus
20 2 2 20 76.0 116 4376 192 norovirus
24 0 0 9 79.0 86 1427 149 rotavirus
25 0 1 4 79.0 95 1427 150 rotavirus
26 0 2 3 85.0 94 1427 153 rotavirus
27 1 0 11 93.0 101 1544 149 rotavirus
28 1 1 9 78.0 90 1544 129 rotavirus
29 1 2 10 73.0 85 1544 139 rotavirus
30 2 0 13 92.0 120 4376 204 rotavirus
31 2 1 3 82.0 119 4376 187 rotavirus
32 2 2 8 76.0 116 4376 192 rotavirus
36 0 0 4 79.0 86 1427 149 sapovirus
37 0 1 12 79.0 95 1427 150 sapovirus
38 0 2 5 85.0 94 1427 153 sapovirus
39 1 0 17 93.0 101 1544 149 sapovirus
40 1 1 13 78.0 90 1544 129 sapovirus
41 1 2 8 73.0 85 1544 139 sapovirus
42 2 0 10 92.0 120 4376 204 sapovirus
43 2 1 12 82.0 119 4376 187 sapovirus
44 2 2 4 76.0 116 4376 192 sapovirus
0 3 0 22 264.0 307 7347 502 astrovirus
1 3 1 20 239.0 304 7347 466 astrovirus
2 3 2 7 234.0 295 7347 484 astrovirus
3 3 0 50 264.0 307 7347 502 norovirus
4 3 1 61 239.0 304 7347 466 norovirus
5 3 2 75 234.0 295 7347 484 norovirus
6 3 0 33 264.0 307 7347 502 rotavirus
7 3 1 16 239.0 304 7347 466 rotavirus
8 3 2 21 234.0 295 7347 484 rotavirus
9 3 0 31 264.0 307 7347 502 sapovirus
10 3 1 37 239.0 304 7347 466 sapovirus
11 3 2 17 234.0 295 7347 484 sapovirus
0 4 0 6 115.0 152 9237 263 astrovirus
1 4 1 2 84.0 124 9237 211 astrovirus
2 4 2 1 66.0 92 9237 182 astrovirus
3 4 0 16 115.0 152 9237 263 norovirus
4 4 1 15 84.0 124 9237 211 norovirus
5 4 2 20 66.0 92 9237 182 norovirus
6 4 0 6 115.0 152 9237 263 rotavirus
7 4 1 3 84.0 124 9237 211 rotavirus
8 4 2 4 66.0 92 9237 182 rotavirus
9 4 0 10 115.0 152 9237 263 sapovirus
10 4 1 8 84.0 124 9237 211 sapovirus
11 4 2 2 66.0 92 9237 182 sapovirus
In [151]:
virus_outpatient_pooled = virus_outpatient_merged.groupby(['age_group', 'virus']).agg({'n':lambda x: max(x)*3, 
                                                           'cases':sum, 
                                                           'stool_collected':sum, 
                                                           'enrolled':sum, 'eligible':sum}).reset_index()
In [153]:
with Model() as virus_pooled_model:
    
    groups = virus_outpatient_pooled.shape[0]
    
    # Probability of stool collection
    ψ = Beta('ψ', 1, 1)
    outpatient_enrolled = (rotavirus_data.setting==OUTPATIENT).sum()
    collected = rotavirus_data[rotavirus_data.setting==OUTPATIENT].stool_collected.sum()
    stool = Binomial('stool', n=outpatient_enrolled, p=ψ, observed=collected)

    cases, enrolled, n, eligible = virus_outpatient_pooled[['cases', 'enrolled', 'n', 'eligible']].values.T
    
    # Adjust for enrollment
    enrollment_rate = Beta('p_enrolled', 1, 1, shape=groups)
    enrolled_cases = Binomial('enrolled_cases', eligible, enrollment_rate, observed=enrolled)
    
    eligible_cases = Uniform('eligible_cases', lower=cases, upper=eligible, 
                                     shape=groups)
    
    p = monitoring_rate[OUTPATIENT]*ψ*enrollment_rate

    # Estimate total VUMC cases in setting
    eligible_cases_likelihood = Binomial('eligible_cases_likelihood', 
                                         n=eligible_cases, 
                                         p=p,
                                         observed=cases)
    

    π = Beta('π', 1, 10, shape=groups)

    # Data likeihood
    virus_out_obs = Potential('virus_out_obs', 
              Binomial.dist(n=n, p=π).logp(eligible_cases).sum())
        
        
In [154]:
with virus_pooled_model:
    virus_outpatient_pooled_trace = sample(iterations, tune=tune,
                                    njobs=2, random_seed=seed_numbers)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:08<00:00, 222.65it/s]

MCMC diagnostic plot

In [156]:
energyplot(virus_outpatient_pooled_trace)
Out[156]:
<matplotlib.axes._subplots.AxesSubplot at 0x1392f8208>
In [158]:
outpatient_virus_pooled_labels = (virus_outpatient_pooled.drop(['enrolled', 'n', 'stool_collected', 'cases'],
                                          axis=1).reset_index(drop=True)
                          .replace({'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'<5', 4:'5+'},
                          'virus':virus_lookup})
                .rename(columns={'age_group':'age group'}))
In [159]:
virus_outpatient_pooled_table = generate_table(virus_outpatient_pooled_trace, outpatient_virus_pooled_labels, 
                                        index=['age group'], varnames=['π'],
               columns=['virus']).sort_index()
# virus_outpatient_table.to_excel('results/virus_outpatient_table.xlsx')
In [160]:
virus_outpatient_pooled_table
Out[160]:
virus astrovirus norovirus rotavirus sapovirus
age group
1 10.83 (6.19, 16.13) 37.63 (27.79, 46.75) 17.56 (10.96, 23.74) 22.26 (15.27, 29.07)
2-4 5.45 (3.31, 7.72) 12.09 (8.77, 15.43) 5.53 (3.48, 7.89) 5.94 (3.59, 8.22)
5+ 1.12 (0.48, 1.78) 5.89 (4.32, 7.66) 1.57 (0.74, 2.42) 2.36 (1.36, 3.37)
<1 5.31 (1.94, 8.83) 45.72 (34.48, 56.44) 11.47 (6.32, 16.90) 14.83 (8.93, 21.35)
<5 6.39 (4.69, 8.20) 23.82 (20.24, 27.36) 9.07 (6.87, 11.11) 10.96 (8.69, 13.36)
In [161]:
def create_pooled_table(virus_name):
    estimates = virus_outpatient_pooled_table[[virus_name]].rename(columns={virus_name:'estimate'})
    virus_data = (virus_outpatient_pooled[virus_outpatient_pooled.virus==virus_name]
                          .set_index(['age_group']) [['cases', 'stool_collected', 'enrolled', 'n']]
                          .set_index(estimates.index))
    return estimates.join(virus_data).loc[['<1', '1', '2-4', '<5', '5+']]
In [162]:
astro_pooled = create_pooled_table('astrovirus')
astro_pooled.to_excel('results/outpatient_astro_pooled.xlsx')
astro_pooled
Out[162]:
estimate cases stool_collected enrolled n
age group
<1 5.31 (1.94, 8.83) 49 737.0 906 22041
1 10.83 (6.19, 16.13) 7 243.0 275 4281
2-4 5.45 (3.31, 7.72) 18 244.0 276 4632
<5 6.39 (4.69, 8.20) 9 265.0 368 27711
5+ 1.12 (0.48, 1.78) 24 250.0 355 13128
In [163]:
noro_pooled = create_pooled_table('norovirus')
noro_pooled.to_excel('results/outpatient_noro_pooled.xlsx')
noro_pooled
Out[163]:
estimate cases stool_collected enrolled n
age group
<1 45.72 (34.48, 56.44) 186 737.0 906 22041
1 37.63 (27.79, 46.75) 67 243.0 275 4281
2-4 12.09 (8.77, 15.43) 65 244.0 276 4632
<5 23.82 (20.24, 27.36) 51 265.0 368 27711
5+ 5.89 (4.32, 7.66) 54 250.0 355 13128
In [164]:
sapo_pooled = create_pooled_table('sapovirus')
sapo_pooled.to_excel('results/outpatient_sapo_pooled.xlsx')
sapo_pooled
Out[164]:
estimate cases stool_collected enrolled n
age group
<1 14.83 (8.93, 21.35) 85 737.0 906 22041
1 22.26 (15.27, 29.07) 21 243.0 275 4281
2-4 5.94 (3.59, 8.22) 38 244.0 276 4632
<5 10.96 (8.69, 13.36) 20 265.0 368 27711
5+ 2.36 (1.36, 3.37) 26 250.0 355 13128
In [165]:
rota_pooled = create_pooled_table('rotavirus')
rota_pooled.to_excel('results/outpatient_rota_pooled.xlsx')
rota_pooled
Out[165]:
estimate cases stool_collected enrolled n
age group
<1 11.47 (6.32, 16.90) 70 737.0 906 22041
1 17.56 (10.96, 23.74) 16 243.0 275 4281
2-4 5.53 (3.48, 7.89) 30 244.0 276 4632
<5 9.07 (6.87, 11.11) 13 265.0 368 27711
5+ 1.57 (0.74, 2.42) 24 250.0 355 13128