%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import itertools
sns.set_context('notebook')
seed_numbers = 20090425, 19700903
from datetime import timedelta, datetime
# rotavirus_data = pd.read_csv('data/AGE Year 2-4 with Final Rota.csv', low_memory=False)
cases_file = 'data/AGE Cases and Healthy Controls Merged with Lab Results Year 2 to 4 v05.23.2017.csv'
rotavirus_data = pd.read_csv(cases_file, low_memory=False, parse_dates=['scrdate', 'dob'])
rotavirus_data.index.is_unique
True
rotavirus_data.head()
caseid | case | settingnew | rota_od | vaxstrain | rota_genotype | noro_type | noroseq_gii_c | noroseq_gii_d | noroseq_gi_c | ... | ddxdioten1 | ddxdioten2 | ddxdioten3 | ddxdioten4 | ddxdioten5 | ddxdioten6 | ddxdioten7 | ddxdioten8 | ddxdioten9 | ddxdioten10 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | EN1C0001 | 1 | 3 | 0.088 | NaN | NEG | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | EN1C0002 | 1 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | EN1C0003 | 1 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | EN1C0004 | 1 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | EN1C0006 | 1 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 698 columns
rotavirus_data.query('year==3').sort_values('scrdate').scrdate
2046 2013-12-01 2048 2013-12-01 2049 2013-12-01 2050 2013-12-01 2051 2013-12-01 2052 2013-12-01 2054 2013-12-01 2047 2013-12-01 2053 2013-12-02 460 2013-12-02 4264 2013-12-02 4263 2013-12-02 4262 2013-12-02 4261 2013-12-02 4260 2013-12-02 4257 2013-12-02 4256 2013-12-02 4255 2013-12-02 2055 2013-12-02 4254 2013-12-02 462 2013-12-03 2058 2013-12-04 461 2013-12-04 2056 2013-12-04 2057 2013-12-04 2059 2013-12-04 463 2013-12-05 464 2013-12-05 465 2013-12-05 2060 2013-12-05 ... 882 2014-11-24 881 2014-11-24 880 2014-11-24 878 2014-11-24 4713 2014-11-24 885 2014-11-24 2829 2014-11-25 2827 2014-11-25 2828 2014-11-25 2830 2014-11-25 5220 2014-11-25 888 2014-11-25 889 2014-11-25 890 2014-11-25 892 2014-11-25 4715 2014-11-25 891 2014-11-26 5217 2014-11-26 2837 2014-11-30 2842 2014-11-30 2841 2014-11-30 2840 2014-11-30 2831 2014-11-30 2832 2014-11-30 2833 2014-11-30 2834 2014-11-30 2835 2014-11-30 2836 2014-11-30 2838 2014-11-30 2839 2014-11-30 Name: scrdate, Length: 1740, dtype: datetime64[ns]
Import eligibles
eligible_file = 'data/AGE Outpatient Eligibility Data Year 2-4.csv'
eligible_data = pd.read_csv(eligible_file, low_memory=False, index_col=0, parse_dates=['scrdate'])
eligible_data.index.is_unique
True
eligible_data.head()
scrdate | admitdate | dob | agedays | agemonths | ageyears | insurance | sex | race | race2_8 | adxd1 | adxd2 | transfer | noninfect | immcomp | prevenroll | elig | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
caseid | |||||||||||||||||
EN1C0001 | 2012-12-03 | 12/3/12 | 6/11/06 | 2366.08 | 77.74 | 6.478 | 1 | 1 | 6 | 1.0 | 6.0 | 7.0 | 0 | 0 | 0 | 0 | 1 |
EN1C0002 | 2012-12-03 | 12/3/12 | 8/20/05 | 2661.45 | 87.44 | 7.287 | 1 | 1 | 2 | NaN | 6.0 | 7.0 | 0 | 0 | 0 | 0 | 1 |
EN1C0003 | 2012-12-03 | 12/3/12 | 8/14/07 | 1936.96 | 63.64 | 5.303 | 1 | 2 | 2 | NaN | 1.0 | 4.0 | 0 | 0 | 0 | 0 | 1 |
EN1C0004 | 2012-12-04 | 12/4/12 | 11/6/09 | 1124.16 | 36.93 | 3.078 | 1 | 2 | 6 | 1.0 | 7.0 | NaN | 0 | 0 | 0 | 0 | 1 |
EN1C0006 | 2012-12-04 | 12/4/12 | 7/8/07 | 1974.40 | 64.87 | 5.406 | 1 | 2 | 2 | NaN | 7.0 | 1.0 | 0 | 0 | 0 | 0 | 1 |
eligible_data.scrdate.min(), eligible_data.scrdate.max()
(Timestamp('2012-12-03 00:00:00'), Timestamp('2015-11-30 00:00:00'))
eligible_data['year'] = 1
eligible_data.loc[eligible_data.scrdate < datetime(2013,12,1), 'year'] = 0
eligible_data.loc[eligible_data.scrdate >= datetime(2014,12,1), 'year'] = 2
eligible_data['under_5'] = ((eligible_data.agemonths) < 60).astype(int)
eligible_data['under_2'] = ((eligible_data.agemonths) < 24).astype(int)
eligible_data['under_1'] = ((eligible_data.agemonths) < 12).astype(int)
eligible_data['age_group'] = 3
eligible_data.loc[eligible_data.under_5==1, 'age_group'] = 2
eligible_data.loc[eligible_data.under_2==1, 'age_group'] = 1
eligible_data.loc[eligible_data.under_1==1, 'age_group'] = 0
eligible_counts = eligible_data.groupby(['age_group', 'year']).size()
eligible_counts.name = 'eligible'
eligible_counts
age_group year 0 0 149 1 150 2 153 1 0 149 1 129 2 139 2 0 204 1 187 2 192 3 0 263 1 211 2 182 Name: eligible, dtype: int64
eligible_data.shape
(2108, 22)
Rename columns and recode data
rotavirus_data.race.value_counts()
1 2766 2 1968 6 411 4 105 7 10 5 3 3 1 8 1 Name: race, dtype: int64
rotavirus_data['white'] = rotavirus_data['race'] == 1
rotavirus_data['black'] = rotavirus_data['race'] == 2
rotavirus_data['astro'].value_counts().index
Float64Index([0.0, 1.0], dtype='float64')
rotavirus_data=rotavirus_data.rename(columns={'rotacdc':'rotavirus',
'sapo':'sapovirus',
'astro':'astrovirus',
'noro':'norovirus'})
rotavirus_data.rotavirus.value_counts()
0.0 3722 1.0 290 Name: rotavirus, dtype: int64
rotavirus_data.rotavu.value_counts()
0.0 3574 1.0 441 Name: rotavu, dtype: int64
virus_cols = ['rotavirus','sapovirus','astrovirus','norovirus']
Proportion of test results missing
rotavirus_data[virus_cols].isnull().mean()
rotavirus 0.237987 sapovirus 0.237417 astrovirus 0.237417 norovirus 0.237417 dtype: float64
astro_data = rotavirus_data[rotavirus_data.astrovirus==1].dropna(subset=['astro_ct'])
astro_data.shape
(159, 700)
noro_data = rotavirus_data[rotavirus_data.norovirus==1].dropna(subset=['noro_ct'])
noro_data.shape
(729, 700)
noro_data['coinfect'] = noro_data[['rotavirus', 'sapovirus', 'astrovirus']].sum(1)
astro_data['coinfect'] = astro_data[['rotavirus', 'sapovirus', 'norovirus']].sum(1)
with_coinf = noro_data.noro_ct[noro_data.coinfect==1].astype(float)
no_coinf = noro_data.noro_ct[(noro_data.coinfect==0)].astype(float)
# with_coinf = astro_data.astro_ct[astro_data.coinfect==1].astype(float)
# no_coinf = astro_data.astro_ct[(astro_data.coinfect==0)].astype(float)
from scipy.stats import ranksums
ranksums(with_coinf, no_coinf)
RanksumsResult(statistic=3.6398985691965766, pvalue=0.00027274545863325173)
rotavirus_data.sapo_ct.dropna().astype(float).hist()
<matplotlib.axes._subplots.AxesSubplot at 0x11d60c668>
Calculate age, and compare to given age.
rotavirus_data['age_months'] = (rotavirus_data['scrdate']
- rotavirus_data['dob'] + timedelta(1)).astype('timedelta64[M]')
rotavirus_data.age_months.hist();
Group into ages under 5 and 5 or over
rotavirus_data['under_5'] = ((rotavirus_data.age_months) < 60).astype(int)
rotavirus_data['under_2'] = ((rotavirus_data.age_months) < 24).astype(int)
rotavirus_data['under_1'] = ((rotavirus_data.age_months) < 12).astype(int)
rotavirus_data['age_group'] = 3
rotavirus_data.loc[rotavirus_data.under_5==1, 'age_group'] = 2
rotavirus_data.loc[rotavirus_data.under_2==1, 'age_group'] = 1
rotavirus_data.loc[rotavirus_data.under_1==1, 'age_group'] = 0
rotavirus_data.age_group.value_counts()
3 1459 0 1365 2 1344 1 1097 Name: age_group, dtype: int64
rotavirus_data.year.value_counts()
2 1989 3 1740 4 1536 Name: year, dtype: int64
Calculate natural year
rotavirus_data['year'] = rotavirus_data.year - 2
Rename setting variable
assert not rotavirus_data.settingnew.isnull().sum()
rotavirus_data['setting'] = rotavirus_data.settingnew - 1
# Indices to each value
INPATIENT, ED, OUTPATIENT, HC = 0, 1, 2, 3
Recode sex and stool
assert not rotavirus_data['specimencol'].isnull().sum()
rotavirus_data['male'] = rotavirus_data['sex']==1
rotavirus_data['stool_collected'] = rotavirus_data['specimencol']==1
Recode insurance
rotavirus_data.insurance.value_counts()
1 4599 2 500 3 84 8 44 4 38 Name: insurance, dtype: int64
rotavirus_data['public_ins'] = rotavirus_data.insurance.isin({1, 3})
rotavirus_data['private_ins'] = rotavirus_data.insurance.isin({2, 3})
rotavirus_data.loc[rotavirus_data.insurance==8, ['public_ins', 'private_ins']] = np.nan
# Drop extraneous columns
rotavirus_data = rotavirus_data.drop(['intvwdate', 'dob', 'provider', 'coldat', 'tdat',
'insurance', 'sex', 'hispcr', 'mdegree_7', 'rvacver_7',
'rvacver_7r', 'race', 'race2_8'], axis=1)
Summary of missing values
rotavirus_data.isnull().sum()
caseid 0 case 0 settingnew 0 rota_od 5085 vaxstrain 5264 rota_genotype 4826 noro_type 4535 noroseq_gii_c 4772 noroseq_gii_d 4772 noroseq_gi_c 5022 noroseq_gi_d 5037 year 0 rotavirus 1253 rotavu 1250 sapovirus 1250 sapo_ct 4917 astrovirus 1250 astro_ct 5106 norovirus 1250 noro_ct 4536 studysite 0 hospital 0 abiniscr 0 deiniscr 0 scrdate 0 admitdate 0 admittime 13 agedays 0 agemonths 0 ageyears 0 ... ddxdcten3 5235 ddxdcten4 5254 ddxdcten5 5259 ddxdcten6 5263 ddxdcten7 5264 ddxdcten8 5264 ddxdcten9 5265 ddxdcten10 5265 ddxdioten1 5258 ddxdioten2 5259 ddxdioten3 5261 ddxdioten4 5261 ddxdioten5 5263 ddxdioten6 5263 ddxdioten7 5263 ddxdioten8 5264 ddxdioten9 5264 ddxdioten10 5264 white 0 black 0 age_months 0 under_5 0 under_2 0 under_1 0 age_group 0 setting 0 male 0 stool_collected 0 public_ins 44 private_ins 44 Length: 697, dtype: int64
Aggregate data by age group and setting
rotavirus_summary = (rotavirus_data.drop('scrdate', axis=1)
.groupby(['age_group', 'year', 'setting', 'black'])['caseid'].count()
.reset_index()
.rename(columns={'caseid':'cases'}))
rotavirus_summary.head()
age_group | year | setting | black | cases | |
---|---|---|---|---|---|
0 | 0 | 0 | 0 | False | 25 |
1 | 0 | 0 | 0 | True | 10 |
2 | 0 | 0 | 1 | False | 94 |
3 | 0 | 0 | 1 | True | 72 |
4 | 0 | 0 | 2 | False | 56 |
Enrollment days/week for inpatient, outpatient, ED
monitoring_days = 7, 4, 4
# Only 5.5 days of intake on outpatient
monitoring_rate = np.array(monitoring_days)/np.array([7, 7, 5.5])
monitoring_rate
array([ 1. , 0.57142857, 0.72727273])
from pymc3 import Model, Deterministic, sample, NUTS, find_MAP, Potential, Metropolis
from pymc3 import Binomial, Beta, Poisson, Normal, DiscreteUniform, Uniform, Flat, Gamma
from pymc3 import traceplot, forestplot, summary, df_summary, energyplot
import theano.tensor as tt
iterations = 1000
tune = 1000
label_map = {'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'5+'},
'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'},
'black':{0:'white', 1:'black'},
'setting':{0:'inpatient', 1:'ED', 2:'outpatient'}}
Rate factor for population
rate_factor = 1000
from pymc3 import hpd
def _hpd_df(x, alpha):
cnames = ['hpd_{0:g}'.format(100 * alpha/2),
'hpd_{0:g}'.format(100 * (1 - alpha/2))]
return pd.DataFrame(hpd(x, alpha), columns=cnames)
def generate_table(trace, labels, index, columns, varnames=['λ']):
rate_df_index = pd.MultiIndex.from_tuples(labels.values.tolist(),
names=labels.columns)
rate_df = (df_summary(trace, varnames=varnames) * rate_factor)[['mean', 'hpd_2.5', 'hpd_97.5']]
rate_df.index = rate_df_index
pivot_all = pd.pivot_table(rate_df
.reset_index(),
index=index, columns=columns)
value_iterator = zip(pivot_all['mean'].values.flatten(),
pivot_all['hpd_2.5'].values.flatten(),
pivot_all['hpd_97.5'].values.flatten())
rate_strings = np.reshape(['{0:.2f} ({1:.2f}, {2:.2f})'.format(*vals) for vals in value_iterator],
pivot_all['mean'].shape)
return pd.DataFrame(rate_strings, index=pivot_all['mean'].index, columns=pivot_all['mean'].columns)
import redcap
import getpass
local_auth = True
api_url = 'https://redcap.vanderbilt.edu/api/'
if local_auth:
api_key = open("/Users/fonnescj/Dropbox/Collaborations/Halasa/Tokens/outpatient_token.txt").read()
else:
api_key = getpass.getpass()
outpatient_project = redcap.Project(api_url, api_key)
outpatients = outpatient_project.export_records(format='df')
outpatients.groupby('year').year.count()
year 2012 15969 2015 17562 Name: year, dtype: int64
outpatients.shape
(33531, 15)
outpatients[['ageinjan', 'ageinnov']].hist(bins=20)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x126a23240>, <matplotlib.axes._subplots.AxesSubplot object at 0x11dcea2e8>]], dtype=object)
Going to assume that the outlier ages above were mistakenly entered as months.
outliers = outpatients.ageinjan > 20
outpatients.loc[outliers, ['ageinjan', 'ageinnov']] = outpatients.loc[outliers, ['ageinjan', 'ageinnov']] / 12
outpatients[['ageinjan', 'ageinnov']].hist(bins=20)
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1207c8be0>, <matplotlib.axes._subplots.AxesSubplot object at 0x126294208>]], dtype=object)
outpatients['age'] = np.maximum(outpatients[['ageinjan', 'ageinnov']].mean(1), 0.1)
outpatients_kids = outpatients[outpatients.age<18].copy()
outpatients_kids['5_and_older'] = (outpatients_kids.age>=5).astype(int)
outpatients_kids['under_5'] = (outpatients_kids.age<5).astype(int)
outpatients_kids['under_2'] = (outpatients_kids.age<2).astype(int)
outpatients_kids['under_1'] = (outpatients_kids.age<1).astype(int)
outpatients_kids['age_group'] = 3
outpatients_kids.loc[outpatients_kids.under_5==1, 'age_group'] = 2
outpatients_kids.loc[outpatients_kids.under_2==1, 'age_group'] = 1
outpatients_kids.loc[outpatients_kids.under_1==1, 'age_group'] = 0
outpatients_kids.groupby('year').age.hist(alpha=0.3)
year 2012 Axes(0.125,0.125;0.775x0.755) 2015 Axes(0.125,0.125;0.775x0.755) Name: age, dtype: object
outpatient_n = (outpatients_kids.rename(columns={'ptname':'n'})
.groupby(['year', 'age_group'])
.n.count())
outpatient_n
year age_group 2012 0 1237 1 1652 2 4446 3 8605 2015 0 1618 1 1436 2 4307 3 9870 Name: n, dtype: int64
rotavirus_data.head()
caseid | case | settingnew | rota_od | vaxstrain | rota_genotype | noro_type | noroseq_gii_c | noroseq_gii_d | noroseq_gi_c | ... | age_months | under_5 | under_2 | under_1 | age_group | setting | male | stool_collected | public_ins | private_ins | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | EN1C0001 | 1 | 3 | 0.088 | NaN | NEG | NaN | NaN | NaN | NaN | ... | 77.0 | 0 | 0 | 0 | 3 | 2 | True | True | 1.0 | 0.0 |
1 | EN1C0002 | 1 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 87.0 | 0 | 0 | 0 | 3 | 2 | True | False | 1.0 | 0.0 |
2 | EN1C0003 | 1 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 63.0 | 0 | 0 | 0 | 3 | 2 | False | False | 1.0 | 0.0 |
3 | EN1C0004 | 1 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 36.0 | 1 | 0 | 0 | 2 | 2 | False | True | 1.0 | 0.0 |
4 | EN1C0006 | 1 | 3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 64.0 | 0 | 0 | 0 | 3 | 2 | False | False | 1.0 | 0.0 |
5 rows × 697 columns
eligible_data.head()
scrdate | admitdate | dob | agedays | agemonths | ageyears | insurance | sex | race | race2_8 | ... | transfer | noninfect | immcomp | prevenroll | elig | year | under_5 | under_2 | under_1 | age_group | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
caseid | |||||||||||||||||||||
EN1C0001 | 2012-12-03 | 12/3/12 | 6/11/06 | 2366.08 | 77.74 | 6.478 | 1 | 1 | 6 | 1.0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 3 |
EN1C0002 | 2012-12-03 | 12/3/12 | 8/20/05 | 2661.45 | 87.44 | 7.287 | 1 | 1 | 2 | NaN | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 3 |
EN1C0003 | 2012-12-03 | 12/3/12 | 8/14/07 | 1936.96 | 63.64 | 5.303 | 1 | 2 | 2 | NaN | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 3 |
EN1C0004 | 2012-12-04 | 12/4/12 | 11/6/09 | 1124.16 | 36.93 | 3.078 | 1 | 2 | 6 | 1.0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 2 |
EN1C0006 | 2012-12-04 | 12/4/12 | 7/8/07 | 1974.40 | 64.87 | 5.406 | 1 | 2 | 2 | NaN | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 3 |
5 rows × 22 columns
eligible_counts.sum()
2108
Average denominators
outpatient_n.head()
year age_group 2012 0 1237 1 1652 2 4446 3 8605 2015 0 1618 Name: n, dtype: int64
outpatient_n_mean = outpatient_n.groupby('age_group').mean().astype(int)
outpatient_data_full = (rotavirus_summary[rotavirus_summary.setting==OUTPATIENT].groupby(['age_group','year'])
.cases.sum().reset_index())
outpatient_data_full
age_group | year | cases | |
---|---|---|---|
0 | 0 | 0 | 86 |
1 | 0 | 1 | 95 |
2 | 0 | 2 | 94 |
3 | 1 | 0 | 101 |
4 | 1 | 1 | 90 |
5 | 1 | 2 | 85 |
6 | 2 | 0 | 120 |
7 | 2 | 1 | 119 |
8 | 2 | 2 | 116 |
9 | 3 | 0 | 152 |
10 | 3 | 1 | 124 |
11 | 3 | 2 | 92 |
outpatient_merged = (outpatient_data_full.merge(outpatient_n_mean.reset_index(), on=['age_group'])
.merge(pd.DataFrame(eligible_counts).reset_index(), on=['age_group', 'year']))
outpatient_merged
age_group | year | cases | n | eligible | |
---|---|---|---|---|---|
0 | 0 | 0 | 86 | 1427 | 149 |
1 | 0 | 1 | 95 | 1427 | 150 |
2 | 0 | 2 | 94 | 1427 | 153 |
3 | 1 | 0 | 101 | 1544 | 149 |
4 | 1 | 1 | 90 | 1544 | 129 |
5 | 1 | 2 | 85 | 1544 | 139 |
6 | 2 | 0 | 120 | 4376 | 204 |
7 | 2 | 1 | 119 | 4376 | 187 |
8 | 2 | 2 | 116 | 4376 | 192 |
9 | 3 | 0 | 152 | 9237 | 263 |
10 | 3 | 1 | 124 | 9237 | 211 |
11 | 3 | 2 | 92 | 9237 | 182 |
outpatient_under_5 = outpatient_merged[outpatient_merged.age_group<3]
def outpatient_model(dataset):
age_group, cases, n, eligible = dataset[['age_group','cases','n', 'eligible']].values.T
shape = dataset.shape[0]
n_groups = len(set(age_group))
# Cases weighted by monitoring effort
weights = monitoring_rate[OUTPATIENT]
with Model() as mod:
# Uniform prior on weighted cases
weighted_cases = Uniform('weighted_cases', eligible, n, shape=shape)
# Adjust for enrollment
enrollment_rate = Beta('p_enrolled', 1, 1, shape=shape)
eligible_cases = Binomial('eligible_cases', eligible, enrollment_rate, observed=cases)
# Likelihood of eligible cases
weighting = Binomial('weighting', n=weighted_cases, p=weights, observed=eligible)
θ = Normal('θ', 0, sd=1000, shape=n_groups)
λ = Deterministic('λ', tt.exp(θ))
AGE_obs = Potential('AGE_obs', Poisson.dist(λ[age_group]*n).logp(weighted_cases))
return mod
outpatient_age = outpatient_model(outpatient_under_5)
with outpatient_age:
outpatient_age_trace = sample(iterations, tune=tune, njobs=2, random_seed=seed_numbers)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [00:05<00:00, 382.62it/s]
age_labels = ['<1', '1', '2-4']
forestplot(outpatient_age_trace, varnames=['λ'], transform=lambda x: x*rate_factor, ylabels=age_labels)
<matplotlib.gridspec.GridSpec at 0x13a087908>
outpatient_pooled = outpatient_merged.assign(age_group=(outpatient_merged.age_group==3).astype(int))
outpatient_pooled.head()
age_group | year | cases | n | eligible | |
---|---|---|---|---|---|
0 | 0 | 0 | 86 | 1427 | 149 |
1 | 0 | 1 | 95 | 1427 | 150 |
2 | 0 | 2 | 94 | 1427 | 153 |
3 | 0 | 0 | 101 | 1544 | 149 |
4 | 0 | 1 | 90 | 1544 | 129 |
outpatient_age_pooled = outpatient_model(outpatient_pooled)
with outpatient_age_pooled:
outpatient_age_pooled_trace = sample(iterations, tune=tune, njobs=2, random_seed=seed_numbers)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [00:07<00:00, 267.22it/s]
age_labels_pooled = ['<5', '5+']
forestplot(outpatient_age_pooled_trace, varnames=['λ'], transform=lambda x: x*rate_factor, ylabels=age_labels_pooled)
<matplotlib.gridspec.GridSpec at 0x131f07fd0>
outpatient_age_estimates
mean | hpd_2.5 | hpd_97.5 | |
---|---|---|---|
λ__0 | 145.1 | 132.2 | 158.6 |
λ__1 | 123.7 | 112.1 | 135.6 |
λ__2 | 61.0 | 55.9 | 65.7 |
outpatient_age_estimates = (df_summary(outpatient_age_trace, varnames=['λ'], transform=lambda x: x*rate_factor)
.drop(['sd', 'mc_error', 'n_eff', 'Rhat'], axis=1)
.round(1))
outpatient_age_estimates.index = ['<1', '1', '2-4']
outpatient_age_estimates.columns = 'rate esimate', 'lower 95%', 'upper 95%'
outpatient_age_estimates
rate esimate | lower 95% | upper 95% | |
---|---|---|---|
<1 | 145.1 | 132.2 | 158.6 |
1 | 123.7 | 112.1 | 135.6 |
2-4 | 61.0 | 55.9 | 65.7 |
pooled_estimate = (df_summary(outpatient_age_pooled_trace, varnames=['λ'], transform=lambda x: x*rate_factor)
.drop(['sd', 'mc_error', 'n_eff', 'Rhat'], axis=1)
.round(1))
pooled_estimate.columns = outpatient_age_estimates.columns
pooled_estimate.index = ['<5', '5+']
outpatient_age_estimates.append(pooled_estimate)
rate esimate | lower 95% | upper 95% | |
---|---|---|---|
<1 | 145.1 | 132.2 | 158.6 |
1 | 123.7 | 112.1 | 135.6 |
2-4 | 61.0 | 55.9 | 65.7 |
<5 | 90.6 | 86.0 | 95.2 |
5+ | 32.6 | 30.1 | 35.0 |
outpatient_age_estimates.to_excel('results/outpatient_age.xlsx')
mean_data = (outpatient_under_5.groupby('age_group')[['cases', 'n']].mean()
.rename(columns={'cases':'mean cases', 'n':'mean n'})
.round(1)
.set_index(outpatient_age_estimates.index))
mean_data.join(outpatient_age_estimates)
mean_data.to_excel('results/outpatient_mean_data.xlsx')
collected_stools = (rotavirus_data[rotavirus_data.setting==OUTPATIENT].groupby(['age_group', 'year'])
.agg({'stool_collected':['sum', 'count']}))
levels = collected_stools.columns.levels
labels = collected_stools.columns.labels
collected_stools.columns = ['stool_collected', 'enrolled']
collected_stools = collected_stools.reset_index()
collected_stools
age_group | year | stool_collected | enrolled | |
---|---|---|---|---|
0 | 0 | 0 | 79.0 | 86 |
1 | 0 | 1 | 79.0 | 95 |
2 | 0 | 2 | 85.0 | 94 |
3 | 1 | 0 | 93.0 | 101 |
4 | 1 | 1 | 78.0 | 90 |
5 | 1 | 2 | 73.0 | 85 |
6 | 2 | 0 | 92.0 | 120 |
7 | 2 | 1 | 82.0 | 119 |
8 | 2 | 2 | 76.0 | 116 |
9 | 3 | 0 | 115.0 | 152 |
10 | 3 | 1 | 84.0 | 124 |
11 | 3 | 2 | 66.0 | 92 |
outpatient_n_mean.reset_index()
age_group | n | |
---|---|---|
0 | 0 | 1427 |
1 | 1 | 1544 |
2 | 2 | 4376 |
3 | 3 | 9237 |
All possible combinations of age, year and setting for index.
astro_cases = (rotavirus_data[(rotavirus_data.astrovirus==1) & (rotavirus_data.setting==OUTPATIENT)]
.groupby(['age_group', 'year'])[['stool_collected']].count())
astro_cases.columns = ['cases']
astro_cases = astro_cases.reset_index()
eligible_counts
age_group year 0 0 149 1 150 2 153 1 0 149 1 129 2 139 2 0 204 1 187 2 192 3 0 263 1 211 2 182 Name: eligible, dtype: int64
astro_data = (astro_cases.merge(collected_stools,
on=['age_group', 'year'])
.merge(outpatient_n_mean.reset_index(),
on=['age_group'],
how='left')
.merge(pd.DataFrame(eligible_counts).reset_index(),
on=['age_group', 'year']))
astro_data
age_group | year | cases | stool_collected | enrolled | n | eligible | |
---|---|---|---|---|---|---|---|
0 | 0 | 0 | 2 | 79.0 | 86 | 1427 | 149 |
1 | 0 | 1 | 4 | 79.0 | 95 | 1427 | 150 |
2 | 0 | 2 | 1 | 85.0 | 94 | 1427 | 153 |
3 | 1 | 0 | 8 | 93.0 | 101 | 1544 | 149 |
4 | 1 | 1 | 8 | 78.0 | 90 | 1544 | 129 |
5 | 1 | 2 | 2 | 73.0 | 85 | 1544 | 139 |
6 | 2 | 0 | 12 | 92.0 | 120 | 4376 | 204 |
7 | 2 | 1 | 8 | 82.0 | 119 | 4376 | 187 |
8 | 2 | 2 | 4 | 76.0 | 116 | 4376 | 192 |
9 | 3 | 0 | 6 | 115.0 | 152 | 9237 | 263 |
10 | 3 | 1 | 2 | 84.0 | 124 | 9237 | 211 |
11 | 3 | 2 | 1 | 66.0 | 92 | 9237 | 182 |
sapo_cases = (rotavirus_data[(rotavirus_data.sapovirus==1) & (rotavirus_data.setting==OUTPATIENT)]
.groupby(['age_group', 'year'])[['stool_collected']].count())
sapo_cases.columns = ['cases']
sapo_cases = sapo_cases.reset_index()
sapo_data = (sapo_cases.merge(collected_stools,
on=['age_group', 'year'])
.merge(outpatient_n_mean.reset_index(),
on=['age_group'],
how='left')
.merge(pd.DataFrame(eligible_counts).reset_index(),
on=['age_group', 'year']))
sapo_data.head(10)
age_group | year | cases | stool_collected | enrolled | n | eligible | |
---|---|---|---|---|---|---|---|
0 | 0 | 0 | 4 | 79.0 | 86 | 1427 | 149 |
1 | 0 | 1 | 12 | 79.0 | 95 | 1427 | 150 |
2 | 0 | 2 | 5 | 85.0 | 94 | 1427 | 153 |
3 | 1 | 0 | 17 | 93.0 | 101 | 1544 | 149 |
4 | 1 | 1 | 13 | 78.0 | 90 | 1544 | 129 |
5 | 1 | 2 | 8 | 73.0 | 85 | 1544 | 139 |
6 | 2 | 0 | 10 | 92.0 | 120 | 4376 | 204 |
7 | 2 | 1 | 12 | 82.0 | 119 | 4376 | 187 |
8 | 2 | 2 | 4 | 76.0 | 116 | 4376 | 192 |
9 | 3 | 0 | 10 | 115.0 | 152 | 9237 | 263 |
rota_cases = (rotavirus_data[(rotavirus_data.rotavirus==1) & (rotavirus_data.setting==OUTPATIENT)]
.groupby(['age_group', 'year'])[['stool_collected']].count())
rota_cases.columns = ['cases']
rota_cases = rota_cases.reset_index()
rota_data = (rota_cases.merge(collected_stools,
on=['age_group', 'year'])
.merge(outpatient_n_mean.reset_index(),
on=['age_group'],
how='left')
.merge(pd.DataFrame(eligible_counts).reset_index(),
on=['age_group', 'year']))
rota_data.head(10)
age_group | year | cases | stool_collected | enrolled | n | eligible | |
---|---|---|---|---|---|---|---|
0 | 0 | 0 | 9 | 79.0 | 86 | 1427 | 149 |
1 | 0 | 1 | 4 | 79.0 | 95 | 1427 | 150 |
2 | 0 | 2 | 3 | 85.0 | 94 | 1427 | 153 |
3 | 1 | 0 | 11 | 93.0 | 101 | 1544 | 149 |
4 | 1 | 1 | 9 | 78.0 | 90 | 1544 | 129 |
5 | 1 | 2 | 10 | 73.0 | 85 | 1544 | 139 |
6 | 2 | 0 | 13 | 92.0 | 120 | 4376 | 204 |
7 | 2 | 1 | 3 | 82.0 | 119 | 4376 | 187 |
8 | 2 | 2 | 8 | 76.0 | 116 | 4376 | 192 |
9 | 3 | 0 | 6 | 115.0 | 152 | 9237 | 263 |
noro_cases = (rotavirus_data[(rotavirus_data.norovirus==1) & (rotavirus_data.setting==OUTPATIENT)]
.groupby(['age_group', 'year'])[['stool_collected']].count())
noro_cases.columns = ['cases']
noro_cases = noro_cases.reset_index()
noro_data = (noro_cases.merge(collected_stools,
on=['age_group', 'year'])
.merge(outpatient_n_mean.reset_index(),
on=['age_group'],
how='left')
.merge(pd.DataFrame(eligible_counts).reset_index(),
on=['age_group', 'year']))
noro_data.head(10)
age_group | year | cases | stool_collected | enrolled | n | eligible | |
---|---|---|---|---|---|---|---|
0 | 0 | 0 | 16 | 79.0 | 86 | 1427 | 149 |
1 | 0 | 1 | 22 | 79.0 | 95 | 1427 | 150 |
2 | 0 | 2 | 29 | 85.0 | 94 | 1427 | 153 |
3 | 1 | 0 | 18 | 93.0 | 101 | 1544 | 149 |
4 | 1 | 1 | 21 | 78.0 | 90 | 1544 | 129 |
5 | 1 | 2 | 26 | 73.0 | 85 | 1544 | 139 |
6 | 2 | 0 | 16 | 92.0 | 120 | 4376 | 204 |
7 | 2 | 1 | 18 | 82.0 | 119 | 4376 | 187 |
8 | 2 | 2 | 20 | 76.0 | 116 | 4376 | 192 |
9 | 3 | 0 | 16 | 115.0 | 152 | 9237 | 263 |
virus_totals = pd.concat([astro_data.assign(virus='astrovirus'),
noro_data.assign(virus='norovirus'),
rota_data.assign(virus='rotavirus'),
sapo_data.assign(virus='sapovirus')]).reset_index()
virus_totals['cases'] = virus_totals.cases.astype(int)
virus_totals['n'] = virus_totals.n.astype(int)
virus_lookup = {0:'norovirus',
1:'rotavirus',
2:'sapovirus',
3:'astrovirus'}
virus_outpatient_data = (virus_totals.replace({'virus': virus_lookup}))
cases, enrolled, n = virus_outpatient_data[['cases', 'enrolled', 'n']].values.T
virus_outpatient_under_5 = virus_outpatient_data[virus_outpatient_data.age_group<3].drop('index', axis=1)
virus_outpatient_pooled = virus_outpatient_under_5.groupby(['virus', 'year']).agg({'cases':sum, 'stool_collected':sum,
'enrolled':sum, 'n':sum, 'eligible':sum}).reset_index()
virus_outpatient_pooled['age_group'] = 3
virus_outpatient_pooled[virus_outpatient_under_5.columns]
age_group | year | cases | stool_collected | enrolled | n | eligible | virus | |
---|---|---|---|---|---|---|---|---|
0 | 3 | 0 | 22 | 264.0 | 307 | 7347 | 502 | astrovirus |
1 | 3 | 1 | 20 | 239.0 | 304 | 7347 | 466 | astrovirus |
2 | 3 | 2 | 7 | 234.0 | 295 | 7347 | 484 | astrovirus |
3 | 3 | 0 | 50 | 264.0 | 307 | 7347 | 502 | norovirus |
4 | 3 | 1 | 61 | 239.0 | 304 | 7347 | 466 | norovirus |
5 | 3 | 2 | 75 | 234.0 | 295 | 7347 | 484 | norovirus |
6 | 3 | 0 | 33 | 264.0 | 307 | 7347 | 502 | rotavirus |
7 | 3 | 1 | 16 | 239.0 | 304 | 7347 | 466 | rotavirus |
8 | 3 | 2 | 21 | 234.0 | 295 | 7347 | 484 | rotavirus |
9 | 3 | 0 | 31 | 264.0 | 307 | 7347 | 502 | sapovirus |
10 | 3 | 1 | 37 | 239.0 | 304 | 7347 | 466 | sapovirus |
11 | 3 | 2 | 17 | 234.0 | 295 | 7347 | 484 | sapovirus |
virus_outpatient_5_over = (virus_outpatient_data[virus_outpatient_data.age_group==3]
.drop('index', axis=1)
.groupby(['virus', 'year'])
.agg({'cases':sum, 'stool_collected':sum,
'enrolled':sum, 'n':sum, 'eligible':sum})
.reset_index())
virus_outpatient_5_over['age_group'] = 4
virus_outpatient_5_over[virus_outpatient_under_5.columns]
age_group | year | cases | stool_collected | enrolled | n | eligible | virus | |
---|---|---|---|---|---|---|---|---|
0 | 4 | 0 | 6 | 115.0 | 152 | 9237 | 263 | astrovirus |
1 | 4 | 1 | 2 | 84.0 | 124 | 9237 | 211 | astrovirus |
2 | 4 | 2 | 1 | 66.0 | 92 | 9237 | 182 | astrovirus |
3 | 4 | 0 | 16 | 115.0 | 152 | 9237 | 263 | norovirus |
4 | 4 | 1 | 15 | 84.0 | 124 | 9237 | 211 | norovirus |
5 | 4 | 2 | 20 | 66.0 | 92 | 9237 | 182 | norovirus |
6 | 4 | 0 | 6 | 115.0 | 152 | 9237 | 263 | rotavirus |
7 | 4 | 1 | 3 | 84.0 | 124 | 9237 | 211 | rotavirus |
8 | 4 | 2 | 4 | 66.0 | 92 | 9237 | 182 | rotavirus |
9 | 4 | 0 | 10 | 115.0 | 152 | 9237 | 263 | sapovirus |
10 | 4 | 1 | 8 | 84.0 | 124 | 9237 | 211 | sapovirus |
11 | 4 | 2 | 2 | 66.0 | 92 | 9237 | 182 | sapovirus |
virus_outpatient_merged = pd.concat([virus_outpatient_under_5,
virus_outpatient_pooled[virus_outpatient_under_5.columns],
virus_outpatient_5_over[virus_outpatient_under_5.columns]])
with Model() as virus_outpatient_model:
groups = virus_outpatient_merged.shape[0]
# Probability of stool collection
ψ = Beta('ψ', 1, 1)
outpatient_enrolled = (rotavirus_data.setting==OUTPATIENT).sum()
collected = rotavirus_data[rotavirus_data.setting==OUTPATIENT].stool_collected.sum()
stool = Binomial('stool', n=outpatient_enrolled, p=ψ, observed=collected)
cases, enrolled, n, eligible = virus_outpatient_merged[['cases', 'enrolled', 'n', 'eligible']].values.T
# Adjust for enrollment
enrollment_rate = Beta('p_enrolled', 1, 1, shape=groups)
enrolled_cases = Binomial('enrolled_cases', eligible, enrollment_rate, observed=enrolled)
eligible_cases = Uniform('eligible_cases', lower=cases, upper=eligible,
shape=groups)
p = monitoring_rate[OUTPATIENT]*ψ*enrollment_rate
# Estimate total VUMC cases in setting
eligible_cases_likelihood = Binomial('eligible_cases_likelihood',
n=eligible_cases,
p=p,
observed=cases)
π = Beta('π', 1, 10, shape=groups)
# Data likeihood
virus_out_obs = Potential('virus_out_obs',
Binomial.dist(n=n, p=π).logp(eligible_cases).sum())
with virus_outpatient_model:
virus_outpatient_trace = sample(iterations, tune=tune,
njobs=2, random_seed=seed_numbers)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [00:17<00:00, 116.75it/s]
MCMC diagnostic plot
energyplot(virus_outpatient_trace)
<matplotlib.axes._subplots.AxesSubplot at 0x13ad75668>
Posterior distribution of stool sampling rate
traceplot(virus_outpatient_trace, varnames=['ψ'])
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1379f87b8>, <matplotlib.axes._subplots.AxesSubplot object at 0x137a310b8>]], dtype=object)
year_labels = ['2012/13','2013/14','2014/15']
pd.DataFrame(list(itertools.product(age_labels + ['<5', '5+'], year_labels)), columns=['age', 'year'])
age | year | |
---|---|---|
0 | <5 | 2012/13 |
1 | <5 | 2013/14 |
2 | <5 | 2014/15 |
3 | 5+ | 2012/13 |
4 | 5+ | 2013/14 |
5 | 5+ | 2014/15 |
6 | <5 | 2012/13 |
7 | <5 | 2013/14 |
8 | <5 | 2014/15 |
9 | 5+ | 2012/13 |
10 | 5+ | 2013/14 |
11 | 5+ | 2014/15 |
outpatient_sample_melted = pd.melt(pd.DataFrame(virus_outpatient_trace['π']))
outpatient_virus_samples = (virus_outpatient_merged.drop(['enrolled', 'stool_collected', 'cases'],
axis=1).reset_index(drop=True)
.merge(outpatient_sample_melted, left_index=True, right_on='variable')
.replace({'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'<5', 4:'5+'},
'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'},
'virus':virus_lookup})
.rename(columns={'age_group':'age group'}))
outpatient_rateplot = sns.factorplot(x="year", y="rate", hue="age group", col='virus',
data=outpatient_virus_samples.assign(rate=outpatient_virus_samples.value*rate_factor),
palette="Blues", size=5, width=0.5, kind='box', linewidth=0.6, col_wrap=2,
fliersize=0)
outpatient_rateplot.despine(left=True)
<seaborn.axisgrid.FacetGrid at 0x12f2a7b38>
outpatient_virus_labels = (virus_outpatient_merged.drop(['enrolled', 'n', 'stool_collected', 'cases', 'eligible'],
axis=1).reset_index(drop=True)
.replace({'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'},
'virus':virus_lookup})
.rename(columns={'age_group':'age group'}))
virus_outpatient_table = generate_table(virus_outpatient_trace, outpatient_virus_labels,
index=['age group', 'year'], varnames=['π'],
columns=['virus']).sort_index()
virus_outpatient_table.to_excel('results/virus_outpatient_table.xlsx')
virus_outpatient_table
virus | astrovirus | norovirus | rotavirus | sapovirus | |
---|---|---|---|---|---|
age group | year | ||||
0 | 2012/13 | 6.38 (0.83, 13.40) | 36.13 (20.62, 54.60) | 21.27 (8.97, 34.35) | 10.66 (3.35, 20.62) |
2013/14 | 9.72 (2.46, 18.49) | 44.44 (27.15, 62.63) | 9.85 (2.31, 18.54) | 25.30 (12.59, 39.93) | |
2014/15 | 4.16 (0.33, 10.01) | 60.31 (37.80, 83.68) | 8.08 (1.78, 16.27) | 11.92 (3.68, 21.96) | |
1 | 2012/13 | 15.23 (5.91, 25.60) | 32.15 (17.22, 47.45) | 20.13 (9.33, 31.95) | 30.39 (16.16, 44.69) |
2013/14 | 14.82 (5.95, 24.78) | 36.00 (22.01, 52.57) | 16.42 (7.33, 26.62) | 22.93 (11.78, 36.72) | |
2014/15 | 5.65 (0.50, 11.85) | 50.22 (32.06, 71.74) | 20.44 (9.05, 32.96) | 16.83 (6.13, 28.55) | |
2 | 2012/13 | 8.96 (4.64, 13.77) | 11.72 (6.35, 17.79) | 9.63 (5.06, 14.90) | 7.56 (3.05, 12.01) |
2013/14 | 5.76 (2.34, 9.58) | 12.10 (7.25, 18.22) | 2.55 (0.49, 5.14) | 8.26 (3.85, 12.47) | |
2014/15 | 3.37 (0.70, 6.20) | 14.15 (8.65, 20.97) | 6.05 (2.39, 10.17) | 3.38 (0.91, 6.52) | |
3 | 2012/13 | 8.99 (5.32, 12.84) | 19.97 (14.90, 25.69) | 13.43 (9.05, 18.24) | 12.60 (8.29, 16.97) |
2013/14 | 7.76 (4.71, 11.20) | 23.02 (17.17, 29.29) | 6.28 (3.47, 9.33) | 14.02 (9.72, 18.40) | |
2014/15 | 3.18 (1.09, 5.39) | 29.89 (23.62, 37.16) | 8.71 (5.36, 12.62) | 7.15 (4.24, 10.96) | |
4 | 2012/13 | 2.36 (0.74, 4.03) | 5.66 (2.94, 8.39) | 2.34 (0.93, 4.16) | 3.66 (1.55, 5.80) |
2013/14 | 1.00 (0.14, 2.13) | 5.20 (2.93, 8.00) | 1.32 (0.29, 2.59) | 2.94 (1.29, 5.04) | |
2014/15 | 0.79 (0.05, 1.88) | 8.04 (4.49, 11.54) | 1.89 (0.58, 3.66) | 1.17 (0.15, 2.49) |
virus_outpatient_table.index = virus_outpatient_table.index.set_levels(age_labels + ['<5', '5+'], level=0)
virus_outpatient_table
virus | astrovirus | norovirus | rotavirus | sapovirus | |
---|---|---|---|---|---|
age group | year | ||||
<1 | 2012/13 | 6.38 (0.83, 13.40) | 36.13 (20.62, 54.60) | 21.27 (8.97, 34.35) | 10.66 (3.35, 20.62) |
2013/14 | 9.72 (2.46, 18.49) | 44.44 (27.15, 62.63) | 9.85 (2.31, 18.54) | 25.30 (12.59, 39.93) | |
2014/15 | 4.16 (0.33, 10.01) | 60.31 (37.80, 83.68) | 8.08 (1.78, 16.27) | 11.92 (3.68, 21.96) | |
1 | 2012/13 | 15.23 (5.91, 25.60) | 32.15 (17.22, 47.45) | 20.13 (9.33, 31.95) | 30.39 (16.16, 44.69) |
2013/14 | 14.82 (5.95, 24.78) | 36.00 (22.01, 52.57) | 16.42 (7.33, 26.62) | 22.93 (11.78, 36.72) | |
2014/15 | 5.65 (0.50, 11.85) | 50.22 (32.06, 71.74) | 20.44 (9.05, 32.96) | 16.83 (6.13, 28.55) | |
2-4 | 2012/13 | 8.96 (4.64, 13.77) | 11.72 (6.35, 17.79) | 9.63 (5.06, 14.90) | 7.56 (3.05, 12.01) |
2013/14 | 5.76 (2.34, 9.58) | 12.10 (7.25, 18.22) | 2.55 (0.49, 5.14) | 8.26 (3.85, 12.47) | |
2014/15 | 3.37 (0.70, 6.20) | 14.15 (8.65, 20.97) | 6.05 (2.39, 10.17) | 3.38 (0.91, 6.52) | |
<5 | 2012/13 | 8.99 (5.32, 12.84) | 19.97 (14.90, 25.69) | 13.43 (9.05, 18.24) | 12.60 (8.29, 16.97) |
2013/14 | 7.76 (4.71, 11.20) | 23.02 (17.17, 29.29) | 6.28 (3.47, 9.33) | 14.02 (9.72, 18.40) | |
2014/15 | 3.18 (1.09, 5.39) | 29.89 (23.62, 37.16) | 8.71 (5.36, 12.62) | 7.15 (4.24, 10.96) | |
5+ | 2012/13 | 2.36 (0.74, 4.03) | 5.66 (2.94, 8.39) | 2.34 (0.93, 4.16) | 3.66 (1.55, 5.80) |
2013/14 | 1.00 (0.14, 2.13) | 5.20 (2.93, 8.00) | 1.32 (0.29, 2.59) | 2.94 (1.29, 5.04) | |
2014/15 | 0.79 (0.05, 1.88) | 8.04 (4.49, 11.54) | 1.89 (0.58, 3.66) | 1.17 (0.15, 2.49) |
virus_outpatient_table[['astrovirus']]
virus | astrovirus | |
---|---|---|
age group | year | |
<1 | 2012/13 | 6.38 (0.83, 13.40) |
2013/14 | 9.72 (2.46, 18.49) | |
2014/15 | 4.16 (0.33, 10.01) | |
1 | 2012/13 | 15.23 (5.91, 25.60) |
2013/14 | 14.82 (5.95, 24.78) | |
2014/15 | 5.65 (0.50, 11.85) | |
2-4 | 2012/13 | 8.96 (4.64, 13.77) |
2013/14 | 5.76 (2.34, 9.58) | |
2014/15 | 3.37 (0.70, 6.20) | |
<5 | 2012/13 | 8.99 (5.32, 12.84) |
2013/14 | 7.76 (4.71, 11.20) | |
2014/15 | 3.18 (1.09, 5.39) | |
5+ | 2012/13 | 2.36 (0.74, 4.03) |
2013/14 | 1.00 (0.14, 2.13) | |
2014/15 | 0.79 (0.05, 1.88) |
def create_virus_table(virus_name):
estimates = virus_outpatient_table[[virus_name]].rename(columns={virus_name:'estimate'})
virus_data = (virus_outpatient_merged[virus_outpatient_merged.virus==virus_name]
.set_index(['age_group', 'year']) [['cases', 'stool_collected', 'enrolled', 'n']]
.set_index(estimates.index))
return estimates.join(virus_data)
astro_table = create_virus_table('astrovirus')
astro_table.to_excel('results/outpatient_astro.xlsx')
astro_table
estimate | cases | stool_collected | enrolled | n | ||
---|---|---|---|---|---|---|
age group | year | |||||
<1 | 2012/13 | 6.38 (0.83, 13.40) | 2 | 79.0 | 86 | 1427 |
2013/14 | 9.72 (2.46, 18.49) | 4 | 79.0 | 95 | 1427 | |
2014/15 | 4.16 (0.33, 10.01) | 1 | 85.0 | 94 | 1427 | |
1 | 2012/13 | 15.23 (5.91, 25.60) | 8 | 93.0 | 101 | 1544 |
2013/14 | 14.82 (5.95, 24.78) | 8 | 78.0 | 90 | 1544 | |
2014/15 | 5.65 (0.50, 11.85) | 2 | 73.0 | 85 | 1544 | |
2-4 | 2012/13 | 8.96 (4.64, 13.77) | 12 | 92.0 | 120 | 4376 |
2013/14 | 5.76 (2.34, 9.58) | 8 | 82.0 | 119 | 4376 | |
2014/15 | 3.37 (0.70, 6.20) | 4 | 76.0 | 116 | 4376 | |
<5 | 2012/13 | 8.99 (5.32, 12.84) | 22 | 264.0 | 307 | 7347 |
2013/14 | 7.76 (4.71, 11.20) | 20 | 239.0 | 304 | 7347 | |
2014/15 | 3.18 (1.09, 5.39) | 7 | 234.0 | 295 | 7347 | |
5+ | 2012/13 | 2.36 (0.74, 4.03) | 6 | 115.0 | 152 | 9237 |
2013/14 | 1.00 (0.14, 2.13) | 2 | 84.0 | 124 | 9237 | |
2014/15 | 0.79 (0.05, 1.88) | 1 | 66.0 | 92 | 9237 |
noro_table = create_virus_table('norovirus')
noro_table.to_excel('results/outpatient_noro.xlsx')
noro_table
estimate | cases | stool_collected | enrolled | n | ||
---|---|---|---|---|---|---|
age group | year | |||||
<1 | 2012/13 | 36.13 (20.62, 54.60) | 16 | 79.0 | 86 | 1427 |
2013/14 | 44.44 (27.15, 62.63) | 22 | 79.0 | 95 | 1427 | |
2014/15 | 60.31 (37.80, 83.68) | 29 | 85.0 | 94 | 1427 | |
1 | 2012/13 | 32.15 (17.22, 47.45) | 18 | 93.0 | 101 | 1544 |
2013/14 | 36.00 (22.01, 52.57) | 21 | 78.0 | 90 | 1544 | |
2014/15 | 50.22 (32.06, 71.74) | 26 | 73.0 | 85 | 1544 | |
2-4 | 2012/13 | 11.72 (6.35, 17.79) | 16 | 92.0 | 120 | 4376 |
2013/14 | 12.10 (7.25, 18.22) | 18 | 82.0 | 119 | 4376 | |
2014/15 | 14.15 (8.65, 20.97) | 20 | 76.0 | 116 | 4376 | |
<5 | 2012/13 | 19.97 (14.90, 25.69) | 50 | 264.0 | 307 | 7347 |
2013/14 | 23.02 (17.17, 29.29) | 61 | 239.0 | 304 | 7347 | |
2014/15 | 29.89 (23.62, 37.16) | 75 | 234.0 | 295 | 7347 | |
5+ | 2012/13 | 5.66 (2.94, 8.39) | 16 | 115.0 | 152 | 9237 |
2013/14 | 5.20 (2.93, 8.00) | 15 | 84.0 | 124 | 9237 | |
2014/15 | 8.04 (4.49, 11.54) | 20 | 66.0 | 92 | 9237 |
sapo_table = create_virus_table('sapovirus')
sapo_table.to_excel('results/outpatient_sapo.xlsx')
sapo_table
estimate | cases | stool_collected | enrolled | n | ||
---|---|---|---|---|---|---|
age group | year | |||||
<1 | 2012/13 | 10.66 (3.35, 20.62) | 4 | 79.0 | 86 | 1427 |
2013/14 | 25.30 (12.59, 39.93) | 12 | 79.0 | 95 | 1427 | |
2014/15 | 11.92 (3.68, 21.96) | 5 | 85.0 | 94 | 1427 | |
1 | 2012/13 | 30.39 (16.16, 44.69) | 17 | 93.0 | 101 | 1544 |
2013/14 | 22.93 (11.78, 36.72) | 13 | 78.0 | 90 | 1544 | |
2014/15 | 16.83 (6.13, 28.55) | 8 | 73.0 | 85 | 1544 | |
2-4 | 2012/13 | 7.56 (3.05, 12.01) | 10 | 92.0 | 120 | 4376 |
2013/14 | 8.26 (3.85, 12.47) | 12 | 82.0 | 119 | 4376 | |
2014/15 | 3.38 (0.91, 6.52) | 4 | 76.0 | 116 | 4376 | |
<5 | 2012/13 | 12.60 (8.29, 16.97) | 31 | 264.0 | 307 | 7347 |
2013/14 | 14.02 (9.72, 18.40) | 37 | 239.0 | 304 | 7347 | |
2014/15 | 7.15 (4.24, 10.96) | 17 | 234.0 | 295 | 7347 | |
5+ | 2012/13 | 3.66 (1.55, 5.80) | 10 | 115.0 | 152 | 9237 |
2013/14 | 2.94 (1.29, 5.04) | 8 | 84.0 | 124 | 9237 | |
2014/15 | 1.17 (0.15, 2.49) | 2 | 66.0 | 92 | 9237 |
rota_table = create_virus_table('rotavirus')
rota_table.to_excel('results/outpatient_rota.xlsx')
rota_table
estimate | cases | stool_collected | enrolled | n | ||
---|---|---|---|---|---|---|
age group | year | |||||
<1 | 2012/13 | 21.27 (8.97, 34.35) | 9 | 79.0 | 86 | 1427 |
2013/14 | 9.85 (2.31, 18.54) | 4 | 79.0 | 95 | 1427 | |
2014/15 | 8.08 (1.78, 16.27) | 3 | 85.0 | 94 | 1427 | |
1 | 2012/13 | 20.13 (9.33, 31.95) | 11 | 93.0 | 101 | 1544 |
2013/14 | 16.42 (7.33, 26.62) | 9 | 78.0 | 90 | 1544 | |
2014/15 | 20.44 (9.05, 32.96) | 10 | 73.0 | 85 | 1544 | |
2-4 | 2012/13 | 9.63 (5.06, 14.90) | 13 | 92.0 | 120 | 4376 |
2013/14 | 2.55 (0.49, 5.14) | 3 | 82.0 | 119 | 4376 | |
2014/15 | 6.05 (2.39, 10.17) | 8 | 76.0 | 116 | 4376 | |
<5 | 2012/13 | 13.43 (9.05, 18.24) | 33 | 264.0 | 307 | 7347 |
2013/14 | 6.28 (3.47, 9.33) | 16 | 239.0 | 304 | 7347 | |
2014/15 | 8.71 (5.36, 12.62) | 21 | 234.0 | 295 | 7347 | |
5+ | 2012/13 | 2.34 (0.93, 4.16) | 6 | 115.0 | 152 | 9237 |
2013/14 | 1.32 (0.29, 2.59) | 3 | 84.0 | 124 | 9237 | |
2014/15 | 1.89 (0.58, 3.66) | 4 | 66.0 | 92 | 9237 |
virus_outpatient_merged
age_group | year | cases | stool_collected | enrolled | n | eligible | virus | |
---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 2 | 79.0 | 86 | 1427 | 149 | astrovirus |
1 | 0 | 1 | 4 | 79.0 | 95 | 1427 | 150 | astrovirus |
2 | 0 | 2 | 1 | 85.0 | 94 | 1427 | 153 | astrovirus |
3 | 1 | 0 | 8 | 93.0 | 101 | 1544 | 149 | astrovirus |
4 | 1 | 1 | 8 | 78.0 | 90 | 1544 | 129 | astrovirus |
5 | 1 | 2 | 2 | 73.0 | 85 | 1544 | 139 | astrovirus |
6 | 2 | 0 | 12 | 92.0 | 120 | 4376 | 204 | astrovirus |
7 | 2 | 1 | 8 | 82.0 | 119 | 4376 | 187 | astrovirus |
8 | 2 | 2 | 4 | 76.0 | 116 | 4376 | 192 | astrovirus |
12 | 0 | 0 | 16 | 79.0 | 86 | 1427 | 149 | norovirus |
13 | 0 | 1 | 22 | 79.0 | 95 | 1427 | 150 | norovirus |
14 | 0 | 2 | 29 | 85.0 | 94 | 1427 | 153 | norovirus |
15 | 1 | 0 | 18 | 93.0 | 101 | 1544 | 149 | norovirus |
16 | 1 | 1 | 21 | 78.0 | 90 | 1544 | 129 | norovirus |
17 | 1 | 2 | 26 | 73.0 | 85 | 1544 | 139 | norovirus |
18 | 2 | 0 | 16 | 92.0 | 120 | 4376 | 204 | norovirus |
19 | 2 | 1 | 18 | 82.0 | 119 | 4376 | 187 | norovirus |
20 | 2 | 2 | 20 | 76.0 | 116 | 4376 | 192 | norovirus |
24 | 0 | 0 | 9 | 79.0 | 86 | 1427 | 149 | rotavirus |
25 | 0 | 1 | 4 | 79.0 | 95 | 1427 | 150 | rotavirus |
26 | 0 | 2 | 3 | 85.0 | 94 | 1427 | 153 | rotavirus |
27 | 1 | 0 | 11 | 93.0 | 101 | 1544 | 149 | rotavirus |
28 | 1 | 1 | 9 | 78.0 | 90 | 1544 | 129 | rotavirus |
29 | 1 | 2 | 10 | 73.0 | 85 | 1544 | 139 | rotavirus |
30 | 2 | 0 | 13 | 92.0 | 120 | 4376 | 204 | rotavirus |
31 | 2 | 1 | 3 | 82.0 | 119 | 4376 | 187 | rotavirus |
32 | 2 | 2 | 8 | 76.0 | 116 | 4376 | 192 | rotavirus |
36 | 0 | 0 | 4 | 79.0 | 86 | 1427 | 149 | sapovirus |
37 | 0 | 1 | 12 | 79.0 | 95 | 1427 | 150 | sapovirus |
38 | 0 | 2 | 5 | 85.0 | 94 | 1427 | 153 | sapovirus |
39 | 1 | 0 | 17 | 93.0 | 101 | 1544 | 149 | sapovirus |
40 | 1 | 1 | 13 | 78.0 | 90 | 1544 | 129 | sapovirus |
41 | 1 | 2 | 8 | 73.0 | 85 | 1544 | 139 | sapovirus |
42 | 2 | 0 | 10 | 92.0 | 120 | 4376 | 204 | sapovirus |
43 | 2 | 1 | 12 | 82.0 | 119 | 4376 | 187 | sapovirus |
44 | 2 | 2 | 4 | 76.0 | 116 | 4376 | 192 | sapovirus |
0 | 3 | 0 | 22 | 264.0 | 307 | 7347 | 502 | astrovirus |
1 | 3 | 1 | 20 | 239.0 | 304 | 7347 | 466 | astrovirus |
2 | 3 | 2 | 7 | 234.0 | 295 | 7347 | 484 | astrovirus |
3 | 3 | 0 | 50 | 264.0 | 307 | 7347 | 502 | norovirus |
4 | 3 | 1 | 61 | 239.0 | 304 | 7347 | 466 | norovirus |
5 | 3 | 2 | 75 | 234.0 | 295 | 7347 | 484 | norovirus |
6 | 3 | 0 | 33 | 264.0 | 307 | 7347 | 502 | rotavirus |
7 | 3 | 1 | 16 | 239.0 | 304 | 7347 | 466 | rotavirus |
8 | 3 | 2 | 21 | 234.0 | 295 | 7347 | 484 | rotavirus |
9 | 3 | 0 | 31 | 264.0 | 307 | 7347 | 502 | sapovirus |
10 | 3 | 1 | 37 | 239.0 | 304 | 7347 | 466 | sapovirus |
11 | 3 | 2 | 17 | 234.0 | 295 | 7347 | 484 | sapovirus |
0 | 4 | 0 | 6 | 115.0 | 152 | 9237 | 263 | astrovirus |
1 | 4 | 1 | 2 | 84.0 | 124 | 9237 | 211 | astrovirus |
2 | 4 | 2 | 1 | 66.0 | 92 | 9237 | 182 | astrovirus |
3 | 4 | 0 | 16 | 115.0 | 152 | 9237 | 263 | norovirus |
4 | 4 | 1 | 15 | 84.0 | 124 | 9237 | 211 | norovirus |
5 | 4 | 2 | 20 | 66.0 | 92 | 9237 | 182 | norovirus |
6 | 4 | 0 | 6 | 115.0 | 152 | 9237 | 263 | rotavirus |
7 | 4 | 1 | 3 | 84.0 | 124 | 9237 | 211 | rotavirus |
8 | 4 | 2 | 4 | 66.0 | 92 | 9237 | 182 | rotavirus |
9 | 4 | 0 | 10 | 115.0 | 152 | 9237 | 263 | sapovirus |
10 | 4 | 1 | 8 | 84.0 | 124 | 9237 | 211 | sapovirus |
11 | 4 | 2 | 2 | 66.0 | 92 | 9237 | 182 | sapovirus |
virus_outpatient_pooled = virus_outpatient_merged.groupby(['age_group', 'virus']).agg({'n':lambda x: max(x)*3,
'cases':sum,
'stool_collected':sum,
'enrolled':sum, 'eligible':sum}).reset_index()
with Model() as virus_pooled_model:
groups = virus_outpatient_pooled.shape[0]
# Probability of stool collection
ψ = Beta('ψ', 1, 1)
outpatient_enrolled = (rotavirus_data.setting==OUTPATIENT).sum()
collected = rotavirus_data[rotavirus_data.setting==OUTPATIENT].stool_collected.sum()
stool = Binomial('stool', n=outpatient_enrolled, p=ψ, observed=collected)
cases, enrolled, n, eligible = virus_outpatient_pooled[['cases', 'enrolled', 'n', 'eligible']].values.T
# Adjust for enrollment
enrollment_rate = Beta('p_enrolled', 1, 1, shape=groups)
enrolled_cases = Binomial('enrolled_cases', eligible, enrollment_rate, observed=enrolled)
eligible_cases = Uniform('eligible_cases', lower=cases, upper=eligible,
shape=groups)
p = monitoring_rate[OUTPATIENT]*ψ*enrollment_rate
# Estimate total VUMC cases in setting
eligible_cases_likelihood = Binomial('eligible_cases_likelihood',
n=eligible_cases,
p=p,
observed=cases)
π = Beta('π', 1, 10, shape=groups)
# Data likeihood
virus_out_obs = Potential('virus_out_obs',
Binomial.dist(n=n, p=π).logp(eligible_cases).sum())
with virus_pooled_model:
virus_outpatient_pooled_trace = sample(iterations, tune=tune,
njobs=2, random_seed=seed_numbers)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... 100%|██████████| 2000/2000 [00:08<00:00, 222.65it/s]
MCMC diagnostic plot
energyplot(virus_outpatient_pooled_trace)
<matplotlib.axes._subplots.AxesSubplot at 0x1392f8208>
outpatient_virus_pooled_labels = (virus_outpatient_pooled.drop(['enrolled', 'n', 'stool_collected', 'cases'],
axis=1).reset_index(drop=True)
.replace({'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'<5', 4:'5+'},
'virus':virus_lookup})
.rename(columns={'age_group':'age group'}))
virus_outpatient_pooled_table = generate_table(virus_outpatient_pooled_trace, outpatient_virus_pooled_labels,
index=['age group'], varnames=['π'],
columns=['virus']).sort_index()
# virus_outpatient_table.to_excel('results/virus_outpatient_table.xlsx')
virus_outpatient_pooled_table
virus | astrovirus | norovirus | rotavirus | sapovirus |
---|---|---|---|---|
age group | ||||
1 | 10.83 (6.19, 16.13) | 37.63 (27.79, 46.75) | 17.56 (10.96, 23.74) | 22.26 (15.27, 29.07) |
2-4 | 5.45 (3.31, 7.72) | 12.09 (8.77, 15.43) | 5.53 (3.48, 7.89) | 5.94 (3.59, 8.22) |
5+ | 1.12 (0.48, 1.78) | 5.89 (4.32, 7.66) | 1.57 (0.74, 2.42) | 2.36 (1.36, 3.37) |
<1 | 5.31 (1.94, 8.83) | 45.72 (34.48, 56.44) | 11.47 (6.32, 16.90) | 14.83 (8.93, 21.35) |
<5 | 6.39 (4.69, 8.20) | 23.82 (20.24, 27.36) | 9.07 (6.87, 11.11) | 10.96 (8.69, 13.36) |
def create_pooled_table(virus_name):
estimates = virus_outpatient_pooled_table[[virus_name]].rename(columns={virus_name:'estimate'})
virus_data = (virus_outpatient_pooled[virus_outpatient_pooled.virus==virus_name]
.set_index(['age_group']) [['cases', 'stool_collected', 'enrolled', 'n']]
.set_index(estimates.index))
return estimates.join(virus_data).loc[['<1', '1', '2-4', '<5', '5+']]
astro_pooled = create_pooled_table('astrovirus')
astro_pooled.to_excel('results/outpatient_astro_pooled.xlsx')
astro_pooled
estimate | cases | stool_collected | enrolled | n | |
---|---|---|---|---|---|
age group | |||||
<1 | 5.31 (1.94, 8.83) | 49 | 737.0 | 906 | 22041 |
1 | 10.83 (6.19, 16.13) | 7 | 243.0 | 275 | 4281 |
2-4 | 5.45 (3.31, 7.72) | 18 | 244.0 | 276 | 4632 |
<5 | 6.39 (4.69, 8.20) | 9 | 265.0 | 368 | 27711 |
5+ | 1.12 (0.48, 1.78) | 24 | 250.0 | 355 | 13128 |
noro_pooled = create_pooled_table('norovirus')
noro_pooled.to_excel('results/outpatient_noro_pooled.xlsx')
noro_pooled
estimate | cases | stool_collected | enrolled | n | |
---|---|---|---|---|---|
age group | |||||
<1 | 45.72 (34.48, 56.44) | 186 | 737.0 | 906 | 22041 |
1 | 37.63 (27.79, 46.75) | 67 | 243.0 | 275 | 4281 |
2-4 | 12.09 (8.77, 15.43) | 65 | 244.0 | 276 | 4632 |
<5 | 23.82 (20.24, 27.36) | 51 | 265.0 | 368 | 27711 |
5+ | 5.89 (4.32, 7.66) | 54 | 250.0 | 355 | 13128 |
sapo_pooled = create_pooled_table('sapovirus')
sapo_pooled.to_excel('results/outpatient_sapo_pooled.xlsx')
sapo_pooled
estimate | cases | stool_collected | enrolled | n | |
---|---|---|---|---|---|
age group | |||||
<1 | 14.83 (8.93, 21.35) | 85 | 737.0 | 906 | 22041 |
1 | 22.26 (15.27, 29.07) | 21 | 243.0 | 275 | 4281 |
2-4 | 5.94 (3.59, 8.22) | 38 | 244.0 | 276 | 4632 |
<5 | 10.96 (8.69, 13.36) | 20 | 265.0 | 368 | 27711 |
5+ | 2.36 (1.36, 3.37) | 26 | 250.0 | 355 | 13128 |
rota_pooled = create_pooled_table('rotavirus')
rota_pooled.to_excel('results/outpatient_rota_pooled.xlsx')
rota_pooled
estimate | cases | stool_collected | enrolled | n | |
---|---|---|---|---|---|
age group | |||||
<1 | 11.47 (6.32, 16.90) | 70 | 737.0 | 906 | 22041 |
1 | 17.56 (10.96, 23.74) | 16 | 243.0 | 275 | 4281 |
2-4 | 5.53 (3.48, 7.89) | 30 | 244.0 | 276 | 4632 |
<5 | 9.07 (6.87, 11.11) | 13 | 265.0 | 368 | 27711 |
5+ | 1.57 (0.74, 2.42) | 24 | 250.0 | 355 | 13128 |