#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import pandas as pd import seaborn as sns import matplotlib.pylab as plt import itertools sns.set_context('notebook') seed_numbers = 20090425, 19700903 from datetime import timedelta, datetime # # Data import and cleanup # In[2]: # rotavirus_data = pd.read_csv('data/AGE Year 2-4 with Final Rota.csv', low_memory=False) cases_file = 'data/AGE Cases and Healthy Controls Merged with Lab Results Year 2 to 4 v05.23.2017.csv' rotavirus_data = pd.read_csv(cases_file, low_memory=False, parse_dates=['scrdate', 'dob']) rotavirus_data.index.is_unique # In[3]: rotavirus_data.head() # In[4]: rotavirus_data.query('year==3').sort_values('scrdate').scrdate # Import eligibles # In[5]: eligible_file = 'data/AGE Outpatient Eligibility Data Year 2-4.csv' eligible_data = pd.read_csv(eligible_file, low_memory=False, index_col=0, parse_dates=['scrdate']) eligible_data.index.is_unique # In[6]: eligible_data.head() # In[7]: eligible_data.scrdate.min(), eligible_data.scrdate.max() # In[8]: eligible_data['year'] = 1 eligible_data.loc[eligible_data.scrdate < datetime(2013,12,1), 'year'] = 0 eligible_data.loc[eligible_data.scrdate >= datetime(2014,12,1), 'year'] = 2 # In[9]: eligible_data['under_5'] = ((eligible_data.agemonths) < 60).astype(int) eligible_data['under_2'] = ((eligible_data.agemonths) < 24).astype(int) eligible_data['under_1'] = ((eligible_data.agemonths) < 12).astype(int) # In[10]: eligible_data['age_group'] = 3 eligible_data.loc[eligible_data.under_5==1, 'age_group'] = 2 eligible_data.loc[eligible_data.under_2==1, 'age_group'] = 1 eligible_data.loc[eligible_data.under_1==1, 'age_group'] = 0 # In[11]: eligible_counts = eligible_data.groupby(['age_group', 'year']).size() eligible_counts.name = 'eligible' eligible_counts # In[12]: eligible_data.shape # Rename columns and recode data # In[13]: rotavirus_data.race.value_counts() # In[14]: rotavirus_data['white'] = rotavirus_data['race'] == 1 rotavirus_data['black'] = rotavirus_data['race'] == 2 # In[15]: rotavirus_data['astro'].value_counts().index # In[16]: rotavirus_data=rotavirus_data.rename(columns={'rotacdc':'rotavirus', 'sapo':'sapovirus', 'astro':'astrovirus', 'noro':'norovirus'}) # In[17]: rotavirus_data.rotavirus.value_counts() # In[18]: rotavirus_data.rotavu.value_counts() # In[19]: virus_cols = ['rotavirus','sapovirus','astrovirus','norovirus'] # Proportion of test results missing # In[20]: rotavirus_data[virus_cols].isnull().mean() # In[21]: astro_data = rotavirus_data[rotavirus_data.astrovirus==1].dropna(subset=['astro_ct']) astro_data.shape # In[22]: noro_data = rotavirus_data[rotavirus_data.norovirus==1].dropna(subset=['noro_ct']) noro_data.shape # In[23]: noro_data['coinfect'] = noro_data[['rotavirus', 'sapovirus', 'astrovirus']].sum(1) # In[24]: astro_data['coinfect'] = astro_data[['rotavirus', 'sapovirus', 'norovirus']].sum(1) # In[25]: with_coinf = noro_data.noro_ct[noro_data.coinfect==1].astype(float) no_coinf = noro_data.noro_ct[(noro_data.coinfect==0)].astype(float) # with_coinf = astro_data.astro_ct[astro_data.coinfect==1].astype(float) # no_coinf = astro_data.astro_ct[(astro_data.coinfect==0)].astype(float) # In[26]: from scipy.stats import ranksums ranksums(with_coinf, no_coinf) # In[27]: rotavirus_data.sapo_ct.dropna().astype(float).hist() # Calculate age, and compare to given age. # In[28]: rotavirus_data['age_months'] = (rotavirus_data['scrdate'] - rotavirus_data['dob'] + timedelta(1)).astype('timedelta64[M]') # In[29]: rotavirus_data.age_months.hist(); # Group into ages under 5 and 5 or over # In[30]: rotavirus_data['under_5'] = ((rotavirus_data.age_months) < 60).astype(int) # In[31]: rotavirus_data['under_2'] = ((rotavirus_data.age_months) < 24).astype(int) # In[32]: rotavirus_data['under_1'] = ((rotavirus_data.age_months) < 12).astype(int) # In[33]: rotavirus_data['age_group'] = 3 rotavirus_data.loc[rotavirus_data.under_5==1, 'age_group'] = 2 rotavirus_data.loc[rotavirus_data.under_2==1, 'age_group'] = 1 rotavirus_data.loc[rotavirus_data.under_1==1, 'age_group'] = 0 # In[34]: rotavirus_data.age_group.value_counts() # In[35]: rotavirus_data.year.value_counts() # Calculate natural year # In[36]: rotavirus_data['year'] = rotavirus_data.year - 2 # Rename setting variable # In[37]: assert not rotavirus_data.settingnew.isnull().sum() # In[38]: rotavirus_data['setting'] = rotavirus_data.settingnew - 1 # In[39]: # Indices to each value INPATIENT, ED, OUTPATIENT, HC = 0, 1, 2, 3 # Recode sex and stool # In[40]: assert not rotavirus_data['specimencol'].isnull().sum() # In[41]: rotavirus_data['male'] = rotavirus_data['sex']==1 rotavirus_data['stool_collected'] = rotavirus_data['specimencol']==1 # Recode insurance # In[42]: rotavirus_data.insurance.value_counts() # In[43]: rotavirus_data['public_ins'] = rotavirus_data.insurance.isin({1, 3}) rotavirus_data['private_ins'] = rotavirus_data.insurance.isin({2, 3}) rotavirus_data.loc[rotavirus_data.insurance==8, ['public_ins', 'private_ins']] = np.nan # In[44]: # Drop extraneous columns rotavirus_data = rotavirus_data.drop(['intvwdate', 'dob', 'provider', 'coldat', 'tdat', 'insurance', 'sex', 'hispcr', 'mdegree_7', 'rvacver_7', 'rvacver_7r', 'race', 'race2_8'], axis=1) # Summary of missing values # In[45]: rotavirus_data.isnull().sum() # Aggregate data by age group and setting # In[46]: rotavirus_summary = (rotavirus_data.drop('scrdate', axis=1) .groupby(['age_group', 'year', 'setting', 'black'])['caseid'].count() .reset_index() .rename(columns={'caseid':'cases'})) rotavirus_summary.head() # Enrollment days/week for inpatient, outpatient, ED # In[47]: monitoring_days = 7, 4, 4 # Only 5.5 days of intake on outpatient monitoring_rate = np.array(monitoring_days)/np.array([7, 7, 5.5]) monitoring_rate # # Outpatient AGE Model # In[155]: from pymc3 import Model, Deterministic, sample, NUTS, find_MAP, Potential, Metropolis from pymc3 import Binomial, Beta, Poisson, Normal, DiscreteUniform, Uniform, Flat, Gamma from pymc3 import traceplot, forestplot, summary, df_summary, energyplot import theano.tensor as tt # In[49]: iterations = 1000 tune = 1000 # In[50]: label_map = {'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'5+'}, 'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'}, 'black':{0:'white', 1:'black'}, 'setting':{0:'inpatient', 1:'ED', 2:'outpatient'}} # Rate factor for population # In[51]: rate_factor = 1000 # In[52]: from pymc3 import hpd def _hpd_df(x, alpha): cnames = ['hpd_{0:g}'.format(100 * alpha/2), 'hpd_{0:g}'.format(100 * (1 - alpha/2))] return pd.DataFrame(hpd(x, alpha), columns=cnames) # In[53]: def generate_table(trace, labels, index, columns, varnames=['λ']): rate_df_index = pd.MultiIndex.from_tuples(labels.values.tolist(), names=labels.columns) rate_df = (df_summary(trace, varnames=varnames) * rate_factor)[['mean', 'hpd_2.5', 'hpd_97.5']] rate_df.index = rate_df_index pivot_all = pd.pivot_table(rate_df .reset_index(), index=index, columns=columns) value_iterator = zip(pivot_all['mean'].values.flatten(), pivot_all['hpd_2.5'].values.flatten(), pivot_all['hpd_97.5'].values.flatten()) rate_strings = np.reshape(['{0:.2f} ({1:.2f}, {2:.2f})'.format(*vals) for vals in value_iterator], pivot_all['mean'].shape) return pd.DataFrame(rate_strings, index=pivot_all['mean'].index, columns=pivot_all['mean'].columns) # In[54]: import redcap import getpass local_auth = True api_url = 'https://redcap.vanderbilt.edu/api/' if local_auth: api_key = open("/Users/fonnescj/Dropbox/Collaborations/Halasa/Tokens/outpatient_token.txt").read() else: api_key = getpass.getpass() # In[55]: outpatient_project = redcap.Project(api_url, api_key) outpatients = outpatient_project.export_records(format='df') # In[56]: outpatients.groupby('year').year.count() # In[57]: outpatients.shape # In[58]: outpatients[['ageinjan', 'ageinnov']].hist(bins=20) # Going to assume that the outlier ages above were mistakenly entered as months. # In[59]: outliers = outpatients.ageinjan > 20 outpatients.loc[outliers, ['ageinjan', 'ageinnov']] = outpatients.loc[outliers, ['ageinjan', 'ageinnov']] / 12 # In[60]: outpatients[['ageinjan', 'ageinnov']].hist(bins=20) # In[61]: outpatients['age'] = np.maximum(outpatients[['ageinjan', 'ageinnov']].mean(1), 0.1) # In[62]: outpatients_kids = outpatients[outpatients.age<18].copy() # In[63]: outpatients_kids['5_and_older'] = (outpatients_kids.age>=5).astype(int) outpatients_kids['under_5'] = (outpatients_kids.age<5).astype(int) outpatients_kids['under_2'] = (outpatients_kids.age<2).astype(int) outpatients_kids['under_1'] = (outpatients_kids.age<1).astype(int) # In[64]: outpatients_kids['age_group'] = 3 outpatients_kids.loc[outpatients_kids.under_5==1, 'age_group'] = 2 outpatients_kids.loc[outpatients_kids.under_2==1, 'age_group'] = 1 outpatients_kids.loc[outpatients_kids.under_1==1, 'age_group'] = 0 # In[65]: outpatients_kids.groupby('year').age.hist(alpha=0.3) # In[66]: outpatient_n = (outpatients_kids.rename(columns={'ptname':'n'}) .groupby(['year', 'age_group']) .n.count()) outpatient_n # In[67]: rotavirus_data.head() # In[68]: eligible_data.head() # In[69]: eligible_counts.sum() # Average denominators # In[70]: outpatient_n.head() # In[71]: outpatient_n_mean = outpatient_n.groupby('age_group').mean().astype(int) # In[72]: outpatient_data_full = (rotavirus_summary[rotavirus_summary.setting==OUTPATIENT].groupby(['age_group','year']) .cases.sum().reset_index()) # In[73]: outpatient_data_full # In[74]: outpatient_merged = (outpatient_data_full.merge(outpatient_n_mean.reset_index(), on=['age_group']) .merge(pd.DataFrame(eligible_counts).reset_index(), on=['age_group', 'year'])) # In[78]: outpatient_merged # In[76]: outpatient_under_5 = outpatient_merged[outpatient_merged.age_group<3] # In[79]: def outpatient_model(dataset): age_group, cases, n, eligible = dataset[['age_group','cases','n', 'eligible']].values.T shape = dataset.shape[0] n_groups = len(set(age_group)) # Cases weighted by monitoring effort weights = monitoring_rate[OUTPATIENT] with Model() as mod: # Uniform prior on weighted cases weighted_cases = Uniform('weighted_cases', eligible, n, shape=shape) # Adjust for enrollment enrollment_rate = Beta('p_enrolled', 1, 1, shape=shape) eligible_cases = Binomial('eligible_cases', eligible, enrollment_rate, observed=cases) # Likelihood of eligible cases weighting = Binomial('weighting', n=weighted_cases, p=weights, observed=eligible) θ = Normal('θ', 0, sd=1000, shape=n_groups) λ = Deterministic('λ', tt.exp(θ)) AGE_obs = Potential('AGE_obs', Poisson.dist(λ[age_group]*n).logp(weighted_cases)) return mod # In[80]: outpatient_age = outpatient_model(outpatient_under_5) # In[81]: with outpatient_age: outpatient_age_trace = sample(iterations, tune=tune, njobs=2, random_seed=seed_numbers) # In[139]: age_labels = ['<1', '1', '2-4'] forestplot(outpatient_age_trace, varnames=['λ'], transform=lambda x: x*rate_factor, ylabels=age_labels) # ## Pooled AGE model # In[83]: outpatient_pooled = outpatient_merged.assign(age_group=(outpatient_merged.age_group==3).astype(int)) outpatient_pooled.head() # In[84]: outpatient_age_pooled = outpatient_model(outpatient_pooled) # In[85]: with outpatient_age_pooled: outpatient_age_pooled_trace = sample(iterations, tune=tune, njobs=2, random_seed=seed_numbers) # In[86]: age_labels_pooled = ['<5', '5+'] forestplot(outpatient_age_pooled_trace, varnames=['λ'], transform=lambda x: x*rate_factor, ylabels=age_labels_pooled) # ### Outpatient AGE estimates # In[90]: outpatient_age_estimates # In[92]: outpatient_age_estimates = (df_summary(outpatient_age_trace, varnames=['λ'], transform=lambda x: x*rate_factor) .drop(['sd', 'mc_error', 'n_eff', 'Rhat'], axis=1) .round(1)) outpatient_age_estimates.index = ['<1', '1', '2-4'] outpatient_age_estimates.columns = 'rate esimate', 'lower 95%', 'upper 95%' outpatient_age_estimates # In[94]: pooled_estimate = (df_summary(outpatient_age_pooled_trace, varnames=['λ'], transform=lambda x: x*rate_factor) .drop(['sd', 'mc_error', 'n_eff', 'Rhat'], axis=1) .round(1)) pooled_estimate.columns = outpatient_age_estimates.columns pooled_estimate.index = ['<5', '5+'] outpatient_age_estimates.append(pooled_estimate) # In[95]: outpatient_age_estimates.to_excel('results/outpatient_age.xlsx') # In[96]: mean_data = (outpatient_under_5.groupby('age_group')[['cases', 'n']].mean() .rename(columns={'cases':'mean cases', 'n':'mean n'}) .round(1) .set_index(outpatient_age_estimates.index)) mean_data.join(outpatient_age_estimates) mean_data.to_excel('results/outpatient_mean_data.xlsx') # # Virus Rate Models # In[97]: collected_stools = (rotavirus_data[rotavirus_data.setting==OUTPATIENT].groupby(['age_group', 'year']) .agg({'stool_collected':['sum', 'count']})) levels = collected_stools.columns.levels labels = collected_stools.columns.labels collected_stools.columns = ['stool_collected', 'enrolled'] collected_stools = collected_stools.reset_index() collected_stools # In[98]: outpatient_n_mean.reset_index() # All possible combinations of age, year and setting for index. # In[99]: astro_cases = (rotavirus_data[(rotavirus_data.astrovirus==1) & (rotavirus_data.setting==OUTPATIENT)] .groupby(['age_group', 'year'])[['stool_collected']].count()) astro_cases.columns = ['cases'] astro_cases = astro_cases.reset_index() # In[100]: eligible_counts # In[102]: astro_data = (astro_cases.merge(collected_stools, on=['age_group', 'year']) .merge(outpatient_n_mean.reset_index(), on=['age_group'], how='left') .merge(pd.DataFrame(eligible_counts).reset_index(), on=['age_group', 'year'])) astro_data # In[103]: sapo_cases = (rotavirus_data[(rotavirus_data.sapovirus==1) & (rotavirus_data.setting==OUTPATIENT)] .groupby(['age_group', 'year'])[['stool_collected']].count()) sapo_cases.columns = ['cases'] sapo_cases = sapo_cases.reset_index() # In[104]: sapo_data = (sapo_cases.merge(collected_stools, on=['age_group', 'year']) .merge(outpatient_n_mean.reset_index(), on=['age_group'], how='left') .merge(pd.DataFrame(eligible_counts).reset_index(), on=['age_group', 'year'])) sapo_data.head(10) # In[105]: rota_cases = (rotavirus_data[(rotavirus_data.rotavirus==1) & (rotavirus_data.setting==OUTPATIENT)] .groupby(['age_group', 'year'])[['stool_collected']].count()) rota_cases.columns = ['cases'] rota_cases = rota_cases.reset_index() # In[106]: rota_data = (rota_cases.merge(collected_stools, on=['age_group', 'year']) .merge(outpatient_n_mean.reset_index(), on=['age_group'], how='left') .merge(pd.DataFrame(eligible_counts).reset_index(), on=['age_group', 'year'])) rota_data.head(10) # In[108]: noro_cases = (rotavirus_data[(rotavirus_data.norovirus==1) & (rotavirus_data.setting==OUTPATIENT)] .groupby(['age_group', 'year'])[['stool_collected']].count()) noro_cases.columns = ['cases'] noro_cases = noro_cases.reset_index() # In[109]: noro_data = (noro_cases.merge(collected_stools, on=['age_group', 'year']) .merge(outpatient_n_mean.reset_index(), on=['age_group'], how='left') .merge(pd.DataFrame(eligible_counts).reset_index(), on=['age_group', 'year'])) noro_data.head(10) # In[110]: virus_totals = pd.concat([astro_data.assign(virus='astrovirus'), noro_data.assign(virus='norovirus'), rota_data.assign(virus='rotavirus'), sapo_data.assign(virus='sapovirus')]).reset_index() virus_totals['cases'] = virus_totals.cases.astype(int) virus_totals['n'] = virus_totals.n.astype(int) # ## Outpatient virus rate model # In[111]: virus_lookup = {0:'norovirus', 1:'rotavirus', 2:'sapovirus', 3:'astrovirus'} # In[112]: virus_outpatient_data = (virus_totals.replace({'virus': virus_lookup})) # In[113]: cases, enrolled, n = virus_outpatient_data[['cases', 'enrolled', 'n']].values.T # In[114]: virus_outpatient_under_5 = virus_outpatient_data[virus_outpatient_data.age_group<3].drop('index', axis=1) # In[117]: virus_outpatient_pooled = virus_outpatient_under_5.groupby(['virus', 'year']).agg({'cases':sum, 'stool_collected':sum, 'enrolled':sum, 'n':sum, 'eligible':sum}).reset_index() # In[118]: virus_outpatient_pooled['age_group'] = 3 virus_outpatient_pooled[virus_outpatient_under_5.columns] # In[119]: virus_outpatient_5_over = (virus_outpatient_data[virus_outpatient_data.age_group==3] .drop('index', axis=1) .groupby(['virus', 'year']) .agg({'cases':sum, 'stool_collected':sum, 'enrolled':sum, 'n':sum, 'eligible':sum}) .reset_index()) # In[120]: virus_outpatient_5_over['age_group'] = 4 virus_outpatient_5_over[virus_outpatient_under_5.columns] # In[121]: virus_outpatient_merged = pd.concat([virus_outpatient_under_5, virus_outpatient_pooled[virus_outpatient_under_5.columns], virus_outpatient_5_over[virus_outpatient_under_5.columns]]) # In[124]: with Model() as virus_outpatient_model: groups = virus_outpatient_merged.shape[0] # Probability of stool collection ψ = Beta('ψ', 1, 1) outpatient_enrolled = (rotavirus_data.setting==OUTPATIENT).sum() collected = rotavirus_data[rotavirus_data.setting==OUTPATIENT].stool_collected.sum() stool = Binomial('stool', n=outpatient_enrolled, p=ψ, observed=collected) cases, enrolled, n, eligible = virus_outpatient_merged[['cases', 'enrolled', 'n', 'eligible']].values.T # Adjust for enrollment enrollment_rate = Beta('p_enrolled', 1, 1, shape=groups) enrolled_cases = Binomial('enrolled_cases', eligible, enrollment_rate, observed=enrolled) eligible_cases = Uniform('eligible_cases', lower=cases, upper=eligible, shape=groups) p = monitoring_rate[OUTPATIENT]*ψ*enrollment_rate # Estimate total VUMC cases in setting eligible_cases_likelihood = Binomial('eligible_cases_likelihood', n=eligible_cases, p=p, observed=cases) π = Beta('π', 1, 10, shape=groups) # Data likeihood virus_out_obs = Potential('virus_out_obs', Binomial.dist(n=n, p=π).logp(eligible_cases).sum()) # In[125]: with virus_outpatient_model: virus_outpatient_trace = sample(iterations, tune=tune, njobs=2, random_seed=seed_numbers) # MCMC diagnostic plot # In[157]: energyplot(virus_outpatient_trace) # Posterior distribution of stool sampling rate # In[126]: traceplot(virus_outpatient_trace, varnames=['ψ']) # ### Outpatient virus rates, plotted and tabulated # In[127]: year_labels = ['2012/13','2013/14','2014/15'] pd.DataFrame(list(itertools.product(age_labels + ['<5', '5+'], year_labels)), columns=['age', 'year']) # In[128]: outpatient_sample_melted = pd.melt(pd.DataFrame(virus_outpatient_trace['π'])) # In[129]: outpatient_virus_samples = (virus_outpatient_merged.drop(['enrolled', 'stool_collected', 'cases'], axis=1).reset_index(drop=True) .merge(outpatient_sample_melted, left_index=True, right_on='variable') .replace({'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'<5', 4:'5+'}, 'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'}, 'virus':virus_lookup}) .rename(columns={'age_group':'age group'})) # In[130]: outpatient_rateplot = sns.factorplot(x="year", y="rate", hue="age group", col='virus', data=outpatient_virus_samples.assign(rate=outpatient_virus_samples.value*rate_factor), palette="Blues", size=5, width=0.5, kind='box', linewidth=0.6, col_wrap=2, fliersize=0) outpatient_rateplot.despine(left=True) # In[134]: outpatient_virus_labels = (virus_outpatient_merged.drop(['enrolled', 'n', 'stool_collected', 'cases', 'eligible'], axis=1).reset_index(drop=True) .replace({'year':{0:'2012/13', 1:'2013/14', 2:'2014/15'}, 'virus':virus_lookup}) .rename(columns={'age_group':'age group'})) # In[135]: virus_outpatient_table = generate_table(virus_outpatient_trace, outpatient_virus_labels, index=['age group', 'year'], varnames=['π'], columns=['virus']).sort_index() virus_outpatient_table.to_excel('results/virus_outpatient_table.xlsx') # In[138]: virus_outpatient_table # In[140]: virus_outpatient_table.index = virus_outpatient_table.index.set_levels(age_labels + ['<5', '5+'], level=0) # In[141]: virus_outpatient_table # In[142]: virus_outpatient_table[['astrovirus']] # In[143]: def create_virus_table(virus_name): estimates = virus_outpatient_table[[virus_name]].rename(columns={virus_name:'estimate'}) virus_data = (virus_outpatient_merged[virus_outpatient_merged.virus==virus_name] .set_index(['age_group', 'year']) [['cases', 'stool_collected', 'enrolled', 'n']] .set_index(estimates.index)) return estimates.join(virus_data) # In[144]: astro_table = create_virus_table('astrovirus') astro_table.to_excel('results/outpatient_astro.xlsx') astro_table # In[145]: noro_table = create_virus_table('norovirus') noro_table.to_excel('results/outpatient_noro.xlsx') noro_table # In[146]: sapo_table = create_virus_table('sapovirus') sapo_table.to_excel('results/outpatient_sapo.xlsx') sapo_table # In[147]: rota_table = create_virus_table('rotavirus') rota_table.to_excel('results/outpatient_rota.xlsx') rota_table # ### Pooled (over years) virus rate model # In[148]: virus_outpatient_merged # In[151]: virus_outpatient_pooled = virus_outpatient_merged.groupby(['age_group', 'virus']).agg({'n':lambda x: max(x)*3, 'cases':sum, 'stool_collected':sum, 'enrolled':sum, 'eligible':sum}).reset_index() # In[153]: with Model() as virus_pooled_model: groups = virus_outpatient_pooled.shape[0] # Probability of stool collection ψ = Beta('ψ', 1, 1) outpatient_enrolled = (rotavirus_data.setting==OUTPATIENT).sum() collected = rotavirus_data[rotavirus_data.setting==OUTPATIENT].stool_collected.sum() stool = Binomial('stool', n=outpatient_enrolled, p=ψ, observed=collected) cases, enrolled, n, eligible = virus_outpatient_pooled[['cases', 'enrolled', 'n', 'eligible']].values.T # Adjust for enrollment enrollment_rate = Beta('p_enrolled', 1, 1, shape=groups) enrolled_cases = Binomial('enrolled_cases', eligible, enrollment_rate, observed=enrolled) eligible_cases = Uniform('eligible_cases', lower=cases, upper=eligible, shape=groups) p = monitoring_rate[OUTPATIENT]*ψ*enrollment_rate # Estimate total VUMC cases in setting eligible_cases_likelihood = Binomial('eligible_cases_likelihood', n=eligible_cases, p=p, observed=cases) π = Beta('π', 1, 10, shape=groups) # Data likeihood virus_out_obs = Potential('virus_out_obs', Binomial.dist(n=n, p=π).logp(eligible_cases).sum()) # In[154]: with virus_pooled_model: virus_outpatient_pooled_trace = sample(iterations, tune=tune, njobs=2, random_seed=seed_numbers) # MCMC diagnostic plot # In[156]: energyplot(virus_outpatient_pooled_trace) # In[158]: outpatient_virus_pooled_labels = (virus_outpatient_pooled.drop(['enrolled', 'n', 'stool_collected', 'cases'], axis=1).reset_index(drop=True) .replace({'age_group':{0:'<1', 1:'1', 2:'2-4', 3:'<5', 4:'5+'}, 'virus':virus_lookup}) .rename(columns={'age_group':'age group'})) # In[159]: virus_outpatient_pooled_table = generate_table(virus_outpatient_pooled_trace, outpatient_virus_pooled_labels, index=['age group'], varnames=['π'], columns=['virus']).sort_index() # virus_outpatient_table.to_excel('results/virus_outpatient_table.xlsx') # In[160]: virus_outpatient_pooled_table # In[161]: def create_pooled_table(virus_name): estimates = virus_outpatient_pooled_table[[virus_name]].rename(columns={virus_name:'estimate'}) virus_data = (virus_outpatient_pooled[virus_outpatient_pooled.virus==virus_name] .set_index(['age_group']) [['cases', 'stool_collected', 'enrolled', 'n']] .set_index(estimates.index)) return estimates.join(virus_data).loc[['<1', '1', '2-4', '<5', '5+']] # In[162]: astro_pooled = create_pooled_table('astrovirus') astro_pooled.to_excel('results/outpatient_astro_pooled.xlsx') astro_pooled # In[163]: noro_pooled = create_pooled_table('norovirus') noro_pooled.to_excel('results/outpatient_noro_pooled.xlsx') noro_pooled # In[164]: sapo_pooled = create_pooled_table('sapovirus') sapo_pooled.to_excel('results/outpatient_sapo_pooled.xlsx') sapo_pooled # In[165]: rota_pooled = create_pooled_table('rotavirus') rota_pooled.to_excel('results/outpatient_rota_pooled.xlsx') rota_pooled