#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import pandas as pd import numpy as np import matplotlib.pyplot as plt from datetime import datetime import seaborn as sb import pymc3 as pm import theano.tensor as tt sb.set_style("white") # Import data # In[102]: hospitalized = pd.read_csv('data/hospitalized.csv', index_col=0) # In[3]: # Positive culture lookup pcr_lookup = {'pcr_result___1': 'RSV', 'pcr_result___2': 'HMPV', 'pcr_result___3': 'flu A', 'pcr_result___4': 'flu B', 'pcr_result___5': 'rhino', 'pcr_result___6': 'PIV1', 'pcr_result___7': 'PIV2', 'pcr_result___8': 'PIV3', 'pcr_result___13': 'H1N1', 'pcr_result___14': 'H3N2', 'pcr_result___15': 'Swine', 'pcr_result___16': 'Swine H1', 'pcr_result___17': 'flu C', 'pcr_result___18': 'Adeno'} # In[4]: hospitalized['RSV'] = hospitalized.pcr_result___1.astype(bool) hospitalized['HMPV'] = hospitalized.pcr_result___2.astype(bool) hospitalized['Rhino'] = hospitalized.pcr_result___5.astype(bool) hospitalized['Influenza'] = (hospitalized.pcr_result___3 | hospitalized.pcr_result___4 | hospitalized.pcr_result___17) hospitalized['Adeno'] = hospitalized.pcr_result___18.astype(bool) hospitalized['PIV'] = (hospitalized.pcr_result___6 | hospitalized.pcr_result___7 | hospitalized.pcr_result___8) hospitalized['No virus'] = hospitalized[list(pcr_lookup.keys())].sum(1) == 0 hospitalized['All'] = True # In[5]: viruses = ['RSV', 'HMPV', 'Rhino', 'Influenza', 'Adeno', 'PIV', 'No virus', 'All'] # In[6]: non_rsv_lookup = pcr_lookup.copy() non_rsv_lookup.pop('pcr_result___1') # Identify individuals with coinfection # In[7]: hospitalized['coinfection'] = hospitalized[list(pcr_lookup.keys())].sum(1) > 1 # Virus frequency by coinfection status # In[8]: hospitalized[(~hospitalized.coinfection) & (hospitalized.RSV)].shape[0]/hospitalized.shape[0] # In[9]: hospitalized['male'] = (hospitalized.sex=='M').astype(int) age_groups = pd.get_dummies(pd.cut(hospitalized.age_months, [0,1,11,23])) age_groups.index = hospitalized.index age_groups.columns = 'under 2 months', '2-11 months', '12-23 months' hospitalized = hospitalized.join(age_groups) # Diagnosis # In[10]: hospitalized['ros'] = hospitalized.adm_sepsis | hospitalized.adm_febrile # In[11]: hospitalized['pertussis-like cough'] = hospitalized.adm_pertussis | hospitalized.adm_cough # In[12]: hospitalized['brochopneumonia'] = hospitalized.adm_bronchopneumo # In[13]: hospitalized['bronchiolitis'] = hospitalized.adm_bronchiolitis # In[14]: hospitalized['pneumonia'] = hospitalized.adm_pneumo # ## Rate Estimation # In[15]: hospitalized.admission_date = pd.to_datetime(hospitalized.admission_date) hospitalized.admission_date.describe() # Age groups: # # - Under 2 months # - 2-5 mo. # - 6-11 mo. # - Over 11 mo. # In[16]: age_groups = pd.get_dummies(pd.cut(hospitalized.age_months, [0,1,5,11,24], include_lowest=True)) age_groups.index = hospitalized.index age_groups.columns = 'age_under_2', 'age_2_5', 'age_6_11', 'age_over_11' hospitalized = hospitalized.join(age_groups) # In[17]: age_group_lookup = {'age_under_2': '<2', 'age_2_5': '2-5', 'age_6_11': '6-11', 'age_over_11': '>11'} # ## Population rate estimation # # Recode year to virus season: # # - 2011: March 2010 - March 2011 # - 2012: Apr 2011 - Mar 2012 # - 2013: April 2012 - Mar 2013 # In[18]: hospitalized['virus_year'] = 2011 hospitalized.loc[(hospitalized.admission_date >= '2011-03-31') & (hospitalized.admission_date <= '2012-03-31'), 'virus_year'] = 2012 hospitalized.loc[hospitalized.admission_date > '2012-03-31', 'virus_year'] = 2013 hospitalized.virus_year.value_counts() # Recode zones # In[19]: zone_string = "1, Amman Zone 1 | 2, Abdoun Zone 1 | 3, Abu Alanda Zone 3 | 4, Abu Nusair Zone 2 | 5, Airport street Zone 4 | 6, Al.Ashrafeyeh Zone 5 | 7, Al.Badya Zone 31 | 8, Al.Baqa'a Zone 22 | 9, Al.Ghour Zone 32 | 10, Al.Hashmi Zone 6 | 11, Al.Hezam Zone 29 | 12, Al.Hussein Camping Zone 1 | 13, Al.Istiklal Zone 1 | 14, Al.Jeeza Zone 8 | 15, Al.Joufeh Zone 5 | 16, Al.Karak Zone 35 | 17, Al.Lubban Zone 16 | 18, Al.Madenah Al.Reyadeyeh Zone 1 | 19, Al.Mahatta Zone 6 | 20, Al.Manarah Zone 5 | 21, Al.Mareikh Zone 5 | 22, Al.Marqab Zone 6 | 23, Al.Muhajreen Zone 5 | 24, Al.Musdar Zone 5 | 25, Al.Musherfeh Zone 15 | 26, Al.Naser Zone 6 | 27, Al.Natheef Zone 5 | 28, Al.Nuzha Zone 1 | 29, Al.Qastal Zone 10 | 30, Al.Qwesmeh Zone 5 | 31, Al.Shouneh Zone 32 | 32, Al.Taj Zone 5 | 33, Al.Taybeh Zone 7 | 34, Aqaba Zone 30 | 35, Arjan Zone 1 | 36, Bayader Wadi AL.Seer Zone 20 | 37, Bnayyat Zone 12 | 38, D.Al.Ameer Ali Zone 13 | 39, D.Al.Aqsa Zone 9 | 40, D.Haj Hasan Zone 9 | 41, Daheyet Al.Rasheed Zone 17 | 42, Daheyet al.Yasmeen Zone 1 | 43, Dead Sea Zone 32 | 44, Deer.Al.Ghbar Zone 1 | 45, Down Town (Al.Balad) Zone 1 | 46, Dra'a al.Qarbi Zone 1 | 47, Ein Al.Basha Zone 22 | 48, Ein Ghazal Zone 6 | 49, Eskan Al.Ameer Hashem Zone 29 | 50, Eskan Al.Ameer Talal Zone 29 | 51, Etha'a wal Telvesion Zone 9 | 52, Hay Al.Dabaybeh Zone 5 | 53, Hay Al.Tafayleh Zone 5 | 54, Hay Nazzal Zone 1 | 55, Huttein (shneler ) Refugee camping Zone 6 | 56, Iraq Al.Ameer Zone 23 | 57, Jabal AL.Akhdar Zone 1 | 58, Jabal Al.Ameer Faisal Zone 29 | 59, Jabal AL.Hadeed Zone 5 | 60, Jabal Al.Hussein Zone 1 | 61, Jabal Al.Qosoor Zone 1 | 62, Jabal Amman Zone 1 | 63, Jarash Zone 36 | 64, Jawa Zone 7 | 65, Juwaydeh Zone 14 | 66, Khalda Zone 18 | 67, Khreibt Al.Souk Zone 7 | 68, Ma'an Zone 31 | 69, Madaba Zone 34 | 70, Mafraq Zone 33 | 71, Marj Al.Hamam Zone 19 | 72, Marka Zone 6 | 73, Muqableen Zone 9 | 74, Muwaqqar Zone 28 | 75, Nadi Al.Sebaq Zone 6 | 76, Naur Zone 19 | 77, Petra Zone 30 | 78, Qatranah Zone 30 | 79, Raghadan Zone 1 | 80, Ras Al.Ein Zone 1 | 81, Rusayfah Zone 29 | 82, Saffout Zone 22 | 83, Sahab Zone 11 | 84, Salheyet Al.Abed Zone 6 | 85, Shafa Badran Zone 25 | 86, Sharq Al.Awsat Zone 11 | 87, Shemasani Zone 1 | 88, Street 30 Zone 5 | 89, Summaya Street Zone 5 | 90, Suweileh Zone 27 | 91, Tabarbour Zone 21 | 92, Tla'a al Ali Zone 17 | 93, Um Al.Heran Zone 11 | 94, Um Al.Summaq Zone 17 | 95, Um Nuwwara and Adan Zone 5 | 96, Um Uthayna Zone 1 | 97, Wadi Abdoun Zone 1 | 98, Wadi AL.Haddadeh Zone 1 | 99, Wadi AL.Hajar Zone 29 | 100, Wadi Al.Remam Zone 5 | 101, Wadi AL.Seer Zone 20 | 102, Wadi Saqra Zone 1 | 103, Wehdat Zone 11 | 104, Yadoudeh Zone 13 | 105, Yajouz Zone 26 | 106, Zarqa Zone 29 | 107, Zezya Zone 24" # In[20]: zones = [z.strip().split(',') for z in zone_string.split('|')] # In[21]: zone_dict = {int(n):int(s.strip().split(' ')[-1]) for n,s in zones} # In[22]: hospitalized['zone'] = hospitalized.city_zone.replace(zone_dict) # Define Amman zone # In[23]: hospitalized['amman_zone'] = hospitalized.zone<28 # Hospitalized in Amman # In[24]: hospitalized_amman = hospitalized[hospitalized.amman_zone] # In[25]: hospitalized_amman.shape # In[26]: assert hospitalized_amman.index.is_unique # In[27]: hosp_age_counts = hospitalized_amman.groupby('admission_date')[age_groups.columns].sum().resample('M').sum().values # In[28]: pre_2013 = hospitalized_amman.admission_date < datetime(2013, 1, 1) # In[29]: age_counts = hospitalized_amman[pre_2013].groupby('admission_date')[age_groups.columns].sum().resample('M').sum() age_counts # Rates and demographics (via [World Bank](http://databank.worldbank.org)) and 2004 census data (via Jordan [Department of Statistics](http://www.dos.gov.jo/dos_home_e/main/population/census2004/group3/table_31.pdf)). # In[30]: # Jordan population, 2008-2012 population = 5786000, 5915000, 6046000, 6181000, 6318000 # Interpolated population by gender and age, 2010-2012 female_0 = 93649, 95739, 96486 male_0 = 98435, 100525, 101160 female_1 = 87941, 93511, 93067 male_1 = 92548, 98306, 97763 kids_0 = np.array(female_0) + np.array(male_0) kids_1 = np.array(female_1) + np.array(male_1) kids = kids_0 + kids_1 kids_under6mo = kids_6to12mo = kids_0/2. # Proportion in Amman amman_urban_2004 = 1784502. jordan_2004 = 5103639. amman_prop = amman_urban_2004 / jordan_2004 # Birth rates (per 1000) birth_rate = 29.665, 29.322, 28.869, 28.317, 27.699 # Neonatal mortality (per 1000) neonatal_mort = 12.7, 12.4, 12.1, 11.8, 11.5 # Infant mortality (per 1000) infant_mort = 18.4, 17.9, 17.3, 16.8, 16.4 # In[31]: births = np.array(population[-3:])/1000. * birth_rate[-3:] births # In[32]: births_6m = births/2. births_6m # In[33]: deaths_6m = births_6m/1000. * infant_mort[-3:] deaths_6m # In[34]: amman_prop # In[35]: (births_6m - deaths_6m)*amman_prop # In[36]: n = np.array((kids_under6mo, kids_6to12mo, kids_1, kids)) n # In[37]: kids_0 # In[38]: kids_1/kids_0 # In[39]: amman_prop # In[40]: n_amman = np.floor(n*amman_prop).astype(int) # In[41]: n_amman # Monthly admissions in Ammaon # In[42]: admissions_by_month = hospitalized_amman.groupby('admission_date')['child_name'].count().resample('1M').sum() # In[43]: admissions_by_month.head() # In[44]: # Enrolling 5 days per week p_enroll = 5./7 # 'age_under_2', 'age_2_5', 'age_6_11', 'age_over_11' # In[45]: hospitalized_amman['admission_year'] = hospitalized_amman.admission_date.apply(lambda x: x.year) _under_6 = hospitalized_amman[hospitalized_amman.age_under_2.astype(bool) | hospitalized_amman.age_2_5].groupby('admission_year')['child_name'].count() # In[46]: _6_11 = hospitalized_amman[hospitalized_amman.age_6_11.astype(bool)].groupby('admission_year')['child_name'].count() # In[47]: _over_11 = hospitalized_amman[hospitalized_amman.age_over_11.astype(bool)].groupby('admission_year')['child_name'].count() # In[48]: _all = hospitalized_amman.groupby('admission_year')['child_name'].count() # In[49]: hospitalized_amman.set_index(hospitalized_amman.admission_date).groupby([pd.TimeGrouper('M')]).count()['mother_name'] # In[50]: diagnosis_df = pd.concat([_under_6, _6_11, _over_11, _all], axis=1) diagnosis_df.columns = ('under 6 mo.', '6-11 mo.', '11-23 mo.', 'all under 2 yr.') diagnosis_df # Use proportion in Amman to calculate proportion of kids in each age group and year in Amman # In[59]: amman_prop # In[65]: n.T.ravel() # In[60]: n[:-1] # In[72]: from theano import shared def rate_model(diagnosis): # Extract data subset for passed virus diagnosis_subset = hospitalized_amman[hospitalized_amman[diagnosis]==1].copy() # Create data frame of age x year counts u6 = diagnosis_subset.age_under_2.astype(bool) | diagnosis_subset.age_2_5 _under_6 = diagnosis_subset[u6].groupby('virus_year')['child_name'].count() _6_11 = diagnosis_subset[diagnosis_subset.age_6_11.astype(bool)].groupby('virus_year')['child_name'].count() _over_11 = diagnosis_subset[diagnosis_subset.age_over_11.astype(bool)].groupby('virus_year')['child_name'].count() _all = diagnosis_subset.groupby('virus_year')['child_name'].count() diagnosis_df = pd.concat([_under_6, _6_11, _over_11, _all], axis=1) diagnosis_df.columns = ('under 6 mo.', '6-11 mo.', '11-23 mo.', 'all ages') diagnosis_array = diagnosis_df.values.ravel() print(diagnosis_array) kids = n.T.ravel() print(kids) with pm.Model() as model: # Al Bashir hospital market share market_share = pm.Uniform('market_share', 0.5, 0.6) # Correct for 5 days of enrollment per week p_hosp = market_share * (5./7) # Number of 1 y.o. in Amman n_amman = pm.Binomial('n_amman', kids, amman_prop, shape=kids.shape) # rng = tt.shared_randomstreams.RandomStreams() # n_amman = rng.binomial(n=kids.astype(int), p=amman_prop, size=kids.shape) # Prior probability prev_diag = pm.Beta('prev_diag', 1, 5, shape=diagnosis_array.size) per_1000 = pm.Deterministic('per_10000', prev_diag*10000) # Infected count in Amman y_amman = pm.Binomial('y_amman', n_amman, prev_diag, shape=diagnosis_df.size) # y_amman = rng.binomial(n=n_amman, p=prev_diag, size=(diagnosis_df.size,)) # Likelihood for number with virus in hospital (assumes Pr(hosp | virus) = 1) pm.Binomial('y_hosp', y_amman, p_hosp, observed=diagnosis_array) return model # ### Pneumonia rates # In[73]: niterations = 100000 nkeep = 10000 # In[74]: with rate_model('pneumonia') as pneumo_model: trace_pneumo = pm.sample(niterations, step=pm.Metropolis(), njobs=2) # In[75]: prevalence_labels = ['%s %s' % (year, age) for year in ('2011', '2012', '2013') for age in ('under 6 mo.', '6-11 mo.', '12-23 mo.', 'all ages')] # In[76]: pm.forestplot(trace_pneumo[-nkeep:], varnames=['per_10000'], ylabels=prevalence_labels, main='Pneumonia (per 10000)') # In[77]: pm.summary(trace_pneumo[-nkeep:], varnames=['per_10000']) # In[85]: pneumo_df = pm.df_summary(trace_pneumo[-nkeep:], varnames=['per_10000'], ) pneumo_df.index = prevalence_labels # In[86]: pneumo_df # In[93]: pneumo_df.to_csv('pneumonia_per_10000.csv') # ### Brochopneumonia rates # In[89]: with rate_model('brochopneumonia') as bpneumo_model: trace_bpneumo = pm.sample(niterations, njobs=2) # In[90]: pm.forestplot(trace_bpneumo[-nkeep:], varnames=['per_10000'], ylabels=prevalence_labels, main='Bronchopneumonia (per 10000)') # In[91]: pm.summary(trace_bpneumo[-nkeep:], varnames=['per_10000']) # In[92]: bpneumo_df = pm.df_summary(trace_bpneumo[-nkeep:], varnames=['per_10000'], ) bpneumo_df.index = prevalence_labels # In[97]: bpneumo_df.to_csv('bronchopneumonia_per_10000.csv') # ### Bronchiolitis rates # In[95]: with rate_model('bronchiolitis') as bronchiolitis_model: trace_bronchiolitis = pm.sample(niterations, njobs=2) # In[98]: pm.forestplot(trace_bronchiolitis[-nkeep:], varnames=['per_10000'], ylabels=prevalence_labels, main='Bronchiolitis (per 10000)') # In[99]: pm.summary(trace_bronchiolitis[-nkeep:], varnames=['per_10000']) # In[100]: bronchiolitis_df = pm.df_summary(trace_bronchiolitis[-nkeep:], varnames=['per_10000'], ) bronchiolitis_df.index = prevalence_labels # In[101]: bronchiolitis_df.to_csv('bronchiolitis_per_10000.csv')