In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sb
import pymc3 as pm
import theano.tensor as tt
sb.set_style("white")

Import data

In [102]:
hospitalized = pd.read_csv('data/hospitalized.csv', index_col=0)
/Users/fonnescj/anaconda3/envs/dev/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (141,143,145,147,149,182,207,213,214,263,284,285,286,300,301) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
In [3]:
# Positive culture lookup
pcr_lookup = {'pcr_result___1': 'RSV',
'pcr_result___2': 'HMPV',
'pcr_result___3': 'flu A',
'pcr_result___4': 'flu B',
'pcr_result___5': 'rhino',
'pcr_result___6': 'PIV1',
'pcr_result___7': 'PIV2',
'pcr_result___8': 'PIV3',
'pcr_result___13': 'H1N1',
'pcr_result___14': 'H3N2',
'pcr_result___15': 'Swine',
'pcr_result___16': 'Swine H1',
'pcr_result___17': 'flu C',
'pcr_result___18': 'Adeno'}
In [4]:
hospitalized['RSV'] = hospitalized.pcr_result___1.astype(bool)
hospitalized['HMPV'] = hospitalized.pcr_result___2.astype(bool)
hospitalized['Rhino'] = hospitalized.pcr_result___5.astype(bool)
hospitalized['Influenza'] = (hospitalized.pcr_result___3 | hospitalized.pcr_result___4 |
                             hospitalized.pcr_result___17)
hospitalized['Adeno'] = hospitalized.pcr_result___18.astype(bool)
hospitalized['PIV'] = (hospitalized.pcr_result___6 | hospitalized.pcr_result___7 | 
                       hospitalized.pcr_result___8)
hospitalized['No virus'] = hospitalized[list(pcr_lookup.keys())].sum(1) == 0
hospitalized['All'] = True
In [5]:
viruses = ['RSV', 'HMPV', 'Rhino', 'Influenza', 'Adeno', 'PIV', 'No virus', 'All']
In [6]:
non_rsv_lookup = pcr_lookup.copy()
non_rsv_lookup.pop('pcr_result___1')
Out[6]:
'RSV'

Identify individuals with coinfection

In [7]:
hospitalized['coinfection'] = hospitalized[list(pcr_lookup.keys())].sum(1) > 1

Virus frequency by coinfection status

In [8]:
hospitalized[(~hospitalized.coinfection) & (hospitalized.RSV)].shape[0]/hospitalized.shape[0]
Out[8]:
0.2297979797979798
In [9]:
hospitalized['male'] = (hospitalized.sex=='M').astype(int)
age_groups = pd.get_dummies(pd.cut(hospitalized.age_months, [0,1,11,23]))
age_groups.index = hospitalized.index
age_groups.columns = 'under 2 months', '2-11 months', '12-23 months'
hospitalized = hospitalized.join(age_groups)

Diagnosis

In [10]:
hospitalized['ros'] = hospitalized.adm_sepsis | hospitalized.adm_febrile
In [11]:
hospitalized['pertussis-like cough'] = hospitalized.adm_pertussis | hospitalized.adm_cough
In [12]:
hospitalized['brochopneumonia'] = hospitalized.adm_bronchopneumo
In [13]:
hospitalized['bronchiolitis'] = hospitalized.adm_bronchiolitis
In [14]:
hospitalized['pneumonia'] = hospitalized.adm_pneumo

Rate Estimation

In [15]:
hospitalized.admission_date = pd.to_datetime(hospitalized.admission_date)
hospitalized.admission_date.describe()
count                    3168
unique                    794
top       2011-02-07 00:00:00
freq                       17
first     2010-03-15 00:00:00
last      2013-03-30 00:00:00
Name: admission_date, dtype: object

Age groups:

  • Under 2 months
  • 2-5 mo.
  • 6-11 mo.
  • Over 11 mo.
In [16]:
age_groups = pd.get_dummies(pd.cut(hospitalized.age_months, [0,1,5,11,24], include_lowest=True))
age_groups.index = hospitalized.index
age_groups.columns = 'age_under_2', 'age_2_5', 'age_6_11', 'age_over_11'
hospitalized = hospitalized.join(age_groups)
In [17]:
age_group_lookup = {'age_under_2': '<2', 
                    'age_2_5': '2-5', 
                    'age_6_11': '6-11', 
                    'age_over_11': '>11'}

Population rate estimation

Recode year to virus season:

  • 2011: March 2010 - March 2011
  • 2012: Apr 2011 - Mar 2012
  • 2013: April 2012 - Mar 2013
In [18]:
hospitalized['virus_year'] = 2011
hospitalized.loc[(hospitalized.admission_date >= '2011-03-31') 
                 & (hospitalized.admission_date <= '2012-03-31'), 'virus_year'] = 2012
hospitalized.loc[hospitalized.admission_date > '2012-03-31', 'virus_year'] = 2013

hospitalized.virus_year.value_counts()
2012    1191
2013    1179
2011     798
Name: virus_year, dtype: int64

Recode zones

In [19]:
zone_string = "1, Amman Zone 1 | 2, Abdoun Zone 1 | 3, Abu Alanda Zone 3 | 4, Abu Nusair Zone 2 | 5, Airport street Zone 4 | 6, Al.Ashrafeyeh Zone 5 | 7, Al.Badya Zone 31 | 8, Al.Baqa'a Zone 22 | 9, Al.Ghour Zone 32 | 10, Al.Hashmi Zone 6 | 11, Al.Hezam Zone 29 | 12, Al.Hussein Camping Zone 1 | 13, Al.Istiklal Zone 1 | 14, Al.Jeeza Zone 8 | 15, Al.Joufeh Zone 5 | 16, Al.Karak Zone 35 | 17, Al.Lubban Zone 16 | 18, Al.Madenah Al.Reyadeyeh Zone 1 | 19, Al.Mahatta Zone 6 | 20, Al.Manarah Zone 5 | 21, Al.Mareikh Zone 5 | 22, Al.Marqab Zone 6 | 23, Al.Muhajreen Zone 5 | 24, Al.Musdar Zone 5 | 25, Al.Musherfeh Zone 15 | 26, Al.Naser Zone 6 | 27, Al.Natheef Zone 5 | 28, Al.Nuzha Zone 1 | 29, Al.Qastal Zone 10 | 30, Al.Qwesmeh Zone 5 | 31, Al.Shouneh Zone 32 | 32, Al.Taj Zone 5 | 33, Al.Taybeh Zone 7 | 34, Aqaba Zone 30 | 35, Arjan Zone 1 | 36, Bayader Wadi AL.Seer Zone 20 | 37, Bnayyat Zone 12 | 38, D.Al.Ameer Ali Zone 13 | 39, D.Al.Aqsa Zone 9 | 40, D.Haj Hasan Zone 9 | 41, Daheyet Al.Rasheed Zone 17 | 42, Daheyet al.Yasmeen Zone 1 | 43, Dead Sea Zone 32 | 44, Deer.Al.Ghbar Zone 1 | 45, Down Town (Al.Balad) Zone 1 | 46, Dra'a al.Qarbi Zone 1 | 47, Ein Al.Basha Zone 22 | 48, Ein Ghazal Zone 6 | 49, Eskan Al.Ameer Hashem Zone 29 | 50, Eskan Al.Ameer Talal Zone 29 | 51, Etha'a wal Telvesion Zone 9 | 52, Hay Al.Dabaybeh Zone 5 | 53, Hay Al.Tafayleh Zone 5 | 54, Hay Nazzal Zone 1 | 55, Huttein (shneler ) Refugee camping Zone 6 | 56, Iraq Al.Ameer Zone 23 | 57, Jabal AL.Akhdar Zone 1 | 58, Jabal Al.Ameer Faisal Zone 29 | 59, Jabal AL.Hadeed Zone 5 | 60, Jabal Al.Hussein Zone 1 | 61, Jabal Al.Qosoor Zone 1 | 62, Jabal Amman Zone 1 | 63, Jarash Zone 36 | 64, Jawa Zone 7 | 65, Juwaydeh Zone 14 | 66, Khalda Zone 18 | 67, Khreibt Al.Souk Zone 7 | 68, Ma'an Zone 31 | 69, Madaba Zone 34 | 70, Mafraq Zone 33 | 71, Marj Al.Hamam Zone 19 | 72, Marka Zone 6 | 73, Muqableen Zone 9 | 74, Muwaqqar Zone 28 | 75, Nadi Al.Sebaq Zone 6 | 76, Naur Zone 19 | 77, Petra Zone 30 | 78, Qatranah Zone 30 | 79, Raghadan Zone 1 | 80, Ras Al.Ein Zone 1 | 81, Rusayfah Zone 29 | 82, Saffout Zone 22 | 83, Sahab Zone 11 | 84, Salheyet Al.Abed Zone 6 | 85, Shafa Badran Zone 25 | 86, Sharq Al.Awsat Zone 11 | 87, Shemasani Zone 1 | 88, Street 30 Zone 5 | 89, Summaya Street Zone 5 | 90, Suweileh Zone 27 | 91, Tabarbour Zone 21 | 92, Tla'a al Ali Zone 17 | 93, Um Al.Heran Zone 11 | 94, Um Al.Summaq Zone 17 | 95, Um Nuwwara and Adan Zone 5 | 96, Um Uthayna Zone 1 | 97, Wadi Abdoun Zone 1 | 98, Wadi AL.Haddadeh Zone 1 | 99, Wadi AL.Hajar Zone 29 | 100, Wadi Al.Remam Zone 5 | 101, Wadi AL.Seer Zone 20 | 102, Wadi Saqra Zone 1 | 103, Wehdat Zone 11 | 104, Yadoudeh Zone 13 | 105, Yajouz Zone 26 | 106, Zarqa Zone 29 | 107, Zezya Zone 24"
In [20]:
zones = [z.strip().split(',') for z in zone_string.split('|')]
In [21]:
zone_dict = {int(n):int(s.strip().split(' ')[-1]) for n,s in zones}
In [22]:
hospitalized['zone'] = hospitalized.city_zone.replace(zone_dict)

Define Amman zone

In [23]:
hospitalized['amman_zone'] = hospitalized.zone<28

Hospitalized in Amman

In [24]:
hospitalized_amman = hospitalized[hospitalized.amman_zone]
In [25]:
hospitalized_amman.shape
Out[25]:
(3048, 433)
In [26]:
assert hospitalized_amman.index.is_unique
In [27]:
hosp_age_counts = hospitalized_amman.groupby('admission_date')[age_groups.columns].sum().resample('M').sum().values
In [28]:
pre_2013 = hospitalized_amman.admission_date <  datetime(2013, 1, 1)
In [29]:
age_counts = hospitalized_amman[pre_2013].groupby('admission_date')[age_groups.columns].sum().resample('M').sum()
age_counts
age_under_2 age_2_5 age_6_11 age_over_11
admission_date
2010-03-31 15 16 14 3
2010-04-30 17 22 14 6
2010-05-31 12 16 11 2
2010-06-30 21 6 7 3
2010-07-31 19 6 2 2
2010-08-31 7 8 3 3
2010-09-30 10 5 4 2
2010-10-31 5 5 4 4
2010-11-30 5 5 8 7
2010-12-31 7 8 5 10
2011-01-31 32 58 34 21
2011-02-28 54 67 39 24
2011-03-31 40 31 18 23
2011-04-30 23 37 26 12
2011-05-31 24 34 11 17
2011-06-30 25 17 13 9
2011-07-31 20 16 10 10
2011-08-31 21 7 0 3
2011-09-30 17 8 5 8
2011-10-31 20 9 6 10
2011-11-30 10 14 7 14
2011-12-31 23 21 21 18
2012-01-31 60 68 37 37
2012-02-29 55 83 39 25
2012-03-31 49 71 37 33
2012-04-30 34 43 32 23
2012-05-31 34 27 15 11
2012-06-30 23 21 15 10
2012-07-31 29 18 8 14
2012-08-31 21 11 7 5
2012-09-30 13 17 15 10
2012-10-31 16 20 4 9
2012-11-30 11 17 5 9
2012-12-31 43 46 31 17

Rates and demographics (via World Bank) and 2004 census data (via Jordan Department of Statistics).

In [30]:
# Jordan population, 2008-2012
population = 5786000, 5915000, 6046000, 6181000, 6318000

# Interpolated population by gender and age, 2010-2012
female_0 = 93649, 95739, 96486
male_0 = 98435, 100525, 101160
female_1 = 87941, 93511, 93067
male_1 = 92548, 98306, 97763

kids_0 = np.array(female_0) + np.array(male_0)
kids_1 = np.array(female_1) + np.array(male_1)
kids = kids_0 + kids_1

kids_under6mo = kids_6to12mo = kids_0/2.

# Proportion in Amman
amman_urban_2004 = 1784502.
jordan_2004 = 5103639.
amman_prop = amman_urban_2004 / jordan_2004

# Birth rates (per 1000)
birth_rate = 29.665, 29.322, 28.869, 28.317, 27.699

# Neonatal mortality (per 1000)
neonatal_mort = 12.7, 12.4, 12.1, 11.8, 11.5

# Infant mortality (per 1000)
infant_mort = 18.4, 17.9, 17.3, 16.8, 16.4
In [31]:
births = np.array(population[-3:])/1000. * birth_rate[-3:]
births
Out[31]:
array([ 174541.974,  175027.377,  175002.282])
In [32]:
births_6m = births/2.
births_6m
Out[32]:
array([ 87270.987 ,  87513.6885,  87501.141 ])
In [33]:
deaths_6m = births_6m/1000. * infant_mort[-3:]
deaths_6m
Out[33]:
array([ 1509.7880751,  1470.2299668,  1435.0187124])
In [34]:
amman_prop
Out[34]:
0.3496528653378501
In [35]:
(births_6m - deaths_6m)*amman_prop
Out[35]:
array([ 29986.6489389 ,  30085.34181971,  30093.26626638])
In [36]:
n = np.array((kids_under6mo, kids_6to12mo, kids_1, kids))
n
Out[36]:
array([[  96042.,   98132.,   98823.],
       [  96042.,   98132.,   98823.],
       [ 180489.,  191817.,  190830.],
       [ 372573.,  388081.,  388476.]])
In [37]:
kids_0
Out[37]:
array([192084, 196264, 197646])
In [38]:
kids_1/kids_0
Out[38]:
array([ 0.93963578,  0.97734174,  0.9655141 ])
In [39]:
amman_prop
Out[39]:
0.3496528653378501
In [40]:
n_amman = np.floor(n*amman_prop).astype(int)
In [41]:
n_amman
Out[41]:
array([[ 33581,  34312,  34553],
       [ 33581,  34312,  34553],
       [ 63108,  67069,  66724],
       [130271, 135693, 135831]])

Monthly admissions in Ammaon

In [42]:
admissions_by_month = hospitalized_amman.groupby('admission_date')['child_name'].count().resample('1M').sum()
In [43]:
admissions_by_month.head()
admission_date
2010-03-31    48
2010-04-30    59
2010-05-31    41
2010-06-30    37
2010-07-31    29
Freq: M, Name: child_name, dtype: int64
In [44]:
# Enrolling 5 days per week
p_enroll = 5./7

'age_under_2', 'age_2_5', 'age_6_11', 'age_over_11'

In [45]:
hospitalized_amman['admission_year'] = hospitalized_amman.admission_date.apply(lambda x: x.year)
_under_6 = hospitalized_amman[hospitalized_amman.age_under_2.astype(bool) | hospitalized_amman.age_2_5].groupby('admission_year')['child_name'].count()
/Users/fonnescj/anaconda3/envs/dev/lib/python3.6/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
In [46]:
_6_11 = hospitalized_amman[hospitalized_amman.age_6_11.astype(bool)].groupby('admission_year')['child_name'].count()
In [47]:
_over_11 = hospitalized_amman[hospitalized_amman.age_over_11.astype(bool)].groupby('admission_year')['child_name'].count()
In [48]:
_all = hospitalized_amman.groupby('admission_year')['child_name'].count()
In [49]:
hospitalized_amman.set_index(hospitalized_amman.admission_date).groupby([pd.TimeGrouper('M')]).count()['mother_name']
admission_date
2010-03-31     48
2010-04-30     59
2010-05-31     41
2010-06-30     37
2010-07-31     29
2010-08-31     21
2010-09-30     21
2010-10-31     18
2010-11-30     25
2010-12-31     30
2011-01-31    145
2011-02-28    184
2011-03-31    112
2011-04-30     98
2011-05-31     84
2011-06-30     64
2011-07-31     56
2011-08-31     31
2011-09-30     38
2011-10-31     45
2011-11-30     45
2011-12-31     83
2012-01-31    202
2012-02-29    202
2012-03-31    190
2012-04-30    132
2012-05-31     87
2012-06-30     69
2012-07-31     68
2012-08-31     44
2012-09-30     55
2012-10-31     49
2012-11-30     42
2012-12-31    137
2013-01-31    179
2013-02-28    158
2013-03-31    117
Name: mother_name, dtype: int64
In [50]:
diagnosis_df = pd.concat([_under_6, _6_11, _over_11, _all], axis=1)
diagnosis_df.columns = ('under 6 mo.', '6-11 mo.', '11-23 mo.', 'all under 2 yr.')
diagnosis_df
under 6 mo. 6-11 mo. 11-23 mo. all under 2 yr.
admission_year
2010 215 72 42 329
2011 628 190 169 987
2012 830 245 203 1278
2013 298 98 58 454

Use proportion in Amman to calculate proportion of kids in each age group and year in Amman

In [59]:
amman_prop
Out[59]:
0.3496528653378501
In [65]:
n.T.ravel()
Out[65]:
array([  96042.,   96042.,  180489.,  372573.,   98132.,   98132.,
        191817.,  388081.,   98823.,   98823.,  190830.,  388476.])
In [60]:
n[:-1]
Out[60]:
array([[  96042.,   98132.,   98823.],
       [  96042.,   98132.,   98823.],
       [ 180489.,  191817.,  190830.],
       [ 372573.,  388081.,  388476.]])
In [72]:
from theano import shared

def rate_model(diagnosis):
    
    # Extract data subset for passed virus
    diagnosis_subset = hospitalized_amman[hospitalized_amman[diagnosis]==1].copy()
    
    # Create data frame of age x year counts
    u6 = diagnosis_subset.age_under_2.astype(bool) | diagnosis_subset.age_2_5
    _under_6 = diagnosis_subset[u6].groupby('virus_year')['child_name'].count()
    _6_11 = diagnosis_subset[diagnosis_subset.age_6_11.astype(bool)].groupby('virus_year')['child_name'].count()
    _over_11 = diagnosis_subset[diagnosis_subset.age_over_11.astype(bool)].groupby('virus_year')['child_name'].count()
    _all = diagnosis_subset.groupby('virus_year')['child_name'].count()
    
    diagnosis_df = pd.concat([_under_6, _6_11, _over_11, _all], axis=1)
    diagnosis_df.columns = ('under 6 mo.', '6-11 mo.', '11-23 mo.', 'all ages')
    
    diagnosis_array = diagnosis_df.values.ravel()
    print(diagnosis_array)
    kids = n.T.ravel()
    print(kids)
    
    with pm.Model() as model:
        
        # Al Bashir hospital market share
        market_share = pm.Uniform('market_share', 0.5, 0.6)

        # Correct for 5 days of enrollment per week
        p_hosp = market_share * (5./7)

        # Number of 1 y.o. in Amman
        n_amman = pm.Binomial('n_amman', kids, amman_prop, shape=kids.shape)
#         rng = tt.shared_randomstreams.RandomStreams()
#         n_amman = rng.binomial(n=kids.astype(int), p=amman_prop, size=kids.shape)

        # Prior probability
        prev_diag = pm.Beta('prev_diag', 1, 5, shape=diagnosis_array.size)
        per_1000 = pm.Deterministic('per_10000', prev_diag*10000)

        # Infected count in Amman
        y_amman = pm.Binomial('y_amman', n_amman, prev_diag, shape=diagnosis_df.size)
#         y_amman = rng.binomial(n=n_amman, p=prev_diag, size=(diagnosis_df.size,))

        # Likelihood for number with virus in hospital (assumes Pr(hosp | virus) = 1)
        pm.Binomial('y_hosp', y_amman, p_hosp, observed=diagnosis_array)

    
    return model

Pneumonia rates

In [73]:
niterations = 100000
nkeep = 10000
In [74]:
with rate_model('pneumonia') as pneumo_model:
    trace_pneumo = pm.sample(niterations, step=pm.Metropolis(), njobs=2)
[ 56  13  11  80  75  17  32 124 117  24  30 171]
[  96042.   96042.  180489.  372573.   98132.   98132.  191817.  388081.
   98823.   98823.  190830.  388476.]
/Users/fonnescj/Repos/pymc3/pymc3/sampling.py:166: UserWarning: Instantiated step methods cannot be automatically initialized. init argument ignored.
  warnings.warn('Instantiated step methods cannot be automatically initialized. init argument ignored.')
100%|██████████| 100000/100000 [04:32<00:00, 366.87it/s]
In [75]:
prevalence_labels = ['%s %s' % (year, age) for year in ('2011', '2012', '2013') 
                     for age in ('under 6 mo.', '6-11 mo.', '12-23 mo.', 'all ages')]
In [76]:
pm.forestplot(trace_pneumo[-nkeep:], varnames=['per_10000'], 
              ylabels=prevalence_labels, main='Pneumonia (per 10000)')
Out[76]:
<matplotlib.gridspec.GridSpec at 0x11d8d9748>
In [77]:
pm.summary(trace_pneumo[-nkeep:], varnames=['per_10000'])
per_10000:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  45.556           6.330            0.485            [34.138, 58.135]
  11.773           2.724            0.229            [6.757, 17.055]
  4.956            1.582            0.140            [2.496, 8.052]
  16.613           1.811            0.139            [13.030, 20.058]
  58.715           7.633            0.617            [43.439, 72.344]
  14.124           3.327            0.289            [8.041, 21.096]
  13.498           2.351            0.187            [9.319, 18.422]
  24.544           2.333            0.182            [20.307, 29.432]
  91.497           9.589            0.772            [72.088, 109.119]
  19.545           3.990            0.327            [11.782, 26.846]
  12.403           2.219            0.178            [8.441, 17.394]
  33.697           2.464            0.178            [29.010, 38.656]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  34.493         41.023         45.017         49.701         58.815
  7.082          9.725          11.507         13.680         17.400
  2.669          3.771          4.754          5.899          8.821
  13.148         15.350         16.531         17.825         20.279
  44.176         53.361         58.889         63.988         73.693
  8.300          11.828         13.839         16.185         21.471
  9.567          11.821         13.299         14.944         18.681
  20.071         22.945         24.485         26.069         29.309
  72.755         84.863         91.805         98.158         110.056
  12.562         16.631         19.308         22.077         28.472
  8.366          10.936         12.236         13.751         17.351
  28.888         32.045         33.633         35.383         38.582

In [85]:
pneumo_df = pm.df_summary(trace_pneumo[-nkeep:], varnames=['per_10000'], )
pneumo_df.index = prevalence_labels
In [86]:
pneumo_df
mean sd mc_error hpd_2.5 hpd_97.5
2011 under 6 mo. 45.556492 6.330157 0.485433 34.137976 58.134535
2011 6-11 mo. 11.773122 2.724259 0.229005 6.757460 17.055463
2011 12-23 mo. 4.956200 1.582006 0.139846 2.496128 8.052311
2011 all ages 16.612553 1.810870 0.139455 13.029766 20.057730
2012 under 6 mo. 58.714962 7.633362 0.616922 43.439193 72.343841
2012 6-11 mo. 14.124459 3.327001 0.288640 8.040512 21.095547
2012 12-23 mo. 13.497997 2.350882 0.187344 9.319081 18.421821
2012 all ages 24.544155 2.332945 0.181614 20.307164 29.431807
2013 under 6 mo. 91.496812 9.588543 0.771705 72.087852 109.118532
2013 6-11 mo. 19.544625 3.990094 0.327031 11.781594 26.846234
2013 12-23 mo. 12.403117 2.218972 0.178091 8.441360 17.393714
2013 all ages 33.696513 2.463959 0.178474 29.010174 38.655915
In [93]:
pneumo_df.to_csv('pneumonia_per_10000.csv')

Brochopneumonia rates

In [89]:
with rate_model('brochopneumonia') as bpneumo_model:
    trace_bpneumo = pm.sample(niterations, njobs=2)
[ 86 106  78 270 142 124 108 374 118 128  92 338]
[  96042.   96042.  180489.  372573.   98132.   98132.  191817.  388081.
   98823.   98823.  190830.  388476.]
Assigned NUTS to market_share_interval_
Assigned Metropolis to n_amman
Assigned NUTS to prev_diag_logodds_
Assigned Metropolis to y_amman
100%|██████████| 100000/100000 [17:14<00:00, 96.68it/s]
In [90]:
pm.forestplot(trace_bpneumo[-nkeep:], varnames=['per_10000'], 
              ylabels=prevalence_labels, main='Bronchopneumonia (per 10000)')
Out[90]:
<matplotlib.gridspec.GridSpec at 0x12ac47cc0>
In [91]:
pm.summary(trace_bpneumo[-nkeep:], varnames=['per_10000'])
per_10000:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  69.036           7.904            0.428            [53.418, 84.091]
  83.219           9.649            0.626            [64.624, 101.797]
  33.071           4.064            0.225            [25.284, 41.197]
  55.128           4.178            0.325            [46.920, 63.393]
  109.649          10.930           0.748            [89.261, 131.856]
  95.231           9.786            0.652            [76.515, 114.594]
  42.744           4.830            0.307            [33.168, 52.024]
  73.140           5.424            0.458            [62.443, 83.150]
  91.293           9.427            0.585            [72.894, 109.471]
  97.433           10.067           0.665            [78.771, 117.983]
  36.820           4.181            0.240            [28.748, 45.051]
  65.432           4.763            0.390            [56.137, 74.144]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  54.030         63.550         68.860         74.318         84.803
  65.226         76.381         83.086         89.742         102.493
  25.740         30.203         32.856         35.650         41.852
  46.834         52.398         55.109         57.879         63.354
  88.521         102.149        109.577        117.040        131.274
  76.536         88.428         95.082         101.865        114.642
  33.736         39.381         42.587         45.950         52.640
  62.148         69.467         73.437         77.042         82.899
  73.757         84.817         91.003         97.485         110.434
  78.130         90.435         97.267         104.230        117.413
  29.020         33.909         36.670         39.593         45.360
  55.992         62.030         65.723         68.860         74.040

In [92]:
bpneumo_df = pm.df_summary(trace_bpneumo[-nkeep:], varnames=['per_10000'], )
bpneumo_df.index = prevalence_labels
In [97]:
bpneumo_df.to_csv('bronchopneumonia_per_10000.csv')

Bronchiolitis rates

In [95]:
with rate_model('bronchiolitis') as bronchiolitis_model:
    trace_bronchiolitis = pm.sample(niterations, njobs=2)
[142  27   2 171 149  36   9 194 127  29   7 163]
[  96042.   96042.  180489.  372573.   98132.   98132.  191817.  388081.
   98823.   98823.  190830.  388476.]
Assigned NUTS to market_share_interval_
Assigned Metropolis to n_amman
Assigned NUTS to prev_diag_logodds_
Assigned Metropolis to y_amman
100%|██████████| 100000/100000 [26:50<00:00, 62.10it/s] 
In [98]:
pm.forestplot(trace_bronchiolitis[-nkeep:], varnames=['per_10000'], ylabels=prevalence_labels, 
              main='Bronchiolitis (per 10000)')
Out[98]:
<matplotlib.gridspec.GridSpec at 0x12f188630>
In [99]:
pm.summary(trace_bronchiolitis[-nkeep:], varnames=['per_10000'])
per_10000:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  113.751          8.321            0.526            [98.144, 130.298]
  22.115           4.086            0.228            [14.264, 30.072]
  1.290            0.751            0.027            [0.124, 2.778]
  35.178           2.897            0.227            [29.865, 41.214]
  116.663          10.889           0.884            [96.628, 138.553]
  28.758           4.938            0.316            [19.685, 38.441]
  4.025            1.255            0.059            [1.773, 6.544]
  39.013           3.251            0.269            [32.710, 45.437]
  99.030           9.092            0.693            [80.752, 116.585]
  23.172           4.245            0.252            [15.233, 31.630]
  3.202            1.144            0.046            [1.153, 5.484]
  32.641           3.244            0.279            [26.752, 39.377]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  
  98.199         108.028        113.540        119.318        130.392
  14.594         19.219         21.966         24.825         30.462
  0.268          0.735          1.140          1.702          3.107
  29.972         33.124         35.008         37.038         41.351
  96.767         109.021        116.022        123.982        138.777
  20.451         25.344         28.302         31.637         39.661
  1.945          3.137          3.886          4.785          6.860
  32.544         36.831         38.996         41.267         45.277
  81.507         92.812         98.908         105.014        117.594
  15.819         20.185         22.807         25.793         32.536
  1.354          2.371          3.062          3.890          5.808
  27.078         30.308         32.411         34.590         39.865

In [100]:
bronchiolitis_df = pm.df_summary(trace_bronchiolitis[-nkeep:], varnames=['per_10000'], )
bronchiolitis_df.index = prevalence_labels
In [101]:
bronchiolitis_df.to_csv('bronchiolitis_per_10000.csv')