In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sb
In [76]:
hospitalized = pd.read_csv('data/hospitalized.csv', index_col=0)
/Users/fonnescj/anaconda3/envs/dev/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (140,142,144,146,148,181,206,212,213,262,281,282,283,297,298) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
In [3]:
hospitalized.shape
Out[3]:
(3168, 408)

Virus-negative individuals

In [39]:
hospitalized['pcr_growth'] = (hospitalized[hospitalized.columns[hospitalized.columns.str.startswith('pcr_result')]]
                                     .sum(1)==0).replace({False:'Growth', True:'No Growth'})
In [45]:
by_growth = hospitalized.groupby('pcr_growth')

Plot of age by PCR growth status

In [42]:
hospitalized.hist('age_months', by='pcr_growth', sharex=True)
Out[42]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x110f0e0f0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x1136cc390>], dtype=object)

Plot of length of stay by PCR growth

In [43]:
hospitalized.hist('length_of_stay', by='pcr_growth', sharex=True)
Out[43]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x111fc70f0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x113c6b080>], dtype=object)

Higher sepsis and blood culture rates for no growth individuals

In [46]:
by_growth[['adm_pneumo', 'adm_bronchopneumo', 'adm_sepsis', 'adm_bronchiolitis', 'blood_culture']].mean()
Out[46]:
adm_pneumo adm_bronchopneumo adm_sepsis adm_bronchiolitis blood_culture
pcr_growth
Growth 0.135994 0.343278 0.235568 0.194886 0.423922
No Growth 0.073254 0.228279 0.497445 0.074957 0.576068

Bacteria positives

In [49]:
by_growth[['blood_acintobacter',
 'blood_alcaligenese',
 'blood_candida',
 'blood_ecoli',
 'blood_klebsiella',
 'blood_pneumo',
 'blood_mening',
 'blood_staph',
 'blood_strep',
 'blood_other_gram_neg']].mean().round(3)
Out[49]:
blood_acintobacter blood_alcaligenese blood_candida blood_ecoli blood_klebsiella blood_pneumo blood_mening blood_staph blood_strep blood_other_gram_neg
pcr_growth
Growth 0.375 0.167 0.143 0.000 0.167 0.014 0.027 0.123 0.571 0.167
No Growth 0.000 0.000 0.000 0.286 0.429 0.048 0.000 0.143 0.300 0.000
In [68]:
by_sepsis = hospitalized.groupby('adm_sepsis')
In [74]:
pcr_lookup = {'pcr_result___1': 'RSV',
'pcr_result___2': 'HMPV',
'pcr_result___3': 'flu A',
'pcr_result___4': 'flu B',
'pcr_result___5': 'rhino',
'pcr_result___6': 'PIV1',
'pcr_result___7': 'PIV2',
'pcr_result___8': 'PIV3',
'pcr_result___13': 'H1N1',
'pcr_result___14': 'H3N2',
'pcr_result___15': 'Swine',
'pcr_result___16': 'Swine H1',
'pcr_result___17': 'flu C',
'pcr_result___18': 'Adeno'}

hospitalized['RSV'] = hospitalized.pcr_result___1.astype(int)
hospitalized['HMPV'] = hospitalized.pcr_result___2.astype(int)
hospitalized['Rhino'] = hospitalized.pcr_result___5.astype(int)
hospitalized['Influenza'] = (hospitalized.pcr_result___3 | hospitalized.pcr_result___4).astype(int)
hospitalized['Adeno'] = hospitalized.pcr_result___18.astype(int)
hospitalized['PIV'] = (hospitalized.pcr_result___6 | hospitalized.pcr_result___7 | hospitalized.pcr_result___8).astype(int)
hospitalized['No virus'] = (hospitalized[list(pcr_lookup.keys())].sum(1) == 0).astype(int)

Risk factors by sepsis status

In [75]:
by_sepsis[['age_months', 'icu', 'length_of_stay', 'oxygen', 'no_growth', 'death',
          'RSV', 'Adeno', 'PIV', 'Influenza', 'Rhino']].describe()
Out[75]:
age_months icu length_of_stay oxygen RSV Adeno PIV Influenza Rhino
adm_sepsis
0 count 2268.000000 2242.000000 2245.000000 2243.000000 2268.000000 2268.000000 2268.000000 2268.000000 2268.000000
mean 6.932981 0.063336 4.912695 0.362015 0.507055 0.164021 0.055996 0.031746 0.383598
std 5.636301 0.243621 3.839174 0.480690 0.500060 0.370376 0.229966 0.175362 0.486369
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2.000000 0.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
50% 5.000000 0.000000 4.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000
75% 10.000000 0.000000 6.000000 1.000000 1.000000 0.000000 0.000000 0.000000 1.000000
max 23.000000 1.000000 47.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
1 count 900.000000 894.000000 894.000000 894.000000 900.000000 900.000000 900.000000 900.000000 900.000000
mean 1.078889 0.116331 7.295302 0.224832 0.274444 0.114444 0.053333 0.031111 0.408889
std 1.317039 0.320801 4.054506 0.417705 0.446482 0.318527 0.224822 0.173715 0.491902
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 5.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
50% 1.000000 0.000000 7.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
75% 2.000000 0.000000 9.000000 0.000000 1.000000 0.000000 0.000000 0.000000 1.000000
max 14.000000 1.000000 42.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000