%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sb
# Load the hospitalization records, using the first CSV column as the row index.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which is the remedy pandas suggests for the
# mixed-type DtypeWarning this file otherwise triggers on import.
hospitalized = pd.read_csv('data/hospitalized.csv', index_col=0, low_memory=False)
/Users/fonnescj/anaconda3/envs/dev/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (140,142,144,146,148,181,206,212,213,262,281,282,283,297,298) have mixed types. Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
# Show the (rows, columns) dimensions of the loaded table.
hospitalized.shape
(3168, 408)
# Label every row by its PCR outcome: a row whose pcr_result* columns sum to
# zero is tagged 'No Growth'; any non-zero total is tagged 'Growth'.
pcr_hits = hospitalized.filter(regex='^pcr_result').sum(axis=1)
hospitalized['pcr_growth'] = pcr_hits.eq(0).map({True: 'No Growth', False: 'Growth'})

# Grouped view of the table keyed on the new label, reused by later summaries.
by_growth = hospitalized.groupby('pcr_growth')
Plot of age by PCR growth status
# Histograms of age_months, one panel per pcr_growth value, on a shared x-axis.
hospitalized.hist(column='age_months', by='pcr_growth', sharex=True)
array([<matplotlib.axes._subplots.AxesSubplot object at 0x110f0e0f0>, <matplotlib.axes._subplots.AxesSubplot object at 0x1136cc390>], dtype=object)
Plot of length of stay by PCR growth
# Histograms of length_of_stay, one panel per pcr_growth value, on a shared x-axis.
hospitalized.hist(column='length_of_stay', by='pcr_growth', sharex=True)
array([<matplotlib.axes._subplots.AxesSubplot object at 0x111fc70f0>, <matplotlib.axes._subplots.AxesSubplot object at 0x113c6b080>], dtype=object)
Higher sepsis and blood culture rates among no-growth individuals
# Per-group mean of the listed adm_* and blood_culture columns, split by pcr_growth.
admission_columns = [
    'adm_pneumo',
    'adm_bronchopneumo',
    'adm_sepsis',
    'adm_bronchiolitis',
    'blood_culture',
]
by_growth[admission_columns].mean()
adm_pneumo | adm_bronchopneumo | adm_sepsis | adm_bronchiolitis | blood_culture | |
---|---|---|---|---|---|
pcr_growth | |||||
Growth | 0.135994 | 0.343278 | 0.235568 | 0.194886 | 0.423922 |
No Growth | 0.073254 | 0.228279 | 0.497445 | 0.074957 | 0.576068 |
Bacteria positives
# Per-group mean of each listed blood_* column, split by pcr_growth and
# rounded to three decimals. Column names are kept exactly as they appear
# in the data set.
blood_columns = [
    'blood_acintobacter', 'blood_alcaligenese', 'blood_candida',
    'blood_ecoli', 'blood_klebsiella', 'blood_pneumo', 'blood_mening',
    'blood_staph', 'blood_strep', 'blood_other_gram_neg',
]
by_growth[blood_columns].mean().round(3)
blood_acintobacter | blood_alcaligenese | blood_candida | blood_ecoli | blood_klebsiella | blood_pneumo | blood_mening | blood_staph | blood_strep | blood_other_gram_neg | |
---|---|---|---|---|---|---|---|---|---|---|
pcr_growth | ||||||||||
Growth | 0.375 | 0.167 | 0.143 | 0.000 | 0.167 | 0.014 | 0.027 | 0.123 | 0.571 | 0.167 |
No Growth | 0.000 | 0.000 | 0.000 | 0.286 | 0.429 | 0.048 | 0.000 | 0.143 | 0.300 | 0.000 |
# Map each PCR indicator column to the label of the virus it reports.
pcr_lookup = {
    'pcr_result___1': 'RSV',
    'pcr_result___2': 'HMPV',
    'pcr_result___3': 'flu A',
    'pcr_result___4': 'flu B',
    'pcr_result___5': 'rhino',
    'pcr_result___6': 'PIV1',
    'pcr_result___7': 'PIV2',
    'pcr_result___8': 'PIV3',
    'pcr_result___13': 'H1N1',
    'pcr_result___14': 'H3N2',
    'pcr_result___15': 'Swine',
    'pcr_result___16': 'Swine H1',
    'pcr_result___17': 'flu C',
    'pcr_result___18': 'Adeno',
}

# Derived integer indicator columns, listed in the order they are added to the
# table. A virus backed by several PCR columns is the element-wise OR of them.
virus_sources = [
    ('RSV', ['pcr_result___1']),
    ('HMPV', ['pcr_result___2']),
    ('Rhino', ['pcr_result___5']),
    ('Influenza', ['pcr_result___3', 'pcr_result___4']),
    ('Adeno', ['pcr_result___18']),
    ('PIV', ['pcr_result___6', 'pcr_result___7', 'pcr_result___8']),
]
for virus_label, source_columns in virus_sources:
    detected = hospitalized[source_columns[0]]
    for extra_column in source_columns[1:]:
        detected = detected | hospitalized[extra_column]
    hospitalized[virus_label] = detected.astype(int)

# 1 when none of the PCR columns named in pcr_lookup is set for the row, else 0.
hospitalized['No virus'] = hospitalized[list(pcr_lookup)].sum(axis=1).eq(0).astype(int)

# Grouped view keyed on adm_sepsis; built after the derived columns so the
# columns it is later asked for already exist on the table.
by_sepsis = hospitalized.groupby('adm_sepsis')
Risk factors by sepsis status
# Descriptive statistics of the selected columns within each adm_sepsis group.
sepsis_summary_columns = [
    'age_months', 'icu', 'length_of_stay', 'oxygen', 'no_growth', 'death',
    'RSV', 'Adeno', 'PIV', 'Influenza', 'Rhino',
]
by_sepsis[sepsis_summary_columns].describe()
age_months | icu | length_of_stay | oxygen | RSV | Adeno | PIV | Influenza | Rhino | ||
---|---|---|---|---|---|---|---|---|---|---|
adm_sepsis | ||||||||||
0 | count | 2268.000000 | 2242.000000 | 2245.000000 | 2243.000000 | 2268.000000 | 2268.000000 | 2268.000000 | 2268.000000 | 2268.000000 |
mean | 6.932981 | 0.063336 | 4.912695 | 0.362015 | 0.507055 | 0.164021 | 0.055996 | 0.031746 | 0.383598 | |
std | 5.636301 | 0.243621 | 3.839174 | 0.480690 | 0.500060 | 0.370376 | 0.229966 | 0.175362 | 0.486369 | |
min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
25% | 2.000000 | 0.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
50% | 5.000000 | 0.000000 | 4.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
75% | 10.000000 | 0.000000 | 6.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | |
max | 23.000000 | 1.000000 | 47.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | |
1 | count | 900.000000 | 894.000000 | 894.000000 | 894.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 | 900.000000 |
mean | 1.078889 | 0.116331 | 7.295302 | 0.224832 | 0.274444 | 0.114444 | 0.053333 | 0.031111 | 0.408889 | |
std | 1.317039 | 0.320801 | 4.054506 | 0.417705 | 0.446482 | 0.318527 | 0.224822 | 0.173715 | 0.491902 | |
min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
25% | 0.000000 | 0.000000 | 5.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
50% | 1.000000 | 0.000000 | 7.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | |
75% | 2.000000 | 0.000000 | 9.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | |
max | 14.000000 | 1.000000 | 42.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |