#!/usr/bin/env python
# coding: utf-8

# Exploratory notebook export: compares hospitalized patients by PCR result
# ("growth" vs "no growth") and by admission sepsis status.
#
# NOTE: bare expressions (e.g. ``hospitalized.shape``) only display their
# value in an interactive/notebook session; run as a plain script they are
# evaluated and discarded.

# In[1]:

# `get_ipython` only exists inside an IPython/Jupyter session; skip the
# inline-plot magic when this export is run as a plain Python script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb


# In[76]:

# First CSV column is used as the row index.
hospitalized = pd.read_csv('data/hospitalized.csv', index_col=0)


# In[3]:

hospitalized.shape


# ## Virus-negative individuals

# In[39]:

# Label each row by whether any column whose name starts with 'pcr_result'
# is non-zero: a row sum of 0 becomes 'No Growth', anything else 'Growth'.
# NOTE(review): assumes the pcr_result columns are 0/1 indicators - confirm.
pcr_columns = hospitalized.columns[hospitalized.columns.str.startswith('pcr_result')]
no_pcr_hits = hospitalized[pcr_columns].sum(axis=1) == 0
hospitalized['pcr_growth'] = no_pcr_hits.replace({False: 'Growth', True: 'No Growth'})


# In[45]:

by_growth = hospitalized.groupby('pcr_growth')


# Plot of age by PCR growth status

# In[42]:

hospitalized.hist(column='age_months', by='pcr_growth', sharex=True)


# Plot of length of stay by PCR growth

# In[43]:

hospitalized.hist(column='length_of_stay', by='pcr_growth', sharex=True)


# Higher sepsis and blood culture rates for no growth individuals

# In[46]:

by_growth[['adm_pneumo', 'adm_bronchopneumo', 'adm_sepsis',
           'adm_bronchiolitis', 'blood_culture']].mean()


# Bacteria positives

# In[49]:

by_growth[['blood_acintobacter', 'blood_alcaligenese', 'blood_candida',
           'blood_ecoli', 'blood_klebsiella', 'blood_pneumo', 'blood_mening',
           'blood_staph', 'blood_strep', 'blood_other_gram_neg']].mean().round(3)


# In[74]:

# PCR result column -> pathogen label.
pcr_lookup = {
    'pcr_result___1': 'RSV',
    'pcr_result___2': 'HMPV',
    'pcr_result___3': 'flu A',
    'pcr_result___4': 'flu B',
    'pcr_result___5': 'rhino',
    'pcr_result___6': 'PIV1',
    'pcr_result___7': 'PIV2',
    'pcr_result___8': 'PIV3',
    'pcr_result___13': 'H1N1',
    'pcr_result___14': 'H3N2',
    'pcr_result___15': 'Swine',
    'pcr_result___16': 'Swine H1',
    'pcr_result___17': 'flu C',
    'pcr_result___18': 'Adeno',
}

# Integer indicator columns per pathogen (group).
hospitalized['RSV'] = hospitalized.pcr_result___1.astype(int)
hospitalized['HMPV'] = hospitalized.pcr_result___2.astype(int)
hospitalized['Rhino'] = hospitalized.pcr_result___5.astype(int)
# Influenza = flu A or flu B (element-wise OR of the two result columns).
hospitalized['Influenza'] = (hospitalized.pcr_result___3
                             | hospitalized.pcr_result___4).astype(int)
hospitalized['Adeno'] = hospitalized.pcr_result___18.astype(int)
# PIV = any of PIV1, PIV2, PIV3.
hospitalized['PIV'] = (hospitalized.pcr_result___6
                       | hospitalized.pcr_result___7
                       | hospitalized.pcr_result___8).astype(int)
# 1 when none of the PCR columns listed in pcr_lookup is set for the row.
hospitalized['No virus'] = (hospitalized[list(pcr_lookup.keys())].sum(axis=1) == 0).astype(int)


# In[68]:

# Group only after the derived indicator columns exist, so the selection
# below does not depend on the groupby seeing later in-place additions to
# the frame.
by_sepsis = hospitalized.groupby('adm_sepsis')


# Risk factors by sepsis status

# In[75]:

# NOTE(review): 'no_growth' is not created anywhere in this script (only
# 'pcr_growth' and 'No virus' are); presumably it is a column of the CSV -
# confirm, otherwise this selection raises KeyError.
by_sepsis[['age_months', 'icu', 'length_of_stay', 'oxygen', 'no_growth',
           'death', 'RSV', 'Adeno', 'PIV', 'Influenza', 'Rhino']].describe()