#!/usr/bin/env python
# coding: utf-8

# Notebook export: draws, for each virus year, a sample of RSV-positive
# hospitalized patients. Every RSV death is included first; the remainder
# of the 31-patient quota is filled by random draws from three severity
# groups (ventilator/ICU, oxygen without ventilator, neither).

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sb
import pymc as pm
sb.set_style("white")


# In[2]:

hospitalized = pd.read_csv('data/hospitalized.csv', index_col=0)
hospitalized.head()


# Convert dates

# In[3]:

hospitalized.child_birth_date = pd.to_datetime(hospitalized.child_birth_date)
hospitalized.enrollment_date = pd.to_datetime(hospitalized.enrollment_date)
hospitalized.admission_date = pd.to_datetime(hospitalized.admission_date)
hospitalized.discharge_date = pd.to_datetime(hospitalized.discharge_date)


# Assign virus year

# In[10]:

# Default every admission to 2011, then overwrite the later seasons:
# 2011-03-31 through 2012-03-31 (inclusive) -> 2012, anything later -> 2013.
hospitalized['virus_year'] = 2011
hospitalized.loc[(hospitalized.admission_date >= '2011-03-31')
                 & (hospitalized.admission_date <= '2012-03-31'),
                 'virus_year'] = 2012
hospitalized.loc[hospitalized.admission_date > '2012-03-31', 'virus_year'] = 2013
hospitalized.virus_year.value_counts()


# Extract RSV subset

# In[72]:

hospitalized['RSV'] = hospitalized['pcr_result___1']
RSV_subset = hospitalized[hospitalized.RSV == 1]
RSV_subset.death.sum()


# In[78]:

RSV_subset.icu.mean()


# In[73]:

deaths = RSV_subset[RSV_subset.death == 1]


# Dictionary to hold samples

# In[79]:

# Maps virus year -> list of selected row labels (index values of
# `hospitalized`).
random_sample = {2011: [], 2012: [], 2013: []}


# In[80]:

# Seed each year's sample with every RSV death from that year.
for i, d in deaths.iterrows():
    random_sample[d.virus_year].append(i)


# In[81]:

for year in random_sample:

    year_subset = RSV_subset[RSV_subset.virus_year == year]

    # Deaths already occupy part of the 31-patient quota for this year.
    n_required = 31 - len(random_sample[year])

    # Determine number in each group: 40% oxygen, 10% ventilator/ICU
    # (both truncated to int), and whatever remains from the no-support group.
    n_oxygen = int(n_required * 0.4)
    n_vent_icu = int(n_required * 0.1)
    n_none = n_required - n_oxygen - n_vent_icu

    # (selection mask, number to draw), processed in this order.
    strata = [
        # Mechanical vent or ICU patients
        ((year_subset.vent == 1) | (year_subset.icu == 1), n_vent_icu),
        # Oxygen patients
        ((year_subset.oxygen == 1) & (year_subset.vent == 0), n_oxygen),
        # No oxygen or ventilator
        ((year_subset.oxygen == 0) & (year_subset.vent == 0), n_none),
    ]

    for in_stratum, n_draw in strata:
        # The strata overlap (an ICU patient can also satisfy the vent == 0
        # filters) and the deaths were added up front, so drop every patient
        # that is already selected; otherwise the same row label could be
        # appended twice and the year would hold fewer than 31 distinct
        # patients.
        not_yet_chosen = ~year_subset.index.isin(random_sample[year])
        candidates = year_subset[in_stratum & not_yet_chosen]
        # DataFrame.sample raises ValueError when fewer than n_draw
        # candidates remain.
        random_sample[year] += candidates.sample(n=n_draw).index.values.tolist()


# Here is the sample from each study year

# In[82]:

random_sample