In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Set some Pandas options
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', 100)

Import data from Excel, using RunID as an index.

In [9]:
flu = pd.read_excel("Data/Flu.xlsx", "Flu", index_col='RunID', na_values=['V04.82','v03.81'])
diagnoses = pd.read_excel("Data/Flu.xlsx", "Diagnoses")
complications = pd.read_excel("Data/Flu.xlsx", "Complications")
organisms = pd.read_excel("Data/Flu.xlsx", "Organisms")
runs_year = pd.read_excel("Data/Flu.xlsx", "Runs by Year")
# runs_sex = pd.read_excel("Data/Flu.xlsx", "Runs by Year and Sex")
# runs_race = pd.read_excel("Data/Flu.xlsx", "Runs by Year and Race")
# Average age of patients on ECMO
year_days = pd.read_csv("Data/year_days.csv", index_col='YearEcls')

support = pd.read_excel("Data/Flu.xlsx", "Pre-ECLS Support")
survived_year = pd.read_csv("Data/survived.csv")
In [10]:
flu.columns
Out[10]:
Index(['PatientID', 'RunNo', 'AgeDays', 'HoursECMO', 'SupportType',
       'PrimaryDx', 'Mode', 'Discontinuation', 'DischargedAlive',
       'DischargeLocation', 'YearECLS', 'VentType', 'Rate', 'FiO2', 'PIP',
       'PEEP', 'MAP', 'HandBagging', 'pH', 'PCO2', 'PO2', 'HCO3', 'SaO2',
       'Venttype24', 'Rate24', 'Fio224', 'PIP24', 'PEEP24', 'MAP24',
       'Handbagging24', 'SBP', 'DBP', 'MapHemo', 'SVO2', 'PCWP', 'SPAP',
       'DPAP', 'MPAP', 'CI', 'Race', 'Sex', 'AdmitToTimeOnHours',
       'TimeOffToExtubationDateHours', 'TimeOffToDeathDateHours',
       'TimeOffToDCDateHours', 'ExtubationToDCDateHours',
       'ExtubationToDeathDateHours'],
      dtype='object')
In [11]:
(year_days/365).plot(legend=False, grid=False)
plt.xlabel('Year'); plt.ylabel('Age (years)')
plt.xlim(1992, 2013);

Number of patients inf flu dataset

In [12]:
flu.PatientID.unique().size
Out[12]:
1712

Convert year to integer value

In [13]:
flu['year'] = flu.YearECLS.astype(int)

Constrain data to study years

In [14]:
flu = flu[(flu.YearECLS>=1992) & (flu.YearECLS<=2012)]
flu.PatientID.unique().size
Out[14]:
922
In [15]:
runs_year = runs_year[(runs_year.YearEcls>=1992) & (runs_year.YearEcls<=2012)]

Create hours on ventilator variable

In [16]:
flu['HoursVent'] = flu.HoursECMO + flu.TimeOffToExtubationDateHours.fillna(0)

Complications

Divide into mechanical, hemorragic, renal, cardiovascular, pulmonary, neural (ignore others).

In [17]:
complications.head()
Out[17]:
                                  RunID  ComplicationCode  \
0  69CC56CD-208B-4C05-8E3F-002A6541F768               111   
1  69CC56CD-208B-4C05-8E3F-002A6541F768               312   
2  69CC56CD-208B-4C05-8E3F-002A6541F768               401   
3  69CC56CD-208B-4C05-8E3F-002A6541F768               502   
4  69CC56CD-208B-4C05-8E3F-002A6541F768               514   

                                         Description  
0                      Mechanical: Clots: oxygenator  
1               Neurologic: Seizures: EEG determined  
2                        Renal: Creatinine 1.5 - 3.0  
3                       Cardiovascular: CPR required  
4  Cardiovascular: Hypertension requiring vasodil...  
In [18]:
complications['complication_type'] = complications.Description.apply(lambda x: x[:x.index(':')])
In [19]:
complications.complication_type.value_counts()
Out[19]:
Cardiovascular    1471
Renal             1368
Mechanical        1142
Hemorrhagic        922
Metabolic          612
Infectious         478
Pulmonary          399
Neurologic         275
Limb                28
dtype: int64

Age Summary

Convert age to years

In [20]:
flu['AgeYears'] = flu.AgeDays/365.

Age distribution of flu data.

In [21]:
(flu.drop_duplicates('PatientID').AgeYears).hist()
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x110e7b978>

Create age classes:

  • neonatal: <29 days
  • pediatric 29 days - 17 years
  • adult >17 years
In [22]:
flu['age_class'] = 'pediatric'
flu.age_class[flu.AgeDays<29] = 'neonatal'
flu.age_class[flu.AgeYears>17] = 'adult'
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()

Here is the proportion in each class:

In [23]:
flu.age_class.value_counts(True)
Out[23]:
adult        0.570222
pediatric    0.403379
neonatal     0.026399
dtype: float64

Distribution by race and sex:

In [24]:
pd.crosstab(flu.Race, flu.Sex).sort_index(by='F', ascending=False)
Out[24]:
Sex     F    M
Race          
W     271  315
A      47   68
B      43   41
H      38   49
O      25   32

Diagnoses

Proportion with flu as PrimaryDx (ICD9 codes 487 to (but not including) 489) from diagnosis dataset:

In [25]:
primary_dx = diagnoses.ICD9Code[diagnoses.PrimaryDiagnosis==1]
primary_flu = primary_dx.apply(lambda x: x.startswith('487') or x.startswith('488'))

Proportion and number with flu as primary diagnosis

In [26]:
primary_flu.mean()
Out[26]:
0.41896551724137931
In [27]:
primary_flu.sum()
Out[27]:
729

The same calculation using the flu dataset:

In [32]:
flu.PrimaryDx = flu.PrimaryDx.astype(float)
In [33]:
((flu.PrimaryDx >= 487) & (flu.PrimaryDx < 489)).mean()
Out[33]:
0.43505807814149949
In [34]:
((flu.PrimaryDx >= 487) & (flu.PrimaryDx < 489)).sum()
Out[34]:
412

Here is a plot of the frequencies of secondary diagnoses (frequencies >5):

In [35]:
secondary_diag_count = diagnoses.ICD9Code[diagnoses.PrimaryDiagnosis==0].value_counts()
secondary_diag_count[secondary_diag_count>5].plot(kind='bar', figsize=(16,6), grid=False)
Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x1106b5be0>
518.83 chronic respiratory failure
491* obstructive chronic bronchitis 
493 asthma
In [36]:
pulmonary_codes = '518.83', '491', '493'
In [37]:
diagnoses.ICD9Code.isin(pulmonary_codes).mean()
Out[37]:
0.0037817396002160996
746* congenital heart disease
428* congestive heart failure
427.5 cardiac arrest
In [38]:
cardiac_codes = '746', '428', '427.5'
In [39]:
diagnoses.ICD9Code.isin(cardiac_codes).mean()
Out[39]:
0.014451647757968665
Primary
279.3 Unspecified Immunity Deficiency
279.2 Combined Immunity Deficiency
229.12 Wiskott Aldrich Syndrome
279.11 DiGeorge’s Syndrome
288.0 Neutropenia
Secondary
042 HIV
249* Diabetes Mellitus (DMII)
250* Diabetes Mellitus (DM1)
Transplant Organ 
996.8* Complications of transplanted organ
V42 Organ replacement/HSCT
Malignancy
200.22 Burkitt’s tumor
201.5* Hodgkin’s disease
208* Leukemia
202.8 lymphoma
204.00 acute lymphoid lymphoma
205.0 myeloid leukemia
155* neoplasm of liver
158* neoplasm of retroperitoneum
163.9 neoplasm of pleura
170.6 malignant neoplasm of pelvic bones and coccyx
171.4 neoplasm of connective and other soft tissue of thorax
189* neoplasm of kidney, ureter etc.
191.9 malignant neoplasm of brain
194* malignant neoplasm of glands
195* malignant neoplasm of different body parts
In [40]:
immuno_codes = ('279.3', '279.2', '229.12', '279.11', '288.0', '042', '249', '250', '996.8', 'V42', '200.22', 
                '201.5', '208', '202.8', '204.00', '205.0', '155', '158', '163.9', '170.6', '171.4', '189', '191.9', 
                '194', '195')
In [41]:
diagnoses.ICD9Code.isin(immuno_codes).mean()
Out[41]:
0.0068881685575364667
In [42]:
s_aureus_codes = ["038.11","038.12","O41.11","041.12","482.41","482.42","V02.53","V02.54","V12.0"]
In [43]:
diagnoses.ICD9Code.isin(s_aureus_codes).sum()
Out[43]:
72
In [44]:
s_aureus_diagnoses = diagnoses[diagnoses.ICD9Code.isin(s_aureus_codes)]

Organisms

Import organism lookup table

In [45]:
organism_type = pd.read_csv("Data/organisms.csv")

Merge organism information in a single table

In [46]:
organisms = organisms.merge(organism_type)
organisms.head()
Out[46]:
                                  RunID  OrganismNo OrganismName  \
0  69CC56CD-208B-4C05-8E3F-002A6541F768          63  Influenza A   
1  0E553625-2C5D-45D6-A147-008034605059          63  Influenza A   
2  018ED307-A20E-4E20-A06A-00E2CBA5E64F          63  Influenza A   
3  DB54A4A4-89DF-4177-885B-00F522D76259          63  Influenza A   
4  D03B37F7-1394-4EC7-ADF1-012C767BE64D          63  Influenza A   

         CultureSite  CultureTimeIsApproximate OrganismTiming   Type  
0  Respiratory tract                         0       Pre-ECLS  viral  
1            Unknown                         1       Pre-ECLS  viral  
2  Respiratory tract                         1       Pre-ECLS  viral  
3  Respiratory tract                         1       Pre-ECLS  viral  
4  Respiratory tract                         0        On-ECLS  viral  

Number of unique runs in organisms table

In [47]:
organisms.RunID.unique().size
Out[47]:
1468

Frequencies of organism counts per run

In [48]:
organisms.groupby('RunID').OrganismNo.count().value_counts()
Out[48]:
1     729
2     505
3     102
4      65
5      23
6      14
7      12
9       6
8       6
10      5
19      1
dtype: int64

Count of organisms by run

In [49]:
counts = organisms.groupby('RunID').OrganismNo.count()

Frequencies of organism occurrence (for counts >10)

In [50]:
organism_counts = organisms.groupby('OrganismName').RunID.count()
organism_counts.sort(ascending=False)
organism_counts[organism_counts>10].plot(kind='bar', figsize=(18,4), grid=False)
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x110b3f908>

Rate of organism occurence (do not sum to one due to multple coinfections):

type_counts = organisms.groupby('Type').RunID.count() type_counts.sort(ascending=False) (type_counts.astype(float)/organisms.RunID.unique().size).round(2)
In [51]:
ax1 = flu.groupby('year')['AgeYears'].median().plot(kind='bar', stacked=True, figsize=(10,6), grid=False)
#ax1.set_xticks(range(1992, 2012))
#ax2 = ax1.twinx()
ax1.plot(year_days.values/10.)
plt.ylim(0,100)
Out[51]:
(0, 100)

ECMO

Distribution of hours on ECMO, by year

In [52]:
flu.groupby(['year', 'age_class'])['PatientID'].count().unstack().plot(kind='bar', stacked=True, figsize=(10,6), grid=False)
plt.ylabel('count')
# flu[flu.Sex=='F'].groupby(['year', 'age_class'])['PatientID'].count().unstack().plot(
#                                             kind='bar', stacked=True, figsize=(10,6), grid=False)
# flu[flu.Sex=='M'].groupby(['year', 'age_class'])['PatientID'].count().unstack().plot(
#                                             kind='bar', stacked=True, figsize=(10,6), grid=False)
Out[52]:
<matplotlib.text.Text at 0x110ea6c18>

Same plot as above, except on log scale.

In [53]:
flu.groupby(['year', 'age_class'])['PatientID'].count().unstack().apply(np.log).plot(kind='bar', stacked=True, 
                                                                                     figsize=(10,6), grid=False)
plt.ylabel('log(count)')
Out[53]:
<matplotlib.text.Text at 0x111a036a0>

ECMO incidence, by year and sex:

In [54]:
flu.groupby(['year','Sex']).RunNo.count()
Out[54]:
year  Sex
1992  F        1
1993  F        1
1994  F        1
1995  F        1
1996  F        1
      M        1
1997  F        2
      M        1
1998  F        6
      M        5
1999  F        2
      M        7
2000  F        8
      M        7
2001  F        5
      M        3
2002  F        2
      M        5
2003  F       10
      M       15
2004  F        3
      M        3
2005  F       13
      M        6
2006  F       11
      M        6
2007  F       14
      M       11
2008  F       15
      M       16
2009  F      169
      M      184
2010  F       50
      M       70
2011  F       84
      M      122
2012  F       33
      M       46
Name: RunNo, dtype: int64

Abstract Tables

Table 1 summaries

Demographics of patients with influenza on ECMO

In [59]:
from scipy.stats import beta
from scipy.stats import norm

def binomial_hpdr(x, pct=0.95, a=1, b=1, n_pbins=1e3, roundto=2):
    """
    Function computes the posterior mode along with the upper and lower bounds of the
    **Highest Posterior Density Region**.

    Parameters
    ----------
    x: data (boolean)
    pct: the size of the confidence interval (between 0 and 1)
    a: the alpha hyper-parameter for the Beta distribution used as a prior (Default=1)
    b: the beta hyper-parameter for the Beta distribution used as a prior (Default=1)
    n_pbins: the number of bins to segment the p_range into (Default=1e3)

    Returns
    -------
    A tuple that contains the mode as well as the lower and upper bounds of the interval
    (mode, lower, upper)

    """
    n, N = sum(x), len(x)
    # fixed random variable object for posterior Beta distribution
    rv = beta(n+a, N-n+b)
    # determine the mode and standard deviation of the posterior
    stdev = rv.stats('v')**0.5
    mode = (n+a-1.)/(N+a+b-2.)
    # compute the number of sigma that corresponds to this confidence
    # this is used to set the rough range of possible success probabilities
    n_sigma = np.ceil(norm.ppf( (1+pct)/2. ))+1
    # set the min and max values for success probability 
    max_p = mode + n_sigma * stdev
    if max_p > 1:
        max_p = 1.
    min_p = mode - n_sigma * stdev
    if min_p > 1:
        min_p = 1.
    # make the range of success probabilities
    p_range = np.linspace(min_p, max_p, n_pbins+1)
    # construct the probability mass function over the given range
    if mode > 0.5:
        sf = rv.sf(p_range)
        pmf = sf[:-1] - sf[1:]
    else:
        cdf = rv.cdf(p_range)
        pmf = cdf[1:] - cdf[:-1]
    # find the upper and lower bounds of the interval 
    sorted_idxs = np.argsort( pmf )[::-1]
    cumsum = np.cumsum( np.sort(pmf)[::-1] )
    j = np.argmin( np.abs(cumsum - pct) )
    upper = p_range[ (sorted_idxs[:j+1]).max()+1 ]
    lower = p_range[ (sorted_idxs[:j+1]).min() ]    

    return [np.round(v, roundto) for v in (mode, lower, upper)]

Code for calculating lognormal posterior intervals

In [60]:
from scipy.stats import invgamma
from numpy.random import normal

def lognorm_interval(y, pct=0.95, draws=10000, roundto=2, logm=True):
    
    x = np.log(y + 0.001)
    # Calculate sufficiecnt statistics
    mu_hat = x.mean()
    s2_hat = x.var()
    nu  = len(x) - 1
    
    beta = nu * s2_hat / 2.
    
    s2_sim = invgamma.rvs(nu/2., scale=beta, size=draws)
    mu_sim = normal(mu_hat, np.sqrt(s2_sim)/2.)
    
    m = mu_sim + 0.5*s2_sim
    if not logm:
        m = np.exp(m)
    m.sort()
    
    c = int(draws*(1-pct)/2.)

    return([np.round(v, roundto) for v in (m[int(draws/2)], m[c], m[-c])]) 

Merge flu dataset with organism dataset for summarization by infection type

In [61]:
flu_organism= flu.merge(organisms, left_index=True, right_on='RunID', how='left')
In [62]:
flu_organism.groupby('PatientID')['OrganismName'].count().hist(bins=20)
Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x1106ae748>
In [63]:
flu_organism.head()
Out[63]:
                                 PatientID  RunNo  AgeDays  HoursECMO  \
41    89C16594-7B2B-42A4-81FF-002B2E92CA75      1     8992        192   
496   F54AD8CA-5FEF-4724-A89D-0061A3C51519      1       79         96   
497   F54AD8CA-5FEF-4724-A89D-0061A3C51519      1       79         96   
128   78545F85-C3BC-47EF-967A-00D941B25CF8      1     3062        102   
1212  78545F85-C3BC-47EF-967A-00D941B25CF8      1     3062        102   

      SupportType  PrimaryDx Mode  Discontinuation  DischargedAlive  \
41              1     488.10   VV                1                1   
496             2     746.11   VA                1                0   
497             2     746.11   VA                1                0   
128             2     422.90   VA                1                1   
1212            2     422.90   VA                1                1   

      DischargeLocation  YearECLS  VentType  Rate  FiO2  PIP  PEEP  MAP  \
41                    1      2009         2     5   100   66   NaN   26   
496                 NaN      2000         2     9    88   44   NaN  NaN   
497                 NaN      2000         2     9    88   44   NaN  NaN   
128                   3      1997         2    20    70   22     2  NaN   
1212                  3      1997         2    20    70   22     2  NaN   

      HandBagging    pH  PCO2    ...      CI  Race  Sex  AdmitToTimeOnHours  \
41              0  7.39  32.0    ...     NaN     B    F                  39   
496             0  7.42  35.0    ...     NaN     W    F                 132   
497             0  7.42  35.0    ...     NaN     W    F                 132   
128             0  7.39  31.6    ...     NaN     W    F                  30   
1212            0  7.39  31.6    ...     NaN     W    F                  30   

      TimeOffToExtubationDateHours  TimeOffToDeathDateHours  \
41                             NaN                      NaN   
496                            NaN                      296   
497                            NaN                      296   
128                            132                      NaN   
1212                           132                      NaN   

      TimeOffToDCDateHours  ExtubationToDCDateHours  \
41                     609                      NaN   
496                    NaN                      NaN   
497                    NaN                      NaN   
128                    204                       72   
1212                   204                       72   

      ExtubationToDeathDateHours  year  HoursVent   AgeYears  age_class  \
41                           NaN  2009        192  24.635616      adult   
496                          NaN  2000         96   0.216438  pediatric   
497                          NaN  2000         96   0.216438  pediatric   
128                          NaN  1997        234   8.389041  pediatric   
1212                         NaN  1997        234   8.389041  pediatric   

                                     RunID  OrganismNo          OrganismName  \
41    4BA2348E-18D8-44AE-8E7A-0EC2748E0AFE          63           Influenza A   
496   4247D183-3A46-4945-ADE6-81FE57E97688          63           Influenza A   
497   4247D183-3A46-4945-ADE6-81FE57E97688          63           Influenza A   
128   70DC2DC7-B983-40B0-81B4-2998E80F7163          63           Influenza A   
1212  70DC2DC7-B983-40B0-81B4-2998E80F7163          19  Gram negative, other   

      CultureSite  CultureTimeIsApproximate  OrganismTiming       Type  
41        Unknown                         1        Pre-ECLS      viral  
496       Unknown                         1        Pre-ECLS      viral  
497       Unknown                         1         On-ECLS      viral  
128       Unknown                         1        Pre-ECLS      viral  
1212      Unknown                         1         On-ECLS  bacterial  

[5 rows x 58 columns]

Number with flu by organism name.

In [64]:
has_flu_organism = flu_organism[flu_organism.OrganismName.str.startswith('Influenza')==True].drop_duplicates('PatientID')
len(has_flu_organism)
Out[64]:
533
In [65]:
flu_organism['has_flu'] = (flu_organism.OrganismName.str.startswith('Influenza')==True).replace(
                            {True: 'ECMO Flu', False: 'ECMO No Flu'})
In [66]:
year_days.columns = ['All Runs']
In [67]:
axes = flu_organism.groupby(['YearECLS', 'has_flu'])['AgeYears'].mean().unstack().plot()
(year_days/365).plot(ax=axes, legend=False, grid=False)
plt.xlabel('Year'); plt.ylabel('Age (years)')
plt.xlim(1992, 2013);
In [68]:
axes = flu_organism.groupby(['YearECLS', 'has_flu'])['PatientID'].count().unstack().plot()
plt.xlabel('Year');
plt.xlim(1996, 2013);

Merge with diagnoses dataset for ICD9 lookup.

In [69]:
flu_ICD9 = flu.merge(diagnoses, left_index=True, right_on='RunID', how='left')

Number with flu by ICD9

In [70]:
has_flu_ICD9 = flu_ICD9[flu_ICD9.ICD9Code.apply(
                        lambda x: x.startswith('487') or x.startswith('488'))].drop_duplicates('PatientID')
len(has_flu_ICD9)
Out[70]:
760

This is the union of the patients with flu via ICD9 and those with flu via organism. I think this is what we need.

In [71]:
len(set(has_flu_ICD9.PatientID).union(set(has_flu_organism.PatientID)))
Out[71]:
922

Extract runs from patients with Flu by either ICD9 or organism

In [72]:
flu_organism_runs = flu_organism[flu_organism.PatientID.isin(list(set(has_flu_ICD9.PatientID).union(set(has_flu_organism.PatientID))))]
In [73]:
flu_ICD9_runs = flu_ICD9[flu_ICD9.PatientID.isin(list(set(has_flu_ICD9.PatientID).union(set(has_flu_organism.PatientID))))]

This is a smaller number than is in the Flu table.

In [74]:
len(flu)
Out[74]:
947

Total number of runs for people with influenza on ECMO

In [75]:
flu_runs_organisms = organisms.RunID[organisms.OrganismName.str.startswith('Influenza')==True]
flu_runs_organisms.unique().shape
Out[75]:
(1087,)
In [76]:
flu_runs_ICD9 = diagnoses.RunID[diagnoses.ICD9Code.apply(
                        lambda x: x.startswith('487') or x.startswith('488'))]
flu_runs_ICD9.unique().shape
Out[76]:
(1401,)
In [77]:
# This is the union of the RunIDs from both sets
len(set(flu_runs_organisms).union(set(flu_runs_ICD9)))
Out[77]:
1747

Proportion of runs with influenza

In [78]:
# From organism list
float(len(flu_runs_organisms.unique())) / runs_year.CNT.sum()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-78-a9a9f73ac3dd> in <module>()
      1 # From organism list
----> 2 float(len(flu_runs_organisms.unique())) / runs_year.CNT.sum()

/usr/local/lib/python3.4/site-packages/pandas/core/generic.py in __getattr__(self, name)
   2081                 return self[name]
   2082             raise AttributeError("'%s' object has no attribute '%s'" %
-> 2083                                  (type(self).__name__, name))
   2084 
   2085     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'CNT'
In [79]:
# From ICD9 list
float(len(flu_runs_ICD9.unique())) / runs_year.CNT.sum()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-79-daffd8b25576> in <module>()
      1 # From ICD9 list
----> 2 float(len(flu_runs_ICD9.unique())) / runs_year.CNT.sum()

/usr/local/lib/python3.4/site-packages/pandas/core/generic.py in __getattr__(self, name)
   2081                 return self[name]
   2082             raise AttributeError("'%s' object has no attribute '%s'" %
-> 2083                                  (type(self).__name__, name))
   2084 
   2085     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'CNT'

Proportion female before and during 2009

In [80]:
flu['female'] = flu.Sex=='F'
x_2009 = flu.drop_duplicates('PatientID')[flu.YearECLS==2009]['female'].sum()
n_2009 = flu.drop_duplicates('PatientID')[flu.YearECLS==2009]['female'].count()
print('Proportion female in 2009: {0:.3f}'.format(float(x_2009)/n_2009))
Proportion female in 2009: 0.481
/usr/local/lib/python3.4/site-packages/pandas/core/frame.py:1819: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
In [81]:
flu['pre_2009'] = flu['YearECLS'] < 2009
x_pre_2009 = flu.drop_duplicates('PatientID')[flu.pre_2009]['female'].sum()
n_pre_2009 = flu.drop_duplicates('PatientID')[flu.pre_2009]['female'].count()
print('Proportion female before 2009: {0:.3f}'.format(float(x_pre_2009)/n_pre_2009))
Proportion female before 2009: 0.522
/usr/local/lib/python3.4/site-packages/pandas/core/frame.py:1819: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
In [82]:
from scipy import stats

def ztest(x, n): 
    n = np.array(n, float)
    pbar = sum(x)/sum(n)
    z = np.diff(x/n) / np.sqrt(pbar * (1. - pbar) * sum(1./n))
    return {'z':z, 'p':1.- stats.norm.cdf(np.abs(z))}
In [83]:
ztest([x_2009, x_pre_2009], [n_2009, n_pre_2009])
Out[83]:
{'p': array([ 0.18546614]), 'z': array([ 0.89472842])}

Proportion of females with flu.

In [84]:
with_flu = flu[flu.PatientID.isin(pd.Series(list(set(has_flu_ICD9.PatientID).union(set(has_flu_organism.PatientID)))))]
In [85]:
(with_flu.Sex=='F').mean()
Out[85]:
0.45617740232312565
In [86]:
binomial_hpdr((with_flu.Sex=='F'))
Out[86]:
[0.46000000000000002, 0.41999999999999998, 0.48999999999999999]

Age statistics

In [87]:
with_flu.AgeYears.describe()
Out[87]:
count    947.000000
mean      24.904292
std       19.629840
min        0.000000
25%        6.189041
50%       23.090411
75%       40.226027
max       81.312329
Name: AgeYears, dtype: float64

Mortality of patients with the diagnosis of influenza

In [88]:
with_flu_survivors = flu.drop_duplicates('PatientID').DischargedAlive==True
with_flu_survivors.mean()
Out[88]:
0.60086767895878523
In [89]:
binomial_hpdr(with_flu_survivors)
Out[89]:
[0.59999999999999998, 0.56999999999999995, 0.63]

Median hours on ECMO for influenza

In [90]:
# Estimate of interval on log scale, due to skew in distribution
lognorm_interval(with_flu.HoursECMO/24., draws=100000)
Out[90]:
[2.7000000000000002, 1.6699999999999999, 3.7200000000000002]
In [91]:
# Age distribution
flu.drop_duplicates('PatientID')['AgeYears'].describe()
Out[91]:
count    922.000000
mean      24.793166
std       19.640615
min        0.000000
25%        6.058219
50%       22.954795
75%       40.239041
max       81.312329
Name: AgeYears, dtype: float64
In [92]:
# Race distribution
race_counts = flu.drop_duplicates('PatientID')['Race'].value_counts()
race_counts/float(race_counts.sum())
Out[92]:
W    0.632044
A    0.121547
H    0.091713
B    0.091713
O    0.062983
dtype: float64
In [93]:
binomial_hpdr(flu.drop_duplicates('PatientID')['Race']=='W')
Out[93]:
[0.62, 0.58999999999999997, 0.65000000000000002]
In [94]:
# Survival
flu.drop_duplicates('PatientID')['DischargedAlive'].mean()
Out[94]:
0.60086767895878523
In [95]:
# Sex
sex_counts = flu.drop_duplicates('PatientID')['Sex'].value_counts()
sex_counts/float(sex_counts.sum())
Out[95]:
M    0.537705
F    0.462295
dtype: float64

Complications

In [96]:
complications_with_flu = with_flu.merge(complications, left_index=True, right_on='RunID', how='left')
In [97]:
complications_with_flu.complication_type.value_counts()
Out[97]:
Cardiovascular    933
Renal             783
Mechanical        741
Hemorrhagic       580
Metabolic         344
Infectious        269
Pulmonary         261
Neurologic        168
dtype: int64
In [98]:
def complication_intervals(dataset):
    
    complications_merged = dataset.merge(complications, left_index=True, right_on='RunID', how='left')

    for kind in ('Neurologic', 'Renal', 'Hemorrhagic', 'Cardiovascular', 'Pulmonary'):
    
        x = complications_merged[complications_merged.complication_type==kind].PatientID.drop_duplicates().count()
        n = dataset.PatientID.drop_duplicates().count()
        
        m, lo, up = binomial_hpdr([1]*x + [0]*(n-x))
        
        print('{0}:\n\t {1} ({2}, {3})\n'.format(kind, m, lo, up))
In [99]:
complication_intervals(with_flu)
Neurologic:
	 0.14 (0.12, 0.16)

Renal:
	 0.55 (0.52, 0.58)

Hemorrhagic:
	 0.45 (0.42, 0.48)

Cardiovascular:
	 0.66 (0.63, 0.69)

Pulmonary:
	 0.25 (0.22, 0.28)

Days on ventilator

In [100]:
(with_flu.HoursVent.dropna()/24).hist(bins=20)
Out[100]:
<matplotlib.axes._subplots.AxesSubplot at 0x1141a9828>
In [101]:
# Interval on log scale
lognorm_interval(with_flu.HoursVent.dropna()/24)
Out[101]:
[3.1200000000000001, 2.0, 4.21]

Flu only

Patients with influenza ONLY on ECMO= patients btwn 1992-2012 with EITHER an ICD9 code of flu AND/OR an organism of flu but NO additional organism codes for other bacteria or viruses.

We have established above that all the entries in the Flu table have influenza. We only have to filter out co-infection.

Number of infections by PatientID

In [102]:
infection_count = flu_organism.groupby('PatientID')['PatientID'].count()
In [103]:
infection_count.count()
Out[103]:
922
In [104]:
infection_count.hist()
Out[104]:
<matplotlib.axes._subplots.AxesSubplot at 0x114ad0550>
In [105]:
organisms[organisms.OrganismName=='Staphylococcus aureus, meth resist'].drop_duplicates('RunID').shape
Out[105]:
(121, 7)
In [106]:
single_infection = flu_organism.merge(pd.DataFrame(infection_count[infection_count==1]), left_on='PatientID', right_index=True)
In [107]:
single_infection_ICD9 = flu_ICD9.merge(pd.DataFrame(infection_count[infection_count==1]), left_on='PatientID', right_index=True)
In [108]:
set(single_infection.PatientID.drop_duplicates()).difference(set(single_infection_ICD9.PatientID.drop_duplicates()))
Out[108]:
set()
In [109]:
len(set(single_infection.PatientID.drop_duplicates()))
Out[109]:
550
In [110]:
len(set(single_infection_ICD9.PatientID.drop_duplicates()))
Out[110]:
550
In [111]:
single_infection.drop_duplicates('PatientID').PatientID.count()
Out[111]:
550
In [112]:
single_infection_ICD9.drop_duplicates('PatientID').PatientID.count()
Out[112]:
550
In [113]:
influenza_only = single_infection[single_infection.OrganismName.str.startswith('Influenza')==True]
In [114]:
influenza_only['AgeYears'].describe()
Out[114]:
count    301.000000
mean      22.404952
std       18.951795
min        0.005479
25%        4.682192
50%       18.561644
75%       35.421918
max       68.550685
Name: AgeYears, dtype: float64
In [115]:
race_counts_flu = influenza_only['Race'].value_counts()
race_counts_flu/float(race_counts_flu.sum())
Out[115]:
W    0.626712
B    0.116438
H    0.109589
A    0.082192
O    0.065068
dtype: float64
In [116]:
binomial_hpdr(influenza_only.Race.dropna()=='W')
Out[116]:
[0.63, 0.56999999999999995, 0.68000000000000005]

Posterior interval for days on ECMO

In [117]:
lognorm_interval(influenza_only.HoursECMO/24., draws=100000)
Out[117]:
[2.3999999999999999, 1.4099999999999999, 3.4100000000000001]
In [118]:
flu_only_survivors = influenza_only['DischargedAlive']
flu_only_survivors.mean()
Out[118]:
0.59800664451827246
In [119]:
binomial_hpdr(flu_only_survivors)
Out[119]:
[0.59999999999999998, 0.54000000000000004, 0.65000000000000002]
In [120]:
sex_counts_flu = influenza_only['Sex'].value_counts()
sex_counts_flu/float(sex_counts_flu.sum())
Out[120]:
M    0.531773
F    0.468227
dtype: float64
In [121]:
binomial_hpdr((influenza_only.Sex=='F'))
Out[121]:
[0.46999999999999997, 0.40999999999999998, 0.52000000000000002]
In [122]:
# Index out patients with flu only
complication_intervals(with_flu[with_flu.PatientID.isin(influenza_only.PatientID)])
Neurologic:
	 0.15 (0.11, 0.19)

Renal:
	 0.5 (0.44, 0.55)

Hemorrhagic:
	 0.43 (0.37, 0.48)

Cardiovascular:
	 0.58 (0.53, 0.64)

Pulmonary:
	 0.21 (0.17, 0.26)

Days on ventilator

In [123]:
# Interval on log scale
lognorm_interval(influenza_only.HoursVent.dropna()/24)
Out[123]:
[2.8100000000000001, 1.73, 3.9199999999999999]

Subset with co-infection

Patients with influenza and co-infection on ECMO=patients btwn 1992-2012 with EITHER an ICD9 code of flu AND/OR an organism of flu AND an organism code of something else.

First, identify individuals with multiple infections based on organism code.

In [124]:
multiple_infection = flu_organism.merge(pd.DataFrame(infection_count[infection_count>1]), 
                                        left_on='PatientID', right_index=True)

Merge with the subset that has flu by ICD9

In [125]:
multiple_infection_ICD9 = flu_ICD9.merge(pd.DataFrame(infection_count[infection_count>1]), 
                                         left_on='PatientID', right_index=True)

No patients in one set that arent in the other

In [126]:
set(multiple_infection.PatientID.drop_duplicates()).difference(set(multiple_infection_ICD9.PatientID.drop_duplicates()))
Out[126]:
set()
In [127]:
flu_coinfection = multiple_infection.drop_duplicates('PatientID')

Organism numbers for bacterial infections

In [128]:
bacterial_numbers = [1,2,9,11,12,13,14,15,16,19,30,31,32,35,36,37,38,
                     39,40,48,52,54,55,58,59,60,61,67,68,69,71,77,80,84,85,86,91,95,104]
In [129]:
bacterial_coinf = flu_organism[flu_organism.OrganismNo.isin(bacterial_numbers)].merge(
                pd.DataFrame(infection_count[infection_count>1]), 
                left_on='PatientID', right_index=True)

Number with bacterial coinfection via organism code

In [130]:
with_bacterial_coinf = bacterial_coinf.drop_duplicates(cols=['PatientID'])
with_bacterial_coinf.shape
/usr/local/lib/python3.4/site-packages/pandas/util/decorators.py:81: FutureWarning: the 'cols' keyword is deprecated, use 'subset' instead
  warnings.warn(msg, FutureWarning)
Out[130]:
(222, 61)
In [131]:
bacterial_ICD9 = ['033.9 ','033 ','036.2 ','038.0 ','038.1','038.11','038.12','038.19',
                  '038.2','038.3','038.4','038.41','038.42','03.43','038.44','038.49',
                  '040.82','040.89','081','100.9','481']
In [132]:
with_bacterial_ICD9 = flu_ICD9[flu_ICD9.ICD9Code.apply(
        lambda x: x.startswith('041') or x.startswith('320') or x.startswith('482') or
        bool(bacterial_ICD9.count(x)))]

Number with bacterial coinfection via ICD9

In [133]:
with_bacterial_ICD9.PatientID.drop_duplicates().shape
Out[133]:
(180,)

Comparison with flu-only:

In [134]:
influenza_only['AgeYears'].hist(alpha=0.3, grid=False)
with_bacterial_coinf['AgeYears'].hist(alpha=0.3, grid=False)
Out[134]:
<matplotlib.axes._subplots.AxesSubplot at 0x114efce10>
In [135]:
print('u={0:.0f}, p={1:.3f}'.format(*stats.mannwhitneyu(influenza_only['AgeYears'].dropna().values, 
                                                        with_bacterial_coinf['AgeYears'].dropna().values)))
u=29942, p=0.021
In [136]:
with_bacterial_coinf['AgeYears'].describe()
Out[136]:
count    222.000000
mean      25.738517
std       19.239357
min        0.000000
25%        8.606164
50%       24.983562
75%       40.582877
max       76.326027
Name: AgeYears, dtype: float64

Survival

In [137]:
flu_bacterial_survivors = with_bacterial_coinf['DischargedAlive']
flu_bacterial_survivors.mean()
Out[137]:
0.62612612612612617
In [138]:
binomial_hpdr(flu_bacterial_survivors)
Out[138]:
[0.63, 0.56000000000000005, 0.68999999999999995]

Days on ECMO

In [139]:
lognorm_interval(with_bacterial_coinf.HoursECMO/24., draws=100000)
Out[139]:
[2.96, 2.0600000000000001, 3.8700000000000001]
In [140]:
race_counts_bacterial = with_bacterial_coinf['Race'].value_counts()
race_counts_bacterial/float(race_counts_bacterial.sum())
Out[140]:
W    0.615385
A    0.135747
H    0.108597
B    0.072398
O    0.067873
dtype: float64

Posterior interval for proportion white

In [141]:
binomial_hpdr(with_bacterial_coinf.Race=='W')
Out[141]:
[0.60999999999999999, 0.55000000000000004, 0.67000000000000004]

Proportion female

In [142]:
binomial_hpdr((with_bacterial_coinf.Sex=='F'))
Out[142]:
[0.42999999999999999, 0.35999999999999999, 0.48999999999999999]

Complications

In [143]:
complication_intervals(with_flu[with_flu.PatientID.isin(with_bacterial_coinf.PatientID)])
Neurologic:
	 0.14 (0.1, 0.19)

Renal:
	 0.63 (0.57, 0.69)

Hemorrhagic:
	 0.55 (0.48, 0.61)

Cardiovascular:
	 0.65 (0.59, 0.71)

Pulmonary:
	 0.33 (0.27, 0.4)

In [144]:
# Interval on log scale
lognorm_interval(with_bacterial_coinf.HoursVent.dropna()/24)
Out[144]:
[3.4100000000000001, 2.4500000000000002, 4.3899999999999997]

Table 2 summaries

race, age, gender pre-ECMO lab values (blood gas (pH, pCO2, HCO3, Oxygen Index) type of ECMO, ionotrope/pressor support, nitric, CVVH, oscillator type of co-infection

Race distribution

In [145]:
flu_organism['fate'] = flu_organism.DischargedAlive.replace({1: 'Survived', 0: 'Died'})
by_fate = flu_organism.drop_duplicates('PatientID').groupby("fate")
In [146]:
by_fate['Race'].apply(lambda x: (x.value_counts()/float(x.value_counts().sum())).round(3))
Out[146]:
fate       
Died      W    0.642
          A    0.100
          B    0.094
          H    0.089
          O    0.075
Survived  W    0.626
          A    0.136
          H    0.094
          B    0.090
          O    0.055
dtype: float64
In [147]:
by_fate['Race'].value_counts().plot(kind='bar')
Out[147]:
<matplotlib.axes._subplots.AxesSubplot at 0x114fc8e48>

Age distribution

In [148]:
by_fate['AgeYears'].describe()
Out[148]:
fate           
Died      count    368.000000
          mean      23.698295
          std       20.250845
          min        0.002740
          25%        4.233562
          50%       17.271233
          75%       40.534247
          max       76.454795
Survived  count    554.000000
          mean      25.520444
          std       19.208551
          min        0.000000
          25%        6.821918
          50%       26.312329
          75%       39.902055
          max       81.312329
dtype: float64

Age distribution is not normally distributed.

In [149]:
age_dist = by_fate['AgeYears']
age_dist.hist(alpha=0.3, grid=False)
Out[149]:
fate
Died        Axes(0.125,0.125;0.775x0.775)
Survived    Axes(0.125,0.125;0.775x0.775)
Name: AgeYears, dtype: object
In [150]:
_ = by_fate.boxplot(column='AgeYears')
/usr/local/lib/python3.4/site-packages/pandas/tools/plotting.py:2633: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
  warnings.warn(msg, FutureWarning)

Mann-Whitney U-test (non-parametric)

In [151]:
age_data = list(age_dist)
print('u={0:.0f}, p={1:.3f}'.format(*stats.mannwhitneyu(age_data[0][1].dropna().values, age_data[1][1].dropna().values)))
u=95086, p=0.042

Sex proportions

In [152]:
by_fate['Sex'].apply(lambda x: x.value_counts()/float(x.value_counts().sum()))
Out[152]:
fate       
Died      M    0.552198
          F    0.447802
Survived  M    0.528131
          F    0.471869
dtype: float64
In [153]:
sex_dist = by_fate['Sex'].apply(lambda x: x.value_counts())
ztest(sex_dist.unstack()['M'].values, sex_dist.unstack().sum(1))
Out[153]:
{'p': array([ 0.23740493]), 'z': array([-0.71467505])}

Gases

In [154]:
gases = ['pH', 'PCO2', 'HCO3', 'PO2', 'SaO2']

by_fate.describe()[gases]
Out[154]:
                        pH        PCO2        HCO3         PO2        SaO2
fate                                                                      
Died     count  328.000000  331.000000  299.000000  332.000000  304.000000
         mean     7.217500   62.947432   24.470234   61.249699   76.943750
         std      0.162997   26.688554    8.677740   50.421004   18.561107
         min      6.560000    7.000000    2.400000    4.700000    9.000000
         25%      7.127500   45.000000   18.950000   39.000000   70.000000
         50%      7.225000   58.300000   23.200000   52.000000   82.000000
         75%      7.332500   76.500000   29.000000   65.000000   90.000000
         max      7.570000  190.500000   60.000000  512.000000  100.000000
Survived count  495.000000  502.000000  448.000000  505.000000  465.000000
         mean     7.253152   60.037649   25.810491   68.051683   82.342796
         std      0.162546   27.480175    8.036870   66.519264   15.281890
         min      6.290000    6.000000    3.000000    4.000000    4.000000
         25%      7.160000   42.000000   20.100000   44.200000   78.000000
         50%      7.280000   55.200000   24.650000   56.000000   86.000000
         75%      7.370000   72.000000   30.000000   69.700000   92.000000
         max      7.670000  213.000000   60.000000  707.000000  100.000000
In [155]:
for g in gases:
    by_fate.boxplot(column=g)
/usr/local/lib/python3.4/site-packages/pandas/tools/plotting.py:2633: FutureWarning: 
The default value for 'return_type' will change to 'axes' in a future release.
 To use the future behavior now, set return_type='axes'.
 To keep the previous behavior and silence this warning, set return_type='dict'.
  warnings.warn(msg, FutureWarning)
In [156]:
for i in gases:
    data = list(flu[i].groupby(flu['DischargedAlive']))
    print('{0}: u={1:.0f}, p={2:.3f}'.format(i, *stats.mannwhitneyu(data[0][1].dropna().values, data[1][1].dropna().values)))
pH: u=74566, p=0.001
PCO2: u=81282, p=0.038
HCO3: u=62278, p=0.003
PO2: u=77355, p=0.001
SaO2: u=60212, p=0.000

Prevalence of each type of confection:

Bacterial

In [157]:
bacterial = flu_organism[flu_organism.Type=='bacterial'].drop_duplicates('PatientID').groupby('DischargedAlive')['PatientID'].count()
bacterial
Out[157]:
DischargedAlive
0    116
1    164
Name: PatientID, dtype: int64
In [158]:
bacterial_prop = bacterial/multiple_infection.groupby('DischargedAlive')['PatientID'].count().astype(float)
bacterial_prop
Out[158]:
DischargedAlive
0    0.361371
1    0.286213
Name: PatientID, dtype: float64
In [159]:
ztest(bacterial, bacterial / bacterial_prop)
Out[159]:
{'p': array([ 0.01005223]), 'z': array([-2.32439245])}

Fungal

In [160]:
fungal = flu_organism[flu_organism.Type=='fungal'].drop_duplicates('PatientID').groupby('DischargedAlive')['PatientID'].count()
fungal
Out[160]:
DischargedAlive
0    42
1    39
Name: PatientID, dtype: int64
In [161]:
fungal_prop = fungal/multiple_infection.groupby('DischargedAlive')['PatientID'].count().astype(float)
fungal_prop
Out[161]:
DischargedAlive
0    0.130841
1    0.068063
Name: PatientID, dtype: float64
In [162]:
ztest(fungal, fungal / fungal_prop)
Out[162]:
{'p': array([ 0.00085331]), 'z': array([-3.13704187])}

Viral

In [163]:
viral = flu_organism[(flu_organism.Type=='viral') & (flu_organism.OrganismName.str.startswith('Influenza')==False)].drop_duplicates('PatientID').groupby('DischargedAlive')['PatientID'].count()
viral
Out[163]:
DischargedAlive
0    18
1    24
Name: PatientID, dtype: int64
In [164]:
viral_prop = viral/multiple_infection.groupby('DischargedAlive')['PatientID'].count().astype(float)
viral_prop
Out[164]:
DischargedAlive
0    0.056075
1    0.041885
Name: PatientID, dtype: float64
In [165]:
ztest(viral, viral / viral_prop)
Out[165]:
{'p': array([ 0.1680471]), 'z': array([-0.96191123])}

Type of ECMO

In [166]:
ecmo_type = (flu.Mode=='VA').replace({True:'VA', False:'VV'})
ecmo_type.value_counts() / float(ecmo_type.notnull().sum())
Out[166]:
VV    0.725449
VA    0.274551
dtype: float64
In [167]:
flu['ecmo_type'] = (flu.Mode=='VA').replace({True:'VA', False:'VV'})
flu.ecmo_type.groupby(flu['DischargedAlive']).apply(lambda x: x.value_counts()/float(x.notnull().sum()))
Out[167]:
DischargedAlive    
0                VV    0.656992
                 VA    0.343008
1                VV    0.771127
                 VA    0.228873
dtype: float64

Merge Flu with Pre-ECLS Support

In [168]:
flu_support = flu.merge(support, left_index=True, right_on='RunID', how='left')
In [169]:
support.Description.value_counts()
Out[169]:
Narcotics                                 924
Neuromuscular blockers                    835
Vasopressor/inotropic drugs               791
Norepinephrine                            471
Nitric oxide                              437
High frequency ventilation/oscillation    333
Epinephrine                               259
Bicarbonate                               222
Dopamine                                  184
Vasodilator drugs                         167
Steroids                                  158
Milrinone                                  73
Dobutamine                                 65
CVVH                                       61
Epoprostenol                               34
THAM                                       34
Surfactant                                 26
Hyperventilation                           17
Intra-aortic balloon                       16
Nitroprusside                              11
Cardiopulmonary bypass                     11
Inhaled anesthetic                         10
Abdominal compression                       8
Sildenafil                                  6
AVCO2R                                      5
Cardiac pacemaker                           4
LVAD                                        4
Hypothermia                                 3
Liquid ventilation                          3
Plasmapheresis                              3
Berlin Heart                                1
Inamrinone                                  1
dtype: int64
In [170]:
n = float(flu_support.PatientID.drop_duplicates().count())
n
Out[170]:
922.0
In [171]:
n_alive = flu_support.drop_duplicates('PatientID').groupby('DischargedAlive')['PatientID'].count().astype(float)
n_alive
Out[171]:
DischargedAlive
0    368
1    554
Name: PatientID, dtype: float64

Nitric oxide

In [172]:
flu_support.PatientID[flu_support.Description=='Nitric oxide'].drop_duplicates().count()/n
Out[172]:
0.31670281995661603
In [173]:
nitric_oxide = flu_support[flu_support.Description=='Nitric oxide'].drop_duplicates('PatientID').groupby(
                                                    'DischargedAlive')['PatientID'].count()
nitric_oxide/n_alive
Out[173]:
DischargedAlive
0    0.366848
1    0.283394
Name: PatientID, dtype: float64

CVVH

In [174]:
flu_support.PatientID[flu_support.Description=='CVVH'].drop_duplicates().count()/n
Out[174]:
0.032537960954446853
In [175]:
cvvh = flu_support[flu_support.Description=='CVVH'].drop_duplicates('PatientID').groupby(
                                                    'DischargedAlive')['PatientID'].count()
cvvh/n_alive
Out[175]:
DischargedAlive
0    0.043478
1    0.025271
Name: PatientID, dtype: float64

High frequency ventilation/oscillation

In [176]:
flu_support.PatientID[flu_support.Description=='High frequency ventilation/oscillation'].drop_duplicates().count()/n
Out[176]:
0.2646420824295011
In [177]:
hfvo = flu_support[flu_support.Description=='High frequency ventilation/oscillation'].drop_duplicates('PatientID').groupby(
                                                    'DischargedAlive')['PatientID'].count()
hfvo/n_alive
Out[177]:
DischargedAlive
0    0.320652
1    0.227437
Name: PatientID, dtype: float64

Ionotrope/pressor support

In [178]:
pressor_set = ['Dobutamine', 'Vasopressor/inotropic drugs', 'Norepinephrine', 'Epinephrine', 'Dopamine', 'Milrinone']

ionotrope_pressor = flu_support[flu_support.Description.isin(pressor_set)].drop_duplicates('PatientID').groupby(
                                                    'DischargedAlive')['PatientID'].count()
ionotrope_pressor/n_alive
Out[178]:
DischargedAlive
0    0.801630
1    0.743682
Name: PatientID, dtype: float64

Table 1 summaries

All ECMO

In [179]:
# Age
fig, axes = plt.subplots(ncols=2, figsize=(14,4))
flu.AgeDays.hist(ax=axes[0])
axes[1].boxplot(flu.AgeDays.values);
In [180]:
flu.AgeDays.describe()
Out[180]:
count      947.000000
mean      9090.066526
std       7164.891484
min          0.000000
25%       2259.000000
50%       8428.000000
75%      14682.500000
max      29679.000000
Name: AgeDays, dtype: float64
In [181]:
# Race
flu.Race.value_counts()
Out[181]:
W    587
A    115
H     87
B     84
O     57
dtype: int64
In [182]:
flu.Race.value_counts()/flu.Race.value_counts().sum()
Out[182]:
W    0.631183
A    0.123656
H    0.093548
B    0.090323
O    0.061290
dtype: float64
In [183]:
# Gender
flu.Sex.value_counts()
Out[183]:
M    508
F    432
dtype: int64
In [184]:
flu.Sex.value_counts()/flu.Sex.value_counts().sum()
Out[184]:
M    0.540426
F    0.459574
dtype: float64
In [185]:
# Death
flu.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
Out[185]:
Survived    568
Died        379
dtype: int64
In [186]:
(flu.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
    /flu.DischargedAlive.value_counts().sum())
Out[186]:
Survived    0.599789
Died        0.400211
dtype: float64

TODO: Break down by Mode

In [187]:
# ECMO Type
flu.Mode.unique()
Out[187]:
array(['VV', 'VA', 'VV-VA', 'Other', 'VVDL', 'VA-VV', 'VVDL+V', 'VA+V',
       'VVA', nan], dtype=object)
In [188]:
# ECMO Type
flu['VA'] = flu.Mode.isin(['VA', 'VV-VA'])
# Set "Other" type to NA (there are only a couple)
flu.VA[flu.Mode=='Other'] = np.nan
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

This is the proportion that is VA

In [189]:
flu.VA.mean()
Out[189]:
0.3236870310825295
In [190]:
# Support type
flu.SupportType.value_counts()
Out[190]:
1    817
2     97
3     33
dtype: int64

Proportion CPR

In [191]:
(flu.SupportType==3).mean()
Out[191]:
0.034846884899683211
In [192]:
# Complications
ecmo_complications = flu_organism.merge(complications, on='RunID')
ecmo_complications.complication_type.value_counts()
Out[192]:
Cardiovascular    1525
Renal             1315
Mechanical        1150
Hemorrhagic       1018
Metabolic          603
Infectious         538
Pulmonary          463
Neurologic         241
dtype: int64

Influenza only

In [193]:
# Age
fig, axes = plt.subplots(ncols=2, figsize=(14,4))
influenza_only.AgeDays.hist(ax=axes[0])
axes[1].boxplot(influenza_only.AgeDays.values);
In [194]:
influenza_only.AgeDays.describe()
Out[194]:
count      301.000000
mean      8177.807309
std       6917.405295
min          2.000000
25%       1709.000000
50%       6775.000000
75%      12929.000000
max      25021.000000
Name: AgeDays, dtype: float64
In [195]:
# Race
influenza_only.Race.value_counts()
Out[195]:
W    183
B     34
H     32
A     24
O     19
dtype: int64
In [196]:
influenza_only.Race.value_counts()/influenza_only.Race.value_counts().sum()
Out[196]:
W    0.626712
B    0.116438
H    0.109589
A    0.082192
O    0.065068
dtype: float64
In [197]:
# Gender
influenza_only.Sex.value_counts()
Out[197]:
M    159
F    140
dtype: int64
In [198]:
influenza_only.Sex.value_counts()/influenza_only.Sex.value_counts().sum()
Out[198]:
M    0.531773
F    0.468227
dtype: float64
In [199]:
# Death
influenza_only.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
Out[199]:
Survived    180
Died        121
dtype: int64
In [200]:
(influenza_only.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
    /influenza_only.DischargedAlive.value_counts().sum())
Out[200]:
Survived    0.598007
Died        0.401993
dtype: float64
In [201]:
# ECMO Type
influenza_only.Mode.unique()
Out[201]:
array(['VV', 'VV-VA', 'VA', 'VVDL+V', 'VVDL', 'VA-VV', 'VA+V', 'Other'], dtype=object)
In [202]:
influenza_only['VA'] = influenza_only.Mode.isin(['VA', 'VV-VA'])
# Set "Other" type to NA (there are only a couple)
influenza_only.VA[influenza_only.Mode=='Other'] = np.nan
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
/usr/local/lib/python3.4/site-packages/pandas/core/generic.py:3572: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
/usr/local/lib/python3.4/site-packages/IPython/core/interactiveshell.py:3035: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)

This is the proportion that is VA

In [203]:
influenza_only.VA.mean()
Out[203]:
0.32214765100671139
In [204]:
# Support type
influenza_only.SupportType.value_counts()
Out[204]:
1    255
2     36
3     10
dtype: int64
In [205]:
(influenza_only.SupportType==3).mean()
Out[205]:
0.033222591362126248
In [206]:
# Complications
influenza_complications = influenza_only.merge(complications, on='RunID')
influenza_complications.complication_type.value_counts()
Out[206]:
Cardiovascular    255
Renal             212
Mechanical        184
Hemorrhagic       165
Metabolic         104
Pulmonary          68
Neurologic         55
Infectious         31
dtype: int64

Table 3 summaries

Severity of illness in patients with influenza alone on ECMO versus patients with bacterial co-infection

In [207]:
def normal_post(y, pct=0.95, mu_0=0., var_0=1e6, draws=1000, roundto=2):
    
    s2 = y.var()
    ybar = y.mean()
    n = len(y)
    
    mu = (n*ybar/s2 + mu_0/var_0)/(n*1./s2 + 1./var_0)
    sigma = np.sqrt(1./(n/s2 + 1./var_0))
    
    y_post = np.sort(np.random.normal(mu, sigma, size=draws))
    c = int(draws*(1-pct)/2.)

    return([np.round(v, roundto) for v in (mu, y_post[c], y_post[-c])])

Days on ECMO

In [208]:
flu_hours_ecmo = (flu.drop_duplicates(cols=['PatientID']).HoursECMO/24.)
flu_hours_ecmo.describe()
/usr/local/lib/python3.4/site-packages/pandas/util/decorators.py:81: FutureWarning: the 'cols' keyword is deprecated, use 'subset' instead
  warnings.warn(msg, FutureWarning)
Out[208]:
count    918.000000
mean      12.699165
std       11.819622
min        0.000000
25%        5.510417
50%        9.604167
75%       16.354167
max      125.750000
Name: HoursECMO, dtype: float64
In [209]:
np.log(flu_hours_ecmo+0.01).hist(bins=25)
Out[209]:
<matplotlib.axes._subplots.AxesSubplot at 0x11530e978>
In [210]:
flu_days_ECMO = with_flu.HoursECMO/24.
flu_days_ECMO.describe()
Out[210]:
count    943.000000
mean      12.760251
std       11.806250
min        0.000000
25%        5.541667
50%        9.625000
75%       16.520833
max      125.750000
Name: HoursECMO, dtype: float64
In [211]:
flu_days_ECMO.hist(bins=25)
Out[211]:
<matplotlib.axes._subplots.AxesSubplot at 0x11580ba20>
In [212]:
(influenza_only.HoursECMO/24.).describe()
Out[212]:
count    301.000000
mean       9.511766
std        7.765752
min        0.041667
25%        4.583333
50%        7.375000
75%       12.666667
max       53.833333
Name: HoursECMO, dtype: float64
In [213]:
(influenza_only.HoursECMO/24.).hist(bins=20)
Out[213]:
<matplotlib.axes._subplots.AxesSubplot at 0x111b44860>
In [214]:
lognorm_interval(influenza_only.HoursECMO/24., draws=100000)
Out[214]:
[2.3999999999999999, 1.3999999999999999, 3.4100000000000001]
In [215]:
multiple_infection['VA'] = multiple_infection.Mode.isin(['VA', 'VV-VA'])
# Set "Other" type to NA (there are only a couple)
multiple_infection.VA[multiple_infection.Mode=='Other'] = np.nan
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
In [216]:
has_flu = multiple_infection[multiple_infection.OrganismName.str.startswith('Influenza')==True].drop_duplicates('PatientID')
has_mrsa = multiple_infection[multiple_infection.OrganismName=='Staphylococcus aureus, meth resist'].drop_duplicates('PatientID')
has_mssa = multiple_infection[multiple_infection.OrganismName=='Staphylococcus aureus'].drop_duplicates('PatientID')
In [217]:
flu_with_mrsa = has_flu[has_flu.PatientID.isin(has_mrsa.PatientID)]
In [218]:
(flu_with_mrsa.HoursECMO/24.).describe()
Out[218]:
count    28.000000
mean     13.831845
std       9.215900
min       1.250000
25%       7.322917
50%      12.895833
75%      17.822917
max      34.666667
Name: HoursECMO, dtype: float64
In [219]:
flu_with_mssa = has_flu[has_flu.PatientID.isin(has_mssa.PatientID)]
In [220]:
(flu_with_mssa.HoursECMO/24.).describe()
Out[220]:
count    33.000000
mean     15.878788
std      15.318045
min       0.333333
25%       6.500000
50%      12.166667
75%      20.583333
max      64.375000
Name: HoursECMO, dtype: float64

Proportion VA (vs. VV)

In [221]:
influenza_only.VA.mean()
Out[221]:
0.32214765100671139
In [222]:
flu_coinfection['VA'] = flu_coinfection.Mode.isin(['VA', 'VV-VA'])
# Set "Other" type to NA (there are only a couple)
flu_coinfection.loc[flu_coinfection.Mode=='Other', 'VA'] = np.nan
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
/usr/local/lib/python3.4/site-packages/pandas/core/indexing.py:407: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
In [223]:
flu_coinfection.VA.mean()
Out[223]:
0.25824175824175827
In [224]:
flu_with_mrsa.VA.mean()
Out[224]:
0.38461538461538464
In [225]:
flu_with_mssa.VA.mean()
Out[225]:
0.34375

Death

In [226]:
influenza_only.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
Out[226]:
Survived    180
Died        121
dtype: int64
In [227]:
(influenza_only.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
    /influenza_only.DischargedAlive.value_counts().sum())
Out[227]:
Survived    0.598007
Died        0.401993
dtype: float64
In [228]:
flu_coinfection.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
Out[228]:
Survived    239
Died        133
dtype: int64
In [229]:
(flu_coinfection.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
    /flu_coinfection.DischargedAlive.value_counts().sum())
Out[229]:
Survived    0.642473
Died        0.357527
dtype: float64
In [230]:
flu_with_mrsa.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
Out[230]:
Died        16
Survived    12
dtype: int64
In [231]:
(flu_with_mrsa.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
    /flu_with_mrsa.DischargedAlive.value_counts().sum())
Out[231]:
Died        0.571429
Survived    0.428571
dtype: float64
In [232]:
flu_with_mssa.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
Out[232]:
Survived    19
Died        14
dtype: int64
In [233]:
(flu_with_mssa.DischargedAlive.value_counts().rename({1: 'Survived', 0: 'Died'})
    /flu_with_mssa.DischargedAlive.value_counts().sum())
Out[233]:
Survived    0.575758
Died        0.424242
dtype: float64

Enumerate complications

In [234]:
complications.complication_type.value_counts()
Out[234]:
Cardiovascular    1471
Renal             1368
Mechanical        1142
Hemorrhagic        922
Metabolic          612
Infectious         478
Pulmonary          399
Neurologic         275
Limb                28
dtype: int64
In [235]:
complication_list = ['Mechanical', 'Hemorrhagic', 'Renal', 'Cardiovascular', 'Pulmonary']
In [236]:
has_complication = pd.DataFrame(complications.groupby('RunID').apply(
        lambda x: x['complication_type'].isin(complication_list).any()))
In [237]:
has_complication.columns = ['complication']
In [238]:
flu_with_comp = pd.merge(flu_organism, has_complication, left_on='RunID', right_index=True, how='left')
In [239]:
flu_with_comp.shape
Out[239]:
(1444, 61)
In [240]:
flu_with_comp.PatientID.unique().shape
Out[240]:
(922,)
In [241]:
flu_with_comp.complication.mean()
Out[241]:
0.9724907063197026
In [242]:
flu_organism.RunID.isin(has_complication[has_complication.complication].index).mean()
Out[242]:
0.90581717451523547
In [243]:
flu_organism['has_complication'] = flu_organism.RunID.isin(has_complication[has_complication.complication].index)
In [250]:
flu_organism = flu_organism.drop('has_complication', axis=1)

Merge with S. Aureus diagnoses from ICD9

In [251]:
flu_organism_icd9 = pd.merge(flu_organism, s_aureus_diagnoses, on='RunID', how='left')
In [252]:
flu_organism_icd9['s_aureus_icd9'] = flu_organism_icd9.ICD9Code.notnull()
In [253]:
flu_organism_icd9 = flu_organism_icd9.drop(['ICD9Code', 'PrimaryDiagnosis'], axis=1 )
In [254]:
flu_organism.PatientID.unique().shape
Out[254]:
(922,)
In [255]:
flu_organism_icd9.head()
Out[255]:
                              PatientID  RunNo  AgeDays  HoursECMO  \
0  89C16594-7B2B-42A4-81FF-002B2E92CA75      1     8992        192   
1  F54AD8CA-5FEF-4724-A89D-0061A3C51519      1       79         96   
2  F54AD8CA-5FEF-4724-A89D-0061A3C51519      1       79         96   
3  78545F85-C3BC-47EF-967A-00D941B25CF8      1     3062        102   
4  78545F85-C3BC-47EF-967A-00D941B25CF8      1     3062        102   

   SupportType  PrimaryDx Mode  Discontinuation  DischargedAlive  \
0            1     488.10   VV                1                1   
1            2     746.11   VA                1                0   
2            2     746.11   VA                1                0   
3            2     422.90   VA                1                1   
4            2     422.90   VA                1                1   

   DischargeLocation  YearECLS  VentType  Rate  FiO2  PIP  PEEP  MAP  \
0                  1      2009         2     5   100   66   NaN   26   
1                NaN      2000         2     9    88   44   NaN  NaN   
2                NaN      2000         2     9    88   44   NaN  NaN   
3                  3      1997         2    20    70   22     2  NaN   
4                  3      1997         2    20    70   22     2  NaN   

   HandBagging    pH  PCO2      ...       AdmitToTimeOnHours  \
0            0  7.39  32.0      ...                       39   
1            0  7.42  35.0      ...                      132   
2            0  7.42  35.0      ...                      132   
3            0  7.39  31.6      ...                       30   
4            0  7.39  31.6      ...                       30   

   TimeOffToExtubationDateHours  TimeOffToDeathDateHours  \
0                           NaN                      NaN   
1                           NaN                      296   
2                           NaN                      296   
3                           132                      NaN   
4                           132                      NaN   

   TimeOffToDCDateHours  ExtubationToDCDateHours  ExtubationToDeathDateHours  \
0                   609                      NaN                         NaN   
1                   NaN                      NaN                         NaN   
2                   NaN                      NaN                         NaN   
3                   204                       72                         NaN   
4                   204                       72                         NaN   

   year  HoursVent   AgeYears  age_class  \
0  2009        192  24.635616      adult   
1  2000         96   0.216438  pediatric   
2  2000         96   0.216438  pediatric   
3  1997        234   8.389041  pediatric   
4  1997        234   8.389041  pediatric   

                                  RunID  OrganismNo          OrganismName  \
0  4BA2348E-18D8-44AE-8E7A-0EC2748E0AFE          63           Influenza A   
1  4247D183-3A46-4945-ADE6-81FE57E97688          63           Influenza A   
2  4247D183-3A46-4945-ADE6-81FE57E97688          63           Influenza A   
3  70DC2DC7-B983-40B0-81B4-2998E80F7163          63           Influenza A   
4  70DC2DC7-B983-40B0-81B4-2998E80F7163          19  Gram negative, other   

   CultureSite  CultureTimeIsApproximate  OrganismTiming       Type  \
0      Unknown                         1        Pre-ECLS      viral   
1      Unknown                         1        Pre-ECLS      viral   
2      Unknown                         1         On-ECLS      viral   
3      Unknown                         1        Pre-ECLS      viral   
4      Unknown                         1         On-ECLS  bacterial   

       has_flu      fate s_aureus_icd9  
0     ECMO Flu  Survived         False  
1     ECMO Flu      Died         False  
2     ECMO Flu      Died         False  
3     ECMO Flu  Survived         False  
4  ECMO No Flu  Survived         False  

[5 rows x 61 columns]

Export data

In [256]:
flu_organism_icd9.to_csv("data/flu_organism.csv")
In [257]:
flu_with_comp.to_csv("data/flu.csv")
In [258]:
flu_with_comp.head()
Out[258]:
                                 PatientID  RunNo  AgeDays  HoursECMO  \
41    89C16594-7B2B-42A4-81FF-002B2E92CA75      1     8992        192   
496   F54AD8CA-5FEF-4724-A89D-0061A3C51519      1       79         96   
497   F54AD8CA-5FEF-4724-A89D-0061A3C51519      1       79         96   
128   78545F85-C3BC-47EF-967A-00D941B25CF8      1     3062        102   
1212  78545F85-C3BC-47EF-967A-00D941B25CF8      1     3062        102   

      SupportType  PrimaryDx Mode  Discontinuation  DischargedAlive  \
41              1     488.10   VV                1                1   
496             2     746.11   VA                1                0   
497             2     746.11   VA                1                0   
128             2     422.90   VA                1                1   
1212            2     422.90   VA                1                1   

      DischargeLocation  YearECLS  VentType  Rate  FiO2  PIP  PEEP  MAP  \
41                    1      2009         2     5   100   66   NaN   26   
496                 NaN      2000         2     9    88   44   NaN  NaN   
497                 NaN      2000         2     9    88   44   NaN  NaN   
128                   3      1997         2    20    70   22     2  NaN   
1212                  3      1997         2    20    70   22     2  NaN   

      HandBagging    pH  PCO2     ...       AdmitToTimeOnHours  \
41              0  7.39  32.0     ...                       39   
496             0  7.42  35.0     ...                      132   
497             0  7.42  35.0     ...                      132   
128             0  7.39  31.6     ...                       30   
1212            0  7.39  31.6     ...                       30   

      TimeOffToExtubationDateHours  TimeOffToDeathDateHours  \
41                             NaN                      NaN   
496                            NaN                      296   
497                            NaN                      296   
128                            132                      NaN   
1212                           132                      NaN   

      TimeOffToDCDateHours  ExtubationToDCDateHours  \
41                     609                      NaN   
496                    NaN                      NaN   
497                    NaN                      NaN   
128                    204                       72   
1212                   204                       72   

      ExtubationToDeathDateHours  year  HoursVent   AgeYears  age_class  \
41                           NaN  2009        192  24.635616      adult   
496                          NaN  2000         96   0.216438  pediatric   
497                          NaN  2000         96   0.216438  pediatric   
128                          NaN  1997        234   8.389041  pediatric   
1212                         NaN  1997        234   8.389041  pediatric   

                                     RunID  OrganismNo          OrganismName  \
41    4BA2348E-18D8-44AE-8E7A-0EC2748E0AFE          63           Influenza A   
496   4247D183-3A46-4945-ADE6-81FE57E97688          63           Influenza A   
497   4247D183-3A46-4945-ADE6-81FE57E97688          63           Influenza A   
128   70DC2DC7-B983-40B0-81B4-2998E80F7163          63           Influenza A   
1212  70DC2DC7-B983-40B0-81B4-2998E80F7163          19  Gram negative, other   

      CultureSite  CultureTimeIsApproximate  OrganismTiming       Type  \
41        Unknown                         1        Pre-ECLS      viral   
496       Unknown                         1        Pre-ECLS      viral   
497       Unknown                         1         On-ECLS      viral   
128       Unknown                         1        Pre-ECLS      viral   
1212      Unknown                         1         On-ECLS  bacterial   

          has_flu      fate complication  
41       ECMO Flu  Survived         True  
496      ECMO Flu      Died         True  
497      ECMO Flu      Died         True  
128      ECMO Flu  Survived         True  
1212  ECMO No Flu  Survived         True  

[5 rows x 61 columns]