In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc3 as pm
:0: FutureWarning: IPython widgets are experimental and may change in the future.
In [2]:
flu = pd.read_csv('data/flu.csv', index_col=0)
In [3]:
flu.shape
Out[3]:
(1406, 60)

Turn into table of unique patients, with appropriate organism columns

In [4]:
flu_only = flu.groupby('PatientID')['OrganismName'].apply(lambda s: 
                        len([x for x in s if str(x).startswith('Influenza')])==len(s)).astype(int)
In [5]:
coinfections = flu.groupby('PatientID')['OrganismName'].apply(lambda s: 
                        [x for x in s.dropna() if not str(x).startswith('Influenza')])

MRSA coinfection

In [6]:
mrsa = coinfections.apply(lambda x: x.count('Staphylococcus aureus, meth resist') > 0).astype(int)

Any coinfection

In [7]:
any_coinf = coinfections.apply(lambda x: int(len(x)>0))
In [8]:
flu_unique = flu.drop_duplicates(cols=['PatientID']).set_index('PatientID')
/usr/local/lib/python3.4/site-packages/pandas/util/decorators.py:81: FutureWarning: the 'cols' keyword is deprecated, use 'subset' instead
  warnings.warn(msg, FutureWarning)
In [9]:
flu_unique['flu_only'] = flu_only
flu_unique['mrsa'] = mrsa
flu_unique['any_coinf'] = any_coinf

Add time from admission to time on ECMO

In [10]:
admit_through_emco = flu_unique.AdmitToTimeOnHours.add(flu_unique.HoursECMO, fill_value=0)

See how many have no time information from admission, and drop these

In [11]:
missing = admit_through_emco.isnull()
missing.sum()
Out[11]:
1
In [12]:
flu_complete = flu_unique[missing-True]
admit_through_emco.dropna(inplace=True)
/usr/local/lib/python3.4/site-packages/pandas/computation/expressions.py:190: UserWarning: evaluating in Python space because the '-' operator is not supported by numexpr for the bool dtype, use '^' instead
  unsupported[op_str]))
In [13]:
# Confirm no null here
admit_through_emco.isnull().sum()
Out[13]:
0

Time off ECMO through to event (death or discharge)

In [14]:
off_ecmo_to_event = flu_complete.TimeOffToDCDateHours.add(flu_complete.TimeOffToDeathDateHours, 
                                                          fill_value=0)
In [15]:
off_ecmo_to_event.isnull().sum()
Out[15]:
51
In [16]:
# Not sure why add() does not work here
flu_complete['time_to_event'] = (admit_through_emco.values + off_ecmo_to_event.fillna(0).values)
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app
In [17]:
flu_complete.time_to_event.isnull().sum()
Out[17]:
0

Clean covariates

In [18]:
flu_complete.Race.value_counts()
Out[18]:
W    554
A    102
B     83
H     82
O     56
dtype: int64
In [19]:
flu_complete['non_white'] = (flu_complete.Race!='W').astype(int)
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
In [20]:
flu_complete['male'] = (flu_complete.Sex=='M').astype(int)
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
In [21]:
flu_complete['OI'] = flu_complete.FiO2 * flu_complete.MAP / flu_complete.PO2
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
In [22]:
# ECMO Type
flu_complete['VA'] = flu_complete.Mode.isin(['VA', 'VV-VA'])
# Set "Other" type to NA (there are only a couple)
flu_complete.loc[flu_complete.Mode=='Other', 'VA'] = None
/usr/local/lib/python3.4/site-packages/IPython/kernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from IPython.kernel.zmq import kernelapp as app
/usr/local/lib/python3.4/site-packages/pandas/core/indexing.py:415: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
In [23]:
# covariates = ['male', 'AgeYears', 'non_white', 'HoursECMO', 'pH', 'PCO2', 'PO2', 'HCO3', 
#               'SaO2', 'mrsa', 'any_coinf']
covariates = ['male', 'AgeYears', 'non_white', 'pH', 'PCO2', 'HCO3', 'PO2', 'OI', 
              'SaO2', 'mrsa', 'any_coinf', 'AdmitToTimeOnHours', 'VA', 'PEEP']

Get counts of missing values

In [24]:
flu_complete[covariates].isnull().mean().round(2)*100
Out[24]:
male                   0
AgeYears               0
non_white              0
HoursECMO              0
pH                    11
PCO2                  10
HCO3                  19
PO2                    9
OI                    43
SaO2                  16
mrsa                   0
any_coinf              0
AdmitToTimeOnHours    13
VA                     2
PEEP                  38
dtype: float64
In [28]:
flu_complete.columns
Out[28]:
Index(['RunNo', 'AgeDays', 'HoursECMO', 'SupportType', 'PrimaryDx', 'Mode', 'Discontinuation', 'DischargedAlive', 'DischargeLocation', 'YearECLS', 'VentType', 'Rate', 'FiO2', 'PIP', 'PEEP', 'MAP', 'HandBagging', 'pH', 'PCO2', 'PO2', 'HCO3', 'SaO2', 'Venttype24', 'Rate24', 'Fio224', 'PIP24', 'PEEP24', 'MAP24', 'Handbagging24', 'SBP', 'DBP', 'MapHemo', 'SVO2', 'PCWP', 'SPAP', 'DPAP', 'MPAP', 'CI', 'Race', 'Sex', 'AdmitToTimeOnHours', 'TimeOffToExtubationDateHours', 'TimeOffToDeathDateHours', 'TimeOffToDCDateHours', 'ExtubationToDCDateHours', 'ExtubationToDeathDateHours', 'year', 'HoursVent', 'AgeYears', 'age_class', 'RunID', 'OrganismNo', 'OrganismName', 'CultureSite', 'CultureTimeIsApproximate', 'OrganismTiming', 'Type', 'has_flu', 'fate', 'flu_only', 'mrsa', 'any_coinf', 'time_to_event', 'non_white', 'male', 'OI', 'VA'], dtype='object')
In [29]:
flu_complete.groupby('fate')[['HoursECMO', 'AdmitToTimeOnHours']].mean()
Out[29]:
HoursECMO AdmitToTimeOnHours
fate
Died 324.935754 101.009709
Survived 293.647280 60.439914

We are going to drop the missing values for some of the covariates, and not use some covariates with many missing values.

In [25]:
drop_subset = ['HoursECMO', 'pH', 'PCO2', 'PO2', 'VA']
In [26]:
flu_complete = flu_complete.dropna(subset=drop_subset)
In [27]:
flu_complete[['pH', 'PCO2', 'PO2', 'HCO3', 
              'SaO2']].hist(bins=25);

Survival Analysis

Surivival time, in days

In [28]:
obs_t = flu_complete.time_to_event.values/24.
In [29]:
plt.hist(obs_t, bins=25);

Fate of each patient

In [30]:
died = (flu_complete.fate=='Died').astype(int).values

Kaplan-Meier Curves

In [31]:
from lifelines import KaplanMeierFitter
In [32]:
km = KaplanMeierFitter()
km.fit(obs_t, event_observed=died)
ax = km.plot()
In [33]:
mrsa_ind = flu_complete.mrsa.astype(bool).values
In [34]:
km = KaplanMeierFitter()
km.fit(obs_t[~mrsa_ind], event_observed=died[~mrsa_ind], label='non-MRSA')
ax = km.plot()

km.fit(obs_t[mrsa_ind], event_observed=died[mrsa_ind], label='MRSA')
km.plot(ax=ax)
Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b8d1ba8>

Cumulative Hazard

In [35]:
from lifelines import NelsonAalenFitter
na = NelsonAalenFitter()

na.fit(obs_t, event_observed=died)
na.plot()
Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ba0d748>

Unique failure times

In [36]:
times = np.unique((flu_complete.time_to_event[flu_complete.fate=='Died'].values/24).astype(int))
times
Out[36]:
array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  50,  53,  54,
        55,  59,  61,  62,  63,  67,  68,  73,  77, 108, 121, 125, 126,
       133, 142, 150, 201, 227, 235, 301])
In [37]:
N_obs = len(obs_t)
T = len(times) - 1

Calculate risk set

In [38]:
Y = np.array([[int(obs >= t) for t in times] for obs in obs_t])
Y
Out[38]:
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ..., 
       [1, 0, 0, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])

Counting process. Jump = 1 if $\text{obs}_t \in [ t_j, t_{j+1} )$

In [39]:
dN = np.array([[Y[i,j]*(times[j+1] >= obs_t[i])*died[i] for i in range(N_obs)] for j in range(T)])
In [40]:
# Process for one patient
dN[:,1]
Out[40]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

Covariates

In [41]:
standardize = lambda x: (x - x[np.isnan(x)-True].mean()) / x[np.isnan(x)-True].std()
In [42]:
analysis_covariates = ['male', 'AgeYears', 'non_white', 'HoursECMO', 'pH', 'PCO2', 'PO2', 'mrsa', 'any_coinf', 'VA']
In [43]:
flu_complete[analysis_covariates].isnull().sum()
Out[43]:
male         0
AgeYears     0
non_white    0
HoursECMO    0
pH           0
PCO2         0
PO2          0
mrsa         0
any_coinf    0
VA           0
dtype: int64
In [44]:
male, AgeYears, non_white, HoursECMO, pH, PCO2, PO2, mrsa, any_coinf, VA= \
    flu_complete[analysis_covariates].values.T


HoursECMO_std = standardize(HoursECMO)
AgeYears_std = standardize(AgeYears)
pH_std = standardize(pH)
PCO2_std = standardize(PCO2)
PO2_std = standardize(PO2)
# HCO3_std = standardize(HCO3)

Model

In [45]:
import theano.tensor as tt
from pymc3 import Model, Normal, Poisson, DensityDist, T as StudentT, Uniform, Deterministic, HalfCauchy, Laplace, Exponential
from pymc3 import Bernoulli, Gamma, MvNormal, BinaryMetropolis
from pymc3.distributions.timeseries import GaussianRandomWalk
from pymc3 import sample, NUTS, Slice, Metropolis, forestplot, find_MAP
In [46]:
n_kernels = 5
kernels = np.linspace(AgeYears_std.min(), AgeYears_std.max(), n_kernels)

def rbf(s, x):
    return [tt.exp( ((x - k)/ s)**2) for k in kernels]
In [54]:
with Model() as model:
    
    sigma = Uniform('sigma', 0, 100)
    
    alpha_sd = Uniform('alpha_sd', 0, 100)

    alpha = GaussianRandomWalk('alpha', sd=alpha_sd, shape=n_kernels)

    a = tt.dot(alpha, rbf(sigma, AgeYears_std))
    
    X = np.c_[[male, pH_std, PCO2_std, PO2_std, HoursECMO_std, non_white, mrsa, any_coinf]].T
    
    # Intercept for survival rate
    beta0 = Normal('beta0', 0.0, 0.001)
    # Treatment effect
    beta = Normal('beta', 0, 0.001, shape=X.shape[1])

    # Survival rates
    lam = tt.exp(beta0 + tt.dot(X, beta) + a)

    def exp_logp(failure, value):
        # Exponential survival log-likelihood
        return tt.sum(failure * tt.log(lam) - lam * value)

    surv_like = DensityDist('surv_like', exp_logp, observed=(died, obs_t))
In [55]:
with model:
    trace = sample(2000, step=NUTS(), njobs=2)
 [-----------------100%-----------------] 2000 of 2000 complete in 821.2 sec
In [56]:
forestplot(trace, vars=['alpha'])
Out[56]:
<matplotlib.gridspec.GridSpec at 0x10dcbffd0>
In [57]:
forestplot(trace, vars=['beta'], ylabels=['male', 'pH', 'PCO2', 'PO2', 'HoursECMO', 'non_white', 'mrsa', 'any_coinf'])
Out[57]:
<matplotlib.gridspec.GridSpec at 0x10fae6b70>
In [31]:
flu_complete.columns
Out[31]:
Index(['RunNo', 'AgeDays', 'HoursECMO', 'SupportType', 'PrimaryDx', 'Mode', 'Discontinuation', 'DischargedAlive', 'DischargeLocation', 'YearECLS', 'VentType', 'Rate', 'FiO2', 'PIP', 'PEEP', 'MAP', 'HandBagging', 'pH', 'PCO2', 'PO2', 'HCO3', 'SaO2', 'Venttype24', 'Rate24', 'Fio224', 'PIP24', 'PEEP24', 'MAP24', 'Handbagging24', 'SBP', 'DBP', 'MapHemo', 'SVO2', 'PCWP', 'SPAP', 'DPAP', 'MPAP', 'CI', 'Race', 'Sex', 'AdmitToTimeOnHours', 'TimeOffToExtubationDateHours', 'TimeOffToDeathDateHours', 'TimeOffToDCDateHours', 'ExtubationToDCDateHours', 'ExtubationToDeathDateHours', 'year', 'HoursVent', 'AgeYears', 'age_class', 'RunID', 'OrganismNo', 'OrganismName', 'CultureSite', 'CultureTimeIsApproximate', 'OrganismTiming', 'Type', 'has_flu', 'fate', 'flu_only', 'mrsa', 'any_coinf', 'time_to_event', 'non_white', 'male', 'OI', 'VA'], dtype='object')
In [33]:
flu_complete.year.value_counts()
Out[33]:
2009    338
2011    186
2010    115
2012     73
2008     31
2007     25
2003     24
2005     19
2006     17
2000     14
1998     10
1999      9
2002      8
2001      8
2004      6
1997      3
1996      2
1995      1
1994      1
1993      1
1992      1
dtype: int64
In [82]:
forestplot(trace, vars=['beta'], ylabels=['male', 'pH', 'PCO2', 'PO2', 'HoursECMO', 'non_white', 'mrsa', 'any_coinf'])
Out[82]:
<matplotlib.gridspec.GridSpec at 0x125108850>

Same model above, but with a Weibull hazard function

In [51]:
with Model() as model_weibull:
    
    X = np.c_[[male, pH_std, PCO2_std, PO2_std, HoursECMO_std, non_white, mrsa, any_coinf]].T
    
    # Intercept for survival rate
    beta0 = Normal('beta0', 0.0, 0.001)
    # Treatment effect
    beta = Normal('beta', 0, 0.001, shape=X.shape[1])

    # Survival rates
    lam = tt.exp(beta0 + tt.dot(X, beta))

    alpha = Exponential('alpha', 1)
    def weibull_logp(failure, times):
        # Weibull survival log-likelihood
        return tt.sum(failure*(tt.log(alpha) + (alpha-1)*tt.log(times*lam) + tt.log(lam)) - (lam*times)**alpha)

    surv_like = DensityDist('surv_like', weibull_logp, observed=(died, obs_t))
In [52]:
with model_weibull:
    trace_weibull = sample(2000, step=NUTS())
---------------------------------------------------------------------------
PositiveDefiniteError                     Traceback (most recent call last)
<ipython-input-52-4e1716b5764d> in <module>()
      1 with model_weibull:
----> 2     trace_weibull = sample(2000, step=NUTS())

/usr/local/lib/python3.4/site-packages/pymc3/step_methods/nuts.py in __init__(self, vars, scaling, step_scale, is_cov, state, Emax, target_accept, gamma, k, t0, model, **kwargs)
     67 
     68 
---> 69         self.potential = quad_potential(scaling, is_cov, as_cov=False)
     70 
     71         if state is None:

/usr/local/lib/python3.4/site-packages/pymc3/step_methods/quadpotential.py in quad_potential(C, is_cov, as_cov)
     33         return QuadPotential_SparseInv(C)
     34 
---> 35     partial_check_positive_definite(C)
     36     if C.ndim == 1:
     37         if is_cov != as_cov:

/usr/local/lib/python3.4/site-packages/pymc3/step_methods/quadpotential.py in partial_check_positive_definite(C)
     56     if len(i):
     57         raise PositiveDefiniteError(
---> 58             "Simple check failed. Diagonal contains negatives", i)
     59 
     60 

PositiveDefiniteError: Scaling is not positive definite. Simple check failed. Diagonal contains negatives. Check indexes [0 1 2 3 4 5 6 7 8]

The following incorporates a spline for the age effect.

In [41]:
def interpolate(x0, y0, x):
    
    x = np.array(x)

    idx = np.searchsorted(x0, x)
    dl = np.array(x - x0[idx - 1])
    dr = np.array(x0[idx] - x)
    d=dl+dr
    wl = dr/d

    return wl*y0[idx-1] + (1-wl)*y0[idx]
In [37]:
nknots = 10

with Model() as model_spline:
    
    #X = np.c_[[male, non_white, mrsa, any_coinf]].T
    
    coeff_sd = Uniform('coeff_sd', 0, 100)
    coeff_tau = coeff_sd**-2
    spline = Normal('spline', 0, coeff_tau, shape=nknots)
    knots = np.linspace(AgeYears.min(), AgeYears.max()+1, nknots)
    beta1 = Deterministic('beta1', interpolate(knots, spline, AgeYears))
    
    # Intercept for survival rate
    beta0 = Normal('beta0', 0.0, 0.001)
    # Treatment effect
    #beta = Normal('beta', 0, 0.001, shape=X.shape[1])

    # Survival rates
    lam = tt.exp(beta0 + beta1)# + tt.dot(X, beta))
    
    def exp_logp(failure, value):
        # Exponential survival log-likelihood
        return tt.sum(failure * tt.log(lam) - lam * value)

    surv_like = DensityDist('surv_like', exp_logp, observed=(died, obs_t))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-37-db5cbe3c3dee> in <module>()
      9     spline = Normal('spline', 0, coeff_tau, shape=nknots)
     10     knots = np.linspace(AgeYears.min(), AgeYears.max()+1, nknots)
---> 11     beta1 = Deterministic('beta1', interpolate(knots, spline, AgeYears))
     12 
     13     # Intercept for survival rate

NameError: name 'interpolate' is not defined
In [ ]:
with model_spline:
    trace_spline = sample(1000, step=NUTS())
In [51]:
forestplot(trace_spline, vars=['beta'], 
           ylabels=['male', 'non_white', 'mrsa', 'any_coinf'])
Out[51]:
<matplotlib.gridspec.GridSpec at 0x11bb06a90>
In [59]:
forestplot(trace_spline, vars=['spline']) 
Out[59]:
<matplotlib.gridspec.GridSpec at 0x11da06210>

Log-logistic model

In [ ]:
with Model() as log_logistic_model:
    
    sigma = Uniform('sigma', 0, 100)
    
    alpha_sd = Uniform('alpha_sd', 0, 100)

    alpha = GaussianRandomWalk('alpha', sd=alpha_sd, shape=n_kernels)

    a = tt.dot(alpha, rbf(sigma, AgeYears_std))
    
    X = np.c_[[male, pH_std, PCO2_std, PO2_std, HoursECMO_std, non_white, mrsa, any_coinf]].T
    
    # Intercept for survival rate
    beta0 = Normal('beta0', 0.0, 0.001)
    # Treatment effect
    beta = Normal('beta', 0, 0.001, shape=X.shape[1])

    # Survival rates
    lam = tt.exp(beta0 + tt.dot(X, beta) + a)

    def exp_logp(failure, value):
        # Exponential survival log-likelihood
        return tt.sum(failure * tt.log(lam) - lam * value)

    surv_like = DensityDist('surv_like', exp_logp, observed=(died, obs_t))

Radial basis function model

In [40]:
plt.hist(AgeYears)
Out[40]:
(array([ 248.,  106.,   75.,  103.,   79.,   64.,   66.,   40.,    7.,    4.]),
 array([  0.        ,   8.13123288,  16.26246575,  24.39369863,
         32.52493151,  40.65616438,  48.78739726,  56.91863014,
         65.04986301,  73.18109589,  81.31232877]),
 <a list of 10 Patch objects>)
In [75]:
np.eye(4)*[2,3,4,5]
Out[75]:
array([[ 2.,  0.,  0.,  0.],
       [ 0.,  3.,  0.,  0.],
       [ 0.,  0.,  4.,  0.],
       [ 0.,  0.,  0.,  5.]])
In [ ]:
gaussian_basis = lambda x, h, l: np.exp(-(np.abs(x - h) / l)**2)

n_rbf = 20
centers = np.linspace(AgeYears.min(), AgeYears.max()+1, n_rbf)
l = 3
# Weights
w = [[gaussian_basis(x, h, l) for h in centers] for x in AgeYears]
w_pred = [[gaussian_basis(x, h, l) for h in centers] for x in np.linspace(AgeYears.min(), AgeYears.max()+1, 100)]

with Model() as model_rbf:
    
    # Prior on number of basis functions
    z = Bernoulli('z', 0.5, shape=n_rbf)
    
    # Prior for non-zero coefficients
    sigma = HalfCauchy('sigma', 25)
    #kappa = Gamma('kappa', 0.5, 0.5, shape=n_rbf)
    #Tau = tt.diag(kappa) #* sigma**-2
    
    #beta = MvNormal('beta', [0]*n_rbf, Tau, shape=n_rbf)
    beta = GaussianRandomWalk('beta', sd=sigma, shape=n_rbf)


    # Intercept for survival rate
    beta0 = Normal('beta0', 0.0, 0.001)

    # Hazard rates
    lam = Deterministic('lam', tt.exp(beta0 + tt.dot(w, z*beta)))
    lam_pred = Deterministic('lam_pred', tt.exp(beta0 + tt.dot(w_pred, z*beta)))
    
    def exp_logp(failure, value):
        # Exponential survival log-likelihood
        return tt.sum(failure * tt.log(lam) - lam * value)

    surv_like = DensityDist('surv_like', exp_logp, observed=(died, obs_t))
In [ ]:
with model_rbf:
    start = {'z': [1]*n_rbf, 'sigma': 10, 'beta': [0]*n_rbf, 'beta0': 0}
    step1 = BinaryMetropolis(vars=[z])
    step2 = NUTS(vars=[sigma, beta, beta0], scaling=start)
    trace_rbf = sample(2000, step=[step1, step2], start=start)
/Library/Python/2.7/site-packages/theano/scan_module/scan_perform_ext.py:85: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility
  from scan_perform.scan_perform import *
In [ ]:
from pymc import traceplot

forestplot(trace_rbf[5000:], vars=['lam_pred'])
In [ ]:
_ = traceplot(trace_rbf[5000:], vars=['sigma'])
In [ ]:
from pymc import summary
summary(trace_rbf, vars=['z'])