#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import pandas as pd import numpy as np from numpy.ma import masked_values from pymc3 import Model, sample, Metropolis, NUTS, traceplot, forestplot, find_MAP from pymc3 import Bernoulli, Normal, Uniform, invlogit, log, exp, Deterministic, DensityDist from pymc3 import Dirichlet, Categorical, Poisson, Lognormal, HalfCauchy, Gamma # In[2]: dataset = pd.read_csv("rtw.csv", index_col=0).reset_index(drop=True) dataset.head() # In[3]: failure = (dataset.day1.values==0).astype(int) t = dataset.day0b.replace(500, np.nan).combine_first(dataset.day1).values + 1 # In[4]: beta_names = ['age1', 'age2', 'age3', 'bmi1', 'bmi2', 'bmi3', 'osteoporosis', 'cad', 'gender', 'hispanic', 'compensation', 'pastsurgery', 'diabetes', 'anxiety', 'depression', 'motordef', 'duration', 'arthrodesis', 'interbody', 'approach'] # In[5]: n_prac = dataset.pracind.unique().shape[0] # In[6]: assert not np.isnan(t).sum() assert not np.isnan(failure).sum() # In[7]: with Model() as weibull_surv: mu_int = Normal('mu_int', 0, sd=100) sigma_int = HalfCauchy('sigma_int', 5) intercept = Normal('intercept', mu_int, sigma_int, shape=n_prac) beta = Normal('beta', 0, sd=1000, shape=len(beta_names)) X = dataset[beta_names].values.T theta = exp(intercept[dataset.pracind.values] + beta.dot(X)) alpha = HalfCauchy("alpha", 2.5) # Weibull survival likelihood, accounting for censoring def logp(failure, value): hazard = alpha * theta**alpha * value**(alpha-1) return (failure * log(hazard) - (theta * value)**alpha).sum() day = DensityDist('day', logp, observed={'failure':failure, 'value':t}) # In[13]: from pymc3.variational import advi, sample_vp with weibull_surv: fit = advi(n=100000) trace = sample_vp(fit) # In[15]: forestplot(trace, varnames=['beta'], ylabels=beta_names) # In[16]: traceplot(trace, varnames=['mu_int', 'sigma_int'])