#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import numpy as np
from numpy.ma import masked_values

from pymc3 import Model, sample, Metropolis, NUTS, traceplot, forestplot, find_MAP
from pymc3 import Bernoulli, Normal, Uniform, invlogit, log, exp, Deterministic, DensityDist
from pymc3 import Dirichlet, Categorical, Poisson, Lognormal, HalfCauchy, Gamma


# In[2]:


dataset = pd.read_csv("rtw.csv", index_col=0).reset_index(drop=True)
dataset.head()


# In[3]:


failure = (dataset.day1.values==0).astype(int)
t = dataset.day0b.replace(500, np.nan).combine_first(dataset.day1).values + 1


# In[4]:


beta_names = ['age1', 'age2', 'age3', 'bmi1', 'bmi2', 'bmi3', 'osteoporosis', 'cad',  'gender', 
       'hispanic', 'compensation', 'pastsurgery', 'diabetes', 'anxiety', 'depression', 
        'motordef', 'duration', 'arthrodesis', 'interbody', 'approach']


# In[5]:


n_prac = dataset.pracind.unique().shape[0]


# In[6]:


assert not np.isnan(t).sum()
assert not np.isnan(failure).sum()


# In[7]:


with Model() as weibull_surv:
    
    mu_int = Normal('mu_int', 0, sd=100)
    sigma_int = HalfCauchy('sigma_int', 5)
    intercept = Normal('intercept', mu_int, sigma_int, shape=n_prac)
    
    beta = Normal('beta', 0, sd=1000, shape=len(beta_names))
    X = dataset[beta_names].values.T
    
    theta = exp(intercept[dataset.pracind.values] + beta.dot(X))
    
    alpha = HalfCauchy("alpha", 2.5)
    
    
    # Weibull survival likelihood, accounting for censoring
    def logp(failure, value):
        hazard = alpha * theta**alpha * value**(alpha-1)
        return (failure * log(hazard) - (theta * value)**alpha).sum()

    day = DensityDist('day', logp, observed={'failure':failure, 'value':t})


# In[13]:


from pymc3.variational import advi, sample_vp

with weibull_surv:
    
    fit = advi(n=100000)
    trace = sample_vp(fit)


# In[15]:


forestplot(trace, varnames=['beta'], ylabels=beta_names)


# In[16]:


traceplot(trace, varnames=['mu_int', 'sigma_int'])