#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')
import random
random.seed(13847942484)
import survivalstan
import numpy as np
import pandas as pd
from stancache import stancache
from matplotlib import pyplot as plt


# In[2]:


print(survivalstan.models.pem_survival_model_timevarying)


# In[3]:


d = stancache.cached(
    survivalstan.sim.sim_data_exp_correlated,
    N=100,
    censor_time=20,
    rate_form='1 + sex',
    rate_coefs=[-3, 0.5],
)
d['age_centered'] = d['age'] - d['age'].mean()
d.head()


# In[4]:


survivalstan.utils.plot_observed_survival(df=d[d['sex']=='female'], event_col='event', time_col='t', label='female')
survivalstan.utils.plot_observed_survival(df=d[d['sex']=='male'], event_col='event', time_col='t', label='male')
plt.legend()


# In[5]:


dlong = stancache.cached(
    survivalstan.prep_data_long_surv,
    df=d, event_col='event', time_col='t'
)
dlong.sort_values(['index', 'end_time'], inplace=True)


# In[6]:


dlong.head()


# In[7]:


testfit = survivalstan.fit_stan_survival_model(
    model_cohort = 'test model',
    model_code = survivalstan.models.pem_survival_model_timevarying,
    df = dlong,
    sample_col = 'index',
    timepoint_end_col = 'end_time',
    event_col = 'end_failure',
    formula = '~ age_centered + sex',
    iter = 10000,
    chains = 4,
    seed = 9001,
    FIT_FUN = stancache.cached_stan_fit,
    )


# ## superficial check of convergence

# In[8]:


survivalstan.utils.print_stan_summary([testfit], pars='lp__')


# In[9]:


survivalstan.utils.plot_stan_summary([testfit], pars='log_baseline')


# ## summarize coefficient estimates

# In[10]:


survivalstan.utils.plot_coefs([testfit], element='baseline')


# In[11]:


survivalstan.utils.plot_coefs([testfit])


# ## posterior-predictive checks

# In[12]:


survivalstan.utils.plot_pp_survival([testfit], fill=False)
survivalstan.utils.plot_observed_survival(df=d, event_col='event', time_col='t', color='green', label='observed')
plt.legend()


# In[13]:


survivalstan.utils.plot_pp_survival([testfit], by='sex')


# In[14]:


survivalstan.utils.plot_pp_survival([testfit], by='sex', pal=['red', 'blue'])


# ## summarize time-varying effect of sex on survival

# Standard behavior is to plot estimated betas at each timepoint, for each coefficient in the model.

# In[15]:


survivalstan.utils.plot_coefs([testfit], element='beta_time', ylim=[-1, 2.5])


# ## accessing lower-level functions for plotting effects over time

# In[16]:


survivalstan.utils.plot_time_betas(models=[testfit], by=['coef'], y='exp(beta)', ylim=[0, 10])


# Alternatively, you can extract the beta-estimates for each timepoint & plot them yourself.

# In[17]:


testfit['time_beta'] = survivalstan.utils.extract_time_betas([testfit])
testfit['time_beta'].head()


# You can also extract and/or plot data for single coefficients of interest at a time.

# In[18]:


first_beta = survivalstan.utils.extract_time_betas([testfit], coefs=['sex'])
first_beta.head()


# In[19]:


import seaborn as sns
sns.boxplot(data=first_beta, x='_timepoint_id', y='beta')


# In[20]:


survivalstan.utils.plot_time_betas(models=[testfit], y='beta', x='end_time', coefs=['sex'])


# Note that this same plot can be produced by passing data to `plot_time_betas` directly.

# In[21]:


survivalstan.utils.plot_time_betas(df=first_beta, by=['coef'], y='beta', x='end_time')


# In[ ]: