#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('load_ext', 'autoreload') get_ipython().run_line_magic('autoreload', '2') get_ipython().run_line_magic('matplotlib', 'inline') import random random.seed(13847942484) import survivalstan import numpy as np import pandas as pd from stancache import stancache from matplotlib import pyplot as plt # In[2]: print(survivalstan.models.pem_survival_model_timevarying) # In[3]: d = stancache.cached( survivalstan.sim.sim_data_exp_correlated, N=100, censor_time=20, rate_form='1 + sex', rate_coefs=[-3, 0.5], ) d['age_centered'] = d['age'] - d['age'].mean() d.head() # In[4]: survivalstan.utils.plot_observed_survival(df=d[d['sex']=='female'], event_col='event', time_col='t', label='female') survivalstan.utils.plot_observed_survival(df=d[d['sex']=='male'], event_col='event', time_col='t', label='male') plt.legend() # In[5]: dlong = stancache.cached( survivalstan.prep_data_long_surv, df=d, event_col='event', time_col='t' ) dlong.sort_values(['index', 'end_time'], inplace=True) # In[6]: dlong.head() # In[7]: testfit = survivalstan.fit_stan_survival_model( model_cohort = 'test model', model_code = survivalstan.models.pem_survival_model_timevarying, df = dlong, sample_col = 'index', timepoint_end_col = 'end_time', event_col = 'end_failure', formula = '~ age_centered + sex', iter = 10000, chains = 4, seed = 9001, FIT_FUN = stancache.cached_stan_fit, ) # ## superficial check of convergence # In[8]: survivalstan.utils.print_stan_summary([testfit], pars='lp__') # In[9]: survivalstan.utils.plot_stan_summary([testfit], pars='log_baseline') # ## summarize coefficient estimates # In[10]: survivalstan.utils.plot_coefs([testfit], element='baseline') # In[11]: survivalstan.utils.plot_coefs([testfit]) # ## posterior-predictive checks # In[12]: survivalstan.utils.plot_pp_survival([testfit], fill=False) survivalstan.utils.plot_observed_survival(df=d, event_col='event', time_col='t', color='green', label='observed') plt.legend() # In[13]: survivalstan.utils.plot_pp_survival([testfit], by='sex') # In[14]: survivalstan.utils.plot_pp_survival([testfit], by='sex', pal=['red', 'blue']) # ## summarize time-varying effect of sex on survival # Standard behavior is to plot estimated betas at each timepoint, for each coefficient in the model. # In[15]: survivalstan.utils.plot_coefs([testfit], element='beta_time', ylim=[-1, 2.5]) # ## accessing lower-level functions for plotting effects over time # In[16]: survivalstan.utils.plot_time_betas(models=[testfit], by=['coef'], y='exp(beta)', ylim=[0, 10]) # Alternatively, you can extract the beta-estimates for each timepoint & plot them yourself. # In[17]: testfit['time_beta'] = survivalstan.utils.extract_time_betas([testfit]) testfit['time_beta'].head() # You can also extract and/or plot data for single coefficients of interest at a time. # In[18]: first_beta = survivalstan.utils.extract_time_betas([testfit], coefs=['sex']) first_beta.head() # In[19]: import seaborn as sns sns.boxplot(data=first_beta, x='_timepoint_id', y='beta') # In[20]: survivalstan.utils.plot_time_betas(models=[testfit], y='beta', x='end_time', coefs=['sex']) # Note that this same plot can be produced by passing data to `plot_time_betas` directly. # In[21]: survivalstan.utils.plot_time_betas(df=first_beta, by=['coef'], y='beta', x='end_time') # In[ ]: