In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import pandas as pd
import numpy as np
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
In [2]:
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True)

load pp_survival data from a random survival model

In [3]:
ppsurv = pd.read_csv('plotly_example_data.csv')
In [4]:
ppsurv.head()
Out[4]:
Unnamed: 0 iter model_cohort sex level_3 event_time survival
0 0 0 test model female 0 0.000000 1.000000
1 1 0 test model female 1 2.615961 1.000000
2 2 0 test model female 2 3.584694 0.977162
3 3 0 test model female 3 4.379338 0.952078
4 4 0 test model female 4 6.253546 0.945339

plot posterior predicted survival time by sex

In [5]:
ppsummary = ppsurv.groupby(['sex','event_time'])['survival'].agg({
        '95_lower': lambda x: np.percentile(x, 2.5),
        '95_upper': lambda x: np.percentile(x, 97.5),
        '50_lower': lambda x: np.percentile(x, 25),
        '50_upper': lambda x: np.percentile(x, 75),
        'median': lambda x: np.percentile(x, 50),
    }).reset_index()
In [6]:
ppsummary[ppsummary['sex']=='female'].tail()
Out[6]:
sex event_time median 50_lower 95_lower 95_upper 50_upper
72 female 18.543842 0.338076 0.283789 0.182441 0.519674 0.405841
73 female 18.656898 0.331524 0.275271 0.176058 0.507488 0.398727
74 female 18.932325 0.320479 0.269188 0.176320 0.492980 0.384687
75 female 19.811832 0.306252 0.251404 0.157844 0.471677 0.367278
76 female 20.000000 0.284725 0.229034 0.134397 0.467671 0.344815
In [7]:
shade_colors = dict(male='rgba(0, 128, 128, {})', female='rgba(214, 12, 140, {})')
line_colors = dict(male='rgb(0, 128, 128)', female='rgb(214, 12, 140)')
ppsummary.sort_values(['sex', 'event_time'], inplace=True)
In [8]:
data5 = list()
for grp, grp_df in ppsummary.groupby('sex'):
    x = list(grp_df['event_time'].values)
    x_rev = x[::-1]
    y_upper = list(grp_df['50_upper'].values)
    y_lower = list(grp_df['50_lower'].values)
    y_lower = y_lower[::-1]
    y2_upper = list(grp_df['95_upper'].values)
    y2_lower = list(grp_df['95_lower'].values)
    y2_lower = y2_lower[::-1]
    y = list(grp_df['median'].values)
    my_shading50 = go.Scatter(
        x = x + x_rev,
        y = y_upper + y_lower,
        fill = 'tozerox',
        fillcolor = shade_colors[grp].format(0.3),
        line = go.Line(color = 'transparent'),
        showlegend = True,
        name = '{} - 50% CI'.format(grp),
    )
    my_shading95 = go.Scatter(
        x = x + x_rev,
        y = y2_upper + y2_lower,
        fill = 'tozerox',
        fillcolor = shade_colors[grp].format(0.1),
        line = go.Line(color = 'transparent'),
        showlegend = True,
        name = '{} - 95% CI'.format(grp),
    )
    my_line = go.Scatter(
        x = x,
        y = y,
        line = go.Line(color=line_colors[grp]),
        mode = 'lines',
        name = grp,
    )
    data5.append(my_line)    
    data5.append(my_shading50)
    data5.append(my_shading95)
In [9]:
layout5 = go.Layout(
    yaxis=dict(
        title='Survival (%)',
        #zeroline=False,
        tickformat='.0%',
    ),
    xaxis=dict(title='Days since enrollment')
)
In [10]:
py.iplot(go.Figure(data=data5, layout=layout5), filename='survivalstan/posterior-predicted-values')
Out[10]:
In [ ]: