import NotebookImport
from Imports import *
importing IPython notebook from Imports.ipynb Populating the interactive namespace from numpy and matplotlib changing to source dirctory populating namespace with data
# TP53 mutation flag per patient: True where the TP53 row of the mutation
# matrix is positive, limited to the `keepers_o` patients that have a value.
tp53_calls = mut.features.ix['TP53'].ix[keepers_o].dropna()
p53_mut = tp53_calls > 0
p53_mut.name = 'TP53'

# Copy-number value of the first '3p14.2' entry under the 'Deletion' features.
del_3p_rows = cn.features.ix['Deletion'].ix['3p14.2']
del_3p = del_3p_rows.ix[0].ix[keepers_o].dropna()
del_3p.name = 'del_3p'

# `combine` appears to label each patient 'neither' / 'del_3p' / 'TP53' /
# 'both' (the keys recoded below); the labels are mapped to single letters.
# NOTE(review): only del_3p == -1 counts as a deletion here, whereas later
# cells use del_3p < 0 -- confirm the difference is intended.
combo_codes = {'neither': 'a', 'del_3p': 'b', 'TP53': 'c', 'both': 'd'}
combo = combine(p53_mut == 1, del_3p == -1).map(combo_codes)
two_hit = combo == 'd'
Stage
# Cross-tabulate two-hit status against the unprocessed clinical stage labels.
two_hit_label = two_hit.map({True: 'TP53-3p Pos.', False: 'TP53-3p Neg.'})
pd.crosstab(two_hit_label, clinical.stage.clinicalstage)
clinicalstage | stage i | stage ii | stage iii | stage iva | stage ivb | stage ivc |
---|---|---|---|---|---|---|
TP53 | ||||||
TP53-3p Neg. | 5 | 18 | 19 | 27 | 1 | 1 |
TP53-3p Pos. | 5 | 31 | 45 | 92 | 4 | 2 |
2 rows × 6 columns
# Readable two-hit labels; the series name is blanked before tabulating.
th = two_hit.map({True: 'TP53-3p Pos.', False: 'TP53-3p Neg.'})
th.name = ''

# Collapse the stage IV sub-stages, then keep only the upper-cased second
# word of each label (e.g. 'stage iii' -> 'III').
stage_iv_substages = ['stage iva', 'stage ivb', 'stage ivc']
st = clinical.stage.clinicalstage.replace(stage_iv_substages, 'stage iv')
st = st.dropna().map(lambda label: label.split()[1].upper())
st.name = 'Stage'

ct = pd.crosstab(th, st).T
feature, assignment = two_hit, st

# First panel: patient counts; each further panel: one survival plot per
# stage group.
fig, axs = subplots(1, 5, figsize=(10, 3))
count_ax = axs[0]
ct.plot(kind='bar', rot=0, ax=count_ax)
count_ax.legend(loc='upper left', frameon=False)
count_ax.set_ylabel('# of Patients')

for panel, (level, members) in enumerate(feature.groupby(assignment), 1):
    surv_ax = axs[panel]
    panel_title = '{} = {}'.format(assignment.name, level)
    draw_survival_curve(members, surv, ax=surv_ax, title=panel_title)
    surv_ax.get_legend().set_visible(False)

for ax in axs:
    prettify_ax(ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/stage_breakdown.pdf')

# Stage numerals recoded as integers 1-4 before being handed to kruskal_pandas.
stage_rank = st.replace({'I': 1, 'II': 2, 'III': 3, 'IV': 4})
kruskal_pandas(two_hit, stage_rank)
H 5.31 p 0.02 dtype: float64
# Test two-hit status against membership in the collapsed stage 'IV' group.
fisher_exact_test(two_hit, st=='IV')
odds_ratio 1.75 p 0.05 dtype: float64
Grade
# Cross-tabulate two-hit status against the raw histologic grade labels.
two_hit_label = two_hit.map({True: 'TP53-3p Pos.', False: 'TP53-3p Neg.'})
pd.crosstab(two_hit_label, clinical.clinical.neoplasmhistologicgrade)
neoplasmhistologicgrade | g1 | g2 | g3 | g4 | gx |
---|---|---|---|---|---|
h | |||||
TP53-3p Neg. | 14 | 37 | 15 | 1 | 4 |
TP53-3p Pos. | 9 | 125 | 44 | 0 | 1 |
2 rows × 5 columns
# Histologic grade with the 'gx' label treated as missing and dropped.
raw_grade = clinical.clinical.neoplasmhistologicgrade
grade = raw_grade.replace('gx', nan).dropna()
grade.name = 'grade'

# True for patients whose grade is none of g2/g3/g4; tested against two-hit
# status.
in_g2_to_g4 = grade.dropna().isin(['g2', 'g3', 'g4']).dropna()
fisher_exact_test(two_hit, in_g2_to_g4 == False)
odds_ratio 2.02e-01 p 4.04e-04 dtype: float64
# Merge g3 and g4 into a single 'G3+' category for plotting.
gr = grade.replace(['g4', 'g3'], 'g3+').str.upper()
gr.name = 'Grade'

ct = pd.crosstab(gr, th)
feature, assignment = two_hit, gr

# First panel: patient counts; each further panel: one survival plot per
# grade group.
fig, axs = subplots(1, 4, figsize=(8, 3))
count_ax = axs[0]
ct.plot(kind='bar', rot=0, ax=count_ax)
count_ax.legend(loc='upper left', frameon=False)
count_ax.set_ylabel('# of Patients')

for panel, (level, members) in enumerate(feature.groupby(assignment), 1):
    surv_ax = axs[panel]
    panel_title = '{} = {}'.format(assignment.name, level)
    draw_survival_curve(members, surv, ax=surv_ax, title=panel_title)
    surv_ax.get_legend().set_visible(False)

for ax in axs:
    prettify_ax(ax)
fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/grade_breakdown.pdf')

# Grades recoded as integers 1-4 and aligned with two-hit status before the
# test is run.
grade_rank = grade.replace({'g1': 1, 'g2': 2, 'g3': 3, 'g4': 4})
a, b = match_series(two_hit, grade_rank)
kruskal_pandas(a, b)
H 3.19 p 0.07 dtype: float64
# Two-hit status against grade recoded as integers 1-4, passed to
# kruskal_pandas without an explicit match_series alignment first.
kruskal_pandas(two_hit, grade.replace({'g1':1, 'g2':2, 'g3':3, 'g4':4}))
H 3.19 p 0.07 dtype: float64
# Histologic grade with the 'gx' label treated as missing and dropped.
raw_grade = clinical.clinical.neoplasmhistologicgrade
grade = raw_grade.replace('gx', nan).dropna()
grade.name = 'grade'

# Among patients graded g2, g3 or g4 only: g2 versus the rest, tested against
# two-hit status.
g2_or_higher = grade[grade.isin(['g2', 'g3', 'g4'])]
fisher_exact_test(two_hit, g2_or_higher == 'g2')
odds_ratio 1.23 p 0.60 dtype: float64
This cohort is overwhelmingly Stage IV and of high grade. This is likely due to the selection criteria for the TCGA study requiring a large amount of tissue, and thus skewing the cohort towards patients with larger tumors. In general, we find that patients with TP53-3p events trend towards being in stage IV (odds ratio 1.7, P = 0.07) and of higher grade (g2, g3, g4 vs. g1; odds ratio 4.9, P < .001). While this may be the case, this combination of events is still predictive of survival within these subgroups.
Tumor Subdivision
# Cross-tabulate the labelled two-hit status against tumor subdivision.
pd.crosstab(th,
clinical.processed.tumor_subdivision)
tumor_subdivision | larynx | oral cavity | oropharynx |
---|---|---|---|
TP53-3p Neg. | 15 | 52 | 4 |
TP53-3p Pos. | 60 | 109 | 9 |
2 rows × 3 columns
# Bar chart of the same table, transposed so each subdivision is one x-axis group.
pd.crosstab(th, clinical.processed.tumor_subdivision).T.plot(kind='bar', rot=0)
<matplotlib.axes.AxesSubplot at 0xbe3a290>
# Tumor-subdivision breakdown: patient counts plus one survival plot per
# subdivision, followed by a test of two-hit status against larynx tumors.
feature = two_hit
assignment = clinical.processed.tumor_subdivision

# Build the count table for THIS figure. Without it the bar panel would reuse
# whatever `ct` an earlier cell left behind (the grade table).
ct = pd.crosstab(th, assignment).T

fig, axs = subplots(1,4, figsize=(15,4))
ct.plot(kind='bar', rot=0, ax=axs[0])
axs[0].legend(loc='upper left', frameon=False)
axs[0].set_ylabel('# of Patients')
for i, (l, s) in enumerate(feature.groupby(assignment)):
    draw_survival_curve(s, surv, ax=axs[i+1],
                        title='{} = {}'.format(assignment.name, l))
    axs[i+1].get_legend().set_visible(False)
fig.tight_layout()

# NOTE(review): this call passes the string-labelled status, while the other
# fisher_exact_test calls in this notebook pass the boolean `two_hit`.
fisher_exact_test(two_hit.map({True: 'TP53-3p Pos.', False:'TP53-3p Neg.'}),
                  clinical.processed.tumor_subdivision=='larynx')
odds_ratio 1.88 p 0.07 dtype: float64
Smoking Status
# Cross-tabulate two-hit status against the tobacco smoking history labels.
smoking = clinical.clinical.tobaccosmokinghistory
pd.crosstab(two_hit, smoking)
tobaccosmokinghistory | current reformed smoker for < or = 15 years | current reformed smoker for > 15 years | current smoker | lifelong non-smoker |
---|---|---|---|---|
h | ||||
False | 12 | 22 | 18 | 18 |
True | 60 | 18 | 66 | 26 |
2 rows × 4 columns
# Survival by two-hit status, split by smoking history (missing values dropped).
draw_survival_curves(two_hit, surv, smoking.dropna())
# Age bins: 0 for age <= 55, 1 for 55 < age <= 70, 2 for age > 70.
age_bin = 1.*(age > 55) + 1.*(age > 70)
violin_plot_pandas(two_hit, age)
# Survival by two-hit status, split by age bin.
draw_survival_curves(two_hit, surv, age_bin)
# Compare 3p14.2 with other deletion features: association with TP53 mutation
# (`fet`) and Cox survival statistics in three patient subsets (`r`).
arms = ['3p14.2','13q12.11','13q14.2','18q23','18q21.2','21q22.3']

# Deletion features for the kept patients, with the third index level dropped.
cnn = cn.features.copy()
cnn.index = cnn.index.droplevel(2)
cnn = cnn.ix['Deletion'].ix[:, keepers_o]

# Number of patients with a negative value for each feature.
count = (cnn.ix[arms] < 0).sum(1).order()
count.name = 'pts. w/ deletion'
# Number of patients `combine` labels 'both' (TP53 mutated and feature < 0).
count_p53 = pd.Series({i: sum(combine(p53_mut>0, v<0) == 'both')
                       for i,v in cnn.ix[arms].iterrows()},
                      name='pts. w/ TP53 + deletion')

fet = screen_feature(p53_mut, fisher_exact_test, cnn.ix[arms]<0)[['odds_ratio','p']]
fet = fet.join(count).join(count_p53)[['pts. w/ deletion','pts. w/ TP53 + deletion',
                                       'odds_ratio','p']]

# The adjusted model below uses `old`, which this notebook only assigns in a
# later cell. Define it here, with the same rule, when it is not available
# yet, so the notebook also runs top to bottom.
try:
    old
except NameError:
    old = age > 75
    old.name = 'old'

r = {}
subsets = {'All': keepers_o, 'TP53 mut.': ti(p53_mut>0), 'TP53 wt': ti(p53_mut==0)}
for c,pts in subsets.iteritems():
    cox = cox_screen(cnn.ix[arms, pts] < 0, surv)
    haz = cox['hazard']['exp(coef)']
    p_uni = cox['LR']['p']
    # NOTE(review): the adjusted model receives the raw values `v`, while the
    # hazard and univariate p-value above use the thresholded `< 0` calls --
    # confirm this is intended.
    p_full = pd.Series({i: get_cox_ph_ms(surv, v, [age,old], interactions='just_feature')['LR']
                        for i,v in cnn.ix[arms, pts].iterrows()})
    r[c] = pd.concat([haz, p_uni, p_full], keys=['hazard','p uni.', 'p full'], axis=1)

# One column group per subset, sorted by descending hazard in all patients.
pd.concat(r, axis=1).sort([('All','hazard')])[::-1]
All | TP53 mut. | TP53 wt | |||||||
---|---|---|---|---|---|---|---|---|---|
hazard | p uni. | p full | hazard | p uni. | p full | hazard | p uni. | p full | |
3p14.2 | 3.54 | 4.32e-05 | 5.74e-06 | 2.97 | 0.01 | 0.00 | 2.29 | 0.21 | 0.06 |
18q23 | 1.13 | 5.64e-01 | 4.06e-01 | 0.79 | 0.30 | 0.34 | 5.14 | 0.02 | 0.00 |
13q12.11 | 1.13 | 5.45e-01 | 4.64e-01 | 0.82 | 0.33 | 0.33 | 3.72 | 0.05 | 0.01 |
21q22.3 | 1.08 | 7.05e-01 | 3.71e-01 | 0.90 | 0.61 | 0.94 | 1.41 | 0.62 | 1.00 |
18q21.2 | 1.05 | 8.21e-01 | 6.90e-01 | 0.75 | 0.19 | 0.20 | 3.73 | 0.05 | 0.01 |
13q14.2 | 0.90 | 6.05e-01 | 6.93e-01 | 0.67 | 0.06 | 0.06 | 3.68 | 0.05 | 0.01 |
6 rows × 9 columns
# Number of kept patients with (True) and without (False) a TP53 mutation.
p53_mut.ix[keepers_o].value_counts()
True 202 False 48 dtype: int64
# Display the TP53-vs-deletion association table.
fet
pts. w/ deletion | pts. w/ TP53 + deletion | odds_ratio | p | |
---|---|---|---|---|
3p14.2 | 205 | 179 | 6.59 | 3.56e-07 |
13q12.11 | 121 | 107 | 2.74 | 3.64e-03 |
21q22.3 | 107 | 95 | 2.66 | 5.80e-03 |
13q14.2 | 102 | 90 | 2.41 | 1.43e-02 |
18q23 | 164 | 140 | 2.26 | 1.73e-02 |
18q21.2 | 154 | 132 | 2.23 | 2.01e-02 |
6 rows × 4 columns
# Survival by TP53 status combined with each of three other deletion features
# (value < 0); in each case the first entry of the feature is used.
deletions = cn.features.ix['Deletion']

del_18q = deletions.ix['18q23'].ix[0]
survival_and_stats(combine(p53_mut, del_18q < 0), surv)

del_21q = deletions.ix['21q22.3'].ix[0]
survival_and_stats(combine(p53_mut, del_21q < 0), surv)

del_13q = deletions.ix['13q14.2'].ix[0]
survival_and_stats(combine(p53_mut, del_13q < 0), surv)
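We compare the interaction model $hr_j = \beta_0 + \beta_1\,\mathrm{TP53}_j + \beta_2\,\mathrm{3ploss}_j + \beta_3\,(\mathrm{TP53}_j \cdot \mathrm{3ploss}_j)$ against the additive model $hr_j = \beta_0 + \beta_1\,\mathrm{TP53}_j + \beta_2\,\mathrm{3ploss}_j$, where $\mathrm{TP53}_j \cdot \mathrm{3ploss}_j = 1$ only for patients carrying both events.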
import Data.Firehose as FH
# Gene-level copy number, restricted to index entries whose third element is
# also present in the RNA matrix index.
cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
shared_rows = [key for key in cn_by_gene.index if key[2] in rna.df.index]
cn_by_gene = cn_by_gene.ix[shared_rows]

# Per-column fraction of rows with negative / positive copy number.
n_rows = 1. * len(cn_by_gene)
pct_del = (cn_by_gene < 0).sum() / n_rows
pct_amp = (cn_by_gene > 0).sum() / n_rows
pct_altered = pct_amp + pct_del

# Covariate names must match the terms used in the formulas below.
p53_mut.name = 'TP53'
del_3p.name = 'del_3p'
pct_del.name = 'pct_del'

two_hit = combine(p53_mut > 0, del_3p < 0) == 'both'
two_hit.name = 'two_hit'

old = age > 75
old.name = 'old'

draw_survival_curves(del_3p, surv, p53_mut)

# NOTE(review): `pct_altered` ends up sharing the name 'pct_del' with the
# deletion fraction; it is not among the covariates passed below -- confirm
# the rename is intended.
pct_altered.name = 'pct_del'

fmla0 = 'Surv(days, event) ~ TP53 + del_3p + old'
fmla1 = 'Surv(days, event) ~ TP53 + del_3p + pct_del + TP53:pct_del + old'
fmla2 = 'Surv(days, event) ~ TP53 + del_3p + pct_del + two_hit + TP53:pct_del + old'
fmla3 = 'Surv(days, event) ~ TP53 + del_3p + two_hit + old'

# Fit every formula against the same covariates; the list is rebuilt for
# each call.
m0, m1, m2, m3 = [
    get_cox_ph(surv,
               covariates=[p53_mut, del_3p < 0, two_hit, old, age, pct_del],
               formula=fmla)
    for fmla in (fmla0, fmla1, fmla2, fmla3)
]

# Compare the largest model (m2) with the two models that omit `two_hit`.
LR_test(m2, m0), LR_test(m2, m1)
(0.016999296949315183, 0.022793238361218802)
# Refit the fmla2 model with print_desc=True so its description is printed;
# the trailing semicolon suppresses the notebook's display of the return value.
get_cox_ph(surv, covariates=[p53_mut, del_3p < 0, two_hit, old, age, pct_del],
formula=fmla2, print_desc=True);
coef exp(coef) se(coef) z p TP53 -0.568 0.567 0.3940 -1.44 0.1500 del_3p -0.693 0.500 0.5359 -1.29 0.2000 pct_del 0.222 1.249 0.1434 1.55 0.1200 two_hit 1.411 4.100 0.6665 2.12 0.0340 old 0.281 1.325 0.0868 3.24 0.0012 TP53:pct_del -0.631 0.532 0.2130 -2.96 0.0030 Likelihood ratio test=41.3 on 6 df, p=2.48e-07 n= 250, number of events= 102
def _forest_panel(ax, model, rows=None):
    """Draw hazard ratios with their 95% intervals for one Cox model on `ax`.

    `model` is a fitted model as returned by get_cox_ph. Element 7 of its R
    summary is converted to a table that must provide the 'exp(coef)',
    'lower .95' and 'upper .95' columns. When `rows` is given, only those
    rows are drawn, in reverse order, so the first name listed ends up at the
    top of the panel. Returns the table that was drawn.
    """
    ci = convert_robj(robjects.r.summary(model)[7])
    if rows is not None:
        ci = ci.ix[rows][::-1]
    for j, h in enumerate(ci['exp(coef)']):
        # Square marker at the point estimate, line across the interval.
        ax.scatter(h, j, marker='s', s=100, color='grey',
                   edgecolors=['black'], zorder=10)
        ax.plot([ci.iloc[j]['lower .95'], ci.iloc[j]['upper .95']], [j, j],
                lw=3, ls='-', marker='o', dash_joinstyle='bevel', color='grey')
    # Reference line at a hazard ratio of 1.
    ax.axvline(1, ls='--', color='black')
    ax.set_ybound(-.5, len(ci.index) - .5)
    ax.set_yticks(range(len(ci.index)))
    ax.set_yticklabels(ci.index)
    return ci


# One panel per model: m0, then m3, then m2.
fig, axs = subplots(3,1, figsize=(6,5), sharex=True)
ci = _forest_panel(axs[0], m0)
ci = _forest_panel(axs[1], m3, ['old','TP53','del_3p','two_hit'])
ci = _forest_panel(axs[2], m2, ['TP53','del_3p','pct_del','TP53:pct_del','two_hit'])

# Shared log-scaled x axis with explicit tick positions.
# NOTE(review): the exported notebook lost its indentation, so every `ax.`
# statement is assumed to belong to this loop; if the label was meant for the
# bottom panel only, move set_xlabel out of the loop.
hazard_ticks = [.25, .5, 1, 1.5, 2, 4,8,16]
for ax in axs:
    prettify_ax(ax)
    ax.set_xscale('log')
    ax.set_xticks(hazard_ticks)
    ax.set_xticklabels(hazard_ticks)
    ax.set_xlabel('Hazard Ratio')
    ax.set_xbound(.15, 17)
fig.tight_layout()
# Handles to R functions used by the cross-validation cells.
glm = robjects.r.glm
# NOTE(review): `logit`, `summary` and `boot` are not referenced by the glm
# calls visible in this notebook, which pass family='binomial' as a string.
logit = robjects.r.binomial('logit')
summary = robjects.r.summary
boot = robjects.packages.importr('boot')
# Candidate logistic models for the binary `event` outcome; all include `old`.
f0 = robjects.Formula('event ~ old')
f1 = robjects.Formula('event ~ TP53 + old')
f2 = robjects.Formula('event ~ del_3p + old')
f3 = robjects.Formula('event ~ TP53 + del_3p + old')
f8 = robjects.Formula('event ~ TP53 + del_3p + pct_del + old')
f4 = robjects.Formula('event ~ TP53 + del_3p + two_hit + old')
f5 = robjects.Formula('event ~ two_hit + old')
f6 = robjects.Formula('event ~ TP53 + del_3p + pct_del + TP53:pct_del + old')
f7 = robjects.Formula('event ~ TP53 + del_3p + pct_del + two_hit + TP53:pct_del + old')
# Models actually fitted, in this order. f4, f5 and f6 are defined but not
# included; the fifth entry is f8 (with pct_del), not f4 (with two_hit).
formulas = [f0, f1, f2, f3, f8, f7]
# Year of initial diagnosis as float; used to filter patients per time cutoff.
year = clinical.clinical.yearofinitialpathologicdiagnosis.astype(float)
# Leave-one-out cross-validation of the logistic models in `formulas`, for
# survival cutoffs of 1 to 5 years. Per cutoff t the results are stored in
# accuracy, predictions, events, error and sample_size (all keyed by t).
accuracy = {}
predictions = {}
events = {}
error = {}
sample_size = {}

# One label per entry of `formulas`, in the same order. The fifth model is
# f8 ('TP53 + del_3p + pct_del + old'), so it is labelled with pct_del.
model_labels = ['1', 'TP53', '3p',
                'TP53 + 3p',
                'TP53 + 3p + pct_del',
                'TP53 + 3p + pct_del + TP53:3p + TP53:pct_del']

for t in [1,2,3,4,5]:
    # Keep patients followed for at least t years or with event == 1, then
    # restrict by diagnosis year via `ti(year < 2013 - (t-1))`.
    ss = clinical.survival.survival.unstack()
    ss = ss[(ss.days >= (365*t)) | (ss.event == 1)]
    ss = ss.ix[ti(year < (2013 - (t-1)))]

    # Outcome: 1.0 when follow-up reaches the cutoff, else 0.0.
    event = ss.ix[ss.index.intersection(keepers_o)].dropna().days >= (365*t)
    event = event.astype(float)
    event.name = 'event'

    df = pd.concat([p53_mut, del_3p<0, event, old, two_hit, pct_del, age], axis=1).dropna()
    df_m = df.groupby(['TP53','del_3p']).event.mean().reset_index().astype(float)

    # Weight each patient by the number of patients NOT sharing its outcome
    # value, normalised so that the weights sum to one.
    outcome = df.event
    outcome = outcome.map(len(outcome) - outcome.value_counts())
    outcome = outcome / sum(outcome)

    prediction = {}
    for i,p in enumerate(df.index):
        # Fit every model without patient p, then predict for p alone.
        pt = df.index[[i]]
        df_r = convert_to_r_dataframe(df.ix[df.index.diff(pt)])
        o2 = outcome.ix[outcome.index.diff(pt)]
        weights = robjects.FloatVector(o2)
        weights.names = list(o2.index)
        models = [glm(f, family='binomial', weights=weights, data=df_r)
                  for f in formulas]
        df_p = convert_to_r_dataframe(df.ix[pt])
        prediction[p] = [robjects.r.predict(m, df_p, type='response')[0] for m in models]

    prediction = pd.DataFrame(prediction).T
    prediction.columns = model_labels

    # Weighted share of patients on the correct side of 0.5, and weighted
    # mean absolute difference between prediction and outcome.
    accuracy[t] = ((((prediction.T >= .5)) == (df.event == 1)) * outcome).sum(1)
    predictions[t] = prediction
    events[t] = event
    error[t] = ((prediction.T - df.event).abs() * outcome).sum(1)
    sample_size[t] = len(event)
from sklearn.metrics import roc_curve, auc, precision_recall_curve
# Flags computed from the `ss` and `t` left over by the preceding loop.
kept_survival = ss.ix[ss.index.intersection(keepers_o)].dropna()
v1 = kept_survival.days <= (365*t)
v2 = kept_survival.event == 1

# Count the 0.0 / 1.0 outcome values for every cutoff from 1 to 5 years.
e = {}
for t in [1, 2, 3, 4, 5]:
    cutoff_days = 365 * t
    ss = clinical.survival.survival.unstack()
    ss = ss[(ss.days >= cutoff_days) | (ss.event == 1)]
    ss = ss.ix[ti(year < (2013 - (t - 1)))]
    reached_cutoff = ss.ix[ss.index.intersection(keepers_o)].dropna().days >= cutoff_days
    event = reached_cutoff.astype(float)
    event.name = 'event'
    df = pd.concat([p53_mut, del_3p < 0, event, old, two_hit, pct_del, age],
                   axis=1).dropna()
    e[t] = event.value_counts()

# One row per cutoff after transposing.
event = pd.concat(e, axis=1)
event.T
0.0 | 1.0 | |
---|---|---|
1 | 44 | 172 |
2 | 85 | 91 |
3 | 92 | 51 |
4 | 80 | 34 |
5 | 74 | 25 |
5 rows × 2 columns
# Leave-one-out cross-validation of the logistic models in `formulas`, for
# survival cutoffs of 1 to 5 years, now also recording the area under the ROC
# and precision-recall curves of the held-out predictions.
accuracy = {}
predictions = {}
events = {}
error = {}
sample_size = {}
roc_area = {}
aupur = {}

# One label per entry of `formulas`, in the same order. The fifth model is
# f8 ('TP53 + del_3p + pct_del + old'), so it is labelled with pct_del.
model_labels = ['1', 'TP53', '3p',
                'TP53 + 3p',
                'TP53 + 3p + pct_del',
                'TP53 + 3p + pct_del + TP53:3p + TP53:pct_del']

for t in [1,2,3,4,5]:
    # Keep patients followed for at least t years or with event == 1, then
    # restrict by diagnosis year via `ti(year < 2013 - (t-1))`.
    ss = clinical.survival.survival.unstack()
    ss = ss[(ss.days >= (365*t)) | (ss.event == 1)]
    ss = ss.ix[ti(year < (2013 - (t-1)))]

    # Outcome: 1.0 when follow-up reaches the cutoff, else 0.0.
    event = ss.ix[ss.index.intersection(keepers_o)].dropna().days >= (365*t)
    event = event.astype(float)
    event.name = 'event'

    df = pd.concat([p53_mut, del_3p<0, event, old, two_hit, pct_del, age], axis=1).dropna()
    df_m = df.groupby(['TP53','del_3p']).event.mean().reset_index().astype(float)

    # Weight each patient by the number of patients NOT sharing its outcome
    # value, normalised so that the weights sum to one.
    outcome = df.event
    outcome = outcome.map(len(outcome) - outcome.value_counts())
    outcome = outcome / sum(outcome)

    prediction = {}
    for i,p in enumerate(df.index):
        # Fit every model without patient p, then predict for p alone.
        pt = df.index[[i]]
        df_r = convert_to_r_dataframe(df.ix[df.index.diff(pt)])
        o2 = outcome.ix[outcome.index.diff(pt)]
        weights = robjects.FloatVector(o2)
        weights.names = list(o2.index)
        models = [glm(f, family='binomial', weights=weights, data=df_r)
                  for f in formulas]
        df_p = convert_to_r_dataframe(df.ix[pt])
        prediction[p] = [robjects.r.predict(m, df_p, type='response')[0] for m in models]

    prediction = pd.DataFrame(prediction).T
    prediction.columns = model_labels

    # Weighted share of patients on the correct side of 0.5, and weighted
    # mean absolute difference between prediction and outcome.
    accuracy[t] = ((((prediction.T >= .5)) == (df.event == 1)) * outcome).sum(1)
    predictions[t] = prediction
    events[t] = event
    error[t] = ((prediction.T - df.event).abs() * outcome).sum(1)
    sample_size[t] = len(event)

    # Areas under the ROC and precision-recall curves, one value per model.
    roc_auc = {}
    aupur_auc = {}
    for m,v in prediction.iteritems():
        a,b = match_series(v, df.event)
        fpr, tpr, thresh = roc_curve(b, a)
        roc_auc[m] = auc(fpr, tpr)
        precision, recall, thresh = precision_recall_curve(b, a)
        aupur_auc[m] = auc(recall, precision)
    roc_area[t] = roc_auc
    aupur[t] = aupur_auc

# Rows: time cutoff; columns: model label.
roc_area = pd.DataFrame(roc_area).T
roc_area
1 | 3p | TP53 | TP53 + 3p | TP53 + 3p + TP53:3p | TP53 + 3p + pct_del + TP53:3p + TP53:pct_del | |
---|---|---|---|---|---|---|
1 | 0.19 | 0.36 | 0.32 | 0.43 | 0.43 | 0.66 |
2 | 0.13 | 0.34 | 0.34 | 0.43 | 0.49 | 0.62 |
3 | 0.14 | 0.39 | 0.36 | 0.49 | 0.68 | 0.69 |
4 | 0.16 | 0.42 | 0.40 | 0.52 | 0.69 | 0.70 |
5 | 0.19 | 0.41 | 0.41 | 0.51 | 0.71 | 0.73 |
5 rows × 6 columns
# Precision-recall areas as a table: one row per time cutoff, one column per model.
aupur = pd.DataFrame(aupur).T
aupur
1 | 3p | TP53 | TP53 + 3p | TP53 + 3p + TP53:3p | TP53 + 3p + pct_del + TP53:3p + TP53:pct_del | |
---|---|---|---|---|---|---|
1 | 0.59 | 0.74 | 0.72 | 0.78 | 0.79 | 0.86 |
2 | 0.29 | 0.48 | 0.46 | 0.55 | 0.57 | 0.66 |
3 | 0.20 | 0.39 | 0.34 | 0.47 | 0.56 | 0.61 |
4 | 0.17 | 0.34 | 0.30 | 0.43 | 0.50 | 0.54 |
5 | 0.16 | 0.28 | 0.26 | 0.34 | 0.43 | 0.52 |
5 rows × 6 columns
# Supplementary figure: AUROC (left) and precision-recall area (right) of the
# cross-validated models at each time cutoff.
fig, axs = subplots(1,2, figsize=(10,3.5))

# Give the models their short names m0..m5 by LABEL rather than by position.
# `prediction.columns` lists the labels in fitting order, whereas the columns
# of `roc_area` / `aupur` come from nested dicts and are not guaranteed to be
# in that order; a positional rename could attach a short name to the wrong
# model.
short_names = ['m0','m1','m2','m3','m4','m5']
name_map = dict(zip(prediction.columns, short_names))
roc_area = roc_area.rename(columns=name_map)[short_names]
aupur = aupur.rename(columns=name_map)[short_names]

ax = axs[0]
roc_area.plot(kind='bar', legend=False, ax=ax)
ax.legend(loc='lower left')
ax.set_xticklabels([1,2,3,4,5], rotation=0)
ax.set_ylabel('AUROC')
ax.set_xlabel('Time (years)')
prettify_ax(ax)

ax = axs[1]
aupur.plot(kind='bar', legend=False, ax=ax)
ax.set_xticklabels([1,2,3,4,5], rotation=0)
ax.set_ylabel('AUPUR')
ax.set_xlabel('Time (years)')
prettify_ax(ax)

fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/CV_supp_AUC.pdf')
# Supplementary figure: weighted prediction error (left, with legend) and
# weighted accuracy (right) of the cross-validated models per time cutoff.
fig, axs = subplots(1, 2, figsize=(10, 3.5))

# (per-cutoff results, y-limits, y-label, draw legend?) for each panel.
panels = [
    (error, (0.35, 0.52), 'Linear Prediction Error', True),
    (accuracy, (0.5, 0.72), 'Weighted Accuracy', False),
]
for ax, (by_cutoff, y_limits, y_label, with_legend) in zip(axs, panels):
    # Rows: time cutoff; columns: models under their short names.
    ct = pd.concat(by_cutoff, axis=1).T
    ct.columns = ['m0', 'm1', 'm2', 'm3', 'm4', 'm5']
    ct.plot(kind='bar', legend=False, ax=ax)
    if with_legend:
        ax.legend(loc='lower left')
    ax.set_xticklabels([1, 2, 3, 4, 5], rotation=0)
    ax.set_ylim(*y_limits)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Time (years)')
    prettify_ax(ax)

fig.tight_layout()
fig.savefig('/cellar/users/agross/figures/CV_supp.pdf')
# Stack the accuracy and error tables (rows: model, columns: time cutoff)
# under an outer row label naming the metric.
ct1 = pd.concat(accuracy, axis=1)
ct2 = pd.concat(error, axis=1)
metric_names = ['Weighted Accuracy', 'Linear Prediction Error']
ct = pd.concat([ct1, ct2], keys=metric_names, axis=0)

cutoff_labels = ['1y', '2y', '3y', '4y', '5y']
ct.columns = cutoff_labels
ct.columns.name = 'Time Cutoff'
ct
Time Cutoff | 1y | 2y | 3y | 4y | 5y | |
---|---|---|---|---|---|---|
Weighted Accuracy | 1 | 0.56 | 0.52 | 0.53 | 0.54 | 0.58 |
TP53 | 0.20 | 0.60 | 0.60 | 0.62 | 0.63 | |
3p | 0.58 | 0.60 | 0.63 | 0.64 | 0.62 | |
TP53 + 3p | 0.61 | 0.63 | 0.67 | 0.68 | 0.67 | |
TP53 + 3p + TP53:3p | 0.56 | 0.63 | 0.66 | 0.64 | 0.66 | |
TP53 + 3p + pct_del + TP53:3p + TP53:pct_del | 0.63 | 0.62 | 0.69 | 0.68 | 0.71 | |
Linear Prediction Error | 1 | 0.49 | 0.50 | 0.50 | 0.50 | 0.48 |
TP53 | 0.49 | 0.48 | 0.47 | 0.46 | 0.45 | |
3p | 0.46 | 0.47 | 0.44 | 0.44 | 0.45 | |
TP53 + 3p | 0.46 | 0.46 | 0.43 | 0.42 | 0.44 | |
TP53 + 3p + TP53:3p | 0.47 | 0.46 | 0.43 | 0.42 | 0.43 | |
TP53 + 3p + pct_del + TP53:3p + TP53:pct_del | 0.45 | 0.45 | 0.42 | 0.41 | 0.40 |
12 rows × 5 columns