Here we conduct a general exploration of TP53 mutations within the HNSCC discovery cohort. While we try to remain unbiased in our screen for molecular correlates of survival, we do have much more information on TP53 mutations than on most other genes.
In Poeta, a TP53 mutation is labeled as disruptive if it is either a stop mutation, or if it is located at a binding site and induces a change in polarity of the encoded amino acid. Interestingly, we found that the polarity of the substitution had little effect on prognosis and that patients with a mutation to the L2 binding site had worse outcomes than patients with a mutation to the L3 binding site. In addition, within the context of the framework we set forth for biomarker discovery, we chose to ignore the classification of mutations (beyond silent/non-silent) in order to keep sample size high at the risk of false positives. For these reasons we elected to simply display the functional assignment of the mutations in Figure 1 rather than obscure these results with a classification scheme.
import NotebookImport
from Imports import *
# TP53 row of the mutation matrix restricted to the `keepers_o` patients;
# missing entries are dropped and the remaining values cast to int.
# NOTE(review): the order=[2,1,0] below suggests the values are per-patient
# mutation counts in {0, 1, 2} -- confirm against mut.df.
p53_mut = mut.df.ix['TP53'].ix[keepers_o].dropna().astype(int)
# Survival analysis across the TP53 groups, passing the groups in the order 2, 1, 0.
survival_and_stats(p53_mut, surv, figsize=(5,4), order=[2,1,0])
# Screen the (transposed) processed clinical table against mutated-vs-not
# TP53 status using `kruskal_pandas`; only the head of the result is shown.
screen_feature(p53_mut>0, kruskal_pandas, clinical.processed.T).head()
# Cross-tabulate TP53 status against pathological nodal extracapsular spread
# and draw the transposed table as a bar chart.
# NOTE(review): setting `ecs.name` may also rename the underlying column
# object held by `clinical.clinical` -- confirm this side effect is harmless.
ecs = clinical.clinical.presenceofpathologicalnodalextracapsularspread
ecs.name = 'Extra Capsular Spread'
pd.crosstab(p53_mut>0, ecs).T.plot(kind='bar', rot=15)
It is important to note that here a patient with multiple mutations is counted multiple times.
import re as re
def get_nums(s):
    """Return every run of decimal digits in ``s`` as a list of strings."""
    return re.findall(r'\d+', s)


def is_disruptive(v):
    """Assign a functional class label to one mutation record.

    ``v`` must expose ``Variant_Classification`` and, for missense
    mutations, ``Protein_Change``.

    Returns
    -------
    str
        * ``'L2'`` / ``'L3'`` for a missense mutation whose first residue
          number falls in 163-195 / 236-251 respectively,
        * ``'other'`` for any other missense mutation,
        * ``'InDel'`` when the classification contains ``'Ins'`` or ``'Del'``,
        * otherwise the classification text before its first underscore
          (e.g. ``'Nonsense_Mutation'`` -> ``'Nonsense'``).

    Raises
    ------
    IndexError
        If a missense record's ``Protein_Change`` contains no digits.
    """
    variant_class = v.Variant_Classification

    if variant_class == 'Missense_Mutation':
        # The first number in the protein change is taken as the residue position.
        residue = int(get_nums(v.Protein_Change)[0])
        if 163 <= residue < 196:
            return 'L2'
        if 236 <= residue < 252:
            return 'L3'
        return 'other'

    if 'Ins' in variant_class or 'Del' in variant_class:
        return 'InDel'
    return variant_class.split('_')[0]
# Build the per-mutation table behind Figure 2b and export it as CSV.
# A patient with several TP53 mutations contributes one row per mutation.

# All TP53 records from the MAF, with every available field.
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']

# Functional class of each record (row-wise apply); silent calls are removed
# and `p53` is trimmed to the same rows so both stay aligned.
dd = p53.apply(is_disruptive, 1)
dd = dd.replace('Silent', nan).dropna()
p53 = p53.ix[dd.index]

# Patients in `keepers_o` that have no retained TP53 record but are present
# in the mutation matrix: these are labelled wild type ('WT').
others = keepers_o.diff(p53.Tumor_Sample_Barcode.ix[dd.index]).intersection(mut.df.columns)

# Re-key the functional classes by tumor barcode, add the WT patients and
# keep only patients in `keepers_o`.
dd.index = p53.Tumor_Sample_Barcode.ix[dd.index]
dd = pd.concat([pd.Series('WT', others), dd])
dd = dd[[i in keepers_o for i in dd.index]]

# Same construction for the protein-change strings, so `pc` lines up with
# `dd` row for row.
pc = pd.Series(list(p53.Protein_Change), index=p53.Tumor_Sample_Barcode)
pc = pd.concat([pd.Series('WT', others), pc])
pc = pc[[i in keepers_o for i in pc.index]]

# Survival rows are looked up by barcode (repeated for patients with several
# mutations) and then re-keyed by row position, because barcodes are no
# longer unique.
s2 = surv.unstack().ix[dd.index]
s2.index = range(len(dd))
s2 = s2.stack()
pats = pd.Series(dd.index, range(len(dd)))
dd.index = range(len(dd))
pc.index = range(len(dd))

# FIX: the data list is now ordered to match `keys`. Previously `pc` and `dd`
# were swapped, so protein changes were exported under 'Functional Class'
# and functional classes under 'Protien Change'.
# NOTE(review): the 'Protien Change' header is misspelled; it is left as-is
# here so that anything reading fig2b.csv by column name keeps working.
df = pd.concat([pats, dd, pc, s2[:,'days'], s2[:,'event']],
               keys=['patient ID','Functional Class','Protien Change',
                     'Days to Death/Censoring', 'Death Indicator'],
               axis=1).sort(['patient ID'])
df = df.set_index('patient ID')
df.to_csv(FIGDIR + 'fig2b.csv')
# Figure 2b: survival curves per TP53 functional class, saved as a PDF.
fig, ax = subplots(figsize=(3.5,2.7))
# Fixed colour per functional class; wild type is grey.
c={'WT': 'grey', 'Splice':colors[0], 'other': colors[5], 'L3': colors[1], 'L2':colors[2],
'Nonsense': colors[3], 'InDel': colors[4]}
draw_survival_curve(dd, s2, colors=c, ax=ax)
# The legend is hidden on the saved figure.
ax.legend().set_visible(False)
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'fig2b.pdf', transparent=True)
# Larger version with statistics; 'grey' is spliced into the palette at
# position 6.
# NOTE(review): presumably so that 'WT' is drawn in grey -- this depends on
# the group ordering inside survival_and_stats; confirm.
survival_and_stats(dd, s2, colors=colors[:6] + ['grey'] + colors[6:], figsize=(4.5,6))
# Survival fit restricted to mutated records only (WT rows excluded).
get_surv_fit_lr(s2, dd[dd!='WT'])
Bar Plot of Hazard Ratios for Supplement
# Cox model of survival on functional class, plotted as hazard ratios.
# 'WT' is renamed to 'aWT'.
# NOTE(review): presumably so that it sorts first and becomes the reference
# level of the fitted factor -- confirm against get_cox_ph.
dd = dd.replace('WT', 'aWT')
f = get_cox_ph(s2, dd, interactions=False)
# Element 7 of R's summary(f) is converted to a DataFrame; the columns used
# below are 'exp(coef)', 'lower .95' and 'upper .95'.
ci = convert_robj(robjects.r.summary(f)[7])
# Drop the first 7 characters of every row label.
# NOTE(review): presumably a fixed covariate-name prefix added by R -- confirm.
ci.index = map(lambda s: s[7:], ci.index)
# Append a row of ones named 'WT' so the reference group appears in the plot
# with hazard ratio 1 and a zero-width interval.
n = ci.ix[0]*0 +1
n.name = 'WT'
ci = ci.append(n)
fig, ax = subplots(figsize=(7,4))
ci = ci.sort('exp(coef)')
haz = ci['exp(coef)']
# Asymmetric error bars: distance from the hazard ratio to each 95% bound.
# NOTE(review): the colour list is positional and only lines up with the
# class colours used elsewhere if the sorted order is
# WT, other, InDel, Splice, Nonsense, L2, L3 -- re-check if the data change.
b = haz.plot(kind='bar', ax=ax,
yerr=[haz - ci['lower .95'], ci['upper .95'] - haz], ecolor='black',
rot=0, color=['grey', colors[5], colors[4], colors[0], colors[3],
colors[2], colors[1]])
prettify_ax(ax)
ax.set_ylabel('Hazard Ratio')
P-values for Bar Comparisons
from itertools import combinations
# For every unordered pair of functional classes, fit a Cox model on just the
# records in those two classes and keep the 'LR' entry of the result.
# The Series is keyed by the class pair.
sig = pd.Series({c: get_cox_ph_ms(s2, dd[dd.isin(c)], interactions=False)['LR']
for c in combinations(dd.unique(),2)})
# Displays a sorted copy; `sig` itself is left unsorted.
sig.order()
# Amino-acid property table, indexed by the file's second column.
# NOTE(review): the later lookups `lo.Polarity[<single letter>]` imply this
# index is the one-letter amino-acid code -- confirm against the CSV.
lo = pd.read_csv('../Extra_Data/amino_acids.csv', index_col=1)
# Collapse duplicate index labels, keeping the first row for each.
lo = lo.groupby(level=0).first()
def is_disruptive(s):
    """Return True if protein change ``s`` counts as disruptive.

    The rule implemented here:

    * a change ending in ``'*'`` (stop) is disruptive;
    * a change ending in ``'splice'`` or containing ``'fs'`` is not;
    * otherwise the change is disruptive only when its position lies in
      residues 163-195 or 236-251 *and* the polarity of the residue at
      ``s[2]`` differs from that of the residue at ``s[-1]``, as looked up in
      the module-level ``lo.Polarity`` table.

    The position is read from ``s[3:-1]``, i.e. this assumes strings shaped
    like ``'p.R175H'`` (TODO confirm against the MAF). Changes whose position
    is not a plain integer, or whose residues are missing from the polarity
    table, are treated as non-disruptive.

    Parameters
    ----------
    s : str
        Protein change string.

    Returns
    -------
    bool
    """
    if s.endswith('*'):
        return True
    if s.endswith('splice') or 'fs' in s:
        return False

    try:
        position = int(s[3:-1])
    except ValueError:
        # Multi-residue or otherwise non-numeric position (e.g. in-frame
        # deletions): not classifiable by this rule.
        return False

    # Chained comparisons replace `range(...) + range(...)`, which only works
    # on Python 2; on Python 3 it raised a TypeError that the old bare
    # `except:` swallowed, turning every missense call into False.
    if not (163 <= position < 196 or 236 <= position < 252):
        return False

    try:
        return bool(lo.Polarity[s[2]] != lo.Polarity[s[-1]])
    except (KeyError, ValueError):
        # Residue code absent from the polarity table, or an ambiguous
        # (non-scalar) lookup: keep the original best-effort answer.
        return False
# Poeta-style disruptive / non-disruptive call per patient.
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']
# One row per mutation record: the combined label of (disruptive protein
# change, non-silent) alongside the tumor barcode.
# NOTE(review): `combine` presumably labels rows where both inputs are true
# as 'both' -- confirm against its definition.
status = pd.concat([combine(p53.Protein_Change.map(is_disruptive), p53.is_silent==0),
p53.Tumor_Sample_Barcode], axis=1,
keys=['status','barcode'])
status = status.set_index('barcode')['status']
# Per patient: 1 if any record is labelled 'both', else 0.
status = (status == 'both').groupby(level=0).sum().clip_upper(1.)
# Patients in the mutation matrix with no TP53 record at all become 'WT'.
# NOTE(review): a patient whose only TP53 records are silent ends up as
# 'Non-Disruptive' rather than 'WT' here -- confirm that is intended.
status = status.ix[mut.df.columns].fillna(-1).map({-1:'WT',0:'Non-Disruptive',1:'Disruptive'})
status = status.ix[keepers_o]
survival_and_stats(status, surv, colors=colors[:6] + ['grey'] + colors[6:], figsize=(7,5))
# Survival fit restricted to the Non-Disruptive and WT groups.
get_surv_fit_lr(surv, status[status.isin(['Non-Disruptive', 'WT'])])
def is_disruptive_mod(s):
    """Return True if protein change ``s`` is disruptive under the modified rule.

    Compared with the polarity-based rule, splice changes count as disruptive
    and polarity is ignored:

    * a change ending in ``'*'`` (stop) or ``'splice'`` is disruptive;
    * a change containing ``'fs'`` is not;
    * otherwise the change is disruptive exactly when its position lies in
      residues 163-195 or 236-251.

    The position is read from ``s[3:-1]``, i.e. this assumes strings shaped
    like ``'p.R175H'`` (TODO confirm against the MAF). Changes whose position
    is not a plain integer are treated as non-disruptive.

    Parameters
    ----------
    s : str
        Protein change string.

    Returns
    -------
    bool
    """
    if s.endswith(('*', 'splice')):
        return True
    if 'fs' in s:
        return False

    try:
        position = int(s[3:-1])
    except ValueError:
        # Multi-residue or otherwise non-numeric position.
        return False

    # Chained comparisons replace `range(...) + range(...)`, which only works
    # on Python 2; on Python 3 it raised a TypeError that the old bare
    # `except:` swallowed, so no missense mutation was ever called disruptive.
    return 163 <= position < 196 or 236 <= position < 252
# Same per-patient classification as the polarity-based version, but using
# `is_disruptive_mod`.
p53 = FH.get_submaf(run.data_path, cancer.name, ['TP53'], fields='All').ix['TP53']
# NOTE(review): `combine` presumably labels rows where both inputs are true
# as 'both' -- confirm against its definition.
status = pd.concat([combine(p53.Protein_Change.map(is_disruptive_mod), p53.is_silent==0),
p53.Tumor_Sample_Barcode], axis=1,
keys=['status','barcode'])
status = status.set_index('barcode')['status']
# Per patient: 1 if any record is labelled 'both', else 0.
status = (status == 'both').groupby(level=0).sum().clip_upper(1.)
# Patients in the mutation matrix with no TP53 record become 'WT'.
status = status.ix[mut.df.columns].fillna(-1).map({-1:'WT',0:'Non-Disruptive',1:'Disruptive'})
status = status.ix[keepers_o]
survival_and_stats(status, surv, colors=colors[:6] + ['grey'] + colors[6:], figsize=(7,5))
# Cox model of Non-Disruptive (True) versus WT (False); Disruptive patients
# are excluded. The trailing semicolon suppresses the notebook cell output.
f = get_cox_ph(surv, status[status.isin(['Non-Disruptive', 'WT'])]=='Non-Disruptive', interactions=False,
print_desc=True);
# NOTE(review): .79 and .353 are hard-coded; presumably the coefficient and
# its standard error copied from the printed Cox summary above -- these will
# go stale if the data change; confirm or read them from `f`.
exp(.79), exp(.79) - exp(.79 - .353)
# Survival by individual recurrent protein change.
# Protein change per record, keyed by tumor barcode, plus the WT patients
# (`others` is computed earlier in the notebook).
# NOTE(review): unlike the functional-class table, `p53` here is the freshly
# re-fetched MAF, so silent records are included and rows are not restricted
# to `keepers_o` -- confirm this is intended.
cc = p53.set_index('Tumor_Sample_Barcode').Protein_Change
cc = pd.concat([pd.Series('WT', others), cc])
# Keep only labels (including 'WT') that occur more than 5 times.
cc = cc[cc.isin(true_index(cc.value_counts() > 5))]
# Look up survival by barcode, then re-key both objects by row position.
s2 = surv.unstack().ix[cc.index]
s2.index = range(len(cc))
s2 = s2.stack()
cc.index = range(len(cc))
survival_and_stats(cc, s2, colors=['grey'] + colors, figsize=(7,5))