Here we are building some multivariate Cox models to explore the effect of the TP53-3p event within the context of clinical variables. Some of the results from this analysis show up in Supplemental Figure 7. In the Clinical_Covariates notebook, we do similar analysis but only looking at one covariate at a time.
import NotebookImport
from Imports import *
importing IPython notebook from Imports.ipynb Populating the interactive namespace from numpy and matplotlib changing to source dirctory populating namespace with data
p53_mut = mut.features.ix['TP53'].ix[keepers_o].dropna()
del_3p = cn.features.ix['Deletion'].ix['3p14.2'].ix[0].ix[keepers_o].dropna()
combo = combine(p53_mut==1, del_3p==-1)
combo = combo.map({'Lesion':'b', 'neither':'a', 'TP53':'c', 'both':'d'})
two_hit = combo=='d'
clin_stage = clinical.processed.stage.ix[keepers_o].dropna()
clin_stage.name = 'Clinical_Stage'
lymph_stage = clinical.processed.lymph_stage.replace('n2', 'n2+').replace('n3', 'n2+').dropna()
lymph_stage = lymph_stage == 'n2+'
lymph_stage = lymph_stage.ix[keepers_o].fillna('nx')
s4 = clin_stage == 'Stage iv'
s4.name = 'Stage_IV'
drinker = clinical.processed.drinker
current_smoker = clinical.processed.smoker.dropna() == 'current smoker'
current_smoker.name = 'current_smoker'
non_smoker = clinical.processed.smoker.dropna() == 'lifelong non-smoker'
non_smoker.name = 'non_smoker'
recent_smoker = clinical.processed.smoker.dropna().isin(['current reformed smoker for < or = 15 years',
'current smoker'])
recent_smoker.name = 'recent_smoker'
spread = clinical.processed.spread
spread = spread.replace({'yes':True, 'no':False})
invasion = clinical.processed.invasion
invasion = invasion.replace({'yes':True, 'no':False})
year = clinical.processed.year
old = clinical.processed.old_age
clinical_vars = pd.concat([drinker, current_smoker, non_smoker, recent_smoker,
spread, invasion, year, old, s4,
lymph_stage], 1).fillna(' missing')
clinical_vars = clinical_vars.ix[keepers_o].dropna()
m2 = get_cox_ph(surv, two_hit, covariates=clinical_vars, print_desc=True, interactions=False);
coef exp(coef) se(coef) z p feature 0.7868 2.20 0.307 2.560 0.0100 drinker missing 0.7522 2.12 0.400 1.879 0.0600 drinkerTrue 0.6583 1.93 0.459 1.434 0.1500 recent_smoker missing 0.3422 1.41 0.486 0.704 0.4800 recent_smokerTrue 0.7342 2.08 0.273 2.694 0.0071 spread missing 0.0892 1.09 0.258 0.345 0.7300 spreadTrue 0.5384 1.71 0.259 2.076 0.0380 invasion missing 0.5071 1.66 0.297 1.710 0.0870 invasionTrue 0.5237 1.69 0.262 2.003 0.0450 yearpre_2000 0.6767 1.97 0.222 3.054 0.0023 old_ageAge > 75 0.6564 1.93 0.292 2.244 0.0250 Likelihood ratio test=66.8 on 11 df, p=4.9e-10 n= 250, number of events= 102
m3 = get_cox_ph(surv, covariates=clinical_vars[['drinker','recent_smoker','spread','invasion','year', 'old_age']],
print_desc=True, interactions=False);
coef exp(coef) se(coef) z p drinker missing 0.7391 2.09 0.400 1.847 6.5e-02 drinkerTrue 0.6028 1.83 0.459 1.315 1.9e-01 recent_smoker missing 0.6972 2.01 0.452 1.542 1.2e-01 recent_smokerTrue 1.0121 2.75 0.255 3.963 7.4e-05 spread missing 0.0932 1.10 0.257 0.363 7.2e-01 spreadTrue 0.6928 2.00 0.257 2.692 7.1e-03 invasion missing 0.4440 1.56 0.297 1.492 1.4e-01 invasionTrue 0.5386 1.71 0.262 2.055 4.0e-02 yearpre_2000 0.6896 1.99 0.222 3.109 1.9e-03 old_ageAge > 75 0.6691 1.95 0.287 2.332 2.0e-02 Likelihood ratio test=59.4 on 10 df, p=4.66e-09 n= 250, number of events= 102
LR_test(m2, m3)
0.0065466476845842825
cc = [colors[4], 'grey','grey', '#9ecae1','#9ecae1', 'grey','grey', '#9ecae1','#9ecae1',
'grey', '#9ecae1','#9ecae1']
fig, ax = subplots(figsize=(6,5))
ci = convert_robj(robjects.r.summary(m2)[7])
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=cc[j],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=cc[j])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.5,5)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([.5, 1, 1.5, 2, 4])
ax.set_xticklabels([.5, 1, 1.5, 2, 4])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
ax.set_xlabel('Hazard Ratio')
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'mv_pb.pdf', tranparent=True)
clinical_vars = pd.concat([current_smoker, non_smoker, recent_smoker,
spread, invasion, year, old, s4,
lymph_stage], 1).fillna(' missing')
clinical_vars_y = clinical_vars.ix[ti(age < 75)].ix[keepers_o].dropna()
clinical_vars_y = clinical_vars_y.replace(' missing', 'zzz')
m1 = get_cox_ph(surv, covariates=clinical_vars_y, print_desc=True, interactions=False);
m2 = get_cox_ph(surv, two_hit, covariates=clinical_vars_y, print_desc=True, interactions=False);
coef exp(coef) se(coef) z p non_smokerTrue 1.182 3.26 0.605 1.95 5.1e-02 non_smokerzzz 1.929 6.88 0.680 2.84 4.6e-03 recent_smokerTrue 1.826 6.21 0.521 3.50 4.6e-04 recent_smokerzzz NA NA 0.000 NA NA spreadTrue 0.600 1.82 0.276 2.17 3.0e-02 spreadzzz 0.299 1.35 0.282 1.06 2.9e-01 invasionTrue 0.660 1.94 0.282 2.34 1.9e-02 invasionzzz 0.379 1.46 0.327 1.16 2.5e-01 yearpre_2000 0.963 2.62 0.240 4.02 5.8e-05 Likelihood ratio test=54.7 on 8 df, p=5.05e-09 n= 220, number of events= 83 coef exp(coef) se(coef) z p feature 1.117 3.06 0.366 3.05 0.00230 recent_smokerTrue 0.852 2.34 0.307 2.78 0.00550 recent_smokerzzz 1.086 2.96 0.561 1.94 0.05300 invasionTrue 0.665 1.94 0.282 2.36 0.01800 invasionzzz 0.496 1.64 0.312 1.59 0.11000 yearpre_2000 0.901 2.46 0.238 3.79 0.00015 Likelihood ratio test=58.2 on 6 df, p=1.05e-10 n= 220, number of events= 83
fig, ax = subplots(figsize=(6,4))
ci = convert_robj(robjects.r.summary(m2)[7])
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=cc[j],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=cc[j])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.5,12)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([.5, 1, 1.5, 2, 4,8])
ax.set_xticklabels([.5, 1, 1.5, 2, 4,8])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
ax.set_xlabel('Hazard Ratio')
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'mv_pa.pdf', tranparent=True)
clinical_vars_y = clinical_vars.ix[ti(age >= 75)].ix[keepers_o].dropna()
clinical_vars_y = clinical_vars_y.replace(' missing', 'zzz')
m1 = get_cox_ph(surv, covariates=clinical_vars_y, print_desc=True, interactions=False);
m2 = get_cox_ph(surv, two_hit, covariates=clinical_vars_y, print_desc=True, interactions=False);
coef exp(coef) se(coef) z p current_smokerTrue 1.236 3.442 0.745 1.660 0.0970 current_smokerzzz -0.980 0.375 0.953 -1.028 0.3000 spreadTrue 0.588 1.801 0.867 0.678 0.5000 spreadzzz -1.642 0.194 0.837 -1.962 0.0500 invasionTrue 1.214 3.368 0.818 1.485 0.1400 invasionzzz 2.642 14.039 1.174 2.250 0.0240 Stage_IV 0.962 2.616 0.367 2.617 0.0089 Likelihood ratio test=16.4 on 7 df, p=0.0214 n= 30, number of events= 19 coef exp(coef) se(coef) z p current_smokerTrue 1.236 3.442 0.745 1.660 0.0970 current_smokerzzz -0.980 0.375 0.953 -1.028 0.3000 spreadTrue 0.588 1.801 0.867 0.678 0.5000 spreadzzz -1.642 0.194 0.837 -1.962 0.0500 invasionTrue 1.214 3.368 0.818 1.485 0.1400 invasionzzz 2.642 14.039 1.174 2.250 0.0240 Stage_IV 0.962 2.616 0.367 2.617 0.0089 Likelihood ratio test=16.4 on 7 df, p=0.0214 n= 30, number of events= 19
fig, ax = subplots(figsize=(6,4))
ci = convert_robj(robjects.r.summary(m2)[7])
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color=cc[j],
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color=cc[j])
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.5,12)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([.5, 1, 1.5, 2, 4,8])
ax.set_xticklabels([.5, 1, 1.5, 2, 4,8])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
ax.set_xlabel('Hazard Ratio')
prettify_ax(ax)
fig.tight_layout()
pts = ti(non_smoker.ix[keepers_o] != True).intersection(ti(old=='Age < 75'))
clinical_vars_y = clinical_vars.ix[pts].fillna(' missing')
m1 = get_cox_ph(surv, covariates=clinical_vars_y, print_desc=True, interactions=False);
m2 = get_cox_ph(surv, two_hit, covariates=clinical_vars_y, print_desc=True, interactions=False);
coef exp(coef) se(coef) z p recent_smoker missing 2.1670 8.732 0.700 3.096 0.00200 recent_smokerTrue 2.0168 7.514 0.542 3.722 0.00020 spread missing 0.5551 1.742 0.330 1.683 0.09200 spreadTrue 0.8222 2.275 0.319 2.576 0.01000 invasion missing 0.0698 1.072 0.355 0.196 0.84000 invasionTrue 0.5971 1.817 0.317 1.883 0.06000 yearpre_2000 0.8787 2.408 0.257 3.415 0.00064 Stage_IV -0.4363 0.646 0.198 -2.206 0.02700 lymph_stagenx 2.1785 8.833 1.085 2.008 0.04500 lymph_stageTrue 0.7148 2.044 0.406 1.762 0.07800 Likelihood ratio test=52.2 on 10 df, p=1.03e-07 n= 181, number of events= 73 coef exp(coef) se(coef) z p feature 1.1710 3.225 0.456 2.567 0.0100 recent_smoker missing 1.9795 7.239 0.725 2.732 0.0063 recent_smokerTrue 1.6461 5.186 0.557 2.953 0.0031 spread missing 0.6027 1.827 0.327 1.844 0.0650 spreadTrue 0.7243 2.063 0.318 2.275 0.0230 invasion missing 0.0992 1.104 0.353 0.281 0.7800 invasionTrue 0.6279 1.874 0.320 1.963 0.0500 yearpre_2000 0.7344 2.084 0.262 2.807 0.0050 Stage_IV -0.4144 0.661 0.198 -2.097 0.0360 lymph_stagenx 1.6626 5.273 1.105 1.505 0.1300 lymph_stageTrue 0.5123 1.669 0.404 1.269 0.2000 Likelihood ratio test=60.5 on 11 df, p=7.38e-09 n= 181, number of events= 73
LR_test(m2, m1)
0.0039819954269985402
fig, ax = subplots(figsize=(6,4))
ci = convert_robj(robjects.r.summary(m2)[7])
haz = ci['exp(coef)']
for j,h in enumerate(haz):
ax.scatter(h, j, marker='s', s=100, color='grey',
edgecolors=['black'], zorder=10)
ax.plot(*zip(*((ci.iloc[j]['lower .95'],j), (ci.iloc[j]['upper .95'],j))),
lw=3, ls='-', marker='o', dash_joinstyle='bevel', color='grey')
ax.axvline(1, ls='--', color='black')
ax.set_xscale('log')
ax.set_xbound(.5,3.5)
ax.set_ybound(-.5,len(ci.index) - .5)
ax.set_xticks([.25,.5, 1, 1.5, 2, 4,8,16])
ax.set_xticklabels([.25,.5, 1, 1.5, 2, 4,8,16])
ax.set_yticks(range(len(ci.index)))
ax.set_yticklabels(ci.index)
ax.set_xlabel('Hazard Ratio')
prettify_ax(ax)
fig.tight_layout()