import NotebookImport
from Imports import *
importing IPython notebook from Imports.ipynb Populating the interactive namespace from numpy and matplotlib changing to source dirctory populating namespace with data
meta = pd.read_csv('../Extra_Data/UPMC_cohort/meta.csv', index_col=0)
meta.index = pd.MultiIndex.from_tuples([('-'.join(i.split('-')[:-1]), i.split('-')[-1])
for i in meta.index])
clin = pd.read_csv('../Extra_Data/UPMC_cohort/pitt_broad_data.csv', index_col=0)
clin.Tumor_type = clin.Tumor_type.map(str.strip)
surv = pd.concat([clin.os_5yr, clin.os_5yr_mons*30.5], keys=['event','days'], axis=1).stack()
clin2 = pd.read_csv('../Extra_Data/UPMC_cohort/pitt_broad_data_update.csv', index_col=0)
surv2 = pd.concat([clin2.os_5yr, clin2.os_5yr_mons*30.5], keys=['event','days'], axis=1).stack()
surv = surv2.combine_first(surv)
keepers = ti(clin.HPV == 'Negative')
cc = clin.ix[true_index(meta.Final_74_exome_analysis[:,'Tumor']>0)]
cc = cc.ix[:,cc.apply(pd.value_counts).count().isin([2,3,4])]
cc = cc.dropna(1)
cox_screen(cc.T, surv)[7:].head(10)
hazard | LR | concordance | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
exp(-coef) | exp(coef) | lower .95 | upper .95 | df | p | stat | q | se | stat | |
Neoadj_tx | 0.01 | 115.91 | 10.25 | 1311.28 | 1 | 4.32e-04 | 12.39 | 0.00 | 0.00 | 0.53 |
Chemo | 0.04 | 27.59 | 5.33 | 142.93 | 2 | 1.54e-03 | 12.95 | 0.00 | 0.04 | 0.62 |
Adj_chemo | 0.04 | 27.59 | 5.33 | 142.93 | 2 | 1.54e-03 | 12.95 | 0.00 | 0.04 | 0.62 |
Recurrence | 0.45 | 2.21 | 1.18 | 4.12 | 1 | 1.41e-02 | 6.02 | 0.04 | 0.04 | 0.57 |
PNI | 0.42 | 2.36 | 0.86 | 6.50 | 2 | 1.49e-02 | 8.41 | 0.04 | 0.04 | 0.62 |
DistMets | 0.35 | 2.82 | 1.33 | 6.01 | 1 | 1.44e-02 | 5.98 | 0.04 | 0.03 | 0.55 |
EPS | 1.45 | 0.69 | 0.30 | 1.58 | 2 | 2.14e-02 | 7.69 | 0.05 | 0.04 | 0.62 |
Dif | 0.90 | 1.11 | 0.58 | 2.13 | 3 | 6.69e-02 | 7.16 | 0.14 | 0.04 | 0.54 |
Ethnicity | 6.71 | 0.15 | 0.02 | 1.16 | 1 | 1.54e-01 | 2.04 | 0.28 | 0.01 | 0.51 |
Local_recurr | 0.55 | 1.83 | 0.84 | 3.98 | 1 | 1.53e-01 | 2.04 | 0.28 | 0.03 | 0.53 |
f = clin.Primary_site.ix[keepers]
f = f.replace('Hypopharynx','Other').replace('Sinonasal','Other')
get_surv_fit_lr(surv, f)
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
4.02 | 0.259 | |||||||||
Oral cavity | 36 | 22 | 2.99 | 1.3 | NaN | 0.411 | 0.277 | 0.61 | ||
Larynx | 15 | 6 | NaN | 1.5 | NaN | 0.574 | 0.366 | 0.901 | ||
Other | 8 | 4 | 3.7 | 1.8 | NaN | 0.5 | 0.25 | 1 | ||
Oropharynx | 4 | 3 | 0.844 | 0.535 | NaN | NaN | NaN | NaN |
f = clin.Nodal_stage.ix[keepers].fillna('.')
f = f.replace('N2A','N2').replace('N2B','N2').replace('N2C','N2')
get_surv_fit_lr(surv, f)
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
22.1 | 0.000192 | |||||||||
N2 | 28 | 20 | 1.15 | 0.978 | NaN | 0.298 | 0.167 | 0.532 | ||
N0 | 19 | 6 | NaN | 3.98 | NaN | 0.672 | 0.487 | 0.928 | ||
N1 | 8 | 4 | 3.7 | 2.11 | NaN | 0.5 | 0.25 | 1 | ||
. | 5 | 2 | NaN | 1.55 | NaN | 0.6 | 0.293 | 1 | ||
N3 | 3 | 3 | 0.894 | 0.535 | NaN | NaN | NaN | NaN |
survival_and_stats(f.isin(['N2','N3']), surv)
f = clin.clinical_stage.ix[keepers]
f = f.replace('IVa','IV').replace('IVb','IV').replace('.', 'Unstaged').fillna('Unstaged')
get_surv_fit_lr(surv, f)
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
4.3 | 0.231 | |||||||||
IV | 43 | 27 | 1.6 | 1.1 | NaN | 0.38 | 0.258 | 0.56 | ||
III | 12 | 5 | NaN | 2.11 | NaN | 0.583 | 0.362 | 0.941 | ||
Unstaged | 5 | 2 | NaN | 1.55 | NaN | 0.6 | 0.293 | 1 | ||
II | 3 | 1 | NaN | 3.98 | NaN | 0.667 | 0.3 | 1 |
f = clin.Alcohol_amt.fillna(0).ix[keepers].replace('.', 0).astype(float) > 13
f2 = clin.Alcohol_amt.fillna(0).ix[keepers].replace('.', 0).astype(float) < 7
get_surv_fit_lr(surv, 1.*f - 1.*f2)
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
3.25 | 0.197 | |||||||||
-1 | 36 | 19 | 3.98 | 1.8 | NaN | 0.497 | 0.357 | 0.692 | ||
0 | 18 | 9 | 3.65 | 1.29 | NaN | 0.463 | 0.276 | 0.778 | ||
1 | 9 | 7 | 1.1 | 0.953 | NaN | 0.222 | 0.0655 | 0.754 |
get_surv_fit_lr(surv, clin.Tob_history.ix[keepers])
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
0.0913 | 0.763 | |||||||||
yes | 55 | 31 | 3.65 | 1.55 | NaN | 0.44 | 0.324 | 0.596 | ||
no | 8 | 4 | 1.43 | 1.3 | NaN | 0.5 | 0.25 | 1 |
get_surv_fit_lr(surv, clin.PNI.ix[keepers])
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
6.78 | 0.0338 | |||||||||
yes | 31 | 22 | 1.55 | 1 | NaN | 0.323 | 0.194 | 0.537 | ||
no | 26 | 9 | NaN | 3.98 | NaN | 0.636 | 0.471 | 0.858 | ||
unknown | 6 | 4 | 3.25 | 1.1 | NaN | NaN | NaN | NaN |
survival_and_stats(clin['Chemo '], surv, figsize=(5,4), title='Chemo')
get_surv_fit_lr(surv, clin.Date_Dx.map(lambda s: s[-2:]).ix[keepers].astype(int)>10)
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
0.0558 | 0.813 | |||||||||
0 | 62 | 34 | 3.17 | 1.5 | NaN | 0.457 | 0.347 | 0.602 | ||
1 | 1 | 1 | 3.98 | NaN | NaN | NaN | NaN | NaN |
survival_and_stats(clin.EPS, surv, figsize=(5,4), title='EPS')
survival_and_stats(clin.HPV, surv, figsize=(5,4), title='HPV')
survival_and_stats(to_quants(clin.Age_Dx, q=.33, labels=True), surv, figsize=(5,4), title='Age')
maf = pd.read_csv('../Extra_Data/UPMC_cohort/maf.csv')
maf = maf.dropna(how='all', axis=[0,1])
maf['Tumor_Sample_Barcode'] = maf['Tumor_Sample_Barcode'].str.replace('-Tumor','').copy()
maf = maf.set_index(['Hugo_Symbol','Tumor_Sample_Barcode'])
maf = maf[maf.Validation_Status != 'Wildtype']
non_silent = maf[maf.Variant_Classification != 'Synonymous']
non_silent['counter'] = 1
hit_matrix = non_silent.counter.groupby(level=[0,1]).sum().unstack()
hit_matrix.columns = map(lambda s: s.replace('-Tumor',''), hit_matrix.columns)
hit_matrix = hit_matrix.fillna(0)
The TCGA has a much higher mutation rate as compared to this data. This could be due to differences in sample prep, sequencing depth, or bioinformatic processing. The two cohorts are fairly well matched clinically, so we expect that this difference can be contributed to technical factors as compared to biological ones.
fig, axs = subplots(1,3, figsize=(8,3), sharey=True)
c = pd.concat([(mut.df > 0).sum(), (hit_matrix > 0).sum(0)], keys=['TCGA','UPMC'], axis=1).stack()
violin_plot_series(c.clip_upper(500), ax=axs[0])
hh = hit_matrix.ix[:, true_index(clin.HPV == 'Negative')].sum()
yy = mut.df.ix[:, true_index(clinical.hpv.dropna() == False)].sum()
c = pd.concat([yy,hh], keys=['TCGA','UPMC'], axis=1).stack()
violin_plot_series(c.clip_upper(500), ax=axs[1])
hh = hit_matrix.ix[:, true_index(clin.HPV == 'Positive')].sum()
yy = mut.df.ix[:, true_index(clinical.hpv.dropna())].sum()
c = pd.concat([yy,hh], keys=['TCGA','UPMC'], axis=1).stack()
violin_plot_series(c.clip_upper(500), ax=axs[2])
for ax in axs:
prettify_ax(ax)
ax.set_ylabel('')
ax.set_xlabel('Study')
axs[0].set_ylabel('# of Mutations')
fig.tight_layout()
fig.savefig(FIGDIR + 'mutation_rate_comparison.pdf')
hh = hit_matrix.ix[:, true_index(clin.HPV == 'Negative')].sum()
yy = mut.df.ix[:, true_index(clinical.hpv.dropna() == False)].sum()
c = pd.concat([yy,hh], keys=['TCGA','UPMC'], axis=1).stack()
stats.mannwhitneyu(hh.dropna(), yy.dropna())
(5909.0, 0.0002041546634473598)
The median mutation rate of TCGA HPV- samples is 104 verses 74 in the UPMC cohort.
yy = mut.df.ix[:, true_index(clinical.hpv.dropna() == False)].sum()
yy.median()
104.0
hh = hit_matrix.ix[:, true_index(clin.HPV == 'Negative')].sum()
hh.median()
73.0
p53_mut = hit_matrix.ix['TP53'].ix[true_index(clin.HPV=='Negative')] > 0
survival_and_stats(p53_mut, surv)
Broken down by functional type.
import re as re
get_nums = lambda s: re.findall(r'\d+', s)
def is_disruptive(v):
c = v.Variant_Classification
if c != 'Missense':
if 'Ins' in c or 'Del' in c:
return 'InDel'
else:
return v.Variant_Classification.split('_')[0]
else:
s = v.Protein_Change
aa = int(get_nums(s)[0])
if int(aa) in range(163,196):
return 'L2'
if int(aa) in range(236, 252):
return 'L3'
return 'other'
p53 = maf.ix['TP53'].reset_index()
dd = p53.apply(is_disruptive, 1)
dd = dd.replace('Synonymous',nan).dropna()
others = hit_matrix.columns.diff(p53.Tumor_Sample_Barcode.ix[dd.index])
dd.index = p53.Tumor_Sample_Barcode.ix[dd.index]
dd = pd.concat([pd.Series('WT', others), dd])
dd = dd.ix[keepers]
s2 = surv.unstack().ix[dd.index]
s2.index = range(len(dd))
s2 = s2.stack()
dd.index = range(len(dd))
survival_and_stats(dd, s2, colors=colors[:6] + ['grey'] + colors[6:], figsize=(7,6))
By Validation Status. Validated mutations do seem to have worse prognosis, leading us to think there could be a number of false positives in the mix.
v = maf.ix['TP53'].Validation_Status == 'Valid'
v = v.groupby(level=0).sum() > 0
v.name = 'valid'
v = v.ix[hit_matrix.columns]
v = v.fillna('WT')
v.value_counts()
False 37 WT 28 True 9 dtype: int64
draw_survival_curve(v.ix[true_index(clin.HPV=='Negative')], surv)
MUC5B mutation status. Can't really say much here.
survival_and_stats(hit_matrix.ix['MUC5B'].ix[true_index(clin.HPV=='Negative')]>0, surv)
cn_tumor = meta.xs('Tumor', level=1)['SNP array ID'].dropna()
lu = pd.Series({v:i for i,v in cn_tumor.iteritems()})
I am loading the GISTIC2 processed data for this cohort. The data was processed together with the TCGA samples.
path = '../Extra_Data/UPMC_cohort/'
gistic = pd.read_table(path + 'all_thresholded.by_genes.txt', index_col=[0,1, 2])
gistic.index = gistic.index.droplevel(1)
cn_upmc = pd.DataFrame({i: gistic[t] for i,t in cn_tumor.iteritems() if t in gistic})
pct_del = (cn_upmc < 0).sum() / (1.*len(cn_upmc))
pct_amp = (cn_upmc > 0).sum() / (1.*len(cn_upmc))
pct_altered = pct_amp + pct_del
pct_altered.name = 'CIN_pct_altered'
Chromosomal instability in the UPMC cohort.
survival_and_stats(to_quants(pct_altered, q=.33, labels=True), surv)
Taking a look at 3p deletion.
del_3p = cn_upmc.xs('3p14.2',0,1).median().ix[true_index(clin.HPV=='Negative')].dropna() < 0
b = combine(p53_mut, del_3p).index
keepers = meta['SNP array ID'].notnull() & (meta['Final_74_exome_analysis'] == 1)
keepers = keepers.groupby(level=0).sum()
keepers = (keepers > 0) & (clin.HPV == 'Negative')
keepers = keepers.ix[surv.index.get_level_values(0)].dropna()
keepers = keepers.groupby(level=0).last()
keepers.value_counts()
True 49 False 25 dtype: int64
survival_and_stats(del_3p, surv)
Association of TP53 mutation with 3p deletion
len(combine(p53_mut, del_3p))
fisher_exact_test(p53_mut, del_3p, alternative='greater')
odds_ratio 2.50 p 0.17 dtype: float64
Survival association
combo = combine(p53_mut, del_3p)
survival_and_stats(combo, surv, figsize=(5,4), title='PITT')
pd.crosstab(clin.Alcohol, clin.Alcohol_amt.fillna(0))
Alcohol_amt | 0 | . | 1 | 10 | 12 | 15 | 16 | 2 | 20 | 24 | 27 | 3 | 5 | 6 | 7 | 8 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Alcohol | ||||||||||||||||
. | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
no | 15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
yes | 0 | 1 | 1 | 8 | 4 | 3 | 1 | 3 | 2 | 3 | 1 | 4 | 4 | 11 | 3 | 9 |
alch = clin.Alcohol_amt.fillna(0).replace('.', nan).astype(float)
alch.hist()
<matplotlib.axes.AxesSubplot at 0x970b550>
get_surv_fit_lr(surv, (alch.ix[combo.index].dropna() > 10).fillna('M'))
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
6.83 | 0.00898 | |||||||||
0 | 39 | 19 | 5.01 | 1.8 | NaN | 0.526 | 0.389 | 0.712 | ||
1 | 9 | 8 | 1.1 | 0.953 | NaN | 0.111 | 0.0175 | 0.705 |
get_surv_fit_lr(surv, clin.Smoking_history.ix[combo.index])
Stats | Median Survival | 5y Survival | Log-Rank | |||||||
---|---|---|---|---|---|---|---|---|---|---|
# Patients | # Events | Median | Lower | Upper | Surv | Lower | Upper | chi2 | p | |
1.14 | 0.285 | |||||||||
yes | 41 | 24 | 1.85 | 1.15 | NaN | 0.399 | 0.273 | 0.585 | ||
no | 7 | 3 | 5.01 | 1.43 | NaN | 0.714 | 0.447 | 1 |
p2 = p53_mut
p2.name = 'TP53'
p2 = p2.map({True:'Mut.',False:'WT'})
draw_survival_curves(del_3p.map({0:'3p +/+', 1:'3p +/-'}), surv, p2, legend=True)
get_cox_ph(surv, del_3p.ix[true_index(p53_mut)], print_desc=True, interactions=False);
coef exp(coef) se(coef) z p feature 1.92 6.84 1.03 1.87 0.061 Likelihood ratio test=6.37 on 1 df, p=0.0116 n= 35, number of events= 20 (11 observations deleted due to missingness)
exp(1.92), exp(1.92) - exp(1.92 - 1.03)
(6.8209584692907494, 4.3858288180008751)
pd.crosstab(p53_mut, del_3p.ix[p53_mut.index].fillna('M'))
d | False | True | M |
---|---|---|---|
TP53 | |||
False | 5 | 8 | 4 |
True | 7 | 28 | 11 |
two_hit_both = combine(p53_mut>0, del_3p) == 'both'
no_cna = true_index(del_3p.ix[true_index(p53_mut==False)].isnull())
no_cna = pd.Series(False, index=no_cna)
two_hit = two_hit_both.append(no_cna).ix[keepers].dropna()
survival_and_stats(two_hit.ix[keepers].dropna(), surv)
This was a suggestion from a review. The R coin package allows for exact permutation tests for log-rank statistics.
%load_ext rmagic
coin = robjects.packages.importr('coin')
th2 = two_hit[true_index(clin.Age_Dx < 75)].dropna()
time = robjects.r.c(*list(surv.unstack().days.ix[th2.index]))
event = robjects.r.c(*list(surv.unstack().event.ix[th2.index]))
g = robjects.r.c(*list(th2*1.))
%%R -i time,event,g
exdata <- data.frame(time = time,
event = event,
group = factor(g))
surv_test(Surv(time, event) ~ group, data=exdata,
distribution=exact(), alternative='less')
Exact Logrank Test data: Surv(time, event) by group (0, 1) Z = -1.5691, p-value = 0.05894 alternative hypothesis: less
f = del_3p.ix[true_index(p53_mut)].dropna()*1.
f = f.ix[clin.Age_Dx < 77]
g = robjects.r.c(*list(f))
time = robjects.r.c(*list(surv.unstack().days.ix[f.index]))
event = robjects.r.c(*list(surv.unstack().event.ix[f.index]))
cox(f, surv)
hazard exp(coef) 6.84 exp(-coef) 0.15 lower .95 0.91 upper .95 51.20 LR stat 6.37 df 1.00 p 0.01 concordance stat 0.61 se 0.05 dtype: float64
%%R -i time,event,g
exdata <- data.frame(time = time,
event = event,
group = factor(g))
surv_test(Surv(time, event) ~ group, data=exdata,
distribution=exact(), alternative='less')
Exact Logrank Test data: Surv(time, event) by group (0, 1) Z = -2.4025, p-value = 0.009194 alternative hypothesis: less
f = p53_mut.ix[true_index(del_3p)].dropna()*1.
g = robjects.r.c(*list(f))
time = robjects.r.c(*list(surv.unstack().days.ix[f.index]))
event = robjects.r.c(*list(surv.unstack().event.ix[f.index]))
%%R -i time,event,g
exdata <- data.frame(time = time,
event = event,
group = factor(g))
surv_test(Surv(time, event) ~ group, data=exdata,
distribution=exact(), alternative='less')
Exact Logrank Test data: Surv(time, event) by group (0, 1) Z = 0.0197, p-value = 0.5014 alternative hypothesis: less
We do not have expression data for this cohort so we use copy number. The trend seems to hold here but we do not report this in the manuscript.
cn_upmc = cn_upmc.ix[:, two_hit.index]
survival_and_stats(cn_upmc.ix['MIR548K'].ix[0].dropna()>0, surv)
st = (two_hit + 1).add(2*(cn_upmc.ix['MIR548K'].ix[0] > 0)).replace(3,0).replace(4,3)
st = st.dropna()
mir = cn_upmc.ix['MIR548K'].ix[0].ix[two_hit.index]
eq = cn_upmc.ix['MIR548K'].ix[0].ix[two_hit.index]
#eq = cn_upmc.xs('11q13.3',0,1).median()
st = (two_hit + 1).add(2*(eq > 1)).replace(3,1).replace(4,3)
st = st.dropna()
survival_and_stats(st, surv)
get_cox_ph(surv, st, print_desc=True, interactions=False);
coef exp(coef) se(coef) z p feature 0.433 1.54 0.243 1.78 0.074 Likelihood ratio test=3.14 on 1 df, p=0.0763 n= 48, number of events= 27
cn_upmc = pd.DataFrame({i: gistic[t] for i,t in cn_tumor.iteritems() if t in gistic})
cn_upmc = pd.DataFrame({i: gistic[t] for i,t in cn_tumor.iteritems() if t in gistic})
del_3p_upmc = cn_upmc.xs('3p14.2',0,1).median().ix[true_index(clin.HPV=='Positive')].dropna()
surv_combined = pd.concat([surv, clinical.survival.survival_5y])
cn_2 = cn_upmc.groupby(level=1).median().ix[:, true_index(clin.HPV=='Positive')].dropna(1, how='all')
cn_3 = cn.features.copy().ix[:, ti(clinical.hpv)]
cn_3.index = cn_3.index.get_level_values(1)
cn_4 = cn_2.join(cn_3)
cn_4 = cn_4.dropna(1, how='all').dropna(how='any')
cn_d = (cn_4 < 0)
cn_d = cn_d[cn_d.sum(1) > 0]
cn_a = (cn_4 > 0)
cn_a = cn_a[cn_a.sum(1) > 0]
cn_all = pd.concat([cn_a, cn_d], keys=['Amplification','Deletion'])
cox_screen(cn_all, surv_combined).sort([('LR','p')]).head()
hazard | LR | concordance | |||||||||
---|---|---|---|---|---|---|---|---|---|---|---|
exp(-coef) | exp(coef) | lower .95 | upper .95 | df | p | stat | q | se | stat | ||
Deletion | 3p14.2 | 0.18 | 5.54 | 1.51 | 20.33 | 1 | 0.00 | 8.21 | 0.23 | 0.08 | 0.75 |
Amplification | 8q24.21 | 6.06 | 0.17 | 0.04 | 0.76 | 1 | 0.01 | 7.44 | 0.23 | 0.08 | 0.68 |
13q22.1 | 0.16 | 6.25 | 1.82 | 21.46 | 1 | 0.01 | 7.38 | 0.23 | 0.06 | 0.69 | |
20q11.22 | 7.82 | 0.13 | 0.02 | 0.99 | 1 | 0.01 | 6.69 | 0.23 | 0.08 | 0.64 | |
8p11.23 | 7.71 | 0.13 | 0.02 | 1.02 | 1 | 0.01 | 6.44 | 0.23 | 0.08 | 0.60 |
del_3p_all = cn_all.ix['Deletion'].ix['3p14.2']
p53_upmc = hit_matrix.ix['TP53', true_index(clin.HPV=='Positive')].dropna()
p53_tcga = mut.features.ix['TP53', true_index(clinical.hpv==True)]
p53_all = (mut.features.ix['TP53'].append(hit_matrix.ix['TP53']>0)).map({True:'mut',False:'wt'})
hpv_all = clinical.hpv.append(clin.HPV=='Positive')
pd.crosstab(hpv_all, p53_all)
TP53 | mut | wt |
---|---|---|
HPV | ||
False | 257 | 69 |
True | 2 | 52 |
1 / fisher_exact_test(hpv_all, p53_all)['odds_ratio']
0.010326249625860521
fisher_exact_test(hit_matrix.ix['TP53']>0, clin.HPV=='Positive')
odds_ratio 0.00e+00 p 5.14e-06 dtype: float64
print len(combine(hit_matrix.ix['TP53']>0, clin.HPV=='Positive'))
pd.crosstab(hit_matrix.ix['TP53']>0, clin.HPV)
74
HPV | Negative | Positive |
---|---|---|
TP53 | ||
False | 17 | 11 |
True | 46 | 0 |
fisher_exact_test(hpv_all, p53_all)
odds_ratio 9.68e+01 p 1.17e-27 dtype: float64
survival_and_stats(del_3p_all, surv_combined)
log_rank(del_3p_all, surv_combined).ix['p']
0.0038336554312233214
get_cox_ph(surv_combined, del_3p_all, print_desc=True);
coef exp(coef) se(coef) z p feature 1.71 5.54 0.663 2.58 0.0098 Likelihood ratio test=8.21 on 1 df, p=0.00417 n= 59, number of events= 13
exp(1.71), exp(1.71) - exp(1.71 - .663)
(5.5289614776240041, 2.6798704663347901)
p53_mut.name = 'TP53'
del_3p.name = '3p'
combo = combine(p53_mut, del_3p)
fig, ax = subplots(figsize=(3,2.5))
c = {'TP53': colors_th[1], 'both': colors_th[0]}
draw_survival_curve(combo[combo.isin(['both','TP53'])], surv, colors=c, ax=ax)
ax.legend(loc='lower left', frameon=False)
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'fig3a.pdf')
c = combo[combo.isin(['both','TP53'])]
c = c.replace({'both': 'TP53 and 3p'})
df = pd.concat([c, surv[:,'days'], surv[:,'event']],
keys=['Molecular Status', 'Days to Death/Censoring', 'Death Indicator'],
axis=1).dropna()
df.to_csv(FIGDIR + 'fig3a.csv')
fig, ax = subplots(figsize=(3,2.5))
surv_fit = get_surv_fit(surv, combo)
tt = surv_fit['5y Survival']
b = (tt['Surv']).plot(kind='bar', ax=ax, color=[colors_th[2], colors_th[0],
colors_th[3], colors_th[1]],
yerr=[tt.Surv-tt.Lower, tt.Upper-tt.Surv], ecolor='black')
ax.set_ylabel('5Y Survival')
ax.set_yticks([0, .5, 1.0])
bar_labels = ['{}\n\n{}'.format(j,int(v)) for j,v in
surv_fit['Stats']['# Patients'].iteritems()]
ax.set_xticklabels(bar_labels, rotation=0);
prettify_ax(ax)
fig.tight_layout()
fig, ax = subplots(figsize=(3,2.5))
draw_survival_curve(del_3p_all.astype(int), surv_combined, ax=ax,
colors=[colors_th[3], colors_th[2]])
ax.legend().set_visible(False)
prettify_ax(ax)
fig.tight_layout()
fig.savefig(FIGDIR + 'fig3d.pdf')
df = pd.concat([del_3p_all, surv_combined[:,'days'], surv_combined[:,'event']],
keys=['3p Deletion', 'Days to Death/Censoring', 'Death Indicator'],
axis=1).dropna()
df.to_csv(FIGDIR + 'fig3ba.csv')