%pylab inline
Populating the interactive namespace from numpy and matplotlib
cd ../src/
/cellar/users/agross/TCGA_Code/TCGA/src
from Processing.Imports import *
params = pd.read_table('../global_params.txt', header=None, squeeze=True,
index_col=0)
run_path = '{}/Firehose__{}/'.format(params.ix['OUT_PATH'], params.ix['RUN_DATE'])
run = get_run(run_path, 'Run_' + params.ix['VERSION'])
cancer = run.load_cancer(params.ix['CANCER'])
clinical = cancer.load_clinical()
mut = cancer.load_data('Mutation')
mut.uncompress()
cn = cancer.load_data('CN_broad')
cn.uncompress()
rna = pickle.load(open(cancer.path + '/mRNASeq/store/no_hpv.p', 'rb'))
#meth = pickle.load(open(cancer.path + '/Methylation/store/no_hpv.p', 'rb'))
mirna = pickle.load(open(cancer.path + '/miRNASeq/store/no_hpv.p', 'rb'))
clinical_processed = clinical.processed
#clinical_processed = clinical_processed.replace('yes', 1.).replace('no', 0.)
clinical_processed['year'] = clinical_processed.year == 'post_2000'
hpv_inferred = clinical_processed.hpv_inferred
surv = clinical.survival.survival_5y
age = clinical.clinical.age.astype(float)
old = pd.Series(1.*(age>=75), name='old')
keepers_o = true_index(hpv_inferred == 0)
keepers_o = keepers_o.intersection(mut.features.columns)
keepers_o = keepers_o.intersection(cn.features.columns)
keepers_o = keepers_o.intersection(surv.unstack().index)
keepers_o = keepers_o.intersection(rna.features.columns)
keepers_o = keepers_o.intersection(mirna.features.columns)
rr = screen_feature(mut.df.ix[['SMC4','LIG4','SMC2']].sum() > 0 , kruskal_pandas, mirna.features)
rr.head()
H | p | q | |
---|---|---|---|
(real, hsa-mir-31) | 10.83 | 0.00 | 0.19 |
(binary, hsa-mir-211) | 9.62 | 0.00 | 0.19 |
(real, hsa-mir-193b) | 8.49 | 0.00 | 0.19 |
(real, hsa-mir-758) | 8.29 | 0.00 | 0.19 |
(binary, hsa-mir-3171) | 7.84 | 0.01 | 0.19 |
5 rows × 3 columns
violin_plot_pandas(mut.df.ix[['SMC4','LIG4','SMC2']].sum(), mirna.features.ix['real'].ix['hsa-mir-31'])
from Figures.MemoPlot import pathway_plot
fig = plt.figure(figsize=(6,4))
ax = plt.subplot2grid((1,4), (0,0), colspan=3)
df = pathway_plot(mut.df.ix[run.gene_sets['REACTOME_SOS_MEDIATED_SIGNALLING']]>0,bar=None, ax=ax)
ax = plt.subplot2grid((1,4), (0,3), colspan=1)
dd = mut.df.ix[run.gene_sets['REACTOME_SOS_MEDIATED_SIGNALLING'], keepers_o]
dd = dd.ix[dd.sum(1).dropna().order().index[::-1]]
vec = dd.apply(lambda s: s.idxmax() if s.max() > 0 else 'WT')
mm = pd.concat([vec[vec == 'WT'], vec[vec!='WT']], keys=['WT','Mut.']).unstack()
mm = mm.apply(pd.value_counts, 1).fillna(0)
mm = mm.ix[:,vec.value_counts().index]
mm.plot(kind='bar', stacked=True, color=['grey'] + ['green'] * 10, ax=ax, lw=.5)
ax.get_legend().set_visible(False)
ax.set_ylabel('Num. Patients')
prettify_ax(ax)
fig.tight_layout()
df = rna.df.xs('01',1,1)
v = extract_pc(df.ix[run.gene_sets['BIOCARTA_RAS_PATHWAY']])['pat_vec']
#ras = rna.pathways.ix['BIOCARTA_RAS_PATHWAY']
ras = v
ras.groupby(binarize(ras)).min()
0 False -0.26 True -0.14 dtype: float64
draw_survival_curves(binarize(v.ix[keepers_o]), surv)
fig, axs = subplots(1,2, figsize=(6,3))
rna.loadings['BIOCARTA_RAS_PATHWAY'].order().plot(kind='barh', ax=axs[0], color=colors[0])
axs[0].set_xticks([-.4, -.2, 0, .2, .4])
axs[0].set_xlabel('Gene Loading')
axs[0].set_ylabel('Ras Signaling Pathway')
h = rna.pathways.ix['BIOCARTA_RAS_PATHWAY'].hist(ax=axs[1], bins=15, color=colors[1])
[b.set_facecolor('grey') for b in h.patches[:-6]]
axs[1].set_ylabel('Num. Patients')
axs[1].set_xlabel('Consensus Pathway Value')
for ax in axs:
prettify_ax(ax)
fig.tight_layout()
p53_mut = mut.features.ix['TP53'].ix[keepers_o].dropna()
del_3p = cn.features.ix['Deletion'].ix['3p14.2'].ix[0].ix[keepers_o].dropna()
muc5b = mut.features.ix['MUC5B']
mir548 = mirna.features.ix['binary'].ix['hsa-mir-548k']
combo = combine(p53_mut==1, del_3p==-1)
combo = combo.map({'Lesion':'b', 'neither':'a', 'TP53':'c', 'both':'d'})
two_hit = combo=='d'
st = (two_hit*1.).copy() + 1.
#st.ix[true_index(st==2).intersection(true_index(muc5b==1))] = 4
st.ix[true_index(st==2).intersection(true_index(mir548==1))] = 3
st.name = 'subtype'
st = st.astype(int)
p53_mut = mut.features.ix['TP53'].ix[keepers_o]
survival_and_stats(p53_mut.ix[keepers_o], surv, figsize=(6,4))
del_3p = cn.features.ix['Deletion'].ix['3p14.2'].ix[0].ix[keepers_o]
survival_and_stats(del_3p, surv, figsize=(6,4))
mir_3170 = mirna.features.ix['binary'].ix['hsa-mir-3170'].ix[keepers_o]
survival_and_stats(mir_3170, surv, figsize=(6,4))
ras = binarize(rna.pathways.ix['BIOCARTA_RAS_PATHWAY'].ix[keepers_o])
survival_and_stats(ras, surv, figsize=(6,4))
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-39-201e00743fba> in <module>() ----> 1 ras = binarize(rna.pathways.ix['BIOCARTA_RAS_PATHWAY'].ix[keepers_o]) 2 survival_and_stats(ras, surv, figsize=(6,4)) /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/indexing.pyc in __getitem__(self, key) 54 return self._getitem_tuple(key) 55 else: ---> 56 return self._getitem_axis(key, axis=0) 57 58 def _get_label(self, label, axis=0): /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/indexing.pyc in _getitem_axis(self, key, axis) 753 return self._get_loc(key, axis=axis) 754 --> 755 return self._get_label(key, axis=axis) 756 757 def _getitem_iterable(self, key, axis=0): /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/indexing.pyc in _get_label(self, label, axis) 66 return self.obj._xs(label, axis=axis, copy=False) 67 except Exception: ---> 68 return self.obj._xs(label, axis=axis, copy=True) 69 70 def _get_loc(self, key, axis=0): /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/frame.pyc in xs(self, key, axis, level, copy, drop_level) 2150 drop_level=drop_level) 2151 else: -> 2152 loc = self.index.get_loc(key) 2153 2154 if isinstance(loc, np.ndarray): /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/index.pyc in get_loc(self, key) 1015 loc : int if unique index, possibly slice or mask if not 1016 """ -> 1017 return self._engine.get_loc(_values_from_object(key)) 1018 1019 def get_value(self, series, key): /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3553)() /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/index.so in pandas.index.IndexEngine.get_loc (pandas/index.c:3433)() /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/hashtable.so in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:10780)() /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/hashtable.so in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:10733)() KeyError: 'BIOCARTA_RAS_PATHWAY'
spread = clinical.processed.spread_inferred.ix[keepers_o]
survival_and_stats(spread, surv, figsize=(6,4))
fig, axs = subplots(5,1, figsize=(3.2, 5), sharex=True)
var_list = [p53_mut.map({1: 'mutated',0:'wild-type'}),
del_3p.map({-1: 'deleted', 0: 'not deleted'}),
mir_3170.map({0: 'absent', 1: 'present'}),
ras.map({0:'low', 1:'high'}),
spread.map({1: 'present', 0 : 'absent'})
]
n_rows = len(var_list)
for i,var in enumerate(var_list):
ax = axs[i]
surv_fit = get_surv_fit(surv, var)
surv_fit = surv_fit.sort([('5y Survival', 'Surv')])
t = surv_fit['5y Survival']
b = (t['Surv']).plot(kind='barh', ax=ax, color=[colors[0], colors[1]],
xerr=[t.Surv-t.Lower, t.Upper-t.Surv], ecolor='black')
bar_labels = ['{} ({})'.format(j, int(v)) for j,v in
surv_fit['Stats']['# Patients'].iteritems()]
ax.set_yticklabels(bar_labels, position=(-.1,0))
prettify_ax(ax)
ax.set_xlabel('5 Year Survival')
ax.set_xticks([0, .5, 1.])
fig.tight_layout(w_pad=.1)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-41-87ba10a80e0a> in <module>() 12 surv_fit = surv_fit.sort([('5y Survival', 'Surv')]) 13 t = surv_fit['5y Survival'] ---> 14 b = (t['Surv']).plot(kind='barh', ax=ax, color=[colors[0], colors[1]], 15 xerr=[t.Surv-t.Lower, t.Upper-t.Surv], ecolor='black') 16 bar_labels = ['{} ({})'.format(j, int(v)) for j,v in TypeError: 'function' object has no attribute '__getitem__'
survival_and_stats(mir548, surv)
survival_and_stats(combo, surv, order=['d','c','b','a'], figsize=(5,4), colors=colors_th)
from matplotlib_venn import venn2
fig, ax = subplots(figsize=(2.,2.))
a,b = p53_mut == 1, del_3p==-1
b.name = ' '
venn_pandas(a,b, colors=['white','white','white'], alpha=1)
venn_pandas(a,b, colors=np.array(colors_th)[[2,1,0]], alpha=.8)
r = plt.Rectangle((-.65,-.6), 1.28, 1.2, fill=True, alpha=.7, zorder=-10, facecolor=colors_th[3],
edgecolor='black', lw=3)
ax.add_artist(r);
get_cox_ph(surv, combo=='d', [age, old], print_desc=True, interactions='just_feature');
coef exp(coef) se(coef) z p feature 1.293 3.642 0.311 4.16 3.2e-05 old 0.627 1.872 0.157 3.99 6.6e-05 feature:old -0.392 0.675 0.183 -2.14 3.2e-02 Likelihood ratio test=36.9 on 3 df, p=4.95e-08 n= 259, number of events= 109 (119 observations deleted due to missingness)
get_cox_ph_ms(surv, combo=='d', [age, old], interactions='just_feature')
LR 4.95e-06 feature_p 3.24e-05 fmla Surv(days, event) ~ feature + old + feature:old\n hazzard 3.64 dtype: object
get_cox_ph_ms(surv, combo=='d', [age, old], interactions='just_feature')
LR 4.95e-06 feature_p 3.24e-05 fmla Surv(days, event) ~ feature + old + feature:old\n hazzard 3.64 dtype: object
survival_and_stats(st, surv, order=[3,2,1], figsize=(5,4), colors=colors_st)
get_cox_ph(surv, '_' + st.astype(str), [age, old], print_desc=True, interactions=None);
coef exp(coef) se(coef) z p feature_2 0.749 2.11 0.2781 2.69 7.1e-03 feature_3 1.354 3.87 0.2731 4.96 7.2e-07 old 0.317 1.37 0.0773 4.10 4.0e-05 Likelihood ratio test=40.3 on 3 df, p=9.12e-09 n= 259, number of events= 109 (119 observations deleted due to missingness)
get_cox_ph(surv, '_' + st.astype(str), [age, old], print_desc=True, interactions=None);
coef exp(coef) se(coef) z p feature_2 0.749 2.11 0.2781 2.69 7.1e-03 feature_3 1.354 3.87 0.2731 4.96 7.2e-07 old 0.317 1.37 0.0773 4.10 4.0e-05 Likelihood ratio test=40.3 on 3 df, p=9.12e-09 n= 259, number of events= 109 (119 observations deleted due to missingness)
get_cox_ph_ms(surv, '_' + st.astype(str), [age, old], interactions=None)
LR 8.74e-07 feature_p NaN fmla Surv(days, event) ~ feature + old\n hazzard NaN dtype: object
r = screen_feature(st==1, kruskal_pandas, rna.features)
r.head(10)
H | p | q | |
---|---|---|---|
(pathways, BIOCARTA_MAL_PATHWAY) | 52.99 | 3.35e-13 | 1.77e-09 |
(real, ACTR8) | 48.58 | 3.18e-12 | 6.98e-09 |
(pathways, KEGG_WNT_SIGNALING_PATHWAY) | 48.15 | 3.96e-12 | 6.98e-09 |
(real, MTMR14) | 47.49 | 5.52e-12 | 7.31e-09 |
(pathways, REACTOME_MITOCHONDRIAL_TRNA_AMINOACYLATION) | 46.19 | 1.07e-11 | 1.13e-08 |
(pathways, BIOCARTA_RAB_PATHWAY) | 44.43 | 2.64e-11 | 2.33e-08 |
(pathways, BIOCARTA_PAR1_PATHWAY) | 43.58 | 4.06e-11 | 3.07e-08 |
(pathways, REACTOME_MTORC1_MEDIATED_SIGNALLING) | 42.77 | 6.16e-11 | 3.62e-08 |
(pathways, BIOCARTA_BARRESTIN_SRC_PATHWAY) | 42.77 | 6.16e-11 | 3.62e-08 |
(real, RHOA) | 41.89 | 9.64e-11 | 5.10e-08 |
10 rows × 3 columns
violin_plot_pandas(st, rna.features.ix['pathways'].ix['KEGG_WNT_SIGNALING_PATHWAY'], order=[1,2,3,4])
features = [p for p in mut.features.index if 'TP53' not in p and
((p not in run.gene_sets) or ('TP53' not in run.gene_sets[p]))]
df = mut.features.ix[features]
fisher_exact_test??
r = screen_feature(st==1, fisher_exact_test, df)
r.head(10)
odds_ratio | p | q | |
---|---|---|---|
REACTOME_SOS_MEDIATED_SIGNALLING | 8.98 | 4.54e-07 | 4.41e-04 |
REACTOME_SHC_MEDIATED_SIGNALLING | 8.38 | 1.38e-06 | 6.73e-04 |
REACTOME_SHC_RELATED_EVENTS | 6.44 | 8.59e-06 | 2.50e-03 |
CASP8 | 6.79 | 1.03e-05 | 2.50e-03 |
BIOCARTA_BIOPEPTIDES_PATHWAY | 3.71 | 2.34e-05 | 4.55e-03 |
REACTOME_SIGNALLING_TO_RAS | 4.53 | 3.33e-05 | 5.41e-03 |
BIOCARTA_EPO_PATHWAY | 4.24 | 5.23e-05 | 7.27e-03 |
REACTOME_SIGNALLING_TO_ERKS | 3.81 | 7.62e-05 | 9.27e-03 |
BIOCARTA_MAL_PATHWAY | 3.48 | 1.38e-04 | 1.49e-02 |
REACTOME_GRB2_EVENTS_IN_EGFR_SIGNALING | 3.98 | 1.88e-04 | 1.83e-02 |
10 rows × 3 columns
pd.crosstab(mut.features.ix['CASP8'], st==1)
subtype | False | True |
---|---|---|
CASP8 | ||
0 | 175 | 58 |
1 | 8 | 18 |
2 rows × 2 columns
(fisher_exact_test(mut.features.ix['CASP8'], st==1)*len(mut.features)).ix['p']
0.01001397466082391
pd.crosstab(mut.features.ix['REACTOME_SOS_MEDIATED_SIGNALLING'], st==1)
subtype | False | True |
---|---|---|
REACTOME_SOS_MEDIATED_SIGNALLING | ||
0 | 176 | 56 |
1 | 7 | 20 |
2 rows × 2 columns
(fisher_exact_test(mut.features.ix['REACTOME_SOS_MEDIATED_SIGNALLING'], st==1)*len(mut.features)).ix['p']
0.00044263786606995285
r = screen_feature(st==2, chi2_cont_test, df)
r.head()
chi2 | p | dof | q | |
---|---|---|---|---|
BIOCARTA_ETS_PATHWAY | 13.31 | 2.64e-04 | 1 | 0.26 |
REACTOME_GLOBAL_GENOMIC_NER | 9.56 | 1.99e-03 | 1 | 0.95 |
KEGG_NUCLEOTIDE_EXCISION_REPAIR | 8.86 | 2.92e-03 | 1 | 0.95 |
REACTOME_CLASS_B2_SECRETIN_FAMILY_RECEPTORS | 7.76 | 5.33e-03 | 1 | 0.98 |
REACTOME_NUCLEOTIDE_EXCISION_REPAIR | 7.44 | 6.37e-03 | 1 | 0.98 |
5 rows × 4 columns
r = screen_feature(st==3, chi2_cont_test, df)
r.head()
chi2 | p | dof | q | |
---|---|---|---|---|
ASPM | 16.33 | 5.33e-05 | 1 | 0.05 |
REACTOME_DOWN_STREAM_SIGNAL_TRANSDUCTION | 8.47 | 3.60e-03 | 1 | 0.99 |
KEGG_CALCIUM_SIGNALING_PATHWAY | 7.57 | 5.92e-03 | 1 | 0.99 |
BIOCARTA_NFAT_PATHWAY | 7.20 | 7.30e-03 | 1 | 0.99 |
REACTOME_GENERATION_OF_SECOND_MESSENGER_MOLECULES | 6.62 | 1.01e-02 | 1 | 0.99 |
5 rows × 4 columns
pd.crosstab(mut.features.ix['ASPM'], st==3)
subtype | False | True |
---|---|---|
ASPM | ||
0 | 176 | 64 |
1 | 5 | 14 |
2 rows × 2 columns
(fisher_exact_test(mut.features.ix['ASPM'], st==3)*len(df)).ix['p']
0.057986399319199439
violin_plot_pandas(st==3, rna.df.ix['ASPM'][:,'01'])
draw_survival_curves(mut.features.ix['ASPM'].ix[ti(st==3)], surv, old)
get_cox_ph_ms(surv, mut.features.ix['ASPM'].ix[ti(st==3)], [age, old], interactions='just_feature')['fmla']
'Surv(days, event) ~ feature + age + old + feature:old\n'
from Figures.MemoPlot import pathway_plot
from Figures.Boxplots import paired_boxplot_tumor_normal
import Data.Firehose as FH
rna_df = {}
for c in run.cancers:
try:
rna_df[c] = FH.read_rnaSeq(run.data_path, c, tissue_code='All')
except:
pass
rna_df = {a:b for a,b in rna_df.iteritems() if b is not None}
aspm = pd.concat({d: c.ix['ASPM'] for d,c in rna_df.iteritems()})
aspm = aspm.groupby(level=[0,1,2]).first()
aspm = aspm.reset_index().set_index('level_1')
codes = aspm['level_0']
codes = codes[codes.isin(['COADREAD', 'PANCAN12'])==False]
codes = codes.groupby(level=0).first()
aspm = aspm.reset_index().set_index(['level_1','level_2'])[0]
aspm = aspm.groupby(level=[0,1]).first()
vec = pd.concat([aspm[:,'01'], aspm[:,'11']], keys=['01','11'], axis=1)
vec = vec.dropna().stack()
pd.crosstab(mut.features.ix['ASPM'], two_hit)
d | False | True |
---|---|---|
ASPM | ||
0 | 73 | 167 |
1 | 3 | 16 |
2 rows × 2 columns
draw_survival_curves(mut.features.ix['ASPM'], surv, st)
fig = plt.figure(figsize=(10, 5))
ax = plt.subplot2grid((2,8), (0,0), colspan=2)
ct = pd.crosstab(st, mut.features.ix['ASPM'].map({1:'mut',0:'wt'}))
ct.index = ['TP53-3p neg.', 'TP53-3p pos.', 'TP53-3p pos. / \n mir548k pos.','TP53-3p pos. / \n MUC5B pos.']
ct = ct.ix[::-1]
ct.plot(kind='barh', color=[colors[2],'grey'], alpha=.6, ax=ax)
ax.set_xlabel('Num. Patients')
ax.legend(frameon=False, loc='lower right')
prettify_ax(ax)
ax = plt.subplot2grid((2,8), (0,2), colspan=6)
counts = vec.unstack().groupby(codes).size()
cancers = list(true_index(counts > 5))
cancers = vec.unstack().groupby(codes).median()['01'].ix[cancers].order().index[::-1]
l1 = [array(vec.ix[:,'01'].ix[true_index(codes==c)].dropna()) for c in cancers]
l2 = [array(vec.ix[:,'11'].ix[true_index(codes==c)].dropna()) for c in cancers]
boxes = [x for t in zip(l1, l2) for x in t if len(t[1]) > 5]
ax, bp = paired_boxplot(boxes, ax)
labels = ['{}\n({})'.format(c, counts[c]) for c in cancers]
ax.set_xticklabels(labels);
prettify_ax(ax)
ax.set_ylabel('Log2 ASPM mRNA')
ax = plt.subplot2grid((2,8), (1,0), colspan=3)
violin_plot_pandas(st==3, rna.df.ix['ASPM'][:,'01'], ann=None, ax=ax)
ax.set_xticklabels(['Other\nSubtypes', 'TP53 pos. / \nmir548k pos.'])
ax.set_ylabel('Log2 ASPM mRNA')
prettify_ax(ax)
ax = plt.subplot2grid((2,8), (1,3), colspan=4)
draw_survival_curve(mut.features.ix['ASPM'].ix[true_index(st==3)], surv,
ax=ax, colors=['grey', colors[2]])
ax.legend(loc='lower left', frameon=False)
prettify_ax(ax)
fig.tight_layout()
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-133-4db5b24c2bb8> in <module>() 2 ax = plt.subplot2grid((2,8), (0,0), colspan=2) 3 ct = pd.crosstab(st, mut.features.ix['ASPM'].map({1:'mut',0:'wt'})) ----> 4 ct.index = ['TP53-3p neg.', 'TP53-3p pos.', 'TP53-3p pos. / \n mir548k pos.','TP53-3p pos. / \n MUC5B pos.'] 5 ct = ct.ix[::-1] 6 ct.plot(kind='barh', color=[colors[2],'grey'], alpha=.6, ax=ax) /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/generic.pyc in __setattr__(self, name, value) 1638 existing = getattr(self, name) 1639 if isinstance(existing, Index): -> 1640 object.__setattr__(self, name, value) 1641 elif name in self._info_axis: 1642 self[name] = value /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/lib.so in pandas.lib.AxisProperty.__set__ (pandas/lib.c:30529)() /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/generic.pyc in _set_axis(self, axis, labels) 392 393 def _set_axis(self, axis, labels): --> 394 self._data.set_axis(axis, labels) 395 self._clear_item_cache() 396 /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/internals.pyc in set_axis(self, axis, value, maybe_rename, check_axis) 2003 2004 def set_axis(self, axis, value, maybe_rename=True, check_axis=True): -> 2005 cur_axis, value = self._set_axis(axis, value, check_axis) 2006 2007 if axis == 0: /cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas-0.13.0_247_g82bcbb8-py2.7-linux-x86_64.egg/pandas/core/internals.pyc in _set_axis(self, axis, value, check_axis) 1996 raise ValueError('Length mismatch: Expected axis has %d elements, ' 1997 'new values have %d elements' % (len(cur_axis), -> 1998 len(value))) 1999 2000 self.axes[axis] = value ValueError: Length mismatch: Expected axis has 3 elements, new values have 4 elements
fig = plt.figure(figsize=(12,7))
ax = plt.subplot2grid((2,3), (1,0), colspan=3)
counts = vec.unstack().groupby(codes).size()
cancers = true_index(counts > 5)
l1 = [array(vec.ix[:,'01'].ix[true_index(codes==c)].dropna()) for c in cancers]
l2 = [array(vec.ix[:,'11'].ix[true_index(codes==c)].dropna()) for c in cancers]
boxes = [x for t in zip(l1, l2) for x in t if len(t[1]) > 5]
ax, bp = paired_boxplot(boxes, ax)
labels = ['{}\n({})'.format(c, counts[c]) for c in cancers]
ax.set_xticklabels(labels);
prettify_ax(ax)
ax.set_ylabel('Log2 ASPM mRNA')
ax = plt.subplot2grid((2,3), (0,0), colspan=1)
violin_plot_pandas(st==3, rna.df.ix['ASPM'].ix[:, '01'], ax=ax)
ax.set_xlabel('TCGA HNSCC HPV-')
ax.set_ylabel('Log2 ASPM mRNA')
prettify_ax(ax)
ax = plt.subplot2grid((2,3), (0,1), colspan=2)
tt = pd.Series({'01': 'Primary solid\nTumor','02':'Recurrent Solid\nTumor',
'03':'Peripheral\nBlood', '06':'Metastatic', '11':'Solid Tissue\nNormal'})
tissue = pd.Series(map(lambda s: tt[s], aspm.index.get_level_values(1)), aspm.index)
violin_plot_pandas(tissue, aspm, order=aspm.groupby(tissue).median().order().index, ax=ax)
prettify_ax(ax)
fig.tight_layout()
from Figures.MemoPlot import pathway_plot
dd = mut.df.ix[run.gene_sets['REACTOME_SOS_MEDIATED_SIGNALLING']]
dd = dd.ix[dd.sum(1).dropna().order().index[::-1]]
vec = dd.apply(lambda s: s.idxmax() if s.max() > 0 else 'WT')
vec = vec.ix[keepers_o].dropna()
vec.ix[true_index(st!=1)] = 'TP53-3p'
pp = screen_feature(vec=='HRAS', kruskal_pandas, rna.features.ix['real'])
vals = extract_pc(rna.features.ix['real'].ix[pp.q < .05], 0)['pat_vec']
hit_vec = vec
fig = plt.figure(figsize=(10,5))
ax = plt.subplot2grid((2,7), (0,0), colspan=2)
ct = pd.crosstab(st, mut.features.ix['REACTOME_SOS_MEDIATED_SIGNALLING'].map({1:'mutated',0:'wt'}))
ct.plot(kind='bar', ax=ax, color=[colors[2],'grey'], alpha=.7)
ax.set_ylabel('Num. Patients')
ax.get_legend().set_visible(False)
ax.set_xticklabels([1,2,3,4], rotation=True)
prettify_ax(ax)
ax = plt.subplot2grid((2,7), (0,2), colspan=2)
df = pathway_plot(mut.df.ix[run.gene_sets['REACTOME_SOS_MEDIATED_SIGNALLING'], true_index(st==1)]>0, ax=ax, bar=None)
ax = plt.subplot2grid((2,7), (0,4), colspan=3)
groups = hit_vec.value_counts().index
for x,group in enumerate(groups):
y = vals.ix[true_index(hit_vec==group)]
pct = (1.*len(y)) / len(vals)
ax.scatter(np.random.normal(scale=.2*(pct**.5), size=len(y)).clip(-.5,.5) + x, y, alpha=.5, s=50,
facecolors='none', edgecolors=colors[1], lw=1)
ax.annotate(len(y), (x, -2.7), ha='center')
ax.set_xticks(range(len(groups)))
ax.set_xticklabels(groups, rotation=320);
box_plot_pandas(hit_vec, vals, ax)
prettify_ax(ax)
ax = plt.subplot2grid((2,7), (1,0), colspan=2)
ct = pd.crosstab(st, mut.features.ix['ASPM'].map({1:'mut',0:'wt'}))
ct.plot(kind='bar', ax=ax, color=[colors[2],'grey'], alpha=.6)
ax.set_ylabel('Num. Patients')
ax.legend(loc='upper right', frameon=False)
ax.set_xticklabels([1,2,3,4], rotation=True)
ax.set_xlabel('subtype')
prettify_ax(ax)
ax = plt.subplot2grid((2,7), (1,2), colspan=2)
s = pd.Series(aspm.index.get_level_values(1), aspm.index)
s = s[s.isin(['01','11'])]
paired = true_index(s.groupby(level=0).size() == 2)
s = pd.Series({i: v for i,v in s.iteritems() if i[0] in paired})
s.index = pd.MultiIndex.from_tuples(s.index)
violin_plot_pandas(s, aspm, ann=None, ax=ax)
ax.set_xticklabels(['Normal', 'Tumor'])
ax.set_xlabel('Matched TCGA Samples (n=541)')
ax.set_ylabel('Log2 ASPM mRNA')
prettify_ax(ax)
ax = plt.subplot2grid((2,7), (1,4), colspan=3)
draw_survival_curve(mut.features.ix['ASPM'].ix[true_index(st==3)], surv,
ax=ax, colors=['grey', colors[2]])
ax.legend(loc='lower left', frameon=False)
prettify_ax(ax)
fig.tight_layout()
from Data.Intermediate import get_mutation_rates
rates = get_mutation_rates(run.data_path, cancer.name, patients=keepers_o)
screen_feature(st[st.isin([2,3,4])]==4, kruskal_pandas, rates.T).sort('p')
H | p | q | |
---|---|---|---|
A->mut | 2.51 | 0.11 | 0.59 |
rate_non | 2.28 | 0.13 | 0.59 |
rate_dbsnp | 1.19 | 0.27 | 0.62 |
*CpG->T | 1.09 | 0.30 | 0.62 |
rate_sil | 0.89 | 0.35 | 0.62 |
double_null | 0.42 | 0.52 | 0.78 |
indel+null | 0.08 | 0.78 | 0.88 |
*Cp(A_C_T)->T | 0.04 | 0.84 | 0.88 |
C->(G_A) | 0.02 | 0.88 | 0.88 |
screen_feature(two_hit, kruskal_pandas, rates.T).sort('p')
H | p | q | |
---|---|---|---|
rate_sil | 8.66 | 0.00 | 0.02 |
*CpG->T | 7.60 | 0.01 | 0.02 |
A->mut | 7.50 | 0.01 | 0.02 |
rate_non | 6.28 | 0.01 | 0.03 |
C->(G_A) | 4.35 | 0.04 | 0.07 |
*Cp(A_C_T)->T | 1.33 | 0.25 | 0.37 |
rate_dbsnp | 1.06 | 0.30 | 0.39 |
indel+null | 0.65 | 0.42 | 0.47 |
double_null | 0.09 | 0.77 | 0.77 |
violin_plot_pandas(two_hit, np.log(rates.rate_non))
patients = true_index(st.isin([2,3]))
survival_and_stats(mirna.features.ix['binary'].ix['hsa-mir-548k'].ix[patients],
surv, figsize=(6,4))
patients = ti(two_hit)
coords = pd.read_csv('../Extra_Data/gene_coords.csv', index_col=0)
coords['start'] = coords['start'].astype(float)
cn_by_gene = FH.get_gistic_gene_matrix(run.data_path, cancer.name)
cn_by_gene = cn_by_gene.reset_index(level=[0,1])
cn_by_gene = cn_by_gene.join(coords).set_index(['chrom','start', 'Cytoband','Locus ID'], append=True)
cn_by_gene = cn_by_gene.ix[:,patients].dropna(axis=1)
ch_3 = cn_by_gene.xs('11', level='chrom')
p_arm = pd.Series(ch_3.index.get_level_values('Cytoband').map(lambda s: '11q13' in s), ch_3.index)
p_arm = ch_3[p_arm].sortlevel('start')
p_arm = p_arm[1:]
df = rna.df.xs('01', 1, 1).ix[p_arm.index.get_level_values(0)].dropna()
df = df.ix[:,patients].dropna(1)
df = df[(df == -3).sum(1) != len(df.T)]
test = lambda s: get_cox_ph_ms(surv, s, [age, old, clinical.processed.year], interactions=False)
exp = df.apply(test, axis=1)
exp = exp.sort_index()
exp.index = p_arm.sort_index().ix[exp.index].index
exp2 = -exp.LR.map(log10)
mir = pd.read_table('ftp://mirbase.smith.man.ac.uk/pub/mirbase/12.0/genomes/hsa.gff', skiprows=7, header=None)
mir.index = mir.apply(lambda s: s[8].split('"')[-2], 1)
mir = mir[[0,3]]
mir.columns = ['Chromosome', 'start']
r = p_arm.index.get_level_values('start')[[0,-1]]
p_range = lambda s: (s > r[0]) * (s < r[1])
loc = mir[mir.Chromosome == '11'].sort('start')
loc = loc[loc['start'].apply(p_range)]
df = mirna.df.ix[loc.index].dropna().xs('01',1,1)
df = df.ix[:,patients].dropna(1)
df = df[(df == -3).sum(1) != len(df.T)]
mir_surv = df.apply(test, axis=1)
loc = loc.set_index(['Chromosome', 'start'], drop=False, append=True)
mir_surv.index = loc.index
mir2 = -mir_surv.LR.map(log10)
fig = plt.figure(figsize=(10,4.5))
ax1 = plt.subplot2grid((7,1), (0,0), rowspan=3, colspan=1)
ax2 = plt.subplot2grid((7,1), (3,0), rowspan=3, colspan=1)
ax3 = plt.subplot2grid((7,1), (6,0), rowspan=1, colspan=1)
bands = p_arm.reset_index().groupby('Cytoband')
bands = bands.first().start.order()
starts = p_arm.index.get_level_values('start')
for i,(idx,v) in enumerate(bands.iteritems()):
try:
r = plt.Rectangle((v,0), bands.iloc[i+1]-v, 1, lw=2,
alpha=.5, fc=colors[i%2])
except:
r = plt.Rectangle((v,0), starts[-1] - v, 1, lw=2,
alpha=.5, fc=colors[i%2])
ax3.add_artist(r)
ax3.set_xticks(bands)
b = [s for i,s in enumerate(bands.index)]
ax3.set_xticklabels(b, rotation=270+45, ha='left')
ax1.scatter(starts, (p_arm > 1).sum(1), s=40, c=colors[2],
edgecolor='grey', alpha=1)
ax2.scatter(exp2.index.get_level_values('start'), exp2, s=40,
c=colors[4], edgecolor='grey', alpha=1)
ax2.scatter(mir2.index.get_level_values('start'), mir2, s=40,
c=colors[0], edgecolor='grey', alpha=1)
#ax1.set_ybound(215,255)
ax1.set_ylabel('# of Patients\nwith Amp.\n\n', ha='center', va='center')
ax2.set_ybound(-.5,0)
ax2.set_ylabel('mRNA/miRNA\nCox Surv.\n(Log10 p)\n\n\n', ha='center', va='center')
ax3.set_yticks([])
fig.tight_layout()
for ax in [ax1, ax2, ax3]:
ax.set_xbound(starts[0],
starts[-1])
ax.set_xticks(bands)
for n, ss in mir2.order().tail(1).iteritems():
ax2.annotate(n[0], (n[2], ss), xytext=(15, -5),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
for n, ss in exp2.ix[['CCND1', 'FADD']].iteritems():
ax2.annotate(n[0], (n[1], ss), xytext=(15, -5),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
ax1.set_xticklabels('')
ax2.set_xticklabels('')
ax2.set_yticks([0,1,2,3,4])
fig.tight_layout()
f = mirna.df.ix['hsa-mir-548k'].ix[:,'01'].ix[patients]
fig, axs = subplots(2,1, figsize=(4,4.5))
ax = axs[1]
violin_plot_pandas(cn_by_gene.ix['FADD'].ix[0], f, ax=ax)
ax.axhline(-1, ls='--', lw=2, color='grey')
ax.set_xlabel('FADD Copy Number Status')
prettify_ax(ax)
ax=axs[0]
violin_plot_series(mirna.df.ix['hsa-mir-548k'].ix[patients].clip_lower(-3), ax=ax)
ax.axhline(-1, ls='--', lw=2, color='grey')
ax.set_xlabel('')
ax.set_ylabel('mir-548k Exp.')
prettify_ax(ax)
fig.tight_layout()
f = mirna.features.ix['binary'].ix['hsa-mir-548k'].ix[patients]
g = cn_by_gene.ix['FADD'].ix[0]
g.name = 'cn'
survival_and_stats(combine(g>1, f), surv, figsize=(5.5,4.5))