%pylab inline
Populating the interactive namespace from numpy and matplotlib
import os as os
import pickle as pickle
import pandas as pd
# Progress message for the notebook log. The parenthesized single-argument
# form prints the same text on Python 2 and also parses on Python 3.
print('changing to source dirctory')
changing to source dirctory
# Switch the process working directory to ../src.
# NOTE(review): the project imports that follow (Stats, Processing, Figures,
# Data) presumably resolve relative to this directory -- confirm.
os.chdir('../src')
from Stats.Scipy import *
from Stats.Survival import *
from Processing.Helpers import *
from Figures.Helpers import *
from Figures.Pandas import *
from Figures.Boxplots import *
from Figures.Survival import draw_survival_curve, survival_and_stats
from Figures.Survival import draw_survival_curves
from Figures.Survival import survival_stat_plot
import Data.Firehose as FH
Load default custom.css file from ipython profile
from IPython import utils
from IPython.display import HTML
# Read custom.css from beneath the IPython directory and inject it into the
# notebook page wrapped in a <style> element.
css_file = 'profile_default/static/custom/custom.css'
base = utils.path.get_ipython_dir()
# Use a context manager so the stylesheet's file handle is closed as soon as
# it has been read, rather than being left for the garbage collector.
with open(os.path.join(base, css_file), 'r') as css_handle:
    styles = "<style>\n%s\n</style>" % css_handle.read()
display(HTML(styles))
Pandas display parameters
# Configure how pandas renders objects: display precision of 3 and a
# 300-character output width.
# The fully-qualified key 'display.precision' is used because the short
# pattern 'precision' is resolved by regex matching and becomes ambiguous
# once more than one registered option contains that word.
pd.set_option('display.precision', 3)
pd.set_option('display.width', 300)
# Set matplotlib's default font size to 12 for figures drawn after this point.
plt.rcParams['font.size'] = 12
Tweaking color scheme
'''Color schemes for paper taken from http://colorbrewer2.org/'''
# Two hand-picked four-color palettes, given as hex RGB strings.
colors_th = [
    '#E66101',
    '#FDB863',
    '#B2ABD2',
    '#5E3C99',
]
colors_st = [
    '#CA0020',
    '#F4A582',
    '#92C5DE',
    '#0571B0',
]
# Keep a handle on matplotlib's currently configured color cycle as well.
colors = plt.rcParams['axes.color_cycle']
def get_run(firehose_dir, version='Latest'):
    '''
    Helper to get a run from the file-system.

    firehose_dir: directory that contains a ``ucsd_analyses`` sub-directory.
    version: name of the run sub-directory to load.  The default, 'Latest',
        selects the entry of ``ucsd_analyses`` that sorts last by name.

    Returns the object unpickled from
    ``<firehose_dir>/ucsd_analyses/<version>/RunObject.p``.

    Raises IndexError if 'Latest' is requested and ``ucsd_analyses`` is
    empty, and the usual OSError/IOError if a path does not exist.
    '''
    path = '{}/ucsd_analyses'.format(firehose_dir)
    # Compare by value: `is` tests object identity and only appears to work
    # when both strings happen to be interned.
    if version == 'Latest':
        version = sorted(os.listdir(path))[-1]
    # NOTE: unpickling can execute arbitrary code; only load run objects
    # from directories you trust.
    with open('{}/{}/RunObject.p'.format(path, version), 'rb') as run_file:
        run = pickle.load(run_file)
    return run
# OUT_PATH and RUN_DATE locate the Firehose run directory; VERSION and CANCER
# select which run and which cancer cohort are loaded from it.
OUT_PATH = '../Data'
RUN_DATE = '2014_01_15'
VERSION = 'all'
CANCER = 'HNSC'
# Directory that figures are written to; created on first use.
FIGDIR = '../Figures/'
# Attempt the creation and tolerate "already exists" instead of checking
# first, so a directory that appears between the check and the call does not
# raise.  Any other failure (e.g. permissions) is still propagated.
try:
    os.makedirs(FIGDIR)
except OSError:
    if not os.path.isdir(FIGDIR):
        raise
Here we read in the pre-processed data that we downloaded and initialized in the download_data notebook.
# Progress message for the notebook log. The parenthesized single-argument
# form prints the same text on Python 2 and also parses on Python 3.
print('populating namespace with data')
populating namespace with data
# Build the Firehose run directory path from OUT_PATH and RUN_DATE, then load
# the pickled run object for the 'Run_' + VERSION sub-directory.
run_path = '{}/Firehose__{}/'.format(OUT_PATH, RUN_DATE)
run = get_run(run_path, 'Run_' + VERSION)
# Load the cohort named by CANCER and its clinical data from the run.
cancer = run.load_cancer(CANCER)
clinical = cancer.load_clinical()
# Load the 'Mutation' and 'CN_broad' data sets.
# NOTE(review): uncompress() presumably expands a compact stored form in
# place before the features are used -- confirm against the data classes.
mut = cancer.load_data('Mutation')
mut.uncompress()
cn = cancer.load_data('CN_broad')
cn.uncompress()
The expression objects we use are the updated ones using only HPV- patients. We do this because the expression uses some global variables as filters, so we need to re-form the feature set after we limit the cohort to HPV- patients.
# Load the pre-computed mRNA and miRNA expression objects stored as
# 'no_hpv2.p' under the cancer's data directory.  Each file is opened in a
# `with` block so its handle is closed as soon as unpickling finishes.
# NOTE: unpickling can execute arbitrary code; these files must be trusted.
with open(cancer.path + '/mRNASeq/store/no_hpv2.p', 'rb') as rna_file:
    rna = pickle.load(rna_file)
with open(cancer.path + '/miRNASeq/store/no_hpv2.p', 'rb') as mirna_file:
    mirna = pickle.load(mirna_file)
# Clinical covariates pulled out of the loaded clinical object.
age = clinical.clinical.age.astype(float)
hpv = clinical.hpv
surv = clinical.survival.survival_5y
# Indicator (as floats) of patients aged 75 or older.
old = pd.Series(1. * (age >= 75), name='old')

# One collection of patient identifiers per inclusion criterion: hpv == 0,
# present in each of the mutation / copy-number / survival / mRNA / miRNA
# data sets, and age below 85.
pats = [
    true_index(hpv == 0),
    mut.features.columns,
    cn.features.columns,
    surv.unstack().index,
    rna.features.columns,
    mirna.features.columns,
    true_index(age < 85),
]
# Keep only the patients that satisfy every criterion.
keepers_o = pd.Index(set.intersection(*map(set, pats)))
len(keepers_o)
250