%pylab inline import os as os import pickle as pickle import pandas as pd print 'changing to source dirctory' os.chdir('../src') from Stats.Scipy import * from Stats.Survival import * from Processing.Helpers import * from Figures.Helpers import * from Figures.Pandas import * from Figures.Boxplots import * from Figures.Survival import draw_survival_curve, survival_and_stats from Figures.Survival import draw_survival_curves from Figures.Survival import survival_stat_plot import Data.Firehose as FH from IPython import utils from IPython.display import HTML css_file = 'profile_default/static/custom/custom.css' base = utils.path.get_ipython_dir() styles = "" % (open(os.path.join(base, css_file),'r').read()) display(HTML(styles)) pd.set_option('precision', 3) pd.set_option('display.width', 300) plt.rcParams['font.size'] = 12 '''Color schemes for paper taken from http://colorbrewer2.org/''' colors = plt.rcParams['axes.color_cycle'] colors_st = ['#CA0020', '#F4A582', '#92C5DE', '#0571B0'] colors_th = ['#E66101', '#FDB863', '#B2ABD2', '#5E3C99'] def get_run(firehose_dir, version='Latest'): ''' Helper to get a run from the file-system. ''' path = '{}/ucsd_analyses'.format(firehose_dir) if version is 'Latest': version = sorted(os.listdir(path))[-1] run = pickle.load(open('{}/{}/RunObject.p'.format(path, version), 'rb')) return run OUT_PATH = '../Data' RUN_DATE = '2014_01_15' VERSION = 'all' CANCER = 'HNSC' FIGDIR = '../Figures/' if not os.path.isdir(FIGDIR): os.makedirs(FIGDIR) print 'populating namespace with data' run_path = '{}/Firehose__{}/'.format(OUT_PATH, RUN_DATE) run = get_run(run_path, 'Run_' + VERSION) cancer = run.load_cancer(CANCER) clinical = cancer.load_clinical() mut = cancer.load_data('Mutation') mut.uncompress() cn = cancer.load_data('CN_broad') cn.uncompress() rna = pickle.load(open(cancer.path + '/mRNASeq/store/no_hpv2.p', 'rb')) mirna = pickle.load(open(cancer.path + '/miRNASeq/store/no_hpv2.p', 'rb')) hpv = clinical.hpv surv = clinical.survival.survival_5y age = clinical.clinical.age.astype(float) old = pd.Series(1.*(age>=75), name='old') pats = [true_index(hpv == 0), mut.features.columns, cn.features.columns, surv.unstack().index, rna.features.columns, mirna.features.columns, true_index(age < 85)] keepers_o = reduce(set.intersection, map(set, pats)) keepers_o = pd.Index(keepers_o) len(keepers_o)