%pylab inline import os as os import pickle as pickle import pandas as pd print 'changing to source dirctory' os.chdir('../src') from Stats.Scipy import * from Stats.Survival import * from Processing.Helpers import * from Figures.Helpers import * from Figures.Pandas import * from Figures.Boxplots import * from Figures.Survival import draw_survival_curve, survival_and_stats from Figures.Survival import draw_survival_curves from Figures.Survival import survival_stat_plot import Data.Firehose as FH pd.set_option('precision', 3) pd.set_option('display.line_width', 100) pd.set_option('display.width', 300) plt.rcParams['font.size'] = 12 '''Color schemes for paper taken from http://colorbrewer2.org/''' colors = plt.rcParams['axes.color_cycle'] colors_st = ['#CA0020', '#F4A582', '#92C5DE', '#0571B0'] colors_th = ['#E66101', '#FDB863', '#B2ABD2', '#5E3C99'] def get_run(firehose_dir, version='Latest'): ''' Helper to get a run from the file-system. ''' path = '{}/ucsd_analyses'.format(firehose_dir) if version is 'Latest': version = sorted(os.listdir(path))[-1] run = pickle.load(open('{}/{}/RunObject.p'.format(path, version), 'rb')) return run print 'populating namespace with data' params = pd.read_table('../global_params.txt', header=None, squeeze=True, index_col=0) run_path = '{}/Firehose__{}/'.format(params.ix['OUT_PATH'], params.ix['RUN_DATE']) run = get_run(run_path, 'Run_' + params.ix['VERSION']) cancer = run.load_cancer(params.ix['CANCER']) clinical = cancer.load_clinical() mut = cancer.load_data('Mutation') mut.uncompress() cn = cancer.load_data('CN_broad') cn.uncompress() rna = pickle.load(open(cancer.path + '/mRNASeq/store/no_hpv2.p', 'rb')) mirna = pickle.load(open(cancer.path + '/miRNASeq/store/no_hpv2.p', 'rb')) hpv = clinical.hpv surv = clinical.survival.survival_5y age = clinical.clinical.age.astype(float) old = pd.Series(1.*(age>=75), name='old') pats = [true_index(hpv == 0), mut.features.columns, cn.features.columns, surv.unstack().index, rna.features.columns, mirna.features.columns, true_index(age < 85)] keepers_o = reduce(set.intersection, map(set, pats)) keepers_o = pd.Index(keepers_o) len(keepers_o)