#!/usr/bin/env python
# coding: utf-8

# # Calculate CCLE Tissue Heatmaps
# This notebook calculates a heatmap for each tissue in the CCLE.

# In[1]:

from clustergrammer_widget import *
net = Network(clustergrammer_widget)


# ### Load CCLE data
# Load the CCLE data and export it as a Pandas DataFrame that is used to
# generate the tissue-specific heatmaps.

# In[28]:

net.load_file('../original_data/CCLE.txt')
ccle = net.export_df()
print(ccle.shape)


# ### Get Unique Tissues

# In[29]:

# Element 1 of every column label is taken as that column's tissue category;
# collect the distinct categories in sorted order.
cols = ccle.columns.tolist()
tissues = sorted({label[1] for label in cols})


# # Intra-Normalized Tissue-Specific Heatmaps

# In[30]:

# Intra-tissue normalization: for every tissue, filter the matrix down to that
# tissue, then rank, normalize, enrich, cluster, and save the result as JSON.
keep_tissues = []

for inst_tissue in tissues:

    # reload the full CCLE DataFrame, then keep only this tissue's columns
    net.load_df(ccle)
    net.filter_cat('col', 1, inst_tissue)
    num_cols = net.dat['mat'].shape[1]

    # tissues with a single cell line are skipped entirely
    if num_cols <= 1:
        continue

    print(': '.join([inst_tissue, str(num_cols)]))

    # remember which tissues have multiple cell lines
    keep_tissues.append(inst_tissue)

    # top 250 genes in the tissue, ranked by variance
    net.filter_N_top('row', 250, 'var')

    # z-score each gene across the cell lines of this tissue
    net.normalize(axis='row', norm_type='zscore')

    # pre-calculate enrichment analysis for Gene Ontology Biological Process
    net.enrichrgram('GO_Biological_Process_2015')

    # cluster and tell the front-end to enable enrichrgram
    # (views=[]: do not calculate row-filtered views)
    net.cluster(views=[], enrichrgram=True)

    # save to JSON; the file name uses the part of the category after ': '
    tissue_name = inst_tissue.split(': ')[1]
    filename = ''.join(['../json/intra-norm_', tissue_name, '.json'])
    net.write_json_to_file('viz', filename, indent='no-indent')


# # Inter-Normalized Tissue-Specific Heatmaps
# Here, we are making tissue-specific heatmaps using the most consistently
# differentially expressed genes across each tissue relative to all cell lines
# in the CCLE.

# In[31]:

# Make the inter-tissue normalized CCLE DataFrame: every gene (row) is
# z-scored across all cell lines before any tissue filtering is applied.
net.load_df(ccle)
net.normalize(axis='row', norm_type='zscore')
ccle_zscore = net.export_df()

for inst_tissue in keep_tissues:

    # load inter-tissue normalized data
    net.load_df(ccle_zscore)

    # filter for tissue of interest
    net.filter_cat('col', 1, inst_tissue)

    # Count this tissue's cell lines from the filtered matrix. num_cols has to
    # be recomputed here: reusing a value assigned in an earlier cell would
    # report the same (wrong) count for every tissue.
    num_cols = net.dat['mat'].shape[1]
    print(inst_tissue + ': ' + str(num_cols))

    # keep the top 250 differentially expressed genes (ranked by row sum of
    # the z-scored values)
    net.filter_N_top('row', 250, 'sum')

    # pre-calculate enrichment analysis for Gene Ontology Biological Process
    net.enrichrgram('GO_Biological_Process_2015')

    # cluster and tell front-end to enable enrichrgram
    net.cluster(enrichrgram=True)

    # save to JSON; the file name uses the part of the category after ': '
    filename = '../json/inter-norm_' + inst_tissue.split(': ')[1] + '.json'
    net.write_json_to_file('viz', filename, indent='no-indent')


# In[ ]: