This notebook uses Clustergrammer2 to visualize the Cancer Cell Line Encyclopedia (CCLE) gene expression data (obtained from the Broad Institute). The CCLE project measured genetic data from over 1000 cancer cell lines and provides cell line annotations (e.g. tissue) that are used to generate cell type categories.
from clustergrammer2 import Network, CGM2
import warnings
# Silence every warning for the rest of the session so library noise
# does not clutter the notebook output.
warnings.filterwarnings('ignore')

# Single container for every DataFrame used below, keyed by processing stage.
df = dict()
>> clustergrammer2 backend version 0.14.0
import pandas as pd

# Read the gzipped CCLE table; the first column of the file becomes the row index.
df['ini'] = pd.read_csv(
    '../data/CCLE/CCLE.txt.gz',
    index_col=0,
    compression='gzip',
)
from ast import literal_eval as make_tuple

# The column labels are read from the CSV as strings and are evaluated here
# into Python literals (presumably tuples of name + category annotations —
# confirm against the data file).
cols = df['ini'].columns.tolist()

# Only parse labels that are still strings: once this cell has run, the labels
# are no longer strings and literal_eval would raise ValueError on them, so the
# guard keeps the cell safe to re-run.
new_cols = [make_tuple(x) if isinstance(x, str) else x for x in cols]
df['ini'].columns = new_cols
# Two independent Clustergrammer2 networks: n1 is used for the 500-gene view,
# n2 for the hand-picked gene subset further down.
n1, n2 = Network(CGM2), Network(CGM2)
from copy import deepcopy

# Work on an independent copy so the frame stored under df['ini'] is left as loaded.
df['clean'] = deepcopy(df['ini'])

# Turn the parsed column labels into a per-column metadata table
# (it is later indexed by 'tissue' and passed to load_df as meta_col).
df['meta_col'] = n1.make_df_from_cols(df['ini'].columns.tolist())

# Relabel the copy's columns with the metadata table's index so the two line up.
df['clean'].columns = df['meta_col'].index.tolist()
# Category -> colour lookup table, later passed to set_global_cat_colors and
# as preferred_cats to set_manual_category.
# Built in a single constructor call rather than by enlarging an empty frame
# one .loc assignment at a time; row order matches the original assignments.
df['meta_cat_col'] = pd.DataFrame(
    {'color': ['red', 'yellow', 'black', 'blue', 'green']},
    index=['Cat', 'Dog', 'Shark', 'Snake', 'Lizard'],
)
df['meta_cat_col']
color | |
---|---|
Cat | red |
Dog | yellow |
Shark | black |
Snake | blue |
Lizard | green |
# Load the relabelled expression matrix into n1 together with its column metadata.
n1.load_df(df['clean'], meta_col=df['meta_col'])
# Hand the category/colour table (df['meta_cat_col']) to the network as global category colours.
n1.set_global_cat_colors(df['meta_cat_col'])
# Keep only the top 500 rows as ranked by 'var' (presumably variance — confirm in the Clustergrammer2 docs).
n1.filter_N_top(axis='row', N_top=500, rank_type='var')
# Z-score along the row axis. This runs after the filter, so the ranking above sees un-normalised values.
n1.normalize(axis='row', norm_type='zscore')
# Set up manual categories on the 'tissue' column with df['meta_cat_col'] as the preferred categories.
# NOTE(review): presumably this is what makes 'tissue' editable in the widget — confirm.
n1.set_manual_category(col='tissue', preferred_cats=df['meta_cat_col'])
# Last expression of the cell: renders the interactive widget.
n1.widget()
CGM2(manual_cat='{"col": {"col_cat_colors": {"tissue: autonomic_ganglia": "#393b79", "tissue: biliary_tract": …
# Count how many columns fall into each 'tissue' category.
df['meta_col'].loc[:, 'tissue'].value_counts()
Snake 388 Something!!!!!! 324 haematopoietic_and_lymphoid_tissue 174 lung 66 autonomic_ganglia 13 liver 12 bone 10 ovary 9 stomach 8 soft_tissue 8 pancreas 5 endometrium 5 Lizard 4 central_nervous_system 3 large_intestine 3 breast 2 small_intestine 1 prostate 1 thyroid 1 Name: tissue, dtype: int64
# Hand-picked gene symbols used to subset the rows of the matrix loaded into n2.
keep_genes = [
    'KRT19', 'EPCAM', 'TACSTD2', 'MAL2', 'AGR2', 'C19orf33', 'S100P', 'DSP', 'MIR205HG', 'SPINT2',
    'ANXA3', 'ESRP1', 'KRT7', 'TSPAN8', 'VAMP8', 'LCN2', 'RAB25', 'SERPINB5', 'CSTA', 'SFN',
    'CEACAM6', 'GPX2', 'NMU', 'C10orf116', 'TSTD1', 'FOXQ1', 'GALNT3', 'GDA', 'FOXA1', 'DMKN',
    'SLPI', 'DSG2', 'TOX3', 'ASS1', 'CLDN1', 'KRT17', 'S100A8', 'KRT6A', 'FGFBP1', 'PLBD1',
    'CEACAM5', 'CHMP4C', 'SPINT1', 'UCA1', 'GJB2', 'MMP7', 'TSPAN1', 'EEF1A2', 'FXYD3', 'WFDC2',
    'CDH1', 'CLDN4', 'AIM1', 'ERBB3', 'TPD52L1', 'CST6', 'GPR87', 'IL18', 'GSTO2', 'FAM110C',
    'S100A14', 'CLIC3', 'KLF5', 'MYO5C', 'KRT14', 'F2RL1', 'SELENBP1', 'KRT5', 'IRX2', 'TFF1',
    'SLCO1B3', 'ST14', 'SORL1', 'MANSC1', 'PPL', 'KRT80', 'CDH3', 'S100A9', 'CRABP2', 'KCNK1',
    'ESRP2', 'KRT23', 'INHBB', 'HOOK1', 'CALB1', 'KRT13', 'ZBED2', 'GPR160', 'TMEM45B', 'PI3',
    'ELOVL7', 'SLC27A2',
]
# Load only the rows listed in keep_genes into n2, with the same column metadata as n1.
# .loc with a list of labels raises KeyError if any gene symbol is absent from the index.
n2.load_df(df['clean'].loc[keep_genes], meta_col=df['meta_col'])
# Use the same category/colour table as n1.
n2.set_global_cat_colors(df['meta_cat_col'])
# NOTE(review): keep_genes has fewer than 500 entries, so this filter presumably keeps every row — confirm.
n2.filter_N_top(axis='row', N_top=500, rank_type='var')
# Z-score along the row axis, mirroring the n1 pipeline.
n2.normalize(axis='row', norm_type='zscore')
# Set up manual categories on the 'tissue' column with df['meta_cat_col'] as the preferred categories.
n2.set_manual_category(col='tissue', preferred_cats=df['meta_cat_col'])
# Render the widget, passing n1 as link_net.
# NOTE(review): presumably this keeps manual category edits in sync between the two widgets — confirm.
n2.widget(link_net=n1)
CGM2(manual_cat='{"col": {"col_cat_colors": {"tissue: Dog": "#393b79", "tissue: autonomic_ganglia": "#ff7f0e",…
# --- Export the 500-gene network -------------------------------------------
# The original cells referenced a `net` object that is never created in this
# notebook, so they failed with NameError on a fresh kernel. n1 is the only
# network holding a 500-row matrix (matching the "500x1037" file names), so the
# export is taken from it.
# NOTE(review): n1 is already z-scored at this point, so this file holds
# normalised values — confirm that is the intended content.
df_export = n1.export_df()
df_export.shape
df_export.to_csv('../data/CCLE/CCLE_500x1037.csv')
n1.save_dict_to_json(n1.viz, '../data/CCLE/CCLE_500x1037.json')

# --- Reload the exported matrix into a separate network ---------------------
# A fresh Network keeps n1 (which n2's widget is linked to) untouched.
net = Network(CGM2)
# meta_col is passed as in the n1/n2 cells.
# NOTE(review): presumably required so the 'tissue' annotation is available to
# set_manual_category — confirm.
net.load_df(df_export, meta_col=df['meta_col'])
net.set_manual_category(col='tissue')
net.normalize(axis='row', norm_type='zscore')
net.set_matrix_colors(pos='red', neg='blue')
net.widget()

# Inspect the manual 'tissue' categories currently held by the network.
man_cat = net.get_manual_category('col', 'tissue')
man_cat['tissue'].value_counts()

# Save the z-scored version next to the first export.
df_export = net.export_df()
df_export.to_csv('../data/CCLE/CCLE_500x1037-z.csv')
net.save_dict_to_json(net.viz, '../data/CCLE/CCLE_500x1037-z.json')

# --- 1000-gene view ----------------------------------------------------------
# `df` is the dict of frames, not a DataFrame, so the cleaned matrix is loaded
# explicitly together with its column metadata.
net.load_df(df['clean'], meta_col=df['meta_col'])
net.filter_N_top(axis='row', N_top=1000, rank_type='var')
net.normalize(axis='row', norm_type='zscore')
# Round to two decimals and reload the rounded matrix.
net.load_df(net.export_df().round(2), meta_col=df['meta_col'])
# The original called `manual_category`; every other cell in the notebook uses
# `set_manual_category`, so the call is aligned with them.
net.set_manual_category(col='tissue')
net.widget()
man_cat = net.get_manual_category('col', 'tissue')
man_cat['tissue'].value_counts()