from clustergrammer_widget import *
# --- Analysis 1: phospho TMT combined ratios ---
# Load the data, normalize, filter, cluster, and display it as a heatmap.
net = Network()
filename = (
    '../lung_cellline_3_1_16/lung_cellline_phospho/'
    'lung_cellline_TMT_phospho_combined_ratios.tsv'
)
net.load_file(filename)

# quantile normalize to normalize cell lines
net.normalize(axis='col', norm_type='qn')

# only keep most differentially regulated PTMs
# (arguments: 'row', 250, 'sum')
net.filter_N_top('row', 250, 'sum')

# take zscore of rows
net.normalize(axis='row', norm_type='zscore', keep_orig=True)
net.swap_nan_for_zero()

# threshold filter PTMs
net.filter_threshold('row', threshold=1.75, num_occur=3)

# make similarity matrices of rows and columns, add to viz data structure
views = ['N_row_sum', 'N_row_var']
net.make_clust(dist_type='cos', views=views, dendro=True,
               sim_mat=True, calc_cat_pval=False)

# view heatmap of data
clustergrammer_widget(network=net.widget())
# --- Analysis 2: precalculated 'improved' quantile-normalized PTM data ---
# Load the data, z-score the rows, filter, cluster, and display it as a
# heatmap.
net = Network()
filename = '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/ptm45_col-iqn.txt'
net.load_file(filename)

# take zscore of rows
net.normalize(axis='row', norm_type='zscore', keep_orig=True)
net.swap_nan_for_zero()

# threshold filter PTMs
net.filter_threshold('row', threshold=2.65, num_occur=2)

# make similarity matrices of rows and columns, add to viz data structure
views = ['N_row_sum', 'N_row_var']
net.make_clust(dist_type='cos', views=views, dendro=True,
               sim_mat=True, calc_cat_pval=False)

# When the 'improved' quantile normalization data is processed in the same
# way as the homepage data (e.g. threshold filtering), the biological
# replicates cluster close to each other: the H2703s are next to each other,
# the H1437s are next to each other, and the H209s are in the same large
# group. This is just a sanity check to make sure that I am seeing the same
# broad results I saw with the previous normalization type.

# view heatmap of data
clustergrammer_widget(network=net.widget())

# shape of the filtered matrix
net.dat['mat'].shape
# recorded output of the expression above: (88, 45)