#!/usr/bin/env python
# coding: utf-8

# # Overview of CyTOF Data
# The original data was given as two tab-separated matrices
# * ``Plasma.txt`` (original name: 160202_CGI002_Plasma_Plasma_singlets.fcs_raw_events.txt)
# * ``PMA.txt`` (original name: 160202_CGI002_PMA_PMA_singlets.fcs_raw_events.txt)
#
# These files had individual cell measurements as rows and dimensions (e.g.
# antibodies) as columns. I kept only the dimensions of interest (the surface
# marker and phospho marker antibody columns) and renamed the files. I then
# semi-automatically identified 'roughly-defined' cell types using hierarchical
# clustering and the surface markers associated with each cell type.
#
# The resulting files are ``Plasma_CT.txt`` and ``PMA_CT.txt``.
#
# NOTE(review): the cells below load ``Plasma_UCT.txt`` and ``PMA_UCT.txt``,
# not the ``*_CT.txt`` names given above -- confirm which naming is current.

# In[1]:


import pandas as pd
import numpy as np
from clustergrammer_widget import *
net = Network(clustergrammer_widget)

# Number of cells (rows) randomly sampled from each treatment file, and the
# seed that makes the sampling reproducible.
NUM_CELLS_PER_TREATMENT = 110000
RANDOM_STATE = 99

# Z-scores are clipped to [-Z_CLIP, Z_CLIP].
Z_CLIP = 10


def normalize_downsample_clip(network, num_samples):
    """Z-score the columns, k-means downsample the rows, then clip.

    This is the preprocessing shared by every view in this notebook. It
    operates in place on the matrix currently loaded in ``network`` and
    prints the shape of the resulting matrix.

    Parameters
    ----------
    network : Network
        The clustergrammer ``Network`` holding the matrix to process.
    num_samples : int
        Value passed as ``num_samples`` to the k-means row downsampling.
    """
    network.normalize(axis='col', norm_type='zscore', keep_orig=False)
    network.downsample(ds_type='kmeans', axis='row', num_samples=num_samples)

    # clip z-scores since we do not care about extreme outliers
    network.clip(-Z_CLIP, Z_CLIP)

    print(network.dat['mat'].shape)


# # Plasma

# In[2]:


# load Plasma treated data with defined cell types
net.load_file('../cytof_data/Plasma_UCT.txt')

# subsample the data so that both treatments have the same number of cells
net.random_sample(axis='row', num_samples=NUM_CELLS_PER_TREATMENT,
                  random_state=RANDOM_STATE)
df_plasma = net.export_df()
print(df_plasma.shape)

normalize_downsample_clip(net, num_samples=1000)

net.write_matrix_to_tsv('../cytof_data/ds_plasma.txt')


# In[3]:


# colors for the majority-treatment category (row category 1)
net.set_cat_color('row', 1, 'Majority-Treatment: Plasma', 'blue')
net.set_cat_color('row', 1, 'Majority-Treatment: PMA', 'red')

# colors for the majority cell-type category (row category 2)
net.set_cat_color('row', 2, 'Majority-Category: CD14hi monocytes', 'yellow')
net.set_cat_color('row', 2, 'Majority-Category: CD4 Tcells', 'blue')
net.set_cat_color('row', 2, 'Majority-Category: NK cells_CD16hi', 'red')
net.set_cat_color('row', 2, 'Majority-Category: NK cells_CD16hi_CD57hi', 'orange')
net.set_cat_color('row', 2, 'Majority-Category: NK cells_CD56hi', '#FF6347')

# colors for the marker-type category (column category 1)
net.set_cat_color('col', 1, 'Marker-type: phospho marker', 'red')
net.set_cat_color('col', 1, 'Marker-type: surface marker', 'blue')


# In[4]:


net.cluster(views=[])
net.widget()


# # PMA

# In[5]:


net.load_file('../cytof_data/PMA_UCT.txt')

# same sample size and seed as used for the Plasma data
net.random_sample(axis='row', num_samples=NUM_CELLS_PER_TREATMENT,
                  random_state=RANDOM_STATE)
df_pma = net.export_df()

net.load_df(df_pma)
normalize_downsample_clip(net, num_samples=1000)

net.write_matrix_to_tsv('../cytof_data/ds_pma.txt')

net.cluster(views=[])
net.widget()


# # Plasma vs PMA Treated
#
# ### Merge Plasma and PMA

# In[6]:


df_merge = pd.concat([df_plasma, df_pma])
print(df_merge.shape)

net.load_df(df_merge)
normalize_downsample_clip(net, num_samples=2000)

net.cluster(views=[])
net.widget()


# # Plasma vs PMA based on Surface markers only

# In[7]:


df_merge = pd.concat([df_plasma, df_pma])
net.load_df(df_merge)

# keep only the surface marker columns
net.filter_cat('col', 1, 'Marker-type: surface marker')
normalize_downsample_clip(net, num_samples=2000)

net.cluster(views=[])
net.widget()


# # Plasma vs PMA based on Phospho markers only

# In[8]:


df_merge = pd.concat([df_plasma, df_pma])
net.load_df(df_merge)

# keep only the phospho marker columns
net.filter_cat('col', 1, 'Marker-type: phospho marker')
normalize_downsample_clip(net, num_samples=2000)

net.cluster(views=[])
net.widget()


# PMA and Plasma treated cells separate more based on phospho markers than
# based on surface markers. This makes sense since PMA treatment is expected
# to influence phosphorylation levels.

# We see a cluster of Monocytes and Granulocytes with high phosphorylation
# markers: pCREB, pMAPKAP2, pERK1 2, pp38. Below we export this cluster by
# selecting it with the interactive dendrogram and then calling the widget's
# DataFrame export method, widget_df:

# In[10]:


# NOTE: widget_df() is called on the widget shown above; the cluster is
# expected to have been selected interactively in that widget first.
df_CD14hi = net.widget_df()


# In[11]:


net.load_df(df_CD14hi)
net.cluster(views=[])
net.widget()


# In[ ]: