#!/usr/bin/env python
# coding: utf-8

# # Overview of CyTOF Data
# The original data was given as two tab-separated matrices
# * ``Plasma.txt`` (original name: 160202_CGI002_Plasma_Plasma_singlets.fcs_raw_events.txt)
# * ``PMA.txt`` (original name: 160202_CGI002_PMA_PMA_singlets.fcs_raw_events.txt)
# 
# These files had individual cell measurements as rows and dimensions (e.g. antibodies) as columns. I kept only the dimensions of interest (the surface marker and phospho marker antibody columns) and renamed the files. I then semi-automatically identified 'roughly-defined' cell types using hierarchical clustering and the surface markers associated with each cell type.
# 
# The resulting files are ``Plasma_CT.txt`` and ``PMA_CT.txt``.

# In[1]:


import pandas as pd
import numpy as np
from clustergrammer_widget import *
# Single Network instance that every cell below loads data into and reuses.
net = Network(clustergrammer_widget)


# # Plasma

# In[2]:


# load Plasma treated data with defined cell types
# NOTE(review): the header comment names ``Plasma_CT.txt`` but this loads
# ``Plasma_UCT.txt`` -- confirm which file is intended.
net.load_file('../cytof_data/Plasma_UCT.txt')

# subsample the data so that both treatments have the same number of cells
# (the PMA cell uses the same num_samples and random_state)
net.random_sample(axis='row',num_samples=110000, random_state=99)
# exported before normalization; df_plasma is reused by the merged analyses
df_plasma = net.export_df()
print(df_plasma.shape)

# z-score the columns, then k-means downsample the rows (num_samples=1000)
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
print(net.dat['mat'].shape)

# clip z-scores since we do not care about extreme outliers
net.clip(-10,10)
# save the downsampled, clipped matrix
net.write_matrix_to_tsv('../cytof_data/ds_plasma.txt')


# In[3]:


# Fixed colors for the category labels. Each entry is
# (axis, category index, category label, color); the calls are issued in the
# order listed.
for _axis, _cat_index, _cat_label, _color in (
    # row category 1: treatment
    ('row', 1, 'Majority-Treatment: Plasma', 'blue'),
    ('row', 1, 'Majority-Treatment: PMA', 'red'),
    # row category 2: cell type
    ('row', 2, 'Majority-Category: CD14hi monocytes', 'yellow'),
    ('row', 2, 'Majority-Category: CD4 Tcells', 'blue'),
    ('row', 2, 'Majority-Category: NK cells_CD16hi', 'red'),
    ('row', 2, 'Majority-Category: NK cells_CD16hi_CD57hi', 'orange'),
    ('row', 2, 'Majority-Category: NK cells_CD56hi', '#FF6347'),
    # column category 1: marker type
    ('col', 1, 'Marker-type: phospho marker', 'red'),
    ('col', 1, 'Marker-type: surface marker', 'blue'),
):
    net.set_cat_color(_axis, _cat_index, _cat_label, _color)


# In[4]:


# Cluster the matrix currently held by net, passing an empty views list.
net.cluster(views=[])
# Last expression of the cell, so the notebook displays its return value.
net.widget()


# # PMA

# In[5]:


# PMA pipeline, mirroring the Plasma cell: load, seeded subsample, z-score,
# k-means downsample, clip, write to disk, then cluster and show the widget.
net.load_file('../cytof_data/PMA_UCT.txt')

# Same num_samples and random_state as the Plasma subsample so that both
# treatments contribute the same number of cells.
net.random_sample(axis='row', num_samples=110000, random_state=99)
# exported before normalization; df_pma is reused by the merged analyses
df_pma = net.export_df()
print(df_pma.shape)

# NOTE(review): re-loading the DataFrame that was just exported looks
# redundant; kept because load_df may reset network state -- confirm.
net.load_df(df_pma)

net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
# Print explicitly: a bare expression in the middle of a cell shows nothing.
print(net.dat['mat'].shape)

# clip z-scores since we do not care about extreme outliers
net.clip(-10, 10)
net.write_matrix_to_tsv('../cytof_data/ds_pma.txt')

net.cluster(views=[])
net.widget()


# # Plasma vs PMA Treated
# 
# ### Merge Plasma and PMA

# In[6]:


# Stack the Plasma and PMA subsamples row-wise and analyse them together.
df_merge = pd.concat([df_plasma, df_pma])
print(df_merge.shape)
net.load_df(df_merge)

# Normalization runs on the merged table, i.e. across both treatments at once.
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=2000)
# clip z-scores since we do not care about extreme outliers
net.clip(-10, 10)
# Print explicitly: a bare expression in the middle of a cell shows nothing.
print(net.dat['mat'].shape)

net.cluster(views=[])
net.widget()


# # Plasma vs PMA based on Surface markers only

# In[7]:


# Rebuild the merged Plasma + PMA table so this cell can be run on its own.
df_merge = pd.concat([df_plasma, df_pma])
net.load_df(df_merge)

# Filter columns on category 1 == 'Marker-type: surface marker' before
# normalizing, so the downsampling only sees the columns left by the filter.
net.filter_cat('col', 1, 'Marker-type: surface marker')
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=2000)
# clip z-scores, same bounds as in the other cells
net.clip(-10,10)
print(net.dat['mat'].shape)

net.cluster(views=[])
net.widget()


# # Plasma vs PMA based on Phospho markers only

# In[8]:


# Rebuild the merged Plasma + PMA table so this cell can be run on its own.
df_merge = pd.concat([df_plasma, df_pma])
net.load_df(df_merge)

# Filter columns on category 1 == 'Marker-type: phospho marker' before
# normalizing, so the downsampling only sees the columns left by the filter.
net.filter_cat('col', 1, 'Marker-type: phospho marker')
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=2000)
# clip z-scores, same bounds as in the other cells
net.clip(-10,10)
print(net.dat['mat'].shape)

net.cluster(views=[])
net.widget()


# PMA and Plasma treated cells separate more based on phospho markers than based on surface markers. This makes sense since PMA treatment is expected to influence phosphorylation levels.

# We see a cluster of Monocytes and Granulocytes with high levels of the phosphorylation markers pCREB, pMAPKAP2, pERK1 2, and pp38. Below we export this cluster using the interactive dendrogram and the widget's DataFrame export method, ``widget_df``:

# In[10]:


# Export the widget's current data as a DataFrame.
# NOTE(review): presumably this reflects the cluster selected interactively in
# the phospho-marker widget above, so it depends on that manual selection
# having been made before this cell runs -- confirm.
df_CD14hi = net.widget_df()


# In[11]:


# Load the exported subset back into net and cluster it by itself.
net.load_df(df_CD14hi)
net.cluster(views=[])
# Last expression of the cell, so the notebook displays its return value.
net.widget()


# In[ ]: