#!/usr/bin/env python
# coding: utf-8

# # Introduction
# 
# Here we present interactive versions of several figures from Lin et al. (2018), *Highly multiplexed immunofluorescence imaging of human tissues and tumors using t-CyCIF and conventional optical microscopes*.
# 
# In the 16-cycle experiment, three kinds of plots were used to compare cycles: paired histograms, density scatter plots, and four-histogram plots. Below we provide code for generating these plots for all possibly relevant comparisons in the data for convenience. For sake of speed, each dataset only includes 10000 randomly sampled points of the original dataset.
# 
# If you have downloaded these files and are viewing this in Jupyter Notebook, click "Cell" 🡲 "Run All" to interact with the individual plots below. If you are just browsing online however, the plots are only snapshots and not interactive. Please download the files and run Jupyter Notebook for interactive use.

# # Setup and load data

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')

from collections import OrderedDict
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from scipy.stats.stats import pearsonr as rho

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display

import pandas as pd
from math import log

with open("labels.txt", "r") as text_file:
    labels = text_file.read().split('\t')

datasets = OrderedDict([
    ('TonsilA', 'T_sampleA.csv'), ('TonsilB', 'T_sampleB.csv'),
    ('MA', 'M_sampleA.csv'), ('MB', 'M_sampleB.csv'),
    ('NA', 'N_sampleA.csv'), ('NB', 'N_sampleB.csv'),
])

channels = ['Hoechst', 'PCNA', 'Vimentin', 'Tubulin']

alldata = [pd.read_csv(f) for f in datasets.values()]
datadict = OrderedDict(zip(datasets.keys(), range(len(datasets))))
cycldict = OrderedDict(zip(labels, range(len(labels))))
channeldict = OrderedDict(zip(channels, range(len(channels))))


# # Figure 6A: Density Scatter Plot of Two Markers from Same Dataset
# 
# When the dataset is the same, we can make a scatter plot for any of the two channels. Here we also compute the Pearson correlation, denoted by "rho", and the slope of the line-of-best-fit. (The density calculation takes a little more time, so give a minute for changes to implement)

# In[2]:


def dscatter(Marker_A,Marker_B,dataset):
    if Marker_A==Marker_B:
       print('Please Select Differing Markers (changes take a minute)')
        
    else:
        df = alldata[dataset]
        colA = df.iloc[:,Marker_A];
        colB = df.iloc[:,Marker_B];
    
        x = [log(y) for y in colA];
        y = [log(y) for y in colB];
        xy = np.vstack([x,y])
        z= gaussian_kde(xy)(xy)
    
        rhoval = rho(x,y)
        LineFit = np.polyfit(x, y, 1)

        
        plt.scatter(x,y,c=z,s=10,edgecolor='')
        plt.title('Correlation='+"{:.2f}".format(rhoval[0])+', Slope='+"{:.2f}".format(LineFit[0]))
        plt.xlabel(labels[Marker_A]+' Log-Intensity')
        plt.ylabel(labels[Marker_B]+' Log-Intensity')
        plt.show()

it2 = interactive(dscatter,Marker_A=cycldict,Marker_B=cycldict,dataset=datadict)
display(it2)


# # Figure 6A: Four Histogram Plot
# 
# For a handful of markers, we reused the antibodies at multiple different cycles in the same sample.

# In[3]:


def fourcycle(dataset,channel):
    df = alldata[dataset]
    col3 = df.iloc[:,3+16*channel-1]
    col7 = df.iloc[:,7+16*channel-1];
    col12 = df.iloc[:,12+16*channel-1]
    col16 = df.iloc[:,16+16*channel-1];
    
    x3 = [log(y) for y in col3];
    x7 = [log(y) for y in col7];
    x12 = [log(y) for y in col12];
    x16 = [log(y) for y in col16];

    z3 = gaussian_kde(x3)
    z7 = gaussian_kde(x7)
    z12 = gaussian_kde(x12)
    z16 = gaussian_kde(x16)
    binchoice = np.linspace(min(x3+x7+x12+x16),max(x3+x7+x12+x16),100)
    
    plt.plot(binchoice,z3.evaluate(binchoice),label='Cycle3')
    plt.plot(binchoice,z7.evaluate(binchoice),label='Cycle7')
    plt.plot(binchoice,z12.evaluate(binchoice),label='Cycle12')
    plt.plot(binchoice,z16.evaluate(binchoice),label='Cycle16')
    plt.xlabel('Log-Intensity')
    plt.ylabel('Estimated PDF')
    plt.legend()
    plt.show()

it3 = interactive(fourcycle,dataset=datadict,channel=channeldict)
display(it3)


# # Figure 6D: Histogram Comparison of Two Channels
# 
# The code below allows you to choose one channel from one dataset, and another channel from another dataset. The overlapping area is also computed.

# In[4]:


def overlaparea(kde1,kde2,trapzpts):
    z1vals = kde1.evaluate(trapzpts)
    z2vals = kde2.evaluate(trapzpts)
    minvals = np.minimum(z1vals,z2vals)
    #plt.plot(trapzpts,minvals,label='Overlap')
    a = trapzpts[0]
    c = trapzpts[len(trapzpts)-1]
    dx = (c-a)/(len(trapzpts)-1)
    integ = (sum(minvals)-minvals[0]/2-minvals[len(trapzpts)-1]/2)*dx
    return(integ)

def histocompare(Marker_A,Marker_B,dataset1,dataset2):
    df1 = alldata[dataset1]
    df2 = alldata[dataset2]
    colA = df1.iloc[:,Marker_A];
    colB = df2.iloc[:,Marker_B];

    x = [log(y) for y in colA];
    y = [log(y) for y in colB];

    z1 = gaussian_kde(x)
    z2 = gaussian_kde(y)
    binchoice = np.linspace(min(x+y),max(x+y),100)
    
    plt.plot(binchoice,z1.evaluate(binchoice),label=labels[Marker_A])
    plt.plot(binchoice,z2.evaluate(binchoice),label=labels[Marker_B])
    plt.xlabel('Log-Intensity')
    plt.ylabel('Estimated PDF')
    plt.title('Overlap='+"{:.2f}".format(overlaparea(z1,z2,binchoice)))
    plt.legend()
    plt.show()

it1 = interactive(histocompare,Marker_A=cycldict,Marker_B=cycldict,dataset1=datadict,dataset2=datadict)
display(it1)