#!/usr/bin/env python # coding: utf-8 # # Introduction # # Here we present interactive versions of several figures from Lin et al. (2018), *Highly multiplexed immunofluorescence imaging of human tissues and tumors using t-CyCIF and conventional optical microscopes*. # # In the 16-cycle experiment, three kinds of plots were used to compare cycles: paired histograms, density scatter plots, and four-histogram plots. Below we provide code for generating these plots for all possibly relevant comparisons in the data for convenience. For sake of speed, each dataset only includes 10000 randomly sampled points of the original dataset. # # If you have downloaded these files and are viewing this in Jupyter Notebook, click "Cell" 🡲 "Run All" to interact with the individual plots below. If you are just browsing online however, the plots are only snapshots and not interactive. Please download the files and run Jupyter Notebook for interactive use. # # Setup and load data # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') from collections import OrderedDict import numpy as np import matplotlib.mlab as mlab import matplotlib.pyplot as plt from scipy.stats import gaussian_kde from scipy.stats.stats import pearsonr as rho from ipywidgets import interact, interactive, fixed, interact_manual import ipywidgets as widgets from IPython.display import display import pandas as pd from math import log with open("labels.txt", "r") as text_file: labels = text_file.read().split('\t') datasets = OrderedDict([ ('TonsilA', 'T_sampleA.csv'), ('TonsilB', 'T_sampleB.csv'), ('MA', 'M_sampleA.csv'), ('MB', 'M_sampleB.csv'), ('NA', 'N_sampleA.csv'), ('NB', 'N_sampleB.csv'), ]) channels = ['Hoechst', 'PCNA', 'Vimentin', 'Tubulin'] alldata = [pd.read_csv(f) for f in datasets.values()] datadict = OrderedDict(zip(datasets.keys(), range(len(datasets)))) cycldict = OrderedDict(zip(labels, range(len(labels)))) channeldict = OrderedDict(zip(channels, range(len(channels)))) # # Figure 6A: Density Scatter Plot of Two Markers from Same Dataset # # When the dataset is the same, we can make a scatter plot for any of the two channels. Here we also compute the Pearson correlation, denoted by "rho", and the slope of the line-of-best-fit. (The density calculation takes a little more time, so give a minute for changes to implement) # In[2]: def dscatter(Marker_A,Marker_B,dataset): if Marker_A==Marker_B: print('Please Select Differing Markers (changes take a minute)') else: df = alldata[dataset] colA = df.iloc[:,Marker_A]; colB = df.iloc[:,Marker_B]; x = [log(y) for y in colA]; y = [log(y) for y in colB]; xy = np.vstack([x,y]) z= gaussian_kde(xy)(xy) rhoval = rho(x,y) LineFit = np.polyfit(x, y, 1) plt.scatter(x,y,c=z,s=10,edgecolor='') plt.title('Correlation='+"{:.2f}".format(rhoval[0])+', Slope='+"{:.2f}".format(LineFit[0])) plt.xlabel(labels[Marker_A]+' Log-Intensity') plt.ylabel(labels[Marker_B]+' Log-Intensity') plt.show() it2 = interactive(dscatter,Marker_A=cycldict,Marker_B=cycldict,dataset=datadict) display(it2) # # Figure 6A: Four Histogram Plot # # For a handful of markers, we reused the antibodies at multiple different cycles in the same sample. # In[3]: def fourcycle(dataset,channel): df = alldata[dataset] col3 = df.iloc[:,3+16*channel-1] col7 = df.iloc[:,7+16*channel-1]; col12 = df.iloc[:,12+16*channel-1] col16 = df.iloc[:,16+16*channel-1]; x3 = [log(y) for y in col3]; x7 = [log(y) for y in col7]; x12 = [log(y) for y in col12]; x16 = [log(y) for y in col16]; z3 = gaussian_kde(x3) z7 = gaussian_kde(x7) z12 = gaussian_kde(x12) z16 = gaussian_kde(x16) binchoice = np.linspace(min(x3+x7+x12+x16),max(x3+x7+x12+x16),100) plt.plot(binchoice,z3.evaluate(binchoice),label='Cycle3') plt.plot(binchoice,z7.evaluate(binchoice),label='Cycle7') plt.plot(binchoice,z12.evaluate(binchoice),label='Cycle12') plt.plot(binchoice,z16.evaluate(binchoice),label='Cycle16') plt.xlabel('Log-Intensity') plt.ylabel('Estimated PDF') plt.legend() plt.show() it3 = interactive(fourcycle,dataset=datadict,channel=channeldict) display(it3) # # Figure 6D: Histogram Comparison of Two Channels # # The code below allows you to choose one channel from one dataset, and another channel from another dataset. The overlapping area is also computed. # In[4]: def overlaparea(kde1,kde2,trapzpts): z1vals = kde1.evaluate(trapzpts) z2vals = kde2.evaluate(trapzpts) minvals = np.minimum(z1vals,z2vals) #plt.plot(trapzpts,minvals,label='Overlap') a = trapzpts[0] c = trapzpts[len(trapzpts)-1] dx = (c-a)/(len(trapzpts)-1) integ = (sum(minvals)-minvals[0]/2-minvals[len(trapzpts)-1]/2)*dx return(integ) def histocompare(Marker_A,Marker_B,dataset1,dataset2): df1 = alldata[dataset1] df2 = alldata[dataset2] colA = df1.iloc[:,Marker_A]; colB = df2.iloc[:,Marker_B]; x = [log(y) for y in colA]; y = [log(y) for y in colB]; z1 = gaussian_kde(x) z2 = gaussian_kde(y) binchoice = np.linspace(min(x+y),max(x+y),100) plt.plot(binchoice,z1.evaluate(binchoice),label=labels[Marker_A]) plt.plot(binchoice,z2.evaluate(binchoice),label=labels[Marker_B]) plt.xlabel('Log-Intensity') plt.ylabel('Estimated PDF') plt.title('Overlap='+"{:.2f}".format(overlaparea(z1,z2,binchoice))) plt.legend() plt.show() it1 = interactive(histocompare,Marker_A=cycldict,Marker_B=cycldict,dataset1=datadict,dataset2=datadict) display(it1)