Here we present interactive versions of several figures from Lin et al. (2018), Highly multiplexed immunofluorescence imaging of human tissues and tumors using t-CyCIF and conventional optical microscopes.
In the 16-cycle experiment, three kinds of plots were used to compare cycles: paired histograms, density scatter plots, and four-histogram plots. Below we provide code for generating these plots for all possibly relevant comparisons in the data for convenience. For sake of speed, each dataset only includes 10000 randomly sampled points of the original dataset.
If you have downloaded these files and are viewing this in Jupyter Notebook, click "Cell" 🡲 "Run All" to interact with the individual plots below. If you are just browsing online however, the plots are only snapshots and not interactive. Please download the files and run Jupyter Notebook for interactive use.
%matplotlib inline
from collections import OrderedDict
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from scipy.stats.stats import pearsonr as rho
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
from math import log
with open("labels.txt", "r") as text_file:
labels = text_file.read().split('\t')
datasets = OrderedDict([
('TonsilA', 'T_sampleA.csv'), ('TonsilB', 'T_sampleB.csv'),
('MA', 'M_sampleA.csv'), ('MB', 'M_sampleB.csv'),
('NA', 'N_sampleA.csv'), ('NB', 'N_sampleB.csv'),
])
channels = ['Hoechst', 'PCNA', 'Vimentin', 'Tubulin']
alldata = [pd.read_csv(f) for f in datasets.values()]
datadict = OrderedDict(zip(datasets.keys(), range(len(datasets))))
cycldict = OrderedDict(zip(labels, range(len(labels))))
channeldict = OrderedDict(zip(channels, range(len(channels))))
When the dataset is the same, we can make a scatter plot for any of the two channels. Here we also compute the Pearson correlation, denoted by "rho", and the slope of the line-of-best-fit. (The density calculation takes a little more time, so give a minute for changes to implement)
def dscatter(Marker_A,Marker_B,dataset):
if Marker_A==Marker_B:
print('Please Select Differing Markers (changes take a minute)')
else:
df = alldata[dataset]
colA = df.iloc[:,Marker_A];
colB = df.iloc[:,Marker_B];
x = [log(y) for y in colA];
y = [log(y) for y in colB];
xy = np.vstack([x,y])
z= gaussian_kde(xy)(xy)
rhoval = rho(x,y)
LineFit = np.polyfit(x, y, 1)
plt.scatter(x,y,c=z,s=10,edgecolor='')
plt.title('Correlation='+"{:.2f}".format(rhoval[0])+', Slope='+"{:.2f}".format(LineFit[0]))
plt.xlabel(labels[Marker_A]+' Log-Intensity')
plt.ylabel(labels[Marker_B]+' Log-Intensity')
plt.show()
it2 = interactive(dscatter,Marker_A=cycldict,Marker_B=cycldict,dataset=datadict)
display(it2)
A Jupyter Widget
For a handful of markers, we reused the antibodies at multiple different cycles in the same sample.
def fourcycle(dataset,channel):
df = alldata[dataset]
col3 = df.iloc[:,3+16*channel-1]
col7 = df.iloc[:,7+16*channel-1];
col12 = df.iloc[:,12+16*channel-1]
col16 = df.iloc[:,16+16*channel-1];
x3 = [log(y) for y in col3];
x7 = [log(y) for y in col7];
x12 = [log(y) for y in col12];
x16 = [log(y) for y in col16];
z3 = gaussian_kde(x3)
z7 = gaussian_kde(x7)
z12 = gaussian_kde(x12)
z16 = gaussian_kde(x16)
binchoice = np.linspace(min(x3+x7+x12+x16),max(x3+x7+x12+x16),100)
plt.plot(binchoice,z3.evaluate(binchoice),label='Cycle3')
plt.plot(binchoice,z7.evaluate(binchoice),label='Cycle7')
plt.plot(binchoice,z12.evaluate(binchoice),label='Cycle12')
plt.plot(binchoice,z16.evaluate(binchoice),label='Cycle16')
plt.xlabel('Log-Intensity')
plt.ylabel('Estimated PDF')
plt.legend()
plt.show()
it3 = interactive(fourcycle,dataset=datadict,channel=channeldict)
display(it3)
A Jupyter Widget
The code below allows you to choose one channel from one dataset, and another channel from another dataset. The overlapping area is also computed.
def overlaparea(kde1,kde2,trapzpts):
z1vals = kde1.evaluate(trapzpts)
z2vals = kde2.evaluate(trapzpts)
minvals = np.minimum(z1vals,z2vals)
#plt.plot(trapzpts,minvals,label='Overlap')
a = trapzpts[0]
c = trapzpts[len(trapzpts)-1]
dx = (c-a)/(len(trapzpts)-1)
integ = (sum(minvals)-minvals[0]/2-minvals[len(trapzpts)-1]/2)*dx
return(integ)
def histocompare(Marker_A,Marker_B,dataset1,dataset2):
df1 = alldata[dataset1]
df2 = alldata[dataset2]
colA = df1.iloc[:,Marker_A];
colB = df2.iloc[:,Marker_B];
x = [log(y) for y in colA];
y = [log(y) for y in colB];
z1 = gaussian_kde(x)
z2 = gaussian_kde(y)
binchoice = np.linspace(min(x+y),max(x+y),100)
plt.plot(binchoice,z1.evaluate(binchoice),label=labels[Marker_A])
plt.plot(binchoice,z2.evaluate(binchoice),label=labels[Marker_B])
plt.xlabel('Log-Intensity')
plt.ylabel('Estimated PDF')
plt.title('Overlap='+"{:.2f}".format(overlaparea(z1,z2,binchoice)))
plt.legend()
plt.show()
it1 = interactive(histocompare,Marker_A=cycldict,Marker_B=cycldict,dataset1=datadict,dataset2=datadict)
display(it1)
A Jupyter Widget