import numpy as np
import matplotlib.pyplot as plt
import warnings
import itertools
warnings.filterwarnings('ignore')
import plotly
import plotly.graph_objs as go
import plotly.express as px
from bokeh.palettes import Category20, Viridis3, Set1
from bokeh.plotting import figure, output_file, show, save, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, CategoricalColorMapper, OpenURL, TapTool
import hdbscan
import umap
import seaborn as sns
import pandas as pd
from scipy.spatial.distance import cdist
import DataManager
from utils import get_umap_projection, get_hdbscan_clustering
from sklearn.decomposition import PCA
import importlib
importlib.reload(DataManager)
# Bokeh palettes used for colouring further down.
palette1, palette2 = Category20, Viridis3

# Send Bokeh output to the notebook.
output_notebook()

# Dataset wrapper plus the control-sample selection it provides.
dm = DataManager.DataManager()
control_samples = dm.get_control_samples()

# Everything from column 3 onwards is fed to the reducers
# (presumably the first three columns are metadata -- confirm in DataManager).
feature_matrix = dm.data.values[:, 3:]

# 3-component PCA embedding of the feature columns.
n_comp = 3
pca = PCA(n_components=n_comp)
pca_embedding = pca.fit_transform(feature_matrix)

# 3-component UMAP projection of the same feature columns.
umap_embedding_3d, umap_reducer_3d = get_umap_projection(feature_matrix, n_components=3)
# Output: Loading cached UMAP...
# Embedding that the 3-D scatter plot and the distance analysis operate on.
embedding = pca_embedding

# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()

# Build one 3-D scatter trace per cell line, each in its own palette colour.
data = []
colors = itertools.cycle(palette1[12])
for cell_line, color in zip(range(12), colors):
    name = dm.cell_line_df["mutation"].iloc[cell_line]
    # `embedding` is a plain NumPy array, so rows must be selected by
    # *position*.  Index labels only coincide with positions for a default
    # RangeIndex, hence the positional lookup from the boolean mask.
    cl_indexes = np.flatnonzero((dm.data["cell_line"] == cell_line).values)
    # Configure the trace.
    trace = go.Scatter3d(
        x=embedding[cl_indexes, 0],
        y=embedding[cl_indexes, 1],
        z=embedding[cl_indexes, 2],
        mode='markers',
        marker={
            'size': 1,
            'opacity': 0.6,
            "color": color,
        },
        name=name,
    )
    data.append(trace)

# Overlay the control samples as larger red diamonds so they stand out.
# NOTE(review): assumes `control_samples` is usable as a positional row
# selector for `embedding` -- confirm against DataManager.get_control_samples.
data.append(go.Scatter3d(
    x=embedding[control_samples, 0],
    y=embedding[control_samples, 1],
    z=embedding[control_samples, 2],
    mode='markers',
    marker={
        'size': 2,
        'opacity': 0.65,
        "color": "red",
        "symbol": 'diamond',
    },
    name="Control Samples",
))

# Configure the layout (no margins, so the scene fills the output cell).
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)
plot_figure = go.Figure(data=data, layout=layout)

# Render the plot.
plotly.offline.iplot(plot_figure)
# Pairwise Euclidean distance matrix for every (cell line, replicate) group,
# with a matching human-readable label stored at the same list position.
descriptions = []
distance_matrices = []
#embedding = umap_embedding_3d
for cl in range(12):
    for rep in range(2):
        idxs = (dm.data["cell_line"] == cl) & (dm.data["replicate"] == rep)
        # Convert the pandas mask to a NumPy boolean array before indexing the
        # NumPy embedding, so selection is purely positional.
        group_points = embedding[idxs.values, :]
        distance_matrix = cdist(group_points, group_points, metric="euclidean")
        distance_matrices.append(distance_matrix)
        descriptions.append("CL {} - Rep {}".format(cl, rep))

# Min-max normalise every matrix to [0, 1].  The denominator is the value
# range (max - min); for a self-distance matrix the minimum is the zero
# diagonal, so this matches dividing by the maximum.  A matrix with no spread
# (all distances equal) maps to zeros instead of producing NaN from 0/0.
for i, mat in enumerate(distance_matrices):
    lowest = np.min(mat)
    value_range = np.max(mat) - lowest
    if value_range > 0:
        distance_matrices[i] = (mat - lowest) / value_range
    else:
        distance_matrices[i] = np.zeros_like(mat)

# Sanity check: a self-distance matrix should be symmetric.
np.allclose(distance_matrices[0], distance_matrices[0].T)
# Output: True
# Visual spot check: heatmap of the matrix stored at list position 2.
fig, axs = plt.subplots(figsize=(25, 25))
sns.heatmap(distance_matrices[2], square=True, ax=axs)
plt.show()
plt.close(fig)

# Element-wise median and mean across all distance matrices.  Stacking them
# into one array requires every matrix to have the same shape.
stacked_matrices = np.array(distance_matrices)
median_distance_mat = np.median(stacked_matrices, axis=0)
mean_distance_mat = np.mean(stacked_matrices, axis=0)
median_distance_mat.shape
# Output: (1372, 1372)
# Absolute element-wise deviation of every matrix from the median matrix.
absolute_median_deviations = [np.abs(mat - median_distance_mat) for mat in distance_matrices]

# Show the deviations two at a time, side by side; consecutive list entries
# are paired, and each panel is titled with its entry in `descriptions`.
for i in range(0, len(absolute_median_deviations), 2):
    fig, axs = plt.subplots(1, 2, figsize=(20, 8))
    for offset, panel in enumerate(axs):
        sns.heatmap(absolute_median_deviations[i + offset], ax=panel)
        panel.set_title(descriptions[i + offset])
    plt.show()
    plt.close(fig)
# for i, (c, r) in enumerate(itertools.product(range(12), range(2))):
# fig, axs = plt.subplots(1, 2, figsize=(20, 80))
# mat = absolute_median_deviations[i]
# sns.heatmap(mat, ax=axs[c,r])
# axs[c,r].set_title(descriptions[i])
# break