#!/usr/bin/env python
# coding: utf-8

# Notebook export: projects the feature columns of the DataManager table into
# 3D (PCA and UMAP), plots the PCA embedding per cell line, then compares the
# pairwise-distance structure of every (cell line, replicate) group against
# the element-wise median over all groups.

# In[1]:

import numpy as np
import matplotlib.pyplot as plt
import warnings
import itertools

# Kept ahead of the remaining imports, as in the original cell order, so that
# warnings raised while importing the plotting/ML libraries are silenced too.
warnings.filterwarnings('ignore')

import plotly
import plotly.graph_objs as go
import plotly.express as px
from bokeh.palettes import Category20, Viridis3, Set1
from bokeh.plotting import figure, output_file, show, save, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, CategoricalColorMapper, OpenURL, TapTool
import hdbscan
import umap
import seaborn as sns
import pandas as pd
from scipy.spatial.distance import cdist
import DataManager
from utils import get_umap_projection, get_hdbscan_clustering
from sklearn.decomposition import PCA
import importlib

# Pick up edits to the local DataManager module without restarting the kernel.
importlib.reload(DataManager)

palette1 = Category20
palette2 = Viridis3
output_notebook()

# Number of cell lines and replicates iterated over below.
# NOTE(review): presumably these match the contents of dm.data -- confirm.
N_CELL_LINES = 12
N_REPLICATES = 2


# In[2]:

dm = DataManager.DataManager()
control_samples = dm.get_control_samples()


# In[3]:

n_comp = 3
pca = PCA(n_components=n_comp)
# The first three columns of dm.data are skipped; the rest is fed to PCA.
pca_embedding = pca.fit_transform(dm.data.values[:, 3:])


# In[4]:

# Calculate UMAP projection
umap_embedding_3d, umap_reducer_3d = get_umap_projection(dm.data.values[:, 3:], n_components=3)


# In[5]:

# Embedding used by every cell below; assign `umap_embedding_3d` here to run
# the same analysis on the UMAP projection instead.
embedding = pca_embedding


# In[6]:

# Configure Plotly to be rendered inline in the notebook.
plotly.offline.init_notebook_mode()

data = []
colors = itertools.cycle(palette1[N_CELL_LINES])
for cell_line, color in zip(range(N_CELL_LINES), colors):
    name = dm.cell_line_df["mutation"].iloc[cell_line]
    # `embedding` is a plain NumPy array, so it must be indexed by row
    # *position*. Index labels only coincide with positions for a default
    # RangeIndex; flatnonzero on the boolean mask is correct in every case.
    cl_indexes = np.flatnonzero((dm.data["cell_line"] == cell_line).values)

    # One trace per cell line.
    trace = go.Scatter3d(
        x=embedding[cl_indexes, 0],
        y=embedding[cl_indexes, 1],
        z=embedding[cl_indexes, 2],
        mode='markers',
        marker={
            'size': 1,
            'opacity': 0.6,
            "color": color,
        },
        name=name,
    )
    data.append(trace)

# Control samples are drawn on top with a larger, distinct marker.
data.append(go.Scatter3d(
    x=embedding[control_samples, 0],
    y=embedding[control_samples, 1],
    z=embedding[control_samples, 2],
    mode='markers',
    marker={
        'size': 2,
        'opacity': 0.65,
        "color": "red",
        "symbol": 'diamond',
    },
    name="Control Samples",
))

# Configure the layout.
layout = go.Layout(
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0}
)

plot_figure = go.Figure(data=data, layout=layout)

# Render the plot.
plotly.offline.iplot(plot_figure)


# In[7]:

# One pairwise Euclidean distance matrix per (cell line, replicate) group,
# stored in cell-line-major order so that replicates of the same cell line
# are adjacent in the lists.
descriptions = []
distance_matrices = []
for cl in range(N_CELL_LINES):
    for rep in range(N_REPLICATES):
        # Convert the pandas mask to a NumPy boolean array before using it to
        # index the NumPy embedding.
        idxs = ((dm.data["cell_line"] == cl) & (dm.data["replicate"] == rep)).values
        group_points = embedding[idxs, :]
        distance_matrix = cdist(group_points, group_points, metric="euclidean")
        distance_matrices.append(distance_matrix)
        descriptions.append("CL {} - Rep {}".format(cl, rep))


# In[8]:

def _min_max_normalize(mat):
    """Rescale *mat* linearly to the range [0, 1].

    A self-distance matrix has a zero diagonal, so its minimum is 0 and this
    equals dividing by the maximum. Dividing by the range keeps the result
    correct for any input, and a constant matrix maps to zeros instead of NaN.
    """
    lowest = np.min(mat)
    span = np.max(mat) - lowest
    if span == 0:
        return np.zeros_like(mat, dtype=float)
    return (mat - lowest) / span


distance_matrices = [_min_max_normalize(mat) for mat in distance_matrices]


# In[9]:

# Sanity check: a self-distance matrix should be symmetric.
np.allclose(distance_matrices[0], distance_matrices[0].T)


# In[18]:

fig, axs = plt.subplots(figsize=(25, 25))
sns.heatmap(distance_matrices[2], ax=axs, square=True)
plt.show()
plt.close(fig)


# In[11]:

# Stacking requires every group to have the same number of samples.
# NOTE(review): assumes the samples are also in the same order in every
# group, otherwise the element-wise statistics are meaningless -- confirm.
stacked_distances = np.array(distance_matrices)
median_distance_mat = np.median(stacked_distances, axis=0)
mean_distance_mat = np.mean(stacked_distances, axis=0)
median_distance_mat.shape


# In[12]:

# Element-wise absolute deviation of every group from the median matrix.
absolute_median_deviations = [np.abs(mat - median_distance_mat) for mat in distance_matrices]


# In[13]:

# Show the replicates of each cell line side by side.
for i in range(0, len(absolute_median_deviations), N_REPLICATES):
    fig, axs = plt.subplots(1, N_REPLICATES, figsize=(20, 8))
    for offset in range(N_REPLICATES):
        sns.heatmap(absolute_median_deviations[i + offset], ax=axs[offset])
        axs[offset].set_title(descriptions[i + offset])
    plt.show()
    plt.close(fig)


# In[14]:

n_examples = 8


def _drug_label(drug_id):
    """Return the "Name" of row *drug_id* in dm.drug_df, or a placeholder if it is empty."""
    # NOTE(review): assumes row/column j of a group's distance matrix
    # corresponds to row j of dm.drug_df -- confirm.
    drug_name = dm.drug_df.iloc[drug_id]["Name"]
    return drug_name if drug_name != "" else "index_{}".format(drug_id)


def _rank_drugs(avg_distances, n_top):
    """Return (ids, distances, names) of the *n_top* largest entries of *avg_distances*.

    Entries are ordered from largest to smallest; a stable sort keeps equal
    values in their original order, like ``sorted(..., reverse=True)``.
    """
    ids = np.argsort(-avg_distances, kind="stable")[:n_top].tolist()
    distances = [avg_distances[j] for j in ids]
    names = [_drug_label(j) for j in ids]
    return ids, distances, names


# The comparison below is pairwise and therefore expects two replicates per
# cell line.
for i in range(0, len(absolute_median_deviations), 2):
    avg_distances_0 = np.mean(absolute_median_deviations[i], axis=0)
    sorted_drugs_ids_0, sorted_distances_0, sorted_drugs_names_0 = _rank_drugs(avg_distances_0, n_examples)

    avg_distances_1 = np.mean(absolute_median_deviations[i + 1], axis=0)
    sorted_drugs_ids_1, sorted_distances_1, sorted_drugs_names_1 = _rank_drugs(avg_distances_1, n_examples)

    # Green: the drug is among the top deviators in both replicates.
    # Red: it only shows up in one of them.
    color_0 = ["green" if name in sorted_drugs_names_1 else "red" for name in sorted_drugs_names_0]
    color_1 = ["green" if name in sorted_drugs_names_0 else "red" for name in sorted_drugs_names_1]

    fig, axs = plt.subplots(1, 2, figsize=(20, 8))
    fig.autofmt_xdate(rotation=45)

    sns.barplot(x=sorted_drugs_names_0, y=sorted_distances_0, palette=color_0, ax=axs[0])
    axs[0].set_ylim(0, 1)
    axs[0].set_title(descriptions[i])

    sns.barplot(x=sorted_drugs_names_1, y=sorted_distances_1, palette=color_1, ax=axs[1])
    axs[1].set_ylim(0, 1)
    axs[1].set_title(descriptions[i + 1])

    plt.show()
    plt.close(fig)


# In[15]:

# The cells below display the values left over from the last loop iteration.
sorted_drugs_ids_0


# In[16]:

sorted_drugs_names_0


# In[17]:

sorted_distances_0


# In[ ]: