This is based on the Velocyto La Manno et al. analysis of Plass et al..
import sys
import numpy as np
from scipy.stats import norm
from scipy.spatial.distance import pdist, squareform
import matplotlib
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import loompy
import velocyto as vcy
import pandas as pd
import scanpy.api as sc
from IPython.core.display import display, HTML
import logging
logging.basicConfig(stream=sys.stdout, format='%(asctime)s - %(levelname)s - %(message)s', level=logging.DEBUG)
display(HTML("<style>.container { width:90% !important; }</style>"))
%matplotlib inline
dirr='./data/'
vlm = vcy.VelocytoLoom(dirr+"all_sgete_4GU75.loom")
ds = loompy.connect(dirr+"all_sgete_4GU75.loom")
### Get the velocyto cell barcodes
velo_list = [x[16:29] for x in list(vlm.ca["CellID"])]
### Get seurat cells and annotation
base_dir = "./data/"
file = "Planaria_Seurat_annot.csv"
annotation = pd.read_csv(base_dir+file, delimiter=',', index_col=0)
#tsneCoord = pd.read_csv(base_dir+"tsneCoordinates.csv", delimiter=',', index_col=0)
#annotation = pd.merge(annotation, tsneCoord, left_index=True, right_index=True)
annotation = annotation.set_index('New_CBC')
annotation = annotation.loc[velo_list]
color_dict = dict(zip(list(annotation["final_Id"].value_counts().index), list(annotation["col_hex"].value_counts().index)))
# add cluster, color and time as annotation from Seurat object to velocyto object
vlm.set_clusters(cluster_labels=list(np.array(annotation["final_Id"])), cluster_colors_dict=color_dict)
vlm.ca["Clusters"] = vlm.cluster_ix
vlm.ca["time"] = np.array(annotation["time"])
vlm.ca["final_Id"] = np.array(annotation["final_Id"])
def match_anndata_with_loom(filename_h5ad='./write/planaria_pp_extended.h5ad', filename_loom='./data/all_sgete_4GU75.loom'):
adata = sc.read(filename_h5ad)
ds = loompy.connect(filename_loom)
loom_cells = [x[16:29] for x in ds.ca['CellID']]
annotation = pd.read_csv('./data/Planaria_Seurat_annot.csv', delimiter=',', index_col=0)
annotation = annotation.loc[adata.obs_names]
adata.obs_names = annotation['New_CBC']
loom_cells = [x for x in loom_cells if x in adata.obs_names]
adata = adata[loom_cells]
# adata.obs_names = list(ds.ca['CellID']) # restore the cell names of the loom file in AnnData
adata.write(filename_h5ad.replace('.h5ad', '_loom.h5ad'))
match_anndata_with_loom('./write/planaria_pp_extended_epidermis.h5ad')
adata = sc.read('./write/planaria_pp_extended_epidermis_loom.h5ad')
vlm.ts = adata.obsm['X_umap']
epidermal_clusters = [
'epidermal neoblasts',
'early epidermal progenitors',
'late epidermal progenitors 1',
'late epidermal progenitors 2', 'epidermis']
vlm.filter_cells(np.isin(vlm.ca["final_Id"], epidermal_clusters))
plt.figure(figsize=(15,15))
vcy.scatter_viz(vlm.ts[:,0], vlm.ts[:,1], c=vlm.colorandum, s=5)
for cluster in np.unique(vlm.ca["final_Id"]):
ts_m = np.median(vlm.ts[vlm.ca["final_Id"] == cluster, :], 0)
plt.text(ts_m[0], ts_m[1], str(vlm.cluster_labels[vlm.ca["final_Id"] == cluster][0]),
fontsize=13, bbox={"facecolor":"w", "alpha":0.6})
plt.axis("off");
vlm.plot_fractions()
vlm.normalize("S", size=True, log=False)
vlm.normalize("U", size=True, log=False)
vlm.score_cv_vs_mean(4000, plot=True, max_expr_avg=35)
print(sum(vlm.cv_mean_selected))
vlm.filter_genes(by_cv_vs_mean=True)
4003
vlm.score_detection_levels(min_expr_counts=3, min_cells_express=3, min_expr_counts_U=3, min_cells_express_U=3)
vlm.filter_genes(by_detection_levels=True)
print("Number of genes to be used:",vlm.S.shape[0])
Number of genes to be used: 2789
vlm.normalize_by_total()
vlm.perform_PCA()
plt.plot(np.cumsum(vlm.pca.explained_variance_ratio_)[:100])
n_comps = np.where(np.diff(np.diff(np.cumsum(vlm.pca.explained_variance_ratio_))>0.002))[0][0]
plt.axvline(n_comps, c="k")
print("number of PCs to be used:",n_comps)
number of PCs to be used: 82
k = 200
vlm.knn_imputation(n_pca_dims=n_comps, k=k, balanced=True, b_sight=k*8, b_maxl=k*4)
vlm.normalize_median()
vlm.fit_gammas(limit_gamma=True)
vlm.predict_U()
vlm.calculate_velocity()
vlm.calculate_shift(assumption="constant_velocity")
vlm.extrapolate_cell_at_t(delta_t=1)
vlm.estimate_transition_prob(hidim="Sx_sz", embed="ts", transform="sqrt", n_neighbors=1000, knn_random=True, sampled_fraction=1)
vlm.calculate_embedding_shift(sigma_corr = 0.02)
np.save('./write/vlm_transition_prob_epidermis.npy', vlm.transition_prob)
np.save('./write/vlm_delta_embedding_epidermis.npy', vlm.delta_embedding)
fig = plt.figure(None,(20,20))
quiver_scale = 1
plt.scatter(vlm.embedding[:, 0], vlm.embedding[:, 1],
c="0.8", alpha=0.2, s=10, edgecolor="")
ix_choice = np.random.choice(vlm.embedding.shape[0], size=int(vlm.embedding.shape[0]/1.), replace=False)
plt.scatter(vlm.embedding[ix_choice, 0], vlm.embedding[ix_choice, 1],
c="0.8", alpha=0.4, s=10, edgecolor=(0,0,0,1), lw=0.3)
quiver_kwargs=dict(headaxislength=6, headlength=8, headwidth=7,linewidths=0.2, width=0.00045,edgecolors="k", color=vlm.colorandum[ix_choice], alpha=1)
plt.quiver(vlm.embedding[ix_choice, 0], vlm.embedding[ix_choice, 1],
vlm.delta_embedding[ix_choice, 0], vlm.delta_embedding[ix_choice, 1],
scale=quiver_scale, **quiver_kwargs)
plt.axis("off")
(-8.0906167716201285, 5.9139372522803475, -4.0870554688704841, 15.309048816768399)