import anndata
import numpy as np
import pandas as pd
import scplot as sp
Read in data. The data consists of 3K PBMCs from a healthy donor from 10x Genomics.
# Load the dataset from its .h5ad file. read_h5ad is the explicit reader;
# the bare `anndata.read` alias is deprecated.
adata = anndata.read_h5ad('pbmc3k.h5ad')
Violin plot of QC metrics
# One violin per QC metric, with no grouping applied.
sp.violin(
    adata,
    ['n_genes', 'n_counts', 'percent_mito'],
)
Scatter plot matrix of QC metrics. You can optionally color the plot by cluster assignment.
# Scatter plot matrix of the QC metrics, coloured by the 'louvain' field.
sp.scatter_matrix(
    adata,
    ['n_genes', 'n_counts', 'percent_mito'],
    color='louvain',
)
Violin plot of QC metrics by cluster assignment
# Same QC metrics as above, now split by 'louvain'.
# width/height/cols are layout options passed straight to scplot.
sp.violin(
    adata,
    ['n_genes', 'n_counts', 'percent_mito'],
    by='louvain',
    width=450,
    height=400,
    cols=2,
)
Violin plot of gene expression by cluster
# Expression of CST3 and NKG7, split by 'louvain'.
sp.violin(
    adata,
    ['CST3', 'NKG7'],
    by='louvain',
    width=450,
    height=400,
)
Embedding of gene expression and cluster assignments
# UMAP embedding for two keys: a gene (CST3) and the cluster labels ('louvain').
sp.embedding(
    adata,
    basis=['umap'],
    keys=['CST3', 'louvain'],
)
We can display the labels directly on the plot
# legend='data' places the cluster labels on the plot itself.
sp.embedding(
    adata,
    basis='umap',
    keys=['louvain'],
    legend='data',
    width=500,
)
Link plots across multiple embeddings
# Two embeddings (UMAP and PCA) shown for the same keys so the plots are linked.
# NOTE(review): the exact effect of sort=False and brush_categorical=True is not
# visible here - confirm against the scplot documentation.
sp.embedding(
    adata,
    basis=['umap', 'pca'],
    keys=['CST3', 'louvain'],
    sort=False,
    brush_categorical=True,
)
Visualize the binned number of counts within each Louvain cluster
# Optional colour map passed to the composition plots below.
# NOTE(review): keys presumably name binned_n_counts values - confirm how
# scplot matches these string keys to the integer bin codes.
cmap = {
    '0': '#e41a1c',
    '8': 'LightGrey',
    '9': 'black',
}
# Cut n_counts into 10 equal-width bins; labels=False stores the integer bin
# index instead of an interval label.
adata.obs['binned_n_counts'] = pd.cut(
    adata.obs['n_counts'],
    bins=10,
    labels=False,
)
sp.composition_plot(
    adata,
    'louvain',
    'binned_n_counts',
    cmap=cmap,
    height=400,
)
# Sort binned_n_counts by mean fraction across all louvain clusters
sp.composition_plot(
    adata,
    'louvain',
    'binned_n_counts',
    cmap=cmap,
    height=400,
    condition_sort_by='mean',
)
Gene expression heatmap
# Genes shown in the heatmap and dot plot that follow; order is preserved.
marker_genes = [
    'IL7R', 'CD79A', 'MS4A1', 'CD8A', 'CD8B', 'LYZ',
    'CD14', 'LGALS3', 'S100A8', 'GNLY', 'NKG7', 'KLRB1',
    'FCGR3A', 'MS4A7', 'FCER1A', 'CST3', 'PPBP',
]
# Heatmap of the marker genes, grouped by 'louvain'.
sp.heatmap(
    adata,
    keys=marker_genes,
    by='louvain',
)
Gene expression dotplot
# Dot plot of the same marker genes, grouped by 'louvain'.
sp.dotplot(
    adata,
    keys=marker_genes,
    by='louvain',
)
Scatter plot of FCGR3A versus MS4A7, colored by expression of CD14
# FCGR3A on the x axis, MS4A7 on the y axis, points coloured by CD14.
sp.scatter(
    adata,
    x='FCGR3A',
    y='MS4A7',
    color='CD14',
)
Use the box select tool to select cells. After selection is complete, you can get the selected range.
# Keep the plot object in a variable: the selection bounds are read back from
# it later, after the user has drawn a box selection.
embedding_plot = sp.embedding(
    adata,
    basis='umap',
    keys=['CST3'],
)
# Bare expression so the notebook renders the plot.
embedding_plot
from IPython.display import display, clear_output
import ipywidgets as widgets

# Clear this cell's previous output so re-running after a new selection does
# not stack results.
clear_output()

basis = 'umap'
# Embedding coordinates as held in the plot's data frame
# (columns 'X_umap1' and 'X_umap2' for basis='umap').
x_coord = embedding_plot.df['X_{}1'.format(basis)]
y_coord = embedding_plot.df['X_{}2'.format(basis)]

# Bounds of the box selection on the plot at grid position [0, 0]. The code
# below reads them as (x_min, y_min, x_max, y_max).
# Presumably None when nothing has been selected yet - confirm.
bounds = sp.get_bounds(embedding_plot[0, 0])
if bounds is not None:
    # Boolean mask of cells whose coordinates fall inside the selected box.
    selected_bounds = (
        (x_coord >= bounds[0]) & (x_coord <= bounds[2])
        & (y_coord >= bounds[1]) & (y_coord <= bounds[3])
    )
    selected_adata = adata[selected_bounds]
    if selected_adata.shape[0] > 0:
        print('{} cells selected'.format(selected_adata.shape[0]))
        # get union of selected cells and cells belonging to the clusters in the selection
        cluster_selection = (adata.obs['louvain'].isin(selected_adata.obs['louvain'].unique()))
        # NOTE(review): the two masks are combined positionally via .values,
        # which assumes embedding_plot.df rows are in the same order as adata.
        # Confirm this holds when sp.embedding sorts its points.
        selected_adata = adata[selected_bounds.values | cluster_selection.values]
        print('{} clusters'.format(len(selected_adata.obs['louvain'].unique())))
        print('{} total cells'.format(selected_adata.shape[0]))
        # you can also run a tool here to generate a new embedding using the selected data only
        display(sp.embedding(selected_adata, basis=basis, keys=['CST3']))
# Save plot to png
# hv.save(p, 'test.png')
Duplicate cells to create a dataset with 5 million cells. We include only 3 genes to conserve memory. You can also open a large AnnData file in backed
mode to load data on demand.
# Restrict to three genes so the upsampled matrix stays small.
genes_to_include = ['CST3', 'NKG7', 'PPBP']
# New AnnData built from the raw expression of those genes, a copy of the
# per-cell annotations, and a var frame indexed by gene name.
upsampled_adata = anndata.AnnData(
    adata.raw[:, genes_to_include].X,
    adata.obs.copy(),
    pd.DataFrame(index=genes_to_include),
)
upsampled_adata.obsm['X_umap'] = adata.obsm['X_umap']
# Repeat every row index 2000 times (0, 0, ..., 1, 1, ...) and index with the
# result, so each cell appears 2000 times.
upsampled_adata = upsampled_adata[
    np.repeat(np.arange(0, upsampled_adata.shape[0]), 2000)
]
# Bare expression: the notebook displays the formatted cell count.
"{:,} cells".format(upsampled_adata.shape[0])
'5,276,000 cells'
# Both calls run on the upsampled dataset.
sp.embedding(
    upsampled_adata,
    basis='umap',
    keys=['CST3', 'louvain'],
)  # will automatically bin
sp.scatter(
    upsampled_adata,
    x='CST3',
    y='NKG7',
    color='PPBP',
)  # will automatically bin