In [1]:
import episcanpy.api as epi
import scanpy.api as sc
import anndata as ad
import numpy as np
/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
In [ ]:
# DON"T RUN THIS. You don't have the data and it will most likely kill your kernel
matrix = 'matrix.mtx'
cell_names = 'barcodes.tsv'
var_names = 'peaks.bed'
path_file = '/Volumes/Maxtor/10X_data/atac_v1_pbmc_10k/raw_peak_bc_matrix/'
adata = epi.tl.read_ATAC(matrix, cell_names, var_names, path_file)

Loading the filtered matrix from 10X

In [2]:
matrix = '/Volumes/Maxtor/10X_data/atac_v1_pbmc_10k/filtered_peak_bc_matrix/matrix.mtx'
cell_names = '/Volumes/Maxtor/10X_data/atac_v1_pbmc_10k/filtered_peak_bc_matrix/barcodes.tsv'
var_names = '/Volumes/Maxtor/10X_data/atac_v1_pbmc_10k/filtered_peak_bc_matrix/peaks.bed'
adata = epi.tl.read_ATAC(matrix, cell_names, var_names, path_file='')
In [ ]:
adata = epi.tl.read_ATAC(matrix, cell_names, var_names)
In [3]:
adata
Out[3]:
AnnData object with n_obs × n_vars = 8728 × 89796 
    uns: 'omic'

Quality control

In [5]:
epi.pp.commoness_features(adata)
In [6]:
epi.pp.coverage_cells(adata)
In [8]:
# create another adata object with binarised matrix
adatabin = epi.pp.binarize(adata, copy=True)
In [9]:
epi.pp.coverage_cells(adata)
In [10]:
epi.pp.coverage_cells(adatabin)
In [2]:
epi.pp.coverage_cells?

Visualisation

initial matrix first

In [11]:
sc.pp.pca(adata)
sc.pp.neighbors(adata)
In [12]:
sc.tl.pca(adata)
sc.tl.tsne(adata)
sc.tl.umap(adata)
sc.tl.louvain(adata)
In [13]:
sc.pl.pca(adata, color='louvain')
sc.pl.tsne(adata, color='louvain')
sc.pl.umap(adata, color='louvain')

binary matrix

In [14]:
sc.pp.pca(adatabin)
sc.pp.neighbors(adatabin)
sc.tl.pca(adatabin)
sc.tl.tsne(adatabin)
sc.tl.umap(adatabin)
sc.tl.louvain(adatabin, key_added='louvain_bin')
In [17]:
sc.pl.pca(adatabin, color=['sum_peaks', 'louvain_bin'])
sc.pl.tsne(adatabin, color=['sum_peaks', 'louvain_bin'])
sc.pl.umap(adatabin, color=['sum_peaks', 'louvain_bin'])

do we need to regress or do extra filtering?

In [19]:
# transfer annotations from the binary matrix to the original one
adata.obs['louvain_bin'] = adatabin.obs['louvain_bin'] 
adata.obs['sum_peaks_bin'] = adatabin.obs['sum_peaks']
In [20]:
sc.pl.pca(adata, color=['sum_peaks', 'sum_peaks_bin', 'louvain_bin'])
sc.pl.tsne(adata, color=['sum_peaks', 'sum_peaks_bin', 'louvain_bin'])
sc.pl.umap(adata, color=['sum_peaks', 'sum_peaks_bin', 'louvain_bin'])

filtering out peaks that are not covered enough

In [25]:
min(adatabin.obs['sum_peaks'])
Out[25]:
486.0
In [30]:
np.median(adatabin.var['commonness'])
Out[30]:
365.0
In [31]:
np.mean(adatabin.var['commonness'])
Out[31]:
1691.465900485545
In [33]:
#adatabin400 = adatabin[adatabin.var['commonness'] > 365,:]
adatabin400 = adatabin[:, adatabin.var['commonness'] > 365]
In [34]:
sc.pp.pca(adatabin400)
sc.pp.neighbors(adatabin400)
sc.tl.pca(adatabin400)
sc.tl.tsne(adatabin400)
sc.tl.umap(adatabin400)
sc.tl.louvain(adatabin400, key_added='louvain_bin400')
Up to anndata 0.6.12, `.copy()` cast a non-'float32' `.X` to 'float32'. Now, the dtype 'float64' is maintained. 
In [36]:
sc.pl.pca(adatabin400, color=['sum_peaks', 'louvain_bin', 'louvain_bin400'])
sc.pl.tsne(adatabin400, color=['sum_peaks', 'louvain_bin', 'louvain_bin400'])
sc.pl.umap(adatabin400, color=['sum_peaks', 'louvain_bin', 'louvain_bin400'])

regressing the binary matrix bassed on the number of covered peaks

In [37]:
sc.pp.regress_out(adatabin400, 'sum_peaks')
In [38]:
sc.pp.pca(adatabin400)
sc.pp.neighbors(adatabin400)
sc.tl.pca(adatabin400)
sc.tl.tsne(adatabin400)
sc.tl.umap(adatabin400)
sc.tl.louvain(adatabin400, key_added='louvain_bin400_reg')
In [39]:
sc.pl.pca(adatabin400, color=['sum_peaks', 'louvain_bin400', 'louvain_bin400_reg'])
sc.pl.tsne(adatabin400, color=['sum_peaks', 'louvain_bin400', 'louvain_bin400_reg'])
sc.pl.umap(adatabin400, color=['sum_peaks', 'louvain_bin400', 'louvain_bin400_reg'])
In [42]:
sc.tl.louvain(adatabin400, resolution=0.2, key_added='louvain_lowres')
In [44]:
sc.pl.pca(adatabin400, color=['louvain_lowres'])
sc.pl.umap(adatabin400, color=['louvain_lowres'])
In [46]:
sc.tl.diffmap(adatabin400)
In [47]:
sc.pl.diffmap(adatabin400, color=['louvain_lowres'])
In [60]:
adatabin400.write('peaks_filtered2_bin_10Xdata.h5ad')
In [ ]:
adata = sc.read('peaks_filtered2_bin_10Xdata.h5ad')