import episcanpy.api as epi
import scanpy.api as sc
import anndata as ad
import numpy as np
# DON"T RUN THIS. You don't have the data and it will most likely kill your kernel
matrix = 'matrix.mtx'
cell_names = 'barcodes.tsv'
var_names = 'peaks.bed'
path_file = '/Volumes/Maxtor/10X_data/atac_v1_pbmc_10k/raw_peak_bc_matrix/'
adata = epi.tl.read_ATAC(matrix, cell_names, var_names, path_file)
matrix = '/Volumes/Maxtor/10X_data/atac_v1_pbmc_10k/filtered_peak_bc_matrix/matrix.mtx'
cell_names = '/Volumes/Maxtor/10X_data/atac_v1_pbmc_10k/filtered_peak_bc_matrix/barcodes.tsv'
var_names = '/Volumes/Maxtor/10X_data/atac_v1_pbmc_10k/filtered_peak_bc_matrix/peaks.bed'
adata = epi.tl.read_ATAC(matrix, cell_names, var_names, path_file='')
adata = epi.tl.read_ATAC(matrix, cell_names, var_names)
adata
epi.pp.commoness_features(adata)
epi.pp.coverage_cells(adata)
# create another adata object with binarised matrix
adatabin = epi.pp.binarize(adata, copy=True)
epi.pp.coverage_cells(adata)
epi.pp.coverage_cells(adatabin)
epi.pp.coverage_cells?
initial matrix first
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.pca(adata)
sc.tl.tsne(adata)
sc.tl.umap(adata)
sc.tl.louvain(adata)
sc.pl.pca(adata, color='louvain')
sc.pl.tsne(adata, color='louvain')
sc.pl.umap(adata, color='louvain')
sc.pp.pca(adatabin)
sc.pp.neighbors(adatabin)
sc.tl.pca(adatabin)
sc.tl.tsne(adatabin)
sc.tl.umap(adatabin)
sc.tl.louvain(adatabin, key_added='louvain_bin')
sc.pl.pca(adatabin, color=['sum_peaks', 'louvain_bin'])
sc.pl.tsne(adatabin, color=['sum_peaks', 'louvain_bin'])
sc.pl.umap(adatabin, color=['sum_peaks', 'louvain_bin'])
# transfer annotations from the binary matrix to the original one
adata.obs['louvain_bin'] = adatabin.obs['louvain_bin']
adata.obs['sum_peaks_bin'] = adatabin.obs['sum_peaks']
sc.pl.pca(adata, color=['sum_peaks', 'sum_peaks_bin', 'louvain_bin'])
sc.pl.tsne(adata, color=['sum_peaks', 'sum_peaks_bin', 'louvain_bin'])
sc.pl.umap(adata, color=['sum_peaks', 'sum_peaks_bin', 'louvain_bin'])
min(adatabin.obs['sum_peaks'])
np.median(adatabin.var['commonness'])
np.mean(adatabin.var['commonness'])
#adatabin400 = adatabin[adatabin.var['commonness'] > 365,:]
adatabin400 = adatabin[:, adatabin.var['commonness'] > 365]
sc.pp.pca(adatabin400)
sc.pp.neighbors(adatabin400)
sc.tl.pca(adatabin400)
sc.tl.tsne(adatabin400)
sc.tl.umap(adatabin400)
sc.tl.louvain(adatabin400, key_added='louvain_bin400')
sc.pl.pca(adatabin400, color=['sum_peaks', 'louvain_bin', 'louvain_bin400'])
sc.pl.tsne(adatabin400, color=['sum_peaks', 'louvain_bin', 'louvain_bin400'])
sc.pl.umap(adatabin400, color=['sum_peaks', 'louvain_bin', 'louvain_bin400'])
sc.pp.regress_out(adatabin400, 'sum_peaks')
sc.pp.pca(adatabin400)
sc.pp.neighbors(adatabin400)
sc.tl.pca(adatabin400)
sc.tl.tsne(adatabin400)
sc.tl.umap(adatabin400)
sc.tl.louvain(adatabin400, key_added='louvain_bin400_reg')
sc.pl.pca(adatabin400, color=['sum_peaks', 'louvain_bin400', 'louvain_bin400_reg'])
sc.pl.tsne(adatabin400, color=['sum_peaks', 'louvain_bin400', 'louvain_bin400_reg'])
sc.pl.umap(adatabin400, color=['sum_peaks', 'louvain_bin400', 'louvain_bin400_reg'])
sc.tl.louvain(adatabin400, resolution=0.2, key_added='louvain_lowres')
sc.pl.pca(adatabin400, color=['louvain_lowres'])
sc.pl.umap(adatabin400, color=['louvain_lowres'])
sc.tl.diffmap(adatabin400)
sc.pl.diffmap(adatabin400, color=['louvain_lowres'])
adatabin400.write('peaks_filtered2_bin_10Xdata.h5ad')
adata = sc.read('peaks_filtered2_bin_10Xdata.h5ad')