conda create -n ATACseq_SnapATAC python r-essentials jupyter pip pysam pybedtools -y
pip install snaptools
conda install -c bioconda bioconductor-rhdf5 bioconductor-rhdf5lib -y
SnapATAC_buenrostro2018_preprocess.ipynb
library(SnapATAC);
library(GenomicRanges);
Loading required package: Matrix Loading required package: rhdf5 Loading required package: stats4 Loading required package: BiocGenerics Loading required package: parallel Attaching package: ‘BiocGenerics’ The following objects are masked from ‘package:parallel’: clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB The following objects are masked from ‘package:SnapATAC’: colMeans, colSums, rowMeans, rowSums The following objects are masked from ‘package:Matrix’: colMeans, colSums, rowMeans, rowSums, which The following objects are masked from ‘package:stats’: IQR, mad, sd, var, xtabs The following objects are masked from ‘package:base’: anyDuplicated, append, as.data.frame, basename, cbind, colMeans, colnames, colSums, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which, which.max, which.min Loading required package: S4Vectors Attaching package: ‘S4Vectors’ The following object is masked from ‘package:Matrix’: expand The following object is masked from ‘package:base’: expand.grid Loading required package: IRanges Loading required package: GenomeInfoDb
packageVersion("SnapATAC")
[1] ‘1.0.0’
time ./run_snaptools.sh
output: 59mins 56seconds
start_time <- Sys.time()
metadata <- read.table('../../input/metadata.tsv',
header = TRUE,
stringsAsFactors=FALSE,quote="",row.names=1)
SnapATAC_metadata <- read.table('./SnapATAC_metadata.tsv',
header = TRUE,
stringsAsFactors=FALSE,quote="",row.names=1)
sum(SnapATAC_metadata$ID == rownames(metadata))
x.sp = createSnap(
file="Buenrostro2018.snap",
sample="Buenrostro2018",
do.par = TRUE,
num.cores=10)
Epoch: reading the barcode session ...
plotBarcode(
obj=x.sp,
pdf.file.name=NULL,
pdf.width=7,
pdf.height=7,
col="grey",
border="grey",
breaks=50
);
# # filter cells only using number of fragments and UMI with the following cutoffs
# x.sp = filterCells(
# obj=x.sp,
# subset.names=c("fragment.num", "UMI"),
# low.thresholds=c(1000,1000),
# high.thresholds=c(Inf, Inf)
# );
# show what bin sizes exist in atac_v1_adult_brain_fresh_5k.snap file
showBinSizes("Buenrostro2018.snap");
x.sp = addBmatToSnap(x.sp, bin.size=5000, num.cores=10);
Epoch: reading cell-bin count matrix session ...
x.sp;
number of barcodes: 2034 number of bins: 627478 number of genes: 0 number of peaks: 0
summarySnap(x.sp);
Total number of barcodes: 2034 Median number of sequencing fragments: 127155.5 Median number of uniquely mapped fragments: 16430.5 Median number of mappability ratio: 0.14 Median number of properly paired ratio: 1 Median number of duplicate ratio: 0 Median number of chrM ratio: 0.41 Median number of unique molecules (UMI): 16430.5
x.sp = makeBinary(x.sp, mat="bmat");
# system("wget http://mitra.stanford.edu/kundaje/akundaje/release/blacklists/hg19-human/wgEncodeHg19ConsensusSignalArtifactRegions.bed.gz")
black_list = read.table('wgEncodeHg19ConsensusSignalArtifactRegions.bed.gz')
black_list.gr = GRanges(black_list[,1], IRanges(black_list[,2], black_list[,3]));
idy1 = queryHits(findOverlaps(x.sp@feature, black_list.gr));
idy2 = grep("chrM|random", x.sp@feature);
idy = unique(c(idy1, idy2));
x.sp = x.sp[,-idy, mat="bmat"];
x.sp
number of barcodes: 2034 number of bins: 624297 number of genes: 0 number of peaks: 0
options(repr.plot.width=4, repr.plot.height=4)
plotBinCoverage(
x.sp,
pdf.file.name=NULL,
col="grey",
border="grey",
breaks=10,
xlim=c(-6,6)
);
x.sp = filterBins(
x.sp,
low.threshold=-2,
high.threshold=2,
mat="bmat"
);
x.sp
number of barcodes: 2034 number of bins: 527055 number of genes: 0 number of peaks: 0
### the function failed when do.par=TRUE
x.sp = runJaccard(
obj = x.sp,
tmp.folder=tempdir(),
mat = "bmat",
max.var=2000,
ncell.chunk=1000,
do.par=FALSE,
num.cores=10,
seed.use=10
);
### the function failed when do.par=TRUE
x.sp = runNormJaccard(
obj = x.sp,
tmp.folder=tempdir(),
ncell.chunk=1000,
method="normOVE",
row.center=TRUE,
row.scale=TRUE,
low.threshold=-5,
high.threshold=5,
do.par=FALSE,
num.cores=10,
seed.use=10
);
x.sp = runDimReduct(
x.sp,
pc.num=50,
input.mat="jmat",
method="svd",
center=TRUE,
scale=FALSE,
seed.use=10
);
options(repr.plot.width=6, repr.plot.height=6)
plotDimReductElbow(
obj=x.sp,
point.size=1.5,
point.shape=19,
point.color="red",
point.alpha=1,
pdf.file.name=NULL,
pdf.height=7,
pdf.width=7,
labs.title="PCA Elbow plot",
labs.subtitle=NULL
);
plotDimReductPW(
obj=x.sp,
pca.dims=1:50,
point.size=0.3,
point.color="grey",
point.shape=19,
point.alpha=0.6,
down.sample=5000,
pdf.file.name=NULL,
pdf.height=7,
pdf.width=7
);
str(x.sp)
Formal class 'snap' [package "SnapATAC"] with 16 slots ..@ des : chr(0) ..@ barcode : chr [1:2034] "AAAAATCT" "AAAACCTC" "AAAACGTA" "AAAAGACC" ... ..@ file : chr [1:2034] "/data/pinello/PROJECTS/2019_03_scATAC/Github/Real_Data/Buenrostro_2018/run_methods/SnapATAC/Buenrostro2018.snap" "/data/pinello/PROJECTS/2019_03_scATAC/Github/Real_Data/Buenrostro_2018/run_methods/SnapATAC/Buenrostro2018.snap" "/data/pinello/PROJECTS/2019_03_scATAC/Github/Real_Data/Buenrostro_2018/run_methods/SnapATAC/Buenrostro2018.snap" "/data/pinello/PROJECTS/2019_03_scATAC/Github/Real_Data/Buenrostro_2018/run_methods/SnapATAC/Buenrostro2018.snap" ... ..@ sample : chr [1:2034] "Buenrostro2018" "Buenrostro2018" "Buenrostro2018" "Buenrostro2018" ... ..@ metaData:'data.frame': 2034 obs. of 6 variables: .. ..$ barcode: Factor w/ 2034 levels "AAAAATCT","AAAACCTC",..: 1 2 3 4 5 6 7 8 9 10 ... .. ..$ TN : num [1:2034] 135497 197491 154610 112464 69803 ... .. ..$ UM : num [1:2034] 15252 12022 7949 40605 24294 ... .. ..$ PP : num [1:2034] 15233 11995 7942 40551 24271 ... .. ..$ UQ : num [1:2034] 15233 11995 7942 40530 24265 ... .. ..$ CM : num [1:2034] 6191 6924 3147 15836 7504 ... ..@ feature :Formal class 'GRanges' [package "GenomicRanges"] with 7 slots .. .. ..@ seqnames :Formal class 'Rle' [package "S4Vectors"] with 4 slots .. .. .. .. ..@ values : Factor w/ 93 levels "chr1","chr2",..: 1 2 3 4 5 6 7 8 9 10 ... .. .. .. .. ..@ lengths : int [1:60] 41045 44885 36961 36083 33475 31586 28823 27494 26865 21068 ... .. .. .. .. ..@ elementMetadata: NULL .. .. .. .. ..@ metadata : list() .. .. ..@ ranges :Formal class 'IRanges' [package "IRanges"] with 6 slots .. .. .. .. ..@ start : int [1:527055] 10001 15001 25001 35001 50001 55001 60001 65001 70001 75001 ... .. .. .. .. ..@ width : int [1:527055] 5000 5000 5000 5000 5000 5000 5000 5000 5000 5000 ... .. .. .. .. ..@ NAMES : NULL .. .. .. .. ..@ elementType : chr "ANY" .. .. .. .. ..@ elementMetadata: NULL .. .. .. .. ..@ metadata : list() .. .. ..@ strand :Formal class 'Rle' [package "S4Vectors"] with 4 slots .. .. .. .. ..@ values : Factor w/ 3 levels "+","-","*": 3 .. .. .. .. ..@ lengths : int 527055 .. .. .. .. ..@ elementMetadata: NULL .. .. .. .. ..@ metadata : list() .. .. ..@ seqinfo :Formal class 'Seqinfo' [package "GenomeInfoDb"] with 4 slots .. .. .. .. ..@ seqnames : chr [1:93] "chr1" "chr2" "chr3" "chr4" ... .. .. .. .. ..@ seqlengths : int [1:93] NA NA NA NA NA NA NA NA NA NA ... .. .. .. .. ..@ is_circular: logi [1:93] NA NA NA NA NA NA ... .. .. .. .. ..@ genome : chr [1:93] NA NA NA NA ... .. .. ..@ elementMetadata:Formal class 'DataFrame' [package "S4Vectors"] with 6 slots .. .. .. .. ..@ rownames : NULL .. .. .. .. ..@ nrows : int 527055 .. .. .. .. ..@ listData :List of 1 .. .. .. .. .. ..$ name: chr [1:527055] "chr1:10001-15000" "chr1:15001-20000" "chr1:25001-30000" "chr1:35001-40000" ... .. .. .. .. ..@ elementType : chr "ANY" .. .. .. .. ..@ elementMetadata: NULL .. .. .. .. ..@ metadata : list() .. .. ..@ elementType : chr "ANY" .. .. ..@ metadata : list() ..@ peak :Formal class 'GRanges' [package "GenomicRanges"] with 7 slots .. .. ..@ seqnames :Formal class 'Rle' [package "S4Vectors"] with 4 slots .. .. .. .. ..@ values : Factor w/ 0 levels: .. .. .. .. ..@ lengths : int(0) .. .. .. .. ..@ elementMetadata: NULL .. .. .. .. ..@ metadata : list() .. .. ..@ ranges :Formal class 'IRanges' [package "IRanges"] with 6 slots .. .. .. .. ..@ start : int(0) .. .. .. .. ..@ width : int(0) .. .. .. .. ..@ NAMES : NULL .. .. .. .. ..@ elementType : chr "ANY" .. .. .. .. ..@ elementMetadata: NULL .. .. .. .. ..@ metadata : list() .. .. ..@ strand :Formal class 'Rle' [package "S4Vectors"] with 4 slots .. .. .. .. ..@ values : Factor w/ 3 levels "+","-","*": .. .. .. .. ..@ lengths : int(0) .. .. .. .. ..@ elementMetadata: NULL .. .. .. .. ..@ metadata : list() .. .. ..@ seqinfo :Formal class 'Seqinfo' [package "GenomeInfoDb"] with 4 slots .. .. .. .. ..@ seqnames : chr(0) .. .. .. .. ..@ seqlengths : int(0) .. .. .. .. ..@ is_circular: logi(0) .. .. .. .. ..@ genome : chr(0) .. .. ..@ elementMetadata:Formal class 'DataFrame' [package "S4Vectors"] with 6 slots .. .. .. .. ..@ rownames : NULL .. .. .. .. ..@ nrows : int 0 .. .. .. .. ..@ listData : Named list() .. .. .. .. ..@ elementType : chr "ANY" .. .. .. .. ..@ elementMetadata: NULL .. .. .. .. ..@ metadata : list() .. .. ..@ elementType : chr "ANY" .. .. ..@ metadata : list() ..@ bmat :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots .. .. ..@ i : int [1:8845694] 140 423 603 754 790 797 816 893 1024 1134 ... .. .. ..@ p : int [1:527056] 0 20 32 38 39 40 45 50 52 53 ... .. .. ..@ Dim : int [1:2] 2034 527055 .. .. ..@ Dimnames:List of 2 .. .. .. ..$ : NULL .. .. .. ..$ : NULL .. .. ..@ x : num [1:8845694] 1 1 1 1 1 1 1 1 1 1 ... .. .. ..@ factors : list() ..@ pmat :Formal class 'lsCMatrix' [package "Matrix"] with 7 slots .. .. ..@ i : int(0) .. .. ..@ p : int 0 .. .. ..@ Dim : int [1:2] 0 0 .. .. ..@ Dimnames:List of 2 .. .. .. ..$ : NULL .. .. .. ..$ : NULL .. .. ..@ x : logi(0) .. .. ..@ uplo : chr "U" .. .. ..@ factors : list() ..@ gmat :Formal class 'lsCMatrix' [package "Matrix"] with 7 slots .. .. ..@ i : int(0) .. .. ..@ p : int 0 .. .. ..@ Dim : int [1:2] 0 0 .. .. ..@ Dimnames:List of 2 .. .. .. ..$ : NULL .. .. .. ..$ : NULL .. .. ..@ x : logi(0) .. .. ..@ uplo : chr "U" .. .. ..@ factors : list() ..@ jmat :Formal class 'jaccard' [package "SnapATAC"] with 5 slots .. .. ..@ jmat : num [1:2034, 1:2000] 0.368 0.565 -0.462 -1.002 -0.605 ... .. .. .. ..- attr(*, "scaled:center")= num [1:2034] -0.001078 -0.001479 0.000499 0.003922 0.001086 ... .. .. .. ..- attr(*, "scaled:scale")= num [1:2034] 0.00169 0.00147 0.0018 0.00412 0.0052 ... .. .. ..@ p1 : num [1:2034] 0.0068 0.00418 0.00372 0.01058 0.01066 ... .. .. ..@ p2 : num [1:2000] 0.0068 0.00418 0.00372 0.01058 0.01066 ... .. .. ..@ norm : logi TRUE .. .. ..@ method: chr "normOVE" ..@ smat :Formal class 'dim.reduct' [package "SnapATAC"] with 5 slots .. .. ..@ imat : chr "jmat" .. .. ..@ dmat : num [1:2034, 1:50] 0.02064 0.01184 0.02701 0.03521 0.00685 ... .. .. ..@ sdev : num [1:50] 1060 731 436 401 300 ... .. .. ..@ iter : num 1000 .. .. ..@ method: chr "svd" ..@ graph :Formal class 'kgraph' [package "SnapATAC"] with 5 slots .. .. ..@ mat :Formal class 'dsCMatrix' [package "Matrix"] with 7 slots .. .. .. .. ..@ i : int(0) .. .. .. .. ..@ p : int 0 .. .. .. .. ..@ Dim : int [1:2] 0 0 .. .. .. .. ..@ Dimnames:List of 2 .. .. .. .. .. ..$ : NULL .. .. .. .. .. ..$ : NULL .. .. .. .. ..@ x : num(0) .. .. .. .. ..@ uplo : chr "U" .. .. .. .. ..@ factors : list() .. .. ..@ file : chr(0) .. .. ..@ k : num(0) .. .. ..@ snn : logi FALSE .. .. ..@ snn.prune: num(0) ..@ tsne : logi[0 , 0 ] ..@ umap : logi[0 , 0 ] ..@ cluster : Factor w/ 0 levels:
end_time <- Sys.time()
end_time - start_time
Time difference of 26.8676 secs
dim(x.sp@smat@dmat)
df_out = data.frame(t(x.sp@smat@dmat)[1:20,],row.names = sprintf("PC_%s",seq(1:20)))
colnames(df_out) = SnapATAC_metadata[as.character(x.sp@metaData$barcode),]$ID
dim(df_out)
df_out[1:5,1:5]
singles-BM1214-GMP-160421-66 | singles-BM0828-GMP-151027-33 | BM1077-CMP-Frozen-160106-36 | singles-BM0828-CMP-frozen-151118-87 | singles-160818-BM1137-pDC-LS-78 | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
PC_1 | 0.020643987 | 0.01184386 | 0.027009824 | 0.035205241 | 0.006850604 |
PC_2 | -0.025593622 | -0.01493672 | -0.006608372 | 0.011777608 | -0.039564712 |
PC_3 | -0.008941719 | -0.02134873 | 0.009880046 | 0.003967227 | 0.011335911 |
PC_4 | 0.024685366 | 0.05269438 | -0.012504068 | 0.004067868 | 0.048309956 |
PC_5 | -0.031439949 | -0.05372980 | 0.003005843 | 0.042520841 | 0.028402383 |
sum(colnames(df_out) == rownames(metadata))
df_out = df_out[rownames(metadata)]
dim(df_out)
df_out[1:5,1:5]
BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
PC_1 | 0.007044647 | 0.00003982818 | 0.0003291854 | 0.005399350 | 0.015577712 |
PC_2 | -0.012394908 | -0.01210933592 | -0.0110375832 | -0.009926376 | -0.007061154 |
PC_3 | -0.019031289 | -0.01511729781 | -0.0202136273 | -0.019532144 | -0.011437661 |
PC_4 | 0.036394483 | 0.03491502928 | 0.0443325083 | 0.035654907 | 0.027807567 |
PC_5 | -0.044011702 | -0.05368559223 | -0.0465244794 | -0.055047584 | -0.014651750 |
sum(colnames(df_out) == rownames(metadata))
saveRDS(df_out, file = '../../output/feature_matrices/FM_SnapATAC_buenrostro2018.rds')
x.sp = runKNN(
obj=x.sp,
pca.dims=1:30,
weight.by.sd=FALSE,
k=15
);
x.sp = runCluster(
obj=x.sp,
tmp.folder=tempdir(),
louvain.lib="R-igraph",
path.to.snaptools=NULL,
seed.use=10
);
x.sp = runViz(
obj=x.sp,
tmp.folder=tempdir(),
dims=2,
pca.dims=1:30,
weight.by.sd=FALSE,
method="Rtsne",
fast_tsne_path=NULL,
Y.init=NULL,
seed.use=10,
num.cores=5
);
x.sp = runViz(
obj=x.sp,
tmp.folder=tempdir(),
dims=2,
pca.dims=1:30,
weight.by.sd=FALSE,
method="umap",
fast_tsne_path=NULL,
Y.init=NULL,
seed.use=10,
num.cores=5
);
x.sp@sample = SnapATAC_metadata[as.character(x.sp@metaData$barcode),]$label
plotViz(
obj=x.sp,
method="tsne",
point.size=0.5,
point.shape=19,
point.alpha=0.8,
point.color="cluster",
text.add=TRUE,
text.size=1.5,
text.color="black",
text.halo.add=TRUE,
text.halo.color="white",
text.halo.width=0.2,
down.sample=10000,
pdf.file.name=NULL,
pdf.width=7,
pdf.height=7
);
plotViz(
obj=x.sp,
method="umap",
point.size=0.5,
point.shape=19,
point.alpha=0.8,
point.color="cluster",
text.add=FALSE,
text.size=1.5,
text.color="black",
text.halo.add=TRUE,
text.halo.color="white",
text.halo.width=0.2,
down.sample=10000,
legend.add=TRUE,
pdf.file.name=NULL,
pdf.width=7,
pdf.height=7
);
plotViz(
obj=x.sp,
method="tsne",
point.size=0.5,
point.shape=19,
point.alpha=0.8,
point.color="sample",
text.add=TRUE,
text.size=1.5,
text.color="black",
text.halo.add=TRUE,
text.halo.color="white",
text.halo.width=0.2,
down.sample=10000,
pdf.file.name=NULL,
pdf.width=5,
pdf.height=5
);
plotViz(
obj=x.sp,
method="umap",
point.size=0.5,
point.shape=19,
point.alpha=0.8,
point.color="sample",
text.add=FALSE,
text.size=1.5,
text.color="black",
text.halo.add=TRUE,
text.halo.color="white",
text.halo.width=0.2,
down.sample=10000,
legend.add=TRUE,
pdf.file.name=NULL,
pdf.width=5,
pdf.height=5
);
feature.value = SnapATAC::rowSums(x.sp@bmat);
feature.value = pmin(feature.value, quantile(feature.value, 0.99));
feature.value = pmax(feature.value, 0);
feature.value = (feature.value-min(feature.value))/(max(feature.value)-min(feature.value));
PlotFeatureSingle(
obj=x.sp,
feature.value=feature.value,
method="tsne",
point.size=0.3,
point.shape=19,
point.color="red",
down.sample=10000,
pdf.file.name=NULL,
pdf.width=7,
pdf.height==7
);
PlotFeatureSingle(
obj=x.sp,
feature.value=feature.value,
method="umap",
point.size=0.2,
point.shape=19,
point.color="red",
down.sample=10000,
pdf.file.name=NULL,
pdf.width=7,
pdf.height==7
);
sessionInfo()
R version 3.5.1 (2018-07-02) Platform: x86_64-conda_cos6-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_SnapATAC_py37/lib/R/lib/libRblas.so LAPACK: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_SnapATAC_py37/lib/R/lib/libRlapack.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] parallel stats4 stats graphics grDevices utils datasets [8] methods base other attached packages: [1] GenomicRanges_1.34.0 GenomeInfoDb_1.18.2 IRanges_2.16.0 [4] S4Vectors_0.20.1 BiocGenerics_0.28.0 SnapATAC_1.0.0 [7] rhdf5_2.26.2 Matrix_1.2-17 loaded via a namespace (and not attached): [1] reticulate_1.12 pbdZMQ_0.3-3 locfit_1.5-9.1 [4] repr_1.0.1 lattice_0.20-38 colorspace_1.4-1 [7] vctrs_0.1.0 doSNOW_1.0.16 htmltools_0.3.6 [10] snow_0.4-3 base64enc_0.1-3 rlang_0.3.4 [13] pillar_1.4.1 sp_1.3-1 RColorBrewer_1.1-2 [16] uuid_0.1-2 GenomeInfoDbData_1.2.0 foreach_1.4.4 [19] plyr_1.8.4 umap_0.2.2.0 zlibbioc_1.28.0 [22] munsell_0.5.0 raster_2.9-5 codetools_0.2-16 [25] evaluate_0.14 doParallel_1.0.14 irlba_2.3.3 [28] IRdisplay_0.7.0 Rcpp_1.0.1 edgeR_3.24.3 [31] backports_1.1.4 scales_1.0.0 limma_3.38.3 [34] IRkernel_1.0.1 jsonlite_1.6 XVector_0.22.0 [37] RSpectra_0.14-0 RANN_2.6.1 digest_0.6.19 [40] Rtsne_0.15 grid_3.5.1 tools_3.5.1 [43] bitops_1.0-6 magrittr_1.5 RCurl_1.95-4.12 [46] crayon_1.3.4 bigmemory.sri_0.1.3 bigmemory_4.5.33 [49] pkgconfig_2.0.2 zeallot_0.1.0 iterators_1.0.10 [52] Rhdf5lib_1.6.0 igraph_1.2.4.1 compiler_3.5.1
save.image(file = 'SnapATAC_buenrostro2018.RData')