library(cisTopic)
packageVersion("cisTopic")
[1] ‘0.2.1’
start_time = Sys.time()
set.seed(2019)
metadata <- read.table('../../input/metadata.tsv',
header = TRUE,
stringsAsFactors=FALSE,quote="",row.names=1)
head(metadata)
label | |
---|---|
AAACGAAAGCGCAATG-1 | 2 |
AAACGAAAGGGTATCG-1 | 6 |
AAACGAAAGTAACATG-1 | 1 |
AAACGAAAGTTACACC-1 | 4 |
AAACGAACAGAGATGC-1 | 5 |
AAACGAACATGCTATG-1 | 6 |
pathToBams <- '../../input/sc-bams_nodup/'
bamFiles <- paste(pathToBams, list.files(pathToBams), sep='')
head(bamFiles)
cellnames <- sapply(strsplit(basename(bamFiles),'.',fixed = TRUE), "[[", 2)
head(cellnames)
sum(cellnames == rownames(metadata))
ix = match(rownames(metadata),cellnames)
bamFiles = bamFiles[ix]
cellnames = cellnames[ix]
sum(cellnames == rownames(metadata))
regions <- '../../input/atac_v1_pbmc_5k_peaks.bed'
cisTopicObject <- createcisTopicObjectFromBAM(bamFiles, regions, project.name='10xpbmc5k')
cisTopicObject <- renameCells(cisTopicObject, cellnames)
IOPub data rate exceeded. The notebook server will temporarily stop sending output to the client in order to avoid crashing it. To change this limit, set the config variable `--NotebookApp.iopub_data_rate_limit`. Current values: NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec) NotebookApp.rate_limit_window=3.0 (secs)
cisTopicObject <- runModels(cisTopicObject, topic=c(10, 20, 30, 40, 45, 50), seed=987, nCores=10, burnin = 120, iterations = 150, addModels=FALSE)
[1] "Formatting data..." [1] "The number of cores (10) is higher than the number of models (6)." [1] "Exporting data..." [1] "Running models..."
cisTopicObject <- selectModel(cisTopicObject)
logLikelihoodByIter(cisTopicObject, select=c(10, 20, 30, 40, 45, 50))
end_time <- Sys.time()
end_time - start_time
Time difference of 1.672813 hours
cellassign <- modelMatSelection(cisTopicObject, 'cell', 'Probability')
dim(cellassign)
cellassign[1:5,1:5]
AAACGAAAGCGCAATG-1 | AAACGAAAGGGTATCG-1 | AAACGAAAGTAACATG-1 | AAACGAAAGTTACACC-1 | AAACGAACAGAGATGC-1 | |
---|---|---|---|---|---|
Topic1 | 0.0001023751 | 0.0001793239 | 0.0004855547 | 0.0001616684 | 0.0003309614 |
Topic2 | 0.0173013923 | 0.0173047611 | 0.0531682447 | 0.0522188990 | 0.0008274036 |
Topic3 | 0.0709459459 | 0.0475208464 | 0.0005664805 | 0.0002425026 | 0.0738044018 |
Topic4 | 0.0138206388 | 0.0552317762 | 0.0003237032 | 0.0004041710 | 0.0294555684 |
Topic5 | 0.0197583948 | 0.0272572402 | 0.0266245853 | 0.0317678442 | 0.0081085554 |
sum(colnames(cellassign) == rownames(metadata))
# colnames(cellassign) = rownames(metadata)
# cellassign[1:5,1:5]
saveRDS(cellassign, file = '../../output/feature_matrices/FM_cisTopic_10xpbmc5k.rds')
metadata$label = as.factor(metadata$label)
cisTopicObject <- addCellMetadata(cisTopicObject, cell.data = metadata)
cisTopicObject <- runUmap(cisTopicObject, target='cell')
options(repr.plot.width=10, repr.plot.height=5)
par(mfrow=c(1,2))
plotFeatures(cisTopicObject, method='Umap', target='cell', topic_contr=NULL, colorBy=c('label','pct_ReadsInPeaks'), cex.legend = 0.8, factor.max=.75, dim=2, legend=TRUE, col.low='darkgreen', col.mid='yellow', col.high='brown1', intervals=20)
sessionInfo()
R version 3.5.1 (2018-07-02) Platform: x86_64-pc-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_cisTopic/lib/R/lib/libRblas.so LAPACK: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_cisTopic/lib/R/lib/libRlapack.so locale: [1] en_US.UTF-8 attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] scatterplot3d_0.3-41 plotly_4.9.0 ggplot2_3.1.1 [4] umap_0.2.0.0 Matrix_1.2-17 Rsubread_1.30.9 [7] cisTopic_0.2.1 RevoUtils_11.0.1 RevoUtilsMath_11.0.0 loaded via a namespace (and not attached): [1] bitops_1.0-6 matrixStats_0.54.0 [3] bit64_0.9-7 httr_1.4.0 [5] GenomeInfoDb_1.16.0 repr_0.19.2 [7] tools_3.5.1 R6_2.4.0 [9] DBI_1.0.0 BiocGenerics_0.26.0 [11] lazyeval_0.2.2 colorspace_1.4-1 [13] npsurv_0.4-0 withr_2.1.2 [15] tidyselect_0.2.5 bit_1.1-14 [17] compiler_3.5.1 AUCell_1.5.5 [19] graph_1.58.2 Biobase_2.40.0 [21] RcisTarget_1.3.5 DelayedArray_0.6.6 [23] scales_1.0.0 pbdZMQ_0.3-3 [25] digest_0.6.18 R.utils_2.8.0 [27] XVector_0.20.0 base64enc_0.1-3 [29] pkgconfig_2.0.2 htmltools_0.3.6 [31] htmlwidgets_1.3 rlang_0.3.4 [33] RSQLite_2.1.1 shiny_1.3.1 [35] jsonlite_1.6 BiocParallel_1.14.2 [37] dplyr_0.8.0.1 R.oo_1.22.0 [39] RCurl_1.95-4.12 magrittr_1.5 [41] feather_0.3.3 GenomeInfoDbData_1.1.0 [43] Rcpp_1.0.1 IRkernel_0.8.15 [45] munsell_0.5.0 S4Vectors_0.18.3 [47] reticulate_1.12 R.methodsS3_1.7.1 [49] lda_1.4.2 MASS_7.3-51.3 [51] SummarizedExperiment_1.10.1 zlibbioc_1.26.0 [53] plyr_1.8.4 grid_3.5.1 [55] blob_1.1.1 parallel_3.5.1 [57] promises_1.0.1 crayon_1.3.4 [59] doSNOW_1.0.16 lattice_0.20-38 [61] IRdisplay_0.7.0 splines_3.5.1 [63] annotate_1.58.0 hms_0.4.2 [65] pillar_1.3.1 GenomicRanges_1.32.7 [67] uuid_0.1-2 codetools_0.2-16 [69] stats4_3.5.1 XML_3.98-1.19 [71] glue_1.3.1 evaluate_0.13 [73] lsei_1.2-0 data.table_1.12.2 [75] httpuv_1.5.1 foreach_1.5.0 [77] tidyr_0.8.3 gtable_0.3.0 [79] purrr_0.3.2 assertthat_0.2.1 [81] mime_0.6 xtable_1.8-3 [83] RSpectra_0.14-0 later_0.8.0 [85] viridisLite_0.3.0 survival_2.44-1.1 [87] tibble_2.1.1 snow_0.4-3 [89] iterators_1.0.10 AnnotationDbi_1.42.1 [91] memoise_1.1.0 IRanges_2.14.12 [93] fitdistrplus_1.0-14 GSEABase_1.42.0
save.image(file = 'cisTopic_10xpbmc5k.RData')