devtools::install_github("zji90/SCRATdatahg19")
source("https://raw.githubusercontent.com/zji90/SCRATdata/master/installcode.R")
library(devtools)
library(GenomicAlignments)
library(Rsamtools)
library(SCRATdatahg19)
library(SCRAT)
Loading required package: BiocGenerics Loading required package: parallel Attaching package: ‘BiocGenerics’ The following objects are masked from ‘package:parallel’: clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB The following objects are masked from ‘package:stats’: IQR, mad, sd, var, xtabs The following objects are masked from ‘package:base’: anyDuplicated, append, as.data.frame, basename, cbind, colMeans, colnames, colSums, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which, which.max, which.min Loading required package: S4Vectors Loading required package: stats4 Attaching package: ‘S4Vectors’ The following object is masked from ‘package:base’: expand.grid Loading required package: IRanges Loading required package: GenomeInfoDb Loading required package: GenomicRanges Loading required package: SummarizedExperiment Loading required package: Biobase Welcome to Bioconductor Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'. Loading required package: DelayedArray Loading required package: matrixStats Attaching package: ‘matrixStats’ The following objects are masked from ‘package:Biobase’: anyMissing, rowMedians Loading required package: BiocParallel Attaching package: ‘DelayedArray’ The following objects are masked from ‘package:matrixStats’: colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges The following objects are masked from ‘package:base’: aperm, apply Loading required package: Biostrings Loading required package: XVector Attaching package: ‘Biostrings’ The following object is masked from ‘package:DelayedArray’: type The following object is masked from ‘package:base’: strsplit Loading required package: Rsamtools Warning message: “replacing previous import ‘DT::dataTableOutput’ by ‘shiny::dataTableOutput’ when loading ‘SCRAT’”Warning message: “replacing previous import ‘DT::renderDataTable’ by ‘shiny::renderDataTable’ when loading ‘SCRAT’”Warning message: “replacing previous import ‘mclust::em’ by ‘shiny::em’ when loading ‘SCRAT’”
start_time = Sys.time()
metadata <- read.table('../../input/metadata.tsv',
header = TRUE,
stringsAsFactors=FALSE,quote="",row.names=1)
SCRATsummary <- function (dir = "", genome, bamfile = NULL, singlepair = "automated",
removeblacklist = T, log2transform = T, adjustlen = T, featurelist = c("GENE",
"ENCL", "MOTIF_TRANSFAC", "MOTIF_JASPAR", "GSEA"), customfeature = NULL,
Genestarttype = "TSSup", Geneendtype = "TSSdown", Genestartbp = 3000,
Geneendbp = 1000, ENCLclunum = 2000, Motifflank = 100, GSEAterm = "c5.bp",
GSEAstarttype = "TSSup", GSEAendtype = "TSSdown", GSEAstartbp = 3000,
GSEAendbp = 1000)
{
if (is.null(bamfile)) {
bamfile <- list.files(dir, pattern = ".bam$")
}
datapath <- system.file("extdata", package = paste0("SCRATdata",
genome))
bamdata <- list()
for (i in bamfile) {
filepath <- file.path(dir, i)
if (singlepair == "automated") {
bamfile <- BamFile(filepath)
tmpsingle <- readGAlignments(bamfile)
tmppair <- readGAlignmentPairs(bamfile)
pairendtf <- testPairedEndBam(bamfile)
if (pairendtf) {
tmp <- tmppair
startpos <- pmin(start(first(tmp)), start(last(tmp)))
endpos <- pmax(end(first(tmp)), end(last(tmp)))
id <- which(!is.na(as.character(seqnames(tmp))))
tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))
}
else {
tmp <- GRanges(tmpsingle)
}
}
else if (singlepair == "single") {
tmp <- GRanges(readGAlignments(filepath))
}
else if (singlepair == "pair") {
tmp <- readGAlignmentPairs(filepath)
startpos <- pmin(start(first(tmp)), start(last(tmp)))
endpos <- pmax(end(first(tmp)), end(last(tmp)))
id <- which(!is.na(as.character(seqnames(tmp))))
tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))
}
if (removeblacklist) {
load(paste0(datapath, "/gr/blacklist.rda"))
tmp <- tmp[-as.matrix(findOverlaps(tmp, gr))[, 1],
]
}
bamdata[[i]] <- tmp
}
bamsummary <- sapply(bamdata, length)
allres <- NULL
datapath <- system.file("extdata", package = paste0("SCRATdata",
genome))
if ("GENE" %in% featurelist) {
print("Processing GENE features")
load(paste0(datapath, "/gr/generegion.rda"))
if (Genestarttype == "TSSup") {
grstart <- ifelse(as.character(strand(gr)) == "+",
start(gr) - as.numeric(Genestartbp), end(gr) +
as.numeric(Genestartbp))
}
else if (Genestarttype == "TSSdown") {
grstart <- ifelse(as.character(strand(gr)) == "+",
start(gr) + as.numeric(Genestartbp), end(gr) -
as.numeric(Genestartbp))
}
else if (Genestarttype == "TESup") {
grstart <- ifelse(as.character(strand(gr)) == "+",
end(gr) - as.numeric(Genestartbp), start(gr) +
as.numeric(Genestartbp))
}
else if (Genestarttype == "TESdown") {
grstart <- ifelse(as.character(strand(gr)) == "+",
end(gr) + as.numeric(Genestartbp), start(gr) -
as.numeric(Genestartbp))
}
if (Geneendtype == "TSSup") {
grend <- ifelse(as.character(strand(gr)) == "+",
start(gr) - as.numeric(Geneendbp), end(gr) +
as.numeric(Geneendbp))
}
else if (Geneendtype == "TSSdown") {
grend <- ifelse(as.character(strand(gr)) == "+",
start(gr) + as.numeric(Geneendbp), end(gr) -
as.numeric(Geneendbp))
}
else if (Geneendtype == "TESup") {
grend <- ifelse(as.character(strand(gr)) == "+",
end(gr) - as.numeric(Geneendbp), start(gr) +
as.numeric(Geneendbp))
}
else if (Geneendtype == "TESdown") {
grend <- ifelse(as.character(strand(gr)) == "+",
end(gr) + as.numeric(Geneendbp), start(gr) -
as.numeric(Geneendbp))
}
ngr <- names(gr)
gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart,
grend), end = pmax(grstart, grend)))
names(gr) <- ngr
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- end(gr) - start(gr) + 1
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
}
if ("ENCL" %in% featurelist) {
print("Processing ENCL features")
load(paste0(datapath, "/gr/ENCL", ENCLclunum, ".rda"))
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- sapply(gr, function(i) sum(end(i) - start(i) +
1))
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
}
if ("MOTIF_TRANSFAC" %in% featurelist) {
print("Processing MOTIF_TRANSFAC features")
load(paste0(datapath, "/gr/transfac1.rda"))
gr <- flank(gr, as.numeric(Motifflank), both = T)
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- sapply(gr, function(i) sum(end(i) - start(i) +
1))
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
load(paste0(datapath, "/gr/transfac2.rda"))
gr <- flank(gr, as.numeric(Motifflank), both = T)
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- sapply(gr, function(i) sum(end(i) - start(i) +
1))
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
if (genome %in% c("hg19", "hg38")) {
load(paste0(datapath, "/gr/transfac3.rda"))
gr <- flank(gr, as.numeric(Motifflank), both = T)
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- sapply(gr, function(i) sum(end(i) -
start(i) + 1))
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
}
}
if ("MOTIF_JASPAR" %in% featurelist) {
print("Processing MOTIF_JASPAR features")
load(paste0(datapath, "/gr/jaspar1.rda"))
gr <- flank(gr, as.numeric(Motifflank), both = T)
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- sapply(gr, function(i) sum(end(i) - start(i) +
1))
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
load(paste0(datapath, "/gr/jaspar2.rda"))
gr <- flank(gr, as.numeric(Motifflank), both = T)
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- sapply(gr, function(i) sum(end(i) - start(i) +
1))
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
}
if ("GSEA" %in% featurelist) {
print("Processing GSEA features")
for (i in GSEAterm) {
load(paste0(datapath, "/gr/GSEA", i, ".rda"))
allgr <- gr
for (sgrn in names(allgr)) {
gr <- allgr[[sgrn]]
if (GSEAstarttype == "TSSup") {
grstart <- ifelse(as.character(strand(gr)) ==
"+", start(gr) - as.numeric(GSEAstartbp),
end(gr) + as.numeric(GSEAstartbp))
}
else if (GSEAstarttype == "TSSdown") {
grstart <- ifelse(as.character(strand(gr)) ==
"+", start(gr) + as.numeric(GSEAstartbp),
end(gr) - as.numeric(GSEAstartbp))
}
else if (GSEAstarttype == "TESup") {
grstart <- ifelse(as.character(strand(gr)) ==
"+", end(gr) - as.numeric(GSEAstartbp), start(gr) +
as.numeric(GSEAstartbp))
}
else if (GSEAstarttype == "TESdown") {
grstart <- ifelse(as.character(strand(gr)) ==
"+", end(gr) + as.numeric(GSEAstartbp), start(gr) -
as.numeric(GSEAstartbp))
}
if (GSEAendtype == "TSSup") {
grend <- ifelse(as.character(strand(gr)) ==
"+", start(gr) - as.numeric(GSEAendbp), end(gr) +
as.numeric(GSEAendbp))
}
else if (GSEAendtype == "TSSdown") {
grend <- ifelse(as.character(strand(gr)) ==
"+", start(gr) + as.numeric(GSEAendbp), end(gr) -
as.numeric(GSEAendbp))
}
else if (GSEAendtype == "TESup") {
grend <- ifelse(as.character(strand(gr)) ==
"+", end(gr) - as.numeric(GSEAendbp), start(gr) +
as.numeric(GSEAendbp))
}
else if (GSEAendtype == "TESdown") {
grend <- ifelse(as.character(strand(gr)) ==
"+", end(gr) + as.numeric(GSEAendbp), start(gr) -
as.numeric(GSEAendbp))
}
ngr <- names(gr)
gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart,
grend), end = pmax(grstart, grend)))
names(gr) <- ngr
allgr[[sgrn]] <- gr
}
gr <- allgr
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- sapply(gr, function(i) sum(end(i) -
start(i) + 1))
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
}
}
if ("Custom" %in% featurelist) {
print("Processing custom features")
gr <- read.table(customfeature, as.is = T, sep = "\t")
gr <- GRanges(seqnames = gr[, 1], IRanges(start = gr[,
2], end = gr[, 3]))
tmp <- sapply(bamdata, function(i) countOverlaps(gr,
i))
tmp <- sweep(tmp, 2, bamsummary, "/") * 10000
if (log2transform) {
tmp <- log2(tmp + 1)
}
if (adjustlen) {
grrange <- end(gr) - start(gr) + 1
tmp <- sweep(tmp, 1, grrange, "/") * 1e+06
}
tmp <- tmp[rowSums(tmp) > 0, , drop = F]
allres <- rbind(allres, tmp)
}
allres
}
df_out <- SCRATsummary(dir = "../../input/sc-bams_nodup/",
genome = "hg19",
featurelist="MOTIF_JASPAR",
log2transform = FALSE, adjustlen = FALSE)
Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr1_gl000192_random, chr7_gl000195_random, chr8_gl000197_random, chr9_gl000201_random, chrUn_gl000220, chrUn_gl000240 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chr17_gl000205_random, chr19_gl000208_random, chrUn_gl000220, chrUn_gl000222 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chrUn_gl000219 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr1_gl000191_random, chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chrUn_gl000215, chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000216, chrUn_gl000225 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220, chrUn_gl000221, chrUn_gl000223 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000239 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220, chrUn_gl000228 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chr17_gl000205_random, chr17_gl000206_random, chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random - in 'y': chr21, chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chrUn_gl000219, chrUn_gl000220 - in 'y': chr15, chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223, chrUn_gl000225 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random, chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chr11_gl000202_random, chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr17_gl000205_random, chrUn_gl000214, chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chrUn_gl000220, chrUn_gl000242 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220 - in 'y': chr18 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chrUn_gl000220, chrUn_gl000225 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrUn_gl000220 - in 'y': chr14, chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr7_gl000195_random, chr9_gl000199_random, chr17_gl000205_random, chrUn_gl000212, chrUn_gl000219, chrUn_gl000220 - in 'y': chrY Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”
[1] "Processing MOTIF_JASPAR features"
Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr1_gl000192_random, chr7_gl000195_random, chr8_gl000197_random, chr9_gl000201_random, chrUn_gl000220, chrUn_gl000240 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr7_gl000195_random, chr17_gl000205_random, chr19_gl000208_random, chrUn_gl000220, chrUn_gl000222 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr7_gl000195_random, chrUn_gl000219 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr17_gl000205_random Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr1_gl000191_random, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr7_gl000195_random, chrUn_gl000215, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chrUn_gl000216, chrUn_gl000225 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chrUn_gl000220, chrUn_gl000221, chrUn_gl000223 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000239 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chrUn_gl000220, chrUn_gl000228 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr7_gl000195_random, chr17_gl000205_random, chr17_gl000206_random, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr21, chrY - in 'y': chrM, chr17_gl000205_random Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr15, chrY - in 'y': chrM, chr7_gl000195_random, chrUn_gl000219, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr17_gl000205_random Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223, chrUn_gl000225 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr7_gl000195_random, chr11_gl000202_random, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr17_gl000205_random, chrUn_gl000214, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr7_gl000195_random, chrUn_gl000220, chrUn_gl000242 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr18 - in 'y': chrM, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr18, chrY - in 'y': chrM Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrX, chrY - in 'y': chrM Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr20, chrX, chrY - in 'y': chrM Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr15 - in 'y': chrM Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr7_gl000195_random, chrUn_gl000220, chrUn_gl000225 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chr14, chrY - in 'y': chrM, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y): “Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrY - in 'y': chrM, chr7_gl000195_random, chr9_gl000199_random, chr17_gl000205_random, chrUn_gl000212, chrUn_gl000219, chrUn_gl000220 Make sure to always combine/compare objects based on the same reference genome (use suppressWarnings() to suppress this warning).”
end_time <- Sys.time()
end_time - start_time
Time difference of 6.359369 hours
dim(df_out)
df_out[1:5,1:5]
BM1077-CLP-Frozen-160106-13.dedup.st.bam | BM1077-CLP-Frozen-160106-14.dedup.st.bam | BM1077-CLP-Frozen-160106-2.dedup.st.bam | BM1077-CLP-Frozen-160106-21.dedup.st.bam | BM1077-CLP-Frozen-160106-27.dedup.st.bam | |
---|---|---|---|---|---|
MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |
MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |
MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |
MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |
MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |
colnames(df_out) = sapply(strsplit(colnames(df_out), "\\."),'[',1)
dim(df_out)
df_out[1:5,1:5]
BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 | |
---|---|---|---|---|---|
MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |
MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |
MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |
MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |
MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |
if(! all(colnames(df_out) == rownames(metadata))){
df_out = df_out[,rownames(metadata)]
dim(df_out)
df_out[1:5,1:5]
}
dim(df_out)
df_out[1:5,1:5]
BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 | |
---|---|---|---|---|---|
MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |
MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |
MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |
MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |
MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |
saveRDS(df_out, file = '../../output/feature_matrices/FM_SCRAT_buenrostro2018_motifs.rds')
sessionInfo()
R version 3.5.1 (2018-07-02) Platform: x86_64-conda_cos6-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS/LAPACK: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_SCRAT/lib/R/lib/libRblas.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats4 parallel stats graphics grDevices utils datasets [8] methods base other attached packages: [1] SCRAT_0.99.0 SCRATdatahg19_0.99.1 [3] GenomicAlignments_1.18.1 Rsamtools_1.34.0 [5] Biostrings_2.50.2 XVector_0.22.0 [7] SummarizedExperiment_1.12.0 DelayedArray_0.8.0 [9] BiocParallel_1.16.6 matrixStats_0.54.0 [11] Biobase_2.42.0 GenomicRanges_1.34.0 [13] GenomeInfoDb_1.18.1 IRanges_2.16.0 [15] S4Vectors_0.20.1 BiocGenerics_0.28.0 [17] usethis_1.5.0 devtools_2.0.2 loaded via a namespace (and not attached): [1] tsne_0.1-3 bitops_1.0-6 fs_1.2.7 [4] RColorBrewer_1.1-2 rprojroot_1.3-2 repr_0.19.2 [7] tools_3.5.1 backports_1.1.4 R6_2.4.0 [10] DT_0.5 KernSmooth_2.23-15 lazyeval_0.2.2 [13] colorspace_1.4-1 withr_2.1.2 tidyselect_0.2.5 [16] prettyunits_1.0.2 processx_3.3.0 compiler_3.5.1 [19] cli_1.1.0 desc_1.2.0 caTools_1.17.1.2 [22] scales_1.0.0 callr_3.2.0 pbdZMQ_0.3-3 [25] stringr_1.4.0 digest_0.6.18 shinyBS_0.61 [28] scatterD3_0.9 dbscan_1.1-3 base64enc_0.1-3 [31] pkgconfig_2.0.2 htmltools_0.3.6 sessioninfo_1.1.1 [34] htmlwidgets_1.3 rlang_0.3.4 shiny_1.3.2 [37] jsonlite_1.6 mclust_5.4.3 gtools_3.8.1 [40] dplyr_0.8.0.1 RCurl_1.95-4.11 magrittr_1.5 [43] GenomeInfoDbData_1.2.0 Matrix_1.2-17 Rcpp_1.0.1 [46] IRkernel_0.8.15 munsell_0.5.0 stringi_1.4.3 [49] zlibbioc_1.28.0 pkgbuild_1.0.3 gplots_3.0.1.1 [52] plyr_1.8.4 grid_3.5.1 gdata_2.18.0 [55] promises_1.0.1 crayon_1.3.4 lattice_0.20-38 [58] IRdisplay_0.7.0 ps_1.3.0 pillar_1.3.1 [61] uuid_0.1-2 reshape2_1.4.3 pkgload_1.0.2 [64] glue_1.3.1 evaluate_0.13 remotes_2.0.4 [67] httpuv_1.5.1 gtable_0.3.0 purrr_0.3.2 [70] assertthat_0.2.1 ggplot2_3.1.1 mime_0.6 [73] xtable_1.8-4 later_0.8.0 tibble_2.1.1 [76] pheatmap_1.0.12 memoise_1.1.0 ellipse_0.4.1
save.image(file = 'SCRAT_buenrostro2018.RData')