options(stringsAsFactors = FALSE)
library(GenomicRanges)
library(scABC)
library(Rsamtools)
library(data.table)
library(dplyr)
library(tidyverse)
library(Matrix)
library(gplots)
library(RColorBrewer)
library(devtools)
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
Loading required package: stats4 Loading required package: BiocGenerics Loading required package: parallel Attaching package: ‘BiocGenerics’ The following objects are masked from ‘package:parallel’: clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB The following objects are masked from ‘package:stats’: IQR, mad, sd, var, xtabs The following objects are masked from ‘package:base’: anyDuplicated, append, as.data.frame, basename, cbind, colMeans, colnames, colSums, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which, which.max, which.min Loading required package: S4Vectors Attaching package: ‘S4Vectors’ The following object is masked from ‘package:base’: expand.grid Loading required package: IRanges Loading required package: GenomeInfoDb Warning message: “replacing previous import ‘IRanges::which’ by ‘Matrix::which’ when loading ‘scABC’”Loading required package: Biostrings Loading required package: XVector Attaching package: ‘Biostrings’ The following object is masked from ‘package:base’: strsplit Attaching package: ‘data.table’ The following object is masked from ‘package:GenomicRanges’: shift The following object is masked from ‘package:IRanges’: shift The following objects are masked from ‘package:S4Vectors’: first, second Attaching package: ‘dplyr’ The following objects are masked from ‘package:data.table’: between, first, last The following objects are masked from ‘package:Biostrings’: collapse, intersect, setdiff, setequal, union The following object is masked from ‘package:XVector’: slice The following objects are masked from ‘package:GenomicRanges’: intersect, setdiff, union The following object is masked from ‘package:GenomeInfoDb’: intersect The following objects are masked from ‘package:IRanges’: collapse, desc, intersect, setdiff, slice, union The following objects are masked from ‘package:S4Vectors’: first, intersect, rename, setdiff, setequal, union The following objects are masked from ‘package:BiocGenerics’: combine, intersect, setdiff, union The following objects are masked from ‘package:stats’: filter, lag The following objects are masked from ‘package:base’: intersect, setdiff, setequal, union ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ── ✔ ggplot2 3.1.0 ✔ readr 1.3.1 ✔ tibble 2.1.1 ✔ purrr 0.3.2 ✔ tidyr 0.8.3 ✔ stringr 1.4.0 ✔ ggplot2 3.1.0 ✔ forcats 0.4.0 ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::between() masks data.table::between() ✖ dplyr::collapse() masks Biostrings::collapse(), IRanges::collapse() ✖ dplyr::combine() masks BiocGenerics::combine() ✖ purrr::compact() masks XVector::compact() ✖ dplyr::desc() masks IRanges::desc() ✖ tidyr::expand() masks S4Vectors::expand() ✖ dplyr::filter() masks stats::filter() ✖ dplyr::first() masks data.table::first(), S4Vectors::first() ✖ dplyr::lag() masks stats::lag() ✖ dplyr::last() masks data.table::last() ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position() ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce() ✖ dplyr::rename() masks S4Vectors::rename() ✖ dplyr::slice() masks XVector::slice(), IRanges::slice() ✖ purrr::transpose() masks data.table::transpose() Attaching package: ‘Matrix’ The following object is masked from ‘package:tidyr’: expand The following object is masked from ‘package:S4Vectors’: expand Attaching package: ‘gplots’ The following object is masked from ‘package:IRanges’: space The following object is masked from ‘package:S4Vectors’: space The following object is masked from ‘package:stats’: lowess SHA-1 hash of file is 015fc0457e61e3e93a903e69a24d96d2dac7b9fb
bam2gr <- function(bamfile, PAIRED = FALSE) {
if (PAIRED) {
scanned = scanBam(bamfile, param = ScanBamParam(flag =scanBamFlag(isMinusStrand = FALSE,
isUnmappedQuery = FALSE,
isProperPair = TRUE),
what = c("rname", "pos", "isize")))[[1]]
out = GRanges(seqnames = scanned$rname, IRanges(start = scanned$pos, width = scanned$isize))
} else {
scanned = scanBam(bamfile, param = ScanBamParam(flag =scanBamFlag(isUnmappedQuery = FALSE),
what = c("rname", "pos", "strand", "qwidth")))[[1]]
out = GRanges(seqnames = scanned$rname,
IRanges(start = ifelse(scanned$strand == "-", scanned$pos + scanned$qwidth - 1, scanned$pos),
width = scanned$qwidth))
}
return(out)
}
getCountsByReadGroup <- function(bamfile, peaks, RGtag, tags = NULL, PAIRED = FALSE, VERBOSE = FALSE){
scanned <- Rsamtools::scanBam(bamfile,
param = Rsamtools::ScanBamParam(flag =scanBamFlag(isUnmappedQuery = FALSE),
what = c("rname", "pos", "strand", "qwidth"),
tag = RGtag))[[1]]
if(is.null(tags)){
RGtags = unique(unlist(scanned$tag[RGtag]))
}
counts_mat = Matrix::Matrix(0, nrow = length(peaks), ncol = length(tags), sparse = TRUE)
for(i in 1:length(tags)){
tag = tags[i]
if(VERBOSE){
message("Processing tag ", tag)
}
match_RG <- which(scanned$tag[[RGtag]] == tag)
# convert bamfiles to Genomic Ranges
bam.gr = GenomicRanges::GRanges(seqnames = scanned$rname[match_RG],
IRanges::IRanges(start = sapply(match_RG, function(i) ifelse(scanned$strand[i] == "-",
scanned$pos[i] + scanned$qwidth[i] - 1,
scanned$pos[i])),
width = scanned$qwidth[match_RG]))
counts_mat[,i] = GenomicRanges::countOverlaps(peaks, bam.gr, type = "any", ignore.strand = TRUE)
}
# counts = do.call(cbind, lapply(RGtags, function(x) getTagCounts(x, bamfile, peaks)))
colnames(counts_mat) = tags
return(counts_mat)
}
peaks2GRanges <- function(peaks, upstream = 0, downstream = 0){
peaks.gr = with(peaks, GenomicRanges::GRanges(chrom, IRanges::IRanges(sapply(start, function(x) max(0, x - upstream)), end + downstream),
id = name))#, pVal = pValue))
}
# peaks should be in GenomicRanges
get_counts_from_bam <- function(bamfile, peaks){
param = Rsamtools::ScanBamParam(flag =scanBamFlag(isUnmappedQuery = FALSE),
which = peaks,
what = c("rname", "pos", "strand", "qwidth"))
counts = Rsamtools::countBam(bamfile, param = param,
flag = Rsamtools::scanBamFlag(isDuplicate = FALSE,
isUnmappedQuery = FALSE))
return(counts[,c("space", "start", "end", "file", "records")])
}
getCountsMatrix <- function(bamfiles, peaks, PAIRED = FALSE,
byReadGroup = FALSE,
RGtag = 'RG',
tags2include = NULL,
VERBOSE = FALSE){
peaks.gr = peaks2GRanges(peaks)
if(VERBOSE){
message("beginning reading in counts\n")
}
if(byReadGroup){
if(VERBOSE){
message("getting counts by read group\n")
message("read group tag = ", RGtag, "\n")
}
stopifnot(length(bamfiles) == 1)
counts_mat = getCountsByReadGroup(bamfiles, peaks.gr, RGtag = RGtag,
tags = tags2include);
rownames(counts_mat) = peaks$name
}
else{
nCells = length(bamfiles)
counts_mat = matrix(nrow = length(peaks.gr), ncol = nCells)
for(i in 1:nCells){
if(VERBOSE){
message("Processing file ", bamfiles[i])
}
# convert bamfiles to Genomic Ranges
bam.gr = bam2gr(bamfiles[i], PAIRED = PAIRED)
counts_mat[,i] = countOverlaps(peaks.gr, bam.gr, type = "any", ignore.strand = TRUE)
}
colnames(counts_mat) = bamfiles
rownames(counts_mat) = peaks$name
#counts_info = data.frame(chrom = counts_list[[1]]$space, start = counts_list[[1]]$start, end = counts_list[[1]]$end, name = peaks$id, pValue = peaks$pVal)
}
peaks = peaks[,c("chrom", "start", "end", "name")]#, "pValue")]
return(list(peaks = peaks, ForeGroundMatrix = Matrix::Matrix(counts_mat, sparse = TRUE)))
}
sort_peaks <- function(peaks){
return(peaks[order(peaks$chrom, peaks$start), ])
}
selectPeaks <- function(filename, thresh = 2){
peaks = read.table(file = filename, header = FALSE, sep = "\t",
stringsAsFactors = FALSE);
if(dim(peaks)[2] == 15){
# gapped peaks
column_names = c("chrom", "start", "end", "name", "score", "strand",
"thickStart", "thickEnd", "itemRgb", "blockCount", "blockSizes",
"blockStarts", "signalValue", "pValue", "qValue");
colnames(peaks) = column_names
wanted_peaks = which(peaks$pValue > thresh); # pValue is -log10(p), p < 0.1 => pValue > 2
peaks = sort_peaks(peaks[wanted_peaks, ])
}
if(dim(peaks)[2] == 10){
# narrow peaks
column_names = c("chrom", "start", "end", "name", "score", "strand",
"foldChange", "pValue", "qValue", "summit2PeakDist")
colnames(peaks) = column_names
wanted_peaks = which(peaks$pValue > thresh); # pValue is -log10(p), p < 0.1 => pValue > 2
peaks = sort_peaks(peaks[wanted_peaks, ])
}
# mine
if(dim(peaks)[2] == 3){
# gapped peaks
column_names = c("chrom", "start", "end");
colnames(peaks) = column_names
peaks = sort_peaks(peaks)
}
return(peaks)
}
getBackground <- function(bamfiles, peaks, upstream = 500000,
downstream = 500000, byReadGroup = FALSE,
VERBOSE = FALSE, PAIRED = FALSE){
nCells = length(bamfiles)
background_peaks.gr = peaks2GRanges(peaks, upstream, downstream)
if(byReadGroup){
counts_mat = getCountsByReadGroup2(bamfile, background_peaks.gr);
rownames(counts_mat) = peaks$name
}
else{
counts_mat = matrix(nrow = length(background_peaks.gr), ncol = nCells)
for(i in 1:nCells){
if(VERBOSE){
message("Processing file ", bamfiles[i])
}
# convert bamfiles to Genomic Ranges
bam.gr = bam2gr(bamfiles[i], PAIRED = PAIRED)
counts_mat[,i] = countOverlaps(background_peaks.gr, bam.gr, type = "any", ignore.strand = TRUE)
}
colnames(counts_mat) = bamfiles
rownames(counts_mat) = peaks$name
#counts_info = data.frame(chrom = counts_list[[1]]$space, start = counts_list[[1]]$start, end = counts_list[[1]]$end, name = peaks$id, pValue = peaks$pVal)
}
peaks = peaks[,c("chrom", "start", "end", "name")]#, "pValue")]
return(list(peaks = peaks, BackGroundMatrix = Matrix::Matrix(counts_mat, sparse = TRUE)))
}
start_time <- Sys.time()
metadata <- read.table('../../input/metadata.tsv',
header = TRUE,
stringsAsFactors=FALSE,quote="",row.names=1)
# loading input 1: single-cell BAM files
bamfiles <- list.files("../../input/sc-bams_nodup/",
pattern = "*.bam", full.names = TRUE)
length(bamfiles)
# loading input 2: peaks file
peaks <- selectPeaks("../../input/combined.sorted.merged.bed")
peaks$name <- paste("Peak", peaks$chrom, peaks$start, peaks$end, sep = "_")
dim(peaks)
head(peaks)
chrom | start | end | name |
---|---|---|---|
chr1 | 3002715 | 3002962 | Peak_chr1_3002715_3002962 |
chr1 | 3037090 | 3037634 | Peak_chr1_3037090_3037634 |
chr1 | 3084622 | 3085850 | Peak_chr1_3084622_3085850 |
chr1 | 3103610 | 3104006 | Peak_chr1_3103610_3104006 |
chr1 | 3106869 | 3107182 | Peak_chr1_3106869_3107182 |
chr1 | 3109389 | 3111052 | Peak_chr1_3109389_3111052 |
ForeGround <- getCountsMatrix(bamfiles, peaks)
ForeGroundFiltered <- filterPeaks(ForeGround$ForeGroundMatrix, peaks,
nreads_thresh = 1,
ncells_thresh = length(bamfiles)*0.01)
dim(peaks)
dim(ForeGroundFiltered$peaks)
fm_scABC = as.matrix(ForeGroundFiltered$ForeGroundMatrix)
dim(fm_scABC)
fm_scABC[1:5,1:5]
../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT.header.bam | ../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC.header.bam | ../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC.header.bam | ../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC.header.bam | ../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC.header.bam | |
---|---|---|---|---|---|
Peak_chr1_3084622_3085850 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3109389_3111052 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3111327_3111863 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3174934_3176608 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3238831_3239934 | 0 | 0 | 0 | 0 | 0 |
end_time <- Sys.time()
end_time - start_time
Time difference of 48.47346 mins
all(sapply(strsplit(basename(colnames(fm_scABC)),'\\.'),'[', 2) == rownames(metadata))
colnames(fm_scABC) = sapply(strsplit(basename(colnames(fm_scABC)),'\\.'),'[', 2)
dim(fm_scABC)
fm_scABC[1:5,1:5]
AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT | AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC | AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC | AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC | AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC | |
---|---|---|---|---|---|
Peak_chr1_3084622_3085850 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3109389_3111052 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3111327_3111863 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3174934_3176608 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3238831_3239934 | 0 | 0 | 0 | 0 | 0 |
fm_scABC = fm_scABC[,rownames(metadata)]
dim(fm_scABC)
fm_scABC[1:5,1:5]
TCCGCGAACTAACTAGGTTGCTACGGTCATAGAGGC | TCCGCGAAAGGTCAGCTTTGCGGATAGTGTACTGAC | ATTACTCGTTGCCGTAGGCTTAATCTTGTATAGCCT | TCCGCGAAACCAGGCGCAAAGCTAGGTTGTACTGAC | ATTCAGAATCGTAGCATCGCGCAATGACCCTATCCT | |
---|---|---|---|---|---|
Peak_chr1_3084622_3085850 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3109389_3111052 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3111327_3111863 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3174934_3176608 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3238831_3239934 | 0 | 0 | 0 | 0 | 0 |
all(colnames(fm_scABC) == rownames(metadata))
saveRDS(fm_scABC, file = '../../output/feature_matrices/FM_scABC_cusanovich2018subset.rds')
### Instead of normalizing the peaks-by-cells matrix,
### weights calculated below will be used in k-medoid clustering step based on cell dissimilarity matrix: WeightedCluster::wcKMedoids(FGdist, k = nCluster, weights = weights);
### so it's not considered as part of feature matrix construction
# BackGround <- getBackground(bamfiles, ForeGroundFiltered$peaks,
# upstream = 5e+05, downstream = 5e+05,
# PAIRED = FALSE, byReadGroup = FALSE)
# InSilicoForeGround = ForeGroundFiltered$ForeGroundMatrix
# InSilicoBackGround = BackGround$BackGroundMatrix
# InSilicoPeaks = ForeGroundFiltered$peaks
# fg.mat <- Matrix::Matrix(InSilicoForeGround, sparse = TRUE)
# bg.mat <- Matrix::Matrix(InSilicoBackGround, sparse = TRUE);
# weights = apply(bg.mat, 2, median);
# c = min(8, median(weights))
# # compute weighted k-medioids
# lambda = 1.0
# FGdist = 1 - sparseSpearmanCor(ForeGround);
# weights = 1/(1 + exp(-(weights - c)/(c*lambda)));
BackGround <- getBackground(bamfiles, ForeGroundFiltered$peaks,
upstream = 5e+05, downstream = 5e+05,
PAIRED = FALSE, byReadGroup = FALSE)
InSilicoForeGround = ForeGroundFiltered$ForeGroundMatrix
InSilicoBackGround = BackGround$BackGroundMatrix
InSilicoPeaks = ForeGroundFiltered$peaks
#compute the landmarks
InSilicoLandMarks = computeLandmarks(ForeGround = InSilicoForeGround,
BackGround = InSilicoBackGround,
nCluster = length(unique(metadata$label)), lambda = 1, nTop = 2000)
cor(InSilicoLandMarks, InSilicoLandMarks, method = 'spearman')
1.0000000 | 0.4502911 | 0.5828863 | 0.3910398 | 0.6133986 | 0.5850597 | 0.4262520 | 0.7529231 | 0.6787846 | 0.6137799 | 0.5689958 | 0.5148295 | 0.5075920 |
0.4502911 | 1.0000000 | 0.4783861 | 0.4091725 | 0.5072364 | 0.4641601 | 0.3939587 | 0.4716679 | 0.4326113 | 0.4031804 | 0.4728562 | 0.4416382 | 0.3808171 |
0.5828863 | 0.4783861 | 1.0000000 | 0.5317003 | 0.5680175 | 0.5619443 | 0.4188157 | 0.5798220 | 0.5326821 | 0.5050429 | 0.5248360 | 0.4879837 | 0.4476117 |
0.3910398 | 0.4091725 | 0.5317003 | 1.0000000 | 0.4158435 | 0.3906936 | 0.3107852 | 0.4064649 | 0.3731941 | 0.3641376 | 0.3708085 | 0.3555687 | 0.3026505 |
0.6133986 | 0.5072364 | 0.5680175 | 0.4158435 | 1.0000000 | 0.6023185 | 0.4439613 | 0.6484725 | 0.5950063 | 0.5283675 | 0.5981445 | 0.5088167 | 0.4748662 |
0.5850597 | 0.4641601 | 0.5619443 | 0.3906936 | 0.6023185 | 1.0000000 | 0.3999461 | 0.6076734 | 0.5590317 | 0.4948485 | 0.5069651 | 0.4620556 | 0.4338220 |
0.4262520 | 0.3939587 | 0.4188157 | 0.3107852 | 0.4439613 | 0.3999461 | 1.0000000 | 0.4469632 | 0.4155036 | 0.4120799 | 0.4797128 | 0.6801581 | 0.4932913 |
0.7529231 | 0.4716679 | 0.5798220 | 0.4064649 | 0.6484725 | 0.6076734 | 0.4469632 | 1.0000000 | 0.8028852 | 0.7095414 | 0.5667249 | 0.5378509 | 0.5003190 |
0.6787846 | 0.4326113 | 0.5326821 | 0.3731941 | 0.5950063 | 0.5590317 | 0.4155036 | 0.8028852 | 1.0000000 | 0.6829619 | 0.5261481 | 0.4867047 | 0.4652567 |
0.6137799 | 0.4031804 | 0.5050429 | 0.3641376 | 0.5283675 | 0.4948485 | 0.4120799 | 0.7095414 | 0.6829619 | 1.0000000 | 0.4900305 | 0.4790754 | 0.4470428 |
0.5689958 | 0.4728562 | 0.5248360 | 0.3708085 | 0.5981445 | 0.5069651 | 0.4797128 | 0.5667249 | 0.5261481 | 0.4900305 | 1.0000000 | 0.5370407 | 0.5077249 |
0.5148295 | 0.4416382 | 0.4879837 | 0.3555687 | 0.5088167 | 0.4620556 | 0.6801581 | 0.5378509 | 0.4867047 | 0.4790754 | 0.5370407 | 1.0000000 | 0.5389705 |
0.5075920 | 0.3808171 | 0.4476117 | 0.3026505 | 0.4748662 | 0.4338220 | 0.4932913 | 0.5003190 | 0.4652567 | 0.4470428 | 0.5077249 | 0.5389705 | 1.0000000 |
# assign cells to the closest landmark #
InSilicoLandMarkAssignments = assign2landmarks(InSilicoForeGround, InSilicoLandMarks)
dim(InSilicoLandMarks)
head(InSilicoLandMarks)
Peak_chr1_3084622_3085850 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Peak_chr1_3109389_3111052 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3111327_3111863 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3174934_3176608 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3238831_3239934 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Peak_chr1_3240564_3242233 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
InSilicoCell2LandmarkCorrelation = do.call(cbind,
lapply(seq(length(unique(metadata$label))),
function(i) apply(InSilicoForeGround, 2,
function(x) cor(x, InSilicoLandMarks[,i], method = 'spearman'))))
dim(InSilicoCell2LandmarkCorrelation)
InSilicoCell2LandmarkCorrelation[1:5,1:5]
InSilicoLandMarkAssignments_sorted <- InSilicoLandMarkAssignments[order(InSilicoLandMarkAssignments)]
InSilicoCell2LandmarkCorrelation_sorted <- InSilicoCell2LandmarkCorrelation[names(InSilicoLandMarkAssignments_sorted),]
head(InSilicoCell2LandmarkCorrelation_sorted)
../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC.header.bam | 0.15037825 | 0.08100556 | 0.09970500 | 0.07487623 | 0.10867135 | 0.10783935 | 0.08097096 | 0.12848284 | 0.12549406 | 0.10458567 | 0.10517637 | 0.09236745 | 0.09934552 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGAGTTGAATCAAAGCTAGGTTCCTATCCT.header.bam | 0.02254813 | 0.01167650 | 0.01717296 | 0.01222310 | 0.01361289 | 0.01319279 | 0.01381670 | 0.01980719 | 0.02092724 | 0.02139908 | 0.01926243 | 0.01614836 | 0.01441729 |
../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGATGGCGCCTGCTTAATCTTGGTACTGAC.header.bam | 0.07982147 | 0.03168394 | 0.06199784 | 0.04273903 | 0.04101516 | 0.04706886 | 0.03759681 | 0.05680032 | 0.04660285 | 0.04887604 | 0.04069325 | 0.04219200 | 0.03981479 |
../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGCCGCTAAGAGTCCGCGGACTATAGAGGC.header.bam | 0.09332764 | 0.05470010 | 0.06528551 | 0.06023007 | 0.07070460 | 0.06730417 | 0.04801937 | 0.08351262 | 0.07833204 | 0.07596945 | 0.05664343 | 0.05437833 | 0.05871448 |
../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGCTAATTGCGATCCGCGGACTAGGCGAAG.header.bam | 0.13298426 | 0.07647489 | 0.08945609 | 0.06134983 | 0.09372020 | 0.09838101 | 0.07392224 | 0.11821980 | 0.11236638 | 0.10301981 | 0.08600142 | 0.08108829 | 0.07482789 |
../../input/sc-bams_nodup//BoneMarrow_62016.AGCGATAGCTCAATTAGTAAGCTAGGTTAGGCGAAG.header.bam | 0.19314251 | 0.11728222 | 0.13639938 | 0.10323081 | 0.16583521 | 0.14759743 | 0.11103057 | 0.18328654 | 0.16987117 | 0.15700288 | 0.14417240 | 0.13356430 | 0.12417066 |
cell.ids = sapply(strsplit(basename(names(InSilicoLandMarkAssignments_sorted)),'\\.'),'[', 2)
head(cell.ids)
cell.labels = metadata[cell.ids,'label']
cell.info = match(metadata[cell.ids,'label'],unique(metadata[,'label']))
head(cell.info)
scalered <- colorRampPalette(c("white", "red"), space = "rgb")(256)
rcols1 = colorRampPalette(brewer.pal(8, "Accent"))(length(unique(metadata$label)))
rowcols1 = rcols1[cell.info]
rcols2 = colorRampPalette(brewer.pal(8, "Dark2"))(length(unique(metadata$label)))
rowcols2 = rcols2[InSilicoLandMarkAssignments_sorted]
rowcols = rbind(rowcols1, rowcols2)
rownames(rowcols) = c("cell label", "cluster")
rowcols[,1:5]
cell label | #7FC97F | #7FC97F | #7FC97F | #7FC97F | #7FC97F |
---|---|---|---|---|---|
cluster | #1B9E77 | #1B9E77 | #1B9E77 | #1B9E77 | #1B9E77 |
heatmap.3(InSilicoCell2LandmarkCorrelation_sorted, dendrogram='none', Rowv=FALSE, Colv=FALSE,
trace='none', col = scalered, margin = c(5, 5), density.info = "none",
RowSideColors = rowcols, RowSideColorsSize=2, symm=F,symkey=F,
symbreaks=F, scale="none")
legend("bottomleft", legend = c(unique(cell.labels), paste0("cluster ", 1:length(unique(metadata$label)))),
col = c(rcols1, rcols2), border=FALSE, bty="n", y.intersp = 0.7, cex=0.7, pch = 15)
sessionInfo()
R version 3.5.1 (2018-07-02) Platform: x86_64-conda_cos6-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core) Matrix products: default BLAS/LAPACK: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_scABC/lib/R/lib/libRblas.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] parallel stats4 stats graphics grDevices utils datasets [8] methods base other attached packages: [1] usethis_1.4.0 devtools_2.0.1 RColorBrewer_1.1-2 [4] gplots_3.0.1.1 Matrix_1.2-17 forcats_0.4.0 [7] stringr_1.4.0 purrr_0.3.2 readr_1.3.1 [10] tidyr_0.8.3 tibble_2.1.1 ggplot2_3.1.0 [13] tidyverse_1.2.1 dplyr_0.8.0.1 data.table_1.12.0 [16] Rsamtools_1.34.1 Biostrings_2.50.2 XVector_0.22.0 [19] scABC_0.99.0 GenomicRanges_1.34.0 GenomeInfoDb_1.18.2 [22] IRanges_2.16.0 S4Vectors_0.20.1 BiocGenerics_0.28.0 loaded via a namespace (and not attached): [1] nlme_3.1-137 fs_1.2.7 bitops_1.0-6 [4] lubridate_1.7.4 httr_1.4.0 rprojroot_1.3-2 [7] repr_0.19.2 tools_3.5.1 backports_1.1.3 [10] R6_2.4.0 rpart_4.1-13 KernSmooth_2.23-15 [13] Hmisc_4.2-0 lazyeval_0.2.2 colorspace_1.4-1 [16] nnet_7.3-12 withr_2.1.2 prettyunits_1.0.2 [19] processx_3.3.0 tidyselect_0.2.5 gridExtra_2.3 [22] curl_3.3 compiler_3.5.1 cli_1.1.0 [25] rvest_0.3.2 htmlTable_1.13.1 xml2_1.2.0 [28] desc_1.2.0 caTools_1.17.1.2 scales_1.0.0 [31] checkmate_1.9.1 callr_3.2.0 pbdZMQ_0.3-3 [34] digest_0.6.18 foreign_0.8-71 base64enc_0.1-3 [37] pkgconfig_2.0.2 htmltools_0.3.6 sessioninfo_1.1.1 [40] htmlwidgets_1.3 rlang_0.3.3 readxl_1.3.1 [43] rstudioapi_0.10 generics_0.0.2 jsonlite_1.6 [46] BiocParallel_1.16.6 gtools_3.8.1 acepack_1.4.1 [49] RCurl_1.95-4.12 magrittr_1.5 GenomeInfoDbData_1.2.0 [52] Formula_1.2-3 Rcpp_1.0.1 IRkernel_0.8.15 [55] munsell_0.5.0 stringi_1.4.3 zlibbioc_1.28.0 [58] pkgbuild_1.0.3 plyr_1.8.4 grid_3.5.1 [61] gdata_2.18.0 crayon_1.3.4 lattice_0.20-38 [64] IRdisplay_0.7.0 haven_2.1.0 splines_3.5.1 [67] TraMineR_2.0-11.1 hms_0.4.2 ps_1.3.0 [70] knitr_1.22 pillar_1.3.1 uuid_0.1-2 [73] boot_1.3-20 pkgload_1.0.2 glue_1.3.1 [76] evaluate_0.13 latticeExtra_0.6-28 remotes_2.0.2 [79] modelr_0.1.4 cellranger_1.1.0 gtable_0.3.0 [82] assertthat_0.2.1 xfun_0.6 broom_0.5.1 [85] survival_2.44-1.1 memoise_1.1.0 cluster_2.0.7-1 [88] WeightedCluster_1.4
save.image(file = 'scABC_cusanovich2018subset.RData')