# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("cicero")
library(cicero)
library(data.table)
library(Matrix)
library(proxy)
library(reshape2)
library(BuenColors)
library(umap)
Loading required package: monocle Loading required package: Matrix Loading required package: Biobase Loading required package: BiocGenerics Loading required package: parallel Attaching package: ‘BiocGenerics’ The following objects are masked from ‘package:parallel’: clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB The following objects are masked from ‘package:Matrix’: colMeans, colSums, rowMeans, rowSums, which The following objects are masked from ‘package:stats’: IQR, mad, sd, var, xtabs The following objects are masked from ‘package:base’: anyDuplicated, append, as.data.frame, basename, cbind, colMeans, colnames, colSums, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which, which.max, which.min Welcome to Bioconductor Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'. Loading required package: ggplot2 Loading required package: VGAM Loading required package: stats4 Loading required package: splines Loading required package: DDRTree Loading required package: irlba Loading required package: Gviz Loading required package: S4Vectors Attaching package: ‘S4Vectors’ The following object is masked from ‘package:Matrix’: expand The following object is masked from ‘package:base’: expand.grid Loading required package: IRanges Loading required package: GenomicRanges Loading required package: GenomeInfoDb Loading required package: grid Attaching package: 'data.table' The following object is masked from 'package:GenomicRanges': shift The following object is masked from 'package:IRanges': shift The following objects are masked from 'package:S4Vectors': first, second Attaching package: 'proxy' The following object is masked from 'package:IRanges': as.matrix The following object is masked from 'package:S4Vectors': as.matrix The following object is masked from 'package:Matrix': as.matrix The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix Attaching package: 'reshape2' The following objects are masked from 'package:data.table': dcast, melt Loading required package: MASS
load('../../run_methods/Cicero/Cicero_buenrostro2018.RData')
dim(datafr)
dim(fm_Cicero)
fm_Cicero[1:5,1:5]
BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 | |
---|---|---|---|---|---|
A2LD1 | 0.000000000 | 0.000000000 | 0.000000000 | 0.000000000 | 0 |
A4GALT | 0.000000000 | 0.000000000 | 0.000000000 | 0.000000000 | 0 |
AAAS | 0.000000000 | 0.001337961 | 0.000000000 | 0.000000000 | 0 |
AACS | 0.001042821 | 0.000000000 | 0.001407443 | 0.006506052 | 0 |
AADAT | 0.000000000 | 0.000000000 | 0.000000000 | 0.000000000 | 0 |
pd <- new("AnnotatedDataFrame", data = data.frame(label=metadata[colnames(fm_Cicero),'label'],row.names =colnames(fm_Cicero)))
fd <- new("AnnotatedDataFrame", data = data.frame(row.names =rownames(fm_Cicero)))
cds <- newCellDataSet(fm_Cicero, phenoData = pd, featureData = fd, expressionFamily = tobit())
Warning message in newCellDataSet(fm_Cicero, phenoData = pd, featureData = fd, expressionFamily = tobit()): "Warning: featureData must contain a column verbatim named 'gene_short_name' for certain functions"Warning message in newCellDataSet(fm_Cicero, phenoData = pd, featureData = fd, expressionFamily = tobit()): "Warning: featureData must contain a column verbatim named 'gene_short_name' for certain functions"Warning message in newCellDataSet(fm_Cicero, phenoData = pd, featureData = fd, expressionFamily = tobit()): "Warning: featureData must contain a column verbatim named 'gene_short_name' for certain functions"
cds <- detectGenes(cds)
cds <- estimateSizeFactors(cds)
# cds <- estimateDispersions(cds)
cds <- reduceDimension(cds, max_components = 2, num_dim = 15,reduction_method = 'tSNE', verbose = T)
Warning message in if (cds@expressionFamily@vfamily %in% c("negbinomial", "negbinomial.size")) {: "the condition has length > 1 and only the first element will be used"Warning message in if (cds@expressionFamily@vfamily == "binomialff") {: "the condition has length > 1 and only the first element will be used"Warning message in if (cds@expressionFamily@vfamily == "Tobit") {: "the condition has length > 1 and only the first element will be used"Warning message in if (cds@expressionFamily@vfamily == "uninormal") {: "the condition has length > 1 and only the first element will be used"Remove noise by PCA ... Reduce dimension by tSNE ...
cds <- clusterCells(cds,num_clusters = length(unique(metadata$label)),method = 'densityPeak')
Distance cutoff calculated to 3.104757
plot_cell_clusters(cds, 1, 2, color = "label")
plot_cell_clusters(cds, 1, 2, color = "Cluster")
df_out = data.frame(Cicero = cds$Cluster, row.names = sampleNames(phenoData(cds)))
all(rownames(df_out) == rownames(metadata))
head(df_out)
Cicero | |
---|---|
BM1077-CLP-Frozen-160106-13 | 4 |
BM1077-CLP-Frozen-160106-14 | 9 |
BM1077-CLP-Frozen-160106-2 | 3 |
BM1077-CLP-Frozen-160106-21 | 4 |
BM1077-CLP-Frozen-160106-27 | 4 |
BM1077-CLP-Frozen-160106-3 | 3 |
write.table(df_out, file='clusteringSolution.tsv', quote=FALSE, sep='\t', col.names = NA)