# The notebook below can be run in Custom Environment with "schaluvadi/cross_tissue_analysis:v1.6" as Container image
# The Docker Container image includes all the packages required to run this notebook
# The original analysis was run with the R version, package version below
# However, most of them are not required to run this notebook
# R version 3.6.0
# Seurat_3.1.1
# Signac_0.1.6
# GenomeInfoDb_1.22.0
# ggplot2_3.3.0
# chromVAR_1.6.0
# motifmatchr_1.6.0
# JASPAR2020_0.99.8
# TFBSTools_1.25.1
# EnsDb.Hsapiens.v86_2.99.0
# BSgenome.Hsapiens.UCSC.hg38_1.4.1
# ComplexHeatmap_2.0.0
# RColorBrewer_1.1-2
# rtracklayer_1.46.0
# dplyr_0.8.4
# repr_1.0.1
# gridExtra_2.3
library(Signac)
library(Seurat)
library(GenomeInfoDb)
library(ggplot2)
library(ComplexHeatmap)
library(chromVAR)
library(motifmatchr)
library(RColorBrewer)
library(rtracklayer)
library(dplyr)
library(repr)
library(JASPAR2020)
library(TFBSTools)
library(BSgenome.Hsapiens.UCSC.hg38)
library(EnsDb.Hsapiens.v86)
library(gridExtra)
set.seed(1234)
Loading required package: BiocGenerics Loading required package: parallel Attaching package: ‘BiocGenerics’ The following objects are masked from ‘package:parallel’: clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB The following objects are masked from ‘package:stats’: IQR, mad, sd, var, xtabs The following objects are masked from ‘package:base’: anyDuplicated, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, union, unique, unsplit, which, which.max, which.min Loading required package: S4Vectors Loading required package: stats4 Attaching package: ‘S4Vectors’ The following object is masked from ‘package:base’: expand.grid Loading required package: IRanges Loading required package: grid ======================================== ComplexHeatmap version 2.3.4 Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/ Github page: https://github.com/jokergoo/ComplexHeatmap Documentation: http://jokergoo.github.io/ComplexHeatmap-reference If you use it in published research, please cite: Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional genomic data. Bioinformatics 2016. This message can be suppressed by: suppressPackageStartupMessages(library(ComplexHeatmap)) ======================================== Loading required package: GenomicRanges Attaching package: ‘dplyr’ The following objects are masked from ‘package:GenomicRanges’: intersect, setdiff, union The following object is masked from ‘package:GenomeInfoDb’: intersect The following objects are masked from ‘package:IRanges’: collapse, desc, intersect, setdiff, slice, union The following objects are masked from ‘package:S4Vectors’: first, intersect, rename, setdiff, setequal, union The following objects are masked from ‘package:BiocGenerics’: combine, intersect, setdiff, union The following objects are masked from ‘package:stats’: filter, lag The following objects are masked from ‘package:base’: intersect, setdiff, setequal, union Loading required package: BSgenome Loading required package: Biostrings Loading required package: XVector Attaching package: ‘Biostrings’ The following object is masked from ‘package:base’: strsplit Loading required package: ensembldb Loading required package: GenomicFeatures Loading required package: AnnotationDbi Loading required package: Biobase Welcome to Bioconductor Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'. Attaching package: ‘AnnotationDbi’ The following object is masked from ‘package:dplyr’: select Loading required package: AnnotationFilter Attaching package: 'ensembldb' The following object is masked from 'package:dplyr': filter The following object is masked from 'package:stats': filter Attaching package: 'gridExtra' The following object is masked from 'package:Biobase': combine The following object is masked from 'package:dplyr': combine The following object is masked from 'package:BiocGenerics': combine
sessionInfo()
R version 3.6.1 (2019-07-05) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 18.04.3 LTS Matrix products: default BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1 LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1 locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] grid stats4 parallel stats graphics grDevices utils [8] datasets methods base other attached packages: [1] gridExtra_2.3 EnsDb.Hsapiens.v86_2.99.0 [3] ensembldb_2.8.1 AnnotationFilter_1.8.0 [5] GenomicFeatures_1.36.4 AnnotationDbi_1.46.1 [7] Biobase_2.44.0 BSgenome.Hsapiens.UCSC.hg38_1.4.1 [9] BSgenome_1.52.0 Biostrings_2.52.0 [11] XVector_0.24.0 TFBSTools_1.22.0 [13] JASPAR2020_0.99.8 repr_1.0.1 [15] dplyr_0.8.5 rtracklayer_1.44.4 [17] GenomicRanges_1.36.1 RColorBrewer_1.1-2 [19] motifmatchr_1.6.0 chromVAR_1.6.0 [21] ComplexHeatmap_2.3.4 ggplot2_3.3.0 [23] GenomeInfoDb_1.20.0 IRanges_2.18.3 [25] S4Vectors_0.22.1 BiocGenerics_0.30.0 [27] Seurat_3.1.0 Signac_0.2.4 loaded via a namespace (and not attached): [1] reticulate_1.15 R.utils_2.9.2 [3] tidyselect_1.0.0 poweRlaw_0.70.2 [5] RSQLite_2.1.2 htmlwidgets_1.5.1 [7] BiocParallel_1.18.1 Rtsne_0.15 [9] munsell_0.5.0 codetools_0.2-16 [11] mutoss_0.1-12 ica_1.0-2 [13] DT_0.13 pbdZMQ_0.3-3 [15] future_1.16.0 miniUI_0.1.1.1 [17] withr_2.1.2 colorspace_1.4-1 [19] uuid_0.1-2 ROCR_1.0-7 [21] gbRd_0.4-11 listenv_0.8.0 [23] Rdpack_0.11-1 GenomeInfoDbData_1.2.1 [25] mnormt_1.5-6 bit64_0.9-7 [27] vctrs_0.2.4 TH.data_1.0-10 [29] ggseqlogo_0.1 R6_2.4.1 [31] clue_0.3-57 rsvd_1.0.3 [33] VGAM_1.1-2 bitops_1.0-6 [35] DelayedArray_0.10.0 assertthat_0.2.1 [37] promises_1.1.0 SDMTools_1.1-221 [39] scales_1.1.0 multcomp_1.4-12 [41] gtable_0.3.0 npsurv_0.4-0 [43] globals_0.12.5 sandwich_2.5-1 [45] seqLogo_1.50.0 rlang_0.4.5 [47] gggenes_0.4.0 GlobalOptions_0.1.1 [49] splines_3.6.1 lazyeval_0.2.2 [51] reshape2_1.4.3 httpuv_1.5.2 [53] tools_3.6.1 ellipsis_0.3.0 [55] gplots_3.0.3 ggridges_0.5.2 [57] TFisher_0.2.0 Rcpp_1.0.4 [59] plyr_1.8.6 base64enc_0.1-3 [61] progress_1.2.2 zlibbioc_1.30.0 [63] purrr_0.3.3 RCurl_1.95-4.12 [65] prettyunits_1.1.1 pbapply_1.4-2 [67] GetoptLong_0.1.8 cowplot_1.0.0 [69] zoo_1.8-7 SummarizedExperiment_1.14.1 [71] ggrepel_0.8.2 cluster_2.1.0 [73] magrittr_1.5 data.table_1.12.8 [75] circlize_0.4.8 lmtest_0.9-37 [77] RANN_2.6.1 mvtnorm_1.1-0 [79] ProtGenerics_1.16.0 fitdistrplus_1.0-14 [81] matrixStats_0.55.0 hms_0.5.1 [83] patchwork_1.0.0 lsei_1.2-0 [85] mime_0.9 evaluate_0.14 [87] xtable_1.8-4 XML_3.98-1.20 [89] shape_1.4.4 compiler_3.6.1 [91] biomaRt_2.40.5 tibble_3.0.0 [93] KernSmooth_2.23-15 crayon_1.3.4 [95] R.oo_1.23.0 htmltools_0.4.0 [97] later_1.0.0 tidyr_1.0.2 [99] DBI_1.0.0 MASS_7.3-51.4 [101] rappdirs_0.3.1 readr_1.3.1 [103] Matrix_1.2-17 cli_2.0.2 [105] R.methodsS3_1.8.0 gdata_2.18.0 [107] metap_1.3 igraph_1.2.5 [109] pkgconfig_2.0.3 sn_1.6-1 [111] TFMPvalue_0.0.8 GenomicAlignments_1.20.1 [113] numDeriv_2016.8-1.1 IRdisplay_0.7.0 [115] plotly_4.9.2 annotate_1.62.0 [117] DirichletMultinomial_1.26.0 multtest_2.40.0 [119] bibtex_0.4.2.2 stringr_1.4.0 [121] digest_0.6.25 sctransform_0.2.1 [123] RcppAnnoy_0.0.16 tsne_0.1-3 [125] CNEr_1.20.0 leiden_0.3.3 [127] uwot_0.1.8 curl_4.3 [129] shiny_1.4.0.2 Rsamtools_2.0.3 [131] gtools_3.8.2 rjson_0.2.20 [133] lifecycle_0.2.0 nlme_3.1-141 [135] jsonlite_1.6.1 viridisLite_0.3.0 [137] fansi_0.4.1 pillar_1.4.3 [139] lattice_0.20-38 KEGGREST_1.24.1 [141] GO.db_3.8.2 fastmap_1.0.1 [143] httr_1.4.1 plotrix_3.7-7 [145] survival_2.44-1.1 glue_1.3.2 [147] png_0.1-7 bit_1.1-14 [149] stringi_1.4.6 blob_1.2.0 [151] ggfittext_0.8.1 caTools_1.18.0 [153] memoise_1.1.0 IRkernel_1.0.2.9000 [155] irlba_2.3.3 future.apply_1.4.0 [157] ape_5.3
## Loading raw data
## QC
## Processing
system('gsutil cp gs://fc-1a9c7c2a-7fd9-4a7a-9e43-8f3d3cfee66d/20200327_lungATAC_onlyACE2_TMPRSS2_CTLS.rds .')
## Loading processed data
lung=readRDS('20200327_lungATAC_onlyACE2_TMPRSS2_CTLS.rds')
lung
Loading required package: Seurat
An object of class Seurat 3 features across 11706 samples within 1 assay Active assay: RNA (3 features) 1 dimensional reduction calculated: umap
DimPlot(object = lung, group.by= 'location', pt.size=.05)
table(lung@meta.data$location)
1C RPL 3366 8340
DimPlot(lung, group.by = 'revised.annot_2', label = F,cols = c("AT1" = "#0072B2",
"AT2" = "#56B4E9",
"B.cells" = "#009E73",
"Basal" = "#D55E00",
"Ciliated.RPL" = "#E69F00",
"Ciliated.1C" = "#F0E442",
"Endothelial" = "#14D2DC",
"Fibroblast" = "#82A0BE",
"Ionocytes.PNEC" = "#FA5078",
"Myeloid" = "#DC8282",
"Mesothelial" = "#FAE6BE",
"Neuron" = 'black',
"Secretory.RPL" = "#FA7850",
"Secretory.1C" = "#96B400",
"Smooth.Muscle" = "#FA2800",
"B.T.NK.cells" = "#FA78FA",
"Tuft.like" ="#8214A0"))
table(lung@meta.data$revised.annot_2)
AT1 AT2 B.T.NK.cells Basal Ciliated.1C 730 3371 227 1962 644 Ciliated.RPL Endothelial Fibroblast Ionocytes.PNEC Mesothelial 484 188 2579 135 252 Myeloid Neuron Secretory.1C Secretory.RPL Smooth.Muscle 174 46 200 119 474 Tuft.like 121
DefaultAssay(lung) <- 'RNA'
FeaturePlot(
object = lung,
features = 'ACE2',
pt.size = 0.02,
max.cutoff = 'q99',
ncol = 3,
order=T
) + scale_color_gradientn(colors = brewer.pal(9, "Reds"))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
FeaturePlot(
object = lung,
features = 'TMPRSS2',
pt.size = 0.02,
max.cutoff = 'q99',
ncol = 3,
order=T
) + scale_color_gradientn(colors = brewer.pal(9, "Reds"))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
FeaturePlot(
object = lung,
features = 'CTSL',
pt.size = 0.02,
max.cutoff = 'q99',
ncol = 3,
order=T
) + scale_color_gradientn(colors = brewer.pal(9, "Reds"))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
gene_of_int='ACE2'
gene_of_int2='TMPRSS2'
exp_thresh = 1
flt_pt1 = lung[['RNA']]@counts[gene_of_int,] >= exp_thresh
flt_pt2 = flt_pt1 & (lung[['RNA']]@counts[gene_of_int2,] >= exp_thresh)
lung@meta.data$ACE2_moreeq1 = 'other'
lung@meta.data$ACE2_moreeq1[flt_pt1] = 'ACE2_more_eq1count'
lung@meta.data$ACE2_TMPRSS2_moreeq1 = 'other'
lung@meta.data$ACE2_TMPRSS2_moreeq1[flt_pt2] = 'ACE2_TMPRSS2_more_eq1count'
DimPlot(lung, group.by = 'ACE2_moreeq1', order='ACE2_more_eq1count', cols=c('other' = 'lightgrey',
'ACE2_more_eq1count' = 'black'),
pt.size = 0.000001,
) +ggtitle('Cells with at least 1 fragment at ACE2') + theme(legend.position="bottom")
DotPlot(lung, group.by = 'revised.annot_2', features = c('ACE2', 'TMPRSS2', 'CTSL')) + RotatedAxis() + scale_color_gradientn(colors = rev(brewer.pal(11, "RdBu")))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
dp=subset(lung@meta.data, select=c('revised.annot_2', 'ACE2_TMPRSS2_moreeq1'))
tab_dp=as.data.frame.matrix(table(dp$revised.annot_2, dp$ACE2_TMPRSS2_moreeq1))
tab_dp$percent=round((tab_dp$ACE2_TMPRSS2_more_eq1count/(tab_dp$ACE2_TMPRSS2_more_eq1count + tab_dp$other))*100, digits = 2)
head(tab_dp)
ACE2_TMPRSS2_more_eq1count | other | percent | |
---|---|---|---|
<int> | <int> | <dbl> | |
AT1 | 9 | 721 | 1.23 |
AT2 | 168 | 3203 | 4.98 |
B.T.NK.cells | 2 | 225 | 0.88 |
Basal | 30 | 1932 | 1.53 |
Ciliated.1C | 31 | 613 | 4.81 |
Ciliated.RPL | 32 | 452 | 6.61 |
barplot(tab_dp$percent, names=rownames(tab_dp), horiz = T, las = 1, cex.names=0.4, xlab='percentage', main='cells at least 1 fragment ACE2 TMPRSS2')
gene_of_int='ACE2'
exp_thresh = 1
flt_pt1 = lung@meta.data$revised.annot_2 %in% c('AT1','AT2', 'Basal', 'Ciliated.1C', 'Ciliated.RPL', 'Ionocytes.PNEC', 'Secretory.1C', 'Secretory.RPL', 'Tuft.like')
flt_pt2 = flt_pt1 & (lung[['RNA']]@counts[gene_of_int,] >= exp_thresh)
flt_pt3 = flt_pt1 & (lung[['RNA']]@counts[gene_of_int,] < exp_thresh)
lung@meta.data$epith_group = 'other_cells'
lung@meta.data$epith_group[flt_pt1] = 'Epith_other'
lung@meta.data$epith_group[flt_pt2] = 'Epith_ACE2_more_eq1'
lung@meta.data$epith_group[flt_pt3] = 'Epith_ACE2_neg'
options(repr.plot.width=8, repr.plot.height=8)
DimPlot(lung, group.by = 'epith_group', order='Epith_ACE2_more_eq1',
cols=c('other_cells' = 'lightgrey',
'Epith_ACE2_neg' = '#d8b365',
'Epith_other' = '#5ab4ac',
'Epith_ACE2_more_eq1' = 'black'),
pt.size = 0.000001,
) +ggtitle('Epith Cells with at least 1 fragment ACE2') + theme(legend.position="bottom")
### Code in this cell will not run because the object provided lacks ChromVar Assay - please get it touch if interested
DefaultAssay(lung) <- 'chromvar'
epi_ace2_p_marker_motif<- FindMarkers(
object = lung,
group.by='epith_group',
ident.1 = 'Epith_ACE2_more_eq1',
ident.2 = 'Epith_ACE2_neg',
only.pos = TRUE,
test.use = 'LR',
latent.vars = 'nCount_peaks',
)
subset(epi_ace2_p_marker_motif, p_val_adj <1)
p_val | avg_logFC | pct.1 | pct.2 | p_val_adj | |
---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
IRF1_MA0050.1 | 2.457011e-05 | 0.5829078 | 0.360 | 0.292 | 0.01990179 |
IRF7_MA0772.1 | 9.501319e-05 | 0.3977183 | 0.306 | 0.249 | 0.07696068 |
STAT1::STAT2_MA0517.1 | 1.949853e-04 | 0.6069283 | 0.335 | 0.276 | 0.15793811 |
FOXA1_MA0148.2 | 2.909351e-04 | 0.9415472 | 0.829 | 0.775 | 0.23565744 |
FOXD2_MA0847.2 | 3.292566e-04 | 1.8421963 | 0.801 | 0.744 | 0.26669781 |
CDX2_MA0465.2 | 3.490678e-04 | 0.3039219 | 0.418 | 0.359 | 0.28274493 |
FOXA1_MA0148.1 | 3.820777e-04 | 0.8484952 | 0.831 | 0.774 | 0.30948297 |
IRF2_MA0051.1 | 4.107653e-04 | 0.6469624 | 0.375 | 0.324 | 0.33271987 |
FOXC1_MA0032.2 | 4.515934e-04 | 1.2623710 | 0.754 | 0.687 | 0.36579061 |
HNF4A_MA0114.2 | 4.727549e-04 | 0.3798913 | 0.377 | 0.296 | 0.38293150 |
FOXP1_MA0481.1 | 5.163231e-04 | 0.5110404 | 0.573 | 0.491 | 0.41822174 |
FOXE1_MA1487.1 | 5.489487e-04 | 1.2067660 | 0.774 | 0.736 | 0.44464843 |
FOXA1_MA0148.3 | 1.130720e-03 | 2.0757295 | 0.811 | 0.781 | 0.91588313 |
### Code in this cell will not run because the object provided lacks ChromVar Assay so the cell above could not run - please get it touch if interested
t10=head(epi_ace2_p_marker_motif, n=10)
t10b <- t10[order(t10[,1], decreasing=TRUE),]
par(mar=c(4,12,4,4))
barplot(-log10(t10b$p_val_adj ), horiz = T, names.arg = rownames(t10b), las=2, main = 'Differential activity scores ACE2+ vs - epithelial cells', xlab='-log10 (adj p-val)', col="grey" )
abline(v=(-log10(0.05)), lty=2)
VlnPlot(lung, features = c("nCount_peaks"), group.by = 'epith_group', pt.size = .01)+ theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('nCount_peaks') + ggtitle('Epithelial ACE2 positive / neg / other')