library('ggplot2') library('dplyr') setwd('/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/') trim_summary = read.csv('./summaries/trim_summary_report', stringsAsFactors=FALSE, header=TRUE, sep='\t') colnames(trim_summary) = c("Run_s", "pre_trim", "post_trim") samples_with_geno = read.table('./summaries/samples_with_genotypes.txt', stringsAsFactors=FALSE) subjects_with_geno = read.table('./summaries/subjects_with_genotypes.txt', stringsAsFactors=FALSE) sra_runtable = read.csv('./summaries/SRARunTable.txt', stringsAsFactors=FALSE, header=TRUE, sep='\t') summary_table = sra_runtable %>% filter(submitted_subject_id_s %in% subjects_with_geno$V1) %>% select(Run_s, Sample_Name_s, body_site_s, histological_type_s, submitted_subject_id_s) summary_table = left_join(summary_table, trim_summary, by="Run_s") g = ggplot(summary_table, aes(x = log10(post_trim), y = log10(pre_trim), color= histological_type_s)) + geom_point() + ggtitle('log-log plot of pre-trim and post-trim reads') + theme(legend.position = "none") ggsave(filename = './gallery/trim_scatter.png', plot = g, width = 7, height = 7) star1_summary = read.csv('./summaries/STAR_1pass_hg19_summary_report', stringsAsFactors=FALSE, header=TRUE, sep='\t') colnames(star1_summary) = c("Run_s", "Star1_input", "Star1_unique", "Star1_multi", "Star1_toomany", "Star1_unmapped") summary_table = left_join(summary_table, star1_summary, by="Run_s") # There are two rows that are not completed: remove them summary_table = summary_table %>% filter(!is.na(Star1_input)) g = ggplot(summary_table, aes(x = Star1_unmapped, y = Star1_input, color= histological_type_s)) + geom_point() + ggtitle('Plot of post-trim reads vs unmapped reads') + theme(legend.position = "none") ggsave(filename = './gallery/star1_unmapped_scatter.png', plot = g, width = 7, height = 7) g = ggplot(summary_table, aes(x = Star1_unique, y = Star1_input, color= histological_type_s)) + geom_point() + ggtitle('Plot of post-trim reads vs unique-mapping reads') + theme(legend.position = "none") ggsave(filename = './gallery/star1_unique_scatter.png', plot = g, width = 7, height = 7) g = ggplot(summary_table, aes(x = Star1_multi, y = Star1_input, color= histological_type_s)) + geom_point() + ggtitle('Plot of post-trim reads vs multi-mapping reads') + theme(legend.position = "none") ggsave(filename = './gallery/star1_multi_scatter.png', plot = g, width = 7, height = 7) g = ggplot(summary_table, aes(x = Star1_toomany, y = Star1_input, color= histological_type_s)) + geom_point() + ggtitle('Plot of post-trim reads vs reads mapped to too many locations') + theme(legend.position = "none") ggsave(filename = './gallery/star1_toomany_scatter.png', plot = g, width = 7, height = 7) # Which tissue is giving us a lot of unmapped reads? unmapped_tissue = summary_table %>% group_by(histological_type_s) %>% summarise(mean=mean(Star1_unmapped)) # Turns out they are the blood samples, as expected. star2_summary_featurecounts = read.csv('./summaries/STAR_2pass_hg19_featurecounts_summary_report', stringsAsFactors=FALSE, header=TRUE, sep='\t') colnames(star2_summary_featurecounts) = c("Run_s", "Star2_input_f", "Star2_unique_f", "Star2_multi_f", "Star2_toomany_f", "Star2_unmapped_f") star2_summary_rsem = read.csv('./summaries/STAR_2pass_hg19_rsem_summary_report', stringsAsFactors=FALSE, header=TRUE, sep='\t') colnames(star2_summary_rsem) = c("Run_s", "Star2_input_r", "Star2_unique_r", "Star2_multi_r", "Star2_toomany_r", "Star2_unmapped_r") summary_table = left_join(summary_table, star2_summary_featurecounts, by="Run_s") summary_table = left_join(summary_table, star2_summary_rsem, by="Run_s") # There are two rows that are not completed for rsem: remove them summary_table = summary_table %>% filter(!is.na(Star2_input_r)) g = ggplot(summary_table, aes(x = Star2_unmapped_f, y = Star2_input_f, color= histological_type_s)) + geom_point() + ggtitle('Plot of post-trim reads vs unmapped reads') + theme(legend.position = "none") ggsave(filename = './gallery/star2_unmapped_scatter.png', plot = g, width = 7, height = 7) g = ggplot(summary_table, aes(x = Star2_unique_f, y = Star2_input_f, color= histological_type_s)) + geom_point() + ggtitle('Plot of post-trim reads vs unique-mapping reads') + theme(legend.position = "none") ggsave(filename = './gallery/star2_unique_scatter.png', plot = g, width = 7, height = 7) g = ggplot(summary_table, aes(x = Star2_multi_f, y = Star2_input_f, color= histological_type_s)) + geom_point() + ggtitle('Plot of post-trim reads vs multi-mapping reads') + theme(legend.position = "none") ggsave(filename = './gallery/star2_multi_scatter.png', plot = g, width = 7, height = 7) g = ggplot(summary_table, aes(x = Star2_toomany_f, y = Star2_input_f, color= histological_type_s)) + geom_point() + ggtitle('Plot of post-trim reads vs reads mapped to too many locations') + theme(legend.position = "none") ggsave(filename = './gallery/star2_toomany_scatter.png', plot = g, width = 7, height = 7) multimapping_tissue = summary_table %>% group_by(histological_type_s) %>% summarise(mean=mean(Star2_multi_f)) # The brown points are Blood, the pink points are stomach. from IPython.display import Image Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/trim_scatter.png', width = 500, height = 500) a = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/star1_unique_scatter.png', width = 500, height = 500) b = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/star1_multi_scatter.png', width = 500, height = 500) c = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/star1_toomany_scatter.png', width = 500, height = 500) d = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/star1_unmapped_scatter.png', width = 500, height = 500) display(a,b,c,d) a = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/star2_unique_scatter.png', width = 500, height = 500) b = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/star2_multi_scatter.png', width = 500, height = 500) c = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/star2_toomany_scatter.png', width = 500, height = 500) d = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/star2_unmapped_scatter.png', width = 500, height = 500) display(a,b,c,d) a = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/LFA_factors.png', width = 500, height = 500) b = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/LFAPopulation-2D.png', width = 700, height = 700) c = Image(filename='/tigress/BEE/gtex/analyses/user_specific/cis-mapping/references/gallery/LFAPopulation-3D.png', width = 500, height = 500) display(a,b,c)