!head -2 /Volumes/Students/diseases/Amanda/seastar_clc_rnaseq_counts.txt
Feature HK_CF2 HK_CF35 HK_CF70 V_CF71 V_CF34 V_CF26 seastar_clc_contig_1 28 38 89 168 37 8
!head -1 /Volumes/Students/diseases/Canonical_Files/Phel_clc_blastn_nt_nopipes.tab
Phel_clc_contig_2 gi 378345425 gb JN676976.1 81.71 175 1e-33 154 7602 Asterias amurensis Asterias amurensis starfish Eukaryota Asterias amurensis microsatellite Aamr32 sequence
!sed 's/seastar/Phel/g' </Volumes/Students/diseases/Amanda/seastar_clc_rnaseq_counts.txt> \
/Volumes/Students/diseases/steven/Phel_clc_RNAseq_count.txt
!head /Volumes/Students/diseases/steven/Phel_clc_RNAseq_count.txt
Feature HK_CF2 HK_CF35 HK_CF70 V_CF71 V_CF34 V_CF26 Phel_clc_contig_1 28 38 89 168 37 8 Phel_clc_contig_10 42 180 22 9518 2752 839 Phel_clc_contig_100 1235 6104 616 260 565 413 Phel_clc_contig_1000 8551 31946 4311 2043 3853 3070 Phel_clc_contig_10000 21 211 32 9 12 13 Phel_clc_contig_10001 54 365 90 44 227 89 Phel_clc_contig_10002 478 1267 185 38 61 80 Phel_clc_contig_10003 29 186 17 9 29 20 Phel_clc_contig_10004 19 92 4 8 25 6
!head -4 /Volumes/web/cnidarian/Seaster_clc_rnaseq_tc.txt
Feature ID HK_CF2 HK_CF35 HK_CF70 V_CF71 V_CF34 V_CF26 3291_5903_10007_H94MGADXX_V_CF71_ATCACG_R1 (paired) trimmed (paired) contig 1 28 38 89 168 37 8 3291_5903_10007_H94MGADXX_V_CF71_ATCACG_R1 (paired) trimmed (paired) contig 10 42 180 22 9518 2752 839 3291_5903_10007_H94MGADXX_V_CF71_ATCACG_R1 (paired) trimmed (paired) contig 100 1235 6104 616 260 565 413
!sed 's/3291_5903_10007_H94MGADXX_V_CF71_ATCACG_R1 (paired) trimmed (paired) contig /seastar_clc_contig_/g' </Volumes/web/cnidarian/Seaster_clc_rnaseq_tc.txt> /Volumes/web/cnidarian/Seaster_clc_rnaseq_tcc.txt
!head -4 /Volumes/web/cnidarian/Seaster_clc_rnaseq_tcc.txt
Feature ID HK_CF2 HK_CF35 HK_CF70 V_CF71 V_CF34 V_CF26 seastar_clc_contig_1 28 38 89 168 37 8 seastar_clc_contig_10 42 180 22 9518 2752 839 seastar_clc_contig_100 1235 6104 616 260 565 413
!sed 's/Feature ID/Feature/g' </Volumes/web/cnidarian/Seaster_clc_rnaseq_tcc.txt> /Volumes/web/cnidarian/seastar_clc_rnaseq_counts.txt
!head /Volumes/web/cnidarian/seastar_clc_rnaseq_counts.txt
Feature HK_CF2 HK_CF35 HK_CF70 V_CF71 V_CF34 V_CF26 seastar_clc_contig_1 28 38 89 168 37 8 seastar_clc_contig_10 42 180 22 9518 2752 839 seastar_clc_contig_100 1235 6104 616 260 565 413 seastar_clc_contig_1000 8551 31946 4311 2043 3853 3070 seastar_clc_contig_10000 21 211 32 9 12 13 seastar_clc_contig_10001 54 365 90 44 227 89 seastar_clc_contig_10002 478 1267 185 38 61 80 seastar_clc_contig_10003 29 186 17 9 29 20 seastar_clc_contig_10004 19 92 4 8 25 6
!grep "contig_2671 " /Users/sr320/Dropbox/Steven/eimd/data/seastar_clc_rnaseq_counts.txt
seastar_clc_contig_2671 22304 17668 6133 1349 5266 3297
!grep "contig_51 " /Users/sr320/Dropbox/Steven/eimd/data/seastar_clc_rnaseq_counts.txt
seastar_clc_contig_51 15849 12626 4334 1017 3858 2318
!wc -l /Volumes/web/cnidarian/seastar_clc_rnaseq_counts.txt
30579 /Volumes/web/cnidarian/seastar_clc_rnaseq_counts.txt
Experiment
!head /Users/sr320/Dropbox/Steven/eimd/paper/data/Phelv3_counts.txt
Feature ID V_CF71 - Total gene reads V_CF34 - Total gene reads V_CF26 - Total gene reads HK_CF70 - Total gene reads HK_CF2 - Total gene reads HK_CF35 - Total gene reads Phel_clc_contig_1 168 37 8 89 28 38 Phel_clc_contig_10 9518 2752 839 22 42 180 Phel_clc_contig_100 260 565 413 616 1234 6104 Phel_clc_contig_1000 2043 3842 3070 4311 8527 31946 Phel_clc_contig_10000 9 12 13 32 21 211 Phel_clc_contig_10001 44 225 89 90 54 365 Phel_clc_contig_10002 38 61 80 185 478 1267 Phel_clc_contig_10003 9 29 20 17 29 186 Phel_clc_contig_10004 8 25 6 4 19 92
!wc -l /Users/sr320/Dropbox/Steven/eimd/paper/data/Phelv3_counts.txt
29476 /Users/sr320/Dropbox/Steven/eimd/paper/data/Phelv3_counts.txt
%load_ext rpy2.ipython
%%R
source("http://bioconductor.org/biocLite.R")
biocLite("DESeq2")
y y y a
Bioconductor version 2.13 (BiocInstaller 1.12.1), ?biocLite for help A newer version of Bioconductor is available after installing a new version of R, ?BiocUpgrade for help BioC_mirror: http://bioconductor.org Using Bioconductor version 2.13 (BiocInstaller 1.12.1), R version 3.0.3. Installing package(s) 'DESeq2' trying URL 'http://bioconductor.org/packages/2.13/bioc/bin/macosx/contrib/3.0/DESeq2_1.2.10.tgz' Content type 'application/x-gzip' length 1647689 bytes (1.6 Mb) opened URL ================================================== downloaded 1.6 Mb The downloaded binary packages are in /var/folders/28/07h2q2ms7nd609f1ft3mj1240000gn/T//RtmpIk0BZ1/downloaded_packages Old packages: 'RCurl' Update all/some/none? [a/s/n]: Update all/some/none? [a/s/n]: Update all/some/none? [a/s/n]: Update all/some/none? [a/s/n]: trying URL 'http://cran.fhcrc.org/bin/macosx/contrib/3.0/RCurl_1.95-4.3.tgz' Content type 'application/x-gzip' length 711068 bytes (694 Kb) opened URL ================================================== downloaded 694 Kb The downloaded binary packages are in /var/folders/28/07h2q2ms7nd609f1ft3mj1240000gn/T//RtmpIk0BZ1/downloaded_packages
%%R
library("DESeq2")
packageVersion("DESeq2")
Loading required package: GenomicRanges Loading required package: BiocGenerics Loading required package: parallel Attaching package: ‘BiocGenerics’ The following objects are masked from ‘package:parallel’: clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB The following object is masked from ‘package:stats’: xtabs The following objects are masked from ‘package:base’: anyDuplicated, append, as.data.frame, as.vector, cbind, colnames, duplicated, eval, evalq, Filter, Find, get, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rep.int, rownames, sapply, setdiff, sort, table, tapply, union, unique, unlist Loading required package: IRanges Loading required package: XVector Loading required package: Rcpp Loading required package: RcppArmadillo [1] ‘1.2.10’
!pwd
/Users/sr320/Dropbox/Steven/eimd/ipynbs
%%R
data <- read.table("/Users/sr320/Dropbox/Steven/eimd/data/seastar_clc_rnaseq_counts.txt", header = T, sep = "\t")
rownames(data) <- data$Feature
data <- data[,-1]
%R head (data)
HK_CF2 | HK_CF35 | HK_CF70 | V_CF71 | V_CF34 | V_CF26 | |
---|---|---|---|---|---|---|
0 | 28 | 38 | 89 | 168 | 37 | 8 |
1 | 42 | 180 | 22 | 9518 | 2752 | 839 |
2 | 1235 | 6104 | 616 | 260 | 565 | 413 |
3 | 8551 | 31946 | 4311 | 2043 | 3853 | 3070 |
4 | 21 | 211 | 32 | 9 | 12 | 13 |
5 | 54 | 365 | 90 | 44 | 227 | 89 |
%%R
# Build Objects
# Specify which columns are in which groups
deseq2.colData <- data.frame(condition=factor(c(rep("HK", 3), rep("V", 3))),
type=factor(rep("single-read", 6)))
rownames(deseq2.colData) <- colnames(data)
deseq2.dds <- DESeqDataSetFromMatrix(countData = data,
colData = deseq2.colData,
design = ~ condition)
%%R
# Run Analysis
deseq2.dds <- DESeq(deseq2.dds)
deseq2.res <- results(deseq2.dds)
deseq2.res <- deseq2.res[order(rownames(deseq2.res)), ]
estimating size factors estimating dispersions gene-wise dispersion estimates mean-dispersion relationship final dispersion estimates fitting model and testing
%%R
# Count number of hits with adjusted p-value less then 0.05
dim(deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ])
[1] 3914 6
%%R
# Output
write.csv(deseq2.res, "/Users/sr320/Dropbox/Steven/eimd/data/deseq2.results.csv", row.names = T)
!head /Users/sr320/Dropbox/Steven/eimd/data/deseq2.results.csv
"","baseMean","log2FoldChange","lfcSE","stat","pvalue","padj" "seastar_clc_contig_1",115.066144674932,1.2584943899478,0.798269369507704,1.57652847274337,NA,NA "seastar_clc_contig_10",5559.9807996404,6.21395254235976,0.706071287598319,8.80074385052017,NA,NA "seastar_clc_contig_100",784.602239929063,0.0240772706127452,0.252625922408093,0.0953079968327662,0.924070191017092,0.963581804834051 "seastar_clc_contig_1000",5399.35140742484,0.220304382495146,0.291639349490296,0.755400061343492,0.450008976442489,0.668550107882446 "seastar_clc_contig_10000",24.759355847272,-0.181484411685002,0.556540344229253,-0.326093900589251,0.744353311654646,0.868919878380582 "seastar_clc_contig_10001",136.27718425946,1.49886855266789,0.550947966456816,2.72052651778936,0.00651780439581695,0.0350101864080016 "seastar_clc_contig_10002",175.868477828981,-0.904068111183539,0.436621462902541,-2.0705993360325,0.0383962542741281,0.130532011415495 "seastar_clc_contig_10003",28.3483004978917,0.703135859400196,0.456126993374581,1.54153529524346,0.12318655962946,0.29495023131509 "seastar_clc_contig_10004",16.5832494690187,1.05100411121411,0.61056831461333,1.72135383717006,0.0851866356198028,0.230797062716078
%%R
tmp <- deseq2.res
# The main plot
plot(tmp$baseMean, tmp$log2FoldChange, pch=20, cex=0.45, ylim=c(-3, 3), log="x", col="darkgray",
main="DEG Virus Exposure (pval <= 0.05)",
xlab="mean of normalized counts",
ylab="Log2 Fold Change")
# Getting the significant points and plotting them again so they're a different color
tmp.sig <- deseq2.res[!is.na(deseq2.res$padj) & deseq2.res$padj <= 0.05, ]
points(tmp.sig$baseMean, tmp.sig$log2FoldChange, pch=20, cex=0.45, col="red")
# 2 FC lines
abline(h=c(-1,1), col="blue")
%%R
write.table(tmp.sig, "/Users/sr320/Dropbox/Steven/eimd/data/deseq2_sig_results.txt", row.names = T)
!head /Users/sr320/Dropbox/Steven/eimd/data/deseq2_sig_results.txt
"baseMean" "log2FoldChange" "lfcSE" "stat" "pvalue" "padj" "seastar_clc_contig_10001" 136.27718425946 1.49886855266789 0.550947966456816 2.72052651778936 0.00651780439581695 0.0350101864080016 "seastar_clc_contig_10010" 8.73784349604224 2.43426494953234 0.717338850139719 3.39346593183711 0.000690141423521562 0.00610587057360923 "seastar_clc_contig_10013" 188.603521302622 -1.94923068685006 0.463271107915175 -4.20753777549832 2.58168256856775e-05 0.00041485396609123 "seastar_clc_contig_10045" 50.485024106335 2.87706412205861 0.695003049883772 4.13964244119468 3.47847563039607e-05 0.000526018158180456 "seastar_clc_contig_1005" 3408.98435663456 -1.66691198866687 0.378947946534594 -4.39878881495589 1.08856689786144e-05 0.000199362344222153 "seastar_clc_contig_10050" 276.941589423215 -1.61560402677235 0.323282993601818 -4.9974915437781 5.80808848482353e-07 1.60431884071189e-05 "seastar_clc_contig_1006" 77.8447124150533 -2.01146173192642 0.468218416201358 -4.29599020953798 1.73915258259802e-05 0.000298432523132762 "seastar_clc_contig_10064" 147.578457909919 1.81093710357749 0.683578354090462 2.64920194260252 0.00806821051110654 0.041310905437205 "seastar_clc_contig_10065" 28.5076393455268 2.27188575134493 0.585911374358637 3.87752457243526 0.00010552467251757 0.00131744894528401
%%R
source("http://bioconductor.org/biocLite.R")
biocLite("DESeq")
Bioconductor version 2.13 (BiocInstaller 1.12.1), ?biocLite for help A newer version of Bioconductor is available after installing a new version of R, ?BiocUpgrade for help BioC_mirror: http://bioconductor.org Using Bioconductor version 2.13 (BiocInstaller 1.12.1), R version 3.0.3. Installing package(s) 'DESeq' also installing the dependency ‘geneplotter’ trying URL 'http://bioconductor.org/packages/2.13/bioc/bin/macosx/contrib/3.0/geneplotter_1.40.0.tgz' Content type 'application/x-gzip' length 1461546 bytes (1.4 Mb) opened URL ================================================== downloaded 1.4 Mb trying URL 'http://bioconductor.org/packages/2.13/bioc/bin/macosx/contrib/3.0/DESeq_1.14.0.tgz' Content type 'application/x-gzip' length 2340535 bytes (2.2 Mb) opened URL ================================================== downloaded 2.2 Mb The downloaded binary packages are in /var/folders/28/07h2q2ms7nd609f1ft3mj1240000gn/T//RtmpAtUZwG/downloaded_packages
%R library( "DESeq" )
Loading required package: Biobase Welcome to Bioconductor Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'. Loading required package: locfit locfit 1.5-9.1 2013-03-22 Attaching package: ‘locfit’ The following objects are masked from ‘package:GenomicRanges’: left, right Loading required package: lattice Welcome to 'DESeq'. For improved performance, usability and functionality, please consider migrating to 'DESeq2'. Attaching package: ‘DESeq’ The following objects are masked from ‘package:DESeq2’: estimateSizeFactorsForMatrix, getVarianceStabilizedData, plotPCA, varianceStabilizingTransformation
%%R
CountData<- read.table("/Users/sr320/Dropbox/Steven/eimd/data/seastar_clc_rnaseq_counts.txt",row.names=1, header=TRUE,)
%R head (CountData)
HK_CF2 | HK_CF35 | HK_CF70 | V_CF71 | V_CF34 | V_CF26 | |
---|---|---|---|---|---|---|
0 | 28 | 38 | 89 | 168 | 37 | 8 |
1 | 42 | 180 | 22 | 9518 | 2752 | 839 |
2 | 1235 | 6104 | 616 | 260 | 565 | 413 |
3 | 8551 | 31946 | 4311 | 2043 | 3853 | 3070 |
4 | 21 | 211 | 32 | 9 | 12 | 13 |
5 | 54 | 365 | 90 | 44 | 227 | 89 |
%%R
conds <- c(rep("HK",3), rep("V",3))
conds
cds <- newCountDataSet( CountData, conds )
head (counts (cds))
LibrarySize <- estimateSizeFactors( cds )
sizeFactors( LibrarySize )
head( counts( LibrarySize, normalized=TRUE ) )
HK_CF2 HK_CF35 HK_CF70 V_CF71 seastar_clc_contig_1 18.34488 4.768066 109.66656 489.38889 seastar_clc_contig_10 27.51731 22.585578 27.10859 27726.21130 seastar_clc_contig_100 809.14003 765.902030 759.04044 757.38757 seastar_clc_contig_1000 5602.39381 4008.438117 5312.05090 5951.31852 seastar_clc_contig_10000 13.75866 26.475316 39.43067 26.21726 seastar_clc_contig_10001 35.37940 45.798532 110.89877 128.17328 V_CF34 V_CF26 seastar_clc_contig_1 52.43201 15.79646 seastar_clc_contig_10 3899.80803 1656.65399 seastar_clc_contig_100 800.65100 815.49237 seastar_clc_contig_1000 5460.01466 6061.89243 seastar_clc_contig_10000 17.00498 25.66925 seastar_clc_contig_10001 321.67748 175.73564
%%R
Disp <- estimateDispersions( LibrarySize )
str( fitInfo(Disp) )
plotDispEsts <- function (Disp)
{
plot(
rowMeans( counts(Disp, normalized=TRUE)),
fitInfo(Disp)$perGeneDispEsts,
pch = '.', log="xy")
xg <-10^seq(-.5, 5, length.out=300)
lines (xg, fitInfo(Disp)$dispFun(xg), col="red")
}
plotDispEsts(Disp)
head ( fData(Disp))
str(fitInfo(Disp))
DE <- nbinomTest( Disp, "HK", "V" )
head (DE)
plotDE <- function (DE)
plot(
DE$baseMean,
DE$log2FoldChange,
log="x", pch=20, cex=.3,
col = ifelse (DE$padj < .05, "red", "black"))
plotDE(DE)
hist(DE$pval, breaks=100, col="skyblue", border="slateblue", main="")
List of 5 $ perGeneDispEsts: num [1:30578] 2.732689 3.375718 -0.000427 0.013865 0.100182 ... $ dispFunc :function (q) ..- attr(*, "coefficients")= Named num [1:2] 0.379 4.011 .. ..- attr(*, "names")= chr [1:2] "asymptDisp" "extraPois" ..- attr(*, "fitType")= chr "parametric" $ fittedDispEsts : num [1:30578] 0.414 0.38 0.384 0.38 0.541 ... $ df : int 4 $ sharingMode : chr "maximum" List of 5 $ perGeneDispEsts: num [1:30578] 2.732689 3.375718 -0.000427 0.013865 0.100182 ... $ dispFunc :function (q) ..- attr(*, "coefficients")= Named num [1:2] 0.379 4.011 .. ..- attr(*, "names")= chr [1:2] "asymptDisp" "extraPois" ..- attr(*, "fitType")= chr "parametric" $ fittedDispEsts : num [1:30578] 0.414 0.38 0.384 0.38 0.541 ... $ df : int 4 $ sharingMode : chr "maximum"
%R write.table(DE, "HK_V_DESeq.txt", row.names = FALSE, sep="\t")
Error in is.data.frame(x) : object 'DE' not found
!DE(head)
/bin/sh: -c: line 0: syntax error near unexpected token `head' /bin/sh: -c: line 0: `DE(head)'