#mix
#details @ http://nbviewer.ipython.org/github/sr320/ipython_nb/blob/master/fish546/BSseq_workflow.ipynb
!head /Volumes/web/cnidarian/YE_mix_22sm_methratio_out.txt
chr pos strand context ratio eff_CT_count C_count CT_count rev_G_count rev_GA_count CI_lower CI_upper C12706 112 + TTCTA 0.000 1.00 0 1 0 0 0.000 0.793 C12706 135 + GACTC 0.000 1.00 0 1 0 0 0.000 0.793 C12706 137 + CTCTT 0.000 1.00 0 1 0 0 0.000 0.793 C12706 148 + ATCAA 0.000 1.00 0 1 0 0 0.000 0.793 C12706 151 + AACTT 0.000 1.00 0 1 0 0 0.000 0.793 C12706 159 + AGCCT 0.000 1.00 0 1 0 0 0.000 0.793 C12706 160 + GCCTA 0.000 1.00 0 1 0 0 0.000 0.793 C12706 163 + TACAA 0.000 1.00 0 1 0 0 0.000 0.793 C12706 170 - AAGGT 0.000 1.00 0 1 2 2 0.000 0.793
#control
#details @ http://nbviewer.ipython.org/github/sr320/ipython_nb/blob/master/fish546/BSseq_YE_control.ipynb
!head /Volumes/web/cnidarian/YE_control_22sm_methratio_out.txt
chr pos strand context ratio eff_CT_count C_count CT_count rev_G_count rev_GA_count CI_lower CI_upper C12798 41 + AACAA 0.000 1.00 0 1 0 0 0.000 0.793 C12798 45 + AACTT 0.000 1.00 0 1 0 0 0.000 0.793 C12798 52 + ATCCC 0.000 1.00 0 1 0 0 0.000 0.793 C12798 53 + TCCCC 0.000 1.00 0 1 0 0 0.000 0.793 C12798 54 + CCCCT 0.000 1.00 0 1 0 0 0.000 0.793 C12798 55 + CCCTT 0.000 1.00 0 1 0 0 0.000 0.793 C12798 58 + TTCAC 0.000 1.00 0 1 0 0 0.000 0.793 C12798 60 + CACCT 0.000 1.00 0 1 0 0 0.000 0.793 C12798 61 + ACCTA 0.000 1.00 0 1 0 0 0.000 0.793
!grep "[A-Z][A-Z]CG[A-Z]" </Volumes/web/cnidarian/YE_mix_22sm_methratio_out.txt> /Volumes/web/cnidarian/YE_mix_22sm_methratio_CG.txt
!head /Volumes/web/cnidarian/YE_mix_22sm_methratio_CG.txt
C12722 104 + TACGT 0.500 2.00 1 2 2 2 0.095 0.905 C12728 40 + GCCGC 0.000 1.00 0 1 1 1 0.000 0.793 C12728 62 + GCCGT 0.000 1.00 0 1 1 1 0.000 0.793 C12734 61 + ACCGT 0.000 1.00 0 1 1 1 0.000 0.793 C12734 136 + ACCGC 0.000 2.00 0 2 2 2 0.000 0.658 C12734 178 + GACGG 0.000 1.00 0 1 1 1 0.000 0.793 C12734 183 + GGCGG 0.000 1.00 0 1 1 1 0.000 0.793 C12748 71 + AACGA 0.000 1.00 0 1 1 1 0.000 0.793 C12758 82 + TCCGC 1.000 1.00 1 1 1 1 0.207 1.000 C12758 125 + TTCGC 1.000 1.00 1 1 1 1 0.207 1.000
!grep "[A-Z][A-Z]CG[A-Z]" </Volumes/web/cnidarian/YE_control_22sm_methratio_out.txt> /Volumes/web/cnidarian/YE_control_22sm_methratio_CG.txt
!head /Volumes/web/cnidarian/YE_control_22sm_methratio_CG.txt
C12802 82 + ACCGG 0.000 1.00 0 1 1 1 0.000 0.793 C12802 90 + TGCGA 0.000 1.00 0 1 1 1 0.000 0.793 C12824 150 + GGCGT 0.000 1.00 0 1 0 0 0.000 0.793 C12824 160 + ATCGC 0.000 2.00 0 2 2 2 0.000 0.658 C12824 183 + TTCGA 0.000 2.00 0 2 2 2 0.000 0.658 C12832 9 + TGCGT 0.000 1.00 0 1 1 1 0.000 0.793 C12832 25 + AGCGC 0.000 1.00 0 1 1 1 0.000 0.793 C12832 129 + GTCGA 0.000 2.00 0 2 2 2 0.000 0.658 C12842 123 + CACGA 0.000 1.00 0 1 1 1 0.000 0.793 C12860 54 + AACGT 0.000 1.00 0 1 1 1 0.000 0.793
!awk '{print $1,$2,$2+1,$3,$8,($7/$8),(1-($7/$8))}' </Volumes/web/cnidarian/YE_mix_22sm_methratio_CG.txt> /Volumes/web/cnidarian/YE_mix_22sm_methkit_outCG.txt
!head /Volumes/web/cnidarian/YE_mix_22sm_methkit_outCG.txt
C12722 104 105 + 2 0.5 0.5 C12728 40 41 + 1 0 1 C12728 62 63 + 1 0 1 C12734 61 62 + 1 0 1 C12734 136 137 + 2 0 1 C12734 178 179 + 1 0 1 C12734 183 184 + 1 0 1 C12748 71 72 + 1 0 1 C12758 82 83 + 1 1 0 C12758 125 126 + 1 1 0
#convert space to tab
!tr ' ' "\t" </Volumes/web/cnidarian/YE_mix_22sm_methkit_outCG.txt> /Volumes/web/cnidarian/YE_mix_22sm_methkit.txt
!head /Volumes/web/cnidarian/YE_mix_22sm_methkit.txt
C12722 104 105 + 2 0.5 0.5 C12728 40 41 + 1 0 1 C12728 62 63 + 1 0 1 C12734 61 62 + 1 0 1 C12734 136 137 + 2 0 1 C12734 178 179 + 1 0 1 C12734 183 184 + 1 0 1 C12748 71 72 + 1 0 1 C12758 82 83 + 1 1 0 C12758 125 126 + 1 1 0
#processing control sample
!awk '{print $1,$2,$2+1,$3,$8,($7/$8),(1-($7/$8))}' </Volumes/web/cnidarian/YE_control_22sm_methratio_CG.txt> /Volumes/web/cnidarian/YE_control_22sm_methkit_outCG.txt
!tr ' ' "\t" </Volumes/web/cnidarian/YE_control_22sm_methkit_outCG.txt> /Volumes/web/cnidarian/YE_control_22sm_methkit.txt
!head /Volumes/web/cnidarian/YE_control_22sm_methkit.txt
C12802 82 83 + 1 0 1 C12802 90 91 + 1 0 1 C12824 150 151 + 1 0 1 C12824 160 161 + 2 0 1 C12824 183 184 + 2 0 1 C12832 9 10 + 1 0 1 C12832 25 26 + 1 0 1 C12832 129 130 + 2 0 1 C12842 123 124 + 1 0 1 C12860 54 55 + 1 0 1
#filter mk files for 3x coverage
!awk '{if ($5 >= 3) print $1,$2,$3,$4,$5,$6,$7}' </Volumes/web/cnidarian/YE_mix_22sm_methkit.txt> /Volumes/web/cnidarian/YE_mix_22sm_methkit3x.txt
!head /Volumes/web/cnidarian/YE_mix_22sm_methkit3x.txt
C12764 119 120 + 3 0 1 C12764 129 130 + 4 0 1 C12764 132 133 + 4 0 1 C12778 114 115 + 5 0 1 C12802 119 120 + 4 0 1 C12860 105 106 + 3 0 1 C12860 113 114 + 4 0 1 C12860 116 117 + 4 0 1 C12860 121 122 + 3 0 1 C12860 159 160 + 3 0 1
#filter mk files for 3x coverage
!awk '{if ($5 >= 3) print $1,$2,$3,$4,$5,$6,$7}' </Volumes/web/cnidarian/YE_control_22sm_methkit.txt> /Volumes/web/cnidarian/YE_control_22sm_methkit3x.txt
!head /Volumes/web/cnidarian/YE_control_22sm_methkit3x.txt
C13288 101 102 + 4 0 1 C13288 108 109 + 4 0 1 C13448 37 38 + 3 0 1 C13800 98 99 + 3 0 1 C14308 114 115 + 3 0 1 C14308 121 122 + 3 0 1 C14966 94 95 + 3 0 1 C15066 82 83 + 3 0.333333 0.666667 C15066 89 90 + 3 0 1 C15066 261 262 + 4 0 1
%pylab inline
Populating the interactive namespace from numpy and matplotlib
%load_ext rpy2.ipython
%R library(methylKit)
<StrVector - Python:0x10a62c560 / R:0x10f303400> [str, str, str, ..., str, str, str]
%R library(data.table)
data.table 1.9.2 For help type: help("data.table")
%R library(GenomicRanges)
Loading required package: BiocGenerics Loading required package: parallel Attaching package: ‘BiocGenerics’ The following objects are masked from ‘package:parallel’: clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply, parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB The following object is masked from ‘package:stats’: xtabs The following objects are masked from ‘package:base’: anyDuplicated, append, as.data.frame, as.vector, cbind, colnames, duplicated, eval, evalq, Filter, Find, get, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rep.int, rownames, sapply, setdiff, sort, table, tapply, union, unique, unlist Loading required package: IRanges Loading required package: XVector Attaching package: ‘GenomicRanges’ The following object is masked from ‘package:data.table’: last
!head /Volumes/web/cnidarian/YE_mix_22sm_methkit3x.txt
C12764 119 120 + 3 0 1 C12764 129 130 + 4 0 1 C12764 132 133 + 4 0 1 C12778 114 115 + 5 0 1 C12802 119 120 + 4 0 1 C12860 105 106 + 3 0 1 C12860 113 114 + 4 0 1 C12860 116 117 + 4 0 1 C12860 121 122 + 3 0 1 C12860 159 160 + 3 0 1
%%R file.list <- list
('/Volumes/web/cnidarian/YE_mix_22sm_methkit3x.txt',
'/Volumes/web/cnidarian/YE_control_22sm_methkit3x.txt')
%R myobj=read(file.list,sample.id=list("mix","control"),assembly="v9",treatment=c(0,1))
<ListVector - Python:0x10a62c2d8 / R:0x10f94c588> [ListVector, ListVector] <ListVector - Python:0x10a62c2d8 / R:0x10f94c588> [ListVector, ListVector] <ListVector - Python:0x10a62c2d8 / R:0x10f94c588> [ListVector, ListVector]
%%R
meth<-unite(myobj)
Error in dev.off() : QuartzBitmap_Output - unable to open file '/var/folders/28/07h2q2ms7nd609f1ft3mj1240000gn/T/tmpHGwHzf/Rplots001.png'
%%R
meth<-unite(myobj)
head(meth)
nrow(meth)
getCorrelation(meth,plot=T)
hc<- clusterSamples(meth, dist="correlation", method="ward", plot=T)
PCA<-PCASamples(meth)
mix control mix 1 NA control NA 1 KernSmooth 2.23 loaded Copyright M. P. Wand 1997-2009 Error in quantile.default(sds, sd.threshold) : missing values and NaN's not allowed if 'na.rm' is FALSE
--------------------------------------------------------------------------- RRuntimeError Traceback (most recent call last) <ipython-input-10-b640763ed36e> in <module>() ----> 1 get_ipython().run_cell_magic(u'R', u'', u'meth<-unite(myobj)\nhead(meth)\nnrow(meth)\ngetCorrelation(meth,plot=T)\nhc<- clusterSamples(meth, dist="correlation", method="ward", plot=T)\nPCA<-PCASamples(meth)') /Users/sr320/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_cell_magic(self, magic_name, line, cell) 2160 magic_arg_s = self.var_expand(line, stack_depth) 2161 with self.builtin_trap: -> 2162 result = fn(magic_arg_s, cell) 2163 return result 2164 /Users/sr320/anaconda/lib/python2.7/site-packages/rpy2/ipython/rmagic.pyc in R(self, line, cell, local_ns) /Users/sr320/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k) 191 # but it's overkill for just that one bit of state. 192 def magic_deco(arg): --> 193 call = lambda f, *a, **k: f(*a, **k) 194 195 if callable(arg): /Users/sr320/anaconda/lib/python2.7/site-packages/rpy2/ipython/rmagic.pyc in R(self, line, cell, local_ns) 634 finally: 635 if self.device in ['png', 'svg']: --> 636 ro.r('dev.off()') 637 638 if text_output: /Users/sr320/anaconda/lib/python2.7/site-packages/rpy2/robjects/__init__.pyc in __call__(self, string) 244 def __call__(self, string): 245 p = rinterface.parse(string) --> 246 res = self.eval(p) 247 return res 248 /Users/sr320/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.pyc in __call__(self, *args, **kwargs) 164 v = kwargs.pop(k) 165 kwargs[r_k] = v --> 166 return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs) 167 168 pattern_link = re.compile(r'\\link\{(.+?)\}') /Users/sr320/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.pyc in __call__(self, *args, **kwargs) 97 for k, v in kwargs.items(): 98 new_kwargs[k] = conversion.py2ri(v) ---> 99 res = super(Function, self).__call__(*new_args, **new_kwargs) 100 res = conversion.ri2ro(res) 101 return res RRuntimeError: Error in dev.off() : QuartzBitmap_Output - unable to open file '/var/folders/28/07h2q2ms7nd609f1ft3mj1240000gn/T/tmpHGwHzf/Rplots001.png'
Install should only have to occur once.
source("http://methylkit.googlecode.com/files/install.methylKit.R")
# installing the package with the dependencies
install.methylKit(ver="0.9.2",dependencies=TRUE)
library(methylKit)
library(data.table)
library(GenomicRanges)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
##
## The following object is masked from 'package:stats':
##
## xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, as.vector, cbind,
## colnames, duplicated, eval, evalq, Filter, Find, get,
## intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unlist
##
## Loading required package: IRanges
## Loading required package: XVector
##
## Attaching package: 'GenomicRanges'
##
## The following object is masked from 'package:data.table':
##
## last
file.list <- list('/Volumes/web/cnidarian/YE_control_22sm_methkit3x.txt','/Volumes/web/cnidarian/YE_mix_22sm_methkit3x.txt')
myobj=read(file.list,sample.id=list("mix","control"),assembly="ensembl",treatment=c(1,0))
meth<-unite(myobj)
nrow(meth)
## [1] 309616
head(meth)
## methylBase object with 6 rows
## --------------
## chr start end strand coverage1 numCs1 numTs1 coverage2 numCs2 numTs2
## 1 5 6 6 + 11 0 0 110 0 1
## 2 8 9 9 + 5 0 0 4 0 0
## 3 8 9 9 + 5 0 0 8 0 0
## 4 9 10 10 + 6 0 0 35 0 0
## 5 9 10 10 + 6 0 0 3 0 0
## 6 13 14 14 + 3 0 0 3 0 0
## --------------
## sample.ids: mix control
## destranded FALSE
## assembly: ensembl
## context: CpG
## treament: 1 0
## resolution: base
myDiff=calculateDiffMeth(meth)
head(myDiff)
## methylDiff object with 6 rows
## --------------
## chr start end strand pvalue qvalue meth.diff
## 1 5 6 6 + 1 0 0
## 2 8 9 9 + 1 0 0
## 3 8 9 9 + 1 0 0
## 4 9 10 10 + 1 0 0
## 5 9 10 10 + 1 0 0
## 6 13 14 14 + 1 0 0
## --------------
## sample.ids: mix control
## destranded FALSE
## assembly: ensembl
## context: CpG
## treament: 1 0
## resolution: base
write.csv(myDiff, file="/Volumes/web/cnidarian/YE_mix_diff3.csv")
## Warning: Setting class(x) to NULL; result will no longer be an S4 object