# Move to /mnt, then create (if it does not exist yet) and enter a working
# directory named after the value of NetID.
cd /mnt
# Set this to your own NetID; it becomes the name of your working directory.
NetID='titus'
# -p: do not complain if the directory already exists.
mkdir -p $NetID
cd $NetID
You should now be in '/mnt/USERNAME', where USERNAME is the value you assigned to NetID above (your own NetID, not a placeholder such as CHANGEME).
# Configure the session so that the khmer Python package can be imported.
import os
import sys

khmer_dir = '/usr/local/share/khmer/python'

# Put the khmer directory first on this interpreter's module search path.
sys.path.insert(0, khmer_dir)
# Also export it as PYTHONPATH; presumably this is for the khmer scripts that
# are launched as separate processes later on -- confirm.
os.environ['PYTHONPATH'] = khmer_dir

import khmer
import random

# Fix the seed so that every run produces the same two sequences.
random.seed(1)


def _random_genome(copies=1000):
    """Return a shuffled string made of `copies` each of A, G, C and T."""
    bases = ["A", "G", "C", "T"] * copies
    random.shuffle(bases)
    return "".join(bases)


# Two independent random "genomes" of 4000 bases each.  They are generated in
# this order (x, then y) so the seeded random stream is consumed exactly as
# before.
x = _random_genome()
y = _random_genome()

# Show the first 100 bases of each.  A single parenthesised string prints the
# same text under both Python 2 and Python 3.
print('x is ' + x[:100])
print('y is ' + y[:100])

# Write both sequences to a FASTA file: a header line starting with '>'
# followed by the sequence on one line.  The 'with' block closes the file even
# if a write fails.
with open('metagenome.fa', 'w') as outfp:
    outfp.write(">x 1\n")
    outfp.write(x + "\n")
    outfp.write(">y 2\n")
    outfp.write(y + "\n")
# Run make-biased-reads.py on metagenome.fa and keep only the first 100000
# lines of its output in reads.fa.  Changing the number given to 'head'
# changes how much read data (coverage) you keep.
!python /usr/local/share/2012-paper-diginorm/pipeline/make-biased-reads.py metagenome.fa | head -100000 > reads.fa
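(Yes, you should see an error. It is most likely a broken-pipe error: 'head' stops reading after 100000 lines, so the script can no longer write the rest of its output. reads.fa is still written.)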
# Compute the k-mer abundance distribution of reads.fa with k=20 (-k) and
# write it to reads.hist, which is loaded and plotted below.
# NOTE(review): -x 1e8 is presumably khmer's table-size setting -- confirm
# against the script's help text.
!/usr/local/share/khmer/scripts/abundance-dist-single.py -k 20 -x 1e8 reads.fa reads.hist
# Load the abundance histogram from reads.hist.  Going by the axis labels
# below, column 0 is the k-mer abundance and column 1 is the number of k-mers
# with that abundance.
histdata = numpy.loadtxt('reads.hist')
abundance = histdata[:, 0]
kmer_counts = histdata[:, 1]

# View 1: the whole distribution.
plot(abundance, kmer_counts)
xlabel("k-mer abundance")
ylabel("N of k-mers with that abundance")

# View 2: same data with the y axis capped at 500.
plot(abundance, kmer_counts)
axis(ymax=500)
xlabel("k-mer abundance")
ylabel("N of k-mers with that abundance")

# View 3: same data with the x axis capped at 10, to look at the
# low-abundance end.
plot(abundance, kmer_counts)
axis(xmax=10)
How do the 3 peaks (1, 100, 800) shift relative to each other with (a) changing coverage and (b) different k values?
The '-k' parameter is set above in the abundance-dist-single.py command; the coverage is set by how many sequences you keep ('head -100000') in the make-biased-reads.py command.