A simple example of k-mer abundance distributions, diginorm, and partitioning

This notebook uses the simulated data set in khmer, data/stamps.fa.gz. See the script 'do.sh' under examples/stamps/ for how to build the data files loaded below.

In [1]:
# Explicit import so this cell runs on a fresh kernel instead of relying on
# the implicit pylab namespace.
import numpy

# Histogram files loaded as arrays. The plotting cells index them as 2-D,
# using column 0 as the k-mer abundance and column 1 as the number of k-mers
# at that abundance. The trailing comments give the plot label each array
# is drawn with.
reads = numpy.loadtxt('stamps/stamps-reads.hist')    # 'raw reads'
part1 = numpy.loadtxt('stamps/stamps-part.g0.hist')  # 'partition A'
part2 = numpy.loadtxt('stamps/stamps-part.g1.hist')  # 'partition B'
dn = numpy.loadtxt('stamps/stamps-dn.hist')          # 'diginorm to 10'
dn3 = numpy.loadtxt('stamps/stamps-dn3.hist')        # '3-pass diginorm'
In [2]:
# Explicit import so this cell does not depend on the implicit pylab
# namespace (bare plot/axis/legend/... names).
import matplotlib.pyplot as plt

# k-mer abundance distribution of the raw reads.
fig, ax = plt.subplots()
ax.plot(reads[:, 0], reads[:, 1], label='raw reads')
ax.set_ylim(top=200)  # cap only the upper y limit; x limits stay autoscaled
ax.legend()
ax.set_title("A fake metagenome (1:10)")
ax.set_ylabel('N(k-mers at that abundance)')
ax.set_xlabel('k-mer abundance')

fig.savefig('stamps/stamps-reads.png')
In [3]:
# Explicit import so this cell does not depend on the implicit pylab
# namespace (bare plot/axis/legend/... names).
import matplotlib.pyplot as plt

# Raw reads versus the 'diginorm to 10' histogram on the same axes.
fig, ax = plt.subplots()
ax.plot(reads[:, 0], reads[:, 1], label='raw reads')
ax.plot(dn[:, 0], dn[:, 1], label='diginorm to 10')
# Cap only the upper limits; the lower limits stay as autoscaled.
ax.set_xlim(right=400)
ax.set_ylim(top=200)
ax.legend()
ax.set_title("Normalizing metagenomic data")
ax.set_ylabel('N(k-mers at that abundance)')
ax.set_xlabel('k-mer abundance')

fig.savefig('stamps/diginorm.png')
In [4]:
# Explicit import so this cell does not depend on the implicit pylab
# namespace (bare plot/axis/legend/... names).
import matplotlib.pyplot as plt

# Raw reads versus the '3-pass diginorm' histogram on the same axes.
fig, ax = plt.subplots()
ax.plot(reads[:, 0], reads[:, 1], label='raw reads')
ax.plot(dn3[:, 0], dn3[:, 1], label='3-pass diginorm')
# Cap only the upper limits; the lower limits stay as autoscaled.
ax.set_xlim(right=50)
ax.set_ylim(top=2000)
ax.legend()
ax.set_title("Normalizing metagenomic data w/error trimming")
ax.set_ylabel('N(k-mers at that abundance)')
ax.set_xlabel('k-mer abundance')

fig.savefig('stamps/diginorm-dn3.png')
In [5]:
# Explicit import so this cell does not depend on the implicit pylab
# namespace (bare plot/axis/legend/... names).
import matplotlib.pyplot as plt

# k-mer abundance distributions of the two partitions on the same axes.
fig, ax = plt.subplots()
ax.plot(part1[:, 0], part1[:, 1], label='partition A')
ax.plot(part2[:, 0], part2[:, 1], label='partition B')
ax.set_ylim(top=120)  # cap only the upper y limit; x limits stay autoscaled
ax.legend()
ax.set_title("Separating different genomes into partitions")
ax.set_ylabel('N(k-mers at that abundance)')
ax.set_xlabel('k-mer abundance')
fig.savefig('stamps/stamps-partitions.png')
In [5]: