ipyrad.analysis simulations
¶This notebook demonstrates how to use the baba
module in ipyrad
to test for admixture and introgression. The code is written in Python and is best implemented in a Jupyter-notebook like this one. Analyses can be split up among many computing cores. Finally, there are some simple plotting methods to accompany tests which I show below.
## imports
import numpy as np
import ipyrad as ip
import ipyparallel as ipp
from ipyrad.analysis import baba
## print versions
print "ipyrad v.{}".format(ip.__version__)
print "ipyparallel v.{}".format(ipp.__version__)
print "numpy v.{}".format(np.__version__)
ipyrad v.0.6.11 ipyparallel v.6.0.2 numpy v.1.12.0
You can find more details about this in our [ipyparallel tutorial].
## start 4 engines by running the commented line below in a separate bash terminal.
##
## ipcluster start --n=4
## connect to client and print connection summary
ipyclient = ipp.Client()
print ip.cluster_info(client=ipyclient)
host compute node: [4 cores] on oud
The function baba.Tree()
takes a newick
string as an argument, or, if no value is passed, it constructs the default 12 taxon tree shown below. The Tree
Class object holds a representation of the tree, which can be used for plotting, as well as for describing a demographic model for simulating data, as we'll show below.
tre.draw(width=400, height=200);
<module 'ipyrad.analysis.baba' from '/home/deren/miniconda2/lib/python2.7/site-packages/ipyrad/analysis/baba.pyc'>
## tree2dict
import ete3
newick = "./analysis_tetrad/pedtest1.full.tre"
## load the newick string as a Tree object
tre = ete3.Tree(newick)
## set przewalskiis as the outgroup
prz = [i for i in tre.get_leaves() if "prz" in i.name]
out = tre.get_common_ancestor(prz)
## set new outgroup and store back as a newick string
tre.set_outgroup(out)
newick = tre.write()
def test_constraint(node, cdict, tip, exact):
names = set(node.get_leaf_names())
const = set(cdict[tip])
if const:
if exact:
if len(names.intersection(const)) == len(const):
return 1
else:
return 0
else:
if len(names.intersection(const)) == len(names):
return 1
else:
return 0
return 1
## constraints
#if not constraint_dict:
cdict = {"p1":[], "p2":[], "p3":[], "p4":[]}
cdict.update({"p4":['a']})
cdict
{'p1': [], 'p2': [], 'p3': [], 'p4': ['a']}
def tree2tests(newick, constraint_dict=None, constraint_exact=True):
"""
Returns dict of all possible four-taxon splits in a tree. Assumes
the user has entered a rooted tree. Skips polytomies.
"""
## make tree
tree = ete3.Tree(newick)
## constraints
#if not constraint_dict:
cdict = {"p1":[], "p2":[], "p3":[], "p4":[]}
cdict.update(constraint_dict)
print cdict
## traverse root to tips. Treat the left as outgroup, then the right.
tests = []
## topnode must have children
for topnode in tree.traverse("levelorder"):
## test onode as either child
for oparent in topnode.children:
## test outgroup as all descendants on one child branch
for onode in oparent.traverse():
## put constraints on onode
if test_constraint(onode, cdict, "p4", constraint_exact):
## p123 parent is sister to oparent
p123parent = oparent.get_sisters()[0]
## test p3 as all descendants of p123parent
for p3parent in p123parent.children:
## p12 parent is sister to p3parent
p12parent = p3parent.get_sisters()[0]
for p3node in p3parent.traverse():
if test_constraint(p3node, cdict, "p3", constraint_exact):
if p12parent.children:
p1parent, p2parent = p12parent.children
for p2node in p2parent.traverse():
if test_constraint(p2node, cdict, "p2", constraint_exact):
for p1node in p1parent.traverse():
if test_constraint(p1node, cdict, "p1", constraint_exact):
test = {}
test['p4'] = onode.get_leaf_names()
test['p3'] = p3node.get_leaf_names()
test['p2'] = p2node.get_leaf_names()
test['p1'] = p1node.get_leaf_names()
tests.append(test)
return tests
tests = tree2tests(newick,
constraint_dict={"p4":['32082_przewalskii', '33588_przewalskii'],
"p3":['30686_cyathophylla']},
constraint_exact=True)
len(tests)
{'p2': [], 'p3': ['30686_cyathophylla'], 'p1': [], 'p4': ['32082_przewalskii', '33588_przewalskii']}
33
tests
[{'p1': ['38362_rex', '39618_rex', '35855_rex', '40578_rex', '30556_thamno', '35236_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex', '39618_rex', '35855_rex', '40578_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['30556_thamno', '35236_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex', '39618_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35855_rex', '40578_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['30556_thamno'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35236_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['39618_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35855_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['40578_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla', '41478_cyathophylloides', '41954_cyathophylloides'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex', '39618_rex', '35855_rex', '40578_rex', '30556_thamno', '35236_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex', '39618_rex', '35855_rex', '40578_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['30556_thamno', '35236_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex', '39618_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35855_rex', '40578_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['30556_thamno'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35236_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['39618_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35855_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['40578_rex'], 'p2': ['33413_thamno'], 'p3': ['29154_superba', '30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex', '39618_rex', '35855_rex', '40578_rex', '30556_thamno', '35236_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex', '39618_rex', '35855_rex', '40578_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['30556_thamno', '35236_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex', '39618_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35855_rex', '40578_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['30556_thamno'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35236_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['38362_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['39618_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['35855_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}, {'p1': ['40578_rex'], 'p2': ['33413_thamno'], 'p3': ['30686_cyathophylla'], 'p4': ['32082_przewalskii', '33588_przewalskii']}]
## pass a newick string to the Tree class object
tre = baba.Tree(newick="((a,b),c);")
tre.draw(width=200, height=200);
Shown by an arrow pointing backwards from source to sink (indicating that geneflow should be entered as occurring backwards in time). This is how the program msprime
will interpret it.
## store newick tree and a migration event list [source, sink, start, stop, rate]
## if not edges are entered then it is converted to cladogram
newick = "(((a,b),Cow), (d,e));"
events = [['e', 'Cow', 0, 1, 1e-6],
['3', '1', 1, 2, 1e-6]]
## initiate Tree object with newick and admix args
tre = baba.Tree(newick=newick,
admix=events)
## show the tree
tre.draw(width=250, height=250, yaxis=True);
## a way of finding names xpos from verts
tre.verts[tre.verts[:, 1] == 0]
tre.tree.search_nodes(name='b')[0].idx
tre.verts[6, 0]
print [tre.verts[tre.tree.search_nodes(name=name)[0].idx, 0]
for name in tre.tree.get_leaf_names()]
print tre.tree.get_leaf_names()
[4.0, 3.0, 2.0, 1.0, 0.0] ['d', 'e', 'Cow', 'a', 'b']
## returns a Sim data object
sims = tre.simulate(nreps=10000, Ns=50000, gen=20)
## what is in the sims object? 4 attributes:
for key, val in sims.__dict__.items():
print key, val
sims <generator object _replicate_generator at 0x7fa709d3c870> debug 0 nreps 10000 names ['b', 'a', 'Cow', 'e', 'd']
baba._msp_to_arr(sims, test)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-10-3d3bdbaf406e> in <module>() ----> 1 baba._msp_to_arr(sims, test) NameError: name 'test' is not defined
def _msp_to_arr(Sim, test):
## the fixed tree dictionary
#fix = {j: [i, i+1] for j, i in zip(list("abcdefghijkl"), range(0, 24, 2))}
fix = {j: [i, i+1] for j, i in zip(Sim.names, range(0, len(Sim.names)*2, 2))}
## fill taxdict by test
keys = ['p1', 'p2', 'p3', 'p4']
arr = np.zeros((Sim.nreps, 4, 100))
## unless it's a 5-taxon test
if len(test) == 5:
arr = np.zeros((100000, 6, 100))
keys += ['p5']
## create array sampler for taxa
taxs = [test[key] for key in keys]
idxs = [list(itertools.chain(*[fix[j] for j in i])) for i in taxs]
print idxs
from ipyrad.analysis.baba import *
_msp_to_arr(sims, test)
[[2, 3], [0, 1], [4, 5], [6, 7, 8, 9]]
This is used just for demonstration. The matrix will have 2X as many columns as there are tips in the tree, corresponding to two alleles per individual. The number of rows is the number of variant sites simulated.
def print_variant_matrix(tree):
shape = tree.get_num_mutations(), tree.get_sample_size()
arr = np.empty(shape, dtype="u1")
for variant in tree.variants():
arr[variant.index] = variant.genotypes
print(arr)
## in order of ladderized tip names (2 copies per tip)
print_variant_matrix(sims.sims.next())
[[0 0 0 0 0 0 1 1 0 0] [1 1 1 1 0 0 0 0 0 0] [0 0 1 1 0 0 0 0 0 0] [1 0 1 1 0 0 0 0 0 0] [0 1 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 1 1 1 1]]
sims
<ipyrad.analysis.baba.Sim at 0x7f92881bbc50>
## simulate data on tree
#sims = tre.simulate(nreps=10000, Ns=50000, gen=20)
## pass sim object to baba
test = {
'p4': ['e', 'd'],
'p3': ['Cow'],
'p2': ['b'],
'p1': ['a'],
}
#baba.batch(sims, test, ipyclient=ipyclient)