#!/usr/bin/env python # coding: utf-8 #

ipyrad-analysis toolkit: tetrad

# # The `tetrad` tool is a framework for inferring a species tree topology using quartet-joining on large unlinked SNP data sets. It is particularly optimized for RAD-seq type datasets that are likely to involve a lot of missing data. # ### Load libraries # In[38]: # conda install ipyrad -c conda-forge -c bioconda # conda install tetrad -c conda-forge # In[2]: import ipyrad.analysis as ipa import toytree import ipcoal # ### Simulate a random tree with 20 tips and crown age of 10M generations # In[15]: tree = toytree.rtree.bdtree(ntips=20, seed=555) tree = tree.mod.node_scale_root_height(10e6) tree.draw(scalebar=True); # ### Simulate SNPs with missing data and write to database (.seqs.hdf5) # In[16]: # init simulator with one diploid sample from each tip model = ipcoal.Model(tree, Ne=1e6, nsamples=2, recomb=0) # simulate sequence data on 10K loci model.sim_loci(10000, 50) # add missing data (50%) model.apply_missing_mask(0.5) # write results to database file model.write_snps_to_hdf5(name="test-tet-miss50", outdir='/tmp', diploid=True) # ### Infer tetrad tree # In[17]: SNPS = "/tmp/test-tet-miss50.snps.hdf5" # In[19]: tet = ipa.tetrad( data=SNPS, name="test-tet-miss50", workdir="/tmp", nboots=10, nquartets=1e6, ) # In[20]: tet.run(auto=True, force=True) # ### Draw the inferred tetrad tree # In[30]: tre = toytree.tree(tet.trees.cons) rtre = tre.root(["r19", "r18", "r17"]) rtre.draw(ts='d', use_edge_lengths=False, node_labels="support"); # ### Does this tree match the *true* species tree? # In[37]: rfdist = rtre.treenode.robinson_foulds(tree.treenode)[0] rfdist == 0