## conda install ipyrad -c ipyrad
## conda install toytree -c eaton-lab
## conda install bpp -c ipyrad
import toytree
import toyplot.svg
import ipyrad as ip
import ipyrad.analysis as ipa
print "ipyrad v.{}".format(ip.__version__)
ipyrad v.0.7.22
import ipyparallel as ipp
ipyclient = ipp.Client()
ip.cluster_info(ipyclient)
host compute node: [40 cores] on sacra
## the subset of six taxa used in BPP analyses, 4 per taxon (or 2)
## exclude hybrid taxon SF172
## exclude D12963 b/c it was not consistently placed in 3B vs 3C vs. sister to both
imap = {
"1A": ['SF175', 'SF328', 'SF200'],
"1B": ['SF209', 'D13052'],
"1C": ['D14528', 'SF276', 'SF286'],
"2A": ['D13101', 'D13103', 'D14482', 'D14483'],
"2B": ['D14504', 'D14505', 'D14506'],
"2C": ['D14477', 'D14478', 'D14480', 'D14485', 'D14501', 'D14513'],
"3A": ['D13090', 'D12950'],
"3B": ['SF155', 'SF224', 'SF228', '5573', 'SF327'],
"3C": ['SF164', 'SF153', 'SF160', 'D13053', 'D13063', 'D13075', 'D13097', 'SF197'],
}
## make a dictionary with min values to filter loci to those with N samples per species.
minmap = {
"1A": 3,
"1B": 2,
"1C": 3,
"2A": 4,
"2B": 3,
"2C": 4,
"3A": 2,
"3B": 4,
"3C": 4,
}
## Species tree hypothesis ('guide tree') based on raxml & bucky results
tree9 = "((1A,(1B,1C)),((2A,(2B,2C)),(3A,(3B,3C))));"
toytree.tree(tree9).draw(use_edge_lengths=False);
## initial bpp object
bpp00 = ipa.bpp(
name="base",
data="analysis-ipyrad/Canarium-min10_outfiles/Canarium-min10.alleles.loci",
imap=imap,
minmap=minmap,
guidetree=tree9,
)
## filtering
bpp00.filters.maxloci = 100
## bpp params
bpp00.params.burnin = 10000
bpp00.params.nsample = 100000
bpp00.params.sampfreq = 10
## run object 1
b1 = bpp00.copy("bpp00_theta_2_2000_tau_2_2000")
b1.params.thetaprior = (2, 2000)
b1.params.tauprior = (2, 2000, 1)
## run object 2
b2 = bpp00.copy("bpp00_theta_2_2000_tau_2_200")
b2.params.thetaprior = (2, 2000)
b2.params.tauprior = (2, 200, 1)
## run object 3
b3 = bpp00.copy("bpp00_theta_2_200_tau_2_2000")
b3.params.thetaprior = (2, 200)
b3.params.tauprior = (2, 2000, 1)
## run object 4
b4 = bpp00.copy("bpp00_theta_2_200_tau_2_200")
b4.params.thetaprior = (2, 200)
b4.params.tauprior = (2, 200, 1)
## put all into a list
b00list = [b1, b2, b3, b4]
## submit 5 replicates of each job
for job in b00list:
job.run(ipyclient, nreps=5, randomize_order=True, force=True)
submitted 5 bpp jobs [bpp00_theta_2_2000_tau_2_2000] (100 loci) submitted 5 bpp jobs [bpp00_theta_2_2000_tau_2_200] (100 loci) submitted 5 bpp jobs [bpp00_theta_2_200_tau_2_2000] (100 loci) submitted 5 bpp jobs [bpp00_theta_2_200_tau_2_200] (100 loci)
## create prior-only run objects
b00plist = [i.copy(i.name+"-prior-only") for i in b00list]
## set no-data parameter on them
for job in b00plist:
job.params.usedata = 0
## submit jobs to run (only needs a single rep each)
for job in b00plist:
job.run(ipyclient, randomize_order=True, force=True)
submitted 1 bpp jobs [bpp00_theta_2_2000_tau_2_2000-prior-only] (100 loci) submitted 1 bpp jobs [bpp00_theta_2_2000_tau_2_200-prior-only] (100 loci) submitted 1 bpp jobs [bpp00_theta_2_200_tau_2_2000-prior-only] (100 loci) submitted 1 bpp jobs [bpp00_theta_2_200_tau_2_200-prior-only] (100 loci)
## create delim objects as copies of bpp00s
b01list = [i.copy(i.name.replace("bpp00", "bpp01")) for i in b00list]
## modify params on the delim objs
for job in b01list:
job.params.infer_delimit = 1
job.params.delimit_alg = (0, 5)
## submit jobs to run
for job in b01list:
job.run(ipyclient, nreps=5, randomize_order=True, force=True)
submitted 5 bpp jobs [bpp01_theta_2_2000_tau_2_2000] (100 loci) submitted 5 bpp jobs [bpp01_theta_2_2000_tau_2_200] (100 loci) submitted 5 bpp jobs [bpp01_theta_2_200_tau_2_2000] (100 loci) submitted 5 bpp jobs [bpp01_theta_2_200_tau_2_200] (100 loci)
## create prior-only run objects
b01plist = [i.copy(i.name+"-prior-only") for i in b01list]
## set no-data parameter on them
for job in b01plist:
job.params.usedata = 0
## submit jobs to run (only needs a single rep each)
for job in b01plist:
job.run(ipyclient, randomize_order=True, force=True)
submitted 1 bpp jobs [bpp01_theta_2_2000_tau_2_2000-prior-only] (100 loci) submitted 1 bpp jobs [bpp01_theta_2_2000_tau_2_200-prior-only] (100 loci) submitted 1 bpp jobs [bpp01_theta_2_200_tau_2_2000-prior-only] (100 loci) submitted 1 bpp jobs [bpp01_theta_2_200_tau_2_200-prior-only] (100 loci)
Here I access the result files from th bpp objects themselves. Alternatively you could enter the path to the results files.
## set options to print prettier tables
## (this suppresses scientific notation)
import pandas as pd
import numpy as np
pd.set_option('display.float_format', lambda x: '%.4f' % x)
The first question is whether the replicate runs for the same prior values have converged on similar parameter values. If so, then we should feel OK with combining them and reporting average parameter values as our results.
## get mean parameters across different values for the prior with and without data
all00 = pd.DataFrame(
{job.name: job.summarize_results()["mean"] for job in b00list + b00plist})
## save result to file for 200 2000
outdf = all00[["bpp00_theta_2_200_tau_2_2000", "bpp00_theta_2_200_tau_2_2000-prior-only"]]
outdf.columns = ["with-data", "prior-only"]
outdf
with-data | prior-only | |
---|---|---|
lnL | -11006.5848 | nan |
tau_101A1B1C2A2B2C3A3B3C | 0.0011 | 0.0014 |
tau_111A1B1C | 0.0007 | 0.0009 |
tau_121B1C | 0.0004 | 0.0005 |
tau_132A2B2C3A3B3C | 0.0010 | 0.0012 |
tau_142A2B2C | 0.0009 | 0.0008 |
tau_152B2C | 0.0007 | 0.0004 |
tau_163A3B3C | 0.0007 | 0.0008 |
tau_173B3C | 0.0004 | 0.0004 |
theta_101A1B1C2A2B2C3A3B3C | 0.0020 | 0.0090 |
theta_111A1B1C | 0.0033 | 0.0096 |
theta_11A | 0.0027 | 0.0101 |
theta_121B1C | 0.0094 | 0.0101 |
theta_132A2B2C3A3B3C | 0.0098 | 0.0095 |
theta_142A2B2C | 0.0074 | 0.0100 |
theta_152B2C | 0.0099 | 0.0103 |
theta_163A3B3C | 0.0047 | 0.0095 |
theta_173B3C | 0.0057 | 0.0091 |
theta_21B | 0.0011 | 0.0099 |
theta_31C | 0.0016 | 0.0095 |
theta_42A | 0.0030 | 0.0093 |
theta_52B | 0.0012 | 0.0100 |
theta_62C | 0.0119 | 0.0104 |
theta_73A | 0.0013 | 0.0101 |
theta_83B | 0.0084 | 0.0102 |
theta_93C | 0.0036 | 0.0097 |
These tables show the mean posteriors the five replicates for each set of priors values.
for job in b01list:
print job.name
print job.summarize_results()
print ""
bpp01_theta_2_2000_tau_2_2000 delim prior posterior nspecies 0 00000000 0.03226 0.0 1 1 10000000 0.03226 0.0 2 2 10010000 0.03226 0.0 3 3 10010010 0.03226 0.0 4 4 10010011 0.03226 0.0 5 5 10011000 0.03226 0.0 4 6 10011010 0.03226 0.0 5 7 10011011 0.03226 0.0 6 8 10011100 0.03226 0.0 5 9 10011110 0.03226 0.0 6 10 10011111 0.03226 0.007156 7 11 11000000 0.03226 0.0 3 12 11010000 0.03226 0.0 4 13 11010010 0.03226 0.0 5 14 11010011 0.03226 0.0 6 15 11011000 0.03226 0.0 5 16 11011010 0.03226 0.0 6 17 11011011 0.03226 0.0 7 18 11011100 0.03226 0.0 6 19 11011110 0.03226 0.0 7 20 11011111 0.03226 0.060192 8 21 11100000 0.03226 0.0 4 22 11110000 0.03226 0.0 5 23 11110010 0.03226 0.0 6 24 11110011 0.03226 0.0 7 25 11111000 0.03226 0.0 6 26 11111010 0.03226 0.0 7 27 11111011 0.03226 0.0 8 28 11111100 0.03226 0.0 7 29 11111110 0.03226 0.0 8 30 11111111 0.03226 0.932652 9 bpp01_theta_2_2000_tau_2_200 delim prior posterior nspecies 0 00000000 0.03226 0.0 1 1 10000000 0.03226 0.0 2 2 10010000 0.03226 0.0 3 3 10010010 0.03226 0.0 4 4 10010011 0.03226 0.0 5 5 10011000 0.03226 0.0 4 6 10011010 0.03226 0.0 5 7 10011011 0.03226 0.0 6 8 10011100 0.03226 0.0 5 9 10011110 0.03226 0.0 6 10 10011111 0.03226 0.020182 7 11 11000000 0.03226 0.0 3 12 11010000 0.03226 0.0 4 13 11010010 0.03226 0.0 5 14 11010011 0.03226 0.0 6 15 11011000 0.03226 0.0 5 16 11011010 0.03226 0.0 6 17 11011011 0.03226 0.0 7 18 11011100 0.03226 0.0 6 19 11011110 0.03226 0.0 7 20 11011111 0.03226 0.07754 8 21 11100000 0.03226 0.0 4 22 11110000 0.03226 0.0 5 23 11110010 0.03226 0.0 6 24 11110011 0.03226 0.0 7 25 11111000 0.03226 0.0 6 26 11111010 0.03226 0.0 7 27 11111011 0.03226 0.0 8 28 11111100 0.03226 0.0 7 29 11111110 0.03226 0.0 8 30 11111111 0.03226 0.902278 9 bpp01_theta_2_200_tau_2_2000 delim prior posterior nspecies 0 00000000 0.03226 0.0 1 1 10000000 0.03226 0.0 2 2 10010000 0.03226 0.0 3 3 10010010 0.03226 0.0 4 4 10010011 0.03226 0.0 5 5 10011000 0.03226 0.0 4 6 10011010 0.03226 0.0 5 7 10011011 0.03226 0.0 6 8 10011100 0.03226 0.0 5 9 10011110 0.03226 0.0 6 10 10011111 0.03226 0.012494 7 11 11000000 0.03226 0.0 3 12 11010000 0.03226 0.0 4 13 11010010 0.03226 0.0 5 14 11010011 0.03226 0.0 6 15 11011000 0.03226 0.0 5 16 11011010 0.03226 0.0 6 17 11011011 0.03226 0.0 7 18 11011100 0.03226 0.0 6 19 11011110 0.03226 0.0 7 20 11011111 0.03226 0.02224 8 21 11100000 0.03226 0.0 4 22 11110000 0.03226 0.0 5 23 11110010 0.03226 0.0 6 24 11110011 0.03226 0.0 7 25 11111000 0.03226 0.0 6 26 11111010 0.03226 0.0 7 27 11111011 0.03226 0.0 8 28 11111100 0.03226 0.0 7 29 11111110 0.03226 0.0 8 30 11111111 0.03226 0.965266 9 bpp01_theta_2_200_tau_2_200 delim prior posterior nspecies 0 00000000 0.03226 0.0 1 1 10000000 0.03226 0.0 2 2 10010000 0.03226 0.0 3 3 10010010 0.03226 0.0 4 4 10010011 0.03226 0.0 5 5 10011000 0.03226 0.0 4 6 10011010 0.03226 0.0 5 7 10011011 0.03226 0.0 6 8 10011100 0.03226 0.0 5 9 10011110 0.03226 0.0 6 10 10011111 0.03226 0.000128 7 11 11000000 0.03226 0.0 3 12 11010000 0.03226 0.0 4 13 11010010 0.03226 0.0 5 14 11010011 0.03226 0.0 6 15 11011000 0.03226 0.0 5 16 11011010 0.03226 0.0 6 17 11011011 0.03226 0.0 7 18 11011100 0.03226 0.0 6 19 11011110 0.03226 0.0 7 20 11011111 0.03226 0.078476 8 21 11100000 0.03226 0.0 4 22 11110000 0.03226 0.0 5 23 11110010 0.03226 0.0 6 24 11110011 0.03226 0.0 7 25 11111000 0.03226 0.0 6 26 11111010 0.03226 0.0 7 27 11111011 0.03226 0.0 8 28 11111100 0.03226 0.0 7 29 11111110 0.03226 0.0 8 30 11111111 0.03226 0.921396 9