The data sets used in this notebook were generated with ipyrad (see notebook here). You can re-create the data sets used here by running that notebook.
Software installation (conda)
Phylogenetic analysis (tetrad)
[Tree plots (toytree)](#Tree plot)
All software required for this notebook can be installed locally using conda.
## conda install toytree -c eaton-lab
## conda install ipyrad -c ipyrad
## import packages
import ipyrad as ip
import ipyrad.analysis as ipa
import toytree
## print ipyrad info
print "ipyrad v.{}".format(ip.__version__)
ipyrad v.0.6.20
We will distribute jobs across an HPC cluster using the ipyparallel library (which is installed as a dependency of ipyrad). Start an ipcluster instance and check your connection below.
## print ipyparallel cluster information
import ipyparallel as ipp
ipyclient = ipp.Client()
#print ip.cluster_info(ipyclient)
For a given data set this function returns the samples in it that are from the selected clade.
def get_subsample_names(data, clade):
## known clades
c1 = ["tonduzii", "maxima", "yoponens", "glabrata", "insipida"]
c2 = ["nymph", "obtus", "pope", "bull", "citri", "paraen",
"pertus", "perfor", "dugan", "turbin", "colub",
"costa", "tria", "trig"]
## select clades from a dict
clades={
"pharmacosycea": c1,
"americana": c2,
}
## return selected clade names
keys = data.samples.keys()
names = [i for i in keys if any([bit in i for bit in clades[clade]])]
return names
We will use three data sets that were created already. One "full" data set and two subsampled data sets.
## the full large data set
full = ip.load_json("analysis-ipyrad/ficus_dhi_s4.json")
## pharma subsampled data set
pharma = full.branch("pharma_dhi_s4",
subsamples=get_subsample_names(full, "pharmacosycea"))
pharma.set_params("min_samples_locus", 4)
pharma.run("7", force=True)
## americana subsampled data set
america = full.branch("america_dhi_s4",
subsamples=get_subsample_names(full, "americana"))
america.set_params("min_samples_locus", 4)
america.run("7", force=True)
loading Assembly: ficus_dhi_s4 from saved path: ~/Documents/Ficus/analysis-ipyrad/ficus_dhi_s4.json Assembly: pharma_dhi_s4 [####################] 100% filtering loci | 0:00:17 | s7 | [####################] 100% building loci/stats | 0:00:35 | s7 | [####################] 100% building alleles | 0:00:40 | s7 | [####################] 100% building vcf file | 0:00:51 | s7 | [####################] 100% writing vcf file | 0:00:00 | s7 | [####################] 100% building arrays | 0:00:14 | s7 | [####################] 100% writing outfiles | 0:00:14 | s7 | Outfiles written to: ~/Documents/Ficus/analysis-ipyrad/pharma_dhi_s4_outfiles Assembly: america_dhi_s4 [####################] 100% filtering loci | 0:00:36 | s7 | [####################] 100% building loci/stats | 0:00:37 | s7 | [####################] 100% building alleles | 0:00:45 | s7 | [####################] 100% building vcf file | 0:01:16 | s7 | [####################] 100% writing vcf file | 0:00:00 | s7 | [####################] 100% building arrays | 0:00:29 | s7 | [####################] 100% writing outfiles | 0:00:44 | s7 | Outfiles written to: ~/Documents/Ficus/analysis-ipyrad/america_dhi_s4_outfiles
full = ip.load_json("analysis-ipyrad/ficus_dhi_s4.json")
pharma = ip.load_json("analysis-ipyrad/pharma_dhi_s4.json")
america = ip.load_json("analysis-ipyrad/america_dhi_s4.json")
loading Assembly: ficus_dhi_s4 from saved path: ~/Documents/Ficus/analysis-ipyrad/ficus_dhi_s4.json loading Assembly: pharma_dhi_s4 from saved path: ~/Documents/Ficus/analysis-ipyrad/pharma_dhi_s4.json loading Assembly: america_dhi_s4 from saved path: ~/Documents/Ficus/analysis-ipyrad/america_dhi_s4.json
tetrad
¶(Inference by phylogenetic invariants within quartets)
## create tetrad object
fulltet = ipa.tetrad(
full.name,
seqfile=full.outfiles.snpsphy,
mapfile=full.outfiles.snpsmap,
workdir="analysis-tetrad",
nboots=100,
);
pharmatet = ipa.tetrad(
pharma.name,
seqfile=pharma.outfiles.snpsphy,
mapfile=pharma.outfiles.snpsmap,
workdir="analysis-tetrad",
nboots=100,
);
americatet = ipa.tetrad(
america.name,
seqfile=america.outfiles.snpsphy,
mapfile=america.outfiles.snpsmap,
workdir="analysis-tetrad",
nboots=100,
);
loading seq array [76 taxa x 547987 bp] max unlinked SNPs per quartet (nloci): 76904 loading seq array [23 taxa x 143884 bp] max unlinked SNPs per quartet (nloci): 41432 loading seq array [53 taxa x 361100 bp] max unlinked SNPs per quartet (nloci): 56067
pharmatet.run(force=True, ipyclient=ipyclient)
americatet.run(ipyclient=ipyclient)
fulltet.run(ipyclient=ipyclient)
import numpy as np
import toytree
import toyplot
import toyplot.pdf
tre = toytree.tree("analysis-tetrad/pharma_dhi_s4.cons")
#tre.root(wildcard="insip")
tre.draw(
width=300,
height=700,
node_labels=tre.get_node_values("support"),
);
data = ip.load_json("analysis-ipyrad/ficus_dlo.json", 1)
## draw scatterplot
c, a, m = toyplot.scatterplot(
data.stats.reads_raw,
data.stats.reads_consens,
width=300, height=300,
size=8,
mstyle={"fill": "#262626", "stroke": "#262626", "opacity": 0.8},
title=data.stats.index.tolist(),
)
## modify axes
a.y.label.text = "N consensus reads (x 1e3)"
a.y.ticks.show = True
a.y.ticks.locator = toyplot.locator.Explicit(
locations = range(0, int(240e3), int(40e3)),
labels = range(0, 240, 40),
)
a.x.label.text = "N raw reads (x 1e6)"
a.x.ticks.show = True
a.x.ticks.locator = toyplot.locator.Explicit(
locations = range(0, int(60e6), int(10e6)),
labels = range(0, 6),
)
loading Assembly: ficus_dlo from saved path: ~/Documents/Ficus/analysis-ipyrad/ficus_dlo.json
## load tree data
#tre = toytree.tree("analysis-tetrad/ficus_85d6f2_min4_s3K.tree")
newick="analysis_raxml/RAxML_bipartitions.ficus_c85d6f2_min4_s3K"
tre = toytree.tree(newick, format=0)
## load ipyrad data
data = ip.load_json("analysis-ipyrad/lodepth_min4.json")
heights = data.stats.reads_raw.ix[tre.get_tip_labels()]
hdata = heights/float(1e6)
hover = ["{}: {}".format(hdata.index[i], hdata[i]) for \
i in range(hdata.shape[0])[::-1]]
## plot tree
canvas = toyplot.Canvas(
width=800,
height=1000,
)
ax1 = canvas.cartesian(bounds=("10%", "50%", "10%", "90%"))
ax2 = canvas.cartesian(bounds=("55%", "90%", "10%", "90%"))
tre.draw(
axes=ax1,
#node_labels=True,
node_size=5,
use_edge_lengths=True,
tip_labels_style={"font-size": "7px"}
);
## add barplot
ax2.bars(hdata[::-1],
along='y',
title=hover,
)
## style axes
ax1.show = False
ax2.y.show = False
ax2.x.label.text = "N raw reads (1e6)"
ax2.x.ticks.show = True
## save the plot and render in notebook
toyplot.pdf.render(canvas, "Tree_w_N_raw_Ficus_lodepth_min_4.pdf")
canvas
loading Assembly: lodepth_min4 from saved path: ~/Documents/Ficus/analysis-ipyrad/lodepth_min4.json