anhima.tree
- Trees¶import numpy as np
np.random.seed(1)
import scipy.stats
import random
random.seed(1)
import matplotlib.pyplot as plt
%matplotlib inline
import sys
import itertools
import anhima
# dev imports
# sys.path.insert(0, '../src')
# %reload_ext autoreload
# %autoreload 1
# %aimport anhima.tree
# %aimport anhima.sim
# %aimport anhima.gt
# %aimport anhima.dist
# simulate genotypes with no relatedness
n_variants = 1000
n_samples = 100
samples = [''.join(s) for s in itertools.product('ABCDEFGHIJ', repeat=2)]
ploidy = 2
af_dist = scipy.stats.beta(a=.4, b=.6)
p_missing = 0
genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, af_dist, p_missing, ploidy)
gn = anhima.gt.as_n_alt(genotypes)
# simulate three sub-populations with relatedness
genotypes_subpop1 = anhima.sim.simulate_relatedness(genotypes[:, :n_samples//2], relatedness=.5, n_iter=1000)
genotypes_subpop2 = anhima.sim.simulate_relatedness(genotypes[:, n_samples//2:], relatedness=.5, n_iter=1000)
genotypes_subpop2a = anhima.sim.simulate_relatedness(genotypes_subpop2[:, n_samples//4:], relatedness=.5, n_iter=500)
genotypes_subpop2b = anhima.sim.simulate_relatedness(genotypes_subpop2[:, :n_samples//4], relatedness=.5, n_iter=500)
genotypes_related = np.concatenate([genotypes_subpop1, genotypes_subpop2a, genotypes_subpop2b], axis=1)
gnr = anhima.gt.as_n_alt(genotypes_related)
# calculate pairwise distance
d, s = anhima.dist.pairwise_distance(gn, metric='euclidean')
dr, sr = anhima.dist.pairwise_distance(gnr, metric='euclidean')
# build a neighbour joining tree
t = anhima.tree.nj(s, labels=samples)
tr = anhima.tree.nj(sr, labels=samples)
anhima.tree.plot_phylo(t, plot_kwargs={'type': 'unrooted',
'tip.color': 'black',
'edge.color': 'gray',
'lab4ut': 'axial'});
# first 50 samples are in pop1, second 25 are in pop2a, third 25 are in pop 2b
populations = (['pop1'] * 50) + (['pop2a']*25) + (['pop2b']*25)
# choose a color for each sub-population
population_colors = {
'pop1': 'red',
'pop2a': 'blue',
'pop2b': 'green',
}
# assign a color to each sample based on population membership
sample_colors = [population_colors[p] for p in populations]
# assign a color to each tree edge based on population majority
edge_colors = anhima.tree.color_edges_by_group_majority(tr,
labels=samples,
groups=populations,
colors=population_colors)
# now plot the tree
anhima.tree.plot_phylo(tr, plot_kwargs={'type': 'unrooted',
'tip.color': sample_colors,
'edge.color': edge_colors,
'lab4ut': 'axial'});
metrics = 'euclidean', 'cityblock'
fig = plt.figure(figsize=(len(metrics)*5, 5))
for i, metric in enumerate(metrics):
ax = fig.add_subplot(1, len(metrics), i+1)
ax.set_title(metric)
d, s = anhima.dist.pairwise_distance(gnr, metric=metric)
t = anhima.tree.nj(s, labels=samples)
edge_colors = anhima.tree.color_edges_by_group_majority(t,
labels=samples,
groups=populations,
colors=population_colors)
anhima.tree.plot_phylo(t,
plot_kwargs={'type': 'unrooted',
'tip.color': sample_colors,
'edge.color': edge_colors,
'lab4ut': 'axial'},
width=5, height=5, units='in', res=plt.rcParams['savefig.dpi'],
add_scale_bar={},
ax=ax)
fig.subplots_adjust(0, 0, 1, 1, hspace=0, wspace=0)
d, s = anhima.dist.pairwise_distance(gnr, metric=metric)
t = anhima.tree.nj(s, labels=samples)
# write to string
anhima.tree.write_tree(t)
'((((((IB:25.84990777,IG:31.15009223):11.77556818,JF:42.22443182):3.910205078,HF:45.08979492):2.012491862,(IF:43.51267904,JC:42.48732096):10.73750814):4.99294608,(ID:36.59516345,JG:32.40483655):22.69455392):5.550426483,((((HI:29.02730887,JI:32.97269113):14.78392792,JD:43.21607208):8.421531677,IH:47.57846832):3.807510376,HG:57.94248962):4.566761017,(((((IA:22.08810292,II:20.91189708):34.57021484,JA:62.42978516):2.214874268,((HH:48.19351632,JB:42.80648368):2.416541466,(JH:34.75761922,JJ:25.24238078):28.08345853):8.410125732):3.545110067,((((((GB:46.30488856,HA:39.69511144):10.82900391,(FE:26.34034455,FH:39.65965545):17.67099609):4.767144339,(((GD:25.24037237,HB:30.75962763):12.46283569,HE:39.53716431):8.164868164,GJ:45.33513184):15.23285566):2.291960822,((((((FC:25.63408203,GC:31.36591797):13.85064532,HC:49.14935468):5.328113903,GE:44.1718861):7.473176956,(GI:27.23205566,HD:31.76794434):26.02682304):1.79327491,(((FB:30.02872911,FG:33.97127089):7.929276629,(FJ:12.83507055,GH:15.16492945):21.57072337):5.228953044,((FD:25.79449463,GF:26.20550537):8.790480841,GA:33.20951916):13.52104696):13.17547509):5.356535287,(FA:25.46094789,FF:25.53905211):34.56533971):3.409211053):4.38986074,(FI:46.49660773,GG:45.50339227):23.01736582):56.27002556,(((((((((((EE:57.97222222,EH:63.02777778):19.08024691,AD:82.91975309):31.63479478,(DB:61.9787234,DG:73.0212766):53.86520522):6.877319336,(BA:99.48263889,BI:120.5173611):21.12268066):6.58108724,((BJ:71.28125,EB:68.71875):25.87006579,(AH:64.83870968,CI:68.16129032):30.62993421):35.85641276):4.518383361,(((((CB:74.05154639,DJ:56.94845361):26.91463415,EI:85.08536585):36.18931159,((AJ:51.28571429,CD:53.71428571):27.0625,CG:78.9375):37.56068841):9.971478175,((CA:72.52747253,EJ:75.47252747):25.40945513,DH:89.59054487):30.65352183):3.601114242,EF:125.2738858):6.188647889):1.690944127,((((AI:55.6875,CC:53.3125):13.81179775,BD:72.18820225):19.5846519,BB:86.9153481):38.70166843,(BE:68.5,DE:60.5):65.17333157):5.506321498):4.332044567,AC:129.9472523):2.573908968,((CF:76.81104651,EG:81.18895349):24.97083333,DD:90.02916667):36.19366916):6.519239837,(((((AE:82.5,BC:68.5):35.25892857,CE:122.7410714):8.232692308,(AF:94.45422535,CH:105.5457746):17.26730769):9.659752155,((DA:98.04280822,ED:106.9571918):15.65257353,(AA:71.6091954,DI:67.3908046):45.34742647):18.02774784):7.257190846,(((((DF:63.18823529,EC:59.81176471):19.19642857,AG:77.80357143):8.873310811,CJ:86.62668919):20.29995265,AB:105.9500473):6.172568044,((BG:60.5,EA:62.5):25.7125,DC:87.2875):40.23368196):13.00843415):7.524888581):2.429049072,(BF:152.3832632,BH:145.6167368):7.890195312):129.660299):67.05210559):3.05134964,((((HJ:28.09548118,IC:23.90451882):10.86761475,IE:40.13238525):9.088084501,JE:39.4119155):8.509577433,IJ:52.99042257):9.76984787):0.9664802551);'
# write to file
anhima.tree.write_tree(t, '/tmp/tree.newick')
# read from file
t2 = anhima.tree.read_tree('/tmp/tree.newick')
anhima.tree.plot_phylo(t2, plot_kwargs={'type': 'unrooted'});
# N.B., labels will not be preserved in original order
list(t2[2])[:10]
['IB', 'IG', 'JF', 'HF', 'IF', 'JC', 'ID', 'JG', 'HI', 'JI']