#!/usr/bin/env python # coding: utf-8 # # RADseq data simulations # I simulated two trees to work with. One that is completely imbalanced (ladder-like) and one that is balanced (equal number tips descended from each node). I'm using the Python package `ete2` for most of the tree manipulations. This notebook was run in Python 2.7. You will also need the package rpy2 installed, as well as a working version of R with the package 'ape' to make tree plots later in the notebook. # In[28]: ## standard Python imports import glob import itertools from collections import OrderedDict, Counter ## extra Python imports import rpy2 ## required for tree plotting import ete2 ## used for tree manipulation import egglib ## used for coalescent simulations import numpy as np import pandas as pd import matplotlib import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') # In[29]: ## print versions for pkg in [matplotlib, np, pd, ete2, rpy2]: print "{:<10}\t{:<10}".\ format(pkg.__name__, pkg.__version__) # ### Simulation software # I wrote a program to simulate RAD-seq like sequence data which uses the python package egglib for coalescent simulations. Below will check that you have the relevant software installed. See here for simrrls installation: https://github.com/dereneaton/simrrls # # In[20]: ## check simrrls package and requirements import egglib import simrrls # In[21]: ## print versions print 'egglib', egglib.version print 'simrrls', simrrls.__version__ # # Generate trees for simulations # # ### Make a balanced tree with 64 tips and tree length = 6 # In[5]: ## base tree Tbal = ete2.Tree() ## branch lengths bls = 1. ## namer n = iter(('s'+str(i) for i in xrange(1,1500))) ## first nodes n1 = Tbal.add_child(name=n.next(), dist=bls) n2 = Tbal.add_child(name=n.next(), dist=bls) ## make balanced tree while len(Tbal.get_leaves()) < 64: thisrep = Tbal.get_descendants() for node in thisrep: if len(node.get_children()) < 1: node.add_child(name=n.next(), dist=bls) node.add_child(name=n.next(), dist=bls) ## Save newick string to file Tbal.write(outfile="Tbal.tre", format=3) # String representation # In[6]: ## newick string get_ipython().system(' cat Tbal.tre') # In[7]: ## show tree, remove node circles #for node in Tbal.traverse(): # node.img_style["size"] = 0 #Tbal.render("%%inline", h=500) # ## Make an imbalanced tree of same treelength with 64 tips # In[8]: ## base tree Timb = ete2.Tree() ## namer n = iter(('s'+str(i) for i in range(1,5000))) ## scale branches to match balanced treelength brlen = (bls*6.)/63 ## first nodes n1 = Timb.add_child(name=n.next(), dist=brlen) n2 = Timb.add_child(name=n.next(), dist=brlen) while len(Timb.get_leaves()) < 64: ## extend others for tip in Timb.get_leaves()[:-1]: tip.dist += brlen ## extend the last node Timb.get_leaves()[-1].add_child(name=n.next(), dist=brlen) Timb.get_leaves()[-1].add_sister(name=n.next(), dist=brlen) ## write to file Timb.write(outfile="Timb.tre", format=3) # Or copy the following string to a file: # In[9]: get_ipython().system(' cat Timb.tre') # In[10]: ## show tree, remove node circles #for node in Timb.traverse(): # node.img_style["size"] = 0 #Timb.render("%%inline", h=500) # Check that the trees are the same length (close enough). # In[11]: print set([i.get_distance(Tbal) for i in Tbal]), 'treelength' print len(Tbal), 'tips' print set([i.get_distance(Timb) for i in Timb]), 'treelength' print len(Timb), 'tips' # ## An ultrametric topology of Viburnum w/ 64 tips # This tree is inferred in notebook 3, and here it is scaled with penalized likelihood to be ultrametric. # In[12]: get_ipython().run_line_magic('load_ext', 'rpy2.ipython') # In[13]: get_ipython().run_cell_magic('R', '-w 400 -h 600', 'library(ape)\n\n## make tree ultrametric using penalized likelihood\nVtree <- read.tree("~/Dropbox/RAxML_bestTree.VIB_small_c85d6m4p99")\nUtree <- drop.tip(Vtree, "clemensiae_DRY6_PWS_2135")\nUtree <- ladderize(chronopl(Utree, 0.5))\n\n## multiply bls so tree length=6 after dropping outgroup\nUtree$edge.length <- Utree$edge.length*6\n\n## save the new tree\nwrite.tree(Utree, "Tvib.tre")\nplot(Utree, cex=0.7, edge.width=2)\nadd.scale.bar()\n#edgelabels(round(Utree$edge.length,3))\n') # In[14]: #### load TVib tree into Python and print newick string Tvib = ete2.Tree("Tvib.tre") get_ipython().system(' cat Tvib.tre') # # Simulate sequence data on each tree # Here I use the _simrrls_ program to simulate RADseq data on each input topology with locus dropout occurring with respect to phylogenetic distances. Find simrrls in my github profile. # ## Comparing tree shapes and sources of missing data # In[15]: get_ipython().run_cell_magic('bash', '', '## balanced tree \nmkdir -p Tbal_rad_drop/\nmkdir -p Tbal_ddrad_drop/\nmkdir -p Tbal_rad_covfull/\nmkdir -p Tbal_rad_covlow/\nmkdir -p Tbal_rad_covmed/\n\n## imbalanced tree\nmkdir -p Timb_rad_drop/\nmkdir -p Timb_ddrad_drop/\nmkdir -p Timb_rad_covfull/\nmkdir -p Timb_rad_covlow/\nmkdir -p Timb_rad_covmed/\n\n## sims on empirical Viburnum topo\nmkdir -p Tvib_rad_drop/\nmkdir -p Tvib_ddrad_drop/\nmkdir -p Tvib_rad_covfull/\nmkdir -p Tvib_rad_covlow/\nmkdir -p Tvib_rad_covmed\n') # #### Show simrrls options # In[16]: get_ipython().run_cell_magic('bash', '', 'simrrls -h\n') # ## Simulate RAD data on different trees and with sampling # Here I simulate 1000 loci on each tree. For each tree data are simulated in 5 ways. With and without data loss from mutation-disruption or low sequencing coverage, and as a rad data type (one cutter) and ddrad (two cutters). This will take about 10 minutes to run probably. # In[18]: get_ipython().run_cell_magic('bash', '', 'for tree in Tbal Timb Tvib;\n do \n simrrls -mc 1 -ms 1 -t $tree.tre \\\n -L 1000 -l 100 \\\n -u 1e-9 -N 1e6 \\\n -f rad -c1 CTGCAG \\\n -o $tree\\_rad_drop/$tree\n \n simrrls -mc 1 -ms 1 -t $tree.tre \\\n -L 1000 -l 100 \\\n -u 1e-9 -N 1e6 \\\n -f ddrad -c1 CTGCAG -c2 AATT \\\n -o $tree\\_ddrad_drop/$tree \n \n simrrls -mc 0 -ms 0 -t $tree.tre \\\n -L 1000 -l 100 \\\n -u 1e-9 -N 1e6 \\\n -f rad -c1 CTGCAG \\\n -o $tree\\_rad_covfull/$tree\n\n simrrls -mc 0 -ms 0 -t $tree.tre \\\n -L 1000 -l 100 \\\n -u 1e-9 -N 1e6 \\\n -f rad -c1 CTGCAG \\\n -dm 5 -ds 5 \\\n -o $tree\\_rad_covmed/$tree\n \n simrrls -mc 0 -ms 0 -t $tree.tre \\\n -L 1000 -l 100 \\\n -u 1e-9 -N 1e6 \\\n -f rad -c1 CTGCAG \\\n -dm 2 -ds 5 \\\n -o $tree\\_rad_covlow/$tree\ndone\n') # # Assemble data sets in _pyRAD_ # In[22]: get_ipython().run_cell_magic('bash', '', 'pyrad --version\n') # In[25]: get_ipython().run_cell_magic('bash', '', '## new params file (remove existing file if present)\nrm params.txt\npyrad -n\n') # In[26]: get_ipython().run_cell_magic('bash', '', "## enter parameters into params file using sed\nsed -i '/## 1. /c\\Tbal_rad_drop ## 1. working dir ' params.txt\nsed -i '/## 2. /c\\Tbal_rad_drop/*.gz ## 2. data loc ' params.txt\nsed -i '/## 3. /c\\Tbal_rad_drop/*barcodes.txt ## 3. Bcode ' params.txt\nsed -i '/## 6. /c\\TGCAG,AATT ## 6. cutters ' params.txt\nsed -i '/## 7. /c\\20 ## 7. Nproc ' params.txt\nsed -i '/## 10. /c\\.82 ## 10. clust thresh' params.txt\nsed -i '/## 11. /c\\rad ## 11. datatype ' params.txt\nsed -i '/## 12. /c\\2 ## 12. minCov ' params.txt\nsed -i '/## 13. /c\\10 ## 13. maxSH' params.txt\nsed -i '/## 14. /c\\Tbal ## 14. outname' params.txt\nsed -i '/## 24./c\\99 ## 24. maxH' params.txt\nsed -i '/## 30./c\\n,p,s ## 30. out format' params.txt\n") # In[ ]: ## IPython code to iterate over trees and coverages and run pyrad ## sometimes this freezes when run in a jupyter notebook due ## to problems with multiprocessing in notebooks. This is why my new ## work with ipyrad uses ipyparallel instead of multiprocessing. for tree in ['Tbal', 'Timb', 'Tvib']: for dtype in ['rad', 'ddrad']: with open('params.txt', 'rb') as params: pp = params.readlines() pp[1] = "{}_{}_drop ## 1. \n".format(tree, dtype) pp[2] = "{}_{}_drop/*.gz ## 2. \n".format(tree, dtype) pp[3] = "{}_{}_drop/*barcodes.txt ## 3. \n".format(tree, dtype) pp[14] = "{} ## 14. \n".format(tree) with open('params.txt', 'wb') as params: params.write("".join(pp)) ## this calls pyrad as a bash script get_ipython().system(' pyrad -p params.txt >> log.txt 2>&1') for cov in ['full', 'med', 'low']: with open('params.txt', 'rb') as params: pp = params.readlines() pp[1] = "{}_rad_cov{} ## 1. \n".format(tree, cov) pp[2] = "{}_rad_cov{}/*.gz ## 2. \n".format(tree, cov) pp[3] = "{}_rad_cov{}/*barcodes.txt ## 3. \n".format(tree, cov) pp[14] = "{} ## 14. \n".format(tree) with open('params.txt', 'wb') as params: params.write("".join(pp)) ## this calls pyrad as a bash script get_ipython().system(' pyrad -p params.txt >> log.txt 2>&1') # # Visualize data sharing on these trees # In[30]: def getarray(locifile, tree): """ parse the loci list and return a presence/absence matrix ordered by the tips on the tree""" ## parse the loci file loci = open(locifile).read().split("\n//")[:-1] ## order (ladderize) the tree tree.ladderize() ## get tip names names = tree.get_leaf_names() ## make empty matrix lxs = np.zeros((len(names), len(loci))) ## fill the matrix for loc in xrange(len(loci)): for seq in loci[loc].split("\n"): if ">" in seq: lxs[names.index(seq.split()[0][1:].rsplit("_", 1)[0]),loc] += 1 return lxs # In[31]: def countmatrix(lxsabove, lxsbelow, max=0): """ fill a matrix with pairwise data sharing between each pair of samples. You could put in two different 'share' matrices to have different results above and below the diagonal. Can enter a max value to limit fill along diagonal. """ share = np.zeros((lxsabove.shape[0], lxsbelow.shape[0])) ## fill above names = range(lxsabove.shape[0]) for row in lxsabove: for samp1,samp2 in itertools.combinations(names,2): shared = lxsabove[samp1, lxsabove[samp2,]>0].sum() share[samp1,samp2] = shared ## fill below for row in lxsbelow: for samp2,samp1 in itertools.combinations(names,2): shared = lxsabove[samp1, lxsabove[samp2,]>0].sum() share[samp1,samp2] = shared ## fill diagonal if not max: for row in range(len(names)): share[row,row] = lxsabove[row,].sum() else: for row in range(len(names)): share[row,row] = max return share # In[32]: def plotSVGmatrix(share, outname): surf = plt.pcolormesh(share, cmap="gist_yarg") dims = plt.axis('image') surf.axes.get_xaxis().set_ticklabels([]) surf.axes.get_xaxis().set_ticks([]) surf.axes.get_yaxis().set_ticklabels([]) surf.axes.get_yaxis().set_ticks([]) ax = plt.gca() ax.invert_yaxis() plt.colorbar(surf, aspect=15) if outname: plt.savefig(outname+".svg") def fullplot(locifile, tree, outname=None): lxsB = getarray(locifile, tree) share = countmatrix(lxsB, lxsB) plotSVGmatrix(share, outname) # In[41]: fullplot('Tbal_rad_drop/outfiles/Tbal.loci', Tbal, 'Tbal_drop') # In[42]: fullplot('Tbal_rad_covlow/outfiles/Tbal.loci', Tbal, 'Tbal_covlow') # In[43]: fullplot('Tbal_rad_covmed/outfiles/Tbal.loci', Tbal, 'Tbal_covmed') # In[44]: fullplot('Tbal_rad_covfull/outfiles/Tbal.loci', Tbal, 'Tbal_covfull') # In[45]: fullplot('Timb_rad_drop/outfiles/Timb.loci', Timb, 'Timb_drop') # In[46]: fullplot('Tvib_rad_drop/outfiles/Tvib.loci', Tvib, 'Tvib_drop') # In[47]: fullplot('Tvib_rad_covlow/outfiles/Tvib.loci', Tvib, 'Tvib_covlow') # # The hierarchical distribution of informative sites # First we re-calculate the pair-wise data sharing matrices for all species in each data set. # In[49]: lxs_Tbal_droprad = getarray("Tbal_rad_drop/outfiles/Tbal.loci", Tbal) lxs_Tbal_dropddrad = getarray("Tbal_ddrad_drop/outfiles/Tbal.loci", Tbal) lxs_Tbal_covlow = getarray("Tbal_rad_covlow/outfiles/Tbal.loci", Tbal) lxs_Tbal_covmed = getarray("Tbal_rad_covmed/outfiles/Tbal.loci", Tbal) lxs_Tbal_covfull = getarray("Tbal_rad_covfull/outfiles/Tbal.loci", Tbal) lxs_Timb_droprad = getarray("Timb_rad_drop/outfiles/Timb.loci", Timb) lxs_Timb_dropddrad = getarray("Timb_ddrad_drop/outfiles/Timb.loci", Timb) lxs_Timb_covlow = getarray("Timb_rad_covlow/outfiles/Timb.loci", Timb) lxs_Timb_covmed = getarray("Timb_rad_covmed/outfiles/Timb.loci", Timb) lxs_Timb_covfull = getarray("Timb_rad_covfull/outfiles/Timb.loci", Timb) lxs_Tvib_droprad = getarray("Tvib_rad_drop/outfiles/Tvib.loci", Tvib) lxs_Tvib_dropddrad = getarray("Tvib_ddrad_drop/outfiles/Tvib.loci", Tvib) lxs_Tvib_covlow = getarray("Tvib_rad_covlow/outfiles/Tvib.loci", Tvib) lxs_Tvib_covmed = getarray("Tvib_rad_covmed/outfiles/Tvib.loci", Tvib) lxs_Tvib_covfull = getarray("Tvib_rad_covfull/outfiles/Tvib.loci", Tvib) # #### A function to count loci for each bipartition (quartet-style) # In[50]: def count_inf4(tree, matrix, node): """ count the number of loci with data spanning a given node in the tree """ ## get children of selected node a, b = node.get_children() ## get tip descendents of a and b tips_a = set(a.get_leaf_names()) tips_b = set(b.get_leaf_names()) ## get every other tip (outgroups) upone = node.up if upone.is_root(): ch = upone.children sis = [i for i in ch if i != node][0] if sis.children: tips_c = sis.children[0].get_leaf_names() tips_d = sis.children[1].get_leaf_names() else: return 0 else: upone = set(node.up.get_leaf_names()) tips_c = upone - tips_a - tips_b tips_all = set(tree.get_leaf_names()) tips_d = tips_all - tips_a - tips_b - tips_c ## get indices in matrix for leaf tips names = tree.get_leaf_names() index_a = [names.index(i) for i in tips_a] index_b = [names.index(i) for i in tips_b] index_c = [names.index(i) for i in tips_c] index_d = [names.index(i) for i in tips_d] ## how man loci are "informative" inf = 0 for col in matrix.T: hits_a = sum([col[i] for i in index_a]) hits_b = sum([col[i] for i in index_b]) hits_c = sum([col[i] for i in index_c]) hits_d = sum([col[i] for i in index_d]) if all([hits_a, hits_b, hits_c, hits_d]): inf += 1 return inf # #### A function to write data to file for plotting # Here I iterate over each node and apply _count\_inf4_ which returns the number of loci that are informative for the subtending bipartition, and _count\_snps_ which counts snps segregating at that bipartition. This takes a few minutes to run. # In[51]: def nodes_dat(tree, lxs, datfilename): dat = [] for node in tree.traverse(): if not (node.is_leaf() or node.is_root()): loci = count_inf4(tree, lxs, node) dist = round(tree.get_distance(node),2) dat.append([dist, loci]) node.name = "%d" % loci ## print tree with bls & node labels tree.write(format=3,outfile=datfilename+".tre") ## print data to file with open(datfilename, 'w') as outfile: np.savetxt(outfile, np.array(dat), fmt="%.2f") # ## Make data files # In[52]: get_ipython().run_cell_magic('bash', '', '## a new directory to store the data in\nmkdir -p analysis_counts2\n') # In[53]: nodes_dat(Tbal, lxs_Tbal_droprad, "analysis_counts2/Tbal_droprad.dat3") nodes_dat(Tbal, lxs_Tbal_dropddrad, "analysis_counts2/Tbal_dropddrad.dat3") nodes_dat(Tbal, lxs_Tbal_covlow, "analysis_counts2/Tbal_covlow.dat3") nodes_dat(Tbal, lxs_Tbal_covmed, "analysis_counts2/Tbal_covmed.dat3") nodes_dat(Tbal, lxs_Tbal_covfull, "analysis_counts2/Tbal_covfull.dat3") # In[54]: nodes_dat(Timb, lxs_Timb_droprad, "analysis_counts2/Timb_droprad.dat3") nodes_dat(Timb, lxs_Timb_dropddrad, "analysis_counts2/Timb_dropddrad.dat3") nodes_dat(Timb, lxs_Timb_covlow, "analysis_counts2/Timb_covlow.dat3") nodes_dat(Timb, lxs_Timb_covmed, "analysis_counts2/Timb_covmed.dat3") nodes_dat(Timb, lxs_Timb_covfull, "analysis_counts2/Timb_covfull.dat3") # In[55]: nodes_dat(Tvib, lxs_Tvib_droprad, "analysis_counts2/Tvib_droprad.dat3") nodes_dat(Tvib, lxs_Tvib_dropddrad, "analysis_counts2/Tvib_dropddrad.dat3") nodes_dat(Tvib, lxs_Tvib_covlow, "analysis_counts2/Tvib_covlow.dat3") nodes_dat(Tvib, lxs_Tvib_covmed, "analysis_counts2/Tvib_covmed.dat3") nodes_dat(Tvib, lxs_Tvib_covfull, "analysis_counts2/Tvib_covfull.dat3") # # Plot the hierarchical distribution with the trees # In[5]: get_ipython().run_line_magic('load_ext', 'rpy2.ipython') # In[6]: get_ipython().run_cell_magic('R', '', 'library(ape)\n') # ### Load in the data to R # In[56]: get_ipython().run_cell_magic('R', '', '## read in the data and factor results\ndat <- read.table("analysis_counts2/Tbal_droprad.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTbal_droprad_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Tbal_dropddrad.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTbal_dropddrad_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Tbal_covlow.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTbal_covlow_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Tbal_covmed.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTbal_covmed_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Tbal_covfull.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTbal_covfull_Lme <- with(dat, tapply(loci, depth, mean))\n') # In[57]: get_ipython().run_cell_magic('R', '', '## read in the data and factor results\ndat <- read.table("analysis_counts2/Timb_droprad.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTimb_droprad_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Timb_dropddrad.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTimb_dropddrad_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Timb_covlow.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTimb_covlow_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Timb_covmed.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTimb_covmed_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Timb_covfull.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTimb_covfull_Lme <- with(dat, tapply(loci, depth, mean))\n') # In[58]: get_ipython().run_cell_magic('R', '', '## read in the data and factor results\ndat <- read.table("analysis_counts2/Tvib_droprad.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTvib_droprad_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Tvib_dropddrad.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTvib_dropddrad_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Tvib_covlow.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTvib_covlow_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Tvib_covmed.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTvib_covmed_Lme <- with(dat, tapply(loci, depth, mean))\n\ndat <- read.table("analysis_counts2/Tvib_covfull.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nTvib_covfull_Lme <- with(dat, tapply(loci, depth, mean))\n') # ### Plots # In[59]: get_ipython().run_cell_magic('R', '-w 400 -h 400', '\n#svg("box1.svg", width=4, height=5)\n\nL = Tbal_droprad_Lme\nplot(L, xlim=c(0,6), ylim=c(575,1025), \n cex.axis=1.25, type=\'n\', xaxt="n")\n#abline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Tbal_covfull_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\nlines(df1, type=\'l\', lwd=2, lty=2)\npoints(df1, cex=2.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Tbal_droprad_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\nlines(df1, type=\'l\', lwd=2, lty=2)\npoints(df1, cex=2.5, pch=21, col="#6E6E6E", bg="#6E6E6E", lwd=0.75)\n\n#L = Tbal_dropddrad_Lme \n#df1 = data.frame(as.numeric(names(L)),as.numeric(L))\n#points(df1, cex=3.5, pch=21, col="#6E6E6E", bg="#D3D3D3", lwd=0.75)\n\nbox()\n\naxis(side=1, at=seq(0,6,0.5), \n labels=as.character(seq(6,0,by=-0.5)), cex.axis=1.25)\n\n#dev.off()\n') # In[60]: get_ipython().run_cell_magic('R', '-w 400 -h 400', '\n#svg("box2.svg", width=4, height=5)\n\nL = Tbal_covlow_Lme\nplot(L, xlim=c(0,6), ylim=c(575,1025), \n cex.axis=1.25, type=\'n\', xaxt="n")\nabline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Tbal_covfull_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\nlines(df1, type=\'l\', lwd=2, lty=2)\npoints(df1, cex=2.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Tbal_covmed_Lme \ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\nlines(df1, type=\'l\', lwd=2, lty=2)\npoints(df1, cex=2.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\n\n#L = Tbal_covlow_Lme\n#df1 = data.frame(as.numeric(names(L)),as.numeric(L))\n#points(df1, cex=3.5, pch=21, col="#262626", bg="#D3D3D3", lwd=0.75)\n\nbox()\n\naxis(side=1, at=seq(0,6,0.5), \n labels=as.character(seq(6,0,by=-0.5)), cex.axis=1.25)\n\n#dev.off()\n') # In[61]: get_ipython().run_cell_magic('R', '-w 400 -h 400', '\n#svg("box3.svg", width=4, height=5)\n\n## samples every 6th to make plot more readable\n\nL = Timb_droprad_Lme[c(3:62)]\nplot(L, xlim=c(0,6), ylim=c(575, 1025), \n cex.axis=1.25, type=\'n\', xaxt="n")#, yaxt="n")\n#abline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Timb_covfull_Lme[seq(3, 65, 6)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\nlines(df1, type=\'l\', lwd=2, lty=2)\npoints(df1, cex=2.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Timb_droprad_Lme[seq(3, 65, 6)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\nlines(df1, type=\'l\', lwd=2, lty=2)\npoints(df1, cex=2.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\nbox()\n\naxis(side=1, at=seq(0,6, 0.5), \n labels=as.character(seq(6,0,by=-0.5)), cex.axis=1.25)\n\n#dev.off()\n') # In[62]: get_ipython().run_cell_magic('R', '-w 400 -h 400', '\n## samples every 6th to make plot more readable\n\n#svg("box4.svg", width=4, height=5)\nL = Timb_droprad_Lme[c(3:62)]\nplot(L, xlim=c(0,6), ylim=c(575, 1025), \n cex.axis=1.25, type=\'n\', xaxt="n")#, yaxt="n")\n#abline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Timb_covfull_Lme[seq(3, 65, 6)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\nlines(df1, type=\'l\', lwd=2, lty=2)\npoints(df1, cex=2.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Timb_covmed_Lme[seq(3, 65, 6)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\nlines(df1, type=\'l\', lwd=2, lty=2)\npoints(df1, cex=2.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\nbox()\n\naxis(side=1, at=seq(0,6, 0.5), \n labels=as.character(seq(6,0,by=-0.5)), cex.axis=1.25)\n#dev.off()\n') # In[63]: get_ipython().run_cell_magic('R', '-w 400 -h 500', '\n#svg("Sourcemissing.svg", height=10, width=8)\n#svg("Sourcemissing.svg", height=7, width=5.33)\n\nmat2 = matrix(c(1,1,1,4,4,4,7,7,7, \n 1,1,1,4,4,4,7,7,7,\n 2,2,2,5,5,5,8,8,8,\n 3,3,3,6,6,6,9,9,9),\n 4,9, byrow=TRUE)\nlayout(mat2)\npar(mar=c(1,1,0,1), \n oma=c(2,2,1,0))\n\n#########################################################\ntre <- read.tree("Tbal.tre")\nplot(tre, show.tip.label=F,\n edge.width=2.5, type=\'p\', \n x.lim=c(0,6))\n\n#### \nL = Tbal_droprad_Lme\nplot(L, xlim=c(0,6), ylim=c(-25,1200), \n cex.axis=1.25, type=\'n\', xaxt="n")\nabline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Tbal_covfull_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Tbal_droprad_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\nL = Tbal_dropddrad_Lme \ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#D3D3D3", lwd=0.75)\n\nbox()\n\n####\nL = Tbal_covlow_Lme\nplot(L, xlim=c(0,6), ylim=c(-25,1200), \n cex.axis=1.25, type=\'n\', xaxt="n")\nabline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Tbal_covfull_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Tbal_covmed_Lme \ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\nL = Tbal_covlow_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#D3D3D3", lwd=0.75)\n\naxis(side=1, at=seq(0,6,0.5), \n labels=as.character(seq(6,0,by=-0.5)), cex.axis=1.25)\n\nbox()\n\n##########################################################\ntre <- read.tree("Timb.tre")\nplot(tre, show.tip.label=F,\n edge.width=2.5, type=\'p\', \n x.lim=c(0,6))\n\n####\nL = Timb_droprad_Lme[c(3:62)]\nplot(L, xlim=c(0,6), ylim=c(-25,1200), \n cex.axis=1.25, type=\'n\', xaxt="n", yaxt="n")\nabline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Timb_covfull_Lme[c(3:62)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Timb_droprad_Lme[c(3:62)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\nL = Timb_dropddrad_Lme[c(3:62)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#D3D3D3", lwd=0.75)\n\nbox()\n\n####\nL = Timb_covlow_Lme[c(3:62)] \nplot(L, xlim=c(0,6), ylim=c(-25,1200), \n cex.axis=1.25, type=\'n\', xaxt="n", yaxt="n")\nabline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Timb_covfull_Lme[c(3:62)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Timb_covmed_Lme[c(3:62)]\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\nL = Timb_covlow_Lme[c(3:62)] \ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#D3D3D3", lwd=0.75)\n\nbox()\n\n##\naxis(side=1, at=seq(0,6,0.5), \n labels=as.character(seq(6,0,by=-0.5)), cex.axis=1.25)\n\n#########################################################\ntre <- read.tree("Tvib.tre")\nplot(tre, show.tip.label=F,\n edge.width=2.5, type=\'p\', \n x.lim=c(0,6))\n\n####\nL = Tvib_droprad_Lme\nplot(L, xlim=c(0,6.25), ylim=c(-25,1200), \n cex.axis=1.25, type=\'n\', xaxt="n", yaxt="n")\nabline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Tvib_covfull_Lme \ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Tvib_droprad_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\nL = Tvib_dropddrad_Lme \ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#D3D3D3", lwd=0.75)\n\nbox()\n####\n\nplot(L, xlim=c(0,6.25), ylim=c(-25,1200), \n cex.axis=1.25, type=\'n\', xaxt="n", yaxt="n")\nabline(h=1000, lwd=2, col="#6E6E6E", lty="dotted")\n\nL = Tvib_covfull_Lme \ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#262626", lwd=0.75)\n\nL = Tvib_covmed_Lme \ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#6E6E6E", lwd=0.75)\n\nL = Tvib_covlow_Lme\ndf1 = data.frame(as.numeric(names(L)),as.numeric(L))\npoints(df1, cex=1.5, pch=21, col="#262626", bg="#D3D3D3", lwd=0.75)\n\nbox()\n##\naxis(side=1, at=seq(0,6,1), \n labels=as.character(seq(6,0,by=-1)), cex.axis=1.25)\n\n#dev.off()\n') # ## Empirical data (full & half depth) # Here I am grabbing the assembled empirical data from notebook_1 (Viburnum) to compare the effect of sequencing coverage with the results we see when simulating data on that tree. # In[64]: Tvib2 = Tvib.copy() for node in Tvib2: node.name = node.name+"_0" # In[70]: ## full size data lxs_EmpVib_full = getarray("/home/deren/Dropbox/RADexplore/EmpVib/vib_full_64tip_c85d6m4p99.loci", Tvib)#, dropind=1) lxs_EmpVib_half = getarray("/home/deren/Dropbox/RADexplore/EmpVib/vib_half_64tip_c85d6m4p99.loci", Tvib)#, dropind=1) # In[ ]: share_full = countmatrix(lxs_EmpVib_full,lxs_EmpVib_full) plotSVGmatrix(share_full, "EmpVib_full") # In[ ]: share_half = countmatrix(lxs_EmpVib_half,lxs_EmpVib_half) plotSVGmatrix(share_half, "EmpVib_half") # In[ ]: nodes_dat(Tvib, lxs_EmpVib_half, "analysis_counts/Tvib_Emp_half.dat3") nodes_dat(Tvib, lxs_EmpVib_full, "analysis_counts/Tvib_Emp_full.dat3") # In[ ]: get_ipython().run_cell_magic('R', '', '## read in the data and factor results\ndat <- read.table("analysis_counts/Tvib_Emp_full.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nEmpVib_full_Lme <- with(dat, tapply(loci, depth, mean))\n\n## read in the data and factor results\ndat <- read.table("analysis_counts/Tvib_Emp_half.dat3", \n header=F, col.names=c(\'depth\',\'loci\'))\ndat[,1] <- as.factor(dat[,1])\nEmpVib_half_Lme <- with(dat, tapply(loci, depth, mean))\n') # In[ ]: get_ipython().run_cell_magic('R', '', 'EmpVib_half_Lme\n') # In[ ]: get_ipython().run_cell_magic('R', '-w 200 -h 400', '\n#svg("EmpVib_fullvhalf3.svg", height=6, width=2.5)\n\nmat2 <- matrix(c(1,1,1,2),byrow=TRUE)\nlayout(mat2)\npar(mar=c(1,1,0,1), \n oma=c(2,2,1,0))\n\n#########################################################\n#tre <- read.tree("Tvib.tre")\n#plot(tre, show.tip.label=F,\n# edge.width=2.5, type=\'p\', \n# x.lim=c(0,2.25))\nVtre <- read.tree("analysis_counts/EmpVib_full.dat3.tre")\nplot(Vtre, cex=0.6, adj=0.05, x.lim=c(0,2.25),\n edge.width=2.5, type=\'p\',show.tip.label=FALSE)\nnodelabels(pch=20, col="black",\n cex=as.integer(Vtre$node.label)/7500)\n\n####\nL = EmpVib_half_Lme\nplot(L, xlim=c(0,2.25), ylim=c(-25,50200), \n cex.axis=1.25, type=\'n\', xaxt="n")\ndf1 = data.frame(as.numeric(names(L)),\n as.numeric(L))\npoints(df1, cex=1.25, pch=21, col="red", bg="red")\n\n####\nL = EmpVib_full_Lme\n#plot(L, xlim=c(0,2.25), ylim=c(-25,50200), \n# cex.axis=1.25, type=\'n\', xaxt="n")\nabline(h=0, lwd=2, col="gray", lty="dotted")\ndf1 = data.frame(as.numeric(names(L)),\n as.numeric(L))\npoints(df1, cex=1.25, pch=21, col="#262626", bg="#262626")\n\n#dev.off()\n') # In[ ]: get_ipython().run_cell_magic('R', '-w 200 -h 400', '\n#svg("fullvhalf3.svg", height=4.5, width=4)\n\n####\nL = EmpVib_full_Lme\nplot(L, xlim=c(0,2.25), ylim=c(-25,50200), \n cex.axis=1.25, type=\'n\', xaxt="n")\nabline(h=0, lwd=2, col="gray", lty="dotted")\ndf1 = data.frame(as.numeric(names(L)),\n as.numeric(L))\npoints(df1, cex=1.25, pch=21, col="#262626", bg="#262626")\n\n\ndev.off()\n\nsvg("fullonly.svg", height=4.5, width=4)\n####\nL = EmpVib_half_Lme\nplot(L, xlim=c(0,2.25), ylim=c(-25,50200), \n cex.axis=1.25, type=\'n\', xaxt="n")\ndf1 = data.frame(as.numeric(names(L)),\n as.numeric(L))\npoints(df1, cex=1.25, pch=21, col="red", bg="red")\n\n####\nL = EmpVib_full_Lme\nabline(h=0, lwd=2, col="gray", lty="dotted")\ndf1 = data.frame(as.numeric(names(L)),\n as.numeric(L))\npoints(df1, cex=1.25, pch=21, col="#262626", bg="#262626")\n\n#dev.off()\n') # In[ ]: get_ipython().run_cell_magic('R', '', 'data.frame(cbind(median(EmpVib_full_Lme),\n median(EmpVib_half_Lme)),\n cbind(mean(EmpVib_full_Lme),\n mean(EmpVib_half_Lme)),\n col.names=c("full","half"))\n') # In[ ]: get_ipython().run_cell_magic('R', '', 'svg(\'hist.svg\', width=4.25, height=4)\nhist(rnorm(10000), col="grey")\ndev.off()\n') # In[ ]: get_ipython().run_cell_magic('R', '-w 300 -h 600', '\nVtre <- read.tree("analysis_counts/EmpVib_full.dat3.tre")\nsvg("EmpVib_full_nodes.svg", height=6, width=3)\nplot(Vtre, cex=0.6, adj=0.05,\n edge.width=3, show.tip.label=FALSE)\nnodelabels(pch=20, col="black",\n cex=as.integer(Vtre$node.label)/10000)\ndev.off()\n') # In[ ]: # ### plOT # In[ ]: get_ipython().run_cell_magic('R', '-w 300 -h 600', '\n#svg("locisnpsdepth.svg", height=8, width=4)\n#pdf("locisnpsdepth.pdf", height=8, width=4)\n\nmat2 <- matrix(c(1,1,1,5,5,5,\n 1,1,1,5,5,5,\n 2,2,2,6,6,6,\n 3,3,3,7,7,7,\n 4,4,4,8,8,8), \n 5,6, byrow=TRUE)\nlayout(mat2)\npar(mar=c(1,1,0,1), \n oma=c(2,2,1,0))\ntre <- read.tree("Tbal.tre")\nplot(tre, show.tip.label=F,\n edge.width=2.5, type=\'p\', \n x.lim=c(-0.25,2.75))\n\n##-------------------------------\n## Plot full data locus sharing\nx = seq(1.5,5.5)\ny = Tbal_full_Lme#[1:6]\ns = Tbal_full_Lsd#[1:6]\nplot(x, y, xlim=c(1,7.2), ylim=c(-25,3100), \n cex.axis=1.25, type=\'n\', xaxt="n")\nabline(h=(seq(0,1000,200)), lwd=1.5, col="gray", lty="dotted")\npoints(x,y, cex=1.25, pch=21, col="#262626", bg="#262626")\nlines(x,y, lwd=2, col="#262626")\nsegments(x, y-s, x, y+s, col="#262626")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\nx = seq(1.5,5.5)\ny = Tbal_full_Sme#[2:6]\ns = Tbal_full_Ssd#[2:6]\nlines(x, y, lwd=2, col="darkgrey")\npoints(x, y, cex=1.25, pch=21, col="darkgrey", bg="darkgrey")\nsegments(x, y-s, x, y+s, col="darkgrey")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\n##--------------------------------\n## Plot drop data locus sharing\nx = seq(1.5,5.5)\ny = Tbal_drop_Lme\ns = Tbal_drop_Lsd\nplot(x, y, xlim=c(1,7.2), ylim=c(-25,3100), \n cex.axis=1.25, type=\'n\', xaxt="n")\nabline(h=(seq(0,1000,200)), lwd=1.5, col="gray", lty="dotted")\npoints(x,y, cex=1.25, pch=21, col="#262626", bg="#262626")\nlines(x,y, lwd=2, col="#262626")\nsegments(x, y-s, x, y+s, col="#262626")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\nx = seq(1.5,5.5)\ny = Tbal_drop_Sme\ns = Tbal_drop_Ssd\nlines(x, y, lwd=2, col="darkgrey")\npoints(x, y, cex=1.25, pch=21, col="darkgrey", bg="darkgrey")\nsegments(x, y-s, x, y+s, col="darkgrey")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\n##------------------------------\n## Plot cov data locus sharing\nx = seq(1.5,5.5)\ny = Tbal_cov_Lme\ns = Tbal_cov_Lsd\nplot(x, y, xlim=c(1,7.2), ylim=c(-25,3100), \n cex.axis=1.25, type=\'n\', xaxt="n")\nabline(h=(seq(0,1000,200)), lwd=1.5, col="gray", lty="dotted")\npoints(x,y, cex=1.25, pch=21, col="#262626", bg="#262626")\nlines(x,y, lwd=2, col="#262626")\nsegments(x, y-s, x, y+s, col="#262626")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\nx = seq(1.5,5.5)\ny = Tbal_cov_Sme\ns = Tbal_cov_Ssd\nlines(x, y, lwd=2, col="darkgrey")\npoints(x, y, cex=1.25, pch=21, col="darkgrey", bg="darkgrey")\nsegments(x, y-s, x, y+s, col="darkgrey")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\naxis(side=1, at=seq(1.1,8.1,1), \n labels=as.character(seq(3,-0.5,by=-0.5)), cex.axis=1.25)\n\n###########################################\n###########################################\ntre <- read.tree("Timb.tre")\nplot(tre, show.tip.label=F, \n edge.width=2, type=\'p\')\n##------------------------------------\n## Plot full data locus sharing\nx = seq(2,62)\ny = Timb_full_Lme[2:62]\ns = Timb_full_Lsd[2:62]\nplot(x, y, xlim=c(1,65), ylim=c(-25,3100), \n cex.axis=1.25, type=\'n\', yaxt="n", xaxt="n")\nabline(h=(seq(0,1000,200)), lwd=1.5, col="gray", lty="dotted")\npoints(x,y, cex=1, pch=21, col="#262626", bg="#262626")\nlines(x,y, lwd=2, col="#262626")\nsegments(x, y-s, x, y+s, col="#262626")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\ny = Timb_full_Sme[2:62]\ns = Timb_full_Ssd[2:62]\nlines(x, y, lwd=2, col="darkgrey")\n#points(x, y, cex=1, pch=21, col="darkgrey", bg="darkgrey")\nsegments(x, y-s, x, y+s, col="darkgrey")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\n##----------------------------------\n## Plot full data locus sharing\nx = seq(2,62)\ny = Timb_drop_Lme[2:62]\ns = Timb_drop_Lsd[2:62]\nplot(x, y, xlim=c(1,65), ylim=c(-25,3100), \n cex.axis=1.25, type=\'n\', yaxt="n", xaxt="n")\nabline(h=(seq(0,1000,200)), lwd=1.5, col="gray", lty="dotted")\npoints(x,y, cex=1, pch=21, col="#262626", bg="#262626")\nlines(x,y, lwd=2, col="#262626")\nsegments(x, y-s, x, y+s, col="#262626")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\ny = Timb_drop_Sme[2:62]\ns = Timb_drop_Ssd[2:62]\nlines(x, y, lwd=2, col="darkgrey")\n#points(x, y, cex=1, pch=21, col="darkgrey", bg="darkgrey")\nsegments(x, y-s, x, y+s)\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\n##-----------------------------------\n## Plot drop data locus sharing\nx = seq(2,62)\ny = Timb_cov_Lme[2:62]\ns = Timb_cov_Lsd[2:62]\nplot(x, y,\n xlim=c(1,65), ylim=c(-20,3100), \n cex=1, cex.axis=1.25,\n pch=21, bg="#262626", xaxt="n", yaxt="n")\nabline(h=(seq(0,1000,200)), lwd=1.5, col="gray", lty="dotted")\npoints(x,y, cex=1, pch=21, col="#262626", bg="#262626")\nlines(x,y, lwd=2, col="#262626")\nsegments(x, y-s, x, y+s, col="#262626")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\n\ny = Timb_cov_Sme[2:62]\ns = Timb_cov_Ssd[2:62]\nlines(x, y, lwd=2, col="darkgrey")\n#points(x, y, cex=1, pch=21, col="darkgrey", bg="darkgrey")\nsegments(x, y-s, x, y+s, col="darkgrey")\nepsilon = 0.02\nsegments(x-epsilon,y-s,x+epsilon,y-s)\nsegments(x-epsilon,y+s,x+epsilon,y+s)\naxis(side=1, at=seq(2.1,72,10), \n labels=as.character(seq(3,0,by=-0.5)), cex.axis=1.25)\n\n#dev.off()\n') # ### PLOt nodes on tree # In[ ]: lxs_EmpVib_full # In[ ]: def write_nodes_to_tree(tree,lxs,treefilename): for node in tree.traverse(): if not node.is_leaf(): inf = count_inf4(tree, lxs, node) node.name = "%d" % inf ## print tree with bls & node labels tree.write(format=3,outfile=treefilename) # In[ ]: write_nodes_to_tree(Tvib, lxs_EmpVib_full, "Tvib_full_nodes.tre") # In[ ]: get_ipython().run_cell_magic('R', '-w 400 -h 500', 'tre <- read.tree("loci_Tbal_cov")\nplot(tre)#, show.tip.label=F, edge.width=2.5)\n#nodelabels(pch=21, \n# bg="#262626",\n# cex=as.integer(tre$node.label)/150)\nnodelabels(tre$node.label, bg=\'grey\', cex=1.5)\n') # ## Data sharing by sub-sampling # How much data are shared by a random N samples, and how much data are shared across the deepest bipartition for 2+N samples. _Also how many SNPs?_ # In[ ]: def counts(lxs, minr, maxr, maxi): ## data store data = np.zeros((maxr+1-minr,maxi)) for sample in range(minr, maxr+1): g = itertools.combinations(range(maxr), sample) i = 0 while i