#!/usr/bin/env python # coding: utf-8 # ## Example notebook: Structure with pop assignments # # This notebook shows how to use the `ipyrad.analysis` toolkit to generate structure input files that use population information. # #### Required software # In[1]: # conda install ipyrad -c ipyrad # conda install structure clumpp -c ipyrad # conda install toytree -c eaton-lab # In[2]: import ipyrad.analysis as ipa import toyplot # ### Create a structure analysis object # If you include a 'mapfile' then we will use locus information to subsample just a single SNP from each locus so that the resulting data file will meet the expectations of structure that SNPs are "unlinked". If you create multiple replicates files using different random seeds then different SNPs will be selected in each rep. # In[3]: s = ipa.structure( name="test", workdir="analysis-structure", data="analysis-ipyrad/ped_min10_outfiles/ped_min10.str", mapfile="analysis-ipyrad/ped_min10_outfiles/ped_min10.snps.map", ) # #### Set params for the structure analysis # These values are used to generate the "mainparams" and "extraparams" files for structure. # In[4]: ## set run parameters (you probably want to run >10X this long) s.mainparams.burnin = 1000 s.mainparams.numreps = 5000 ## tell structure to expect popdata & popflag s.mainparams.popdata = 1 s.mainparams.popflag = 1 ## print all mainparams s.mainparams # In[5]: ## tell structure to use popinfo s.extraparams.usepopinfo = 1 ## print all other extraparams s.extraparams # #### By default the 'header' of the str file is empty # In[6]: s.header # #### You can fill it in by filling the header attribute lists # `popdata` is the *a priori* population assignment of an individual to a population. Assignments should be non-zero integers (e.g., 1, 2, 3). Zero is reserved to mean that there is no *a priori* assignment. `popflag` indicates whether or not to use the population assignment in the analysis (1) or to leave it to be inferred (0). So in the example below seven samples have assigned populations (popflag=1), and six samples will have their population assignments inferred (popflag=0). The popdata information will only be used for the seven individuals with assigned pops. # In[7]: ## assign popdata and popflag by entering a list of values s.popdata = [1, 3, 1, 2, 3, 2, 3, 3, 3, 3, 3, 1, 1] s.popflag = [1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1] ## print the header information s.header # ### Write the input files and run STRUCTURE # This will write the `.str` file (and subsample SNPs if you included a mapfile) with the header information included, and it will write a mainparams and extraparams file with the parameter settings that we entered above. # In[8]: ## writes three input files and then call structure yourself s.write_structure_files(kpop=3) # ### Or, submit jobs directly to the cluster # If you start an ipcluster instance (see other tutorials) you can submit structure jobs directly to the cluster and easily collect the results, like below. # In[9]: ## connect to a running ipcluster instance import ipyparallel as ipp ipyclient = ipp.Client() print "connected to {} cores".format(len(ipyclient)) # In[10]: ## submit job replicates to ipyclient s.run(kpop=3, nreps=3, ipyclient=ipyclient) # In[11]: ## wait for jobs to finish ipyclient.wait() # # ### Collect results and plot # In[12]: ## get table of summarized results table = s.get_clumpp_table(3) # In[13]: ## reorder table by membership in groups table.sort_values(by=[0, 1, 2], inplace=True) # In[14]: ## build barplot canvas = toyplot.Canvas(width=500, height=250) axes = canvas.cartesian(bounds=("10%", "90%", "10%", "45%")) axes.bars(table) ## add labels to x-axis ticklabels = [i for i in table.index.tolist()] axes.x.ticks.locator = toyplot.locator.Explicit(labels=ticklabels) axes.x.ticks.labels.angle = -60 axes.x.ticks.show = True axes.x.ticks.labels.offset = 10 axes.x.ticks.labels.style = {"font-size": "12px"}