from os import chdir, mkdir
from os.path import join
from IPython.display import FileLinks, FileLink
Sequence data was demultiplexed and filtered using an in-house script available at https://github.com/gjospin/scripts/blob/master/Demul_trim_prep.pl
#The subsequent files containing only the merged 16S reads were concatenated into one file using:
!cat *.M.* > EverythingMerged.fasta.gz
#Then they were unzipped
!gunzip EverythingMerged.fasta.gz
#Then they were reverse complemented (as reads are in the wrong direction relative to the Greengenes/Unite databases)
#The reoriented reads are written to EverythingMerged_RC.fasta, which is the file used from here on
!adjust_seq_orientation.py -i EverythingMerged.fasta -o EverythingMerged_RC.fasta
#Note: if there are spaces in your path make sure they have a '\' before them so they are recognized
#16S sequences and mapping file
bactarch_seqs = "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/EverythingMerged_RC.fasta"
bactarch_map = "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/Sofie_only_mapping_w_metadata.txt"
#Databases
#otu_base is the root of the Greengenes 13_8 OTU release; the reference files are given
#relative to it. os.path.join throws away everything before an absolute component, so
#passing full absolute paths as the second argument would silently ignore otu_base.
otu_base = "/macqiime/greengenes/gg_13_8_otus/"
reference_seqs = join(otu_base, "rep_set/97_otus.fasta")
reference_tree = join(otu_base, "trees/97_otus.tree")
reference_tax = join(otu_base, "taxonomy/97_otu_taxonomy.txt")
#checks mapping file for qiime use
#the mapping file path is taken from the bactarch_map variable
!validate_mapping_file.py \
-m $bactarch_map
No errors or warnings were found in mapping file.
#replaces original mapping file with new corrected file
#NOTE(review): both names are relative, so this assumes the notebook's working directory
#is the folder that holds the mapping file - confirm before running
!mv Sofie_only_mapping_w_metadata_corrected.txt Sofie_only_mapping_w_metadata.txt
#validates the demultiplexed fasta file ($bactarch_seqs) against the mapping file ($bactarch_map)
!validate_demultiplexed_fasta.py \
-i $bactarch_seqs \
-m $bactarch_map
#check the log file generated to see if any duplicate barcodes/sample names are used; mostly this is a sanity check
There are two versions of USEARCH and you will need both in QIIME 1.9.0: USEARCH v5.2.236 and USEARCH 6.1. Name the 5.2.236 executable "usearch" and the 6.1 executable "usearch61" and make sure they're in your path. http://www.drive5.com/usearch/manual/install.html
Code to Install: (repeat for usearch)
sudo mv usearch61 /usr/local/bin/usearch61
sudo chmod a+x /usr/local/bin/usearch61
#identifies chimeric sequences using usearch61 in our bacterial data using the 97% OTU databases as the reference
#results go to qiime_ready_chimeras/ (the filtering step reads chimeras.txt from that folder)
!identify_chimeric_seqs.py \
-i $bactarch_seqs \
-m usearch61 \
-o qiime_ready_chimeras/ \
-r $reference_seqs
#filters out chimeric seqs from our fasta file
#-s points at the list of chimeric sequence ids written by identify_chimeric_seqs.py
#NOTE(review): -n presumably negates the list so the listed sequences are discarded rather than kept - confirm with filter_fasta.py -h
!filter_fasta.py \
-f $bactarch_seqs \
-o EverythingMerged_RC_Filtered.fasta \
-s qiime_ready_chimeras/chimeras.txt \
-n
#16S sequences after chimera filtering
#bactarch_seqs is re-pointed at the filtered file, so later cells that use $bactarch_seqs read the chimera-filtered sequences
bactarch_seqs = "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/EverythingMerged_RC_Filtered.fasta"
Make sure to install BLAST Legacy, if using (http://www.wernerlab.org/software/macqiime/macqiime-installation/installing-blast-in-os-x)
#Picking Open Reference OTUS for 16S
#input is $bactarch_seqs, reference is $reference_seqs (97% OTUs); results go to open_ref_97_otus_EverythingRCFiltered
#additional options are read from params.txt
!pick_open_reference_otus.py \
-o open_ref_97_otus_EverythingRCFiltered \
-i $bactarch_seqs \
-r $reference_seqs \
-p params.txt \
-a -O 6 -f
#prints the parameter file
#NOTE(review): -p above uses a relative path while this cat uses an absolute one; presumably the same file - confirm
!cat /Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/params.txt
pick_otus:enable_rev_strand_match True beta_diversity:metrics bray_curtis,euclidean,unweighted_unifrac,weighted_unifrac
Repeat OTU picking above using the 99% OTU reference database or alternate database as is desired.
#summarizes the biom table obtained from running open ref otu picking at 97% with greengenes; sanity check
!biom summarize-table \
-i open_ref_97_otus_EverythingRCFiltered/otu_table_mc2_w_tax_no_pynast_failures.biom \
-o open_ref_97_otus_EverythingRCFiltered/otu_table_summary.txt
FileLink("open_ref_97_otus_EverythingRCFiltered/otu_table_summary.txt")
Traceback (most recent call last): File "/macqiime/bin/pyqi", line 5, in <module> pkg_resources.run_script('pyqi==0.3.1', 'pyqi') File "/macqiime/lib/python2.7/site-packages/setuptools-0.9.8-py2.7.egg/pkg_resources.py", line 540, in run_script File "/macqiime/lib/python2.7/site-packages/setuptools-0.9.8-py2.7.egg/pkg_resources.py", line 1455, in run_script File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/EGG-INFO/scripts/pyqi", line 177, in <module> optparse_main(cmd_obj, argv[1:]) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/__init__.py", line 276, in optparse_main result = optparse_cmd(local_argv[1:]) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interface.py", line 42, in __call__ return self._output_handler(cmd_result) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/__init__.py", line 251, in _output_handler opt_value) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/output_handler.py", line 55, in write_list_of_strings raise IOError("Output path '%s' already exists." % option_value) IOError: Output path 'open_ref_97_otus_EverythingRCFiltered/otu_table_summary.txt' already exists.
#filters out all the chloroplasts/mitochondria/singletons
#step 1: removes OTUs whose taxonomy matches c__Chloroplast or f__mitochondria
!filter_taxa_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/otu_table_mc2_w_tax_no_pynast_failures.biom \
-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks.biom \
-n c__Chloroplast,f__mitochondria
#step 2: removes singletons
#NOTE(review): -n 2 is presumably the minimum total count an OTU needs in order to be kept - confirm with filter_otus_from_otu_table.py -h
!filter_otus_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks.biom \
-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons.biom \
-n 2
#filters out anything unable to be assigned at Domain level
!filter_taxa_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons.biom \
-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom \
-n Unassigned
#summarizes the biom table obtained above after filtering; sanity check
!biom summarize-table \
-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom \
-o open_ref_97_otus_EverythingRCFiltered/otu_summary_filtered.txt
FileLink("open_ref_97_otus_EverythingRCFiltered/otu_summary_filtered.txt")
Traceback (most recent call last): File "/macqiime/bin/pyqi", line 5, in <module> pkg_resources.run_script('pyqi==0.3.1', 'pyqi') File "/macqiime/lib/python2.7/site-packages/setuptools-0.9.8-py2.7.egg/pkg_resources.py", line 540, in run_script File "/macqiime/lib/python2.7/site-packages/setuptools-0.9.8-py2.7.egg/pkg_resources.py", line 1455, in run_script File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/EGG-INFO/scripts/pyqi", line 177, in <module> optparse_main(cmd_obj, argv[1:]) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/__init__.py", line 276, in optparse_main result = optparse_cmd(local_argv[1:]) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interface.py", line 42, in __call__ return self._output_handler(cmd_result) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/__init__.py", line 251, in _output_handler opt_value) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/output_handler.py", line 55, in write_list_of_strings raise IOError("Output path '%s' already exists." % option_value) IOError: Output path 'open_ref_97_otus_EverythingRCFiltered/otu_summary_filtered.txt' already exists.
#97% 16S OTUs
#adds the metadata from the mapping file ($bactarch_map) to the filtered biom table;
#the result is written to a new *_w_metadata.biom file, the input table is left as it is
!biom add-metadata \
-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom \
-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom \
-m $bactarch_map
The data analyzed so far in this IPython notebook are for two projects - a project on edge effects and a project on distance from the harbor. Here we separate the Edge Effects data from the biom table for further analysis. (NOTE: I could have analyzed these two datasets separately from the beginning if I desired.)
#97% 16S OTUS
#Edge Effects samples
#the sample IDs to keep are listed in WestpointBacKEEP.txt; output is Westpoint.biom
!filter_samples_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint.biom \
-m $bactarch_map \
--sample_id_fp open_ref_97_otus_EverythingRCFiltered/WestpointBacKEEP.txt
#Harbor samples
#selected with the metadata filter 'CN_ratio:NA'; output is Harbor.biom
!filter_samples_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom \
-o open_ref_97_otus_EverythingRCFiltered/Harbor.biom \
-m $bactarch_map \
-s 'CN_ratio:NA'
#sanity check
!biom summarize-table \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint.txt
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint.txt")
#List of Sample IDs to keep (these are the sample IDs for the Edge Effects Data)
#Westpoint == Edge Effects
#prints the ID list that is passed via --sample_id_fp when Westpoint.biom is created
!cat /Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/open_ref_97_otus_EverythingRCFiltered/WestpointBacKEEP.txt
BB001 BB002 BB003 BB004 BB005 BB006 BB007 BB008 BB009 BB010 BB011 BB012 BB013 BB014 BB015 BB016 BB017 BB018 BB019 BB020 BB021 BB022 BB023 BB024 BB025 BB026 BB027 BB028 BB029 BB030 BB031 BB032 BB033 BB034 BB035 BB036 BB037 BB038 BB039 BB040 BBNC
For 16S, the ratio of OTUS for negative control:smallest sample is approx 1:10 after filtering and 1:50 before filtering. The number one contaminant is chloroplast DNA - since the negative control had no plant material in it, this indicates that some spillover occurred between the negative control and the rest of my samples. I looked at the negative control in more detail (steps not included in this IPython notebook) and ultimately decided to simply remove the negative control sample from downstream analysis.
#16S Greengenes 97% OTUs
#removes the negative control sample: keeps samples with any phinchID value except Negative_control
#output is Westpoint_NoNC.biom, the table used by the cells that follow
!filter_samples_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom \
-m $bactarch_map \
-s "phinchID:*,!Negative_control"
#Investigating levels of rarefaction - Bacteria
#runs on the table without the negative control, using the rep_set.tre tree from OTU picking
!alpha_rarefaction.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom \
-n 25 \
-o open_ref_97_otus_EverythingRCFiltered/arare_WestpointNONC \
-m $bactarch_map \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre -f
#link to the rarefaction plots written to the output folder above
FileLink("open_ref_97_otus_EverythingRCFiltered/arare_WestpointNONC/alpha_rarefaction_plots/rarefaction_plots.html")
#Investigating how rarification to smallest sample size would effect PCoA plots
!jackknifed_beta_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom \
-o open_ref_97_otus_EverythingRCFiltered/jacknifed_betadiv_3277/ \
-e 3277\
-m $bactarch_map \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre -f
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0528807397816 and the largest is 2.20871223703. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0504254111014 and the largest is 2.20188726427. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0500996647029 and the largest is 2.20514522014. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. 
If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0546377657593 and the largest is 2.18418408358. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0583052134005 and the largest is 2.20208391119. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0514658591384 and the largest is 2.21595797818. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0484149983321 and the largest is 2.16240301124. 
RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0457806682627 and the largest is 2.19729946138. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0530840501316 and the largest is 2.19989678923. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0505954109231 and the largest is 2.18193368748. RuntimeWarning
#16S Greengenes 97% OTU Data; Minimum OTUs = 3277 from biom table summary so rarefy to 3277
#output is Westpoint_NoNC_3277.biom
!single_rarefaction.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom \
-d 3277
#sanity check 16S data
!biom summarize-table \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.txt
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.txt")
Traceback (most recent call last): File "/macqiime/bin/pyqi", line 5, in <module> pkg_resources.run_script('pyqi==0.3.1', 'pyqi') File "/macqiime/lib/python2.7/site-packages/setuptools-0.9.8-py2.7.egg/pkg_resources.py", line 540, in run_script File "/macqiime/lib/python2.7/site-packages/setuptools-0.9.8-py2.7.egg/pkg_resources.py", line 1455, in run_script File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/EGG-INFO/scripts/pyqi", line 177, in <module> optparse_main(cmd_obj, argv[1:]) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/__init__.py", line 276, in optparse_main result = optparse_cmd(local_argv[1:]) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interface.py", line 42, in __call__ return self._output_handler(cmd_result) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/__init__.py", line 251, in _output_handler opt_value) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/output_handler.py", line 55, in write_list_of_strings raise IOError("Output path '%s' already exists." % option_value) IOError: Output path 'open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.txt' already exists.
#Splits the Edge Effects table (negative control removed) into one biom table per Substrate value
#Leaf
!filter_samples_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom \
-m $bactarch_map \
-s "Substrate:leaf"
#Root
!filter_samples_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom \
-m $bactarch_map \
-s "Substrate:root"
#Soil
!filter_samples_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom \
-m $bactarch_map \
-s "Substrate:soil"
#sanity check - Leaf
!biom summarize-table \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom \
-o open_ref_97_otus_EverythingRCFiltered/Leaf.txt
FileLink("open_ref_97_otus_EverythingRCFiltered/Leaf.txt")
Traceback (most recent call last): File "/macqiime/bin/pyqi", line 5, in <module> pkg_resources.run_script('pyqi==0.3.1', 'pyqi') File "/macqiime/lib/python2.7/site-packages/setuptools-0.9.8-py2.7.egg/pkg_resources.py", line 540, in run_script File "/macqiime/lib/python2.7/site-packages/setuptools-0.9.8-py2.7.egg/pkg_resources.py", line 1455, in run_script File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/EGG-INFO/scripts/pyqi", line 177, in <module> optparse_main(cmd_obj, argv[1:]) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/__init__.py", line 276, in optparse_main result = optparse_cmd(local_argv[1:]) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interface.py", line 42, in __call__ return self._output_handler(cmd_result) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/__init__.py", line 251, in _output_handler opt_value) File "/macqiime/lib/python2.7/site-packages/pyqi-0.3.1-py2.7.egg/pyqi/core/interfaces/optparse/output_handler.py", line 55, in write_list_of_strings raise IOError("Output path '%s' already exists." % option_value) IOError: Output path 'open_ref_97_otus_EverythingRCFiltered/Leaf.txt' already exists.
#sanity check - Root
!biom summarize-table \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom \
-o open_ref_97_otus_EverythingRCFiltered/Root.txt
FileLink("open_ref_97_otus_EverythingRCFiltered/Root.txt")
#sanity check - Soil
!biom summarize-table \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom \
-o open_ref_97_otus_EverythingRCFiltered/Soil.txt
FileLink("open_ref_97_otus_EverythingRCFiltered/Soil.txt")
Traceback (most recent call last): File "/Library/Frameworks/Python.framework/Versions/2.7/bin/pyqi", line 184, in <module> optparse_main(cmd_obj, argv[1:]) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pyqi/core/interfaces/optparse/__init__.py", line 275, in optparse_main result = optparse_cmd(local_argv[1:]) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pyqi/core/interface.py", line 41, in __call__ return self._output_handler(cmd_result) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pyqi/core/interfaces/optparse/__init__.py", line 250, in _output_handler opt_value) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pyqi/core/interfaces/optparse/output_handler.py", line 97, in write_or_print_list_of_strings write_list_of_strings(result_key, data, option_value) File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pyqi/core/interfaces/optparse/output_handler.py", line 55, in write_list_of_strings raise IOError("Output path '%s' already exists." % option_value) IOError: Output path 'open_ref_97_otus_EverythingRCFiltered/Soil.txt' already exists.
#Leaf & Root (sanity check to see if soil artificially pulls leaf and root apart)
!filter_samples_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom \
-m $bactarch_map \
-s 'Substrate:*,!soil'
!biom summarize-table \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.txt
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.txt")
#16S Data: Rarefy each substrate biom table by minimum OTU in each table
#the depth passed with -d is also encoded in each output file name
#Leaf: Min #OTU = 5177
!single_rarefaction.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom \
-d 5177
#Root: Min #OTU = 3277
!single_rarefaction.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom \
-d 3277
#Soil: Min #OTU = 23059; but rarefy to 20000
!single_rarefaction.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom \
-d 20000
#Leaf + Root #OTU = 3277
!single_rarefaction.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot_3277.biom \
-d 3277
For exploratory analysis - used core_diversity_analyses
#For ALL 16S Data
#input is the table already rarefied to 3277; -e is set to the same depth
#extra options are read from betadiv_params.txt
!core_diversity_analyses.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom \
-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/ \
-m $bactarch_map \
-e 3277 \
-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre
#link to the index page in the output folder
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/index.html")
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0503240579433 and the largest is 2.19159752919. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy-1.9.2-py2.7-macosx-10.6-x86_64.egg/numpy/core/fromnumeric.py:2507: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`. VisibleDeprecationWarning)
#For Leaf 16S
#input is the leaf table already rarefied to 5177; -e is set to the same depth
#extra options are read from betadiv_params.txt
!core_diversity_analyses.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom \
-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/ \
-m $bactarch_map \
-e 5177 \
-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre
#link to the index page in the output folder
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/index.html")
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy-1.9.2-py2.7-macosx-10.6-x86_64.egg/numpy/core/fromnumeric.py:2507: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`. VisibleDeprecationWarning)
#For Root 16S
#input is the root table already rarefied to 3277; -e is set to the same depth
#extra options are read from betadiv_params.txt
!core_diversity_analyses.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom \
-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/ \
-m $bactarch_map \
-e 3277 \
-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre
#link to the index page in the output folder
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/index.html")
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy-1.9.2-py2.7-macosx-10.6-x86_64.egg/numpy/core/fromnumeric.py:2507: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`. VisibleDeprecationWarning)
#For Soil 16S
#input is the soil table already rarefied to 20000; -e is set to the same depth
#extra options are read from betadiv_params.txt
!core_diversity_analyses.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom \
-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/ \
-m $bactarch_map \
-e 20000 \
-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre
#link to the index page in the output folder
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/index.html")
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.00188786837015 and the largest is 0.303247381814. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy-1.9.2-py2.7-macosx-10.6-x86_64.egg/numpy/core/fromnumeric.py:2507: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`. VisibleDeprecationWarning)
#For Leaf + Root 16S
#input is the leaf+root table already rarefied to 3277; -e is set to the same depth
#extra options are read from betadiv_params.txt
!core_diversity_analyses.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot_3277.biom \
-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_LeafAndRoot_3277/ \
-m $bactarch_map \
-e 3277 \
-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre
#link to the index page in the output folder
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_LeafAndRoot_3277/index.html")
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/skbio/stats/ordination/_principal_coordinate_analysis.py:107: RuntimeWarning: The result contains negative eigenvalues. Please compare their magnitude with the magnitude of some of the largest positive eigenvalues. If the negative ones are smaller, it's probably safe to ignore them, but if they are large in magnitude, the results won't be useful. See the Notes section for more details. The smallest eigenvalue is -0.0259391833005 and the largest is 1.179226454. RuntimeWarning /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/numpy-1.9.2-py2.7-macosx-10.6-x86_64.egg/numpy/core/fromnumeric.py:2507: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`. VisibleDeprecationWarning)
#Alpha diversity on the already-rarefied tables; the metrics to compute are read from alpha_params.txt
#Soil, 20000
!alpha_rarefaction.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/ \
-p open_ref_97_otus_EverythingRCFiltered/alpha_params.txt \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre \
-m $bactarch_map
#All, 3277
!alpha_rarefaction.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/ \
-p open_ref_97_otus_EverythingRCFiltered/alpha_params.txt \
-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre \
-m $bactarch_map
#Parameter file used above
!cat open_ref_97_otus_EverythingRCFiltered/alpha_params.txt
alpha_diversity:metrics shannon,chao1,observed_otus,simpson
#lists the alpha diversity metrics known to alpha_diversity.py (valid values for alpha_diversity:metrics)
!alpha_diversity.py -s
Known metrics are: ace, berger_parker_d, brillouin_d, chao1, chao1_ci, dominance, doubles, enspie, equitability, esty_ci, fisher_alpha, gini_index, goods_coverage, heip_e, kempton_taylor_q, margalef, mcintosh_d, mcintosh_e, menhinick, michaelis_menten_fit, observed_otus, observed_species, osd, simpson_reciprocal, robbins, shannon, simpson, simpson_e, singles, strong, PD_whole_tree For more information, see http://scikit-bio.org/docs/latest/generated/skbio.diversity.alpha.html
#16S All
#PERMANOVA on the Location category with 9999 permutations, once per distance matrix
#(matrices come from core_diversity_analyses_Westpoint/bdiv_even3277)
#unweighted
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Location \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/unweighted_unifrac_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_UU \
-n 9999
#bray
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Location \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/bray_curtis_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_BC \
-n 9999
#weighted unifrac
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Location \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/weighted_unifrac_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_WU \
-n 9999
#16S Soil
#PERMANOVA on the Location category with 9999 permutations, once per distance matrix
#(matrices come from core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000)
#weighted unifrac
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Location \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/weighted_unifrac_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_WU \
-n 9999
#unweighted
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Location \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/unweighted_unifrac_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_UU \
-n 9999
#bray
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Location \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/bray_curtis_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_BC \
-n 9999
#16S Root
#PERMANOVA on Location, weighted unifrac only (matrix from core_diversity_analyses_Westpoint_Root/bdiv_even3277)
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Location \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/bdiv_even3277/weighted_unifrac_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Root_WU \
-n 9999
#16S Leaf
#PERMANOVA on Location, weighted unifrac only (matrix from core_diversity_analyses_Westpoint_Leaf/bdiv_even5177)
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Location \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/bdiv_even5177/weighted_unifrac_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Leaf_WU \
-n 9999
#16S Data - All (b/c testing if substrate signficant)
#weighted unifrac
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Substrate \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/weighted_unifrac_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_WU \
-n 9999
#unweighted unifrac
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Substrate \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/unweighted_unifrac_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_UU \
-n 9999
#bray curtis
!compare_categories.py \
--method permanova \
-m $bactarch_map \
-c Substrate \
-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/bray_curtis_dm.txt \
-o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_BC \
-n 9999
#Substrate (sample type differences)
#All, chao1
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/chao1.txt \
-m $bactarch_map \
-c Substrate \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_chao1.txt \
-n 9999 -p bonferroni
#All, observed otus
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/observed_otus.txt \
-m $bactarch_map \
-c Substrate \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_otus.txt \
-n 9999 -p bonferroni
#All, shannon
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/shannon.txt \
-m $bactarch_map \
-c Substrate \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_shannon.txt \
-n 9999 -p bonferroni
#All, simpson
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/simpson.txt \
-m $bactarch_map \
-c Substrate \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_simpson.txt \
-n 9999 -p bonferroni
#Location (inside, edge, outside)
#All, chao1
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/chao1.txt \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_chao1.txt \
-n 9999 -p bonferroni
#All, observed otus
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/observed_otus.txt \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_otus.txt \
-n 9999 -p bonferroni
#All, shannon
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/shannon.txt \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_shannon.txt \
-n 9999 -p bonferroni
#All, simpson
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/simpson.txt \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_simpson.txt \
-n 9999 -p bonferroni
#Location
#Sediment, chao1
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/chao1.txt \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_chao1.txt \
-n 9999 -p bonferroni
#Sediment, observed otus
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/observed_otus.txt \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_otus.txt \
-n 9999 -p bonferroni
#Sediment, shannon
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/shannon.txt \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_shannon.txt \
-n 9999 -p bonferroni
#Sediment, simpson
!compare_alpha_diversity.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/simpson.txt \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_simpson.txt \
-n 9999 -p bonferroni
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") 
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer") /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-macosx-10.6-x86_64.egg/matplotlib/tight_layout.py:225: UserWarning: tight_layout : falling back to Agg renderer warnings.warn("tight_layout : falling back to Agg renderer")
#filter out low abundance otus
#All
!filter_otus_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom \
--min_count_fraction .001
#Sediment
!filter_otus_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_json.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom \
--min_count_fraction .001
#Stats
#All, Location
!group_significance.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/group_sig_Location_3277_top99.txt
#All, Sample type
!group_significance.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom \
-m $bactarch_map \
-c Substrate \
-o open_ref_97_otus_EverythingRCFiltered/group_sig_Substrate_3277_top99.txt
#Sediment, location
!group_significance.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/group_sig_Location_Soil_20000_top99.txt \
#First summarize taxa & have keep only > 0.1% taxa
#All
!summarize_taxa.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom \
-o open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/ \
-u 0.01
#Sediment
!summarize_taxa.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom \
-o open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/ \
-u 0.01
#Stats at Order Level (L4)
#All, Sample Type
!group_significance.py \
-i open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/Westpoint_NoNC_3277_L4.biom \
-m $bactarch_map \
-c Substrate \
-o open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/group_sig_substrate_kw_9999_L4.txt \
--permutations 9999
#Sediment, location
!group_significance.py \
-i open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/Westpoint_NoNC_Soil_20000_L4.biom \
-m $bactarch_map \
-c Location \
-o open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/group_sig_location_kw_9999_L4.txt \
--permutations 9999
Warning: No metadata in biom table. Won't alter calculations. Warning: No metadata in biom table. Won't alter calculations.
#All, cross-validation 10 fold, sample type, order level
!supervised_learning.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99_supervised_learning_substrate_cv10/ \
-m $bactarch_map \
-c Substrate \
--ntree 1000 \
-e cv10 -f
#All, leave one out, sample type, order level
!supervised_learning.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99_supervised_learning_substrate_loo/ \
-m $bactarch_map \
-c Substrate \
--ntree 1000 \
-e loo -f
#Sediment, cross-validation 10 fold, location, order level
!supervised_learning.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_200000_w_meta_supervised_learning_location/ \
-m $bactarch_map \
-c Substrate \
--ntree 1000 \
-e cv10 -f
#Sediment, leave one out, location, order level
!supervised_learning.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_200000_w_meta_supervised_learning_location_loo/ \
-m $bactarch_map \
-c Substrate \
--ntree 1000 \
-e loo -f
Error in supervised_learning.py: -i option requires an argument If you need help with QIIME, see: http://help.qiime.org
#how to convert to json for phyloseq and phinch
!biom convert \
-i table.txt \
-o table.from_txt_json.biom \
--table-type="OTU table" \
--to-json
#converting biom tables for phyloseq
#All 16S
!biom convert \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_json.biom \
--table-type="OTU table" \
--to-json
#Soil
!biom convert \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_json.biom \
--table-type="OTU table" \
--to-json
#Leaf
!biom convert \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177_json.biom \
--table-type="OTU table" \
--to-json
#Root
!biom convert \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277_json.biom \
--table-type="OTU table" \
--to-json
PICRUSt only accepts closed-reference OTUs, so this step filters out all open-reference OTUs.
!filter_otus_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed.biom \
--negate_ids_to_exclude \
-e $reference_tax
!filter_otus_from_otu_table.py \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_json.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed.biom \
--negate_ids_to_exclude \
-e $reference_tax
!biom convert \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed_json.biom \
--table-type="OTU table" \
--to-json
!biom convert \
-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed.biom \
-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed_json.biom \
--table-type="OTU table" \
--to-json