from os import chdir, mkdir from os.path import join #the following are only available in the current development branch of IPython from IPython.display import FileLinks, FileLink project_name = "MicrobesWeEat" sequence_file = "./MicrobesWeEat.fasta" non_chimeric_sequence_file = "./non_chimeric_sequences.fasta" mapping_file = "./MicrobesWeEat.txt" otu_base = "/macqiime/greengenes/gg_13_8_otus/" reference_seqs_99 = join(otu_base,"/macqiime/greengenes/gg_13_8_otus/rep_set/99_otus.fasta") reference_tree_99 = join(otu_base,"/macqiime/greengenes/gg_13_8_otus/trees/99_otus.tree") reference_tax_99 = join(otu_base,"/macqiime/greengenes/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt") reference_seqs_97 = join(otu_base,"/macqiime/greengenes/gg_13_8_otus/rep_set/97_otus.fasta") reference_tree_97 = join(otu_base,"/macqiime/greengenes/gg_13_8_otus/trees/97_otus.tree") reference_tax_97 = join(otu_base,"/macqiime/greengenes/gg_13_8_otus/taxonomy/97_otu_taxonomy.txt") !validate_mapping_file.py -m $mapping_file #!ls RawSequenceData/ #Listed are the raw sequence files. #!gunzip RawSequenceData/*.gz #!ls RawSequenceData/ !sed 's/ 1:N:0://g' RawSequenceData/HMSB_AZ_35_NoIndex_L001_R1_001.fastq > HMSB_AZ_35_NoIndex_L001_R1_001_fixed.fastq !sed 's/ 4:N:0://g' RawSequenceData/HMSB_AZ_35_NoIndex_L001_R4_001.fastq > HMSB_AZ_35_NoIndex_L001_R4_001_fixed.fastq !sed 's/ 1:N:0://g' RawSequenceData/HMSB_AZ_35_NoIndex_L001_R1_002.fastq > HMSB_AZ_35_NoIndex_L001_R1_002_fixed.fastq !sed 's/ 4:N:0://g' RawSequenceData/HMSB_AZ_35_NoIndex_L001_R4_002.fastq > HMSB_AZ_35_NoIndex_L001_R4_002_fixed.fastq !sed 's/ 1:N:0://g' RawSequenceData/HMSB_AZ_35_NoIndex_L001_R1_003.fastq > HMSB_AZ_35_NoIndex_L001_R1_003_fixed.fastq !sed 's/ 4:N:0://g' RawSequenceData/HMSB_AZ_35_NoIndex_L001_R4_003.fastq > HMSB_AZ_35_NoIndex_L001_R4_003_fixed.fastq !sed 's/ 1:N:0://g' RawSequenceData/HMSB_AZ_35_NoIndex_L001_R1_004.fastq > HMSB_AZ_35_NoIndex_L001_R1_004_fixed.fastq !sed 's/ 4:N:0://g' RawSequenceData/HMSB_AZ_35_NoIndex_L001_R4_004.fastq > HMSB_AZ_35_NoIndex_L001_R4_004_fixed.fastq !extract_barcodes.py -r RawSequenceData/HMSB_AZ_35_NoIndex_L001_R2_001.fastq -f RawSequenceData/HMSB_AZ_35_NoIndex_L001_R3_001.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane1.barcodes !extract_barcodes.py -r RawSequenceData/HMSB_AZ_35_NoIndex_L001_R2_002.fastq -f RawSequenceData/HMSB_AZ_35_NoIndex_L001_R3_002.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane2.barcodes !extract_barcodes.py -r RawSequenceData/HMSB_AZ_35_NoIndex_L001_R2_003.fastq -f RawSequenceData/HMSB_AZ_35_NoIndex_L001_R3_003.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane3.barcodes !extract_barcodes.py -r RawSequenceData/HMSB_AZ_35_NoIndex_L001_R2_004.fastq -f RawSequenceData/HMSB_AZ_35_NoIndex_L001_R3_004.fastq -c barcode_paired_end --bc1_len 8 --bc2_len 8 -o Lane4.barcodes !sed 's/ 3:N:0://g' Lane1.barcodes/barcodes.fastq > Lane1.barcodes.fastq !sed 's/ 3:N:0://g' Lane2.barcodes/barcodes.fastq > Lane2.barcodes.fastq !sed 's/ 3:N:0://g' Lane3.barcodes/barcodes.fastq > Lane3.barcodes.fastq !sed 's/ 3:N:0://g' Lane4.barcodes/barcodes.fastq > Lane4.barcodes.fastq !join_paired_ends.py -r HMSB_AZ_35_NoIndex_L001_R1_001_fixed.fastq -f HMSB_AZ_35_NoIndex_L001_R4_001_fixed.fastq -b Lane1.barcodes.fastq -o Lane1_joined !join_paired_ends.py -r HMSB_AZ_35_NoIndex_L001_R1_002_fixed.fastq -f HMSB_AZ_35_NoIndex_L001_R4_002_fixed.fastq -b Lane2.barcodes.fastq -o Lane2_joined !join_paired_ends.py -r HMSB_AZ_35_NoIndex_L001_R1_003_fixed.fastq -f HMSB_AZ_35_NoIndex_L001_R4_003_fixed.fastq -b Lane3.barcodes.fastq -o Lane3_joined !join_paired_ends.py -r HMSB_AZ_35_NoIndex_L001_R1_004_fixed.fastq -f HMSB_AZ_35_NoIndex_L001_R4_004_fixed.fastq -b Lane4.barcodes.fastq -o Lane4_joined !split_libraries_fastq.py -q 5 -r 9 -p 0.5 -i Lane1_joined/fastqjoin.join.fastq -o Demultiplexed_Lane1 -m MicrobesWeEat.txt -b Lane1_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2 !split_libraries_fastq.py -q 5 -r 9 -p 0.5 -i Lane2_joined/fastqjoin.join.fastq -o Demultiplexed_Lane2 -m MicrobesWeEat.txt -b Lane2_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2 !split_libraries_fastq.py -q 5 -r 9 -p 0.5 -i Lane3_joined/fastqjoin.join.fastq -o Demultiplexed_Lane3 -m MicrobesWeEat.txt -b Lane3_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2 !split_libraries_fastq.py -q 5 -r 9 -p 0.5 -i Lane4_joined/fastqjoin.join.fastq -o Demultiplexed_Lane4 -m MicrobesWeEat.txt -b Lane4_joined/fastqjoin.join_barcodes.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2 #!cat Demultiplexed_Lane1/split_library_log.txt #!cat Demultiplexed_Lane2/split_library_log.txt #!cat Demultiplexed_Lane3/split_library_log.txt #!cat Demultiplexed_Lane4/split_library_log.txt !cat Lane*_joined/fastqjoin.join.fastq > All_Lanes_joined.fastq !cat Lane*_joined/fastqjoin.join_barcodes.fastq > All_barcodes_joined.fastq !split_libraries_fastq.py -q 5 -r 9 -p 0.5 -i All_Lanes_joined.fastq -o Demultiplexed_All -m MicrobesWeEat.txt -b All_barcodes_joined.fastq --retain_unassigned_reads --barcode_type 16 --max_barcode_errors 2 !cat Demultiplexed_All/split_library_log.txt #!ls Demultiplexed_All/ !pick_closed_reference_otus.py -p 99parameters.txt -o greengenes_99_otus -i Demultiplexed_All/seqs.fna -r $reference_seqs_99 -t $reference_tax_99 -a -O 2 -f !pick_closed_reference_otus.py -p 97parameters.txt -o greengenes_97_otus -i Demultiplexed_All/seqs.fna -r $reference_seqs_97 -t $reference_tax_97 -a -O 2 -f #!cat greengenes_99_otus/uclust_ref_picked_otus/seqs_otus.log #!biom summarize-table -i greengenes_99_otus/otu_table.biom -o 99_otu_table_summary.txt #!cat 99_otu_table_summary.txt #!biom summarize-table -i greengenes_97_otus/otu_table.biom -o 97_otu_table_summary.txt #!cat 97_otu_table_summary.txt #!cat greengenes_97_otus/uclust_ref_picked_otus/seqs_otus.log #!identify_chimeric_seqs.py -i Demultiplexed_All/seqs.fna -m usearch61 -o 97_usearch61_chimera_detection/ -r $reference_seqs_97 #!identify_chimeric_seqs.py -i Demultiplexed_All/seqs.fna -m usearch61 -o 99_usearch61_chimera_detection/ -r $reference_seqs_99 !filter_taxa_from_otu_table.py -i greengenes_97_otus/otu_table.biom -o closed_ref_97_otu_table_no_euks.biom -n c__Chloroplast,f__mitochondria !filter_taxa_from_otu_table.py -i closed_ref_97_otu_table_no_euks.biom -o closed_ref_97_otu_table_no_euks_no_unassigned.biom -n Unassigned !filter_otus_from_otu_table.py -i closed_ref_97_otu_table_no_euks_no_unassigned.biom -o closed_ref_97_otu_table_filtered.biom -n 2 !filter_taxa_from_otu_table.py -i greengenes_99_otus/otu_table.biom -o closed_ref_99_otu_table_no_euks.biom -n c__Chloroplast,f__mitochondria !filter_taxa_from_otu_table.py -i closed_ref_99_otu_table_no_euks.biom -o closed_ref_99_otu_table_no_euks_no_unassigned.biom -n Unassigned !filter_otus_from_otu_table.py -i closed_ref_99_otu_table_no_euks_no_unassigned.biom -o closed_ref_99_otu_table_filtered.biom -n 2 #!biom summarize-table -i closed_ref_99_otu_table_filtered.biom -o closed_ref_99_otu_table_filtered.summary #!cat closed_ref_99_otu_table_filtered.summary !filter_samples_from_otu_table.py -i closed_ref_97_otu_table_filtered.biom -o closed_ref_97_otu_table_final.biom --sample_id_fp samples_to_keep.txt !filter_samples_from_otu_table.py -i closed_ref_99_otu_table_filtered.biom -o closed_ref_99_otu_table_final.biom --sample_id_fp samples_to_keep.txt !/Applications/picrust-1.0.0/scripts/normalize_by_copy_number.py -i closed_ref_97_otu_table_final.biom -o normalized_closed_ref_97_otu_table.biom !/Applications/picrust-1.0.0/scripts/normalize_by_copy_number.py -i closed_ref_99_otu_table_final.biom -o normalized_closed_ref_99_otu_table.biom !/Applications/picrust-1.0.0/scripts/predict_metagenomes.py -a normalized_closed_ref_97_NSTI.tab -i normalized_closed_ref_97_otu_table.biom -o metagenome_prediction_from_normalized_closed_ref_97_otu_table.biom !/Applications/picrust-1.0.0/scripts/predict_metagenomes.py -a normalized_closed_ref_99_NSTI.tab -i normalized_closed_ref_99_otu_table.biom -o metagenome_prediction_from_normalized_closed_ref_99_otu_table.biom #!more normalized_closed_ref_97_NSTI.tab #!more normalized_closed_ref_99_NSTI.tab !/Applications/picrust-1.0.0/scripts/categorize_by_function.py -f -i metagenome_prediction_from_normalized_closed_ref_97_otu_table.biom -c KEGG_Pathways -l 3 -o 97_MWE_predicted_metagenomes.L3.txt !/Applications/picrust-1.0.0/scripts/categorize_by_function.py -f -i metagenome_prediction_from_normalized_closed_ref_97_otu_table.biom -c KEGG_Pathways -l 2 -o 97_MWE_predicted_metagenomes.L2.txt !/Applications/picrust-1.0.0/scripts/categorize_by_function.py -f -i metagenome_prediction_from_normalized_closed_ref_99_otu_table.biom -c KEGG_Pathways -l 3 -o 99_MWE_predicted_metagenomes.L3.txt !/Applications/picrust-1.0.0/scripts/categorize_by_function.py -f -i metagenome_prediction_from_normalized_closed_ref_99_otu_table.biom -c KEGG_Pathways -l 2 -o 99_MWE_predicted_metagenomes.L2.txt !sed '1d' 97_MWE_predicted_metagenomes.L3.txt | rev | cut -f 2- | rev > 97_MWE_predicted_metagenome.L3.spf !sed '1d' 97_MWE_predicted_metagenomes.L2.txt | rev | cut -f 2- | rev > 97_MWE_predicted_metagenome.L2.spf !sed '1d' 99_MWE_predicted_metagenomes.L3.txt | rev | cut -f 2- | rev > 99_MWE_predicted_metagenome.L3.spf !sed '1d' 99_MWE_predicted_metagenomes.L2.txt | rev | cut -f 2- | rev > 99_MWE_predicted_metagenome.L2.spf !/Applications/picrust-1.0.0/scripts/metagenome_contributions.py -l K00511 -i normalized_closed_ref_97_otu_table.biom -o 97_metagenome_contribution_K00511 -g 13_5 !/Applications/picrust-1.0.0/scripts/metagenome_contributions.py -l K00511 -i normalized_closed_ref_99_otu_table.biom -o 99_metagenome_contribution_K00511 -g 13_5 !cat 97_metagenome_contribution_K00511 !cat 99_metagenome_contribution_K00511 !filter_fasta.py -f MicrobesWeEat.fasta -o MicrobesWeEat_NoUnassigned.fasta --sample_id_fp samples_to_keep.txt !pick_open_reference_otus.py -p 97parameters.txt -r $reference_seqs_97 -i MicrobesWeEat_NoUnassigned.fasta -o 97_open_reference_otus -f #!biom summarize-table -i 97_open_reference_otus/otu_table_mc2_w_tax_no_pynast_failures.biom -o otu_table_summary_before_cleanup.txt #!cat otu_table_summary_before_cleanup.txt !filter_taxa_from_otu_table.py -i 97_open_reference_otus/otu_table_mc2_w_tax_no_pynast_failures.biom -o open_ref_97_otu_table_no_euks.biom -n c__Chloroplast,f__mitochondria !filter_taxa_from_otu_table.py -i open_ref_97_otu_table_no_euks.biom -o open_ref_97_otu_table_no_euks_no_unassigned.biom -n Unassigned !filter_otus_from_otu_table.py -i open_ref_97_otu_table_no_euks_no_unassigned.biom -o open_ref_97_otu_table_no_euks_no_unassigned_no_singletons.biom -n 2 #sanity check #!biom summarize-table -i open_ref_97_otu_table_no_euks_no_unassigned_no_singletons.biom -o open_ref_97_otu_table_no_euks_no_unassigned_no_singletons.summary #!cat open_ref_97_otu_table_no_euks_no_unassigned_no_singletons.summary #!identify_chimeric_seqs.py -m ChimeraSlayer -i rep_set_aligned.fasta -a core_set_aligned.fasta.imputed -o chimeric_seqs_open_97.txt #remove chimeras from aligned rep_set !filter_fasta.py -f 97_open_reference_otus/pynast_aligned_seqs/rep_set_aligned.fasta -o non_chimeric_rep_set_aligned.fasta -s chimeric_seqs_open_97.txt -n !make_phylogeny.py -i non_chimeric_rep_set_aligned.fasta !filter_otus_from_otu_table.py -i open_ref_97_otu_table_no_euks_no_unassigned_no_singletons.biom -o open_ref_97_otu_table_no_euks_no_unassigned_no_singletons_no_chimeras.biom -e chimeric_seqs_open_97.txt #sanity check #!biom summarize-table -i open_ref_97_otu_table_no_euks_no_unassigned_no_singletons_no_chimeras.biom -o open_ref_97_otu_table_no_euks_no_unassigned_no_singletons_no_chimeras.summary #!cat open_ref_97_otu_table_no_euks_no_unassigned_no_singletons_no_chimeras.summary !rm 97_open_otu_table_wmd.biom !biom add-metadata -i open_ref_97_otu_table_no_euks_no_unassigned_no_singletons_no_chimeras.biom -o 97_open_otu_table_wmd.biom --sample-metadata-fp MicrobesWeEat.txt --observation-metadata-fp rep_set_tax_assignments_header.txt #sanity check #!biom summarize-table -i 97_open_otu_table_wmd.biom -o 97_open_otu_table_wmd.summary #!cat 97_open_otu_table_wmd.summary #unique counts instead of totals #!biom summarize-table -i 97_open_otu_table_wmd.biom --qualitative -o 97_open_otu_table_wmd.qualitative.summary #!cat 97_open_otu_table_wmd.qualitative.summary #unique counts instead of totals, but with rarefied OTU table !biom summarize-table -i core_diversity_analyses_open_ref_97/table_even771.biom --qualitative -o 97_open_otu_table_771.qualitative.summary !cat 97_open_otu_table_771.qualitative.summary #!rm -r DietType_core_diversity_analyses_open_ref_97 #!rm -r core_diversity_analyses_open_ref_97 !core_diversity_analyses.py -p 97parameters.txt -i 97_open_otu_table_wmd.biom -o DietType_core_diversity_analyses_open_ref_97 -m MicrobesWeEat.txt -e 771 -c DietType -t non_chimeric_rep_set_aligned.tre !core_diversity_analyses.py -p 97parameters.txt -i 97_open_otu_table_wmd.biom -o core_diversity_analyses_open_ref_97 -m MicrobesWeEat.txt -e 771 -t non_chimeric_rep_set_aligned.tre !nmds.py -i core_diversity_analyses_open_ref_97/bdiv_even771/weighted_unifrac_dm.txt -o NMDS_output !cat NMDS_output !alpha_rarefaction.py -i 97_open_otu_table_wmd.biom -m $mapping_file -o 97_alpha_uneven -t 97_open_reference_otus/rep_set.tre -f !rm -r permanova_DietTypes !compare_categories.py --method permanova -m $mapping_file -c DietType -i core_diversity_analyses_open_ref_97/bdiv_even771/weighted_unifrac_dm.txt -o permanova_DietTypes !compare_alpha_diversity.py -i core_diversity_analyses_open_ref_97/arare_max771/alpha_div_collated/PD_whole_tree.txt -m $mapping_file -c DietType -o compare_alpha_div_DietType_PD !biom add-metadata -i core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L3.biom -o core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L2_wmd.biom --sample-metadata-fp MicrobesWeEat.txt #--observation-metadata-fp rep_set_tax_assignments_header.txt !biom add-metadata -i core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L3.biom -o core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L3_wmd.biom --sample-metadata-fp MicrobesWeEat.txt #--observation-metadata-fp rep_set_tax_assignments_header.txt !biom add-metadata -i core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L4.biom -o core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L4_wmd.biom --sample-metadata-fp MicrobesWeEat.txt #--observation-metadata-fp rep_set_tax_assignments_header.txt !biom add-metadata -i core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L5.biom -o core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L5_wmd.biom --sample-metadata-fp MicrobesWeEat.txt #--observation-metadata-fp rep_set_tax_assignments_header.txt !group_significance.py -i core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L2_wmd.biom -m $mapping_file -c DietType -o group_significance_L2 !group_significance.py -i core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L3_wmd.biom -m $mapping_file -c DietType -o group_significance_L3 !group_significance.py -i core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L4_wmd.biom -m $mapping_file -c DietType -o group_significance_L4 !group_significance.py -i core_diversity_analyses_open_ref_97/taxa_plots/table_mc771_sorted_L5_wmd.biom -m $mapping_file -c DietType -o group_significance_L5 !make_otu_heatmap.py -i 97_open_otu_table_wmd.biom !make_otu_heatmap.py -i core_diversity_analyses_open_ref_97/table_even771.biom !per_library_stats.py -i 97_open_otu_table_wmd.biom-m $mapping_file -o 97_open_otu_table_wmd.perlibstats !biom add-metadata -i otu_tables/open_ref_97_otu_table_no_euks_no_singletons.biom -o otu_tables/open_ref_97_otu_table_no_euks_no_singletons_with_metadata.biom -m MicrobesWeEat.txt