from __future__ import division from skbio.parse.sequences import parse_fasta from skbio import BiologicalSequence, SequenceCollection from iab.algorithms import progressive_msa_and_tree, iterative_msa_and_tree, kmer_distance, guide_tree_from_sequences seqs_16s = """>881726 GACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGATTCATCCTTCGGGATGGGTTAGCGGCGGACGGGTGAGTAACACGTAGGCAACCTGCCTGCAAGTCCGGGATAACTAACGGAAACGTTAGCTAATACCGGATACGCGGTTGGATCGCATGATCCGATCGGGAAAGACGGCGCAAGCTGCCACTTGTAGATGGGCCTGCGGCGCATTAGCTAGTTGGTGGGGTAACGGCTCACCAAGGCGACGATGCGTAGCCGACCTGAGAGAGTGATCGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCGCAATGGACGCAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGTTCTCGGATCGTAAAGCTCTGTTGCCAGGGAAGAACGCTCGGGAGAGTAACTGCTCTCGAGGTGACGGTACCTGAGAAGAAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGTCGATTAAGTTTGGTGTTTAAGCCCGGGGCTCAACCCCGGTTCGCACTGAAAACTGATCGACTTGAGTGTAGGAGAGGAAAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGCCTATAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGCATGCTAGGTGTTAGGGGTTTCGATACCCTTGGTGCCGAAGTCAACACAGTAAGCATGCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGACCCGCACAAGCAGTGGAGTATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCCCTGAATCCTCTAGAGATAGAGGCGGCCCTTCGGGGACAGGGGAGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGATCGTAGTTGCCAGCACTTCGGGTGGGCACTCTAGGATGACTGCCGGTGACAAACCGGAGGAAGGCGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTACTACAATGGCCGGTACAACGGGCTGCGAAGCCGCGAGGTGGAGCCAATCCCAGAAAGCCGGTCTCAGTTCAGATTGCAGGCTGCAACTCGCCTGCATGAAGTCGGAATTGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGTCT >793074 GAATGAACGCTGGCGGCGTGCTTAATAATGCAAGTCGAGCGCGTAGCAATACGAGCGGCGCACGGGTGCGTAACACGTAGGTCATCTGCCTCTAGGTCGGGGATAACTGCGGGAAACTGCAGCTAATACCCGATGATATCGAGAGATCAAAGCTTCGGTGCCTAGAGAGGAGCCTGCGGCTCATTAGCTAGTTGGTGGGGTAACGGCCTACCAAGGCCACGATGAGTAGCCGGCCTGAGAGGGCGATCGGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTAGGGAATATTGGGCAATGGGCGAAAGCCTGACCCAGCAACGCCGCGTGAGTGATGAAGCCTTTCGGGGTGTAAAGCTCTTTTGGCAGGGACGAATCAATGACGGTACCTGCGTAATAAGCCCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGGGGGGGCAAGCGTTATTCGGAATTACTGGGCGTAAAGCGCGCGTAGGCGGCTTCTTAAGTCGGGTGTTTAATGTCGGGGCTCAACTCCGGCGCTGCACTCGATACTGGGAGGCTAGAGTACTCGAGAGGAAAGCGGAATTCCTAGTGTAGCGGTGAAATGCGTAGATATTTAGGAGGAACACCAGTGGCGAAGGCGGCTTTCTGGAGAGTAACTGACGCTCAGAGCGCGAAAGCCAGGGGATCGAACGGGATTAGATACCCCGGTAGTCCTGGCTGTAAACGATGGGTACTAGATGTCGCCGGTATCAATCCCGGCGGTATCGTCGCTAACGCATTAAGTACCCCGCCTGGGGAGTACGCTCGCAAGAGTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGACTTGACATACCTCGGACCGGACCTAGAGATAGGACCTTCTCCCGTAAGGGAGCCGGGGATACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCCATCCCTAGTTGCCAGCGAGTCATGTCGGGAACTCTAGGGAGACTGCCGTTGATAAAACGAGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTACGTCCAGGGCTACACACGTGCTACAATGGCCACCACAAAGGGTCGCAATACCGTGAGGTGGAGCTAATCCCAAAAAGGTGGCCTCAGTTCGGATTGTAGTCTGCAACTCGACTACATGAAGTCGGAATCGCTAGTAATCGCGGATCAGAACGCCGCGGTGAATACAGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAGAGCTGGTTGCGTTAGAAGTCGCCAGGCCAACCGCAAGGGGGCAGGCGCCGAATGCGTGATGAGTGATTGGGGT >669210 AGAGTTTGATCCTGGCTCAGAACGAACGCTGGCGGCGCGCCTAACACATGCAAGTCGAACGGACTAGCCCCTTCGGGGGCGAAGTTAGTGGCGAACGGGTGAGTAACGCGTAAGTAACCTGCCCCCGGGACTGGGATAACAGCTCGAAAGAGCCGCTAATACCGGATAATTGTTGCAACACTTAGGAGTTGTAACTAAAGAAGGCCTCTGTTTCAAGCTTTCACCTGGGGATGGGCTTGCGTCCCATTAGCTTGTTGGTGAGGTAACGGCTCACCAAGGAAACGATGGGTAGCCGGCCTGAGAGGGTGGTCGGTCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGAGGAATCTTGCGCAATGGGCTAACGCCTGACGCAGCGACGCCGCGTGGACGATGAAGCTTTTCGGAGTGTAAAGTCCTTTCAGGAGGGAAGAAATGCCGGTAGTGTGAATAACACACCGGTTTGACGGTACCTCAAGAAGAAGCCCCGGCTAACTCCGTGCCAGCAGCCGCGGTAACACGGAGGGGGCAAGCGTTGTTCGGAATCACTGGGCGTAAAGAGCGCGTAGGTGGTTGTGTAAGTCGGATGTGAAATCCCTCGGCTCAACCGAGGAACTGCGTTCGAAACTACATAGCTAGAGGGCAGGAGAGGAGAGCGGAATTCCCAGTGTAGCGGTGAAATGCGCAGATATTGGGAAGAACACCGGTGGCGAAGGCGGCTCTCTGGACTGTTCCTGACACTGAGGCGCGAAAGCCAGGGGAGCGAACGGGATTAGATACCCCGGTAGTCCTGGCCGTAAACGATGGGCACTAGGTGTGGGGGGTGTCGATCCCCCCCGTGCCGCAGCTAACGCATTAAGTGCCCCGCCTGGGAAGTACGATCGCAAGGTTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCGGAATTTGACATGTTTCTGACGGCCTGCAGAAATGCAGGCTTCCCCTCGGGGCAGATACACAGGAGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGCCCTTAGTTGCCATCGGTTCGGCCGGGAACTCTAAGGGGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAGTCAGTATGGCCTTTATGTTCCGGGCTACACACGTGCTACAATGGCTGGTACAAAGGGTCGCGATGCCGTGAGGTGGAGCCAATCCCAAAAAGCCAGTCTTAGTTCGGATTGGAGTCTGCAACTCGACTCTATGAAGCCGGAATCGCTAGTAATCGTGGATCAGCACGCCATGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAAAGTTGGTTGTACCAGAAGTCATTGGGCTAACCCTTTTGGGGGGCAGATGCCGAAGGTATGGTCAGCGATTGGGGTGAAGTCGTAACAAGGTAACC >583705 ACGGGTGAGTAACGCGTATGCAACCTACCTCGGAAAAGGGGATGACTGGTGGAAACGGGGATTAATGCCCCCTAGGGTTGTTTCTCTGCCTGGGTGAGCCGTTACTATTGGAACCGATTGAGATGGCCATGTTGGTCATTTCCTGGTTGGTGAGGTTACCTCACACCAAGGCGACGATGACTACGGGGTCTAAAAGGATGGTCCCGCACACTGGTACTGAGACACGGACCAGACTCCTACGGGAGGCAGCAGTGAGGAATATTGGTCAATGGGGGCAACCCTGAACCAGCCATGCCGCGTGAAGGAAGACGGCCCTATGGGTTGTAAACTTCTTTTATATGGGAATAAAGAGAGGTACGTGTACCTCAGTGAATGTACCATATGAATAAGCATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATGCAGGCGTTATCCGGATTTATTAGGTTTAAAGGGTGCGTAGGCGGGATACTAAGTCAGTGGTGAAAGTTTGCGGCTCAACCGTAAAATCGCCATTGATACTGGTATTCTTGAGTATACAGGAAGTAGGCGGAATGTGTAGTGTAGCGGTGAAATGCATAGATATTACACAGAACACCGATTGCGAAGGCAGCTTACTATAGTATAACTGACGCTGATGCACGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCTGTAAACGATGATTACTGGTTGTGCGCGATACACAGTGCGCGACTGAGCGAAAGCATTAAGTAATCCACCTGGGGAGTACGGCGGCAACGCTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGAGGAACATGTGGTTTAATTCGATGATACGCGAGGAACCTTACCTGGGCTTAAATGTAGAGTGCATGGAGTGGAAACATTCCTTTCCTTCGGGACTCTTTACAAGGTGCTGCATGGTTGTCGTCAGCTCGTGCCGTGAGGTGTCGGGTTAAGTCCCATAACGAGCGCAACCCCTATCATTAGTTGCTAACAGGTCAAGCTGAGGACTCTAGCGAAACTGCCGGTGTAAACCGTGAGGAAGGTGGGGATGACGTCAAATCAGCACGGCCCTTATGTCCAGGGCTACACACGTGTTACGATGGCCAGTACAAAGGGTAGCTACCTGGTGACAGGATGCTAATCTCAAAAGCTGGTCTCAGTTCGGATTGGAGTCTGCAACTCGACTCCATGAAGTTGGATTCGCTAGTAATCGTATATCAGCCATGATACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCAAACCATGGAAGCTGGGGGTACCTGAAGTACGTCACCGCAAGGAGCGTCCTAGGGTAAATCTAGTGACTGGGGTTAAGTGGTAACAAGGTAACC >524860 AGAGTTTGATCCTGGCTCAGAACGAACGTTGGCGGCATGGATGAGGCATGCAAGTCGCGGGAATCCCCAGCAATGGGGGGAACCGGCGTAAGGGGCAGTAAGGCGTAGGTACCTACCCCCAGGTCCGGGATAGCCCGCCGAGAGGCGGGGTAATACCGGATGACCTCGGGAGAGCAAAGCTCCGGCGCCTGAGGCGGGGCCTACGTGATATTACCTAGTTGGCGGGGTAACGGCCCACCAAGGGGGAGATGTCTAGCGGGTGTGAGAGCACGACCCGCGCCACTCGCACTGAGACACTGGCGAGACACCTACGGGTGGCTGCAGTCGAGGATCTTCGGCAATGGGGGCAACCCTGACCGAGCGATGCCGCGTGGGCGACGAAGGCCTTCGGGTTGTAAAGCCCTGTCGAGGGGGAGAAAGCCGCAAGGCGGATCCATCCCTGGAGGAAGCTCGGGCTAAGTTCGTGCCAGCAGCCGCGGTAAGACGAACCGAGCGAACGTTGTTCGGAATCACTGGGCTTAAAGGGCGCGTAGGCGGGCTGCCGCGTCCGGGGTGAAATCCCACGGCTCAACCGTGGAACGGCCCCGGGTACGGGCGGCCTCGAGGGGGATAGGGGCGTGCGGAACTGTGGGTGGAGCGGTGAAATGCGTTGATATCCACAGGAACTCCGGTGGCGAAGGCGGCACGCTGGATCCTCTCTGACGCTGAGGCGCGGAAGCCAGGGGAGCGAACGGGATTAGATACCCCGGTAGTCCTGGCCCTAAACGATGAGAACTGGGTAGTAGCCCTGGCATGGGGTTACTGCCGCAGCCAAAGTGCTAAGTTCTCCGCCTGGGGAGTATGGTCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCTCACACAAGCGGTGGAGCATGTGGCTTAATTCGAGGCTACGCGAAGAACCTTATCCTGGACTTGACATGTGCGAAAGCGCCAGCAGGTAGGACCCGGAAACGGGAACGAACGGTATCCAACCCGGAAGCTGGTACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTCGGGTTAAGTCCCATAACGAGCGAAACCCTTACCCCTTGTTGCAACCCGAAAGGGGCACTCGAGGGGGACTGCCGGTGTCAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCTTTATGTCCAGGGCTGCACACGTGCTACAATGGCGTGGACAGAGGGACGCGACTGCGCGAGCAGAAGCCGACCCCCGAAAGCACGCCCCAGTTCAGATCGCAGGCTGCAACCCGCCTGCGTGAAGCCGGAATCGCTAGTAATCGCGGGTCAGCAACACCGCGGTGAATGTGTTCCTGAGCCTTGTACACACCGCCCGTCAAGCCACGAAAGGGAGGGACGTCCGAAGTCGCCTCGCGGCGCCGAAGACGGACTTCCTGATTGGGACTAAGTCGTAACAAGGTAACC >501793 GACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAACGGAGTGGTTGAAGGAGCTTGCTCTTTTGATCGCTTAGTGGCAGACGGGTGAGTAACACGTAGGCAACCTGGCTGTAAGACGGGGATAACTGGCGGAAACGTGAGCTAAAACCGGATGGTCGGCTTGAGGGCATCCTCGAGTCGGGAAAGGACGGAGCAATCTGTCGCTTACAGATGGGCCTGCGGCGCATTAGCTAGTTGGTAGGGTAACGGCCTACCAAGGCGACGATGCGTAGCCGACCTGAGAGGGTGAACGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCGGCAATGGACGAAAGTCTGACCGAGCAACGCCGCGTGAGTGATGAAGGTTTTCGGATCGTAAAGCTCTGTTGCCAGGGAAGAACGCCAGGGAGAGTAACTGCTCTCTGGGTGACGGTACCTGAGAAGAAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGTCGTTTAAGTCTCATGTCTAAACCCCGGGGCTCAACCTCGGGGTGCATGGGAAACTGGGCGACTGGAGTGCATGTGAGGAAAGTGGAATTCCACGTGTAGCGGTGGAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGCCTGTAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAATGCTAGGTGTTAGGGGTTTCGATACCCTTGGTGCCGAAGTTAACACATTAAGCATTCCGCCTGGGGAGTACGGTCGCAAGACTGAAACTCAAAGGAATTGACGGGGACCCGCACAAGCAGTGGGGTATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCGATGAAACATGCAGAGATGTGTGCCCTCTTCGGAGCATTGGAGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTAAGGTTAGTTGCCAGCAGGTGAAGCTGGGCACTCTAACATGACTGCCGGTGACAAACCGGAGGAAGGCGGGGATGACGTCAAATCATCATGCCCCTTATGACCTGGGCTACACACGTACTACAATGGCCAGTACAACGGGAAGCGAAGTGGCGACACGGAGCCAATCTTAGAAAGCTGGTCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCATGAAGTCGGAATTGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGTCT >296752 AGAGTTTGATCTCTGGCTCAGAACAAACGCTGGCGGTGCGTCTTAAGCATGCAAGTCGAGCGATGGTAGGGGGCTTGCTCCCTATTCATAGCGGCGGACTGGTGAGTAACGCGTAGATGACATACCTTTTGCTGGGGGATAGCTTGTGGAAACACAGGGTAATACCGCATACGATTGAGGCGGTTAGAGCGCTTCAATCAAAGCCTTGTATGGGGCGGCAGTTGAGTGGTCTGCGTACTATTAGCTTGTTGGTGGGGTAACGGCTCACCAAGGCGATGATAGTTATCCGGCCTGAGAGGGTGAACGGACACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGCTAAGAATATTCCGCAATGGGCGAAAGCCTGACGGAGCGACACCGCATGAGTGATGAAGGTCGAAAGATTGTAAAATTCTTTTTGAGAGTGATGAATAAAGTCGAGCAGTAATGCTCGGTGATGACGGTAACTTTTGAATAAGGGGTGGCTAATTACGTGCCAGCAGCCGCGGTAACACGTAAGCCCCAAGCGTTGTTCGGAATTATTGGGTGTAAAGGGCATGTAGGTGGTCTTGCAAGCTTGATGTGAAATCTTACAGCTTAACTGTAAAACTGCATTGAGAACTGCATAACTTAAGAATAACTGAGGCGCAACTGGAATTCCAGGTGTAGGGGTGAAATCTGTAGATATCTGGAAGAACACCAATGGCGAAGGCAAGTTGCGAGCAGATTATTGACACTGAGGTGCGAAGGTGCGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCGCACAGACAACGATGTACACTGGGCGTCTGGCTTTATGCTGGGTGCCGTAGTAAACGCGATAAGTGTACCGCCTGGGGAGTATGCTCGCAAGGGTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGATACGCGAGAAATCTTACCTGGGTTTGACATTTAGTGGAATTGTATAGAGATATGCAAGGTACTTGTACCCGCTAAACAGGTGCTGCATGGCTGTCGTCAGCTCGTGCCGTGAGGTGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTGCTAGTTACTAACAGGTTATGCTGAGGACTCTAGCGGAACTGCCGGTGACAAACCGGAGGAAGATGGGGATGACGTCAAGTCATCATGGCCCTTATGTCCAGGGCAACACACGTGCTACAATGGTTGTAACAAAGTGATGCGAAATCGCAAGATGAAGCAAAACGCAGAAATGCAATCGTAGTTCGGATTGGAGTCTGAAACTCGACTCCATGAAGTTGGAATCGCTAGTAATCGCATATCAGCACGATGCGGTGAATACGTTCCCGGGCCTTGCACACACCGCCCGTCA >293514 AGAGTTTGATCCTGGCTCAGAACGAACGCTGGCGGTGCGTCTTAAGCATGCAAGTCGAGCGATGGTAGGGGGCTTGCTCCCTATTCATAGCGGCGGACTGGTGAGTAACGCGTAGATGACATACCTTTTGCTGGGGGATAGCTTGTGGAAACACAGGGTAATACCGCATACGATTGAGGCGGTTAGAGCGCTTCAATCAAAGCCTTGTATGGGGCGGCAGTTGAGTGGTCTGCGTACTATTAGCTTGTTGGTGGGGTAACGGCTCACCAAGGCGATGATAGTTATCCGGCCTGAGAGGGTGAACGGACACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGCTAAGAATATTCCGCAATGGGCGAAAGCCTGACGGAGCGACACCGCGTGAGTGATGAAGGTCGAAAGATTGTAAAACTCTTTTGAGAGTGATGAATAAGTCGAGCAGTAATGCTCGGTGATGACGGTAACTTTTGAATAAGGGGTGGCTAATTACGTGCCAGCAGCCGCGGTAACACGTAAGCCCCAAGCGTTGTTCGGAATTATTGGGCGTAAAGGGCATGTAGGTGGTCTTGCAAGCTTGATGTGAAATCTTACAGCTTAACTGTAAAACTGCATTGAGAACTGCATAACTTAAGAATAACTGAGGCGCAACTGGAATTCCAGGTGTAGGGGTGAAATCTGTAGATATCTGGAAGAACACCAATGGCGAAGGCAAGTTGCGAGCAGATTATTGACACTGAGGTGCGAAGGTGCGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCGCACAGTCAACGATGTACACTGGGCGTCTGGCTTTATGCTGGGTGCCGTAGTAAACGCGATAAGTGTACCGCCTGGGGAGTATGCTCGCAAGGGTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGATACGCGAGGAATCTTACCTGGGTTTGACATACACATTATCTTTGCAGAGATGTAAAGCGGGGGTAACCCCAATGTGAACAGGTGCTGCATGGCTGTCGTCAGCTCGTGCCGTGAGGTGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTATTGCCAGTTACTAACAAGTTAAGTTGAGGACTCTGGCGAAACTGCCGGTGACAAATCGGAGGAAGATGGGGATGACGTCAAGTCATCATGGCCCTTATGTCCAGGGCAACACACGTGCTACAATGGTTGAGACAGAGTGATGCTAAGTCGCAAGATGGAGCAAAACGCAGAAATTCAATCGTAGTTCGGATTGGAGTCTGAAACTCGACTCCATGAAGTTGGAATCGCTAGTAATCGCATATCAGCACGATGCGGTGAATACGTTCCCGGGCCTTGCACACACCGCCCGTCA >292553 AGAGTTTGATCCTGGCTCAGAACGAACGCTGGCGGTGCGTCTTAAGCATGCAAGTCGAGCGATGAATGAGGGGCTTGCTCCTTATTCATAGCGGCGGACTGGTGAGTAACGCGTAGATGACATGTCGATGGCAGGGGGATAGCCAGTAGAAATATTGGGTAATACCGCGTATCCTTCTTGTTGTTAGAGGACAAGAAGAAAAGCCTTGTATGGGGCGGCTATTGAGTGGTCTGCGTACTATTAGTTTGTTGGTGGGGTAACGGCCTACCAAGACTATGATAGTTATCCGGCCTGAGAGGGTGAACGGACACATTGGGACTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGCTAAGAATATTCCGCAATGGGCGAAAGCCTGACGGAGCGACACCGCGTGAGTGATGAAGGTCGAAAGATTGTAAAACTCTTTTGAATATGATGAATAAGTCAAGCAGTAATGCTTGGCGATGACGGTAGTGTTTGAATAAGGGGTGGCTAATTACGTGCCAGCAGCCGCGGTAACACGTAAGCCCCAAGCGTTGTTCGGAATTATTGGGCGTAAAGGGCATGTAGGTGGTTTTGTAAGCTTGATGTGAAATCTTACAGCTTAACTGTAAAACTGCATTGAGAACTGCAGAACTAGAGTAACTGAGGTGCAACTGGAATTCCAGGTGTAGGGGTGAAATCTGTAGATATCTGGAAGAACACCAATGGCGAAGGCAAGTTGCAAGCAGATTACTGACACTGAGGTGCGAAGGTGCGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCGCACAGTCAACGATGTACACTGGGCGTCTGGCTTTATGCTGGGTGCCGTAGTAAACGCGATAAGTGTACCGCCTGGGGAGTATGCTCGCAAGGGTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGATACGCGAGAAATCTTACCTGGGTTTGACATTTAGTGGAATTGTATAGAGATATGCAAGGTACTTGTACCCGCTAAACAGGTGCTGCATGGCTGTCGTCAGCTCGTGCCGTGAGGTGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTACTGCTAGTTACTAACAGGTTATGCTGAGGACTCTAGCGGAACTGCCGGTGACAAACCGGAGGAAGATGGGGATGACGTCAAGTCATCATGGCCCTTATGTCCAGGGCAACACACGTGCTACAATGGTTGTAACAAAGTGATGCGAAATCGCAAGATGAAGCAAAACGCAGAAATGCAATCGTAGTTCGGATTGGAGTCTGAAACTCGACTCCATGAAGTTGGAATCGCTAGTAATCGCATATCAGCACGATGCGGTGAATACGTTCCCGGGCCTTGCACTAACGGCCCGTCA >266495 AGTTTGATCCTGGCTCAAGATGAACGCTAGCGGCAGGCTTAACACATGCAAGTCAAAGGGCAACGGGGAGAGTGCTTGCACTCTCTGCCGGCGACTGGCGCACGGGTGAGTAACACTTATGCAGACACTGCCTTCCACAGGGCGGACAACCTCTCCCAAAGGGAGGCTAATCCCGCGTATATCCCTTGGGGGCATCCCCGGGGGAGGAAAGGATTACCGGTGTGCAGGATGGGCATGCGGCGCATTACGCAGTAGGCGGGGTAACGGCCCACCTAACCGACCATGCGTATGGGTTCTGAGAGGAAGGCCCCCCACACTGGTACTGAGACACTGACCAGACTCCTACTGGAGGCAGCAGTGAGGAACATTGGTCAATGGGCGGGAGCCTGAACCAGCAAACCCGCGTGAAGGAAGAAGGCGCCGAACGTCGTAAACTTCTTTTGTCCGGGATCAAAGGGCGCCACGTGTGGCGTTGTGAGTGTACCTGTAGAGAAAGCTTCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGTAGGGAGCGAGCGTTGTCCGGATTTATTGGGTGTAAAGGGCGCGTAGGTGGTCGGTTAAGTCAGGTGTGAAAGCTCGGGGCTCAACCCGGAGGATCCGCTGGAACTTTGGTGTCATGAGGCGCAGGAGAAGTAAAGTGGAATTCGTGGTGTAGCGGTGAAATGCATAGATATTGGGCGGAACTCCGGTGGCGAAGGCAGCGTTCTGGCGCGTGCCTGACGCTGAGGCGCGAAAGCGTGGGTATCGAACGGGATTAGATACCCCGGTAGTCCACGCAGTAAACGATGAATACTGGGTGTCGGACCCATAGAACGTTTGGGTGCGCGCAGCGAAAGCGATAAGCATTCCAAGTGGGGAGTACACCGGCAGTGATGAGACTCAAAGGAATCGACGGGGGTTCGCACAAGTGGAGGGATATGTGGTTTAATTAGACGATAAGTGAGGAACGTGACCCGGGTTCAACAGGGAGTCGACAGGGGCAGAGATTCCCTCTTCCACGGACGTCTTCCGAGGTGGGGCATGGTTGTCAGTCAGCTACGTGCCGTGAGGTGTCGGCTTAAGTGCCATAAGGTGTGCAACACGGGCAGACAGTTGCTAACGGGTAGAGCAGTGGAATGTGTAGTGATTGCAGGGGCAAGCCGCGAGGAAGGGGGGGATGATGTCAAATCAGCGCGGCCCTTAGGTCAGGGGTGACACACGTGCTGCAATGGCGGGGACAGAGGGATGTGAAGAGGCGACGTGGAGCGAACCCCAAAAACCCCGCCCCAGTTAGGATTGTAGTATGCAACCCGAATACATGAAGCCGGAATAGGTAGTAATCGCGGATCAGAATGCAGCGGTGAATAAGTTCCCGGCTCTAGCACACACCGCCCGTCA >229854 GAGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCATGCTTAACACATGCAAGTCGAACGGCAGCATGACTTAGCTTGCTAAGTTGATGGCGAGTGGCGAACGGGTGAGTAACGCGTAGGAATATGCCTTAAAGAGGGGGACAACTTGGGGAAACTCAAGCTAATACCGCATAAACTCTTCGGAGAAAAGCTGGGGACTTTCGAGCCTGGCGCTTTAAGATTAGCCTGCGTCCGATTAGCTAGTTGGTAGGGTAAAGGCCTACCAAGGCGACGATCAGTAGCTGGTCTGAGAGGATGACCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGGACAATGGGGGCAACCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTGAGGGTTGTAAAGCACTTTCAGTGGGGAGGAGGGTTTCCCGGTTAAGAGCTAGGGGCATTGGACGTTACCCACAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCCGCGGTAATACGGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCCGTTAAAANGGTGCCTAAGGTGGTTTGGATNAGTTATGTGTTAAATTCCCTGGCGCCTCCACCCTGGNGCCAGGTCCATANTAAAAACTGTTAAACTCCGAAGTATGGGCACAAGGTAANTTGGAAANTTCCGGTGGTNANCCGNTGAAAATGCGCTTAGAGATNCGGGAAGGGACCACCCCAGTGGGGAAGGCGGCTACCTGGCCTAATAACTGACATTGAGGCACGAAAAGCGTGGGGAGCAACCAGGATTAGATACCCTGGTAGTCCACGCTGTAAACGATGTCAACTAGCTGTNGGTTATATGAATATAATTAGTGGCGAAGCTAACGCGATAAGTTGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAAGGAATNGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTACCCTTGACATACAGTAAATCTTTCAGAGATGAGAGAGTGCCTTCGGGAATACTGATACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGTAACGAGCGCAACCCTTATCTCTAGTTGCCAGCGAGTAATGTCGGGAACTCTAAAGAGACTGCCGGTGACAAACCGGAGGAAGGCGGGGACGACGTCAAGTCATCATGGCCCTTACGGGTAGGGCTACACACGTGCTACAATGGCCGATACAGAGGGGCGCGAAGGAGCGATCTGGAGCAAATCTTATAAAGTCGGTCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGCGAATCAGCATGTCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGCTGCACCAGAAGTAGATAGTCTAACCGCAAGGGGGACGTTTACCACGGTGTGGTTCATGACTGGGGTGAAGTCGTAACAAGGTAGCCG >182569 AGAGTTTGATCCTGGCTCAGGATGAACGCTAGCTACAGGCTTAACACATGCAAGTCGAGGGGCAGCATGGTGTATCAATATATCTATGGCGACCAGCGCACCGGTGATGCACACCTCTCCTACCTGCCCCTTACTCCGGGATGATCTTTCTAAAAAAATATTACTACTCCATGGTATTACCGAAAAACGTCTTTTTGTTGTTTAAAAACTTCGATGGTGGAAGGTGATGCTTTCTATTATATACTTGGTGGGGTAACAGCCCACCACCTCAGCGATGAATAGGGGTTCTAATAAGAAGGTCCCCCCCATGGTAACTGGGCCCCGGTCCAAATTCTTCGGGAAGCCACCAGTGAGGATTATTGTTCAATGGCGGAGATTTTGACCCAGCCCAAGTAGCGTGAAGGATGACTGCTCCCATAGGTGGTAAACTTCTTTTATATGGGAATAAAGTGAGTCACGTGTGTCTTTTTGTATGTATCATATGAATAAGGATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATTCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGTTTGTTAAGTCAGTGGTGAAAGTTTGGGGCTCAACCGTGAAATTGCATTTGATACTGGCGGTCTTGAGTGCAGTAGAGGTGGGCGGAATTTGTGGTGTAGCGGTGAAATGCTTAGATATCATGCAGAACTCCGATTGCGAAGGCAGCTCACCGGAGTGTATCTGACGTTGAGGCTCGAAAGTGTGGGTATCAAACAGGATTAGATACCCTGGTAGTCCACACAGTAAAGAAGGAATATTGTCGTTGTGGGATCTCCATTAAGGGGTCAAGGGAAAGCATTAATTATTCCCCTGGGGGAGTAGTCCGCCAGAGGTGAAATTAAAAGAAATGGAGGGGGGCCGGCCCAAGGGAAGGACCATGTGGTTTAATTGGAGGATAGGGGAGGACCTTTCCCGGGGTTGAAAGTGCAAATGAATTATGGGGAGAGCCATTCCCTTCAAGGCATGAGAGAAGGTGCTGCATGGTTGTCGTCAGCTCGTGCCGTGAGGTGTCGGGTTAAGTCCCATAACGAGCGCAACCCTTATCTTCAGTTACTATCAGGTCAAGCTGAGCACTCTGGAGAGACTGCCGTTGTAAGATGAGAGGAAGGTGGGGATGACGTCAAATCAGCACGGCCCTTACGTCCGGGGCTACACACGTGTTACAATGGGGGGTACAGAAGGCAGCTACCCAGCGACAGGATGCCAATCCCAAAAACCTATCTCAGTTCGGATTGAAGTCTGCAACCCGCCTTCGTGAAGTTGGATTCGCTAGTAATCGCGCATCAGCCATGGCGCGGTGAATACGTTCCCGGGCCTTGCACACACCGCCCGTCA >1719550 TCCTGGCTCAGAACGAACGTTGGCGGCGTGGATTAGGCATGCAAGTCGCGCGAATCCCCGCAAGGGGGGAAGCGGCGTAAGGGGCAGTAAGGCGTGGGTACCTACCCGGGGGTCGGGGATAGCCCGTCGAGAGACGGGGTAATACCCGATGACGTGGAGACACCAAAGGTCCGCCGCCCTCGGCGGGGCCCACGTGATATTAGCTAGTTGGCGGGGTAACGGCCCACCAAGGCGGGGATGTCTAGCGGGTGTGAGAGCACGACCCGCGCCACTGGCACTGAGACACTGGCCAGACACCTACGGGTGGCTGCAGTCGAGGATCTTCGGCAATGGGGGAAACCCTGACCGAGCGACGCCGCGTGGGCGACGAAGGCCTTCGGGTTGTAAAGCCCTGTCGAGGGGGAGAAAGCCTTAACCGGGTGATCTATCCCTGGAGGAAGCACGGGCTAAGTTCGTGCCAGCAGCCGCGGTAAGACGAACCGTGCGAACGTTGTTCGGAATCACTGGGCTTAAAGGGCGCGTAGGCGGGTTGCCGCGTCCGGGGTGAAATCCCACGGCTCAACCGTGGGGCGGCCCCGGGTACGGGCAGCCTCGAGGAGAGTAGGGGCATGCGGAACTCTGGGTGGAGCGGTGAAATGCGTTGATATCCAGAGGAACTCCGGTGGCGAAGGCGGCATGCTGGACCCTTCCTGACGCTGAGGCGCGAAAGCCAGGGGAGCGAACGGGATTAGATACCCCGGTAGTCCTGGCCCTAAACGATGAGAACTAGGTAGCCGGCCGGACATGGGCTGGCTGCCGGAGCCAAAGTGCTAAGTTCTCCGCCTGGGGAGTATGGCCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCTCACACAAGCGGTGGAGCATGTGGCTTAATTCGAGGCTACGCGAAGAACCTTATCCCGGGCTTGACATGTTCGAAAGAGGCTCGAAGTAGCCCGCGGAAACGTGGGGCCAACGGTATCCAGTCCGGAGCGAGCTACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTCGGGTTAAGTCCCATAACGAGCGAAACCCTTACCCTCAGTTGCTTACTAGGACTCTGGGGGGACTGCCGGTGTCAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCTTTATGCCCGGGGCTGCACACGTGCTACAATGGCGTGGACAAAGAGACGCGAGCCCGCGAGGGGGAGCCAATCTCAGAAAGCACGCCCCAGTTCAGATCGCAGGCTGCAACTCGCCTGCGTGAAGCCGGAATCGCTAGTAATCGCGGGTCAGCAACACCGCGGTGAATGTGTTCCTGAGCCTTGTACACACCGCCCGTCAAGCCACGAAAGAGAGGGACGTCCGGAGTCGCCTTCACCGGTGCCGAAGACGGACTTCTTGATTGGGACTAAGTCGTAACAAGGTAACC >1794723 TTAGAGTTTGATCCTGGCTCAGAACGAACGTTGGCGGCGTGGATTAGGCATGCAAGTCTCGCGAATCCCCGCAAGGGGGGAAGCGGCGTAAGGGGCAGTAAGGCGTGGGTAACCCACCCCGGGGCCCGGGATAGCCCGTCGAGAGACGGGGTAATACCGGGCGACGCAGCGTGCCGGCATCGGTGTGCTGCCAAAGGTCCGCCGCCCCGGGCGGGGCCCACGTGGTATTAGCTAGTTGGTGGGGTGACGGCCCACCAAGGCGGAGATGCCTAGCGGGTGTGAGAGCACGACCCGCGCCACTGGCACTGAGACACTGGCCAGACACCTACGGGTGGCTGCAGTCGAGGATCTTCGGCAATGGGGGCAACCCTGACCGAGCGACGCCGCGTGGGCGACGAAGGCCTTCGGGTTGTAAAGCCCTGTCGAGGGGGAGAAACGTCCCGCAAGGGGCCTGATCTATCCCTGGAGGAAGCACGAGCTAAGTTCGTGCCAGCAGCCGCGGTAAGACGAACCGTGCGAACGTTGTTCGGAATCACTGGGCTTAAAGGGCGCGTAGGCGGGCTGCCGAGTCCGGGGTGAAATCCTCCCGCTCAACGGGAGAACGGCCCCGGGTACTGGCGGCCTCGAGGCGGGTAGGGGCGTGCGGAACACTGGGTGGAGCGGTGAAATGCGTTGATATCCAGTGGAACTCCGGTGGCGAAGGCGGCACGCTGGACCCGTCTGACGCTGAGGCGCGAAAGCCAGGGGAGCGAACGGGATTAGATACCCCGGTAGTCCTGGCCCTAAACGTTGAGAACTAGGTAGTCGGCCGGACATGGGCTGACTGCCGGAGCGAAAGTGCTAAGTTCTCCGCCTGGGGAGTATGGTCGCAAGGCTGAAACTCAAAGGAATTGACGGGGGCTCACACAAGCGGTGGAGCATGTGGCTTAATTCGAGGCAACGCGAAGAACCTTATCCCGGGCTTGACATGTGCGAAAGCGTCTGGGGGTACCCGCCGGAAACGGCCGGGGAAGGTATCCAGTCCTGAACCAGACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTCGGGTTAAGTCCCATAACGAGCGAAACCCTTACCCTCAGTTGCCAGCGGGTCACGCCGGGGACTCTGGGGGGACTGCCGGTGTCAAACCGGAGGAAGGTGGGGATGACGTCAAGTCCTCATGGCCTTTATGCCCGGGGCTGCACACGTGCTACAATGGCGTGGACAAAGGGGCGCGAACGCGCGAGCGGGAGCCGACCCCGGAAAGCACGCCCCAGTTCAGATCGCAGGCTGCAACTCGCCTGCGTGAAGTCGGAATCGCTAGTAATCGCGGGTCAGCAACACCGCGGTGAATGTGTTCCTGAGCCTTGTACACACCGCCCGTCAAGCCACGAAAGGGAGGGACGGCCGAAGTCGCGCCCCGCGCGCCGACGCCGGACTTCCCGATTGGGACTAAGTCGTAACAAGGTAACC >1142181 CACGTGGGTCATTTGCCCCGAAGCCCGGGATAGCCCATGGAAACATGGATTAATACCGGATGTGGTTGGAGTACACAGGTGCTCCGTATTAAACGGTAGGTAGCAATACCTTCCGCTTCGGGATAAGCCCGCGGCCCATTAGCTAGTTGGTGGGGTAAGACCCAACCAAGGAGACAACCGGGAGCCGGACAGAAAGGGTGACGGCCACATTGGGACTGAGAAACGGCCCGATCCTACGGAGGCAGCAGTAAGAATCTTCCGCATGAACGAAGTCCGACCGAGCGACGCGCTGAGTGATGAAGGTGTTATGCATCGTAAAGCTCCTTCGGGGAGGAGAATAAGCATAGTCCAAAAGGCTATGTGATGACGACCCTCCCTAAAGAAGCCCCGGCTAATTACGTGCAGCAGCGCGGCAATACGTAAGGGGTAAGCGTTGTTCGGAATTACTGGGCGTAAAGGGTGTGCAGGCGGGAGAGTAAGTTGGGGGTGAAATCTACGGGCCCAACCCGTAAACTGCCCTCAAAACTGCTTTTCTTGAGTGCAGGAGAGGAGACTGGAATTCCTAGTGTAGGAGTGAAATCTGTAGATATTAGGAAGAACACCGGTGGCGAAGGCGAGTCTCTGGCCTGACACTGACGCTGATACACGAAAGCGTGGGGAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGTTGTGCACTAGATGTTGGGGGTGTCAATCCCCTCAGTGTCGCAGTTAACGCATTAAGTGCACCGCCTGGGGAGTATGCTCGCAAGGGTGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGATACGCGAGGAACCTTACCAGGGCTTGACATACAGGTGCCGGGCTGTGAAAGCAGTCCTCTCTTCCGAGCGCCTGTACAGGTGTTGCACGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGTTAAGTCCCCCAACGAGCGCAACCCCTATTGTCTGTGCCATCATTAAGTTGGCACTCGAACGAAACTGCCGGTGATAAACCGGAGGAAGGTGGGGATGACGTCAAGTCATATGGCCCTTAGGCCTGGCTACACGTGCTACAATGGACAGTACAAGAGTCGCAAGACCGAAAGGTGGACCATCCAAAAGCTGTCCTCAGTTCCGATTGAAGTCTGAAACTCGACTCCATGAAGTTGGAATCGCTAGTAATCGCGCATCAGAATGGCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACCCGAGTTGGAAGTACTTGAAGTCGCTGATCTAACCTTCGGGAGGAAGGCGCCGATTGTACGTCTGATAAGGGGGGTGAAGTCGTAACAAGGTAACC >2683209 CTGGCGGCGTGGTTTAGGCATGCAAGTCGAACGCGAAAGATTTACTTCGGTAAATTGAGTAGAGTGGCGAACGGGTGAGTAATACGTACGAATCTACCTTAAAGACAGGGATAGTCCCGGGAAACTGGGTTTAATACCTGATGGTATCCGGCTTTGCCGGATTAAAGACGGCCTCTATTTATAAGCTGTTACTTTTAGATGAGCGTGCGCTCCATTAGTTAGTTGGTAAGGTAAGAGCTTACCAAGGCGATGATGGATAGGCGTCCTTAACGGGTGGTCGCCCACACTGGGATTGAGATACGGCCCAGACTCCTACGGGAGGCAGCAGTCGAGAATCGTCTACAATGAACGCAAGTTTGATAGTGCGACGCCGCGTGAATGAAGAAGCATTTCGGTGTGTAAAATTCTTTTATATAAGAACAGTGCATGTATGGTAAATAATTATACGTGAGAGATAGTACTATATGAATAAGCTCCGGCTAACTTCGTGCCAGCAGCCGCGGTAATACGAAGGGAGCAAGCGTTGTCCGGAATTACTAGGTGTAAAGGGTAAGTAGGCGGAAATTTAAGTCTCCGGTTAAATCTTCGGGCTCAACCCGAAATCTGCCTGAGATACTGGATTTCTAGAGTAAAGCAGATGAAGGCGGAATTCCTGGAGTAGCGGTGGAATGCGTAGATATCAGGAAGAACACCCATAGCGAAGGCAGCTTTCAATGCTATTACTGACGCTCAATTACGAAGGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACAGTAAACGATATGTACTTGATATTGGATGTTGAAAATTCAGTGTCGTAGCTAACGCGTTAAGTACATCACCTGGGGACTAACGGCCGCAAGGTTAAAACTCAAAGGAATTGACGGGGGCCCACACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGAACTTGACATGCCGAGAATCCTGTAGAAATATGGGAGTGCCTTTTTTGGAGCTCGGACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTTGTGAAATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTATCTTTAGTTGCTACCATTAAGTTGAGGACTCTAAAGAGACTGCCAGAGTACAAATCTGGAGGAAGGTGGGGACGACGTCAAGTCATCATGGTCCTTATGTTCAGGGCTACACACGTGCTACAATGGTTGGAACAAAAGGCAGCGAAGGGGCGACCCGGAGCTAATCTCCAAACCCAATCTTAGTCCGGATTGCAGTCTGCAACTCGACTGCATGAAGTTGGAATCGCTAGTAATCGTGAGTCAGCATATCACGGTGAACATGTTCCTGGGCCTTGTACACACCGCCCGTCAAGTCAGCCGAATCGAGTGCACCCGAAGAAGGTGAGTTAATTAGACAGCTTTCGAAGGTGTGCTTGTAAGGGGGACTAAGTC >2784824 AGTGGCGCACGGGTGAGTAACGCGTGGGTAACTTGCCTTTAAGTGAGGGATAACCCACTGAAAGGTGGACTAATACCTCATAAGACCACAGTGCTACGGCAGCGTGGTCAAAGGTGGCTTTATTAAAAGCTGCCGCTTGGAGAGAGACCCGCGTCCCATCAGCTTGTTGGTAAGGTAATGGCTTACCAAGGCCGAGACGGGTAGCTGGTCTGAGAGGATGGCCAGCCACACTGGAACTGAAACACGGTCCAGACTCCTACGGGAGGCAGCAGTGAGGAATCTTGCGCAATGGGGGGAACCCTGACGCAGCAACGCCGCGTGAGTGAAGAAGGTCTTCGGGTCGTAAAGCCCTGTCGGGAGGGAAGAAACAGTTATGCATGAATAATGCATAACCTTGACGGTACCTCCNGAGGAAGCACCGGCCAACTCCGTGCCAGCAGCCGCGGTAAAACGGAGGGTGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGAGCGTGTAGGCGGATAGATAAGTCGAGTGTGAAAGCCCTCAGCTTAACTGAGGAAGTGCATTCGAAACTATCTTTCTTGGGTACGGAAGAGGGAAGTGGAATTCCCGGTGTAGGGGTGAAATCCGTAGATATCGGGAGGAATACCAGTGGCGAAGGCGACTTCCTGGACCGTCACTGACGCTGAGACGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGGGCACTAGGTGTATCTCGCTTAGCGGGATGTGCCGTAGCTAACGCATTAAGTGCCCCGCCTGGGGAGTACGGTCGCAAGACTAAAACTCAAAGGAATTGACGGGGGCCCGCACAAGTGGTGGAGCATGTGGTTTAATTCGACGCAACGCGAAGAACCTTACCTGGGTTTGACATGCCGAGAATCTGCCAGAAATGGTGGAGTGCCCCGTTAGGGGAACTCGGACACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTCACCTTTAGTTGCCAGCATTAAGTTGGGCACTCTAAAGGGACTGCCGGTGTTAAACCGGAGGAAGGTGGGGACGACGTCNAGTCCTCATGGCCTTTATACCCAGGGCTACACACGTGCTACAATGGCCAGTACAAAGGGCTGCAATCCCGCGAGGGGGAGCCAACCCCAAAAATCTGGTCTTAGTTCGGATTGGAGTCTGCAACTCGACTCCATAAAGGTGGAATCGCTAGTAATCGTGAATCAGCACGTCACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCACGAAAGTTGGCTGTACCAGAAGTTGCTGAGCTAACTCGCCTCGGCGGGAGGCAGGCACCTAAGGTGTGGTTGATGATTGGGGTGAAGT >2941516 TTAGAGTTTGATCCTGGCTCAGGATGAACGCTAGCGATAGGCCTAACACATGCAAGTCGAGGGGTAACAGGGTAGCAATACCGCTGACGACCGGCAAATGGGTGAGTAACGCGTATGCAACCTACCGATAACAGTTGGATAGCTCCCTGAAAGGGGAATTAAACCGGCATGACACTATGAGATCGCCTGTTTTCATAGTTAAATATTTATAGGTTATTGATGGGCATGCGTGACATTAGCAAGTTGGTGAGGTAACGGCTCACCAATGCTACGATGTCTAGGGGTTCTGAGAGGAAGGTCCCCCACACTGGTACTGAGACACGGACCAGACTCCTACGGGAGGCAGCAGTGAGGAATATTGGTCAATGGACGGAAGTCTGAACCACCCACTTCGCGTGCAGGATGACTGCCCTATGGGTTGTAAACTGCTTTTATATAAGAGGAACAGTATTTATGTATAGATATTTGCCAGTATTATATGAATAAGGATCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGATCCGAGCGTTATCCGGATTTATTGAGTTTAAAGGGTGAGTACGCGGTAGTATAAGTCAGCGGTGATAACTCGCAGCTCATCTGTAAGCTTGCCGTTGACACTGTATTACTTGACTTAACGTTGAGGTATGCTGAATGGGGGGGGGTTACCCGTTGAAATGCATTAATCAAAACAACAGACCACCCGATTTGCGGACGGCAGCAAAACTACACTGTCCACTGACGCTGATGCACAAAAGGCGTGGGTATCAAACAGGATTAGATACCCTGGTAGTCCACGCTGTAAACGATGATTACTGGTTGTTTGTGATACACTGCAAGTGACTGAGCGAAAGCACTAAGTAATCCACTTGGCGAGTACGTCGGCAACGATGAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTCTAATTCGAGGCAACGCGAAGAACCTTACCCAGACTTGACATCTAGGAAAGGTCCTTGAAAGAGGATCGTGCCCGCAAGGGAATCCTAAGACAGGTGTTGCATGGCTGTCGTCAGCTCCTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTGCTTACAGTTACCATCGGTTCGGCCGGGGACTCTGTAAGGACTGCCGCTGATAAAGCGAAGGAAGGCGGGGACGACGTCAAGCAATCACGGCCCTTACGTCTGGGGCTACACACGTGCTACAATGGCCGGTACAATGAGTCGCAAAACCGCGAGGTCAAGCTAATCTCAAAAAACCGGTCTCAGTTCGGATTGGAGTCTGCAACCCGACTTCGTGAAGCTGGATTCGCTAGTAATCGCGCATCAGCCATGGCGCGGCGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCA >998428 GACGAACGCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAGTTGTTCCTTCGGGGACAGCTTAGCGGCGGACGGGTGAGTAACACGTAGGCAACCTGCCTGCAGGACCGGGATAACCCACGGAAACGTGAGCTAATACCGGATAGATGGTTCCCTCGCATGAGGGGATCAGGAAAGACGGGGCAACCTGTCACTTGTAGATGGGCCTGCGGCGCATTAGCTAGTTGGCGAGGTAACGGCTCACCAAGGCGACGATGCGTAGCCGACCTGAGAGGGTGAACGGCCACACTGGGACTGAGACACGGCCCAGACTCCTACGGGAGGCAGCAGTAGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGAGGAAGGTCTTCGGATCGTAAAGCTCTGTTGCCAAGGAAGAACGCTTGGTGGAGTAACTGCCATCAAGGTGACGGTACTTGAGAAGAAAGCCCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGGGCAAGCGTTGTCCGGAATTATGGGCGTAAGCGCGCGCAGCGGTTCTTTAAGTCTGAGGTTAAATGCAGGGCTCAACCTTGTAACGCCTTGGAAACTGGGGGACTGGAGTGTAGGAGAGGAAAGTGGAATTCCACGTGTAGCGGTGAAATGCGTAGAGATGTGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGCCTATAACTGACGCTGAGGCGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGAGTGCTAGGTGTTAGGGGTTTCGATACCCTTGGTGCCGAAGTTAACACAGTAAGCACTCCGCCTGGGGAGTACGCTCGCAAGAGTGAAACTCAAAGGAATTGACGGGGACCCGCACAGGCAGTGGAGTATGTGGTTTAATTCGAAGCAACGCGAAGAACCTTACCAGGTCTTGACATCCCGATGAAACGTCTAGAGATAGGCGCCCTCTTCGGAGCATTGGAGACAGGTGGTGCATGGTTGTCGTCAGCTCGTGTCGTGAGATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGAATTCAGTTGCCAGCACTTCGGGTGGGCACTCTGAATTGACTGCCGGTGACAAACCGGAGGAAGGTGGGGATGACGTCAAATCATCGTGCCCCTTATGACCTGGGCTACACACGTACTACAATGGTCGGTACAACGGGCAGCGAAGCCGCGAGGCGGAGCCAATCCTAGAAAAGCCGATCTCAGTTCGGATTGCAGGCTGCAACTCGCCTGCATGAAGTCGGAATTGCTAGTAATCGCGGATCAGCATGCCGCGGTGAATACGTTCCCGGGTCT >4343117 AACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGCGTGCGCGGTTCACGAACTTGTACGTGGATGGGCGCACGGCGCAGGGGGGCGTAACACGTGGGCACTCTGCCCTCCGATGGGGAATACTCCCGCGAACCGGGGGCTAATACCGCATAACATTCCGAGGACTTGGGTTCTTGGATTCAAAGCAGTGATGCCTGTGAGGAGGAGCCCGCGCCCGATTAGCTAGTTGGTAGGGTAACGGCCTACCTCGGCAATGATCGGTAGCTGGTCTGAGAGGATAATCAGACACACTGCAACTGAAACGAGGCCCAGACTCCTACCGTAGGGAACGCTGGGGAATCTTGCCTTCTGGGCGAAAGCATGACCCAACGACGCCGCGTGGGGGATGAAGCTTTTGCTAGTGTAAACCCCTTTTCACTGGTAAGAATGCACGCAAGGGAGCGACAGTACCCTGGCAAGAAGCCCCGGCTAACTACGTGCCACCCGCCTCGGTAAGACCTAGGGGGCCAGCGTTGTTCGGAATTACTGGGTGTATAGGGTACTTATGCGGTGCGACAAGTTGGGAGTGAAATCTCTGGGCTTAACCCAGAGGCTGCTTCTCAAACTGCTATGCTTGATTGTGACAGAGGCTCTTGAAATTGCAGGAGTAGCGTTGAAATGCATGTATATCTGCAAGATCACCCGAGATATGGACGAACAGCTGGATCACAAGTGACGCTGAGGAACGAAAGCTACGCTGAGCGAACAGGATTATATACACTGGTAGTCCTAGCACTAAACGATCATGACTTGCGGTGACGACCGTTCGGACGTCTCCCGGAGCTAACGCGTTAAGTCCTGCACCTGGGGAGTACGGTCGCAGACTGGAAGTCAAAGGAATTGACGGGGGCCCGCACAAGCGGTGGAACATGTGGTTCAATTCGACGCTACGCGAGGAACCTTACCTGGTTCGAAATTCTTATGACCAGCTGTAGAATTACGGCTTTCCTTCAAGAGACATGAGTCTAGGCGCTCCATGGCTGTCGTCAGTTCGTTCCGTGAGGTGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTGCACGTAGTTACTACTCGCAAGAGAGGACTCTACGTGGACTGCTCCGGATAACGGAGAGGAAGGTGGGAATGACGTCAAGTCCGCATGGCCTTTATGTCCAGGGCTACACACGTGTTACAATGCAGGGTACAAACCGTTGCCAACCCGCGAGGGGGAGCTAATCGGATAAAACTGTGCTCAGTTCGGATTGCAGTCTGCAACTCGACTGCATGAAGCTGGAATCGCTAGTAATGGGGATCAGCTTGACGCCGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACATCACGAAAGTGAGCTCACCTAGAAGTCGCCACGCTAACCGCAAGGGGGCAGGCGCCCAAGGTATGACTCATGATTGGGGTG >4353661 GGATGAACGCTAGCGGGAGGCTTAATACATGCAAGTCGAGGGTGAAGCTTTCTTCGGAAAGTGGAAACCGGCGAACGGGTGCGTAACGCGTACGCAACTTACCCCTTGCTGGAGAATAGCCCCGGGAAACTGGGATTAATGCTCCATGGTATGGTGAAATCGCATGATTTTATCATTAAAGGTTACGGCAAGGGATAGGCGTGCGTCCCATTAGCTTGTTGGTGAGGTAACGGCTCACCAATGCAAACGATGGGTAGCTGGTCTGAGAGGATGATCAGCCACACGGGCACTGAGACACGGGCCCGACTCCTACGGGAGGCAGCAGTAGGGAATATTGGACAATGGACGAAAGTCTGATCCAGCCATCCCGCGTGCAGGACGAATGCCCTATGGGTTGTAAACTGCTTTTCTAAGGAAAGAAATATCTCATTCATGAGGTGCTGACGGTACCTTAGGAATAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGCGGTATGATAAGTCAGTGGTGAAAGCCCGGGGCTCAACTCCGGAACTGCCGTTGATACTGTCATACTTGAGTCCAGTTGAGGTGGGCGGAATGATACATGTAGCGGTGAAATGCTTAGATATGTATCAGAACACCGACTGCGAAGGCAGCTCACTAAACTGGTACTGACGCTGAGGCACGAAAGCGTGGGTAGCGAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGATGCTAACTCGGTATGTGCGATATACTGTACGTGCCTGAGGGAAACCGTTAAGTTAGCCACCTGGGGAGTACGTTCGCAAGAATGAAACTCAAAGGAATTGACGGGGGTCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGATACGCGAGGAACCTTACCTGGGCTCTAATGTACCACGCCCGACCCTGAAAGGGGTCTTCTTCTTCGGAAGCGGGGTACAAGGTGCTGCATGGTTGTCGTCAGCTCGTGCCGTGAGGTGTTGGGTTAAGTCCCGCAACGAGCGCAACCCCTGTCCTTAGTTGCCAGCGGTCCGGCCGGGGACTCTAAGGAGACTGCCTTCGCAAGGAGTGAGGAAGGAGGGGACGACGTCAAATCATCATGGCCTTTATGCCCAGGGCTACACACGTGCTACAATGGTGAGGACAAAGGGCAGCCACTTAGCGATAAGGAGCAAATCCCAAAAACCTCACCTCAGTTCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAACCGCTAGTAATCGCAGATCAGACATGCTGCGGTGAATACGTTCCCGGACCTTGTACACACCGCCCGTCAAGCCATGGAGCCGGGTGTACCTTAAGGCGATAACCGAAAGGAGTTGCCCAAGGTA""" _seqs_16s = [] for seq_id, seq in list(parse_fasta(seqs_16s.split("\n"))): _seqs_16s.append(BiologicalSequence(seq, seq_id)) seqs_16s = SequenceCollection(_seqs_16s) tax = """669210 k__Bacteria; p__; c__; o__; f__; g__; s__ 881726 k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Paenibacillaceae; g__Paenibacillus; s__ 296752 k__Bacteria; p__Spirochaetes; c__Spirochaetes; o__Spirochaetales; f__Spirochaetaceae; g__Treponema; s__ 1794723 k__Bacteria; p__Planctomycetes; c__Planctomycetia; o__Gemmatales; f__Gemmataceae; g__Gemmata; s__ 2941516 k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Marinilabiaceae; g__; s__ 793074 k__Bacteria; p__; c__; o__; f__; g__; s__ 4353661 k__Bacteria; p__Bacteroidetes; c__[Saprospirae]; o__[Saprospirales]; f__; g__; s__ 292553 k__Bacteria; p__Spirochaetes; c__Spirochaetes; o__Spirochaetales; f__Spirochaetaceae; g__Treponema; s__ 2784824 k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; o__Syntrophobacterales; f__Syntrophaceae; g__; s__ 1719550 k__Bacteria; p__Planctomycetes; c__Planctomycetia; o__Gemmatales; f__Gemmataceae; g__Gemmata; s__ 182569 k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; s__ 266495 k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__S24-7; g__; s__ 524860 k__Bacteria; p__Planctomycetes; c__Planctomycetia; o__Gemmatales; f__Gemmataceae; g__Gemmata; s__ 293514 k__Bacteria; p__Spirochaetes; c__Spirochaetes; o__Spirochaetales; f__Spirochaetaceae; g__Treponema; s__ 2683209 k__Bacteria; p__WWE1; c__[Cloacamonae]; o__[Cloacamonales]; f__; g__; s__ 501793 k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Paenibacillaceae; g__Paenibacillus; s__ 229854 k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Legionellales; f__Legionellaceae; g__Legionella; s__ 583705 k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__; g__; s__ 1142181 k__Bacteria; p__Spirochaetes; c__GN05; o__SBYZ_6080; f__; g__; s__ 998428 k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Paenibacillaceae; g__Paenibacillus; s__ 4343117 k__Bacteria; p__Acidobacteria; c__DA052; o__Ellin6513; f__; g__; s__""" tax_lookup = dict([e.strip().split('\t') for e in tax.split('\n')]) seq1 = BiologicalSequence('AGCTAGCATCGATCGATCGATGCATGCAT') seq2 = BiologicalSequence('AGCTCGGCATCGAGGGCAGTCAATCGATCT') help(kmer_distance) # Compute the kmer distance in this cell query_seqs = SequenceCollection( [BiologicalSequence("ACGATGACCAGTGCTACCAGT", "s1"), BiologicalSequence("AACGATCGATCGATCGTGCTA", "s2"), BiologicalSequence("AACGATCTGCTA", "s3"), BiologicalSequence("CGATCGATGACATGCATG", "s4"), BiologicalSequence("CGATCTGCAT", "s5")]) help(guide_tree_from_sequences) # Display the guide tree in this cell. help(iterative_msa_and_tree) from skbio.alignment import global_pairwise_align_nucleotide # add your command for 1 iterations of iterative multiple sequence alignment here # hint: pass pairwise_aligner=global_pairwise_align_nucleotide # add your command for 5 iterations of iterative multiple sequence alignment here # hint: pass pairwise_aligner=global_pairwise_align_nucleotide help(progressive_msa_and_tree) # Add your command for progressive alignment and tree building here # hint: pass pairwise_aligner=global_pairwise_align_nucleotide print seqs_16s.get_seq('4343117') print seqs_16s.get_seq('4353661') from skbio.alignment import global_pairwise_align_nucleotide from skbio import BiologicalSequence def pairwise_percent_id(seq1_id, seq2_id, seq_lookup): seq1 = seq_lookup.get_seq(seq1_id) seq2 = seq_lookup.get_seq(seq2_id) aln = global_pairwise_align_nucleotide(seq1, seq2) return 1 - aln.distances()[0][1] print pairwise_percent_id('793074', '4353661', seqs_16s) # Compute additional pairwise identities, as necessary, to answer this question here. Show all of your commands! print tax_lookup['4343117'] print tax_lookup['4353661'] # Perform addition taxonomy look-ups here