#!/usr/bin/env python
# coding: utf-8
"""Translate a DNA coding sequence into single-letter amino-acid codes.

Exported from a notebook: the ``# In[n]:`` markers are the original cell
numbers.  Exploratory cells live in ``_notebook_scratch`` and only run when
the file is executed as a script, so importing this module has no side
effects.
"""

# In[1]:

# Codon (DNA alphabet, upper case) -> single-letter amino acid; '*' marks a
# stop codon.
genetic_code = {
    # this was adapted from
    # https://gitlab.com/RebelCoder/dna-toolset/-/blob/90e8d9249c6dcc82a850aa854da4baabef98ee6f/structures.py
    "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "TGT": "C", "TGC": "C",
    "GAT": "D", "GAC": "D",
    "GAA": "E", "GAG": "E",
    "TTT": "F", "TTC": "F",
    "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    "CAT": "H", "CAC": "H",
    "ATA": "I", "ATT": "I", "ATC": "I",
    "AAA": "K", "AAG": "K",
    "TTA": "L", "TTG": "L", "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
    "ATG": "M",
    "AAT": "N", "AAC": "N",
    "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "CAA": "Q", "CAG": "Q",
    "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R", "AGA": "R", "AGG": "R",
    "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S", "AGT": "S", "AGC": "S",
    "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
    "TGG": "W",
    "TAT": "Y", "TAC": "Y",
    "TAA": "*", "TAG": "*", "TGA": "*",
}


# In[22]:

def translate(sequence: str, genetic_code: dict) -> list:
    """Translate *sequence* codon by codon, starting at the first ``ATG``.

    Args:
        sequence: Nucleotide string.  Matching is case-sensitive and uses
            whatever alphabet the keys of *genetic_code* use.
        genetic_code: Mapping from three-letter codon to amino-acid symbol,
            with ``'*'`` as the value for stop codons.

    Returns:
        A list of amino-acid symbols, one per codon, beginning with the
        start codon.  Translation ends at the first stop codon (which is not
        included) or at the end of the sequence; a trailing fragment shorter
        than three letters is ignored.  An empty list is returned when the
        sequence contains no ``ATG``.

    Raises:
        KeyError: If a full codon in the reading frame is missing from
            *genetic_code*.
    """
    # One scan instead of a membership test followed by ``index``.
    start = sequence.find("ATG")
    if start == -1:
        return []

    protein = []
    # Stopping at ``len - 2`` guarantees every slice is a full 3-letter codon.
    for codon_start in range(start, len(sequence) - 2, 3):
        codon = sequence[codon_start:codon_start + 3]
        try:
            amino_acid = genetic_code[codon]
        except KeyError:
            # The new message already names the codon, so drop the implicit
            # context of the bare lookup failure.
            raise KeyError(
                codon + " is not present in the genetic code."
            ) from None
        if amino_acid == "*":
            break
        protein.append(amino_acid)
    return protein


def _notebook_scratch():
    """Replay the exploratory notebook cells in their original order."""
    # In[2]:
    'TTTTATGATGT'.index('ATG')  # evaluates to 4

    # In[3]:
    list(range(4, 25, 3))  # evaluates to [4, 7, 10, 13, 16, 19, 22]

    # In[4]:
    test_sequence = 'TTTTATGACCT'
    print(test_sequence[10:10+3])  # prints 'T': slicing past the end is safe

    # In[5]:
    genetic_code['AGA']  # evaluates to 'R'

    # In[11]:
    'AAA' not in 'ACGAAAT'  # evaluates to False

    # In[23]:
    # NOTE: the reading frame of this sequence reaches the codon 'ACU', which
    # is not a key of genetic_code, so this call raises KeyError.
    translate("AGGTACGTGGAACGTACGTGACCGATGGACCACACUCATTGAGTGTGTACACACACGTGTGTGTGACACAACAAC", genetic_code)

    # In[ ]:


if __name__ == "__main__":
    _notebook_scratch()