%pylab inline from functools import partial from IPython.core import page page.page = print import matplotlib.pyplot as plt seq_lengths = range(25) s2_times = [t ** 2 for t in range(25)] plt.plot(range(25), s2_times) plt.xlabel('Sequence Length') plt.ylabel('Runtime (s)') s3_times = [t ** 3 for t in range(25)] plt.plot(range(25), s3_times) plt.xlabel('Sequence Length') plt.ylabel('Runtime (s)') plt.plot(range(25), s2_times) plt.plot(range(25), s3_times) plt.xlabel('Sequence Length') plt.ylabel('Runtime (s)') s4_times = [t ** 4 for t in range(25)] plt.plot(range(25), s2_times) plt.plot(range(25), s3_times) plt.plot(range(25), s4_times) plt.xlabel('Sequence Length') plt.ylabel('Runtime (s)') from skbio import DNA %psource DNA.iter_kmers for e in DNA("ACCGGTGACCAGTTGACCAGTA").iter_kmers(3): print(e) for e in DNA("ACCGGTGACCAGTTGACCAGTA").iter_kmers(7): print(e) for e in DNA("ACCGGTGACCAGTTGACCAGTA").iter_kmers(3, overlap=False): print(e) from iab.algorithms import kmer_distance %psource kmer_distance s1 = DNA("ACCGGTGACCAGTTGACCAGT") s2 = DNA("ATCGGTACCGGTAGAAGT") s3 = DNA("GGTACCAAATAGAA") print(s1.distance(s2, kmer_distance)) print(s1.distance(s3, kmer_distance)) fivemer_distance = partial(kmer_distance, k=5) s1 = DNA("ACCGGTGACCAGTTGACCAGT") s2 = DNA("ATCGGTACCGGTAGAAGT") s3 = DNA("GGTACCAAATAGAA") print(s1.distance(s2, fivemer_distance)) print(s1.distance(s3, fivemer_distance)) query_sequences = [DNA("ACCGGTGACCAGTTGACCAGT", {"id": "s1"}), DNA("ATCGGTACCGGTAGAAGT", {"id": "s2"}), DNA("GGTACCAAATAGAA", {"id": "s3"}), DNA("GGCACCAAACAGAA", {"id": "s4"}), DNA("GGCCCACTGAT", {"id": "s5"})] from skbio import DistanceMatrix guide_dm = DistanceMatrix.from_iterable(query_sequences, metric=kmer_distance, key='id') fig = guide_dm.plot(cmap='Greens') from scipy.cluster.hierarchy import average, dendrogram, to_tree for q in query_sequences: print(q) guide_lm = average(guide_dm.condensed_form()) guide_d = dendrogram(guide_lm, labels=guide_dm.ids, orientation='right', link_color_func=lambda x: 'black') guide_tree = to_tree(guide_lm) from iab.algorithms import guide_tree_from_sequences %psource guide_tree_from_sequences t = guide_tree_from_sequences(query_sequences, display_tree=True) from iab.algorithms import format_dynamic_programming_matrix, format_traceback_matrix from skbio.alignment._pairwise import _compute_score_and_traceback_matrices %psource _compute_score_and_traceback_matrices from skbio.alignment._pairwise import _traceback %psource _traceback from skbio.alignment import global_pairwise_align_nucleotide %psource global_pairwise_align_nucleotide global_pairwise_align_nucleotide = partial(global_pairwise_align_nucleotide, penalize_terminal_gaps=True) aln1, _, _ = global_pairwise_align_nucleotide(query_sequences[0], query_sequences[1]) print(aln1) aln1, _, _ = global_pairwise_align_nucleotide(aln1, query_sequences[2]) print(aln1) aln2, _, _ = global_pairwise_align_nucleotide(query_sequences[2], query_sequences[3]) print(aln2) aln3, _, _ = global_pairwise_align_nucleotide(aln1, aln2) print(aln3) from skbio import TreeNode guide_tree = TreeNode.from_linkage_matrix(guide_lm, guide_dm.ids) print(guide_tree) from iab.algorithms import progressive_msa %psource progressive_msa msa = progressive_msa(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, guide_tree=guide_tree) print(msa) fig = guide_dm.plot(cmap='Greens') msa_dm = DistanceMatrix.from_iterable(msa, metric=kmer_distance) fig = msa_dm.plot(cmap='Greens') d = dendrogram(guide_lm, labels=guide_dm.ids, orientation='right', link_color_func=lambda x: 'black') msa_lm = average(msa_dm.condensed_form()) d = dendrogram(msa_lm, labels=msa_dm.ids, orientation='right', link_color_func=lambda x: 'black') from iab.algorithms import progressive_msa_and_tree %psource progressive_msa_and_tree msa = progressive_msa(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, guide_tree=guide_tree) msa, tree = progressive_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, display_tree=True, display_aln=True) from iab.algorithms import iterative_msa_and_tree %psource iterative_msa_and_tree msa, tree = iterative_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, num_iterations=1, display_aln=True, display_tree=True) msa, tree = iterative_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, num_iterations=2, display_aln=True, display_tree=True) msa, tree = iterative_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, num_iterations=3, display_aln=True, display_tree=True) msa, tree = iterative_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, num_iterations=5, display_aln=True, display_tree=True)