# first we need to do some configuration of the notebook, don't worry about this for now from __future__ import division, print_function from functools import partial from IPython.core import page page.page = print import matplotlib.pyplot as plt seq_lengths = range(25) s2_times = [t ** 2 for t in range(25)] plt.plot(range(25), s2_times) plt.xlabel('Sequence Length') plt.ylabel('Runtime (s)') s3_times = [t ** 3 for t in range(25)] plt.plot(range(25), s3_times) plt.xlabel('Sequence Length') plt.ylabel('Runtime (s)') plt.plot(range(25), s2_times) plt.plot(range(25), s3_times) plt.xlabel('Sequence Length') plt.ylabel('Runtime (s)') s4_times = [t ** 4 for t in range(25)] plt.plot(range(25), s2_times) plt.plot(range(25), s3_times) plt.plot(range(25), s4_times) plt.xlabel('Sequence Length') plt.ylabel('Runtime (s)') from skbio import BiologicalSequence %psource BiologicalSequence.k_words for e in BiologicalSequence("ACCGGTGACCAGTTGACCAGTA").k_words(3): print(e) for e in BiologicalSequence("ACCGGTGACCAGTTGACCAGTA").k_words(7): print(e) for e in BiologicalSequence("ACCGGTGACCAGTTGACCAGTA").k_words(3, overlapping=False): print(e) from iab.algorithms import kmer_distance %psource kmer_distance s1 = BiologicalSequence("ACCGGTGACCAGTTGACCAGT") s2 = BiologicalSequence("ATCGGTACCGGTAGAAGT") s3 = BiologicalSequence("GGTACCAAATAGAA") print(s1.distance(s2, kmer_distance)) print(s1.distance(s3, kmer_distance)) fivemer_distance = partial(kmer_distance, k=5) s1 = BiologicalSequence("ACCGGTGACCAGTTGACCAGT") s2 = BiologicalSequence("ATCGGTACCGGTAGAAGT") s3 = BiologicalSequence("GGTACCAAATAGAA") print(s1.distance(s2, fivemer_distance)) print(s1.distance(s3, fivemer_distance)) from skbio import SequenceCollection query_sequences = SequenceCollection( [BiologicalSequence("ACCGGTGACCAGTTGACCAGT", "s1"), BiologicalSequence("ATCGGTACCGGTAGAAGT", "s2"), BiologicalSequence("GGTACCAAATAGAA", "s3"), BiologicalSequence("GGCACCAAACAGAA", "s4"), BiologicalSequence("GGCCCACTGAT", "s5")]) guide_dm = query_sequences.distances(kmer_distance) print(guide_dm) fig = guide_dm.plot(cmap='Greens') from scipy.cluster.hierarchy import average, dendrogram, to_tree for q in query_sequences: print(q) guide_lm = average(guide_dm.condensed_form()) guide_d = dendrogram(guide_lm, labels=guide_dm.ids, orientation='right', link_color_func=lambda x: 'black') guide_tree = to_tree(guide_lm) from iab.algorithms import guide_tree_from_sequences %psource guide_tree_from_sequences t = guide_tree_from_sequences(query_sequences, display_tree=True) from iab.algorithms import format_dynamic_programming_matrix, format_traceback_matrix from skbio.alignment._pairwise import _compute_score_and_traceback_matrices %psource _compute_score_and_traceback_matrices from skbio.alignment._pairwise import _traceback %psource _traceback from skbio.alignment import global_pairwise_align_nucleotide %psource global_pairwise_align_nucleotide global_pairwise_align_nucleotide = partial(global_pairwise_align_nucleotide, penalize_terminal_gaps=True) print(query_sequences[0]) print(query_sequences[1]) aln1 = global_pairwise_align_nucleotide(query_sequences[0], query_sequences[1]) print(aln1) print(query_sequences[2]) print(global_pairwise_align_nucleotide(aln1, query_sequences[2])) aln2 = global_pairwise_align_nucleotide(query_sequences[2], query_sequences[3]) print(aln2) print(aln1) print(aln2) aln3 = global_pairwise_align_nucleotide(aln1, aln2) print(aln3) from skbio import TreeNode guide_tree = TreeNode.from_linkage_matrix(guide_lm, guide_dm.ids) print(guide_tree) from iab.algorithms import progressive_msa %psource progressive_msa msa = progressive_msa(query_sequences, guide_tree, pairwise_aligner=global_pairwise_align_nucleotide) print(msa) fig = guide_dm.plot(cmap='Greens') msa_dm = msa.distances() fig = msa_dm.plot(cmap='Greens') d = dendrogram(guide_lm, labels=guide_dm.ids, orientation='right', link_color_func=lambda x: 'black') msa_lm = average(msa_dm.condensed_form()) d = dendrogram(msa_lm, labels=msa_dm.ids, orientation='right', link_color_func=lambda x: 'black') from iab.algorithms import progressive_msa_and_tree %psource progressive_msa_and_tree msa, tree = progressive_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, display_tree=True, display_aln=True) from iab.algorithms import iterative_msa_and_tree %psource iterative_msa_and_tree msa, tree = iterative_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, num_iterations=1, display_aln=True, display_tree=True) msa, tree = iterative_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, num_iterations=2, display_aln=True, display_tree=True) msa, tree = iterative_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, num_iterations=3, display_aln=True, display_tree=True) msa, tree = iterative_msa_and_tree(query_sequences, pairwise_aligner=global_pairwise_align_nucleotide, num_iterations=5, display_aln=True, display_tree=True)