#!/usr/bin/env python # coding: utf-8 # In[1]: def traceLcs(D, x, y): ''' Backtrace for LCS; returns LCS as string. ''' i, j = len(x), len(y) # start in lower-right st = [] while i > 0 and j > 0: # get the three contributions diag, vert, horz = 0, 0, 0 if i > 0 and j > 0: delt = -1 if x[i-1] == y[j-1] else 1 diag = D[i-1, j-1] + delt if i > 0: vert = D[i-1, j] if j > 0: horz = D[i, j-1] if diag <= vert and diag <= horz: # diagonal is best, thus, this char is part of LCS st.append(x[i-1]) i -= 1; j -= 1 # move up and left elif vert <= horz: i -= 1 # vertical is best; move up else: j -= 1 # horizontal is best; move left # reverse it, then return string-ized LCS return (''.join(st))[::-1] import numpy def lcsDp(x, y): ''' Return LCS of x and y. Uses bottom-up dynamic programming approach. ''' D = numpy.zeros((len(x)+1, len(y)+1), dtype=int) for i in range(1, len(x)+1): for j in range(1, len(y)+1): delt = -1 if x[i-1] == y[j-1] else 1 D[i, j] = min(D[i-1, j-1] + delt, D[i-1, j], D[i, j-1]) return traceLcs(D, x, y), D # In[2]: lcs, D = lcsDp('ATCTGAT', 'TGCATA') # example from Jones & Pevzner 6.5 lcs # In[3]: D