# For space reasons,I'm not showing the implementations of these
# imported functions
from index_edit import kEditDp, Index, partition
def queryIndexEdit(p, t, k, index):
''' Look for occurrences of p in t with up to k edits using an
index combined with dynamic-programming alignment. '''
l = index.ln
occurrences = []
seen = set() # for avoiding reporting same hit twice
for part, poff in partition(p, k+1):
for hit in index.occurrences(part): # query index w/ partition
# left edge of T to include in DP matrix
lf = max(0, hit - poff - k)
# right edge of T to include in DP matrix
rt = min(len(t), hit - poff + len(p) + k)
mn, off, xcript = kEditDp(p, t[lf:rt])
off += lf
if mn <= k and (mn, off) not in seen:
occurrences.append((mn, off, xcript))
seen.add((mn, off))
return occurrences
t = 'I had two banana splits'
ind = Index(t, ln=4)
queryIndexEdit('bananasplit', t, 1, ind)
[(1, 10, 'MMMMMMDMMMMM')]
t = 'haystack needle haystack noodle haystack nedle haystack'
# needle noodle ne-dle
# |||||| | ||| || |||
# needle needle needle
p = 'needle'
# Find the two occurrences that are within edit distance 1
# The substring length for the index has to be <= 3 (why?)
queryIndexEdit(p, t, 1, Index(t, ln=3))
[(0, 9, 'MMMMMM'), (1, 41, 'MIMMMM')]
# Find the three occurrences that are within edit distance 2
# The substring length for the index has to be <= 2 (why?)
queryIndexEdit('needle', t, 2, Index(t, ln=2))
[(0, 9, 'MMMMMM'), (1, 41, 'MIMMMM'), (2, 25, 'MRRMMM')]