In [8]:
# For space reasons,I'm not showing the implementations of these
# imported functions
from index_edit import kEditDp, Index, partition
In [30]:
def queryIndexEdit(p, t, k, index):
    ''' Look for occurrences of p in t with up to k edits using an
        index combined with dynamic-programming alignment. '''
    l = index.ln
    occurrences = []
    seen = set()     # for avoiding reporting same hit twice
    for part, poff in partition(p, k+1):
        for hit in index.occurrences(part): # query index w/ partition
            # left edge of T to include in DP matrix
            lf = max(0, hit - poff - k)
            # right edge of T to include in DP matrix
            rt = min(len(t), hit - poff + len(p) + k)
            mn, off, xcript = kEditDp(p, t[lf:rt])
            off += lf
            if mn <= k and (mn, off) not in seen:
                occurrences.append((mn, off, xcript))
                seen.add((mn, off))
    return occurrences
In [31]:
t = 'I had two banana splits'
ind = Index(t, ln=4)
queryIndexEdit('bananasplit', t, 1, ind)
Out[31]:
[(1, 10, 'MMMMMMDMMMMM')]
In [38]:
t = 'haystack needle haystack noodle haystack nedle haystack'
#             needle          noodle          ne-dle
#             ||||||          |  |||          || |||
#             needle          needle          needle
p = 'needle'
# Find the two occurrences that are within edit distance 1
# The substring length for the index has to be <= 3 (why?)
queryIndexEdit(p, t, 1, Index(t, ln=3))
Out[38]:
[(0, 9, 'MMMMMM'), (1, 41, 'MIMMMM')]
In [39]:
# Find the three occurrences that are within edit distance 2
# The substring length for the index has to be <= 2 (why?)
queryIndexEdit('needle', t, 2, Index(t, ln=2))
Out[39]:
[(0, 9, 'MMMMMM'), (1, 41, 'MIMMMM'), (2, 25, 'MRRMMM')]