In [1]:
# Not a great way to build a suffix array, but we'll use it
# for the small examples here
def naiveBuildSA(t):
    """Return the suffix array of t, built by directly sorting its suffixes.

    The result is the list of suffix start offsets of t, arranged so that
    the suffixes they point at are in ascending lexicographic order.

    Every suffix of t has a different length, so no two suffixes compare
    equal and the ordering is fully determined by the suffixes themselves.
    Each sort key is a full copy of a suffix, which is why this is only
    suitable for short strings.
    """
    return sorted(range(len(t)), key=lambda offset: t[offset:])
In [2]:
# Sorted suffixes of 'abaaba$' are: $, a$, aaba$, aba$, abaaba$, ba$, baaba$
# so the expected offsets are 6, 5, 2, 3, 0, 4, 1.
naiveBuildSA('abaaba$') # works on a simple example
Out[2]:
[6, 5, 2, 3, 0, 4, 1]
In [3]:
def binarySearchSA(t, sa, p):
    """Binary-search the suffix array for the first suffix that is >= p.

    Parameters:
        t:  the text; must already end with the '$' terminator.
        sa: the suffix array of t (len(sa) == len(t)).
        p:  the pattern to look up.

    Returns:
        The smallest index k into sa such that p <= t[sa[k]:] in
        lexicographic order, where p being a prefix of the suffix counts
        as p <= suffix.  Returns len(sa) when p is greater than every
        suffix, and 0 when p is not greater than the first suffix (for
        example when p is empty).

    Raises:
        AssertionError: if t lacks the terminator or sa has the wrong length.
    """
    assert t[-1] == '$' # t already has terminator
    assert len(t) == len(sa) # sa is the suffix array for t
    # Virtual sentinels: index -1 stands for a suffix smaller than p and
    # index len(sa) for one that is >= p.  Loop invariant:
    #     T[sa[l]:] < p <= T[sa[r]:]
    # Using -1 (rather than 0) as the lower sentinel means sa[0] is actually
    # examined, so patterns that sort at or before the first suffix work.
    l, r = -1, len(sa)
    while r - l > 1:
        c = (l + r) // 2
        start = sa[c]
        # Walk along p and T[sa[c]:] from the left while they agree.
        i = 0
        while i < len(p) and start + i < len(t) and p[i] == t[start + i]:
            i += 1
        if i == len(p):
            p_le_suffix = True # p is a prefix of T[sa[c]:]
        elif start + i >= len(t):
            p_le_suffix = False # suffix is a proper prefix of p, so p is larger
        else:
            p_le_suffix = p[i] < t[start + i] # first differing character decides
        if p_le_suffix:
            r = c
        else:
            l = c
    return r
In [4]:
# Build the text and its suffix array once; `t` and `sa` are reused by the
# search examples in later cells.
t = 'abaaba$'
sa = naiveBuildSA(t)
# 'aba' is a prefix of the suffix at sa[3] ('aba$'), which is the first
# suffix in sorted order that is >= 'aba'.
binarySearchSA(t, sa, 'aba')
Out[4]:
3
In [5]:
# No suffix is >= 'bb', so the result is one past the last index: len(sa).
binarySearchSA(t, sa, 'bb') # p is greater than all suffixes
Out[5]:
7
In [6]:
# 'aa' is a prefix of the suffix at sa[2] ('aaba$').
binarySearchSA(t, sa, 'aa')
Out[6]:
2
In [7]:
def suffixLcp(t, toff, p):
    """Return the length of the longest common prefix of p and t[toff:].

    Characters are compared pairwise, p[k] against t[toff + k], until the
    first mismatch or until either p or t runs out.
    """
    matched = 0
    # Never look past the end of p or the end of t.
    for k in range(min(len(p), len(t) - toff)):
        if p[k] != t[toff + k]:
            break
        matched += 1
    return matched
In [8]:
# All of 'aba' matches the start of the text, so the LCP is len('aba').
suffixLcp('abaaba$', 0, 'aba')
Out[8]:
3
In [9]:
# p[3] = 'b' differs from t[3] = 'a', so only the first 3 characters match.
suffixLcp('abaaba$', 0, 'abab')
Out[9]:
3
In [10]:
# The pattern is longer than the text; matching stops at the terminator
# t[6] = '$', after 6 matching characters.
suffixLcp('abaaba$', 0, 'abaabaaba')
Out[10]:
6
In [11]:
def binarySearchSA_lcp1(t, sa, p):
    """Binary-search the suffix array for the first suffix >= p, skipping
    characters already known to match.

    The search tracks the longest common prefix (LCP) between p and the
    suffixes at the current left and right boundaries.  Every suffix that
    sorts between the two boundaries must share at least
    min(lcp_lp, lcp_rp) leading characters with p, so each comparison
    starts at that offset instead of at 0.

    Parameters:
        t:  the text; must already end with the '$' terminator.
        sa: the suffix array of t (len(sa) == len(t)).
        p:  the pattern to look up.

    Returns:
        The smallest index k into sa such that p <= t[sa[k]:] in
        lexicographic order, where p being a prefix of the suffix counts
        as p <= suffix.  Returns len(sa) when p is greater than every
        suffix, and 0 when p is not greater than the first suffix (for
        example when p is empty).

    Raises:
        AssertionError: if t lacks the terminator or sa has the wrong length.
    """
    assert t[-1] == '$' # t already has terminator
    assert len(t) == len(sa) # sa is the suffix array for t
    # Virtual sentinels: index -1 stands for a suffix smaller than p and
    # index len(sa) for one that is >= p.  Loop invariant:
    #     T[sa[l]:] < p <= T[sa[r]:]
    l, r = -1, len(sa)
    # LCP of p with the left / right boundary suffix.  0 is always a safe
    # lower bound, which is what the sentinels start with.
    lcp_lp, lcp_rp = 0, 0
    while r - l > 1:
        c = (l + r) // 2
        start = sa[c]
        # The first min(lcp_lp, lcp_rp) characters are known to match.
        i = min(lcp_lp, lcp_rp)
        while i < len(p) and start + i < len(t) and p[i] == t[start + i]:
            i += 1
        # Here i is the LCP of p and T[sa[c]:].
        if i == len(p):
            p_le_suffix = True # p is a prefix of T[sa[c]:]
        elif start + i >= len(t):
            p_le_suffix = False # suffix is a proper prefix of p, so p is larger
        else:
            p_le_suffix = p[i] < t[start + i] # first differing character decides
        if p_le_suffix:
            r, lcp_rp = c, i
        else:
            l, lcp_lp = c, i
    return r
In [12]:
# Uses `t` and `sa` from the earlier cell; expected to agree with
# binarySearchSA on the same query (3).
binarySearchSA_lcp1(t, sa, 'aba')
Out[12]:
3
In [13]:
# 'bb' is greater than every suffix, so the result is len(sa).
binarySearchSA_lcp1(t, sa, 'bb')
Out[13]:
7
In [14]:
# 'aa' is a prefix of the suffix at sa[2] ('aaba$').
binarySearchSA_lcp1(t, sa, 'aa')
Out[14]:
2