In [1]:
def z(s):
    ''' Use Z-algorithm to preprocess given string.  See
        Gusfield for complete description of algorithm. '''
    
    Z = [len(s)] + [0] * len(s)
    assert len(s) > 1
    
    # Initial comparison of s[1:] with prefix
    for i in range(1, len(s)):
        if s[i] == s[i-1]:
            Z[1] += 1
        else:
            break
    
    r, l = 0, 0
    if Z[1] > 0:
        r, l = Z[1], 1
    
    for k in range(2, len(s)):
        assert Z[k] == 0
        if k > r:
            # Case 1
            for i in range(k, len(s)):
                if s[i] == s[i-k]:
                    Z[k] += 1
                else:
                    break
            r, l = k + Z[k] - 1, k
        else:
            # Case 2
            # Calculate length of beta
            nbeta = r - k + 1
            Zkp = Z[k - l]
            if nbeta > Zkp:
                # Case 2a: Zkp wins
                Z[k] = Zkp
            else:
                # Case 2b: Compare characters just past r
                nmatch = 0
                for i in range(r+1, len(s)):
                    if s[i] == s[i - k]:
                        nmatch += 1
                    else:
                        break
                l, r = k, r + nmatch
                Z[k] = r - k + 1
    return Z
In [2]:
z('abracadabra')
#  abracadabra (11)
#     a        (1)
#       a      (1)
#         abra (4)
#            a (1)
Out[2]:
[11, 0, 0, 1, 0, 1, 0, 4, 0, 0, 1, 0]
In [3]:
z('aaaaa')
Out[3]:
[5, 4, 3, 2, 1, 0]
In [4]:
def zMatch(p, t):
    s = p + "$" + t
    Z = z(s)
    occurrences = []
    for i in range(len(p) + 1, len(s)):
        if Z[i] >= len(p):
            occurrences.append(i - (len(p) + 1))
    return occurrences
In [5]:
zMatch('needle', 'haystack needle haystack')
#                 012345678901234567890123
#                           1         2
Out[5]:
[9]
In [6]:
zMatch('needle', 'haystack needle needle')
#                 0123456789012345678901
#                           1         2
Out[6]:
[9, 16]