In [1]:
import (
"bytes"
"fmt"
)

In [2]:
func zArray(s string) []int {
// Use Z-algorithm to preprocess given string.  See
// Gusfield for complete description of algorithm.
Z := make([]int, len(s)+1)
Z[0] = len(s)

// Initial comparison of s[1:] with prefix
for i := 1; i < len(s); i++ {
if s[i] == s[i-1] {
Z[1] += 1
} else {
break
}
}

r, l := 0, 0
if Z[1] > 0 {
r, l = Z[1], 1
}

for k := 2; k < len(s); k++ {
if k > r {
// Case 1
for i := k; i < len(s); i++ {
if s[i] == s[i-k] {
Z[k] += 1
} else {
break
}
}
r, l = k + Z[k] - 1, k
} else {
// Case 2
// Calculate length of beta
nbeta := r - k + 1
Zkp := Z[k - l]
if nbeta > Zkp {
// Case 2a: Zkp wins
Z[k] = Zkp
} else {
// Case 2b: Compare characters just past r
nmatch := 0
for i := r+1; i < len(s); i++ {
if s[i] == s[i - k] {
nmatch += 1
} else {
break
}
}
l, r = k, r + nmatch
Z[k] = r - k + 1
}
}
}
return Z
}

In [3]:
// Helper function that returns a new string
// that is the reverse of the argument
func reverseString(s string) string {
var buf bytes.Buffer
for i := 0; i < len(s); i++ {
buf.WriteByte(s[len(s)-i-1])
}
return buf.String()
}

// Helper function that returns a new int slice
// that is the reverse of the argument
func reverseInts(is []int) []int {
buf := make([]int, len(is))
for i := 0; i < len(is); i++ {
buf[len(is)-i-1] = is[i]
}
return buf
}

In [4]:
// Make N array (Gusfield theorem 2.2.2) from Z array
func nArray(s string) []int {
return reverseInts(zArray(reverseString(s)))
}

In [5]:
// Make L' array (Gusfield theorem 2.2.2) using p and N array.
// L'[i] = largest index j less than n such that N[j] = |P[i:]|
func bigLPrimeArray(length int, n []int) []int {
lp := make([]int, length)
for j := 0; j < length-1; j++ {
i := length - n[j]
if i < length {
lp[i] = j + 1
}
}
return lp
}

In [6]:
// Compile L array (Gusfield theorem 2.2.2) using p and L' array.
// L[i] = largest index j less than n such that N[j] >= |P[i:]|
func bigLArray(length int, lp []int) []int {
l := make([]int, length)
l[1] = lp[1]
for i := 2; i < length; i++ {
l[i] = l[i-1]
if lp[i] > l[i] {
l[i] = lp[i]
}
}
return l
}

In [7]:
// Compile lp' array (Gusfield theorem 2.2.4) using N array
func smallLPrimeArray(n []int) []int {
smallLps := make([]int, len(n))
for i := 0; i < len(n); i++ {
if n[i] == i+1 {  // prefix matching a suffix
smallLps[len(n)-i-1] = i+1
}
}
for i := len(n)-2; i > -1; i-- {  // "smear" them out to the left
if smallLps[i] == 0 {
smallLps[i] = smallLps[i+1]
}
}
return smallLps
}

In [8]:
// Return tables needed to apply good suffix rule
func goodSuffixTable(p string) ([]int, []int) {
n := nArray(p)
lp := bigLPrimeArray(len(p), n)
return bigLArray(len(p), lp), smallLPrimeArray(n)
}

In [9]:
// Given pattern string and list with ordered alphabet characters, create
// and return a dense bad character table.  Table is indexed by offset
// then by character.
func denseBadCharTab(p string, amap map[byte]int) [][]int {
tab := make([][]int, len(p))
nxt := make([]int, len(amap))
for i, c := range []byte(p) {
tab[i] = nxt[:]
nxt[amap[c]] = i+1
}
return tab
}

In [10]:
// Boyer-Moore preprocessing object
type BoyerMoore struct {
amap                map[byte]int
bigLS, smallLPrimes []int
}

// Constructor for Boyer-Moore preprocessing object
func NewBoyerMoore(p string, alphabet string) *BoyerMoore {
m := new(BoyerMoore)
m.amap = make(map[byte]int)
for i, c := range []byte(alphabet) {
m.amap[c] = i
}
m.bigLS, m.smallLPrimes = goodSuffixTable(p)
return m
}

// Return # skips given by bad character rule at offset i
func (bm *BoyerMoore) badCharacterRule(i int, c byte) int {
ci := bm.amap[c]
return i - (bm.badChars[i][ci]-1)
}

// Given a mismatch at offset i, return amount to shift
// as determined by (weak) good suffix rule.
func (bm *BoyerMoore) goodSuffixRule(i int) int {
length := len(bm.bigLS)
if i == length - 1 {
return 0
}
i += 1   // i points to leftmost matching position of P
if bm.bigLS[i] > 0 {
return length - bm.bigLS[i]
}
return length - bm.smallLPrimes[i]
}

// Return amount to shift in case where P matches T
func (bm *BoyerMoore) matchSkip() int {
return len(bm.smallLPrimes) - bm.smallLPrimes[1]
}

In [11]:
// Utility function for taking max of integers
func Max(x, y int) int {
if x > y {
return x
}
return y
}

In [12]:
// Do Boyer-Moore matching
func boyerMoore(p string, bm *BoyerMoore, t string) []int {
i := 0
occurrences := make([]int, 0)
for i < len(t)-len(p)+1 {
shift, skipGs := 1, 1
mismatched := false
for j := len(p) - 1; j >= 0; j-- {
if p[j] != t[i+j] {
skipBc := bm.badCharacterRule(j, t[i+j])
skipGs = bm.goodSuffixRule(j)
shift = Max(shift, Max(skipBc, skipGs))
mismatched = true
break
}
}
if ! mismatched {
occurrences = append(occurrences, i)
skipGs := Max(shift, skipGs)
shift = Max(shift, skipGs)
}
i += shift
}
return occurrences
}

In [13]:
t := "haystack needle haystack"  // "text" - thing we search in
p := "needle"                    // "pattern" thing we search for

In [14]:
bm := NewBoyerMoore(p, "abcdefghijklmnopqrstuvwxyz")
boyerMoore(p, bm, t)

Out[14]:
[9]
In [15]:
t[9:9+len(p)]

Out[15]:
needle
In [16]:
boyerMoore(p, bm, "needleneedleneedle")

Out[16]:
[0 6 12]