import ( "fmt" "sort" ) // Let's make a class that implements an inverted (substring) // index where the map data structure is a sorted list of // (substring, offset) pairs. Query w/ binary search. type IndexItem struct { mer string offset int } type IndexSorted struct { length, interval int index []IndexItem } // Constructor for Boyer-Moore preprocessing object func NewIndexSorted(t string, ln, ival int) *IndexSorted { idx := new(IndexSorted) idx.length = ln idx.interval = ival // add all k-mers to the index for i := 0; i < len(t) - ln + 1; i++ { idx.index = append(idx.index, IndexItem{mer: t[i:i+ln], offset:i}) } // sort the index, first by k-mer (alphabetically) then by offset sort.Slice(idx.index, func(i, j int) bool { if idx.index[i].mer != idx.index[j].mer { return idx.index[i].mer < idx.index[j].mer } else { return idx.index[i].offset < idx.index[j].offset } }) return idx } func (idx *IndexSorted) query(p string) []int { qu := p[:idx.length] // sort.Search does binary search, returning first // element for which function returns true st := sort.Search(len(idx.index), func(i int) bool { return qu <= idx.index[i].mer }) en := sort.Search(len(idx.index), func(i int) bool { return qu < idx.index[i].mer }) ret := make([]int, 0) for i := st; i < en; i++ { ret = append(ret, idx.index[i].offset) } return ret } index := NewIndexSorted("CGTGCCTACTTACTTACAT", 5, 4) index.query("CTTACTTA") index.query("TTTTTTTT") // Now let's make a similar class but using a map instead of a sorted // list. A Go map is basically a hash table. type IndexHash struct { length, interval int index map[string][]int } func NewIndexHash(t string, ln, ival int) *IndexHash { idx := new(IndexHash) idx.length = ln idx.interval = ival idx.index = make(map[string][]int) // add all k-mers to the index (map) for i := 0; i < len(t) - ln + 1; i++ { mer := t[i:i+ln] idx.index[mer] = append(idx.index[mer], i) } return idx } func (idx *IndexHash) query(p string) []int { qu := p[:idx.length] // No need for binary search here; just a lookup return idx.index[qu] } index := NewIndexHash("CGTGCCTACTTACTTACAT", 5, 4) index.query("CTTACTTA") index.query("TTTTTTTT")