In [1]:
// A simple example of a substring index; mirrors the example from the lecture notes.

// We are going to extract 4 substrings of length 5 from the text t, one every
// 4 positions (offsets 0, 4, 8 and 12), like this:
// t:           CGTGCCTACTTACTTACAT
// substring 1: CGTGC
// substring 2:     CCTAC
// substring 3:         CTTAC
// substring 4:             CTTAC
// t is the text that gets indexed.
t := "CGTGCCTACTTACTTACAT"
In [2]:
// substringize extracts substrings of length ln from t, starting at offset 0
// and advancing iv positions between consecutive substrings.
//
// It returns two parallel slices: the k-th element of the first slice is the
// substring that begins in t at the byte offset stored in the k-th element of
// the second slice. Only substrings that fit entirely inside t are produced,
// so nothing is returned when ln > len(t).
//
// ln = length of substrings to extract
// iv = distance between substrings to extract; e.g. 1 means take *every* substring
//
// A negative ln or a non-positive iv has no meaningful interpretation (iv == 0
// would never advance through t), so those inputs yield empty, non-nil slices
// instead of looping forever or panicking.
func substringize(t string, ln int, iv int) ([]string, []int) {
	last := len(t) - ln // largest offset at which a full substring still fits
	if ln < 0 || iv <= 0 || last < 0 {
		return []string{}, []int{}
	}
	// Start positions are 0, iv, 2*iv, ... up to and including last.
	n := last/iv + 1
	subs := make([]string, 0, n)
	offsets := make([]int, 0, n)
	for i := 0; i <= last; i += iv {
		subs = append(subs, t[i:i+ln])
		offsets = append(offsets, i)
	}
	return subs, offsets
}
In [3]:
// Extract length-5 substrings of the example text, stepping 4 positions between starts.
substringize("CGTGCCTACTTACTTACAT", 5, 4)
Out[3]:
[CGTGC CCTAC CTTAC CTTAC] [0 4 8 12]
In [4]:
// mapize is like substringize, but uses a map data structure: it maps each
// extracted substring to the list of byte offsets in t at which that substring
// was taken. Offsets for a given substring appear in increasing order.
//
// ln = length of substrings to extract
// iv = distance between substrings to extract; e.g. 1 means take *every* substring
//
// Only substrings that fit entirely inside t are indexed. A negative ln or a
// non-positive iv has no meaningful interpretation (iv == 0 would never
// advance through t), so those inputs yield an empty, non-nil map instead of
// looping forever or panicking.
func mapize(t string, ln int, iv int) map[string][]int {
	index := make(map[string][]int)
	if ln < 0 || iv <= 0 {
		return index
	}
	// last is the largest offset at which a full substring still fits; it is
	// negative (so the loop is skipped) when ln > len(t).
	for i, last := 0, len(t)-ln; i <= last; i += iv {
		sub := t[i : i+ln]
		index[sub] = append(index[sub], i)
	}
	return index
}
In [5]:
// Build the map-based index over the example text: length-5 substrings taken
// every 4 positions, each mapped to the offsets where it was extracted.
index := mapize("CGTGCCTACTTACTTACAT", 5, 4)
// Bare expression: the cell's value is the index itself.
index
Out[5]:
map[CGTGC:[0] CCTAC:[4] CTTAC:[8 12]]
In [6]:
// p is the pattern whose occurrences in t we want to find.
p := "CTTACTTA"
In [7]:
// index: give me a hint where I should look for occurrences of p in t
// The index was built from length-5 substrings, so only the first 5 characters
// of p are used as the lookup key. The offsets returned are candidates only:
// the remaining characters of p still have to be compared against t at each
// offset to confirm a real occurrence.
// ok reports whether the key is present in the index; it is not used further
// in this cell.
elem, ok := index[p[:5]]
elem
Out[7]:
[8 12]