#!/usr/bin/env python # coding: utf-8 # In[1]: from bisect import bisect_left, bisect_right # for binary search import random random.seed(77) # so that you and I get same random lists # In[2]: a = [1, 2, 3, 3, 3, 4, 5] bisect_left(a, 3), bisect_right(a, 3) # In[3]: a = [2, 4, 6, 8, 10] bisect_left(a, 5), bisect_right(a, 5) # In[4]: # make a list of pseudo-random integers in [1, 99] ls = [ random.randint(1, 99) for _ in range(30) ] ls # show it # In[5]: ls.sort() # sort the list in-place ls # show the sorted list; note there are some repeated elements # In[6]: # The number 40 is not in the sorted list. If it were, where would it go? bisect_left(ls, 40) # In[7]: # insert it ls.insert(13, 40) ls # show the new list # In[8]: # The number 19 appears in the list multiple times. # Say we want a range [st, en) where st is the first element # (inclusive) and en is last element (exclusive) that contains a 19 st, en = bisect_left(ls, 19), bisect_right(ls, 19) # In[9]: st, en # In[10]: ls[st:en] # In[11]: # Of course, there also exists a total ordering over strings # (lexicographical ordering), so we can do all the same things # with strings strls = ['a', 'awkward', 'awl', 'awls', 'axe', 'axes', 'bee'] # In[12]: strls == sorted(strls) # In[13]: # We want to get the range of elements that equal a query string: st, en = bisect_left(strls, 'awl'), bisect_right(strls, 'awl') st, en # In[14]: # Say we want to get the range of elements that have some prefix, # e.g. 'aw' and say that 'z' is the lexicographically greatest # character in the alphabet. st, en = bisect_left(strls, 'aw'), bisect_left(strls, 'ax') st, en # In[15]: strls[st:en] # In[16]: # If we can do it for any sorted list of strings, we can do it for # a sorted list of suffixes t = 'abaaba$' suffixes = sorted([t[i:] for i in range(len(t))]) suffixes # In[17]: st, en = bisect_left(suffixes, 'aba'), bisect_left(suffixes, 'abb') st, en