from bisect import bisect_left, bisect_right # for binary search
import random
random.seed(77) # so that you and I get same random lists
a = [1, 2, 3, 3, 3, 4, 5]
bisect_left(a, 3), bisect_right(a, 3)
(2, 5)
a = [2, 4, 6, 8, 10]
bisect_left(a, 5), bisect_right(a, 5)
(2, 2)
# make a list of pseudo-random integers in [1, 99]
ls = [ random.randint(1, 99) for _ in range(30) ]
ls # show it
[80, 33, 24, 82, 12, 48, 56, 61, 15, 63, 50, 85, 3, 95, 18, 81, 51, 83, 19, 32, 9, 78, 53, 21, 19, 9, 20, 56, 44, 70]
ls.sort() # sort the list in-place
ls # show the sorted list; note there are some repeated elements
[3, 9, 9, 12, 15, 18, 19, 19, 20, 21, 24, 32, 33, 44, 48, 50, 51, 53, 56, 56, 61, 63, 70, 78, 80, 81, 82, 83, 85, 95]
# The number 40 is not in the sorted list. If it were, where would it go?
bisect_left(ls, 40)
13
# insert it
ls.insert(13, 40)
ls # show the new list
[3, 9, 9, 12, 15, 18, 19, 19, 20, 21, 24, 32, 33, 40, 44, 48, 50, 51, 53, 56, 56, 61, 63, 70, 78, 80, 81, 82, 83, 85, 95]
# The number 19 appears in the list multiple times.
# Say we want a range [st, en) where st is the first element
# (inclusive) and en is last element (exclusive) that contains a 19
st, en = bisect_left(ls, 19), bisect_right(ls, 19)
st, en
(6, 8)
ls[st:en]
[19, 19]
# Of course, there also exists a total ordering over strings
# (lexicographical ordering), so we can do all the same things
# with strings
strls = ['a', 'awkward', 'awl', 'awls', 'axe', 'axes', 'bee']
strls == sorted(strls)
True
# We want to get the range of elements that equal a query string:
st, en = bisect_left(strls, 'awl'), bisect_right(strls, 'awl')
st, en
(2, 3)
# Say we want to get the range of elements that have some prefix,
# e.g. 'aw' and say that 'z' is the lexicographically greatest
# character in the alphabet.
st, en = bisect_left(strls, 'aw'), bisect_left(strls, 'ax')
st, en
(1, 4)
strls[st:en]
['awkward', 'awl', 'awls']
# If we can do it for any sorted list of strings, we can do it for
# a sorted list of suffixes
t = 'abaaba$'
suffixes = sorted([t[i:] for i in range(len(t))])
suffixes
['$', 'a$', 'aaba$', 'aba$', 'abaaba$', 'ba$', 'baaba$']
st, en = bisect_left(suffixes, 'aba'), bisect_left(suffixes, 'abb')
st, en
(3, 5)