# Not a great way to build a suffix array, but we'll use it
# for the small examples here
def naiveBuildSA(t):
    """Build the suffix array of ``t`` by directly sorting its suffixes.

    Returns a list of start offsets ordered so that the suffixes
    ``t[offset:]`` appear in ascending order.  Every suffix is materialised
    as a slice for the sort, so this is only meant for small inputs.
    """
    # Suffixes of one string all have different lengths, so no two sort keys
    # are ever equal and the ordering is fully determined by the slices.
    return sorted(range(len(t)), key=lambda start: t[start:])
# Notebook-style check: the bare list literal after the call is the value
# the call returns; as a statement on its own it has no effect.
naiveBuildSA('abaaba$') # works on a simple example
[6, 5, 2, 3, 0, 4, 1]
def binarySearchSA(t, sa, p):
    """Binary-search the suffix array for the insertion point of ``p``.

    Parameters:
        t:  the text; must end with the terminator '$'.
        sa: the suffix array of ``t`` (same length as ``t``).
        p:  the pattern to look up.

    Returns:
        The smallest index ``k`` (with ``1 <= k <= len(sa)``) such that
        ``p <= t[sa[k]:]``, or ``len(sa)`` when ``p`` is greater than every
        suffix.  A suffix that has ``p`` as a prefix counts as ``>= p``.

    Raises:
        AssertionError: if ``t`` does not end in '$' or ``sa`` has a
            different length than ``t``.

    Note:
        The suffix at ``sa[0]`` is never compared against ``p``; the search
        assumes ``p`` sorts after it, so the result is always at least 1.
    """
    assert t[-1] == '$'  # t already has terminator
    assert len(t) == len(sa)  # sa is the suffix array for t
    if len(t) == 1:
        return 1  # t is just '$'; p is assumed to sort after it
    n, m = len(t), len(p)
    # invariant: t[sa[l]:] < p <= t[sa[r]:], where slot len(sa) stands for
    # a virtual suffix greater than everything
    l, r = 0, len(sa)
    while True:
        c = (l + r) // 2
        off = sa[c]
        # Skip over the characters on which p and t[off:] agree.
        i = 0
        while i < m and off + i < n and p[i] == t[off + i]:
            i += 1
        if i == m:
            # p is a prefix of (or equal to) the suffix: treat as p <= suffix
            plt = True
        elif off + i >= n:
            # The suffix ran out first, so it is a proper prefix of p and
            # therefore smaller than p.
            plt = False
        else:
            # First differing character decides the order.
            plt = p[i] < t[off + i]
        if plt:
            if c == l + 1:
                return c
            r = c
        else:
            if c == r - 1:
                return r
            l = c
# Text and suffix array used by the example calls that follow.
t = 'abaaba$'
sa = naiveBuildSA(t)
# Each bare integer below is the index returned by the call just above it.
binarySearchSA(t, sa, 'aba')
3
binarySearchSA(t, sa, 'bb') # p is greater than all suffixes
7
binarySearchSA(t, sa, 'aa')
2
def suffixLcp(t, toff, p):
    """Return how many leading characters ``p`` shares with ``t[toff:]``.

    Comparison starts at ``p[0]`` versus ``t[toff]`` and stops at the first
    mismatch or when either string runs out.
    """
    # Number of positions that can be compared before p or t is exhausted.
    limit = min(len(p), len(t) - toff)
    matched = 0
    while matched < limit and p[matched] == t[toff + matched]:
        matched += 1
    return matched
# Example calls; each bare integer is the value returned by the call above it.
suffixLcp('abaaba$', 0, 'aba')
3
suffixLcp('abaaba$', 0, 'abab')
3
# Pattern longer than the text: matching stops at the '$' terminator.
suffixLcp('abaaba$', 0, 'abaabaaba')
6
def binarySearchSA_lcp1(t, sa, p):
    """Binary-search the suffix array for ``p``, skipping known-equal prefixes.

    Same contract as a plain binary search over the suffix array, but each
    probe resumes character comparison after the prefix already known to
    match, instead of restarting at position 0.

    Parameters:
        t:  the text; must end with the terminator '$'.
        sa: the suffix array of ``t`` (same length as ``t``).
        p:  the pattern to look up.

    Returns:
        The smallest index ``k`` (with ``1 <= k <= len(sa)``) such that
        ``p <= t[sa[k]:]``, or ``len(sa)`` when ``p`` is greater than every
        suffix.  A suffix that has ``p`` as a prefix counts as ``>= p``.

    Raises:
        AssertionError: if ``t`` does not end in '$' or ``sa`` has a
            different length than ``t``.

    Note:
        The suffix at ``sa[0]`` is never compared against ``p``; the search
        assumes ``p`` sorts after it, so the result is always at least 1.
    """
    assert t[-1] == '$'  # t already has terminator
    assert len(t) == len(sa)  # sa is the suffix array for t
    if len(t) == 1:
        return 1  # t is just '$'; p is assumed to sort after it
    n, m = len(t), len(p)
    # invariant: t[sa[l]:] < p <= t[sa[r]:], where slot len(sa) stands for
    # a virtual suffix greater than everything
    l, r = 0, len(sa)
    # Lower bounds on the number of leading characters p shares with the
    # suffixes at the current left and right boundaries.
    lcp_lp, lcp_rp = 0, 0
    while True:
        c = (l + r) // 2
        off = sa[c]
        # Since sa is sorted, the suffix at c lies between the two boundary
        # suffixes and shares at least min(lcp_lp, lcp_rp) characters with p,
        # so comparison can start there.
        i = min(lcp_lp, lcp_rp)
        while i < m and off + i < n and p[i] == t[off + i]:
            i += 1
        if i == m:
            # p is a prefix of (or equal to) the suffix: treat as p <= suffix
            plt = True
        elif off + i >= n:
            # The suffix ran out first, so it is a proper prefix of p and
            # therefore smaller than p.
            plt = False
        else:
            # First differing character decides the order.
            plt = p[i] < t[off + i]
        if plt:
            if c == l + 1:
                return c
            r = c
            lcp_rp = i  # i characters of p matched the new right boundary
        else:
            if c == r - 1:
                return r
            l = c
            lcp_lp = i  # i characters of p matched the new left boundary
# Same three queries as for binarySearchSA; each bare integer is the index
# returned by the call above it.
binarySearchSA_lcp1(t, sa, 'aba')
3
binarySearchSA_lcp1(t, sa, 'bb')
7
binarySearchSA_lcp1(t, sa, 'aa')
2