In [1]:
def rotations(t):
    # Return list of rotations of input string t
    tt = t * 2
    return [ tt[i:i+len(t)] for i in range(len(t)) ]
In [2]:
rotations('cat')
Out[2]:
['cat', 'atc', 'tca']
In [3]:
def bwm(t):
    # Return lexicographically sorted list of t’s rotations
    return sorted(rotations(t))
In [4]:
bwm('abaaba$')
Out[4]:
['$abaaba', 'a$abaab', 'aaba$ab', 'aba$aba', 'abaaba$', 'ba$abaa', 'baaba$a']
In [5]:
print('\n'.join(bwm('abaaba$')))
$abaaba
a$abaab
aaba$ab
aba$aba
abaaba$
ba$abaa
baaba$a
In [6]:
def bwtViaBwm(t):
    # Given T, returns BWT(T) by way of the BWM
    return ''.join(map(lambda x: x[-1], bwm(t)))
In [7]:
bwtViaBwm('abaaba$') # we can see the result equals the last column of the matrix above
Out[7]:
'abba$aa'
In [8]:
def suffixArray(s):
    satups = sorted([(s[i:], i) for i in range(len(s))])
    return map(lambda x: x[1], satups)

def bwtViaSa(t):
    # Given T, returns BWT(T) by way of the suffix array
    bw = []
    for si in suffixArray(t):
        if si == 0:
            bw.append('$')
        else:
            bw.append(t[si-1])
    return ''.join(bw) # return string-ized version of list bw
In [9]:
bwtViaBwm('abaaba$'), bwtViaSa('abaaba$') # same result
Out[9]:
('abba$aa', 'abba$aa')