In [1]:
def rotations(t):
    ''' Return list of rotations of input string t '''
    tt = t * 2
    return [ tt[i:i+len(t)] for i in range(0, len(t)) ]

def bwm(t):
    ''' Return lexicographically sorted list of t’s rotations '''
    return sorted(rotations(t))

def bwtViaBwm(t):
    ''' Given T, returns BWT(T) by way of the BWM '''
    return ''.join(map(lambda x: x[-1], bwm(t)))
In [2]:
t = 'abaaba$'
b = bwtViaBwm(t)
b
Out[2]:
'abba$aa'
In [3]:
def rankBwt(bw):
    ''' Given BWT string bw, return parallel list of B-ranks.  Also
        returns tots: map from character to # times it appears. '''
    tots = dict()
    ranks = []
    for c in bw:
        if c not in tots:
            tots[c] = 0
        ranks.append(tots[c])
        tots[c] += 1
    return ranks, tots
In [4]:
ranks, tots = rankBwt(b)
print zip(b, ranks) # print characters of BWT(T) in order, along with rank
[('a', 0), ('b', 0), ('b', 1), ('a', 1), ('$', 0), ('a', 2), ('a', 3)]
In [5]:
def firstCol(tots):
    ''' Return map from character to the range of rows prefixed by
        the character. '''
    first = {}
    totc = 0
    for c, count in sorted(tots.iteritems()):
        first[c] = (totc, totc + count)
        totc += count
    return first
In [6]:
firstCol(tots)
Out[6]:
{'$': (0, 1), 'a': (1, 5), 'b': (5, 7)}
In [7]:
# confirm that the representation of the first column above is sensible
print('\n'.join(bwm(t)))
$abaaba
a$abaab
aaba$ab
aba$aba
abaaba$
ba$abaa
baaba$a
In [8]:
def reverseBwt(bw):
    ''' Make T from BWT(T) '''
    ranks, tots = rankBwt(bw)
    first = firstCol(tots)
    rowi = 0 # start in first row
    t = '$' # start with rightmost character
    while bw[rowi] != '$':
        c = bw[rowi]
        t = c + t # prepend to answer
        # jump to row that starts with c of same rank
        rowi = first[c][0] + ranks[rowi]
    return t
In [9]:
reverseBwt(b)
Out[9]:
'abaaba$'
In [10]:
reverseBwt(bwtViaBwm('In_the_jingle_jangle_morning$'))
Out[10]:
'In_the_jingle_jangle_morning$'