#!/usr/bin/env python # coding: utf-8 # In[1]: def rotations(t): ''' Return list of rotations of input string t ''' tt = t * 2 return [ tt[i:i+len(t)] for i in range(0, len(t)) ] def bwm(t): ''' Return lexicographically sorted list of t’s rotations ''' return sorted(rotations(t)) def bwtViaBwm(t): ''' Given T, returns BWT(T) by way of the BWM ''' return ''.join(map(lambda x: x[-1], bwm(t))) # In[2]: t = 'abaaba$' b = bwtViaBwm(t) b # In[3]: def rankBwt(bw): ''' Given BWT string bw, return parallel list of B-ranks. Also returns tots: map from character to # times it appears. ''' tots = dict() ranks = [] for c in bw: if c not in tots: tots[c] = 0 ranks.append(tots[c]) tots[c] += 1 return ranks, tots # In[4]: ranks, tots = rankBwt(b) print zip(b, ranks) # print characters of BWT(T) in order, along with rank # In[5]: def firstCol(tots): ''' Return map from character to the range of rows prefixed by the character. ''' first = {} totc = 0 for c, count in sorted(tots.items()): first[c] = (totc, totc + count) totc += count return first # In[6]: firstCol(tots) # In[7]: # confirm that the representation of the first column above is sensible print('\n'.join(bwm(t))) # In[8]: def reverseBwt(bw): ''' Make T from BWT(T) ''' ranks, tots = rankBwt(bw) first = firstCol(tots) rowi = 0 # start in first row t = '$' # start with rightmost character while bw[rowi] != '$': c = bw[rowi] t = c + t # prepend to answer # jump to row that starts with c of same rank rowi = first[c][0] + ranks[rowi] return t # In[9]: reverseBwt(b) # In[10]: reverseBwt(bwtViaBwm('In_the_jingle_jangle_morning$'))