from pylab import * M = rand(5,5) sum(M,axis=0) M /= sum(M,axis=0)[newaxis,:] sum(M,axis=0) state = zeros(5) state[0] = 1 dot (M,state) v = add.accumulate(M[:,0]) def binsearch(a,x,lo,hi): while lox: hi = mid else: return mid assert lo==hi return lo binsearch(v,0.9,0,len(M)) def rsample(dist): assert amin(dist)>=0.0 and amax(dist)<=1.0 v = add.accumulate(dist) assert abs(v[-1]-1.0)<1e-6 val = rand() result = binsearch(v,val,0,len(v)) return result def sample_chain(M,state,N): result = [state] for i in range(N): state = rsample(M[:,state]) result.append(state) return result sample_chain(M,0,10) M.shape s = """Humpty Dumpty sat on a wall, Humpty Dumpty had a great fall; All the King's horses and all the King's men Couldn't put Humpty together again""".lower() import re words = re.split(r'\W+',s) words[:10] from collections import Counter bigrams = Counter() for i in range(len(words)-1): bigrams[tuple(words[i:i+2])] += 1 bigrams sorted(bigrams.items()) p = zeros(5) p[0] = 1.0 for i in range(10): print p p = dot(M,p)