"""Fit a two-component 1-D Gaussian mixture with expectation-maximization.

Samples a synthetic dataset from two normal distributions, optionally plots
its histogram, then recovers the mixture weights, means, and variances via
the EM algorithm.
"""
import numpy as np


def normal(x, params):
    """Gaussian probability density evaluated at ``x``.

    Parameters
    ----------
    x : float or ndarray
        Point(s) at which to evaluate the density.
    params : sequence of two floats
        ``(mean, variance)`` of the Gaussian.  NOTE: the second entry is the
        variance, not the standard deviation.

    Returns
    -------
    float or ndarray
        Density value(s), same shape as ``x``.
    """
    mu, var = params[0], params[1]
    return np.exp(-(x - mu) ** 2 / (2 * var)) / np.sqrt(2 * np.pi * var)


def expectation_maximization(data, n_iter=100, verbose=True):
    """Fit a two-component Gaussian mixture to 1-D ``data`` with EM.

    Starts from a fixed initial guess (equal weights, unit variances,
    means 0 and 1) and alternates E- and M-steps.

    Parameters
    ----------
    data : array-like of shape (n,)
        One-dimensional samples.
    n_iter : int, optional
        Number of EM iterations (default 100, matching the original script).
    verbose : bool, optional
        If True (default, matching the original), print the current
        estimates every 10 iterations.

    Returns
    -------
    mixes : ndarray of shape (2,)
        Estimated mixture weights (sum to 1).
    thetas : ndarray of shape (2, 2)
        One row per component: ``(mean, variance)``.
    """
    data = np.asarray(data, dtype=float)
    n = len(data)  # was read from a global in the original — now local
    thetas = np.array([[0.0, 1.0], [1.0, 1.0]])
    mixes = np.array([0.5, 0.5])
    for iteration in range(n_iter):
        # E-step: responsibilities ys[i, j] = P(component i | data[j]).
        # Vectorized over the data; replaces the original O(2n) Python loop.
        ys = np.array([m * normal(data, t) for m, t in zip(mixes, thetas)])
        ys /= ys.sum(axis=0)[np.newaxis, :]
        # M-step: re-estimate weights, means, and variances from the
        # responsibilities (variance via E[x^2] - E[x]^2, as the original).
        weights = ys.sum(axis=1)
        mixes = weights / n
        mus = (ys * data).sum(axis=1) / weights
        variances = (ys * data ** 2).sum(axis=1) / weights - mus ** 2
        thetas = np.column_stack((mus, variances))
        if verbose and iteration % 10 == 0:
            print("===", iteration, "===")
            print(mixes)
            print(thetas)
    return mixes, thetas


def main():
    """Reproduce the original demo: sample the mixture, plot it, fit EM."""
    data1 = np.random.normal(loc=0.0, scale=1.0, size=3000)
    data2 = np.random.normal(loc=5.0, scale=2.0, size=7000)
    data = np.concatenate((data1, data2))
    np.random.shuffle(data)

    # Plotting is a side show — skip gracefully if matplotlib is missing.
    try:
        from matplotlib import pyplot as plt
        plt.hist(data, bins=100)
        plt.show()
    except ImportError:
        pass

    (p0, p1), ((m0, s0), (m1, s1)) = expectation_maximization(data)
    # Final estimates: weights, then (mean, variance) per component.
    print((p0, p1))
    print((m0, s0))
    print((m1, s1))


if __name__ == "__main__":
    main()