%matplotlib inline from __future__ import division from scipy.stats import bernoulli import numpy as np p_true=1/2 # this is the value we will try to estimate from the observed data fp=bernoulli(p_true) def sample(n=10): 'simulate coin flipping' return fp.rvs(n)# flip it n times xs = sample(100) # generate some samples import sympy from sympy.abc import x, z p=sympy.symbols('p',positive=True) L=p**x*(1-p)**(1-x) J=np.prod([L.subs(x,i) for i in xs]) # objective function to maximize logJ=sympy.expand_log(sympy.log(J)) sol=sympy.solve(sympy.diff(logJ,p),p)[0] x=linspace(0,1,100) plot(x,map(sympy.lambdify(p,logJ,'numpy'),x),sol,logJ.subs(p,sol),'o', p_true,logJ.subs(p,p_true),'s',) xlabel('$p$',fontsize=18) ylabel('Likelihood',fontsize=18) title('Estimate not equal to true value',fontsize=18) def estimator_gen(niter=10,ns=100): 'generate data to estimate distribution of maximum likelihood estimator' out=[] x=sympy.symbols('x',real=True) L= p**x*(1-p)**(1-x) for i in range(niter): xs = sample(ns) # generate some samples from the experiment J=np.prod([L.subs(x,i) for i in xs]) # objective function to maximize logJ=sympy.expand_log(sympy.log(J)) sol=sympy.solve(sympy.diff(logJ,p),p)[0] out.append(float(sol.evalf())) return out if len(out)>1 else out[0] # return scalar if list contains only 1 term etries = estimator_gen(100) # this may take awhile, depending on how much data you want to generate hist(etries) # histogram of maximum likelihood estimator title('$\mu=%3.3f,\sigma=%3.3f$'%(mean(etries),std(etries)),fontsize=18) import scipy.stats b=scipy.stats.binom(100,.5) # n=100, p = 0.5, distribution of the estimator \hat{p} f,ax= subplots() ax.stem(arange(0,101),b.pmf(arange(0,101))) # heres the density of the sum of x_i g = lambda i:b.pmf(arange(-i,i)+50).sum() # symmetric sum the probability around the mean print 'this is pretty close to 0.95:%r'%g(10) ax.vlines( [50+10,50-10],0 ,ax.get_ylim()[1] ,color='r',lw=3.) b=scipy.stats.bernoulli(.5) # coin distribution xs = b.rvs(100) # flip it 100 times phat = mean(xs) # estimated p print abs(phat-0.5) < 0.5*0.20 # did I make it w/in interval 95% of the time? out=[] b=scipy.stats.bernoulli(.5) # coin distribution for i in range(500): # number of tries xs = b.rvs(100) # flip it 100 times phat = mean(xs) # estimated p out.append(abs(phat-0.5) < 0.5*0.20 ) # within 20% print 'Percentage of tries within 20 interval = %3.2f'%(100*sum(out)/float(len(out) ))