%matplotlib inline from __future__ import division from IPython.html.widgets import interact,fixed from scipy import stats import sympy from sympy import S, init_printing, Sum, summation,expand from sympy import stats as st from sympy.abc import k import re sympy.plot( k*(1-k),(k,0,1),ylabel='$n^2 \mathbb{V}$',xlabel='p',fontsize=28) b=stats.bernoulli(p=.8) samples=b.rvs(100) print var(samples) print mean(samples) def slide_figure(n=100,m=1000,p=0.8): fig,ax=subplots() ax.axis(ymax=25) b=stats.bernoulli(p=p) v=iter(b.rvs,2) samples=b.rvs(n*1000) tmp=(samples.reshape(n,-1).mean(axis=0)) ax.hist(tmp,normed=1) ax1=ax.twinx() ax1.axis(ymax=25) ax1.plot(linspace(0,1,200),stats.norm(mean(tmp),std(tmp)).pdf(linspace(0.0,1,200)),lw=3,color='r') interact(slide_figure, n=(100,500),p=(0.01,1,.05),m=fixed(500)); from sympy.abc import p,n,k sympy.plot(st.density(st.Beta('p',3,3))(p),(p,0,1) ,xlabel='p') obj=sympy.expand_log(sympy.log(p**k*(1-p)**(n-k) * st.density(st.Beta('p',6,6))(p))) sol=sympy.solve(sympy.simplify(sympy.diff(obj,p)),p)[0] print sol n=5 def show_bias(n=30): sympy.plot(p,(5+n*p)/(n+10),(p,0,1),aspect_ratio=1,title='more samples reduce bias') interact(show_bias,n=(10,500,10)); sum(sympy.var('x1:10')) expr=((5+(sum(sympy.var('x1:10'))))/(n+10)) def apply_exp(expr): tmp=re.sub('x[\d]+\*\*2','p',str(expand(expr))) tmp=re.sub('x[\d]+\*x[\d]+','p**2',tmp) tmp=re.sub('x[\d]+','p',tmp) return sympy.sympify(tmp) ex2 = apply_exp(expr**2) print ex2 tmp=sympy.simplify(ex2 - (apply_exp(expr))**2 ) sympy.plot(tmp,p*(1-p)/10,(p,0,1)) def generate_expr(num_samples=10,alpha=6): n = sympy.symbols('n') obj=sympy.expand_log(sympy.log(p**k*(1-p)**(n-k) * st.density(st.Beta('p',alpha,alpha))(p))) sol=sympy.solve(sympy.simplify(sympy.diff(obj,p)),p)[0] expr=sol.replace(k,(sum(sympy.var('x1:%d'%(num_samples))))) expr=expr.subs(n,num_samples) ex2 = apply_exp(expr**2) ex = apply_exp(expr) return (ex,sympy.simplify(ex2-ex**2)) num_samples=10 X_bias,X_v = generate_expr(num_samples,alpha=2) p1=sympy.plot(X_v,(p,0,1),show=False,line_color='b',ylim=(0,.03),xlabel='p') X_bias,X_v = generate_expr(num_samples,alpha=6) p2=sympy.plot(X_v,(p,0,1),show=False,line_color='r',xlabel='p') p3=sympy.plot(p*(1-p)/num_samples,(p,0,1),show=False,line_color='g',xlabel='p') p1.append(p2[0]) p1.append(p3[0]) p1.show() p1=sympy.plot(n*(1-p)*p/(n+10)**2,(p,0,1),show=False,line_color='b',ylim=(0,.05),xlabel='p',ylabel='variance') p2=sympy.plot((1-p)*p/n,(p,0,1),show=False,line_color='r',ylim=(0,.05),xlabel='p') p1.append(p2[0]) p1.show() def show_variance(n=5): p1=sympy.plot(n*(1-p)*p/(n+10)**2,(p,0,1),show=False,line_color='b',ylim=(0,.05),xlabel='p',ylabel='variance') p2=sympy.plot((1-p)*p/n,(p,0,1),show=False,line_color='r',ylim=(0,.05),xlabel='p') p1.append(p2[0]) p1.show() interact(show_variance,n=(10,120,2)); pv=.60 nsamp=30 fig,ax=subplots() rv = stats.bernoulli(pv) map_est=(rv.rvs((nsamp,1000)).sum(axis=0)+5)/(nsamp+10); ml_est=(rv.rvs((nsamp,1000)).sum(axis=0))/(nsamp); _,bins,_=ax.hist(map_est,bins=20,alpha=.3,normed=True,label='MAP'); ax.hist(ml_est,bins=20,alpha=.3,normed=True,label='ML'); ax.vlines(map_est.mean(),0,12,lw=3,linestyle=':',color='b') ax.vlines(pv,0,12,lw=3,color='r',linestyle=':') ax.vlines(ml_est.mean(),0,12,lw=3,color='g',linestyle=':') ax.legend()