%matplotlib inline from IPython.html.widgets import interact from scipy import stats import seaborn as sns import pandas as pd n0=stats.norm(0,1) n1=stats.norm(1,2) xi = linspace(-5,5,100) fig,ax=subplots() ax.plot(xi,n0.pdf(xi)) ax.plot(xi,n1.pdf(xi)) def bias_coin(phead = .5): while True: yield int( np.random.rand() < phead ) pct_mixed = 0.1 bias_coin_gen = bias_coin(pct_mixed) dual_set = [n0,n1] samples = [ dual_set[bias_coin_gen.next()].rvs() for i in range(500) ] hist(samples,bins=20) title('average = %3.3f, median=%3.3f pct_mixed=%3.3f'%(mean(samples),np.median(samples),pct_mixed)) import sympy.stats from sympy.abc import x eps = sympy.symbols('epsilon') mixed_cdf = sympy.stats.cdf(sympy.stats.Normal('x',0,1),'x')(x)*(1-eps) + eps*sympy.stats.cdf(sympy.stats.Normal('x',1,2),'x')(x) mixed_pdf = sympy.diff(mixed_cdf,x) def plot_mixed_dist(epsilon=.1): n1 = stats.norm(1,2) xi = linspace(-5,5,100) fig,ax = subplots() ax.plot(xi,[sympy.lambdify(x,mixed_pdf.subs(eps,epsilon))(i) for i in xi],label='mixed',lw=2) ax.plot(xi,n0.pdf(xi),label='g(x)',linestyle='--') ax.plot(xi,n1.pdf(xi),label='h(x)',linestyle='--') ax.legend(loc=0) ax.set_title('epsilon = %2.2f'%(epsilon)) ax.vlines(0,0,.4,linestyle='-',color='g') ax.vlines(epsilon,0,.4,linestyle='-',color='b') interact(plot_mixed_dist,epsilon=(0,1,.05)) fig,ax=subplots() colors=['b','r'] for k in [1,2]: ax.plot(xi,np.ma.masked_array(xi,abs(xi)>k),color=colors[k-1]) ax.plot(xi,np.ma.masked_array(np.sign(xi)*k,abs(xi) k > 0]^2') numer=mma.eval('Integrate[lpdf*Piecewise[{{x, Abs[x] < k},{k*Sign[x],Abs[x] >= k}}]^2, {x, -Infinity, Infinity}, Assumptions -> k > 0]') def asymp_variance(kval,eps=0.01): mma.push('k',kval) mma.push('Epsilon',eps) mma.eval('numer ='+numer) # used closure string mma.eval('denom ='+denom) mma.eval('Clear[k,Epsilon]') return float(mma.eval('N[numer/denom]')) return asymp_variance asympt_var_case1 = closure_variance((0,1),(1,2)) # case 1 with N(0,1) + N(1,2) fig,ax=subplots() ax.plot(kvals,[asympt_var_case1(k,0) for k in kvals],'-o',label='eps=0') ax.plot(kvals,[asympt_var_case1(k,.05) for k in kvals],'-o',label='eps=.05') ax.plot(kvals,[asympt_var_case1(k,.1) for k in kvals],'-o',label='eps=0.1') ax.set_xlabel("k") ax.set_ylabel("relative asymptotic efficiency ") ax.legend(loc=0) ax.set_title(r"$\mathcal{N}(0,1) , \mathcal{N}(1,2)$ mixed",fontsize=18) ax.grid() asympt_var_case2 = closure_variance((0,0),(1,10)) # case 1 with N(0,1) + N(0,10) fig2,ax=subplots() ax.plot(kvals,[asympt_var_case2(k,0) for k in kvals],'-o',label='eps=0') ax.plot(kvals,[asympt_var_case2(k,.05) for k in kvals],'-o',label='eps=.05') ax.plot(kvals,[asympt_var_case2(k,.1) for k in kvals],'-o',label='eps=0.1') ax.set_xlabel("k") ax.set_ylabel("relative asymptotic efficiency ") ax.legend(loc=0) ax.set_title(r"$\mathcal{N}(0,1) , \mathcal{N}(0,10)$ mixed",fontsize=18) ax.grid() fig.get_axes()[0].axis(ymax=3.5) fig nsamples = 200 xs = np.array([dual_set[bias_coin_gen.next()].rvs() for i in range(200)*nsamples ]).reshape(nsamples,-1) fig,ax=subplots() ax.hist(np.mean(xs,0),20,alpha=0.8,label = 'mean') ax.hist(np.median(xs,0),20,alpha=0.3,label ='median') ax.legend() fig,ax=subplots() sns.violinplot(np.vstack([np.median(xs,axis=0),np.mean(xs,axis=0)]).T,ax=ax,names=['median','mean']); mma.push('data',xs[:,0].tolist()) mma.eval('psi[k_] := Function[x, Piecewise[{{x, Abs[x] < k}, {k*Sign[x], Abs[x] > k}}]]') def psi_est(k,x): if x.ndim ==2 : # loop over columns out = [] for i in range(x.shape[1]): data = x[:,i] out.append( psi_est(k,data) ) # recurse return np.array(out) else: mma.push('data',x.tolist()) return float(mma.eval('\[Mu] /. FindRoot[Plus @@ (psi[%d] /@ (data - \[Mu])) == 0, {\[Mu], 0}]'%(k))) hist(psi_est(1,xs),alpha=.7,label='k=1') hist(psi_est(2,xs),alpha=.7,label='k=2') hist(psi_est(3,xs),alpha=.7,label='k=3') legend(loc=0) huber_est={k:psi_est(k,xs) for k in [1,1.5,2,3]} huber_est[0] = np.median(xs,axis=0) huber_est[4] = np.mean(xs,axis=0) fig,ax=subplots() sns.violinplot(pd.DataFrame(huber_est),ax=ax) ax.set_xticklabels(['median','1','1.5','2','3','mean']); from sympy import mpmath, symbols, diff, Piecewise, sign, lambdify from sympy.stats import density, cdf, Normal from sympy.abc import k,x eps = symbols('epsilon') lpdf=diff(cdf(Normal('x',0,1))(x)*(1-eps)+ eps*cdf(Normal('x',0,10))(x),x) p = Piecewise((x,abs(x)