#!/usr/bin/env python # coding: utf-8 # # 1D gaussian toy model # In[1]: get_ipython().run_line_magic('pylab', 'inline') # In[2]: import seaborn as sns sns.set_style("whitegrid") sns.set_context("talk") rc('axes', labelsize=20, titlesize=20) # In[3]: import numpy as np import triangle from clerk.stats.distances import mahalanobis import abcpmc #np.random.seed(987654321) # # Data # In[4]: mean = 1 sigma = 1 n = 10000 y = np.random.normal(mean, sigma, n) # In[5]: sns.distplot(y) # # Setup # In[6]: prior = abcpmc.TophatPrior([-5], [5]) # In[7]: def mean_dist(x, y): return np.abs(np.mean(x, axis=0) - np.mean(y, axis=0)) dist = mean_dist # In[8]: def create_new_sample(theta): return np.random.normal(theta, sigma, n) postfn = create_new_sample # # Verification # In[9]: theta = prior() print theta x = postfn([mean]) d = dist(x, y) print d # In[10]: distances = [dist(y, postfn(mean)) for _ in range(1000)] sns.distplot(distances) # # ABC sampling with PMC # In[11]: def sample(T, eps_val, eps_min): abcpmc_sampler = abcpmc.Sampler(N=2000, Y=y, postfn=postfn, dist=dist, threads=8) eps = abcpmc.ConstEps(T, eps_val) pools = [] for pool in abcpmc_sampler.sample(prior, eps): print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(pool.t, eps(pool.t), pool.ratio)) for i, (mean, std) in enumerate(zip(*abcpmc.weighted_avg_and_std(pool.thetas, pool.ws, axis=0))): print(u" theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean,std)) eps.eps = np.percentile(pool.dists, 90) if eps.eps < eps_min: eps.eps = eps_min pools.append(pool) abcpmc_sampler.close() return pools # In[12]: T=38 eps=0.5 pools = sample(T, eps, sigma/sqrt(n)) # # Postprocessing # In[13]: def plot_pool(prob, samples): prob = np.array(prob).flatten() samples = np.vstack(samples) sns.distplot(prob) show() sns.distplot(samples, axlabel=r'$\theta$') #savefig("1d_gauss_posterior.pdf") show() sample_mean = np.mean(samples, axis=0) print(u"mean: {0:>.4f} \u00B1 {1:>.4f}".format(float(np.mean(samples, axis=0)), float(np.std(samples, axis=0)))) # In[14]: offset = 33 samples = np.array([pool.thetas for pool in pools]) distances = np.array([pool.dists for pool in pools]) plot_pool(distances[offset:], samples[offset:]) # In[15]: var_vals = np.var((samples), axis=1) eps_values = np.array([pool.eps for pool in pools]) # In[16]: max_eps = 38 f, ax = subplots(1,1) ax.plot(eps_values, var_vals[:max_eps], "-" ,label="ABC PMC") ax.plot(eps_values, (float(sigma)**2/n + eps_values[:max_eps]**2/3), "--", label="ABC analytic") ax.axhline(1e-4, linestyle=":", linewidth=4, color=sns.xkcd_rgb["pale red"], label="Bayesian") #ticks = xticks()[0][:-1] #xticks(ticks, ["{0:>4.3f}".format(eps) for eps in eps_values[ticks.tolist()]], rotation=70) #ax.semilogy() ax.set_ylabel(r"var($\theta$)") ax.set_xlabel(r"$\epsilon$") ax.legend(loc="best") ax.invert_xaxis() ylim([-.01, None]) #savefig("1d_gauss_variance.pdf") # In[17]: acc_ratios = np.array([pool.ratio for pool in pools]) plot(acc_ratios, label="ABC PMC") plot(2 * np.array(eps_values), label="analytic") xticks(np.arange(len(eps_values)), ["{0:>4.3f}".format(eps) for eps in eps_values], rotation=70) #semilogy() ylabel("acceptance ratio") xlabel(r"$\epsilon$") legend(loc="best") # In[18]: from scipy.special import erf def p_theta_eta(theta, eta): phi = lambda t: (1 + erf(t / np.sqrt(2))) / 2 ybar = np.mean(y, axis=0) return 1. / (2 * eta) *(phi((ybar - theta + eta) / (sigma / np.sqrt(n))) - phi((ybar - theta - eta) / (sigma / np.sqrt(n))) ) # In[19]: def get_ticks(ticks, num=7): return [float("{0:<.2f}".format(tick)) for tick in np.linspace(ticks[0], ticks[-1], 7)] # In[20]: from scipy.stats import norm # In[21]: nbins = 6 rc('axes', labelsize=40, titlesize=30) #rc("ticks", size=20) sns.set(style="ticks") with mpl.rc_context(rc={"figure.figsize": [15, 20]}): for i in range(36/2): subplot(5, 4, i+1) title(r"Iteration: {0}, $\epsilon$: {1:>4.3f}".format(i*2, eps_values[i*2]), size=15) ax = sns.distplot(samples[i*2], hist=False, axlabel=r'$\theta$', hist_kws={"rotation":.3, "fontsize":30}) ax.set_xlabel(r'$\theta$', size=15) ax.locator_params(axis = 'x', nbins = nbins) ax.locator_params(axis = 'y', nbins = 5) min_val, max_val = ax.get_xlim() x_grid = np.linspace(min_val, max_val, 1000) plot(x_grid, p_theta_eta(x_grid, eps_values[i*2]), "--") if i*2>=22: xlim([0.96, 1.04]) if i*2<=14: ylim([None, 9]) rv = norm(loc=np.mean(y), scale=1/np.sqrt(n)) plot(x_grid, rv.pdf(x_grid), ":", color=sns.xkcd_rgb["pale red"]) fill(x_grid, rv.pdf(x_grid), color=sns.xkcd_rgb["pale red"], alpha=0.2) tick_params(axis='both', which='major', labelsize=15) #i = 37 #subplot(5, 4, 20) #title(r"Iteration: {0}, $\epsilon$: {1:>4.3f}".format(i, eps_values[i])) #ax = sns.distplot(samples[i], hist=False, axlabel=r'$\theta$') #ax.locator_params(axis = 'x', nbins = nbins) #min_val, max_val = ax.get_xlim() #x_grid = np.linspace(min_val, max_val, 1000) #plot(x_grid, p_theta_eta(x_grid, eps_values[i])) ax = subplot(5, 4, i+2) plot(1, label="ABC PMC") plot(1, "--", label="ABC analytic") plot(1, ":", label="Bayesian") legend(loc=2) ax.set_axis_off() subplots_adjust(hspace = 0.40) #savefig("1d_gauss_posterior_evolution.pdf") # In[22]: from scipy.stats import gaussian_kde # In[23]: x_grid = np.linspace(0.4, 1.6, 1000) with mpl.rc_context(rc={"figure.figsize": [15, 20]}): for i in range(len(samples)/2): subplot(5, 4, i+1) title("Iteration: {0}, eps: {1:>4.3f}".format(i*2, eps_values[i*2])) kde = gaussian_kde(np.array(samples[i*2])[:, 0]) plot(x_grid, kde(x_grid)) plot(x_grid, p_theta_eta(x_grid, eps_values[i*2])) # In[ ]: