# 1D gaussian toy model¶

In [1]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib

In [2]:
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("talk")
rc('axes', labelsize=20, titlesize=20)

In [3]:
import numpy as np
import triangle
from clerk.stats.distances import mahalanobis
import abcpmc

#np.random.seed(987654321)


# Data¶

In [4]:
mean = 1
sigma = 1
n = 10000
y = np.random.normal(mean, sigma, n)

In [5]:
sns.distplot(y)

Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x1175fccd0>

# Setup¶

In [6]:
prior = abcpmc.TophatPrior([-5], [5])

In [7]:
def mean_dist(x, y):
return np.abs(np.mean(x, axis=0) - np.mean(y, axis=0))

dist = mean_dist

In [8]:
def create_new_sample(theta):
return np.random.normal(theta, sigma, n)
postfn = create_new_sample


# Verification¶

In [9]:
theta = prior()
print theta
x = postfn([mean])
d = dist(x, y)
print d

[-0.4025917]
0.000582605098998

In [10]:
distances = [dist(y, postfn(mean)) for _ in range(1000)]
sns.distplot(distances)

Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x11780e8d0>

# ABC sampling with PMC¶

In [11]:
def sample(T, eps_val, eps_min):
abcpmc_sampler = abcpmc.Sampler(N=2000, Y=y, postfn=postfn, dist=dist, threads=8)
eps = abcpmc.ConstEps(T, eps_val)
pools = []
for pool in abcpmc_sampler.sample(prior, eps):
print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(pool.t, eps(pool.t), pool.ratio))
for i, (mean, std) in enumerate(zip(*abcpmc.weighted_avg_and_std(pool.thetas, pool.ws, axis=0))):
print(u"    theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean,std))

eps.eps = np.percentile(pool.dists, 90)
if eps.eps < eps_min:
eps.eps = eps_min

pools.append(pool)

abcpmc_sampler.close()

return pools

In [12]:
T=38
eps=0.5
pools = sample(T, eps, sigma/sqrt(n))

T: 0, eps: 0.5000, ratio: 0.1000
theta[0]: 0.9792 Â± 0.2894
T: 1, eps: 0.4495, ratio: 0.6137
theta[0]: 1.0011 Â± 0.2582
T: 2, eps: 0.3917, ratio: 0.6086
theta[0]: 0.9932 Â± 0.2256
T: 3, eps: 0.3414, ratio: 0.6001
theta[0]: 0.9860 Â± 0.1989
T: 4, eps: 0.2992, ratio: 0.6127
theta[0]: 0.9945 Â± 0.1734
T: 5, eps: 0.2653, ratio: 0.6077
theta[0]: 0.9961 Â± 0.1531
T: 6, eps: 0.2335, ratio: 0.6008
theta[0]: 0.9967 Â± 0.1362
T: 7, eps: 0.2053, ratio: 0.6335
theta[0]: 0.9904 Â± 0.1180
T: 8, eps: 0.1806, ratio: 0.6219
theta[0]: 0.9940 Â± 0.1040
T: 9, eps: 0.1577, ratio: 0.6099
theta[0]: 0.9918 Â± 0.0905
T: 10, eps: 0.1387, ratio: 0.6207
theta[0]: 0.9923 Â± 0.0811
T: 11, eps: 0.1228, ratio: 0.6177
theta[0]: 0.9919 Â± 0.0717
T: 12, eps: 0.1081, ratio: 0.6057
theta[0]: 0.9914 Â± 0.0640
T: 13, eps: 0.0959, ratio: 0.6107
theta[0]: 0.9927 Â± 0.0557
T: 14, eps: 0.0835, ratio: 0.6232
theta[0]: 0.9920 Â± 0.0496
T: 15, eps: 0.0729, ratio: 0.5928
theta[0]: 0.9909 Â± 0.0426
T: 16, eps: 0.0624, ratio: 0.5891
theta[0]: 0.9905 Â± 0.0373
T: 17, eps: 0.0554, ratio: 0.5872
theta[0]: 0.9932 Â± 0.0340
T: 18, eps: 0.0498, ratio: 0.5938
theta[0]: 0.9933 Â± 0.0294
T: 19, eps: 0.0436, ratio: 0.6011
theta[0]: 0.9920 Â± 0.0271
T: 20, eps: 0.0385, ratio: 0.5685
theta[0]: 0.9920 Â± 0.0244
T: 21, eps: 0.0337, ratio: 0.5599
theta[0]: 0.9926 Â± 0.0222
T: 22, eps: 0.0299, ratio: 0.5498
theta[0]: 0.9925 Â± 0.0198
T: 23, eps: 0.0261, ratio: 0.5549
theta[0]: 0.9924 Â± 0.0180
T: 24, eps: 0.0232, ratio: 0.5258
theta[0]: 0.9924 Â± 0.0167
T: 25, eps: 0.0205, ratio: 0.5056
theta[0]: 0.9926 Â± 0.0155
T: 26, eps: 0.0181, ratio: 0.4697
theta[0]: 0.9929 Â± 0.0148
T: 27, eps: 0.0159, ratio: 0.4240
theta[0]: 0.9922 Â± 0.0138
T: 28, eps: 0.0141, ratio: 0.4017
theta[0]: 0.9927 Â± 0.0134
T: 29, eps: 0.0125, ratio: 0.3784
theta[0]: 0.9924 Â± 0.0129
T: 30, eps: 0.0112, ratio: 0.3383
theta[0]: 0.9929 Â± 0.0118
T: 31, eps: 0.0101, ratio: 0.3437
theta[0]: 0.9923 Â± 0.0111
T: 32, eps: 0.0100, ratio: 0.3518
theta[0]: 0.9922 Â± 0.0115
T: 33, eps: 0.0100, ratio: 0.3458
theta[0]: 0.9924 Â± 0.0114
T: 34, eps: 0.0100, ratio: 0.3600
theta[0]: 0.9930 Â± 0.0114
T: 35, eps: 0.0100, ratio: 0.3515
theta[0]: 0.9931 Â± 0.0111
T: 36, eps: 0.0100, ratio: 0.3637
theta[0]: 0.9921 Â± 0.0112
T: 37, eps: 0.0100, ratio: 0.3531
theta[0]: 0.9926 Â± 0.0113


# Postprocessing¶

In [13]:
def plot_pool(prob, samples):
prob = np.array(prob).flatten()
samples = np.vstack(samples)

sns.distplot(prob)
show()
sns.distplot(samples, axlabel=r'$\theta$')
#savefig("1d_gauss_posterior.pdf")
show()
sample_mean = np.mean(samples, axis=0)
print(u"mean: {0:>.4f} \u00B1 {1:>.4f}".format(float(np.mean(samples, axis=0)), float(np.std(samples, axis=0))))

In [14]:
offset = 33
samples = np.array([pool.thetas for pool in pools])
distances = np.array([pool.dists for pool in pools])

plot_pool(distances[offset:], samples[offset:])

mean: 0.9926 Â± 0.0098

In [15]:
var_vals = np.var((samples), axis=1)
eps_values = np.array([pool.eps for pool in pools])

In [16]:
max_eps = 38
f, ax = subplots(1,1)

ax.plot(eps_values, var_vals[:max_eps], "-" ,label="ABC PMC")

ax.plot(eps_values, (float(sigma)**2/n + eps_values[:max_eps]**2/3), "--", label="ABC analytic")

ax.axhline(1e-4, linestyle=":", linewidth=4, color=sns.xkcd_rgb["pale red"], label="Bayesian")

#ticks = xticks()[0][:-1]
#xticks(ticks, ["{0:>4.3f}".format(eps) for eps in eps_values[ticks.tolist()]], rotation=70)
#ax.semilogy()
ax.set_ylabel(r"var($\theta$)")
ax.set_xlabel(r"$\epsilon$")
ax.legend(loc="best")
ax.invert_xaxis()
ylim([-.01, None])
#savefig("1d_gauss_variance.pdf")

Out[16]:
(-0.01, 0.089999999999999997)
In [17]:
acc_ratios = np.array([pool.ratio for pool in pools])
plot(acc_ratios, label="ABC PMC")
plot(2 * np.array(eps_values), label="analytic")
xticks(np.arange(len(eps_values)), ["{0:>4.3f}".format(eps) for eps in eps_values], rotation=70)
#semilogy()
ylabel("acceptance ratio")
xlabel(r"$\epsilon$")
legend(loc="best")

Out[17]:
<matplotlib.legend.Legend at 0x118367390>
In [18]:
from scipy.special import erf
def p_theta_eta(theta, eta):
phi = lambda t: (1 + erf(t / np.sqrt(2))) / 2
ybar = np.mean(y, axis=0)
return 1. / (2 * eta) *(phi((ybar - theta + eta) / (sigma / np.sqrt(n))) - phi((ybar - theta - eta) / (sigma / np.sqrt(n))) )

In [19]:
def get_ticks(ticks, num=7):
return [float("{0:<.2f}".format(tick)) for tick in np.linspace(ticks[0], ticks[-1], 7)]

In [20]:
from scipy.stats import norm

In [21]:
nbins = 6
rc('axes', labelsize=40, titlesize=30)
#rc("ticks", size=20)
sns.set(style="ticks")
with mpl.rc_context(rc={"figure.figsize": [15, 20]}):
for i in range(36/2):
subplot(5, 4, i+1)
title(r"Iteration: {0}, $\epsilon$: {1:>4.3f}".format(i*2, eps_values[i*2]), size=15)
ax = sns.distplot(samples[i*2], hist=False, axlabel=r'$\theta$', hist_kws={"rotation":.3, "fontsize":30})
ax.set_xlabel(r'$\theta$', size=15)
ax.locator_params(axis = 'x', nbins = nbins)
ax.locator_params(axis = 'y', nbins = 5)
min_val, max_val = ax.get_xlim()
x_grid = np.linspace(min_val, max_val, 1000)
plot(x_grid, p_theta_eta(x_grid, eps_values[i*2]), "--")
if i*2>=22:
xlim([0.96, 1.04])

if i*2<=14:
ylim([None, 9])

rv = norm(loc=np.mean(y), scale=1/np.sqrt(n))
plot(x_grid, rv.pdf(x_grid), ":", color=sns.xkcd_rgb["pale red"])
fill(x_grid, rv.pdf(x_grid), color=sns.xkcd_rgb["pale red"], alpha=0.2)
tick_params(axis='both', which='major', labelsize=15)
#i = 37
#subplot(5, 4, 20)
#title(r"Iteration: {0}, $\epsilon$: {1:>4.3f}".format(i, eps_values[i]))
#ax = sns.distplot(samples[i], hist=False, axlabel=r'$\theta$')
#ax.locator_params(axis = 'x', nbins = nbins)
#min_val, max_val = ax.get_xlim()
#x_grid = np.linspace(min_val, max_val, 1000)
#plot(x_grid, p_theta_eta(x_grid, eps_values[i]))

ax = subplot(5, 4, i+2)
plot(1, label="ABC PMC")
plot(1, "--", label="ABC analytic")
plot(1, ":", label="Bayesian")
legend(loc=2)
ax.set_axis_off()

#savefig("1d_gauss_posterior_evolution.pdf")

In [22]:
from scipy.stats import gaussian_kde

In [23]:
x_grid = np.linspace(0.4, 1.6, 1000)
with mpl.rc_context(rc={"figure.figsize": [15, 20]}):
for i in range(len(samples)/2):
subplot(5, 4, i+1)
title("Iteration: {0}, eps: {1:>4.3f}".format(i*2, eps_values[i*2]))

kde = gaussian_kde(np.array(samples[i*2])[:, 0])
plot(x_grid, kde(x_grid))
plot(x_grid, p_theta_eta(x_grid, eps_values[i*2]))