%pylab inline
Populating the interactive namespace from numpy and matplotlib
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("talk")
rc('axes', labelsize=20, titlesize=20)
import numpy as np
import triangle
from clerk.stats.distances import mahalanobis
import abcpmc
#np.random.seed(987654321)
mean = 1
sigma = 1
n = 10000
y = np.random.normal(mean, sigma, n)
sns.distplot(y)
<matplotlib.axes._subplots.AxesSubplot at 0x1175fccd0>
prior = abcpmc.TophatPrior([-5], [5])
def mean_dist(x, y):
return np.abs(np.mean(x, axis=0) - np.mean(y, axis=0))
dist = mean_dist
def create_new_sample(theta):
return np.random.normal(theta, sigma, n)
postfn = create_new_sample
theta = prior()
print theta
x = postfn([mean])
d = dist(x, y)
print d
[-0.4025917] 0.000582605098998
distances = [dist(y, postfn(mean)) for _ in range(1000)]
sns.distplot(distances)
<matplotlib.axes._subplots.AxesSubplot at 0x11780e8d0>
def sample(T, eps_val, eps_min):
abcpmc_sampler = abcpmc.Sampler(N=2000, Y=y, postfn=postfn, dist=dist, threads=8)
eps = abcpmc.ConstEps(T, eps_val)
pools = []
for pool in abcpmc_sampler.sample(prior, eps):
print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(pool.t, eps(pool.t), pool.ratio))
for i, (mean, std) in enumerate(zip(*abcpmc.weighted_avg_and_std(pool.thetas, pool.ws, axis=0))):
print(u" theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean,std))
eps.eps = np.percentile(pool.dists, 90)
if eps.eps < eps_min:
eps.eps = eps_min
pools.append(pool)
abcpmc_sampler.close()
return pools
T=38
eps=0.5
pools = sample(T, eps, sigma/sqrt(n))
T: 0, eps: 0.5000, ratio: 0.1000 theta[0]: 0.9792 ± 0.2894 T: 1, eps: 0.4495, ratio: 0.6137 theta[0]: 1.0011 ± 0.2582 T: 2, eps: 0.3917, ratio: 0.6086 theta[0]: 0.9932 ± 0.2256 T: 3, eps: 0.3414, ratio: 0.6001 theta[0]: 0.9860 ± 0.1989 T: 4, eps: 0.2992, ratio: 0.6127 theta[0]: 0.9945 ± 0.1734 T: 5, eps: 0.2653, ratio: 0.6077 theta[0]: 0.9961 ± 0.1531 T: 6, eps: 0.2335, ratio: 0.6008 theta[0]: 0.9967 ± 0.1362 T: 7, eps: 0.2053, ratio: 0.6335 theta[0]: 0.9904 ± 0.1180 T: 8, eps: 0.1806, ratio: 0.6219 theta[0]: 0.9940 ± 0.1040 T: 9, eps: 0.1577, ratio: 0.6099 theta[0]: 0.9918 ± 0.0905 T: 10, eps: 0.1387, ratio: 0.6207 theta[0]: 0.9923 ± 0.0811 T: 11, eps: 0.1228, ratio: 0.6177 theta[0]: 0.9919 ± 0.0717 T: 12, eps: 0.1081, ratio: 0.6057 theta[0]: 0.9914 ± 0.0640 T: 13, eps: 0.0959, ratio: 0.6107 theta[0]: 0.9927 ± 0.0557 T: 14, eps: 0.0835, ratio: 0.6232 theta[0]: 0.9920 ± 0.0496 T: 15, eps: 0.0729, ratio: 0.5928 theta[0]: 0.9909 ± 0.0426 T: 16, eps: 0.0624, ratio: 0.5891 theta[0]: 0.9905 ± 0.0373 T: 17, eps: 0.0554, ratio: 0.5872 theta[0]: 0.9932 ± 0.0340 T: 18, eps: 0.0498, ratio: 0.5938 theta[0]: 0.9933 ± 0.0294 T: 19, eps: 0.0436, ratio: 0.6011 theta[0]: 0.9920 ± 0.0271 T: 20, eps: 0.0385, ratio: 0.5685 theta[0]: 0.9920 ± 0.0244 T: 21, eps: 0.0337, ratio: 0.5599 theta[0]: 0.9926 ± 0.0222 T: 22, eps: 0.0299, ratio: 0.5498 theta[0]: 0.9925 ± 0.0198 T: 23, eps: 0.0261, ratio: 0.5549 theta[0]: 0.9924 ± 0.0180 T: 24, eps: 0.0232, ratio: 0.5258 theta[0]: 0.9924 ± 0.0167 T: 25, eps: 0.0205, ratio: 0.5056 theta[0]: 0.9926 ± 0.0155 T: 26, eps: 0.0181, ratio: 0.4697 theta[0]: 0.9929 ± 0.0148 T: 27, eps: 0.0159, ratio: 0.4240 theta[0]: 0.9922 ± 0.0138 T: 28, eps: 0.0141, ratio: 0.4017 theta[0]: 0.9927 ± 0.0134 T: 29, eps: 0.0125, ratio: 0.3784 theta[0]: 0.9924 ± 0.0129 T: 30, eps: 0.0112, ratio: 0.3383 theta[0]: 0.9929 ± 0.0118 T: 31, eps: 0.0101, ratio: 0.3437 theta[0]: 0.9923 ± 0.0111 T: 32, eps: 0.0100, ratio: 0.3518 theta[0]: 0.9922 ± 0.0115 T: 33, eps: 0.0100, ratio: 0.3458 theta[0]: 0.9924 ± 0.0114 T: 34, eps: 0.0100, ratio: 0.3600 theta[0]: 0.9930 ± 0.0114 T: 35, eps: 0.0100, ratio: 0.3515 theta[0]: 0.9931 ± 0.0111 T: 36, eps: 0.0100, ratio: 0.3637 theta[0]: 0.9921 ± 0.0112 T: 37, eps: 0.0100, ratio: 0.3531 theta[0]: 0.9926 ± 0.0113
def plot_pool(prob, samples):
prob = np.array(prob).flatten()
samples = np.vstack(samples)
sns.distplot(prob)
show()
sns.distplot(samples, axlabel=r'$\theta$')
#savefig("1d_gauss_posterior.pdf")
show()
sample_mean = np.mean(samples, axis=0)
print(u"mean: {0:>.4f} \u00B1 {1:>.4f}".format(float(np.mean(samples, axis=0)), float(np.std(samples, axis=0))))
offset = 33
samples = np.array([pool.thetas for pool in pools])
distances = np.array([pool.dists for pool in pools])
plot_pool(distances[offset:], samples[offset:])
mean: 0.9926 ± 0.0098
var_vals = np.var((samples), axis=1)
eps_values = np.array([pool.eps for pool in pools])
max_eps = 38
f, ax = subplots(1,1)
ax.plot(eps_values, var_vals[:max_eps], "-" ,label="ABC PMC")
ax.plot(eps_values, (float(sigma)**2/n + eps_values[:max_eps]**2/3), "--", label="ABC analytic")
ax.axhline(1e-4, linestyle=":", linewidth=4, color=sns.xkcd_rgb["pale red"], label="Bayesian")
#ticks = xticks()[0][:-1]
#xticks(ticks, ["{0:>4.3f}".format(eps) for eps in eps_values[ticks.tolist()]], rotation=70)
#ax.semilogy()
ax.set_ylabel(r"var($\theta$)")
ax.set_xlabel(r"$\epsilon$")
ax.legend(loc="best")
ax.invert_xaxis()
ylim([-.01, None])
#savefig("1d_gauss_variance.pdf")
(-0.01, 0.089999999999999997)
acc_ratios = np.array([pool.ratio for pool in pools])
plot(acc_ratios, label="ABC PMC")
plot(2 * np.array(eps_values), label="analytic")
xticks(np.arange(len(eps_values)), ["{0:>4.3f}".format(eps) for eps in eps_values], rotation=70)
#semilogy()
ylabel("acceptance ratio")
xlabel(r"$\epsilon$")
legend(loc="best")
<matplotlib.legend.Legend at 0x118367390>
from scipy.special import erf
def p_theta_eta(theta, eta):
phi = lambda t: (1 + erf(t / np.sqrt(2))) / 2
ybar = np.mean(y, axis=0)
return 1. / (2 * eta) *(phi((ybar - theta + eta) / (sigma / np.sqrt(n))) - phi((ybar - theta - eta) / (sigma / np.sqrt(n))) )
def get_ticks(ticks, num=7):
return [float("{0:<.2f}".format(tick)) for tick in np.linspace(ticks[0], ticks[-1], 7)]
from scipy.stats import norm
nbins = 6
rc('axes', labelsize=40, titlesize=30)
#rc("ticks", size=20)
sns.set(style="ticks")
with mpl.rc_context(rc={"figure.figsize": [15, 20]}):
for i in range(36/2):
subplot(5, 4, i+1)
title(r"Iteration: {0}, $\epsilon$: {1:>4.3f}".format(i*2, eps_values[i*2]), size=15)
ax = sns.distplot(samples[i*2], hist=False, axlabel=r'$\theta$', hist_kws={"rotation":.3, "fontsize":30})
ax.set_xlabel(r'$\theta$', size=15)
ax.locator_params(axis = 'x', nbins = nbins)
ax.locator_params(axis = 'y', nbins = 5)
min_val, max_val = ax.get_xlim()
x_grid = np.linspace(min_val, max_val, 1000)
plot(x_grid, p_theta_eta(x_grid, eps_values[i*2]), "--")
if i*2>=22:
xlim([0.96, 1.04])
if i*2<=14:
ylim([None, 9])
rv = norm(loc=np.mean(y), scale=1/np.sqrt(n))
plot(x_grid, rv.pdf(x_grid), ":", color=sns.xkcd_rgb["pale red"])
fill(x_grid, rv.pdf(x_grid), color=sns.xkcd_rgb["pale red"], alpha=0.2)
tick_params(axis='both', which='major', labelsize=15)
#i = 37
#subplot(5, 4, 20)
#title(r"Iteration: {0}, $\epsilon$: {1:>4.3f}".format(i, eps_values[i]))
#ax = sns.distplot(samples[i], hist=False, axlabel=r'$\theta$')
#ax.locator_params(axis = 'x', nbins = nbins)
#min_val, max_val = ax.get_xlim()
#x_grid = np.linspace(min_val, max_val, 1000)
#plot(x_grid, p_theta_eta(x_grid, eps_values[i]))
ax = subplot(5, 4, i+2)
plot(1, label="ABC PMC")
plot(1, "--", label="ABC analytic")
plot(1, ":", label="Bayesian")
legend(loc=2)
ax.set_axis_off()
subplots_adjust(hspace = 0.40)
#savefig("1d_gauss_posterior_evolution.pdf")
from scipy.stats import gaussian_kde
x_grid = np.linspace(0.4, 1.6, 1000)
with mpl.rc_context(rc={"figure.figsize": [15, 20]}):
for i in range(len(samples)/2):
subplot(5, 4, i+1)
title("Iteration: {0}, eps: {1:>4.3f}".format(i*2, eps_values[i*2]))
kde = gaussian_kde(np.array(samples[i*2])[:, 0])
plot(x_grid, kde(x_grid))
plot(x_grid, p_theta_eta(x_grid, eps_values[i*2]))