See the paper for full details of the model.
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm, multivariate_normal, invgamma
import pandas as pd
from pandas.plotting import scatter_matrix
from math import sqrt, exp
from sympy import Symbol
from sympy.solvers import solve
I got a (somewhat) arbitrary estimate of average snow depth of 0.15 meters from eyeballing the graphs on https://earthobservatory.nasa.gov/images/146758/mapping-snow-on-arctic-sea-ice. I also borrowed average ice thickness of 2 meters from lecture, along with the lower/upper bounds given in the problem statement.
The variances I chose are wide to be less informative, but the bounds are truncated to prevent this from introducing inaccuracy.
I chose to use a very wide hyperprior since we have so little info. I reparametrized the inverse-gamma distributions in terms of mean and variance. I chose a mean that was equal to the variance I expect by calculating the square of the standard deviation, and then choose the standard deviation to be equal to the mean. That means that the variance will be the square of the mean, or the fourth power of the original parameter estimated standard deviation.
For the mean on the error variance hyperprior, I drew from Prof. Matthew Parno's suggestion that the IceSat-2 claims a standard error of 0.02 meters, and set the mean to the square of that.
For the mean on the snow variance hyperprior, I used another eyeballed estimate of 0.2 squared from the NASA sea ice website above.
# freeboard measurement
F = 0.1
# fixed variances
var_rw = 7**2
var_ri = 80**2
var_rs = 90**2
var_H = 2**2
# fixed means
mean_rw = 1020
mean_ri = 910
mean_rs = 300
mean_H = 2 # from lecture
mean_S = 0.15 # eyeballed estimate from NASA's CryoSat-2 website
mu_x = np.array([mean_rw, mean_ri, mean_rs, mean_H, mean_S]) # combined into one mean vector
# fixed upper and lower bounds on parameters
rw_lb = 1010
rw_ub = 1030
ri_lb = 750
ri_ub = 940
rs_lb = 100
rs_ub = 400
H_lb = 0.1 # from lecture
H_ub = 5 # from lecture
S_lb = 0 # we cannot have negative snow, no upper bound since limited info
# hyperparameter estimates
estimate_sigma_eps = 0.03 # from Dr. Parno's suggestion
estimate_sigma_S = 0.2 # from CryoSat-2 website
# reparametrized hyper-hyper-parameters using hyperparameter estimates
mean_var_eps = estimate_sigma_eps**2
var_var_eps = mean_var_eps**2
mean_var_S = estimate_sigma_S**2
var_var_S = mean_var_S**2
print(mean_var_eps, var_var_eps)
print(mean_var_S, var_var_S)
0.0009 8.1e-07 0.04000000000000001 0.0016000000000000007
def GetHyperHyperparams(mean, variance):
alpha = Symbol('alpha')
beta = Symbol('beta')
# equations from wikipedia for inv-gamma parametrizations
solution = solve([beta / (alpha-1) - mean, beta**2 / (alpha-1)**2 / (alpha-2) - variance])[0]
return solution.values()
ae, be = GetHyperHyperparams(mean_var_eps, var_var_eps)
alpha_eps = float(ae)
beta_eps = float(be)
nugget_eps = 1e-4 # needed to keep this from going to zero
print('epsilon variance hyperhyperparams:', alpha_eps, beta_eps, nugget_eps)
aS, bS = GetHyperHyperparams(mean_var_S, var_var_S)
alpha_S = float(aS)
beta_S = float(bS)
nugget_S = 3e-3 #4e-4 # needed to keep this from going to zero
print('snow variance hyperhyperparams:', alpha_S, beta_S, nugget_S)
epsilon variance hyperhyperparams: 3.0 0.0018 0.0001 snow variance hyperhyperparams: 3.0 0.08 0.003
# seawater density
rws = np.linspace(rw_lb, rw_ub)
rws_pdf = norm.pdf(rws, mean_rw, sqrt(var_rw))
plt.plot(rws, rws_pdf)
plt.title('water density: $\\rho_w$')
plt.fill_between(rws, 0, rws_pdf, alpha=0.6)
<matplotlib.collections.PolyCollection at 0x7f46e0eff1d0>
# ice density
ris = np.linspace(ri_lb, ri_ub)
ris_pdf = norm.pdf(ris, mean_ri, sqrt(var_ri))
plt.plot(ris, ris_pdf)
plt.title('ice density: $\\rho_i$')
plt.fill_between(ris, 0, ris_pdf, alpha=0.6)
<matplotlib.collections.PolyCollection at 0x7f46e0d9cc90>
# snow density
rss = np.linspace(rs_lb, rs_ub)
rss_pdf = norm.pdf(rss, mean_rs, sqrt(var_rs))
plt.plot(rss, rss_pdf)
plt.title('snow density: $\\rho_s$')
plt.fill_between(rss, 0, rss_pdf, alpha=0.6)
<matplotlib.collections.PolyCollection at 0x7f46e0d1b4d0>
# Ice Thickness
Hs = np.linspace(H_lb, H_ub)
Hs_pdf = norm.pdf(Hs, mean_H, sqrt(var_H))
plt.plot(Hs, Hs_pdf)
plt.title('ice thickness: $H$')
plt.fill_between(Hs, 0, Hs_pdf, alpha=0.6)
<matplotlib.collections.PolyCollection at 0x7f46e0e82e50>
# error hyperprior
invgamma_eps = invgamma(alpha_eps, loc=nugget_eps, scale=beta_eps)
var_epss = np.linspace(invgamma_eps.ppf(0.01), invgamma_eps.ppf(0.99), 100)
var_epss_pdf = invgamma_eps.pdf(var_epss)
plt.plot(var_epss, var_epss_pdf)
plt.title('hyperprior on additive error variance $\\epsilon$')
plt.fill_between(var_epss, 0, var_epss_pdf, alpha=0.6)
<matplotlib.collections.PolyCollection at 0x7f46e323e150>
# snow hyperprior
invgamma_S = invgamma(alpha_S, loc=nugget_S, scale=beta_S)
var_Ss = np.linspace(invgamma_S.ppf(0.01), invgamma_S.ppf(0.99), 100)
var_Ss_pdf = invgamma_S.pdf(var_Ss)
plt.plot(var_Ss, var_Ss_pdf)
plt.title('hyperprior on snow variance $\\sigma^2_S$')
plt.fill_between(var_Ss, 0, var_Ss_pdf, alpha=0.6)
<matplotlib.collections.PolyCollection at 0x7f46e0c47fd0>
# prior snow prior
norm_S = norm(mean_S, estimate_sigma_S)
Ss = np.linspace(S_lb, norm_S.ppf(0.995))
Ss_pdf = norm_S.pdf(Ss)
plt.plot(Ss, Ss_pdf)
plt.title('prior-prior on snow: $S$')
plt.fill_between(Ss, 0, Ss_pdf, alpha=0.6)
<matplotlib.collections.PolyCollection at 0x7f46e0bca290>
Computes the model's prediction of Freeboard given the three densities, snow thickness, and ice thickness.
def ForwardModel(rw, ri, rs, H, S):
return (rw-ri)/rw * (H - S*(rs/(rw-ri))) # from problem statement
Evaluate log of the likelihood of the freeboard measurement F
given params x
and some hierarchally modeled additive error var_eps
:
f(F|x,ϵ)∼N(0,ϵ)
where the value to evaluate the likelihood at is F−g(x) where g(x) is the forward model.
def LogLikelihood(x, F, var_eps):
"""
ARGS:
x (np.array): A numpy array of length 5 containing the model parameters: rw, ri, rs, H, S
F (float): A scalar containing the observation of freeboard F
var_eps (float): A scalar containing the given error variance
RETURNS:
(float): A scalar containing log( f(F | x, var_eps) )
"""
if var_eps < 0: #nugget_eps:
return -1e10 # big negative value, means log(zero) basically if out of bounds
diff = F - ForwardModel(*x)
sigma_eps = sqrt(var_eps)
return norm.logpdf(diff, 0, sigma_eps)
Since our prior consists of truncated normal distributions, we need to check if the proposed parameters x
fit this.
def CheckPriorBounds(x):
rw, ri, rs, H, S = x
# uses global vars for lower/upper bounds
return rw_lb <= rw <= rw_ub and ri_lb <= ri <= ri_ub and rs_lb <= rs <= rs_ub and H_lb <= H <= H_ub and S_lb <= S
Evaluates log of the prior density of proposed parameters x
given some variable snow variance:
f(x|σ2S)∼N(μx,covx)
where the covariance contains our snow variance, and where we evaluate the density with our proposed x
.
Assume that the covariance matrix is diagonal; i.e. we have an independent prior.
def LogPrior(x, var_S):
"""
ARGS:
x (np.array): A numpy array of length 5 containing the model parameters: rw, ri, rs, H, S
var_S (float): A scalar containing the snow variance
RETURNS:
(float): A scalar containing log( f(x | var_S) )
"""
if not CheckPriorBounds(x) or var_S < 0: #nugget_S:
return -1e10 # big negative value, means log(zero) basically if out of bounds
# the first four are global vars; var_S is a variable parameter
cov_x = np.diag([var_rw, var_ri, var_rs, var_H, var_S])
return multivariate_normal.logpdf(x, mean=mu_x, cov=cov_x)
Evaluates the log of the inverse-gamma density on some variance and hyperparameters α and β.
def LogInvGamma(var, alpha, beta, nugget):
return invgamma.logpdf(var, alpha, loc=nugget, scale=beta)
Evaluates log of the posterior on model parameters x
:
f(x|F,ϵ,σ2S)∝f(F|x,ϵ)⋅f(x|σ2S)
which is the likelihood times the prior on x
.
def LogPosteriorX(x, F, var_eps, var_S):
"""
ARGS:
x (np.array): A numpy array of length 5 containing the model parameters: rw, ri, rs, H, S
F (float): A scalar containing the observation of freeboard F
var_eps (float): A scalar containing the given error variance
var_S (float): A scalar containing the snow variance
RETURNS:
(float): A scalar containing log( f(x | F, var_eps, var_S) )
"""
return LogLikelihood(x, F, var_eps) + LogPrior(x, var_S)
Evaluates log of the posterior on the error var_eps
:
f(ϵ|F,x)∝f(F|x,ϵ)f(ϵ)
evaluated at the proposed var_eps
. All dependence on var_S
is dropped since it's held constant here, and x
is dropped in the final term since the error is independent of the parameters.
def LogPosteriorEps(x, F, var_eps):
"""
ARGS:
x (np.array): A numpy array of length 5 containing the model parameters: rw, ri, rs, H, S
F (float): A scalar containing the observation of freeboard F
var_eps (float): A scalar containing the given error variance
RETURNS:
(float): A scalar containing log( f(var_eps | F, x) )
"""
# uses global variables for inv-gamma prior
return LogLikelihood(x, F, var_eps) + LogInvGamma(var_eps, alpha_eps, beta_eps, nugget_eps)
Evaluates log of the posterior on the snow variance var_S
:
f(σ2S|F,x,ϵ)=f(F|x,ϵ)f(x|σ2S)f(σ2S)
evaluated at a given var_S
.
def LogPosteriorS(x, F, var_eps, var_S):
"""
ARGS:
x (np.array): A numpy array of length 5 containing the model parameters: rw, ri, rs, H, S
F (float): A scalar containing the observation of freeboard F
var_eps (float): A scalar containing the given error variance
var_S (float): A scalar containing the snow variance
RETURNS:
(float): A scalar containing log( f(var_S | F, x, var_eps) )
"""
# uses global variables for inv-gamma prior
return LogLikelihood(x, F, var_eps) + LogPrior(x, var_S) + LogInvGamma(var_S, alpha_S, beta_S, nugget_S)
Preparing for three-step Metropolis-Within-Gibbs sampling.
Repeat for each step on the Markov Chain:
Setup:
Create a multivariate Gaussian proposal for x
and univariate Gaussian proposals for each hyperparameter to take advantage of the benefits of symmetrical proposals.
Choose each proposal scale qualitatively based on the acceptance ratio and integrated autocorrelation time calculated after MCMC.
# proposal for x
x_prop_scale = 2.8e-1
x_prop_stds = x_prop_scale * np.sqrt(np.array([var_rw, var_ri, var_rs, var_H, mean_var_S])) # trying to move proportionally to the prior
print('x proposal standard deviations:', x_prop_stds)
# proposal for var_eps
eps_prop_scale = 3.5e-2
eps_prop_std = eps_prop_scale * estimate_sigma_eps
print('var_eps proposal std dev:', eps_prop_std)
# proposal for var_S
S_prop_scale = 2.5e-1
S_prop_std = S_prop_scale * estimate_sigma_S
print('var_S proposal std dev:', S_prop_std)
x proposal standard deviations: [ 1.96 22.4 25.2 0.56 0.056] var_eps proposal std dev: 0.0010500000000000002 var_S proposal std dev: 0.05
x
given fixed hyperparameters using LogPosteriorX
function¶def SampleX(x, F, var_eps, var_S, past_logPosterior):
"""
ARGS:
x (np.array): A numpy array of length 5 containing the model parameters: rw, ri, rs, H, S
F (float): A scalar containing the observation of freeboard F
var_eps (float): A scalar containing the given error variance
var_S (float): A scalar containing the snow variance
past_logPosterior(float): A scalar containing the log-posterior on x (for use in acceptance ratio/returning)
RETURNS:
(boolean): true if the new proposal was accepted, false otherwise
(np.array): A numpy array containing the next x (which may be old or new)
(float): the log-posterior of that x
"""
# uses global variable x_prop_stds for standard deviations
proposal_x = x + x_prop_stds * np.random.randn(5) # 5 is number of params in x
proposal_logPosterior = LogPosteriorX(proposal_x, F, var_eps, var_S)
# acceptance ratio
gamma = exp(proposal_logPosterior - past_logPosterior)
# accept or reject
p = np.random.rand()
if p < gamma:
return True, proposal_x, proposal_logPosterior
else:
return False, x, past_logPosterior
var_eps
given fixed x
using LogPosteriorEps
¶def SampleEps(x, F, var_eps, past_logPosterior):
"""
ARGS:
x (np.array): A numpy array of length 5 containing the model parameters: rw, ri, rs, H, S
F (float): A scalar containing the observation of freeboard F
var_eps (float): A scalar containing the given error variance
past_logPosterior(float): A scalar containing the log-posterior on var_eps (for use in acceptance ratio/returning)
RETURNS:
(boolean): true if the new proposal was accepted, false otherwise
(float): the next var_eps (which may be old or new)
(float): the log-posterior of that var_eps
"""
# uses global var eps_prop_std for std dev
proposal_var_eps = norm.rvs(var_eps, eps_prop_std)
proposal_logPosterior = LogPosteriorEps(x, F, proposal_var_eps)
# acceptance ratio
gamma = exp(proposal_logPosterior - past_logPosterior)
# accept or reject
p = np.random.rand()
if p < gamma:
return True, proposal_var_eps, proposal_logPosterior
else:
return False, var_eps, past_logPosterior
var_eps
given fixed x
using LogPosteriorEps
¶def SampleS(x, F, var_eps, var_S, past_logPosterior):
"""
ARGS:
x (np.array): A numpy array of length 5 containing the model parameters: rw, ri, rs, H, S
F (float): A scalar containing the observation of freeboard F
var_eps (float): A scalar containing the given error variance
var_S (float): A scalar containing the snow variance
past_logPosterior(float): A scalar containing the log-posterior on var_S (for use in acceptance ratio/returning)
RETURNS:
(boolean): true if the new proposal was accepted, false otherwise
(float): the next var_S (which may be old or new)
(float): the log-posterior of that var_S
"""
# uses global var S_prop_std for std dev
proposal_var_S = norm.rvs(var_S, S_prop_std)
proposal_logPosterior = LogPosteriorS(x, F, var_eps, proposal_var_S)
# acceptance ratio
gamma = exp(proposal_logPosterior - past_logPosterior)
# accept or reject
p = np.random.rand()
if p < gamma:
return True, proposal_var_S, proposal_logPosterior
else:
return False, var_S, past_logPosterior
Set up some numpy matrices to record samples, then run the sampling forward, keeping track of the acceptance ratio for each type of step in Gibbs sampling.
num_steps = 10000
# start at mean of prior and hyperpriors since why not
x0 = mu_x
var_eps0 = mean_var_eps + nugget_eps
var_S0 = mean_var_S + nugget_S
print('initial conditions:', x0, var_eps0, var_S0)
# recording of the samples
samps_x = np.zeros((num_steps, 5))
logposts_x = np.zeros((num_steps))
samps_var_eps = np.zeros((num_steps))
logposts_var_eps = np.zeros((num_steps))
samps_var_S = np.zeros((num_steps))
logposts_var_S = np.zeros((num_steps))
# insert initial conditions into the arrays
samps_x[0,:] = x0
logposts_x[0] = LogPosteriorX(x0, F, var_eps0, var_S0)
samps_var_eps[0] = var_eps0
logposts_var_eps[0] = LogPosteriorEps(x0, F, var_eps0)
samps_var_S[0] = var_S0
logposts_var_S[0] = LogPosteriorS(x0, F, var_eps0, var_S0)
initial conditions: [1.02e+03 9.10e+02 3.00e+02 2.00e+00 1.50e-01] 0.001 0.04300000000000001
# accept ratios
accepts_x = 0
accepts_var_eps = 0
accepts_var_S = 0
# run the markov chain
for i in range(num_steps - 1):
# get some info from the prior step
past_x = samps_x[i, :]
past_logpost_x = logposts_x[i]
past_var_eps = var_eps0
past_var_S = var_S0
# sample x
x_accept, next_x, next_logpost_x = SampleX(past_x, F, past_var_eps, past_var_S, past_logpost_x)
# update the chain with new info
if x_accept:
accepts_x += 1
samps_x[i+1, :] = next_x
logposts_x[i+1] = next_logpost_x
# retrieve more information
past_logpost_var_eps = logposts_var_eps[i]
past_x = next_x # advance the x increment early to maybe reduce IACT
# sample var_eps
var_eps_accept, next_var_eps, next_logpost_var_eps = SampleEps(past_x, F, past_var_eps, past_logpost_var_eps)
# update the chain
if var_eps_accept:
accepts_var_eps += 1
samps_var_eps[i+1] = next_var_eps
logposts_var_eps[i+1] = next_logpost_var_eps
# retrieve more info
past_logpost_var_S = logposts_var_S[i]
past_var_eps = next_var_eps # advance the var_eps increment early to maybe reduce IACT
# sample var_S
var_S_accept, next_var_S, next_logpost_var_S = SampleS(past_x, F, past_var_eps, past_var_S, past_logpost_var_S)
# update the chain
if var_S_accept:
accepts_var_S += 1
samps_var_S[i+1] = next_var_S
logposts_var_S[i+1] = next_logpost_var_S
if i % 500 == 0 and i > 0:
print('Step %d, x AR = %0.2f, var_eps AR = %0.2f, var_S AR = %0.2f'%(i, accepts_x/(i+1), accepts_var_eps/(i+1), accepts_var_S/(i+1)) )
print('Total acceptance: x AR = %0.2f, var_eps AR = %0.2f, var_S AR = %0.2f'%(accepts_x/num_steps, accepts_var_eps/num_steps, accepts_var_S/num_steps ))
Step 500, x AR = 0.25, var_eps AR = 0.32, var_S AR = 0.30 Step 1000, x AR = 0.26, var_eps AR = 0.29, var_S AR = 0.26 Step 1500, x AR = 0.26, var_eps AR = 0.30, var_S AR = 0.25 Step 2000, x AR = 0.27, var_eps AR = 0.31, var_S AR = 0.26 Step 2500, x AR = 0.28, var_eps AR = 0.30, var_S AR = 0.28 Step 3000, x AR = 0.28, var_eps AR = 0.30, var_S AR = 0.28 Step 3500, x AR = 0.29, var_eps AR = 0.29, var_S AR = 0.28 Step 4000, x AR = 0.29, var_eps AR = 0.30, var_S AR = 0.29 Step 4500, x AR = 0.29, var_eps AR = 0.30, var_S AR = 0.28 Step 5000, x AR = 0.30, var_eps AR = 0.30, var_S AR = 0.28 Step 5500, x AR = 0.30, var_eps AR = 0.30, var_S AR = 0.29 Step 6000, x AR = 0.30, var_eps AR = 0.30, var_S AR = 0.29 Step 6500, x AR = 0.30, var_eps AR = 0.30, var_S AR = 0.29 Step 7000, x AR = 0.30, var_eps AR = 0.30, var_S AR = 0.29 Step 7500, x AR = 0.30, var_eps AR = 0.31, var_S AR = 0.29 Step 8000, x AR = 0.30, var_eps AR = 0.30, var_S AR = 0.29 Step 8500, x AR = 0.30, var_eps AR = 0.30, var_S AR = 0.29 Step 9000, x AR = 0.30, var_eps AR = 0.31, var_S AR = 0.28 Step 9500, x AR = 0.30, var_eps AR = 0.31, var_S AR = 0.28 Total acceptance: x AR = 0.30, var_eps AR = 0.31, var_S AR = 0.28
fig, axs = plt.subplots(nrows=10, sharex=True, figsize=(16, 36))
for i in range(10):
axs[i].ticklabel_format(useOffset=False)
axs[0].plot(samps_x[:, 0], linewidth=2)
axs[0].set_ylabel('water density')
axs[1].plot(samps_x[:, 1], linewidth=2)
axs[1].set_ylabel('ice density')
axs[2].plot(samps_x[:, 2], linewidth=2)
axs[2].set_ylabel('snow density')
axs[3].plot(samps_x[:, 3], linewidth=2)
axs[3].set_ylabel('ice thickness')
axs[4].plot(samps_x[:, 4], linewidth=2)
axs[4].set_ylabel('snow thickness')
axs[5].plot(logposts_x[:], linewidth=2)
axs[5].set_ylabel('Log Posterior of X')
axs[6].plot(samps_var_eps[:], linewidth=2)
axs[6].set_ylabel('additive error variance')
axs[7].plot(logposts_var_eps[:], linewidth=2)
axs[7].set_ylabel('Log Posterior of error')
axs[8].plot(samps_var_S[:], linewidth=2)
axs[8].set_ylabel('snow thickness variance')
axs[9].plot(logposts_var_S[:], linewidth=2)
axs[9].set_ylabel('Log Posterior of snow thickness variance')
axs[9].set_xlabel('MCMC Iteration')
plt.show()
df = pd.DataFrame(samps_x, columns=['rw', 'ri', 'rs', 'H', 'S'])
scatter_matrix(df, figsize=(16,16), alpha=0.2)
plt.show()
fig, ax = plt.subplots(1, 3, figsize=(16,4))
ax[0].hist(samps_var_eps, 30)
ax[0].set_title('posterior on error variance')
ax[1].hist(samps_var_S, 30)
ax[1].set_title('posterior on snow variance')
ax[2].hist(samps_x[:, 3], 30)
ax[2].set_title('posterior on ice thickness H')
plt.show()
Examine the autocorrelation of the five parameters and two hyperparameters and use that to inform our standard error for our sample mean using the central limit theorem.
burn_in = 100
slice = (num_steps-burn_in)//2
def calculateAutoCorr(signal):
diffs = signal[burn_in:] - np.mean(signal[burn_in:])
corr = np.correlate(diffs, diffs, mode='same')
corr /= np.max(corr) # normalize so the maximum autocorrelation is 1
return corr[slice:]
# calculations
corr_rw = calculateAutoCorr(samps_x[:, 0])
corr_ri = calculateAutoCorr(samps_x[:, 1])
corr_rs = calculateAutoCorr(samps_x[:, 2])
corr_H = calculateAutoCorr(samps_x[:, 3])
corr_S = calculateAutoCorr(samps_x[:, 4])
corr_var_eps = calculateAutoCorr(samps_var_eps)
corr_var_S = calculateAutoCorr(samps_var_S)
fig, ax = plt.subplots(1, 1, figsize=(16,10))
ax.plot(corr_rw, label='$\\rho_w$')
ax.plot(corr_ri, label='$\\rho_i$')
ax.plot(corr_rs, label='$\\rho_s$')
ax.plot(corr_H, label='$H$')
ax.plot(corr_S, label='$S$')
ax.plot(corr_var_eps, label='$\\epsilon$')
ax.plot(corr_var_S, label='$\\sigma^2_S$')
ax.plot([0, num_steps/2], [0, 0], '--k')
ax.legend()
ax.set_xlim(-50, 1000)
# integrated autocorrelation times
iact_rw = 1 + 2*np.sum(corr_rw[1:1000])
iact_ri = 1 + 2*np.sum(corr_ri[1:1000])
iact_rs = 1 + 2*np.sum(corr_rs[1:1000])
iact_H = 1 + 2*np.sum(corr_H[1:1000])
iact_S = 1 + 2*np.sum(corr_S[1:1000])
iact_var_eps = 1 + 2*np.sum(corr_var_eps[1:1000])
iact_var_S = 1 + 2*np.sum(corr_var_S[1:1000])
print('Estimated IACTs:')
print('rw: %0.2f'%iact_rw)
print('ri: %0.2f'%iact_ri)
print('rs: %0.2f'%iact_rs)
print('H: %0.2f'%iact_H)
print('S: %0.2f'%iact_S)
print('var_eps: %0.2f'%iact_var_eps)
print('var_S: %0.2f'%iact_var_S)
Estimated IACTs: rw: 37.81 ri: 12.36 rs: 58.08 H: 15.86 S: 35.04 var_eps: 1.93 var_S: 1.41
Variance of the sample mean is equal to the integrated autocorrelation time, times the sample variance, divided by the sample size.
Standard error is the square root of the variance of the sample mean.
def GetStats(array, iact):
sample_mean = np.mean(array)
sample_var = np.var(array)
var_sample_mean = iact * sample_var / num_steps
return sample_mean, sample_var, var_sample_mean
m_rw, v_rw, sv_rw = GetStats(samps_x[:, 0], iact_rw)
print('rw: posterior mean: %0.2f, \tposterior standard deviation: %0.2f, \tstandard error: %0.2f'%(m_rw, sqrt(v_rw), sqrt(sv_rw)))
m_ri, v_ri, sv_ri = GetStats(samps_x[:, 1], iact_ri)
print('ri: posterior mean: %0.2f, \tposterior standard deviation: %0.2f, \tstandard error: %0.2f'%(m_ri, sqrt(v_ri), sqrt(sv_ri)))
m_rs, v_rs, sv_rs = GetStats(samps_x[:, 2], iact_rs)
print('rs: posterior mean: %0.2f, \tposterior standard deviation: %0.2f, \tstandard error: %0.2f'%(m_rs, sqrt(v_rs), sqrt(sv_rs)))
m_H, v_H, sv_H = GetStats(samps_x[:, 3], iact_H)
print('H: posterior mean: %0.2f, \tposterior standard deviation: %0.2f, \tstandard error: %0.2f'%(m_H, sqrt(v_H), sqrt(sv_H)))
m_S, v_S, sv_S = GetStats(samps_x[:, 4], iact_S)
print('S: posterior mean: %0.2f, \tposterior standard deviation: %0.2f, \tstandard error: %0.2f'%(m_S, sqrt(v_S), sqrt(sv_S)))
print()
m_var_eps, v_var_eps, sv_var_eps = GetStats(samps_var_eps, iact_var_eps)
print('var_eps: posterior mean: %0.3f, \tposterior standard deviation: %0.4f, \tstandard error: %0.6f'%(m_var_eps, sqrt(v_var_eps), sqrt(sv_var_eps)))
m_var_S, v_var_S, sv_var_S = GetStats(samps_var_S, iact_var_S)
print('var_S: posterior mean: %0.3f, \t\tposterior standard deviation: %0.4f, \tstandard error: %0.6f'%(m_var_S, sqrt(v_var_S), sqrt(sv_var_S)))
rw: posterior mean: 1019.91, posterior standard deviation: 4.99, standard error: 0.31 ri: posterior mean: 887.81, posterior standard deviation: 40.25, standard error: 1.42 rs: posterior mean: 267.57, posterior standard deviation: 69.47, standard error: 5.29 H: posterior mean: 1.34, posterior standard deviation: 0.55, standard error: 0.02 S: posterior mean: 0.23, posterior standard deviation: 0.15, standard error: 0.01 var_eps: posterior mean: 0.001, posterior standard deviation: 0.0003, standard error: 0.000004 var_S: posterior mean: 0.043, posterior standard deviation: 0.0123, standard error: 0.000146
Use the samples of the MCMC chain to accumulate a posterior predictive on the freeboard.
samps_F = np.zeros((num_steps))
for i in range(num_steps):
samps_F[i] = ForwardModel(*samps_x[i]) + norm.rvs(0, sqrt(samps_var_eps[i]))
plt.hist(samps_F, 30)
plt.title('Posterior Predictive on F')
plt.show()