##### Notebook preamble: inline-image helpers, packages, and small utilities.
using Base64
# NOTE(review): the notebook embedded these images as base64 <img> tags; the
# HTML payload here is empty (presumably stripped on export), so the files are
# read but nothing is actually rendered — confirm against the original notebook.
open("drton2017_01.jpg") do f
    base64 = base64encode(f)
    display("text/html", """""")
end
open("WBIC_paper_01.jpg") do f
    base64 = base64encode(f)
    display("text/html", """""")
end

using Distributed
using Printf
using SpecialFunctions

# Julia 0.7+ replacement for the removed `linspace`.
linspace(a, b, L::Integer) = range(a, stop=b, length=L)

import PyPlot; plt = PyPlot
using Distributions
using QuadGK
#using Mamba
using KernelDensity

# Return a pdf function built from a Gaussian kernel density estimate of the
# univariate sample X.
function makefunc_pdfkde(X)
    ik = InterpKDE(kde(X))
    pdfkde(x) = pdf(ik, x)
    return pdfkde
end

# Bivariate version: kernel density estimate of the joint sample (X, Y).
function makefunc_pdfkde(X, Y)
    ik = InterpKDE(kde((X, Y)))
    pdfkde(x, y) = pdf(ik, x, y)
    return pdfkde
end

# @sum(f_k, k, itr): sum of f_k over k in itr.  The accumulator's zero is
# taken from f_k evaluated at the first element, keeping the loop type-stable.
macro sum(f_k, k, itr)
    quote
        begin
            s = zero(($(esc(k)) -> $(esc(f_k)))($(esc(itr))[1]))
            for $(esc(k)) in $(esc(itr))
                s += $(esc(f_k))
            end
            s
        end
    end
end

# @prod(f_k, k, itr): product analogue of @sum.
macro prod(f_k, k, itr)
    quote
        begin
            p = one(($(esc(k)) -> $(esc(f_k)))($(esc(itr))[1]))
            for $(esc(k)) in $(esc(itr))
                p *= $(esc(f_k))
            end
            p
        end
    end
end

##### Thermal*(d, β) = the distribution d at the inverse temperature β,
##### i.e. the (unnormalized) density pdf(d, x)^β: logpdf is scaled by β.
@everywhere begin
    using Distributions
    import Distributions: logpdf, maximum, minimum
    import Distributions: _logpdf, length, insupport
    import Distributions: pdf, rand, _rand!

    # Continuous univariate case.
    mutable struct ThermalCU{T<:Real, D<:ContinuousUnivariateDistribution} <: ContinuousUnivariateDistribution
        d::D
        β::T
    end
    logpdf(d::ThermalCU, x::Real) = d.β*logpdf(d.d, x)
    maximum(d::ThermalCU) = maximum(d.d)
    minimum(d::ThermalCU) = minimum(d.d)

    # Discrete univariate case.
    mutable struct ThermalDU{T<:Real, D<:DiscreteUnivariateDistribution} <: DiscreteUnivariateDistribution
        d::D
        β::T
    end
    logpdf(d::ThermalDU, x::Int) = d.β*logpdf(d.d, x)
    maximum(d::ThermalDU) = maximum(d.d)
    minimum(d::ThermalDU) = minimum(d.d)

    # Continuous multivariate case.
    mutable struct ThermalCM{T<:Real, D<:ContinuousMultivariateDistribution} <: ContinuousMultivariateDistribution
        d::D
        β::T
    end
    _logpdf(d::ThermalCM, x::AbstractArray{T,1}) where T = d.β*_logpdf(d.d, x)
    length(d::ThermalCM) = length(d.d)
    insupport(d::ThermalCM, x::AbstractArray{T,1}) where T = insupport(d.d, x)

    # Discrete multivariate case.
    mutable struct ThermalDM{T<:Real, D<:DiscreteMultivariateDistribution} <: DiscreteMultivariateDistribution
        d::D
        β::T
    end
    _logpdf(d::ThermalDM, x::AbstractArray{T,1}) where T = d.β*_logpdf(d.d, x)
    length(d::ThermalDM) = length(d.d)
    insupport(d::ThermalDM, x::AbstractArray{T,1}) where T = insupport(d.d, x)
end

#################### Examples: each tempered logpdf equals β times the base logpdf.
beta = 0.2
println("-"^70)
@show methods(ThermalCU)
println()
@show dc = ThermalCU(Normal(), beta)
@show logpdf(dc.d, 1.0), logpdf(dc, 1.0)/beta
println("-"^70)
@show methods(ThermalDU)
println()
@show dd = ThermalDU(Poisson(), beta)
@show logpdf(dd.d, 5), logpdf(dd, 5)/beta
println("-"^70)
@show methods(ThermalCM)
println()
@show dcm = ThermalCM(MvNormal([0.0, 0.0], [1.0, 1.0]), beta)
@show _logpdf(dcm.d, [1.0, 2.0]), _logpdf(dcm, [1.0, 2.0])/beta
println("-"^70)
@show methods(ThermalDM)
println()
@show ddm = ThermalDM(Multinomial(10, [0.1, 0.3, 0.6]), beta)
@show _logpdf(ddm.d, [1,3,6]), _logpdf(ddm, [1,3,6])/beta
;

##### NormalGamma prior on x = [μ, λ]: λ ~ Gamma(α, θ) (shape/scale) and
##### μ | λ ~ Normal(μ₀, 1/sqrt(λ λ₀)) — the conjugate prior of a normal
##### model parametrized by mean μ and precision λ.
@everywhere struct NormalGamma{S<:Real} <: ContinuousMultivariateDistribution
    μ₀::S
    λ₀::S
    α::S
    θ::S
end

@everywhere begin
    import Distributions: logpdf, maximum, minimum
    import Distributions: _logpdf, length, insupport
    import Distributions: pdf, rand, _rand!

    # Density of NormalGamma at x = [μ, λ].
    function pdf(d::NormalGamma{S}, x::AbstractArray{S,1}) where S<:Real
        μ₀ = d.μ₀; λ₀ = d.λ₀; α = d.α; θ = d.θ
        μ = x[1]
        λ = x[2]
        sqrt(λ₀/(2π)) / (gamma(α) * θ^α) * λ^(α-1/2) * exp( -(1/2)*λ*(λ₀*(μ - μ₀)^2 + 2/θ) )
    end

    # Log-density; term-by-term log of `pdf` above, using lgamma for stability.
    # FIX(review): the original read `(α-1/2)log(λ)`, which is not valid
    # numeric-literal juxtaposition in Julia (the coefficient is a parenthesized
    # expression, not a literal) — the missing `*` is restored here.
    function _logpdf(d::NormalGamma{S}, x::AbstractArray{S,1}) where S<:Real
        μ₀ = d.μ₀; λ₀ = d.λ₀; α = d.α; θ = d.θ
        μ = x[1]
        λ = x[2]
        (1/2)*(log(λ₀) - log(2π)) - (lgamma(α) + α*log(θ)) + (α - 1/2)*log(λ) - (1/2)*λ*(λ₀*(μ - μ₀)^2 + 2/θ)
    end

    length(d::NormalGamma) = 2
    # Support: any mean μ, precision λ > 0.
    insupport(d::NormalGamma{S}, x::AbstractArray{S,1}) where S<:Real = (x[2] > zero(S))

    # Sample [μ, λ]: draw the precision λ first, then μ conditionally on λ.
    function _rand!(d::NormalGamma{S}, x::AbstractArray{S,1}) where S<:Real
        x[2] = rand(Gamma(d.α, d.θ))
        x[1] = rand(Normal(d.μ₀, 1/sqrt(x[2]*d.λ₀)))
        return x
    end

    # Column-wise sampling into a 2×L matrix (one sample per column).
    function _rand!(d::NormalGamma{S}, x::AbstractArray{S,2}) where S<:Real
        for i in 1:size(x,2)
            x[:,i] = rand(d)
        end
        return x
    end
end

##### predictive distribution for NormalGamma prior d
@everywhere begin
    ##### Location-scale t-distribution with center μ, scale ρ, ν d.o.f.
    GTDist(μ::S, ρ::S, ν::S) where S<:Real = LocationScale(μ, ρ, TDist(ν))
    GTDist(μ, ρ, ν) = GTDist(Float64(μ), Float64(ρ), Float64(ν))

    # Predictive of the normal model under NormalGamma(μ₀, λ₀, α, θ):
    # a t-distribution with 2α degrees of freedom.
    function PredNormal(μ₀, λ₀, α, θ)
        ρ = sqrt((λ₀+1)/(α*θ*λ₀))
        GTDist(μ₀, ρ, 2α)
    end
    ##### predictive distribution for NormalGamma prior d
    PredNormal(d::NormalGamma) = PredNormal(d.μ₀, d.λ₀, d.α, d.θ)
end

##### Partition and log-partition functions of the NormalGamma density.
function Z(d::NormalGamma{S}) where S<:Real
    μ₀ = d.μ₀; λ₀ = d.λ₀; α = d.α; θ = d.θ
    1/( sqrt(λ₀/(2π)) / (gamma(α) * θ^α) )
end

# logZ(d) == log(Z(d)), computed via lgamma to avoid overflow in gamma(α).
function logZ(d::NormalGamma{S}) where S<:Real
    μ₀ = d.μ₀; λ₀ = d.λ₀; α = d.α; θ = d.θ
    -( (1/2)*(log(λ₀) - log(2π)) - (lgamma(α) + α*log(θ)) )
end

################## Tests
println("--------- Test of the normal-Gamma conjugate priors of normal distributions\n")
μ₀ = 5.0
σ₀ = 2.0
μ_λ = 1.0     # desired prior mean of the precision λ
σ_λ = 1.0     # desired prior std of the precision λ
λ₀ = 1/σ₀^2
αθ = μ_λ      # Gamma mean  = αθ
αθ² = σ_λ^2   # Gamma var   = αθ²
α = (αθ)^2/αθ²
θ = αθ/α
@show d = NormalGamma(μ₀, λ₀, α, θ) # prior
@show pdf(d, [0.0, 1.0])
@show logpdf(d, [0.0, 1.0]), log(pdf(d, [0.0,1.0]))
@show logZ(d), log(Z(d))
@show length(d)
@show insupport(d, [0.0, -1.0])
@show rand(d,5)
@show pd = PredNormal(d) # predictive of the prior d
@show logpdf(pd, 1.0)

println("\n--------- Comparison between the Monte Carlo and the exact estimates of the predictive distribution")
sleep(0.1)
L = 10^6
w = rand(d, L)                                        # 2×L matrix of [μ; λ] samples
X = rand.(Normal.(w[1,:], 1 ./sqrt.(w[2,:])))         # one draw per sampled parameter
xmin, xmax = -10, 20
x = linspace(xmin, xmax, 201)
plt.figure(figsize=(6.4, 4.2))
plt.plt[:hist](X, normed=true, bins=100, range=(xmin,xmax), label="Monte Carlo sim.") # Monte Carlo estimate
plt.plot(x, pdf.(pd, x), label="exact predictive") # exact estimate
plt.grid(ls=":")
plt.legend()
plt.title("predictive distributions");

########## Bayesian update of NormalGamma prior d w.r.t sample X and inverse temperature β
# Conjugate update: the tempered posterior is again NormalGamma with the
# effective sample size scaled by β (βn = β*n).
function BayesianUpdate(d::NormalGamma{S}, X::AbstractArray{S,1}, β::S) where S<:Real
    μ₀ = d.μ₀; λ₀ = d.λ₀; α = d.α; θ = d.θ
    n = length(X)
    βn = β*n
    meanX = mean(X)
    varX = var(X, corrected=false)
    μ₀_updated = (λ₀*μ₀ + βn*meanX)/(λ₀ + βn)
    λ₀_updated = λ₀ + βn
    α_updated = α + βn/2
    θ_updated = θ/(1 + (θ/2)*((βn*λ₀)/(λ₀+βn)*(meanX - μ₀)^2 + βn*varX))
    return NormalGamma(μ₀_updated, λ₀_updated, α_updated, θ_updated)
end
# Default: ordinary (β = 1) Bayesian update.
BayesianUpdate(d, X) = BayesianUpdate(d, X, one(eltype(d)))

#################### Test
println("----------- associativity and commutativity of Bayesian update\n")
@show dist_true = Normal(-5.0, 2.0)
X1 = rand(dist_true, 100)
X2 = rand(dist_true, 200)
X = vcat(X1,X2)
β = 3.0
prior = NormalGamma(0.0, 1/100^2, 1.0, 1.0)
posterior1 = BayesianUpdate(prior, X1, β)
posterior2 = BayesianUpdate(prior, X2, β)
posterior12 = BayesianUpdate(posterior1, X2, β)
posterior21 = BayesianUpdate(posterior2, X1, β)
posterior = BayesianUpdate(prior, X, β);
predictive = PredNormal(posterior)
@show size(X1,1), mean(X1), std(X1)
@show size(X2,1), mean(X2), std(X2)
@show size(X,1), mean(X), std(X)
@show β
@show prior
@show posterior1
@show posterior2
@show posterior12
@show posterior21
@show posterior
@show predictive;

########## WBIC and partition and log-partition functions for posteriors

# Exact calculation of the marginal-likelihood partition function at inverse
# temperature β (ratio of posterior to prior normalizing constants).
function Z(prior::NormalGamma{S}, X::AbstractArray{S,1}, β::S) where S<:Real
    βn = β*size(X,1)
    posterior = BayesianUpdate(prior, X, β)
    Z(posterior)/((2π)^(βn/2) * Z(prior))
end
Z(prior::NormalGamma{S}, X::AbstractArray{S,1}) where S<:Real = Z(prior, X, one(S))

# Monte Carlo estimate sampling from the prior.  NOTE(review): the original
# comment said "(β=∞)" — presumably shorthand for the naive prior-sampling
# estimator; verify against the notebook text.  Prone to underflow for large
# samples since it multiplies the raw likelihoods; prefer logZ_mc below.
function Z_mc(prior, X, β; L = 10^5)
    w = rand(prior, L)
    f(w) = prod(pdf.(Normal(w[1],1/sqrt(w[2])), X))^β
    return mean(f(w[:,l]) for l in 1:L)
end

# Exact calculation of the log-partition function.
function logZ(prior::NormalGamma{S}, X::AbstractArray{S,1}, β::S) where S<:Real
    βn = β*size(X,1)
    posterior = BayesianUpdate(prior, X, β)
    return logZ(posterior) - (βn/2)*log(2π) - logZ(prior)
end
function logZ(prior::NormalGamma{S}, X::AbstractArray{S,1}) where S<:Real
    return logZ(prior, X, one(S))
end

# Bayesian free energy F = -(2/β) log Z(β).
function BayesianFreeEnergy(prior::NormalGamma{S}, X::AbstractArray{S,1}, β::S) where S<:Real
    return -2/β*logZ(prior, X, β)
end
function BayesianFreeEnergy(prior::NormalGamma{S}, X::AbstractArray{S,1}) where S<:Real
    return BayesianFreeEnergy(prior, X, one(S))
end

# Monte Carlo estimate of log Z sampling from the prior, stabilized with the
# usual log-sum-exp shift by the maximum log-likelihood.
function logZ_mc(prior, X, β; L = 10^5)
    w = rand(prior, L)
    logf(w) = β*sum(logpdf.(Normal(w[1],1/sqrt(w[2])), X))
    loglik = [logf(@view w[:,l]) for l in 1:L]
    maxloglik = maximum(loglik)
    return maxloglik + log(mean(exp.(loglik .- maxloglik)))
end
logZ_mc(prior::NormalGamma{S}, X::AbstractArray{S,1}; L=10^5) where S<:Real = logZ_mc(prior, X, one(S), L=L)

# Exact formula of the posterior mean of logpdf of the normal model at x.
function ElogpdfNormal(d::NormalGamma, x)
    μ₀ = d.μ₀; λ₀ = d.λ₀; α = d.α; θ = d.θ
    θ ≤ zero(θ) && return typemax(θ)   # degenerate posterior guard
    return -(1/2)*( log(2π) + α*θ*(x - μ₀)^2 + 1/λ₀ - (digamma(α) + log(θ)) )
end

# Exact formula of the posterior mean of logpdf^2 at x.
function ElogpdfNormal2(d::NormalGamma, x)
    μ₀ = d.μ₀; λ₀ = d.λ₀; α = d.α; θ = d.θ
    θ ≤ zero(θ) && return typemax(θ)
    return (1/4)*(
        log(2π)^2 + (α+1)*α*θ^2*(x - μ₀)^4 + 6α*θ/λ₀*(x - μ₀)^2 + 3/λ₀^2 +
        trigamma(α) + (digamma(α) + log(θ))^2 +
        2*log(2π)*( α*θ*(x - μ₀)^2 + 1/λ₀ ) -
        2*log(2π)*( digamma(α) + log(θ) ) -
        2*( (x - μ₀)^2*(θ + α*θ*(digamma(α) + log(θ))) + 1/λ₀*(digamma(α) + log(θ)) )
    )
end

# Exact formula of the β-posterior mean of nL_n(w) (minus total log-likelihood).
function EnLn(prior, X, β)
    posterior = BayesianUpdate(prior, X, β)
    return -sum(ElogpdfNormal(posterior, x) for x in X)
end

# Exact formula of WBIC: 2 E^β[nL_n] at β/log(n).
WBIC(prior, X, β) = 2*EnLn(prior, X, β/log(length(X)))
WBIC(prior, X) = WBIC(prior, X, 1.0)

# Estimate of the real log canonical threshold via two WBIC temperatures.
function RealLogCanonicalThreshold_WBIC(prior, X; n = length(X), β₁ = 1/log(n), β₂ = 2/log(n))
    return (EnLn(prior, X, β₁) - EnLn(prior, X, β₂))/(1/β₁ - 1/β₂)
end

# improved WBIC
# See pages 363--364 of
# http://onlinelibrary.wiley.com/doi/10.1111/rssb.12187/full
function improved_WBIC(prior, X, β; n = length(X), β₁ = 1/log(n), β₂ = 2/log(n))
    posterior = BayesianUpdate(prior, X, β)
    λ_star = RealLogCanonicalThreshold_WBIC(prior, X, β₁=β₁, β₂=β₂)
    WBIC₀ = WBIC(prior,X,β)
    WAIC₀ = WAIC(prior,X,β)
    V = 2*FunctionalVariance(posterior, X)
    β_star = (β/log(n))*(β*(WBIC₀ - WAIC₀ - (1-β)*V)/(2*λ_star*log(n)) - 1/log(n))
    β_star = max(0.0, β_star)   # clamp: negative temperatures are meaningless here
    return 2*EnLn(prior, X, β_star)
end
function improved_WBIC(prior, X; n = length(X), β₁ = 1/log(n), β₂ = 2/log(n))
    return improved_WBIC(prior, X, 1.0, β₁=β₁, β₂=β₂)
end

########### Estimation by Monte Carlo method

# Monte Carlo estimate of the posterior mean of logpdf at x.
function ElogpdfNormal_mc(d::NormalGamma, x; L = 10^5)
    w = rand(d, L)
    return mean(logpdf(Normal(w[1,l], 1/sqrt(w[2,l])), x) for l in 1:L)
end

# Monte Carlo estimate of the posterior mean of logpdf^2 at x.
function ElogpdfNormal2_mc(d::NormalGamma, x; L = 10^5)
    w = rand(d, L)
    return mean(logpdf(Normal(w[1,l], 1/sqrt(w[2,l])), x)^2 for l in 1:L)
end

# Monte Carlo estimate of E^β[nL_n] at the inverse temperature β.
# (Unused local `predictive` from the original removed.)
function EnLn_mc(prior, X, β; L = 10^5)
    posterior = BayesianUpdate(prior, X, β)
    w = rand(posterior, L)
    f(w) = sum(logpdf.(Normal(w[1], 1/sqrt(w[2])), X))
    return -mean(f(@view w[:,l]) for l in 1:L)
end

# Monte Carlo WBIC at the inverse temperature β/log(n).
WBIC_mc(prior, X, β; L = 10^5) = 2*EnLn_mc(prior, X, β/log(length(X)), L=L)
WBIC_mc(prior, X; L = 10^5) = 2*EnLn_mc(prior, X, 1.0/log(length(X)), L=L)

########## WAIC, LOOCV, and generalization loss
TrainingLoss(dist_pred, X) = -sum(logpdf.(dist_pred, X))

# Expected (per-sample) generalization loss by numerical quadrature.
function GeneralizationLoss(dist_true, dist_pred; xmin = -100.0, xmax = 100.0)
    f(x) = -pdf(dist_true, x) * logpdf(dist_pred, x)
    return quadgk(f, xmin, xmax)[1]
end
function ShannonInformation(dist_true; xmin = -100.0, xmax = 100.0)
    return GeneralizationLoss(dist_true, dist_true, xmin = xmin, xmax = xmax)
end
# KL(dist_true ‖ dist_pred) = GL − entropy of the truth.
function KullbackLeibler(dist_true, dist_pred; xmin = -100.0, xmax = 100.0)
    return ( GeneralizationLoss(dist_true, dist_pred, xmin = xmin, xmax = xmax)
           - ShannonInformation(dist_true, xmin = xmin, xmax = xmax) )
end

# Exact formula for the WAIC functional variance term.
function FunctionalVariance(posterior::NormalGamma, X)
    return sum(ElogpdfNormal2(posterior, x) - ElogpdfNormal(posterior, x)^2 for x in X)
end

# Exact formula of WAIC.
function WAIC(prior, X, β)
    posterior = BayesianUpdate(prior, X, β)
    predictive = PredNormal(posterior)
    return 2*( TrainingLoss(predictive, X) + β*FunctionalVariance(posterior, X) )
end
WAIC(prior, X) = WAIC(prior, X, 1.0)

# Exact formula of LOOCV, expressed through log-partition ratios.
function LOOCV(prior, X, β)
    posterior = BayesianUpdate(prior, X, β)
    return -2*sum( logZ(posterior, [x], 1-β) - logZ(posterior, [x], -β) for x in X)
end
LOOCV(prior, X) = LOOCV(prior, X, 1.0)

# Monte Carlo estimate of the functional variance, sampling the posterior.
function FunctionalVariance_mc(posterior::NormalGamma, X; L = 10^5)
    w = rand(posterior, L)
    return sum(var(logpdf(Normal(w[1,l], 1/sqrt(w[2,l])), x) for l in 1:L) for x in X)
end

######### Estimation by Monte Carlo method

# Monte Carlo WAIC (functional variance estimated by posterior sampling).
function WAIC_mc(prior, X, β; L = 10^5)
    posterior = BayesianUpdate(prior, X, β)
    predictive = PredNormal(posterior)
    return 2*( TrainingLoss(predictive, X) + β*FunctionalVariance_mc(posterior, X))
end

# Monte Carlo estimate of the posterior variance of logpdf at a single x.
function VlogpdfNormal_mc(d::NormalGamma, x; L = 10^5)
    w = rand(d,L)
    return ( mean(logpdf(Normal(w[1,l], 1/sqrt(w[2,l])), x)^2 for l in 1:L)
           - mean(logpdf(Normal(w[1,l], 1/sqrt(w[2,l])), x) for l in 1:L)^2 )
end

#################### Tests
println("----------- sample and estimates\n")
dist_true = Normal(-5.0, 2.0)
n = 2^10
X = rand(dist_true, n)
β = 1.0
prior = NormalGamma(0.0, 1/100^2, 1.0, 1.0)
posterior = BayesianUpdate(prior, X, β)
predictive = PredNormal(posterior)
T_true = 2*TrainingLoss(dist_true, X)
T_pred = 2*TrainingLoss(predictive, X)
@show dist_true
@show n
@show mean(X), std(X)
@show β
@show prior
@show posterior
@show predictive
println("----------- Bayesian free energy and WBIC\n")
BFE_Zexact = -2/β*log(Z(prior, X, β))
@time BFE_Zmcsim = -2/β*log(Z_mc(prior, X, β))
BFE_Lexact = BayesianFreeEnergy(prior, X, β)
@time BFE_Lmcsim = -2/β*logZ_mc(prior, X, β)
WBIC_exact = WBIC(prior, X, β)
@time WBIC_mcsim = WBIC_mc(prior, X, β)
iWBIC = improved_WBIC(prior, X, β)
@show T_true
@show T_pred
@show BFE_Zexact
@show BFE_Zmcsim
@show BFE_Lexact
@show BFE_Lmcsim
@show WBIC_exact
@show WBIC_mcsim
@show iWBIC
println("\n----------- Generalization Loss and WAIC\n")
T_true = 2*TrainingLoss(dist_true, X)
@time GL = 2n*GeneralizationLoss(dist_true, predictive)
@time KL = 2n*KullbackLeibler(dist_true, predictive)
T_pred = 2*TrainingLoss(predictive, X)
V_exact = 2β*FunctionalVariance(posterior, X)
@time V_mcsim = 2β*FunctionalVariance_mc(posterior, X, L=10^5)
#@time V_mcsim2 = 2β*sum(VlogpdfNormal_mc(posterior, x, L=10^5) for x in X)
WAIC_exact = WAIC(prior, X, β)
@time WAIC_mcsim = WAIC_mc(prior, X, β, L=10^5)
WAT = WAIC_exact - T_true
LOOCV_exact = LOOCV(prior, X, β)
LCT = LOOCV_exact - T_true
RLCT_WBIC = RealLogCanonicalThreshold_WBIC(prior, X)
println()
@show dist_true
@show n
@show mean(X), std(X)
@show β
@show prior
@show posterior
@show predictive
@show T_true
@show T_pred
@show V_exact
@show V_mcsim
#@show V_mcsim2
println()
@show GL
@show WAIC_exact
@show WAIC_mcsim
@show LOOCV_exact
println()
@show KL
@show WAT
@show LCT
println()
@show λ = 2/2   # d/2 for the regular 2-parameter model
@show ν = 2/2
@show β^(-1)*4λ + (1-β^(-1))*4ν
@show KL + WAT
@show KL + LCT
@show RLCT_WBIC
;

# Classical AIC/BIC for the 2-parameter normal model fitted by ML.
function AIC_Normal(X)
    predictive = fit(Normal, X)
    return -2*sum(logpdf.(predictive, X)) + 4
end
function BIC_Normal(X)
    predictive = fit(Normal, X)
    return -2*sum(logpdf.(predictive, X)) + 2*log(length(X))
end
# 1-parameter variants: mean-only (σ fixed) and sigma-only (μ fixed).
function AIC_Normal_mean(X; σ = 1.0)
    μ = mean(X)
    predictive = Normal(μ, σ)
    return -2*sum(logpdf.(predictive, X)) + 2
end
function BIC_Normal_mean(X; σ = 1.0)
    μ = mean(X)
    predictive = Normal(μ, σ)
    return -2*sum(logpdf.(predictive, X)) + log(length(X))
end
function AIC_Normal_sigma(X; μ = 0.0)
    σ = std(X, corrected=false, mean=μ)
    predictive = Normal(μ, σ)
    return -2*sum(logpdf.(predictive, X)) + 2
end
function BIC_Normal_sigma(X; μ = 0.0)
    σ = std(X, corrected=false, mean=μ)
    predictive = Normal(μ, σ)
    return -2*sum(logpdf.(predictive, X)) + log(length(X))
end

#################### Test
@show dist_true = Normal(1.0,2.0)
@show n = 2^10
X = rand(dist_true, n)
@show mean(X), std(X)
println()
@show prior = NormalGamma(0.0, 1.0, 1.0, 1.0/1.0)
@show WAIC(prior, X)
@show WBIC(prior, X)
@show AIC_Normal(X)
@show BIC_Normal(X)
println()
# Nearly-degenerate precision prior ≈ mean-only model.
@show prior_Normal_mean = NormalGamma(0.0, 1.0, 1e10, 1.0/1e10)
@show WAIC(prior_Normal_mean, X)
@show WBIC(prior_Normal_mean, X)
@show AIC_Normal_mean(X, σ=1.0)
@show BIC_Normal_mean(X, σ=1.0)
println()
# Nearly-degenerate mean prior ≈ sigma-only model.
@show prior_Normal_sigma = NormalGamma(0.0, 1e10, 1.0, 1.0/1.0)
@show WAIC(prior_Normal_sigma, X)
@show WBIC(prior_Normal_sigma, X)
@show AIC_Normal_sigma(X, μ=0.0)
@show BIC_Normal_sigma(X, μ=0.0)
;

# Scatter plot of (μ, σ) samples from a NormalGamma, colored by a KDE of
# their own joint density; axis limits clip the epsilon tails.
function plotPosteriorSample(d::NormalGamma; L = 10^4, epsilon = 0.025, titlestr = "Sample")
    w = rand(d, L)
    μ = (@view w[1,:])
    σ = 1 ./sqrt.(w[2,:])
    xmin = quantile(μ, epsilon)
    xmax = quantile(μ, 1-epsilon)
    ymin = quantile(σ, epsilon)
    ymax = quantile(σ, 1-epsilon)
    f = makefunc_pdfkde(μ, σ)
    c = f.(μ, σ)
    plt.scatter(μ, σ, c=c, s=3, cmap="CMRmap")
    plt.colorbar()
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    plt.xlabel("\$\\mu\$ = mean of normal distribution")
    plt.ylabel("\$\\sigma\$ = std of normal distribution")
    plt.grid(ls=":", color="darkgray")
    #plt.legend()
    plt.title(titlestr)
end

# Single-panel prior sample plot.
function plotP(prior; L = 10^4, epsilon=0.025, titlestr="prior sample")
    plt.figure(figsize=(5,4.5))
    fig1 = plt.subplot(111)
    fig1[:set_facecolor]("gray")
    plotPosteriorSample(prior, L=L, titlestr=titlestr, epsilon=epsilon)
    plt.tight_layout()
end

# Two priors side by side.
function plotP01(prior0, prior1; L = 10^4, epsilon=0.025, titlestr0="prior_0 sample", titlestr1="prior_1 sample")
    plt.figure(figsize=(10,4.5))
    fig1 = plt.subplot(121)
    fig1[:set_facecolor]("gray")
    plotPosteriorSample(prior0, L=L, titlestr=titlestr0, epsilon=epsilon)
    fig2 = plt.subplot(122)
    fig2[:set_facecolor]("gray")
    plotPosteriorSample(prior1, L=L, titlestr=titlestr1, epsilon=epsilon)
    plt.tight_layout()
end

# Prior and its β-posterior side by side.
function plotPP(prior, X, β; L = 10^4, epsilon=0.025)
    posterior = BayesianUpdate(prior, X, β)
    plt.figure(figsize=(10,4.5))
    fig1 = plt.subplot(121)
    fig1[:set_facecolor]("gray")
    plotPosteriorSample(prior, L=L, titlestr="prior sample", epsilon=epsilon)
    fig2 = plt.subplot(122)
    fig2[:set_facecolor]("gray")
    plotPosteriorSample(posterior, L=L, titlestr="posterior sample", epsilon=epsilon)
    plt.tight_layout()
end

dist_true = Normal(1.0, 2.0)
n = 2^10
prior = NormalGamma(0.0, 1/5^2, 0.7, 1.0/0.7)
prior0 = prior
μ₀, αθ = 0.0, 1.0
λ₀, α = 1.0, 1.0
prior1 = NormalGamma(μ₀, λ₀, α, αθ/α)
β = 1.0
X = rand(dist_true, n)
@time plotP(prior)
@time plotP01(prior0, prior1)
@time plotPP(prior, X, β)

# Repeatedly resample X and record every criterion for one (prior, β) config.
function runsim(dist_true, n, prior, β; Nsims=10^4)
    X = Array{Float64, 1}(undef, n)
    Samples = Array{Array{Float64, 1}, 1}(undef, Nsims)
    Ttrues = Array{Float64, 1}(undef, Nsims)
    GLs = Array{Float64, 1}(undef, Nsims)
    KLs = Array{Float64, 1}(undef, Nsims)
    WAICs = Array{Float64, 1}(undef, Nsims)
    LOOCVs = Array{Float64, 1}(undef, Nsims)
    WATs = Array{Float64, 1}(undef, Nsims)
    BFEs = Array{Float64, 1}(undef, Nsims)
    WBICs = Array{Float64, 1}(undef, Nsims)
    iWBICs = Array{Float64, 1}(undef, Nsims)
    AICs = Array{Float64, 1}(undef, Nsims)
    BICs = Array{Float64, 1}(undef, Nsims)
    for k in 1:Nsims
        X = rand(dist_true, n)
        posterior = BayesianUpdate(prior, X, β)
        predictive = PredNormal(posterior)
        Samples[k] = X
        Ttrues[k] = 2*TrainingLoss(dist_true, X)
        # NOTE(review): elsewhere GL is scaled by 2n (see the earlier test and
        # KLs below) but here by 2 — looks inconsistent; behavior kept,
        # confirm intent before changing.
        GLs[k] = 2*GeneralizationLoss(dist_true, predictive)
        KLs[k] = 2n*KullbackLeibler(dist_true, predictive)
        WAICs[k] = WAIC(prior, X, β)
        LOOCVs[k] = LOOCV(prior, X, β)
        WATs[k] = WAICs[k] - Ttrues[k]
        BFEs[k] = -2*logZ(prior, X, β)
        WBICs[k] = WBIC(prior, X, β)
        iWBICs[k] = improved_WBIC(prior, X, β)
        AICs[k] = AIC_Normal(X)
        BICs[k] = BIC_Normal(X)
    end
    return Samples, Ttrues, GLs, KLs, WAICs, WATs, LOOCVs, BFEs, WBICs, iWBICs, AICs, BICs
end

# Run the simulation study, print summary statistics, and draw the
# KL/WAIC histograms and the WBIC-vs-free-energy scatter plots.
function plotsim(dist_true, n, prior, β; Nsims=10^3)
    global Samples, Ttrues, GLs, KLs, WAICs, WATs, LOOCVs, BFEs, WBICs, iWBICs, AICs, BICs
    Samples, Ttrues, GLs, KLs, WAICs, WATs, LOOCVs, BFEs, WBICs, iWBICs, AICs, BICs =
        runsim(dist_true, n, prior, β, Nsims=Nsims)
    @show dist_true
    @show n
    @show prior
    @show β
    @show Nsims
    @show cor(KLs, WAICs - Ttrues)
    @show cor(KLs, AICs - Ttrues)
    @show mean(WAICs-Ttrues+KLs), std(WAICs-Ttrues+KLs)
    @show mean(LOOCVs-Ttrues+KLs), std(LOOCVs-Ttrues+KLs)
    @show mean(AICs-Ttrues+KLs), std(AICs-Ttrues+KLs)
    @show mean(WAICs-Ttrues-KLs), std(WAICs-Ttrues-KLs)
    @show mean(LOOCVs-Ttrues-KLs), std(LOOCVs-Ttrues-KLs)
    # FIX(review): the next three std arguments were copy-paste leftovers
    # (std of the WAIC/LOOCV rows printed on the AIC rows); each std now
    # matches its paired mean.
    @show mean(AICs-Ttrues-KLs), std(AICs-Ttrues-KLs)
    @show mean(LOOCVs-WAICs), std(LOOCVs-WAICs)
    @show mean(AICs-WAICs), std(AICs-WAICs)
    @show mean(WAICs-Ttrues), std(WAICs-Ttrues)
    @show mean(LOOCVs-Ttrues), std(LOOCVs-Ttrues)
    @show mean(AICs-Ttrues), std(AICs-Ttrues)
    @show mean(WBICs - BFEs), std(WBICs - BFEs)
    @show mean(iWBICs - BFEs), std(iWBICs - BFEs)
    @show mean(BICs - BFEs), std(BICs - BFEs)
    @show mean(BICs - WBICs), std(BICs - WBICs)
    @show mean(iWBICs - WBICs), std(iWBICs - WBICs)
    sleep(0.1)
    plt.figure(figsize=(10,3.5))
    plt.subplot(121)
    plt.plt[:hist](KLs, normed=true, bins=30)
    plt.grid(ls=":")
    plt.title("Kullback-Leibler")
    plt.subplot(122)
    plt.plt[:hist](WATs, normed=true, bins=30)
    plt.grid(ls=":")
    plt.title("WAIC \$-\$ T_true")
    plt.tight_layout()
    plt.figure(figsize=(10,5))
    plt.subplot(121)
    plt.scatter(BFEs - Ttrues, WBICs - Ttrues, s=5)
    x = [minimum(BFEs - Ttrues), maximum(BFEs - Ttrues)]
    plt.plot(x, x, color="k", ls=":", label="y = x")
    plt.grid(ls=":")
    plt.xlabel("Free Energy \$-\$ T_true")
    plt.ylabel("WBIC \$-\$ T_true")
    plt.legend(fontsize=9)
    plt.title("n = $n, Nsims = $Nsims")
    plt.subplot(122)
    plt.scatter(BFEs - Ttrues, WBICs - BFEs, s=5)
    x = [minimum(BFEs - Ttrues), maximum(BFEs - Ttrues)]
    plt.plot(x, zero(x), color="k", ls=":", label="y = 0")
    plt.grid(ls=":")
    plt.xlabel("Free Energy \$-\$ T_true")
    plt.ylabel("WBIC \$-\$ Free Energy")
    plt.legend(fontsize=9)
    plt.title("n = $n, Nsims = $Nsims")
    plt.tight_layout()
    plt.figure(figsize=(10,5))
    plt.subplot(121)
    plt.scatter(BFEs - Ttrues, iWBICs - Ttrues, s=5)
    x = [minimum(BFEs - Ttrues), maximum(BFEs - Ttrues)]
    plt.plot(x, x, color="k", ls=":", label="y = x")
    plt.grid(ls=":")
    plt.xlabel("Free Energy \$-\$ T_true")
    plt.ylabel("improved WBIC \$-\$ T_true")
    plt.legend(fontsize=9)
    plt.title("n = $n, Nsims = $Nsims")
    plt.subplot(122)
    plt.scatter(BFEs - Ttrues, iWBICs - BFEs, s=5)
    x = [minimum(BFEs - Ttrues), maximum(BFEs - Ttrues)]
    plt.plot(x, zero(x), color="k", ls=":", label="y = 0")
    plt.grid(ls=":")
    plt.xlabel("Free Energy \$-\$ T_true")
    plt.ylabel("improved WBIC \$-\$ Free Energy")
    plt.legend(fontsize=9)
    plt.title("n = $n, Nsims = $Nsims")
    plt.tight_layout()
end

##################### Test
dist_true = Normal(1.0, 2.0)
n = 2^10
μ₀, αθ = 0.0, 1.0
λ₀, α = 1.0, 1.0
prior = NormalGamma(μ₀, λ₀, α, αθ/α)
β = 1.0
X = rand(dist_true, n)
@time plotP(prior)
@time plotsim(dist_true, n, prior, β, Nsims=10^3)

# Plot 2E^β[nL_n] as a function of β against the exact/MC free energy and
# the various WBIC estimates.
function plotWBIC(prior, X)
    n = length(X)
    BFE_exact = -2*logZ(prior, X)
    BFE_mcsim = -2*logZ_mc(prior, X)
    WBIC_exact = WBIC(prior, X)
    WBIC_mcsim = WBIC_mc(prior, X)
    iWBIC = improved_WBIC(prior, X)
    f(β) = 2*EnLn(prior, X, β)
    # ∫₀¹ 2E^β[nL_n] dβ equals the free energy (thermodynamic integration).
    @show BFE_quadgk, error = quadgk(f, 0, 1)
    println()
    @show BFE_exact
    @show BFE_quadgk
    @show BFE_mcsim
    @show WBIC_exact
    @show WBIC_mcsim
    @show iWBIC
    sleep(0.1)
    β = linspace(0.02,1,101)
    plt.figure(figsize=(6.4, 4.2))
    plt.plot(β, f.(β), color="blue", label="\$2E^\\beta_w[nL_n(w)]\$")
    plt.axhline(BFE_exact, color="blue", ls="--", label="Free Energy (exact)")
    plt.axhline(BFE_mcsim, color="cyan", ls=":", label="Free Energy (MC at \$\\beta=\\infty\$)")
    plt.axhline(WBIC_exact, color="red", ls="-.", label="exact WBIC")
    #plt.axhline(WBIC_mcsim, color="magenta", ls="-.", label="MC WBIC")
    plt.axhline(iWBIC, color="orange", ls="--", label="improved WBIC")
    plt.axvline(1/log(n), color="red", ls=":", label="1/log(n)")
    plt.grid(ls=":")
    plt.xlabel("\$\\beta\$")
    # FIX(review): title typo "Notmal" corrected.
    plt.title("Normal distribution model (n = $n)")
    plt.legend(fontsize=9)
end

#################### Test
dist_true = Normal(10.0, 1.0)
n = 2^10
μ₀, αθ = 0.0, 1.0
λ₀, α = 1.0, 1.0
prior = NormalGamma(μ₀, λ₀, α, αθ/α)
β = 1.0
X = rand(dist_true, n)
plotPP(prior, X, β)
@time plotWBIC(prior, X)
@time plotsim(dist_true, n, prior, β)

# Build a reweighting estimator E(β) of E^β[nL_n] from a single posterior
# sample drawn at the base temperature β₀ (importance weights exp((β-β₀)ℓ)).
function makefunc_EnLn_mc(prior, X, β₀; L = 10^5)
    posterior = BayesianUpdate(prior, X, β₀)
    w = rand(posterior, L)
    loglik = Array{Float64,1}(undef, L)
    for l in 1:L
        loglik[l] = sum(logpdf.(Normal(w[1,l], 1/sqrt(w[2,l])), X))
    end
    normalized_loglik = loglik .- maximum(loglik)   # log-sum-exp stabilization
    function E(β)
        numerator = -mean(loglik .* exp.((β-β₀).*normalized_loglik))
        denominator = mean(exp.((β-β₀).*normalized_loglik))
        return numerator/denominator
    end
    return E
end

# Free energy by thermodynamic integration of the MC estimator over [0, β].
function BaysianFreeEnergy_mc(prior, X, β; β₀ = 1/log(length(X)), L = 10^5)
    E = makefunc_EnLn_mc(prior, X, β₀, L = L)
    return 2/β*quadgk(E, 0.0, β)[1]
end

# Local-window variant integrating only around β₀.
function BaysianFreeEnergy_mc_localint(prior, X, β; β₀ = 1/log(length(X)), L = 10^5, a = 0.5)
    E = makefunc_EnLn_mc(prior, X, β₀, L = L)
    return 2/(β₀/a-β₀*a)*quadgk(E, β₀*a, min(β₀/a, β))[1]
end

# Compare exact/MC free energies and WBIC variants, then plot both curves.
function plotBFE(X, β, β₀, prior; a = 0.5)
    n = length(X)
    BFE_exact = BayesianFreeEnergy(prior, X, β)
    WBIC_exact = WBIC(prior, X)
    iWBIC = improved_WBIC(prior, X)
    E = makefunc_EnLn_mc(prior, X, β₀)
    # Local `err` (was `error`, which shadowed Base.error).
    intE, err = quadgk(E, 0.0, 1.0)
    BFE_mcsim0 = 2/β*intE
    BFE_mcsim = BaysianFreeEnergy_mc(prior, X, β, β₀=β₀)
    #E_invlogn = makefunc_EnLn_mc(prior, X, 1/log(n))
    #localintE, error_localint = quadgk(E_invlogn, a/log(n), 1/a/log(n))
    #BFE_mcsim_localint0 = 2/(1/a/log(n) - a/log(n))*localintE
    #BFE_mcsim_localint = BaysianFreeEnergy_mc_localint(prior, X, β, a=a)
    @show β₀
    @show BFE_exact
    @show BFE_mcsim0
    @show BFE_mcsim
    #@show BFE_mcsim_localint0
    #@show BFE_mcsim_localint
    @show WBIC_exact
    #@show 2*E(1/log(n))
    @show iWBIC
    sleep(0.1)
    x = linspace(0.02, 1, 100)
    f(x) = 2*EnLn(prior, X, x)
    g(x) = 2*E(x)
    plt.figure(figsize=(6.4, 4.2))
    plt.plot(x, f.(x), label="exact \$2E[nL_n]\$", color="blue")
    plt.plot(x, g.(x), label="MC \$2E[nL_n]\$", color="cyan", ls="--")
    plt.axvline(β₀, label="x = β₀", color="black", ls=":")
    plt.axvline(1/log(n), label="x = 1/log(n)", color="red", ls=":")
    plt.axhline(BFE_exact, color="blue", ls="--", label="exact BFE")
    plt.axhline(BFE_mcsim0, color="cyan", ls=":", label="MC BFE_1_0")
    plt.axhline(BFE_mcsim, color="steelblue", ls=":", label="MC BFE_1_1")
    #plt.axhline(BFE_mcsim_localint0, color="orange", ls=":", label="MC BFE_2_0")
    #plt.axhline(BFE_mcsim_localint, color="gold", ls=":", label="MC BFE_2_1")
    plt.axhline(WBIC_exact, color="red", ls=":", label="exact WBIC")
    plt.axhline(iWBIC, color="orange", ls="--", label="improved WBIC")
    plt.grid(ls=":")
    plt.legend(fontsize=9)
end

#################### Test
#dist_true = Normal(5.0, 1.0)
dist_true = Normal(10.0, 1.0)
n = 2^10
X = rand(dist_true, n)
β = 1.0
μ₀, αθ = 0.0, 1.0
λ₀, α = 1.0, 1.0
prior = NormalGamma(μ₀, λ₀, α, αθ/α)
β₀ = 0.0
@time plotPP(prior, X, β)
@time plotBFE(X, β, β₀, prior)
@time plotsim(dist_true, n, prior, β)

dist_true = Normal(0.0, 1.0)
n = 2^10
X = rand(dist_true, n)
β = 1.0
μ₀, αθ = 0.0, 1.0
λ₀, α = 1.0, 1.0
prior = NormalGamma(μ₀, λ₀, α, αθ/α)
β₀ = 0.0
@time plotPP(prior, X, β)
@time plotBFE(X, β, β₀, prior)
@time plotsim(dist_true, n, prior, β)

dist_true = Normal(0.0, 10.0)
n = 2^10
X = rand(dist_true, n)
β = 1.0
μ₀, αθ = 0.0, 1.0
λ₀, α = 1.0, 1.0
prior = NormalGamma(μ₀, λ₀, α, αθ/α)
β₀ = 0.0
@time plotPP(prior, X, β)
@time plotBFE(X, β, β₀, prior)
@time plotsim(dist_true, n, prior, β)

dist_true = Normal(10.0, 1.0)
n = 2^10
X = rand(dist_true, n)
β = 1.0
μ₀, αθ = 0.0, 1.0
λ₀, α = 1.0, 1.0
prior = NormalGamma(μ₀, λ₀, α, αθ/α)
β₀ = 0.0
@time plotPP(prior, X, β)
@time plotBFE(X, β, β₀, prior)
@time plotsim(dist_true, n, prior, β)

dist_true = Normal(10.0, 1.0)
n = 2^10
X = rand(dist_true, n)
β = 1.0
μ₀, αθ = 0.0, 1.0
λ₀, α = 1.0, 1.0
prior = NormalGamma(μ₀, λ₀, α, αθ/α)
β₀ = 1/log(n)
@time plotPP(prior, X, β)
@time plotBFE(X, β, β₀, prior)
@time plotsim(dist_true, n, prior, β)

dist_true = Normal(10.0, 1.0)
n = 2^10
X = rand(dist_true, n)
β = 1.0
@show mean(X), std(X)
μ₀, αθ = 0.0, 1.0
λ₀, α = 0.1, 0.5
prior = NormalGamma(μ₀, λ₀, α, αθ/α)
#β₀ = 1/log(n)/10
β₀ = 0.0
@time plotPP(prior, X, β, epsilon=0.1)
@time plotBFE(X, β, β₀, prior)
@time plotsim(dist_true, n, prior, β)
@time plotBFE(X, β, β₀, prior) @time plotsim(dist_true, n, prior, β) dist_true = Normal(10.0, 10.0) n = 2^10 X = rand(dist_true, n) β = 1.0 μ₀, αθ = 0.0, 1.0 λ₀, α = 1.0, 1.0 prior = NormalGamma(μ₀, λ₀, α, αθ/α) #β₀ = 1/log(n)/10 β₀ = 0.0 @time plotPP(prior, X, β) @time plotBFE(X, β, β₀, prior) @time plotsim(dist_true, n, prior, β) dist_true = Normal(10.0, 10.0) n = 2^10 X = rand(dist_true, n) β = 1.0 μ₀, αθ = 0.0, 1.0 λ₀, α = 1.0/100, 1.0 prior = NormalGamma(μ₀, λ₀, α, αθ/α) #β₀ = 1/log(n)/10 β₀ = 0.0 @time plotPP(prior, X, β) @time plotBFE(X, β, β₀, prior) @time plotsim(dist_true, n, prior, β) dist_true = Normal(5.0, 1.0) n = 2^6 X = rand(dist_true, n) β = 1.0 μ₀, αθ = 0.0, 1.0 λ₀, α = 0.1, 1.0 prior = NormalGamma(μ₀, λ₀, α, αθ/α) #β₀ = 1/log(n)/10 β₀ = 0.0 @time plotPP(prior, X, β) @time plotBFE(X, β, β₀, prior) @time plotsim(dist_true, n, prior, β) dist_true = Normal(5.0, 1.0) n = 2^6 X = rand(dist_true, n) β = 1.0 μ₀, αθ = 0.0, 1.0 λ₀, α = 0.1, 0.5 prior = NormalGamma(μ₀, λ₀, α, αθ/α) #β₀ = 1/log(n)/10 β₀ = 0.0 @time plotPP(prior, X, β) @time plotBFE(X, β, β₀, prior) @time plotsim(dist_true, n, prior, β) dist_true = Normal(5.0, 1.0) n = 2^10 X = rand(dist_true, n) β = 1.0 μ₀, αθ = 0.0, 1.0 λ₀, α = 0.0001, 0.5 prior = NormalGamma(μ₀, λ₀, α, αθ/α) β₀ = 0.0 @time plotPP(prior, X, β) @time plotBFE(X, β, β₀, prior) @time plotsim(dist_true, n, prior, β) dist_true = Normal(0.0, 1.0) n = 2^10 X = rand(dist_true, n) β = 1.0 μ₀, αθ = 0.0, 1.0 λ₀, α = 0.0001, 0.5 prior = NormalGamma(μ₀, λ₀, α, αθ/α) β₀ = 0.0 @time plotPP(prior, X, β) @time plotBFE(X, β, β₀, prior) @time plotsim(dist_true, n, prior, β) dist_true = Normal(0.0, 1.0) n = 2^5 X = rand(dist_true, n) β = 1.0 μ₀, αθ = 0.0, 1.0 λ₀, α = 0.0001, 0.5 prior = NormalGamma(μ₀, λ₀, α, αθ/α) β₀ = 0.0 #β₀ = 1/log(n) @time plotPP(prior, X, β) @time plotBFE(X, β, β₀, prior) @time plotsim(dist_true, n, prior, β) function simcomparison(dist_true, n, prior0, prior1; β₀=1.0, β₁=1.0, Nsims = 10^3) X = Array{Float64, 1}(undef, n) 
Samples = Array{Array{Float64, 1}, 1}(undef, Nsims) Ttrues = Array{Float64, 1}(undef, Nsims) KL0s = Array{Float64, 1}(undef, Nsims) KL1s = Array{Float64, 1}(undef, Nsims) WAIC0s = Array{Float64, 1}(undef, Nsims) WAIC1s = Array{Float64, 1}(undef, Nsims) LOOCV0s = Array{Float64, 1}(undef, Nsims) LOOCV1s = Array{Float64, 1}(undef, Nsims) BFE0s = Array{Float64, 1}(undef, Nsims) BFE1s = Array{Float64, 1}(undef, Nsims) WBIC0s = Array{Float64, 1}(undef, Nsims) WBIC1s = Array{Float64, 1}(undef, Nsims) iWBIC0s = Array{Float64, 1}(undef, Nsims) iWBIC1s = Array{Float64, 1}(undef, Nsims) AICs = Array{Float64, 1}(undef, Nsims) AICms = Array{Float64, 1}(undef, Nsims) AICss = Array{Float64, 1}(undef, Nsims) BICs = Array{Float64, 1}(undef, Nsims) BICms = Array{Float64, 1}(undef, Nsims) BICss = Array{Float64, 1}(undef, Nsims) for k in 1:Nsims X = rand(dist_true, n) Samples[k] = X Ttrues[k] = 2*TrainingLoss(dist_true, X) posterior0 = BayesianUpdate(prior0, X, β₀) posterior1 = BayesianUpdate(prior1, X, β₁) predictive0 = PredNormal(posterior0) predictive1 = PredNormal(posterior1) KL0s[k] = 2n*KullbackLeibler(dist_true, predictive0) KL1s[k] = 2n*KullbackLeibler(dist_true, predictive1) WAIC0s[k] = WAIC(prior0, X, β₀) - Ttrues[k] WAIC1s[k] = WAIC(prior1, X, β₁) - Ttrues[k] LOOCV0s[k] = LOOCV(prior0, X, β₀) - Ttrues[k] LOOCV1s[k] = LOOCV(prior1, X, β₁) - Ttrues[k] BFE0s[k] = BayesianFreeEnergy(prior0, X, β₀) - Ttrues[k] BFE1s[k] = BayesianFreeEnergy(prior1, X, β₁) - Ttrues[k] WBIC0s[k] = WBIC(prior0, X, β₀) - Ttrues[k] WBIC1s[k] = WBIC(prior1, X, β₁) - Ttrues[k] iWBIC0s[k] = improved_WBIC(prior0, X, β₀) - Ttrues[k] iWBIC1s[k] = improved_WBIC(prior1, X, β₁) - Ttrues[k] AICs[k] = AIC_Normal(X) - Ttrues[k] AICms[k] = AIC_Normal_mean(X) - Ttrues[k] AICss[k] = AIC_Normal_sigma(X) - Ttrues[k] BICs[k] = BIC_Normal(X) - Ttrues[k] BICms[k] = BIC_Normal_mean(X) - Ttrues[k] BICss[k] = BIC_Normal_sigma(X) - Ttrues[k] end return Samples, Ttrues, KL0s, KL1s, WAIC0s, WAIC1s, LOOCV0s, LOOCV1s, BFE0s, 
BFE1s, WBIC0s, WBIC1s, iWBIC0s, iWBIC1s, AICs, AICms, AICss, BICs, BICms, BICss end function show2x2(criterionA::String, criterionB::String; prior0="prior_0", prior1="prior_1", a0s="0s", a1s="1s", b0s="0s", b1s="1s") A0 = eval(Symbol(criterionA, a0s)) A1 = eval(Symbol(criterionA, a1s)) B0 = eval(Symbol(criterionB, b0s)) B1 = eval(Symbol(criterionB, b1s)) count00 = count((A0 .< A1) .& (B0 .< B1)) count01 = count((A0 .< A1) .& (B0 .≥ B1)) count10 = count((A0 .> A1) .& (B0 .≤ B1)) count11 = count((A0 .> A1) .& (B0 .> B1)) println("="^60) @printf("%11s %11s|%-11s selects |\n", "", "", criterionB) @printf("%11s %11s|-----------+-----------+\n", "", "") @printf("%11s %11s|%11s|%11s|\n", "", "", prior0, prior1) println("-----------+-----------+-----------+-----------+----------") @printf("%-11s|%11s| %5d | %5d | %5d\n", criterionA, prior0, count00, count01, count00+count01) @printf( "%11s|%11s| %5d | %5d | %5d\n", " selects", prior1, count10, count11, count10+count11) println("-----------+-----------+-----------+-----------+----------") @printf("%11s %11s| %5d | %5d | %5d\n", "", "", count00+count10, count01+count11, length(A0)) println("="^60) end function plotcomparisonby(criterionA::String, criterionB::String, fig1, fig2; prior0="prior_0", prior1="prior_1", a0s="0s", a1s="1s", b0s="0s", b1s="1s") A0 = eval(Symbol(criterionA, a0s)) A1 = eval(Symbol(criterionA, a1s)) B0 = eval(Symbol(criterionB, b0s)) B1 = eval(Symbol(criterionB, b1s)) A0_A0B0 = A0[(A0 .< A1) .& (B0 .< B1)] A0_A0B1 = A0[(A0 .< A1) .& (B0 .> B1)] A0_A1B0 = A0[(A0 .> A1) .& (B0 .< B1)] A0_A1B1 = A0[(A0 .> A1) .& (B0 .> B1)] A1_A0B0 = A1[(A0 .< A1) .& (B0 .< B1)] A1_A0B1 = A1[(A0 .< A1) .& (B0 .> B1)] A1_A1B0 = A1[(A0 .> A1) .& (B0 .< B1)] A1_A1B1 = A1[(A0 .> A1) .& (B0 .> B1)] B0_A0B0 = B0[(A0 .< A1) .& (B0 .< B1)] B0_A0B1 = B0[(A0 .< A1) .& (B0 .> B1)] B0_A1B0 = B0[(A0 .> A1) .& (B0 .< B1)] B0_A1B1 = B0[(A0 .> A1) .& (B0 .> B1)] B1_A0B0 = B1[(A0 .< A1) .& (B0 .< B1)] B1_A0B1 = B1[(A0 .< A1) .& (B0 .> B1)] 
B1_A1B0 = B1[(A0 .> A1) .& (B0 .< B1)] B1_A1B1 = B1[(A0 .> A1) .& (B0 .> B1)] fig1[:set_facecolor]("darkgray") fig1[:scatter](A0_A0B0, A1_A0B0, s=3, color="red", alpha=0.5) fig1[:scatter](A0_A0B1, A1_A0B1, s=3, color="yellow", alpha=0.5, marker="x") fig1[:scatter](A0_A1B0, A1_A1B0, s=3, color="cyan", alpha=0.5, marker="x") fig1[:scatter](A0_A1B1, A1_A1B1, s=3, color="blue", alpha=0.5) fig1[:set_xlabel]("$criterionA of $prior0") fig1[:set_ylabel]("$criterionA of $prior1") fig1[:set_title]("$criterionA (n = $n, Nsims = $Nsims)") x = [minimum(A0), maximum(A0)] fig1[:plot](x, x, ls="--", color="white", lw=1) fig1[:grid](ls=":", color="black", lw=0.3) fig2[:set_facecolor]("darkgray") fig2[:scatter](B0_A0B0, B1_A0B0, s=3, color="red", alpha=0.5) fig2[:scatter](B0_A0B1, B1_A0B1, s=3, color="yellow", alpha=0.5, marker="x") fig2[:scatter](B0_A1B0, B1_A1B0, s=3, color="cyan", alpha=0.5, marker="x") fig2[:scatter](B0_A1B1, B1_A1B1, s=3, color="blue", alpha=0.5) fig2[:set_xlabel]("$criterionB of $prior0") fig2[:set_ylabel]("$criterionB of $prior1") fig2[:set_title]("$criterionB (n = $n, Nsims = $Nsims)") x = [minimum(B0), maximum(B0)] fig2[:plot](x, x, ls="--", color="white", lw=1) fig2[:grid](ls=":", color="black", lw=0.3) end function plotcomparisonby(criterionA::String, criterionB::String; prior0="prior_0", prior1="prior_1", a0s="0s", a1s="1s", b0s="0s", b1s="1s") plt.figure(figsize=(10,5)) fig1=plt.subplot(121) fig2=plt.subplot(122) plotcomparisonby(criterionA, criterionB, fig1, fig2, prior0=prior0, prior1=prior1, a0s=a0s, a1s=a1s, b0s=b0s, b1s=b1s) plt.tight_layout() end function show_everything(dist_true, n, prior0, prior1; Nsims=10^3) global Samples, Ttrues, KL0s, KL1s, WAIC0s, WAIC1s, LOOCV0s, LOOCV1s, BFE0s, BFE1s, WBIC0s, WBIC1s, iWBIC0s, iWBIC1s, AICs, AICms, AICss, BICs, BICms, BICss Samples, Ttrues, KL0s, KL1s, WAIC0s, WAIC1s, LOOCV0s, LOOCV1s, BFE0s, BFE1s, WBIC0s, WBIC1s, iWBIC0s, iWBIC1s, AICs, AICms, AICss, BICs, BICms, BICss = simcomparison(dist_true, n, 
        prior0, prior1, Nsims=Nsims)
    # Frequencies with which each "prediction-type" criterion prefers prior0.
    @show count(KL0s .< KL1s)/Nsims
    @show count(WAIC0s .< WAIC1s)/Nsims
    @show count(LOOCV0s .< LOOCV1s)/Nsims
    @show count(AICms .< AICs)/Nsims
    @show count(AICss .< AICs)/Nsims
    println()
    # Frequencies for the "marginal-likelihood-type" criteria.
    @show count(BFE0s .< BFE1s)/Nsims
    @show count(WBIC0s .< WBIC1s)/Nsims
    @show count(iWBIC0s .< iWBIC1s)/Nsims
    @show count(BICms .< BICs)/Nsims
    @show count(BICss .< BICs)/Nsims
    println()
    # Correlations and moments of the criterion differences (prior1 - prior0).
    @show cor(KL1s-KL0s, WAIC1s-WAIC0s)
    @show cor(KL1s-KL0s, LOOCV1s-LOOCV0s)
    @show cor(WAIC1s-WAIC0s, LOOCV1s-LOOCV0s)
    @show mean(KL1s-KL0s), std(KL1s-KL0s)
    @show mean(WAIC1s-WAIC0s), std(WAIC1s-WAIC0s)
    @show mean(LOOCV1s-LOOCV0s), std(LOOCV1s-LOOCV0s)
    println()
    @show cor(BFE1s-BFE0s, WBIC1s-WBIC0s)
    @show cor(BFE1s-BFE0s, iWBIC1s-iWBIC0s)
    @show cor(WBIC1s-WBIC0s, iWBIC1s-iWBIC0s)
    # NOTE(review): self-correlation — always 1.0; almost certainly a
    # copy-paste slip (a different pair was probably intended). Left in place
    # to preserve the printed output.
    @show cor(iWBIC1s-iWBIC0s, iWBIC1s-iWBIC0s)
    @show mean(BFE1s-BFE0s), std(BFE1s-BFE0s)
    @show mean(WBIC1s-WBIC0s), std(WBIC1s-WBIC0s)
    @show mean(iWBIC1s-iWBIC0s), std(iWBIC1s-iWBIC0s)
    println()
    # 2x2 selection tables; the AIC/BIC rows compare the mean-only normal
    # model ("ms" suffix) against the full normal model ("s" suffix).
    show2x2("KL", "WAIC")
    show2x2("KL", "LOOCV")
    show2x2("WAIC", "LOOCV")
    show2x2("BFE", "WBIC")
    show2x2("BFE", "iWBIC")
    show2x2("WBIC", "iWBIC")
    show2x2("KL", "BFE")
    show2x2("KL", "AIC", prior0="Normal_mean", prior1="Normal", b0s="ms", b1s="s")
    show2x2("BFE", "BIC", prior0="Normal_mean", prior1="Normal", b0s="ms", b1s="s")
    # Let the printed tables flush before the plots appear (notebook use).
    sleep(0.1)
    plotP01(prior0, prior1)
    plotcomparisonby("KL", "WAIC")
    plotcomparisonby("KL", "LOOCV")
    plotcomparisonby("WAIC", "LOOCV")
    plotcomparisonby("BFE", "WBIC")
    plotcomparisonby("BFE", "iWBIC")
    plotcomparisonby("WBIC", "iWBIC")
    plotcomparisonby("KL", "BFE")
    plotcomparisonby("KL", "AIC", prior0="Normal_mean", prior1="Normal", b0s="ms", b1s="s")
    plotcomparisonby("BFE", "BIC", prior0="Normal_mean", prior1="Normal", b0s="ms", b1s="s")
end

########## Test
# Run 1: well-specified, n = 32; prior0 nearly pins λ = μ_λ (α = 1e8 with
# tiny scale), prior1 is a diffuse reference prior.
@show Nsims = 1000
@show μ₀, μ_λ = 0.5, 1.0
@show dist_true = Normal(μ₀, 1/sqrt(μ_λ))
@show n = 2^5
@show prior0 = NormalGamma(μ₀, 1.0, 1e8, μ_λ/1e8) # Nearly λ=μ_λ
@show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0)
println()
@time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims);
# Run 2: same comparison with a large sample (n = 1024) and more replications.
@show Nsims = 10000
@show μ₀, μ_λ = 0.5, 1.0
@show dist_true = Normal(μ₀, 1/sqrt(μ_λ))
@show n = 2^10
@show prior0 = NormalGamma(μ₀, 1.0, 1e8, μ_λ/1e8) # Nearly λ=μ_λ
@show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0)
println()
@time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims);
# Run 3: misspecified precision (true λ = μ_λ - 0.5 ≠ pinned μ_λ).
@show Nsims = 1000
@show μ₀, μ_λ = 0.5, 1.0
@show dist_true = Normal(μ₀, 1/sqrt(μ_λ-0.5))
@show n = 2^5
@show prior0 = NormalGamma(μ₀, 1.0, 1e8, μ_λ/1e8) # Nearly λ=μ_λ
@show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0)
println()
@time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims);
# Run 4: prior0 now nearly pins the MEAN (λ₀ = 1e8) instead of the precision.
@show Nsims = 1000
@show μ₀, μ_λ = 0.0, 1.5
@show dist_true = Normal(μ₀, 1/sqrt(μ_λ))
@show n = 2^5
@show prior0 = NormalGamma(μ₀, 1e8, 1.0, μ_λ/1.0) # Nearly μ=μ₀
@show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0)
println()
@time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims);
# Run 5: same pinned-mean comparison, large sample.
@show Nsims = 10000
@show μ₀, μ_λ = 0.0, 1.5
@show dist_true = Normal(μ₀, 1/sqrt(μ_λ))
@show n = 2^10
@show prior0 = NormalGamma(μ₀, 1e8, 1.0, μ_λ/1.0) # Nearly μ=μ₀
@show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0)
println()
@time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims);
# Run 6: misspecified mean (true mean shifted by +0.3 from the pinned μ₀).
@show Nsims = 1000
@show μ₀, μ_λ = 0.0, 1.5
@show dist_true = Normal(μ₀+0.3, 1/sqrt(μ_λ))
@show n = 2^5
@show prior0 = NormalGamma(μ₀, 1e8, 1.0, μ_λ/1.0) # Nearly μ=μ₀
@show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0)
println()
@time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims);
# Run 7: prior0 nearly pins BOTH μ and λ at the true values; well-specified.
@show Nsims = 1000
@show μ₀, μ_λ = 0.0, 1.0
@show dist_true = Normal(μ₀, 1/sqrt(μ_λ))
@show n = 2^5
@show prior0 = NormalGamma(μ₀, 1e8, 1e8, μ_λ/1e8) # Nearly μ=μ₀ and λ=μ_λ
@show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0)
println()
@time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims);
# Run 8: same fully-pinned comparison, large sample; continues below.
@show Nsims = 10000
@show μ₀, μ_λ = 0.0, 1.0
@show dist_true = Normal(μ₀, 1/sqrt(μ_λ))
@show n = 2^10
@show prior0 = NormalGamma(μ₀, 1e8, 1e8, μ_λ/1e8) # Nearly μ=μ₀ and λ=μ_λ
@show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0) println() @time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims); @show Nsims = 1000 @show μ₀, μ_λ = 0.0, 1.0 @show dist_true = Normal(μ₀+0.2, 1/sqrt(μ_λ-0.4)) @show n = 2^5 @show prior0 = NormalGamma(μ₀, 1e16, 1e16, μ_λ/1e16) # Nearly μ=μ₀ and λ=μ_λ @show prior1 = NormalGamma(μ₀, 1.0, 1.0, μ_λ/1.0) println() @time show_everything(dist_true, n, prior0, prior1, Nsims=Nsims);