ENV["LINES"]=100
using Distributions
using PyPlot
using QuadGK
using KernelDensity
?Gamma
search: Gamma gamma lgamma digamma trigamma polygamma invdigamma eulergamma
Gamma(α,θ)
The Gamma distribution with shape parameter α
and scale θ
has probability density function
Gamma() # Gamma distribution with unit shape and unit scale, i.e. Gamma(1, 1)
Gamma(α) # Gamma distribution with shape α and unit scale, i.e. Gamma(α, 1)
Gamma(α, θ) # Gamma distribution with shape α and scale θ
params(d) # Get the parameters, i.e. (α, θ)
shape(d) # Get the shape parameter, i.e. α
scale(d) # Get the scale parameter, i.e. θ
External links
# true distribution
@show mu_true = 5.0
@show sigma_true = 2.0
@show theta_true = sigma_true^2/mu_true
@show alpha_true = mu_true/theta_true
@show dist_true = Gamma(alpha_true, theta_true)
sleep(0.1)
figure(figsize=(5,3.5))
x = linspace(0,16,201)
plot(x, pdf.(dist_true,x), label="true distribution")
grid(ls=":")
legend()
mu_true = 5.0 = 5.0 sigma_true = 2.0 = 2.0 theta_true = sigma_true ^ 2 / mu_true = 0.8 alpha_true = mu_true / theta_true = 6.25 dist_true = Gamma(alpha_true, theta_true) = Distributions.Gamma{Float64}(α=6.25, θ=0.8)
PyObject <matplotlib.legend.Legend object at 0x0000000029A95828>
?Normal
search: Normal normalize normalize! NormalCanon normalize_string
Normal(μ,σ)
The Normal distribution with mean μ
and standard deviation σ
has probability density function
Normal() # standard Normal distribution with zero mean and unit variance
Normal(mu) # Normal distribution with mean mu and unit variance
Normal(mu, sig) # Normal distribution with mean mu and variance sig^2
params(d) # Get the parameters, i.e. (mu, sig)
mean(d) # Get the mean, i.e. mu
std(d) # Get the standard deviation, i.e. sig
External links
# true distribution に最も近い正規分布
@show normal_0 = Normal(mu_true, sigma_true)
sleep(0.1)
figure(figsize=(5,3.5))
x = linspace(-2.5,16,201)
plot(x, pdf.(dist_true,x), label="true distribution")
plot(x, pdf.(normal_0,x), label="best normal approx.", ls=":", lw=2)
grid(ls=":")
legend()
normal_0 = Normal(mu_true, sigma_true) = Distributions.Normal{Float64}(μ=5.0, σ=2.0)
PyObject <matplotlib.legend.Legend object at 0x000000004FAC1518>
# true distribution でサンプルを生成
@show n = 2^5
@show sample = rand(dist_true, n)
sleep(0.1)
x = linspace(0,16,201)
figure(figsize=(5,3.5))
ik = InterpKDE(kde(sample))
plot(x, pdf.(ik, x), label="KDE of sample")
scatter(sample, pdf.(ik, sample), s=20, label="sample")
plot(x, pdf.(dist_true,x), label="true distribution")
grid(ls=":")
legend()
n = 2 ^ 5 = 32 sample = rand(dist_true, n) = [4.5641, 6.19197, 5.60249, 4.37372, 1.83065, 3.84634, 5.29138, 4.61106, 4.15472, 1.99385, 3.18426, 6.17617, 3.88658, 3.65506, 5.2163, 3.33684, 2.69788, 2.92942, 5.83301, 4.32333, 4.91496, 7.16279, 7.16302, 7.99822, 2.7417, 4.88835, 1.48054, 2.85098, 4.45253, 6.91663, 3.69474, 7.98897]
PyObject <matplotlib.legend.Legend object at 0x000000004FAC1630>
# サンプルを用いて正規分布でのフィッティング (最尤法と同値)
@show normal_star = fit(Normal, sample)
@show mu_star = normal_star.μ
@show sigma_star = normal_star.σ
@show mean(sample)
@show std(sample, corrected=false)
sleep(0.1)
x = linspace(-2.5,16,201)
figure(figsize=(5,3.5))
ik = InterpKDE(kde(sample))
plot(x, pdf.(ik, x), label="KDE of sample")
scatter(sample, pdf.(ik, sample), s=20, label="sample")
#plot(x, pdf.(dist_true,x), label="true distribution")
#plot(x, pdf.(normal_0,x), label="best normal approx.", ls=":", lw=2)
plot(x, pdf.(normal_star,x), label="normal fitting", ls="--")
grid(ls=":")
legend()
normal_star = fit(Normal, sample) = Distributions.Normal{Float64}(μ=4.561017287848959, σ=1.7132238486951947) mu_star = normal_star.μ = 4.561017287848959 sigma_star = normal_star.σ = 1.7132238486951947 mean(sample) = 4.56101728784896 std(sample, corrected=false) = 1.713223848695195
PyObject <matplotlib.legend.Legend object at 0x00000000520E5978>
# AIC の計算 (注意:正規分布モデルは真の確率分布を含まない)
@show maxloglikelihood_normal = sum(logpdf.(normal_star, sample))
@show d_normal = 2
@show -2*maxloglikelihood_normal, 2*d_normal
@show AIC_normal = -2*maxloglikelihood_normal + 2*d_normal;
maxloglikelihood_normal = sum(logpdf.(normal_star, sample)) = -62.63409345350645 d_normal = 2 = 2 (-2maxloglikelihood_normal, 2d_normal) = (125.2681869070129, 4) AIC_normal = -2maxloglikelihood_normal + 2d_normal = 129.2681869070129
# サンプルを用いてガンマ分布でフィッティング (最尤法と同値)
@show gamma_star = fit(Gamma, sample)
@show alpha_star = gamma_star.α
@show theta_star = gamma_star.θ
sleep(0.1)
x = linspace(-2.5,16,201)
figure(figsize=(5,3.5))
ik = InterpKDE(kde(sample))
plot(x, pdf.(ik, x), label="KDE of sample")
scatter(sample, pdf.(ik, sample), s=20, label="sample")
plot(x, pdf.(dist_true,x), label="true distribution")
#plot(x, pdf.(normal_0,x), label="best normal approx.", ls=":", lw=2)
plot(x, pdf.(normal_star,x), label="normal fitting", ls="--")
plot(x, pdf.(gamma_star,x), label="gamma fitting", ls="-.")
grid(ls=":")
legend()
gamma_star = fit(Gamma, sample) = Distributions.Gamma{Float64}(α=6.513594689286562, θ=0.700230441932599) alpha_star = gamma_star.α = 6.513594689286562 theta_star = gamma_star.θ = 0.700230441932599
PyObject <matplotlib.legend.Legend object at 0x00000000520EDA20>
# AICの計算
@show maxloglikelihood_gamma = sum(logpdf.(gamma_star, sample))
@show d_gamma = 2
@show -2*maxloglikelihood_gamma, 2*d_gamma
@show AIC_gamma = -2*maxloglikelihood_gamma + 2*d_gamma;
maxloglikelihood_gamma = sum(logpdf.(gamma_star, sample)) = -62.28365016788004 d_gamma = 2 = 2 (-2maxloglikelihood_gamma, 2d_gamma) = (124.56730033576008, 4) AIC_gamma = -2maxloglikelihood_gamma + 2d_gamma = 128.56730033576008
# AICの比較
println("AIC_normal = $AIC_normal = $(-2*maxloglikelihood_normal) + $(2*d_normal)")
println("AIC_gamma = $AIC_gamma = $(-2*maxloglikelihood_gamma) + $(2*d_gamma)")
println("AIC_normal - AIC_gamma = $(AIC_normal - AIC_gamma)")
AIC_normal = 129.2681869070129 = 125.2681869070129 + 4 AIC_gamma = 128.56730033576008 = 124.56730033576008 + 4 AIC_normal - AIC_gamma = 0.7008865712528234
?quadgk
search: quadgk QuadGK
quadgk(f, a,b,c...; reltol=sqrt(eps), abstol=0, maxevals=10^7, order=7, norm=vecnorm)
Numerically integrate the function f(x)
from a
to b
, and optionally over additional intervals b
to c
and so on. Keyword options include a relative error tolerance reltol
(defaults to sqrt(eps)
in the precision of the endpoints), an absolute error tolerance abstol
(defaults to 0), a maximum number of function evaluations maxevals
(defaults to 10^7
), and the order
of the integration rule (defaults to 7).
Returns a pair (I,E)
of the estimated integral I
and an estimated upper bound on the absolute error E
. If maxevals
is not exceeded then E <= max(abstol, reltol*norm(I))
will hold. (Note that it is useful to specify a positive abstol
in cases where norm(I)
may be zero.)
The endpoints a
et cetera can also be complex (in which case the integral is performed over straight-line segments in the complex plane). If the endpoints are BigFloat
, then the integration will be performed in BigFloat
precision as well.
!!! note
It is advisable to increase the integration order
in rough proportion to the precision, for smooth integrands.
More generally, the precision is set by the precision of the integration endpoints (promoted to floating-point types).
The integrand f(x)
can return any numeric scalar, vector, or matrix type, or in fact any type supporting +
, -
, multiplication by real values, and a norm
(i.e., any normed vector space). Alternatively, a different norm can be specified by passing a norm
-like function as the norm
keyword argument (which defaults to vecnorm
).
!!! note Only one-dimensional integrals are provided by this function. For multi-dimensional integration (cubature), there are many different algorithms (often much better than simple nested 1d integrals) and the optimal choice tends to be very problem-dependent. See the Julia external-package listing for available algorithms for multidimensional integration or other specialized tasks (such as integrals of highly oscillatory or singular functions).
The algorithm is an adaptive Gauss-Kronrod integration technique: the integral in each interval is estimated using a Kronrod rule (2*order+1
points) and the error is estimated using an embedded Gauss rule (order
points). The interval with the largest error is then subdivided into two intervals and the process is repeated until the desired error tolerance is achieved.
These quadrature rules work best for smooth functions within each interval, so if your function has a known discontinuity or other singularity, it is best to subdivide your interval to put the singularity at an endpoint. For example, if f
has a discontinuity at x=0.7
and you want to integrate from 0 to 1, you should use quadgk(f, 0,0.7,1)
to subdivide the interval at the point of discontinuity. The integrand is never evaluated exactly at the endpoints of the intervals, so it is possible to integrate functions that diverge at the endpoints as long as the singularity is integrable (for example, a log(x)
or 1/sqrt(x)
singularity).
For real-valued endpoints, the starting and/or ending points may be infinite. (A coordinate transformation is performed internally to map the infinite interval to a finite one.)
# 真のモデルのShannon情報量
# quadgk() は QuadGKパッケージの数値積分函数
@show S_true = entropy(dist_true)
f(x) = -pdf(dist_true, x)*logpdf(dist_true, x)
xintmax = 100.0
@time S_true_int, error = quadgk(f, 0, xintmax)
@show S_true_int, error
@show S_true_int - S_true;
S_true = entropy(dist_true) = 2.0565794195364004 1.981603 seconds (1.48 M allocations: 79.577 MiB, 1.99% gc time) (S_true_int, error) = (2.056579419536399, 5.6751887001885866e-9) S_true_int - S_true = -1.3322676295501878e-15
# 汎化損失と Kullback-Leibler 情報量の計算
f(x) = -pdf(dist_true,x)*logpdf(normal_star, x)
@show GL_normal, error = quadgk(f, 0, xintmax)
@show KL_normal = GL_normal - S_true
println()
f(x) = -pdf(dist_true,x)*logpdf(gamma_star, x)
@show GL_gamma, error = quadgk(f, 0, xintmax)
@show KL_gamma = GL_gamma - S_true
(GL_normal, error) = quadgk(f, 0, xintmax) = (2.1715422713744714, 2.1955588512277876e-8) KL_normal = GL_normal - S_true = 0.11496285183807098 (GL_gamma, error) = quadgk(f, 0, xintmax) = (2.0853977048159633, 6.467057431488734e-9) KL_gamma = GL_gamma - S_true = 0.028818285279562872
0.028818285279562872
# AICと汎化損失の比較
@show AIC_normal - 2*n*GL_normal
@show AIC_normal/(2*n) - GL_normal
println()
@show AIC_gamma - 2*n*GL_gamma
@show AIC_gamma/(2*n) - GL_gamma;
AIC_normal - 2 * n * GL_normal = -9.710518460953267 AIC_normal / (2n) - GL_normal = -0.1517268509523948 AIC_gamma - 2 * n * GL_gamma = -4.898152772461572 AIC_gamma / (2n) - GL_gamma = -0.07653363706971206
function AICs_and_GLs(dist_true, a, b, model1, d_model1, model2, d_model2, n, L)
AIC1s = Array{Float64}(L)
GL1s = Array{Float64}(L)
AIC2s = Array{Float64}(L)
GL2s = Array{Float64}(L)
for l in 1:L
sample = rand(dist_true, n)
dist_fit = fit(model1, sample)
AIC1s[l] = -2*sum(logpdf.(dist_fit, sample)) + 2*d_model1
f(x) = -pdf(dist_true, x)*logpdf(dist_fit, x)
GL1s[l] = quadgk(f, a, b)[1]
dist_fit = fit(model2, sample)
AIC2s[l] = -2*sum(logpdf.(dist_fit, sample)) + 2*d_model2
f(x) = -pdf(dist_true, x)*logpdf(dist_fit, x)
GL2s[l] = quadgk(f, a, b)[1]
end
return AIC1s, GL1s, AIC2s, GL2s
end
AICs_and_GLs (generic function with 1 method)
function doall(mu_true, sigma_true, n, L)
bins = Int64(round(0.5*sqrt(L)))
@show theta_true = sigma_true^2/mu_true
@show alpha_true = mu_true/theta_true
@show dist_true = Gamma(alpha_true, theta_true)
@show normal_0 = Normal(mu_true, sigma_true)
println()
@time AICs_normal, GLs_normal, AICs_gamma, GLs_gamma =
AICs_and_GLs(dist_true, 0.0, 1000.0, Normal, 2, Gamma, 2, n, L)
@show mean(AICs_normal/(2n) - GLs_normal)
@show std(AICs_normal/(2n) - GLs_normal)
println()
@show mean(AICs_gamma/(2n) - GLs_gamma)
@show std(AICs_gamma/(2n) - GLs_gamma)
println()
probability_of_failure = count(x -> x<0, AICs_normal - AICs_gamma)/length(AICs_normal - AICs_gamma)
@show probability_of_failure
probability_of_failure_str = @sprintf("%.2f%%", 100*probability_of_failure)
@show probability_of_failure_str
println()
@show S_true = entropy(dist_true)
f(x) = -pdf(dist_true, x)*logpdf(normal_0, x)
@time minGL_normal, error = quadgk(f, 0, 100)
@show minGL_normal
@show minGL_gamma = S_true
@show minKL_normal = minGL_normal - S_true
@show minKL_gamma = minGL_gamma - S_true
sleep(0.1)
#####
figure(figsize=(10,4))
subplot(121)
x = linspace(-2.5,16,201)
plot(x, pdf.(dist_true,x), label="true distribution")
plot(x, pdf.(normal_0,x), label="best normal approx.", ls=":", lw=2)
ylabel("propbability density")
grid(ls=":")
legend()
title("true distribution and its best normal approx.")
subplot(122)
AIC_diff = AICs_normal - AICs_gamma
h = plt[:hist](AIC_diff, normed=true, bins=bins, label="AIC_n - AIC_g")
xlabel("AIC_normal - AIC_gamma")
ylabel("empirical propbability density")
grid(ls=":")
axvline(0, color="pink", ls="--")
xann = 0.9*minimum(AIC_diff)
yann = 0.9*maximum(h[1])
annotate(probability_of_failure_str,
xy=(0.3*xann, 0.3*yann), xytext=(xann, 0.6*yann),
arrowprops=Dict("arrowstyle" => "->"))
legend()
tight_layout()
#####
figure(figsize=(10,4))
subplot(121)
plt[:hist](AICs_normal/(2n) - GLs_normal, normed=true, bins=bins, label="normal model")
xlabel("AIC/(2n) - (generalization loss)")
ylabel("empirical propbability density")
grid(ls=":")
legend()
title("AIC/(2n) - GL (n=$n)")
subplot(122)
plt[:hist](AICs_gamma/(2n) - GLs_gamma, normed=true, bins=bins, label="Gamma model")
xlabel("AIC/(2n) - (generalization loss)")
ylabel("empirical propbability density")
grid(ls=":")
legend()
title("AIC/(2n) - GL (n=$n)")
tight_layout()
#####
figure(figsize=(10,4))
subplot(121)
plt[:hist](AICs_normal/(2*n) - minGL_normal, normed=true, bins=bins, label="normal model")
xlabel("AIC/(2n) - min(generalization loss)")
ylabel("empirical propbability density")
grid(ls=":")
legend()
title("AIC/(2n) - min(GL) (n=$n)")
subplot(122)
plt[:hist](AICs_gamma/(2*n) - minGL_gamma, normed=true, bins=bins, label="Gamma model")
ylabel("empirical propbability density")
grid(ls=":")
legend()
title("AIC/(2n) - min(GL) (n=$n)")
tight_layout()
#####
figure(figsize=(10,4))
subplot(121)
x = linspace(0,12,201)
range = minimum(x)-1e-6, maximum(x)
plt[:hist](2n*(GLs_normal - minGL_normal), normed=true, bins=bins, range=range, label="normal model")
plot(x, pdf.(Chisq(2),x), label="Chisq(2)", ls="--", lw=2)
xlabel("2n((generalization loss) - min(generalization loss))")
ylabel("empirical propbability density")
grid(ls=":")
legend()
title("2n(GL - min(GL)) (n=$n)")
subplot(122)
x = linspace(0,12,201)
range = minimum(x)-1e-6, maximum(x)
plt[:hist](2n*(GLs_gamma - minGL_gamma), normed=true, bins=bins, range=range, label="Gamma model")
plot(x, pdf.(Chisq(2),x), label="Chisq(2)", ls="--", lw=2)
xlabel("2n*((generalization loss) - min(generalization loss))")
ylabel("empirical propbability density")
grid(ls=":")
legend()
title("2n(GL - min(GL)) (n=$n)")
tight_layout()
end
doall (generic function with 1 method)
@show mu_true = 5.0
@show sigma_true = 10.0
@show n = 2^5
@show L = 10^4
println()
doall(mu_true, sigma_true, n, L)
mu_true = 5.0 = 5.0 sigma_true = 10.0 = 10.0 n = 2 ^ 5 = 32 L = 10 ^ 4 = 10000 theta_true = sigma_true ^ 2 / mu_true = 20.0 alpha_true = mu_true / theta_true = 0.25 dist_true = Gamma(alpha_true, theta_true) = Distributions.Gamma{Float64}(α=0.25, θ=20.0) normal_0 = Normal(mu_true, sigma_true) = Distributions.Normal{Float64}(μ=5.0, σ=10.0) 48.157178 seconds (502.33 M allocations: 7.714 GiB, 1.14% gc time) mean(AICs_normal / (2n) - GLs_normal) = -0.48596453211297885 std(AICs_normal / (2n) - GLs_normal) = 1.2786703131192905 mean(AICs_gamma / (2n) - GLs_gamma) = -0.01791324240456336 std(AICs_gamma / (2n) - GLs_gamma) = 0.6059536405285314 probability_of_failure = 0.0 probability_of_failure_str = "0.00%" S_true = entropy(dist_true) = 1.3631646482198698 0.208866 seconds (42.79 k allocations: 1.251 MiB) minGL_normal = 3.687733918081491 minGL_gamma = S_true = 1.3631646482198698 minKL_normal = minGL_normal - S_true = 2.3245692698616214 minKL_gamma = minGL_gamma - S_true = 0.0
@show mu_true = 5.0
@show sigma_true = 5.0
@show n = 2^5
@show L = 10^4
println()
doall(mu_true, sigma_true, n, L)
mu_true = 5.0 = 5.0 sigma_true = 5.0 = 5.0 n = 2 ^ 5 = 32 L = 10 ^ 4 = 10000 theta_true = sigma_true ^ 2 / mu_true = 5.0 alpha_true = mu_true / theta_true = 1.0 dist_true = Gamma(alpha_true, theta_true) = Distributions.Gamma{Float64}(α=1.0, θ=5.0) normal_0 = Normal(mu_true, sigma_true) = Distributions.Normal{Float64}(μ=5.0, σ=5.0) 7.302019 seconds (70.93 M allocations: 1.113 GiB, 1.44% gc time) mean(AICs_normal / (2n) - GLs_normal) = -0.10208257483750932 std(AICs_normal / (2n) - GLs_normal) = 0.3468183354479419 mean(AICs_gamma / (2n) - GLs_gamma) = -0.0049887283061916185 std(AICs_gamma / (2n) - GLs_gamma) = 0.1971873845336475 probability_of_failure = 0.0008 probability_of_failure_str = "0.08%" S_true = entropy(dist_true) = 2.6094379124341005 0.000094 seconds (746 allocations: 12.125 KiB) minGL_normal = 3.0283760271660993 minGL_gamma = S_true = 2.6094379124341005 minKL_normal = minGL_normal - S_true = 0.4189381147319988 minKL_gamma = minGL_gamma - S_true = 0.0
@show mu_true = 5.0
@show sigma_true = 2.0
@show n = 2^5
@show L = 10^4
println()
doall(mu_true, sigma_true, n, L)
mu_true = 5.0 = 5.0 sigma_true = 2.0 = 2.0 n = 2 ^ 5 = 32 L = 10 ^ 4 = 10000 theta_true = sigma_true ^ 2 / mu_true = 0.8 alpha_true = mu_true / theta_true = 6.25 dist_true = Gamma(alpha_true, theta_true) = Distributions.Gamma{Float64}(α=6.25, θ=0.8) normal_0 = Normal(mu_true, sigma_true) = Distributions.Normal{Float64}(μ=5.0, σ=2.0) 5.854977 seconds (45.42 M allocations: 742.169 MiB, 1.44% gc time) mean(AICs_normal / (2n) - GLs_normal) = -0.022090030930159706 std(AICs_normal / (2n) - GLs_normal) = 0.18974003014931837 mean(AICs_gamma / (2n) - GLs_gamma) = -0.007113567040741691 std(AICs_gamma / (2n) - GLs_gamma) = 0.15852414732483713 probability_of_failure = 0.1596 probability_of_failure_str = "15.96%" S_true = entropy(dist_true) = 2.0565794195364004 0.000160 seconds (1.92 k allocations: 30.703 KiB) minGL_normal = 2.1120857137646634 minGL_gamma = S_true = 2.0565794195364004 minKL_normal = minGL_normal - S_true = 0.05550629422826292 minKL_gamma = minGL_gamma - S_true = 0.0
@show mu_true = 5.0
@show sigma_true = 2.0
@show n = 2^8
@show L = 10^4
println()
doall(mu_true, sigma_true, n, L)
mu_true = 5.0 = 5.0 sigma_true = 2.0 = 2.0 n = 2 ^ 8 = 256 L = 10 ^ 4 = 10000 theta_true = sigma_true ^ 2 / mu_true = 0.8 alpha_true = mu_true / theta_true = 6.25 dist_true = Gamma(alpha_true, theta_true) = Distributions.Gamma{Float64}(α=6.25, θ=0.8) normal_0 = Normal(mu_true, sigma_true) = Distributions.Normal{Float64}(μ=5.0, σ=2.0) 6.481934 seconds (45.36 M allocations: 792.055 MiB, 1.37% gc time) mean(AICs_normal / (2n) - GLs_normal) = -0.0011092681138581747 std(AICs_normal / (2n) - GLs_normal) = 0.05469178124933257 mean(AICs_gamma / (2n) - GLs_gamma) = 0.0006405664442972563 std(AICs_gamma / (2n) - GLs_gamma) = 0.04620951716393543 probability_of_failure = 0.0022 probability_of_failure_str = "0.22%" S_true = entropy(dist_true) = 2.0565794195364004 0.000136 seconds (1.92 k allocations: 30.703 KiB) minGL_normal = 2.1120857137646634 minGL_gamma = S_true = 2.0565794195364004 minKL_normal = minGL_normal - S_true = 0.05550629422826292 minKL_gamma = minGL_gamma - S_true = 0.0
@show mu_true = 5.0
@show sigma_true = 1.0
@show n = 2^5
@show L = 10^4
println()
doall(mu_true, sigma_true, n, L)
mu_true = 5.0 = 5.0 sigma_true = 1.0 = 1.0 n = 2 ^ 5 = 32 L = 10 ^ 4 = 10000 theta_true = sigma_true ^ 2 / mu_true = 0.2 alpha_true = mu_true / theta_true = 25.0 dist_true = Gamma(alpha_true, theta_true) = Distributions.Gamma{Float64}(α=25.0, θ=0.2) normal_0 = Normal(mu_true, sigma_true) = Distributions.Normal{Float64}(μ=5.0, σ=1.0) 4.303287 seconds (54.63 M allocations: 885.222 MiB, 1.91% gc time) mean(AICs_normal / (2n) - GLs_normal) = -0.01028365315771081 std(AICs_normal / (2n) - GLs_normal) = 0.16721424116235487 mean(AICs_gamma / (2n) - GLs_gamma) = -0.006476213033842371 std(AICs_gamma / (2n) - GLs_gamma) = 0.1590522461013108 probability_of_failure = 0.3187 probability_of_failure_str = "31.87%" S_true = entropy(dist_true) = 1.4054711772308395 0.000186 seconds (2.21 k allocations: 35.328 KiB) minGL_normal = 1.418938533204661 minGL_gamma = S_true = 1.4054711772308395 minKL_normal = minGL_normal - S_true = 0.013467355973821427 minKL_gamma = minGL_gamma - S_true = 0.0
@show mu_true = 5.0
@show sigma_true = 1.0
@show n = 2^8
@show L = 10^4
println()
doall(mu_true, sigma_true, n, L)
mu_true = 5.0 = 5.0 sigma_true = 1.0 = 1.0 n = 2 ^ 8 = 256 L = 10 ^ 4 = 10000 theta_true = sigma_true ^ 2 / mu_true = 0.2 alpha_true = mu_true / theta_true = 25.0 dist_true = Gamma(alpha_true, theta_true) = Distributions.Gamma{Float64}(α=25.0, θ=0.2) normal_0 = Normal(mu_true, sigma_true) = Distributions.Normal{Float64}(μ=5.0, σ=1.0) 4.689285 seconds (54.63 M allocations: 936.041 MiB, 1.88% gc time) mean(AICs_normal / (2n) - GLs_normal) = 2.298096938388452e-5 std(AICs_normal / (2n) - GLs_normal) = 0.047878751136205835 mean(AICs_gamma / (2n) - GLs_gamma) = 0.0004695186480988951 std(AICs_gamma / (2n) - GLs_gamma) = 0.045380847517096715 probability_of_failure = 0.0899 probability_of_failure_str = "8.99%" S_true = entropy(dist_true) = 1.4054711772308395 0.000131 seconds (2.21 k allocations: 35.328 KiB) minGL_normal = 1.418938533204661 minGL_gamma = S_true = 1.4054711772308395 minKL_normal = minGL_normal - S_true = 0.013467355973821427 minKL_gamma = minGL_gamma - S_true = 0.0
@show mu_true = 5.0
@show sigma_true = 1.0
@show n = 2^10
@show L = 10^4
println()
doall(mu_true, sigma_true, n, L)
mu_true = 5.0 = 5.0 sigma_true = 1.0 = 1.0 n = 2 ^ 10 = 1024 L = 10 ^ 4 = 10000 theta_true = sigma_true ^ 2 / mu_true = 0.2 alpha_true = mu_true / theta_true = 25.0 dist_true = Gamma(alpha_true, theta_true) = Distributions.Gamma{Float64}(α=25.0, θ=0.2) normal_0 = Normal(mu_true, sigma_true) = Distributions.Normal{Float64}(μ=5.0, σ=1.0) 6.480530 seconds (54.64 M allocations: 1.086 GiB, 1.73% gc time) mean(AICs_normal / (2n) - GLs_normal) = -0.00014322298659301077 std(AICs_normal / (2n) - GLs_normal) = 0.023670911204768174 mean(AICs_gamma / (2n) - GLs_gamma) = 8.215546042121617e-5 std(AICs_gamma / (2n) - GLs_gamma) = 0.0225344789801987 probability_of_failure = 0.0042 probability_of_failure_str = "0.42%" S_true = entropy(dist_true) = 1.4054711772308395 0.000115 seconds (2.21 k allocations: 35.328 KiB) minGL_normal = 1.418938533204661 minGL_gamma = S_true = 1.4054711772308395 minKL_normal = minGL_normal - S_true = 0.013467355973821427 minKL_gamma = minGL_gamma - S_true = 0.0
function findfailure(dist_true, model1, d_model1, model2, d_model2, n, L)
AIC1 = 0.0
AIC2 = 0.0
sample = Float64(L)
for l in 1:L
sample = rand(dist_true, n)
dist_fit1 = fit(model1, sample)
AIC1 = -2*sum(logpdf.(dist_fit1, sample)) + 2*d_model1
dist_fit2 = fit(model2, sample)
AIC2 = -2*sum(logpdf.(dist_fit2, sample)) + 2*d_model2
if AIC1 - AIC2 < -5.0
return sample, AIC1, AIC2, dist_fit1, dist_fit2
end
end
return Float64[], AIC2, AIC1, dist_fit1, dist_fit2
end
findfailure (generic function with 1 method)
function findfailure_and_plot(randseed)
srand(randseed)
dist_true = Gamma(6.25, 0.8)
n = 2^5
L = 10^4
sample, AIC_normal, AIC_gamma, normal_star, gamma_star =
findfailure(dist_true, Normal, 2, Gamma, 2, n, L)
@show sample
@show AIC_normal
@show AIC_gamma
@show AIC_normal - AIC_gamma
@show normal_star
@show normal_star
if length(sample) > 0
sleep(0.1)
x = linspace(-2.5,16,201)
figure(figsize=(5,3.5))
ik = InterpKDE(kde(sample))
plot(x, pdf.(ik, x), label="KDE of sample")
scatter(sample, pdf.(ik, sample), s=10, label="sample")
#plot(x, pdf.(dist_true,x), label="true distribution")
plot(x, pdf.(normal_star,x), label="normal fitting", ls="--")
plot(x, pdf.(gamma_star,x), label="gamma fitting", ls="-.")
grid(ls=":")
legend()
end
end
findfailure_and_plot (generic function with 1 method)
findfailure_and_plot(1)
sample = [8.51694, 5.03924, 4.93218, 6.43229, 5.95894, 2.80618, 6.16621, 6.36799, 1.27609, 4.65685, 2.18912, 6.50703, 5.85715, 5.60843, 3.99716, 7.06639, 7.90057, 3.21082, 4.11817, 0.534787, 4.94174, 6.74726, 4.14043, 4.23995, 2.30083, 4.49409, 3.27103, 4.4309, 7.01271, 7.00271, 5.51779, 4.60777] AIC_normal = 134.27305459783474 AIC_gamma = 142.89976664795296 AIC_normal - AIC_gamma = -8.626712050118215 normal_star = Distributions.Normal{Float64}(μ=4.932805074657629, σ=1.8525775596823404) normal_star = Distributions.Normal{Float64}(μ=4.932805074657629, σ=1.8525775596823404)
PyObject <matplotlib.legend.Legend object at 0x000000005391CE48>
findfailure_and_plot(2)
sample = [6.61288, 4.27077, 8.6783, 4.2972, 8.1462, 6.05261, 3.69757, 5.14709, 5.79036, 7.33517, 4.6505, 1.44042, 4.21831, 7.63693, 6.78671, 5.25584, 6.74642, 2.56541, 4.37669, 4.50145, 6.47086, 5.93588, 6.26217, 5.65236, 4.99017, 8.4476, 2.84629, 6.68334, 5.03883, 6.96774, 5.80396, 5.65832] AIC_normal = 126.93459907868751 AIC_gamma = 132.0861558012711 AIC_normal - AIC_gamma = -5.151556722583592 normal_star = Distributions.Normal{Float64}(μ=5.592635790337137, σ=1.6518808988948162) normal_star = Distributions.Normal{Float64}(μ=5.592635790337137, σ=1.6518808988948162)
PyObject <matplotlib.legend.Legend object at 0x00000000539630F0>
findfailure_and_plot(3)
sample = [3.39051, 4.85286, 5.84347, 9.06356, 6.94377, 7.41921, 4.1418, 4.15976, 1.29839, 3.82125, 7.39945, 4.51234, 4.13256, 4.62575, 5.17709, 6.37377, 5.08286, 5.04572, 3.20572, 5.74777, 5.88435, 5.3199, 6.25898, 1.23276, 2.97443, 2.39112, 4.89373, 5.19201, 5.6271, 6.05125, 4.82241, 6.79795] AIC_normal = 128.2465072802354 AIC_gamma = 133.69841006705312 AIC_normal - AIC_gamma = -5.451902786817726 normal_star = Distributions.Normal{Float64}(μ=4.990111657407978, σ=1.686091524413419) normal_star = Distributions.Normal{Float64}(μ=4.990111657407978, σ=1.686091524413419)
PyObject <matplotlib.legend.Legend object at 0x000000005313B1D0>