Numerical verification of ∫₀¹ ζ(s, x+t) dt = 1/((s−1)xˢ⁻¹).
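The identity itself follows from two standard properties of the Hurwitz zeta function, ∂ζ(s, x)/∂x = −s ζ(s+1, x) and ζ(s, x) − ζ(s, x+1) = x⁻ˢ; a short derivation (added here as a sketch, not part of the original notebook):
\[
\int_0^1 \zeta(s, x+t)\,dt
= \Bigl[-\tfrac{\zeta(s-1,\, x+t)}{s-1}\Bigr]_{t=0}^{t=1}
= \frac{\zeta(s-1, x) - \zeta(s-1, x+1)}{s-1}
= \frac{x^{1-s}}{s-1}
= \frac{1}{(s-1)\,x^{s-1}}.
\]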
using QuadGK
using SpecialFunctions
using Plots
pyplot(fmt=:svg)
f(s, x) = quadgk(t -> zeta(s, x+t), 0, 1)[1]  # left-hand side: numerical integral of the Hurwitz zeta function
g(s, x) = 1/((s-1)*x^(s-1))  # right-hand side of the identity
PP = []
for s in [-5:0; 2:4]  # nine values of s, skipping the pole at s = 1
x = range(s > 1 ? 0.1 : eps(), 1; length=100)
P = plot(x, f.(s, x); label="∫ζ(s,x+t)dt")
plot!(x, g.(s, x); label="1/((s-1)xˢ⁻¹)", ls=:dash)
plot!(; xlim=(-0.05, 1.05))
plot!(; title="s = $s", titlefontsize=8)
push!(PP, P)
end
plot(PP...; layout=(3, 3), size=(800, 520))
Maximum likelihood estimation for the Beta distribution
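For reference (a note added here, not in the original): fit_mle below maximizes the Beta(α, β) log-likelihood of a sample x₁, …, xₙ,
\[
\ell(\alpha, \beta)
= (\alpha - 1)\sum_{i=1}^n \log x_i
+ (\beta - 1)\sum_{i=1}^n \log(1 - x_i)
- n \log B(\alpha, \beta),
\]
optimizing over (log α, log β) so that the positivity constraints α, β > 0 are satisfied automatically.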
using Optim
using Distributions
using StatsFuns
using Plots
pyplot(fmt=:svg)
function Distributions.fit_mle(::Type{<:Beta}, sample::AbstractArray)
f(w) = -loglikelihood(Beta(exp.(w)...), sample)
o = optimize(f, zeros(2))
Beta(exp.(o.minimizer)...)
end
dist_true = Beta(0.01, 3)
n = 20
sample = rand(dist_true, n)
@show fit(Beta, sample)
@show fit_mle(Beta, sample);
fit(Beta, sample) = Beta{Float64}(α=0.045952727988298074, β=7.7516855543615115)
fit_mle(Beta, sample) = Beta{Float64}(α=0.01297467251434813, β=2.5291405367712114)
function sim_beta(; dist_true=Beta(0.01, 3), n=20, L=10^3)
Fit = similar([0.0], 2, L)
MLE = similar(Fit)
for l in 1:L
sample = rand(dist_true, n)
Fit[:,l] = collect(params(fit(Beta, sample)))
MLE[:,l] = collect(params(fit_mle(Beta, sample)))
end
Fit, MLE
end
dist_true = Beta(0.01, 3)
a, b = params(dist_true)
Fit, MLE = sim_beta(; dist_true)
P = plot(; scale=:log10)
scatter!(Fit[1,:], Fit[2,:]; label="fit(Beta, sample)", alpha=0.5, ms=3, msa=0.0)
scatter!([a], [b]; msa=0.0, label="true", color=:red)
Q = plot(; scale=:log10)
scatter!(MLE[1,:], MLE[2,:]; label="fit_mle(Beta, sample)", alpha=0.5, ms=3, msa=0.0)
scatter!([a], [b]; msa=0.0, label="true", color=:red)
plot(P, Q; layout=(1, 2), size=(800, 250))
In the scatter plots above, fit_mle(Beta, sample) has a smaller error than fit(Beta, sample).
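To back this up numerically, a minimal sketch (added here, not in the original) that reuses Fit, MLE, and dist_true from above; logerr is a helper name introduced only for this check:
using Statistics
logtrue = log.(collect(params(dist_true)))  # log of the true (α, β)
logerr(X) = median(abs.(log.(X) .- logtrue); dims=2)  # median absolute error of the log-parameters, per parameter
@show logerr(Fit) logerr(MLE);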
Construct the confidence interval associated with Fisher's exact test using Roots.find_zeros: the interval consists of the odds ratios ω whose p-value is ≥ α, so its endpoints are roots of pval(ω) − α (found below in log ω).
Functions that compute p-values and confidence intervals for samples from a discrete distribution are worth memoizing: the same computations tend to recur, and memoization avoids repeating them (a small timing check appears after the definitions below).
using Roots
using Memoize
using Distributions
using Plots
pyplot(fmt=:svg)
⪅(x, y) = x < y || x ≈ y
or(a, b, c, d) = a*d/(b*c)
or(x::AbstractVecOrMat) = or(x...)
@memoize pval(d::FisherNoncentralHypergeometric, k) =
sum(pdf(d, j) for j in support(d) if pdf(d,j) ⪅ pdf(d, k))
pval(a, b, c, d, ω=1.0) =
pval(FisherNoncentralHypergeometric(a+b, c+d, a+c, ω), a)
pval(A::AbstractVecOrMat, ω=1.0) = pval(A..., ω)
@memoize ci(a, b, c, d, α=0.05) =
exp.(find_zeros(t -> pval(a, b, c, d, exp(t)) - α, -1e2, 1e2))
ci(A::AbstractVecOrMat, α=0.05) = ci(A..., α)
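A minimal check of the memoization benefit mentioned above (added sketch; the first @time also includes compilation time):
@time ci(10, 7, 10, 27)  # first call: runs find_zeros and many pval evaluations
@time ci(10, 7, 10, 27)  # identical call: answered from the memoization cache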
struct FisherTest{D, T}
data::D
ω::T
α::T
odds_ratio::T
p_value::T
conf_int::Vector{T}
end
function fisher_test(data; ω=1.0, α = 0.05)
odds_ratio = or(data)
p_value = pval(data, ω)
conf_int = ci(data, α)
FisherTest(data, ω, α, odds_ratio, p_value, conf_int)
end
function Base.show(io::IO, x::FisherTest)
print(io,
"""
Fisher's Exact Test for Count Data
data: $(x.data)
p-value: $(x.p_value)
alternative hypothesis: true odds ratio is not equal to $(x.ω)
$(100(1 - x.α)) percent confidence interval:
$(x.conf_int)
odds ratio: $(x.odds_ratio)
"""
)
end
@recipe function f(x::FisherTest)
title --> "Fisher's Exact Test: $(x.data)"
xguide --> "odds ratio"
xscale --> :log10
@series begin
seriestype := :path
label --> "p-value"
(
t -> pval(x.data, t),
1/3*x.conf_int[1],
3 *x.conf_int[2]
)
end
@series begin
seriestype := :path
label --> "$(100(1 - x.α))% CI"
linewidth --> 2.0
(
x.conf_int,
zeros(2)
)
end
end
A = [
10 10
7 27
]
result = fisher_test(A)
Fisher's Exact Test for Count Data
data: [10 10; 7 27]
p-value: 0.03515636840648691
alternative hypothesis: true odds ratio is not equal to 1.0
95.0 percent confidence interval:
[1.0691205185478725, 13.492616894184634]
odds ratio: 3.857142857142857
plot(result)
Weibull distribution with a given mean and standard deviation
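The equations that the nonlinear solver below inverts (stated here for reference; Distributions.jl's Weibull(α, θ) takes shape α and scale θ):
\[
\operatorname{mean} = \theta\,\Gamma(1 + 1/\alpha), \qquad
\operatorname{std} = \theta\sqrt{\Gamma(1 + 2/\alpha) - \Gamma(1 + 1/\alpha)^2},
\]
solved for (α, θ) with the reparametrization α = exp(x₁), θ = exp(x₂) so that both stay positive.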
using NLsolve
using Distributions
using StatsPlots
pyplot(fmt=:svg)
function meanstd_weibull!(F, x, p)
dist = Weibull(exp(x[1]), exp(x[2]))
F[1], F[2] = mean(dist) - p[1], std(dist) - p[2]
end
function weibull_meanstd(μ, σ; verbose=false)
sol = nlsolve((F, x) -> meanstd_weibull!(F, x, (μ, σ)), zeros(2))
verbose && println(sol, "\n")
Weibull(exp.(sol.zero)...)
end
w = weibull_meanstd(4.8, 2.3; verbose=true)
@show w
plot(w; label="Weibull$(round.(params(w); digits=2))")
Results of Nonlinear Solver Algorithm
 * Algorithm: Trust-region with dogleg and autoscaling
 * Starting Point: [0.0, 0.0]
 * Zero: [0.789997672650562, 1.6900728053191354]
 * Inf-norm of residuals: 0.000000
 * Iterations: 8
 * Convergence: true
   * |x - x'| < 0.0e+00: false
   * |f(x)| < 1.0e-08: true
 * Function Calls (f): 9
 * Jacobian Calls (df/dx): 8

w = Weibull{Float64}(α=2.20339129818847, θ=5.419875286517129)
using Optim
function Distributions.fit(::Type{<:Weibull}, sample::AbstractArray)
weibull_meanstd(mean(sample), std(sample))
end
function Distributions.fit_mle(::Type{<:Weibull}, sample::AbstractArray)
f(w) = -loglikelihood(Weibull(exp.(w)...), sample)
o = optimize(f, zeros(2))
Weibull(exp.(o.minimizer)...)
end
dist_true = Weibull(2.2, 5.4)
sample = rand(dist_true, 20)
@show fit(Weibull, sample)
@show fit_mle(Weibull, sample);
fit(Weibull, sample) = Weibull{Float64}(α=2.012156667525288, θ=4.920056374823114)
fit_mle(Weibull, sample) = Weibull{Float64}(α=2.1084408746436694, θ=4.940049528195075)