Numerical verification of ∫₀¹ ζ(s, x+t) dt = 1/((s−1)xˢ⁻¹).
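The identity itself follows from two standard properties of the Hurwitz zeta function, ∂ζ(s, x)/∂x = −s ζ(s+1, x) and ζ(s, x) − ζ(s, x+1) = x⁻ˢ; a short derivation (added here as a sketch, not part of the original notebook):
\[
\int_0^1 \zeta(s, x+t)\,dt
= \Bigl[-\tfrac{\zeta(s-1,\, x+t)}{s-1}\Bigr]_{t=0}^{t=1}
= \frac{\zeta(s-1, x) - \zeta(s-1, x+1)}{s-1}
= \frac{x^{1-s}}{s-1}
= \frac{1}{(s-1)\,x^{s-1}}.
\]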
using QuadGK
using SpecialFunctions
using Plots
pyplot(fmt=:svg)
f(s, x) = quadgk(t -> zeta(s, x+t), 0, 1)[1]  # left-hand side: numerical integral of the Hurwitz zeta function
g(s, x) = 1/((s-1)*x^(s-1))  # right-hand side of the identity
PP = []
for s in [-5:0; 2:4]  # nine values of s, skipping the pole at s = 1
x = range(s > 1 ? 0.1 : eps(), 1; length=100)
P = plot(x, f.(s, x); label="∫ζ(s,x+t)dt")
plot!(x, g.(s, x); label="1/((s-1)xˢ⁻¹)", ls=:dash)
plot!(; xlim=(-0.05, 1.05))
plot!(; title="s = $s", titlefontsize=8)
push!(PP, P)
end
plot(PP...; layout=(3, 3), size=(800, 520))
Maximum likelihood estimation for the Beta distribution
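For reference (a note added here, not in the original): fit_mle below maximizes the Beta(α, β) log-likelihood of a sample x₁, …, xₙ,
\[
\ell(\alpha, \beta)
= (\alpha - 1)\sum_{i=1}^n \log x_i
+ (\beta - 1)\sum_{i=1}^n \log(1 - x_i)
- n \log B(\alpha, \beta),
\]
optimizing over (log α, log β) so that the positivity constraints α, β > 0 are satisfied automatically.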
using Optim
using Distributions
using StatsFuns
using Plots
pyplot(fmt=:svg)
function Distributions.fit_mle(::Type{<:Beta}, sample::AbstractArray)
f(w) = -loglikelihood(Beta(exp.(w)...), sample)
o = optimize(f, zeros(2))
Beta(exp.(o.minimizer)...)
end
dist_true = Beta(0.01, 3)
n = 20
sample = rand(dist_true, n)
@show fit(Beta, sample)
@show fit_mle(Beta, sample);
fit(Beta, sample) = Beta{Float64}(α=0.045952727988298074, β=7.7516855543615115)
fit_mle(Beta, sample) = Beta{Float64}(α=0.01297467251434813, β=2.5291405367712114)
function sim_beta(; dist_true=Beta(0.01, 3), n=20, L=10^3)
Fit = similar([0.0], 2, L)
MLE = similar(Fit)
for l in 1:L
sample = rand(dist_true, n)
Fit[:,l] = collect(params(fit(Beta, sample)))
MLE[:,l] = collect(params(fit_mle(Beta, sample)))
end
Fit, MLE
end
dist_true = Beta(0.01, 3)
a, b = params(dist_true)
Fit, MLE = sim_beta(; dist_true)
P = plot(; scale=:log10)
scatter!(Fit[1,:], Fit[2,:]; label="fit(Beta, sample)", alpha=0.5, ms=3, msa=0.0)
scatter!([a], [b]; msa=0.0, label="true", color=:red)
Q = plot(; scale=:log10)
scatter!(MLE[1,:], MLE[2,:]; label="fit_mle(Beta, sample)", alpha=0.5, ms=3, msa=0.0)
scatter!([a], [b]; msa=0.0, label="true", color=:red)
plot(P, Q; layout=(1, 2), size=(800, 250))
In the scatter plots above, fit_mle(Beta, sample) has a smaller error than fit(Beta, sample).
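To back this up numerically, a minimal sketch (added here, not in the original) that reuses Fit, MLE, and dist_true from above; logerr is a helper name introduced only for this check:
using Statistics
logtrue = log.(collect(params(dist_true)))  # log of the true (α, β)
logerr(X) = median(abs.(log.(X) .- logtrue); dims=2)  # median absolute error of the log-parameters, per parameter
@show logerr(Fit) logerr(MLE);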
Construct the confidence interval associated with Fisher's exact test using Roots.find_zeros: the interval consists of the odds ratios ω whose p-value is ≥ α, so its endpoints are roots of pval(ω) − α (found below in log ω).
Functions that compute p-values and confidence intervals for samples from a discrete distribution are worth memoizing: the same computations tend to recur, and memoization avoids repeating them (a small timing check appears after the definitions below).
using Roots
using Memoize
using Distributions
using Plots
pyplot(fmt=:svg)
⪅(x, y) = x < y || x ≈ y
or(a, b, c, d) = a*d/(b*c)
or(x::AbstractVecOrMat) = or(x...)
@memoize pval(d::FisherNoncentralHypergeometric, k) =
sum(pdf(d, j) for j in support(d) if pdf(d,j) ⪅ pdf(d, k))
pval(a, b, c, d, ω=1.0) =
pval(FisherNoncentralHypergeometric(a+b, c+d, a+c, ω), a)
pval(A::AbstractVecOrMat, ω=1.0) = pval(A..., ω)
@memoize ci(a, b, c, d, α=0.05) =
exp.(find_zeros(t -> pval(a, b, c, d, exp(t)) - α, -1e2, 1e2))
ci(A::AbstractVecOrMat, α=0.05) = ci(A..., α)
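A minimal check of the memoization benefit mentioned above (added sketch; the first @time also includes compilation time):
@time ci(10, 7, 10, 27)  # first call: runs find_zeros and many pval evaluations
@time ci(10, 7, 10, 27)  # identical call: answered from the memoization cache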
struct FisherTest{D, T}
data::D
ω::T
α::T
odds_ratio::T
p_value::T
conf_int::Vector{T}
end
function fisher_test(data; ω=1.0, α = 0.05)
odds_ratio = or(data)
p_value = pval(data, ω)
conf_int = ci(data, α)
FisherTest(data, ω, α, odds_ratio, p_value, conf_int)
end
function Base.show(io::IO, x::FisherTest)
print(io,
"""
Fisher's Exact Test for Count Data
data: $(x.data)
p-value: $(x.p_value)
alternative hypothesis: true odds ratio is not equal to $(x.ω)
$(100(1 - x.α)) percent confidence interval:
$(x.conf_int)
odds ratio: $(x.odds_ratio)
"""
)
end
@recipe function f(x::FisherTest)
title --> "Fisher's Exact Test: $(x.data)"
xguide --> "odds ratio"
xscale --> :log10
@series begin
seriestype := :path
label --> "p-value"
(
t -> pval(x.data, t),
1/3*x.conf_int[1],
3 *x.conf_int[2]
)
end
@series begin
seriestype := :path
label --> "$(100(1 - x.α))% CI"
linewidth --> 2.0
(
x.conf_int,
zeros(2)
)
end
end
A = [
10 10
7 27
]
result = fisher_test(A)
Fisher's Exact Test for Count Data
data: [10 10; 7 27]
p-value: 0.03515636840648691
alternative hypothesis: true odds ratio is not equal to 1.0
95.0 percent confidence interval:
[1.0691205185478725, 13.492616894184634]
odds ratio: 3.857142857142857
plot(result)
Weibull distribution with a given mean and standard deviation
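The equations that the nonlinear solver below inverts (stated here for reference; Distributions.jl's Weibull(α, θ) takes shape α and scale θ):
\[
\operatorname{mean} = \theta\,\Gamma(1 + 1/\alpha), \qquad
\operatorname{std} = \theta\sqrt{\Gamma(1 + 2/\alpha) - \Gamma(1 + 1/\alpha)^2},
\]
solved for (α, θ) with the reparametrization α = exp(x₁), θ = exp(x₂) so that both stay positive.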
using NLsolve
using Distributions
using StatsPlots
pyplot(fmt=:svg)
function meanstd_weibull!(F, x, p)
dist = Weibull(exp(x[1]), exp(x[2]))
F[1], F[2] = mean(dist) - p[1], std(dist) - p[2]
end
function weibull_meanstd(μ, σ; verbose=false)
sol = nlsolve((F, x) -> meanstd_weibull!(F, x, (μ, σ)), zeros(2))
verbose && println(sol, "\n")
Weibull(exp.(sol.zero)...)
end
w = weibull_meanstd(4.8, 2.3; verbose=true)
@show w
plot(w; label="Weibull$(round.(params(w); digits=2))")
Results of Nonlinear Solver Algorithm
 * Algorithm: Trust-region with dogleg and autoscaling
 * Starting Point: [0.0, 0.0]
 * Zero: [0.789997672650562, 1.6900728053191354]
 * Inf-norm of residuals: 0.000000
 * Iterations: 8
 * Convergence: true
   * |x - x'| < 0.0e+00: false
   * |f(x)| < 1.0e-08: true
 * Function Calls (f): 9
 * Jacobian Calls (df/dx): 8

w = Weibull{Float64}(α=2.20339129818847, θ=5.419875286517129)
using Optim
function Distributions.fit(::Type{<:Weibull}, sample::AbstractArray)
weibull_meanstd(mean(sample), std(sample))
end
function Distributions.fit_mle(::Type{<:Weibull}, sample::AbstractArray)
f(w) = -loglikelihood(Weibull(exp.(w)...), sample)
o = optimize(f, zeros(2))
Weibull(exp.(o.minimizer)...)
end
dist_true = Weibull(2.2, 5.4)
sample = rand(dist_true, 20)
@show fit(Weibull, sample)
@show fit_mle(Weibull, sample);
fit(Weibull, sample) = Weibull{Float64}(α=2.012156667525288, θ=4.920056374823114)
fit_mle(Weibull, sample) = Weibull{Float64}(α=2.1084408746436694, θ=4.940049528195075)