Notebook

Baysian version of the plot of Anscombe's quartet¶

黒木玄

2020-05-02

In [1]:

@time using RDatasets
@time using StatsPlots
@time pyplot()
relax(t=0.2) = (backend() == Plots.PyPlotBackend() && PyPlot.clf(); sleep(t))
@time using Turing

rd(x, d=3) = round(x; digits=d)
mrd(t, d=3) = map(x->rd(x, d), t)

  9.441269 seconds (19.85 M allocations: 964.012 MiB, 6.31% gc time)
 32.242469 seconds (63.00 M allocations: 3.040 GiB, 4.75% gc time)
  7.841830 seconds (13.35 M allocations: 643.637 MiB, 2.95% gc time)
 29.997367 seconds (65.66 M allocations: 4.154 GiB, 4.67% gc time)

Out[1]:

mrd (generic function with 2 methods)

In [2]:

@time df_anscombe = dataset("datasets", "anscombe")

  5.669637 seconds (8.77 M allocations: 412.675 MiB, 3.05% gc time)

Out[2]:

11 rows × 8 columns

	X1	X2	X3	X4	Y1	Y2	Y3	Y4
	Int64	Int64	Int64	Int64	Float64	Float64	Float64	Float64
1	10	10	10	8	8.04	9.14	7.46	6.58
2	8	8	8	8	6.95	8.14	6.77	5.76
3	13	13	13	8	7.58	8.74	12.74	7.71
4	9	9	9	8	8.81	8.77	7.11	8.84
5	11	11	11	8	8.33	9.26	7.81	8.47
6	14	14	14	8	9.96	8.1	8.84	7.04
7	6	6	6	8	7.24	6.13	6.08	5.25
8	4	4	4	19	4.26	3.1	5.39	12.5
9	12	12	12	8	10.84	9.13	8.15	5.56
10	7	7	7	8	4.82	7.26	6.42	7.91
11	5	5	5	8	5.68	4.74	5.73	6.89

In [3]:

X1 = df_anscombe[!, :X1] |> float
Y1 = df_anscombe[!, :Y1]
X2 = df_anscombe[!, :X2] |> float
Y2 = df_anscombe[!, :Y2]
X3 = df_anscombe[!, :X3] |> float
Y3 = df_anscombe[!, :Y3]
X4 = df_anscombe[!, :X4] |> float
Y4 = df_anscombe[!, :Y4]

@show X1
@show Y1
@show X2
@show Y2
@show X3
@show Y4
@show X4
@show Y4;

X1 = [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0]
Y1 = [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]
X2 = [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0]
Y2 = [9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74]
X3 = [10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0]
Y4 = [6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89]
X4 = [8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0]
Y4 = [6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89]

In [4]:

function show_stats(X, Y)
    XX = [ones(length(X)) X]
    @show (rd.(XX'XX), rd.(XX'Y), rd.(Y'Y))
    nothing
end

for (X, Y) in [(X1, Y1), (X2, Y2), (X3, Y3), (X4, Y4)]
    show_stats(X, Y)
end

(rd.(XX' * XX), rd.(XX' * Y), rd.(Y' * Y)) = ([11.0 99.0; 99.0 1001.0], [82.51, 797.6], 660.173)
(rd.(XX' * XX), rd.(XX' * Y), rd.(Y' * Y)) = ([11.0 99.0; 99.0 1001.0], [82.51, 797.59], 660.176)
(rd.(XX' * XX), rd.(XX' * Y), rd.(Y' * Y)) = ([11.0 99.0; 99.0 1001.0], [82.5, 797.47], 659.976)
(rd.(XX' * XX), rd.(XX' * Y), rd.(Y' * Y)) = ([11.0 99.0; 99.0 1001.0], [82.51, 797.58], 660.132)

サイズ n のデータセット (X, Y) について XX = [ones(n) X] とおくとき, 線形回帰の尤度函数は XX'XX, XX'Y, Y'Y から決まる.

4種のデータセットについてその値はほぼ同じなので, ベイズ統計においても, これらのデータセットから得られる線形回帰の結果はほぼ同じになる.

In [5]:

"""
`get_posterior(chain::AbstractChains, vars::vars)` extracts the posterior sample of `vars` from `chain`.

Example:

```julia
chain = psample(model, NUTS(), 2000, 3)
posterior_sample = get_posterior_sample(chain, [:a, :b, :s])
```
"""
function get_posterior_sample(
        chain::Turing.Inference.AbstractMCMC.AbstractChains,
        vars::AbstractVector=get_vars(chain)
    )
    val = chain[vars].value
    W = vcat((val[:,:,m] for m in 1:size(val, 3))...)
    Array{typeof(W[end,end]), 2}(W)
end

@doc get_posterior_sample

Out[5]:

get_posterior(chain::AbstractChains, vars::vars) extracts the posterior sample of vars from chain.

Example:

chain = psample(model, NUTS(), 2000, 3)
posterior_sample = get_posterior_sample(chain, [:a, :b, :s])

In [6]:

"""
`get_vars(chain::AbstractChains)` returns the array of the names of the parameters in `chain`.
"""
function get_vars(chain::Turing.Inference.AbstractMCMC.AbstractChains)
    vars = chain.name_map[:parameters]
end

@doc get_vars

Out[6]:

get_vars(chain::AbstractChains) returns the array of the names of the parameters in chain.

In [7]:

"""
`make_pred_pdf(logpdf_func, posterior_sample)` makes the pdf of the predictive distribution from the arguments.
"""
function make_pred_pdf(logpdf_func, posterior_sample::AbstractArray)
    L = size(posterior_sample, 1)
    pred_pdf(x) = mean(exp(logpdf_func(x, @view(posterior_sample[l, :]))) for l in 1:L)
    pred_pdf
end

"""
`make_pred_pdf(logpdf_func, chain::AbstractChains, vars::AbstractVector)` makes the pdf of the predictive distribution from the arguments.
"""
function make_pred_pdf(
        logpdf_func,
        chain::Turing.Inference.AbstractMCMC.AbstractChains,
        vars::AbstractVector=get_vars(chain)
    )
    posterior_sample = get_posterior_sample(chain, vars)
    make_pred_pdf(logpdf_func, posterior_sample)
end


@doc make_pred_pdf

Out[7]:

make_pred_pdf(logpdf_func, posterior_sample) makes the pdf of the predictive distribution from the arguments.

make_pred_pdf(logpdf_func, chain::AbstractChains, vars::AbstractVector) makes the pdf of the predictive distribution from the arguments.

In [8]:

@model ols(X, Y) = begin
    a ~ Flat()
    b ~ Flat()
    σ ~ FlatPos(0.0)
    for k in eachindex(X)
        Y[k] ~ Normal(a + b*X[k], σ)
    end
end

vars_ols = [:a, :b, :σ]

function logpdf_ols(x, w)
    X, Y = x
    a, b, σ = w
    logpdf(Normal(a + b*X, σ), Y)
end

function plot_ols(X, Y; 
        x = range(-2, 22; length=100),
        y = range(-2, 18; length=100),
        vars_ols = vars_ols
    )
    
    chain_ols = psample(ols(X, Y), NUTS(), 2000, 3)
    @show chain_ols
    plot(chain_ols; lw=0.5) |> display
    relax()
    
    posterior_sample_ols = get_posterior_sample(chain_ols, vars_ols)
    pred_pdf_ols = make_pred_pdf(logpdf_ols, posterior_sample_ols)
    
    @time z = ((x, y) -> pred_pdf_ols((x, y))).(x', y)
    P = plot(size=(300, 300), legend=false)
    plot!(xlim=extrema(x), ylim=extrema(y))
    heatmap!(x, y, z)
    scatter!(X, Y; label="sample", color=:cyan)
    
    P
end

Out[8]:

plot_ols (generic function with 1 method)

In [9]:

P1 = plot_ols(X1, Y1)

┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\WJCQA\src\hamiltonian.jl:47
┌ Warning: The current proposal will be rejected due to numerical error(s).
│   isfinite.((θ, r, ℓπ, ℓκ)) = (true, false, false, false)
└ @ AdvancedHMC C:\Users\genkuroki\.julia\packages\AdvancedHMC\WJCQA\src\hamiltonian.jl:47
┌ Info: Found initial step size
│   ϵ = 0.0015625
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\cReBm\src\inference\hmc.jl:556
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:11

chain_ols = Object of type Chains, with data of type 1000×15×3 Array{Float64,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b, σ

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse       ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ────────  ──────
           a  3.0530  1.4352    0.0262  0.0717  526.1467  1.0082
           b  0.4964  0.1516    0.0028  0.0076  549.7384  1.0092
           σ  1.4779  0.4499    0.0082  0.0202  667.7887  1.0038

Quantiles
  parameters    2.5%   25.0%   50.0%   75.0%   97.5%
  ──────────  ──────  ──────  ──────  ──────  ──────
           a  0.3830  2.1218  3.0157  3.9349  5.9675
           b  0.1909  0.4078  0.4962  0.5933  0.7821
           σ  0.8972  1.1818  1.4056  1.6733  2.5493

  3.059556 seconds (30.58 M allocations: 1.368 GiB, 11.13% gc time)

Out[9]:

In [10]:

P2 = plot_ols(X2, Y2)

┌ Info: Found initial step size
│   ϵ = 0.025
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\cReBm\src\inference\hmc.jl:556
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:00

chain_ols = Object of type Chains, with data of type 1000×15×3 Array{Float64,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b, σ

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse       ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ────────  ──────
           a  3.0242  1.3596    0.0248  0.0527  661.5522  1.0010
           b  0.4976  0.1420    0.0026  0.0056  692.5221  1.0015
           σ  1.4623  0.4469    0.0082  0.0206  501.7501  1.0042

Quantiles
  parameters    2.5%   25.0%   50.0%   75.0%   97.5%
  ──────────  ──────  ──────  ──────  ──────  ──────
           a  0.2503  2.2129  3.0479  3.8441  5.6665
           b  0.2177  0.4125  0.4952  0.5841  0.7912
           σ  0.9122  1.1586  1.3610  1.6555  2.5539

  2.823920 seconds (30.02 M allocations: 1.342 GiB, 10.23% gc time)

Out[10]:

In [11]:

P3 = plot_ols(X3, Y3)

┌ Info: Found initial step size
│   ϵ = 0.0125
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\cReBm\src\inference\hmc.jl:556
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:00

chain_ols = Object of type Chains, with data of type 1000×15×3 Array{Float64,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b, σ

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse       ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ────────  ──────
           a  3.0661  1.3481    0.0246  0.0493  951.2578  1.0015
           b  0.4923  0.1406    0.0026  0.0054  948.6848  1.0015
           σ  1.4514  0.4212    0.0077  0.0159  822.1566  1.0042

Quantiles
  parameters    2.5%   25.0%   50.0%   75.0%   97.5%
  ──────────  ──────  ──────  ──────  ──────  ──────
           a  0.3477  2.2062  3.0383  3.8830  5.8486
           b  0.2036  0.4049  0.4958  0.5828  0.7714
           σ  0.8824  1.1693  1.3699  1.6363  2.4731

  2.928867 seconds (30.02 M allocations: 1.342 GiB, 9.93% gc time)

Out[11]:

In [12]:

P4 = plot_ols(X4, Y4)

┌ Info: Found initial step size
│   ϵ = 0.025
└ @ Turing.Inference C:\Users\genkuroki\.julia\packages\Turing\cReBm\src\inference\hmc.jl:556
Parallel sampling: 100%|█████████████████████████████████████████| Time: 0:00:00

chain_ols = Object of type Chains, with data of type 1000×15×3 Array{Float64,3}

Iterations        = 1:1000
Thinning interval = 1
Chains            = 1, 2, 3
Samples per chain = 1000
internals         = acceptance_rate, hamiltonian_energy, hamiltonian_energy_error, is_accept, log_density, lp, max_hamiltonian_energy_error, n_steps, nom_step_size, numerical_error, step_size, tree_depth
parameters        = a, b, σ

2-element Array{ChainDataFrame,1}

Summary Statistics
  parameters    mean     std  naive_se    mcse       ess   r_hat
  ──────────  ──────  ──────  ────────  ──────  ────────  ──────
           a  2.9928  1.4151    0.0258  0.0560  414.3591  1.0025
           b  0.5020  0.1484    0.0027  0.0057  374.4195  1.0019
           σ  1.4469  0.4296    0.0078  0.0131  774.1743  1.0027

Quantiles
  parameters    2.5%   25.0%   50.0%   75.0%   97.5%
  ──────────  ──────  ──────  ──────  ──────  ──────
           a  0.2400  2.1232  3.0042  3.8161  5.8172
           b  0.2148  0.4154  0.4997  0.5955  0.7925
           σ  0.8828  1.1571  1.3552  1.6526  2.5343

  3.104714 seconds (30.08 M allocations: 1.345 GiB, 10.65% gc time)

Out[12]:

In [13]:

plot(P1, P2, P3, P4; size=(600, 600), layout=(2, 2))

Out[13]:

In [ ]: