このファイルの新しいバージョンが

https://nbviewer.jupyter.org/gist/genkuroki/67f03274960dca00e73d5498ead138b7

で公開されているのでそちらを参照して欲しい.

2x2の分割表での独立性検定の比較

黒木玄

2017-09-26

$2\times 2$ の分割表における次のような形の独立性の条件を満たしている確率分布を考える：

$$\displaystyle \begin{bmatrix} p_{11} & p_{12} \\ p_{21} & p_{22} \\ \end{bmatrix} = \begin{bmatrix} pq & p(1-q) \\ (1-p)q & (1-p)(1-q) \\ \end{bmatrix} $$

このとき $p=p_{11}+p_{12}$, $q=p_{11}+p_{21}$ を周辺確率 (marginal probabilities) と呼ぶことにする。

周辺確率を動かしながら、$\chi^2$ 検定、G検定、Fisherの正確確率検定の3種類について、p値が $\alpha$ 以下になる確率をプロットしてみる。

p値が $\alpha$ 以下になる確率は $\alpha$ に近い方がよい。 $\alpha$ に近い場合には白色でプロットし、$\alpha$ より小さい場合には青色をどんどん濃くして行き、$\alpha$ より大きい場合には赤色をどんどん濃くして行くことにする。白っぽく見える方がよい。青色はその検定が conservative であること(第一種の過誤の確率が $\alpha$ より小さいこと)を意味し、赤色は逆に第一種の過誤の確率が $\alpha$ を超えていることを意味している。

サンプルを生成する確率分布として、

- 4×Poisson分布
- 多項分布
- 2×二項分布
- 超幾何分布

を用いる。$\alpha=0.05$ (有意水準5%)の場合を扱うことにする。

結論

- $\chi^2$ 検定が最も安定して白っぽくなりやすい。
- G検定は濃い赤が出易い。
- Fisherの正確確率検定は $n$ が小さいとき濃い青がたくさん出る。

追記： 最後の方に独立性の条件を満たさない場合に関するシミュレーション結果も追加した。

独立性の条件を満たさない場合にはもっと意味のある情報が得られるシミュレーションを考えたい。

In [1]:

using PyPlot
using Distributions

In [2]:

"""
    xlog(x, y)

Return `x*log(x/y)` with two conventions for the boundary cases:
`0.0` when `x == 0`, and `Inf` when `y == 0` (and `x != 0`).

Warning: do not use a plain `x*log(x/y)` instead of `xlog(x, y)`.
At `x == 0` it evaluates `log(0) = -Inf` and then `0*Inf = NaN`.

Arguments that are not `Float64` are converted to `Float64` first.
"""
function xlog(x::Float64, y::Float64)
    # Guard clauses: the x == 0 convention takes precedence over y == 0.
    x == 0.0 && return 0.0
    y == 0.0 && return Inf
    return x*log(x/y)
end
xlog(x, y) = xlog(Float64(x), Float64(y))

# Demonstration with x = 0: the naive formula x*log(x/y) evaluates to NaN,
# whereas xlog(x,y) returns 0.0.
x, y = 0, 1
@show x, y
@show x*log(x/y)
@show xlog(x,y);

(x, y) = (0, 1)
x * log(x / y) = NaN
xlog(x, y) = 0.0

In [3]:

"""
    prodprob(p, q)

Return the cell probabilities `[p11, p12, p21, p22]` of the 2x2 table that
satisfies independence with marginal probabilities `p` and `q`:
`[p*q, p*(1-q), (1-p)*q, (1-p)*(1-q)]`.
"""
function prodprob(p::Float64, q::Float64)
    pbar = 1 - p
    qbar = 1 - q
    return [p*q, p*qbar, pbar*q, pbar*qbar]
end

"""
    expected(a)

Return the expected cell counts of the 2x2 table `a = [a11, a12, a21, a22]`
computed from its margins: each entry is (row total)*(column total)/(grand total),
in the same cell order as `a`.
"""
function expected(a::AbstractArray{T,1}) where T
    total = sum(a)
    row1 = a[1] + a[2]
    row2 = a[3] + a[4]
    col1 = a[1] + a[3]
    col2 = a[2] + a[4]
    return [row1*col1/total, row1*col2/total,
            row2*col1/total, row2*col2/total]
end

"""
    chisqtest(a)

Return the p-value of Pearson's chi-squared test of independence for the 2x2
table `a = [a11, a12, a21, a22]`. The statistic `sum((a - mu)^2/mu)`, with `mu`
the expected counts from `expected(a)`, is referred to the upper tail of the
chi-squared distribution with one degree of freedom. No continuity correction
is applied.

When a row or column total is zero, the corresponding expected counts are zero
and the statistic is `0/0 = NaN`. Such a table carries no evidence against
independence, so the p-value `1.0` is returned instead of `NaN`.
"""
function chisqtest(a::AbstractArray{Int64,1})
    mu = expected(a)
    chisq = sum((a .- mu).^2 ./ mu)
    # NaN arises only from 0/0 cells (zero margin or empty table).
    isnan(chisq) && return 1.0
    return ccdf(Chisq(1), chisq)
end

"""
    gtest(a)

Return the p-value of the G-test (likelihood-ratio test) of independence for
the 2x2 table `a = [a11, a12, a21, a22]`. The statistic is
`2*sum(a*log(a/mu))` with `mu = expected(a)`, referred to the upper tail of the
chi-squared distribution with one degree of freedom.
"""
function gtest(a::AbstractArray{Int64,1})
    fitted = expected(a)
    # Use xlog rather than a.*log(a./mu): zero counts must contribute 0, not NaN.
    terms = map(xlog, a, fitted)
    return ccdf(Chisq(1), 2*sum(terms))
end

"""
    pvaluehg(d, k; relerr = 1e-7)

Two-sided p-value of the observation `k` under the hypergeometric distribution
`d`: the sum of `pdf(d, j)` over all `j` in the support whose probability does
not exceed that of `k`. The result is capped at `1.0`.

Outcomes whose probability equals that of `k` mathematically can differ from it
by rounding error in floating point, so an exact `≤` comparison may drop them.
Probabilities up to `pdf(d, k)*(1 + relerr)` are therefore treated as ties.
Pass `relerr = 0` for the exact comparison.
"""
function pvaluehg(d::Hypergeometric, k::Int64; relerr = 1e-7)
    ns, nf, ndraw = params(d)   # successes, failures, number of draws
    cutoff = pdf(d, k)*(1 + relerr)
    pval = 0.0
    # Outside max(0, ndraw - nf):min(ns, ndraw) the pdf is zero and adds nothing.
    for j in max(0, ndraw - nf):min(ns, ndraw)
        pj = pdf(d, j)
        if pj ≤ cutoff
            pval += pj
        end
    end
    return min(pval, 1.0)
end

"""
    fishertest(a)

Return the two-sided p-value of Fisher's exact test for the 2x2 table
`a = [a11, a12, a21, a22]`. With both row totals and the first column total
held fixed, `a11` is compared against the hypergeometric distribution through
`pvaluehg`.
"""
function fishertest(a::AbstractArray{Int64,1})
    row1 = a[1] + a[2]
    row2 = a[3] + a[4]
    col1 = a[1] + a[3]
    return pvaluehg(Hypergeometric(row1, row2, col1), a[1])
end

Out[3]:

fishertest (generic function with 1 method)

In [4]:

"""
    ecdf(pval, x)

Return the fraction of the entries of `pval` that are less than or equal to `x`
(the empirical distribution function of `pval` evaluated at `x`).

`x` is converted to `Float64` before comparing. `NaN` entries are never counted
as `≤ x`. An empty `pval` yields `NaN` (0/0).

This is a single generic method: a typed method plus a converting fallback would
recurse without end for inputs such as an integer vector with a `Float64` `x`.
"""
function ecdf(pval, x)
    threshold = Float64(x)
    return count(p -> p ≤ threshold, pval)/length(pval)
end

"""
    randPoisson(n, p, N)

Draw `N` random 2x2 tables whose four cells are independent Poisson counts with
means `n*p[1]`, ..., `n*p[4]`. Returns a 4×N matrix with one table per column.
"""
function randPoisson(n::Int64, p::AbstractArray{Float64}, N::Int64)
    # One row of N draws per cell, generated in cell order 1, 2, 3, 4.
    rows = [rand(Poisson(n*p[k]), N)' for k in 1:4]
    return vcat(rows...)
end

"""
    randMultinomial(n, p, N)

Draw `N` random 2x2 tables from the multinomial distribution with `n` trials
and cell probabilities `p`. Returns the result of `rand` on that distribution,
with one table per column.
"""
function randMultinomial(n::Int64, p::AbstractArray{Float64}, N::Int64)
    dist = Multinomial(n, p)
    return rand(dist, N)
end
    
"""
    randBinomial(n, p, N)

Draw `N` random 2x2 tables with fixed row totals. The first row total is
`n*(p[1]+p[2])` rounded to an integer and the second row gets the remainder.
Each row is split between its two cells with the conditional probabilities
`p[1]/(p[1]+p[2])` and `p[3]/(p[3]+p[4])` respectively. Returns a 4×N matrix
with one table per column.
"""
function randBinomial(n::Int64, p::AbstractArray{Float64}, N::Int64)
    rowprob = p[1] + p[2]
    m = Int64(round(n*rowprob))
    q1 = p[1]/rowprob
    q2 = p[3]/(p[3] + p[4])
    # Each row is a two-category multinomial; the first row is sampled first.
    top = rand(Multinomial(m, [q1, 1.0 - q1]), N)
    bottom = rand(Multinomial(n - m, [q2, 1.0 - q2]), N)
    return vcat(top, bottom)
end

"""
    randHypergeometric(n, p, N)

Draw `N` random 2x2 tables with all margins fixed. The first row total `m1` and
the first column total `n1` are `n*(p[1]+p[2])` and `n*(p[1]+p[3])` rounded to
integers. The upper-left cell is drawn from `Hypergeometric(m1, m2, n1)` and
the other three cells follow from the margins. Returns a 4×N matrix with one
table per column.

The second row total is `m2 = n - m1`, so every table sums to exactly `n`.
Rounding `n*(p[3]+p[4])` separately does not guarantee that: when
`n*(p[1]+p[2])` is a half-integer (for example `n = 25` with row probability
0.5), both totals round the same way and the tables sum to 24 or 26.
"""
function randHypergeometric(n::Int64, p::AbstractArray{Float64}, N::Int64)
    m1 = Int64(round(n*(p[1]+p[2])))   # first row total
    m2 = n - m1                        # second row total (remainder)
    n1 = Int64(round(n*(p[1]+p[3])))   # first column total
    a = rand(Hypergeometric(m1, m2, n1), N)'
    b = m1 .- a
    c = n1 .- a
    d = m2 .- c
    return [a; b; c; d]
end

"""
    pvaluesby(sampler, n; N = 10^4, alpha = 0.05)

For every pair of marginal probabilities `(px[i], py[j])` on the grid
`0.05:0.05:0.50`, draw `N` tables with `sampler(n, prodprob(px[i], py[j]), N)`
and estimate, for the chi-squared test, the G-test and Fisher's exact test, the
probability that the p-value is at most `alpha`.

`sampler(n, p, N)` must return a matrix whose columns are 2x2 tables given as
`[a11, a12, a21, a22]`.

Only the entries with `i ≤ j` are simulated. The entries with `i > j` are
copied from the mirrored entry `[j, i]`.

Returns `(alpha, px, prob_chisq, prob_g, prob_fisher)`, where the last three
are matrices indexed by `[i, j]`.
"""
function pvaluesby(sampler::T, n::Int64;
        N = 10^4, alpha = 0.05) where T<:Function
    px = collect(0.05:0.05:0.50)
    py = px
    np = length(px)
    # zeros(...) works on every Julia version; the two-argument constructor
    # Array{Float64,2}(np,np) no longer exists on Julia ≥ 1.0.
    prob_chisq  = zeros(np, np)
    prob_g      = zeros(np, np)
    prob_fisher = zeros(np, np)

    for i in 1:np
        for j in 1:np
            if i > j
                # Row j was filled in an earlier outer iteration.
                prob_chisq[i,j]  = prob_chisq[j,i]
                prob_g[i,j]      = prob_g[j,i]
                prob_fisher[i,j] = prob_fisher[j,i]
            else
                a = sampler(n, prodprob(px[i], py[j]), N)
                cols = 1:size(a, 2)
                prob_chisq[i,j]  = ecdf([chisqtest(a[:,k])  for k in cols], alpha)
                prob_g[i,j]      = ecdf([gtest(a[:,k])      for k in cols], alpha)
                prob_fisher[i,j] = ecdf([fishertest(a[:,k]) for k in cols], alpha)
            end
        end
    end
    return alpha, px, prob_chisq, prob_g, prob_fisher
end

"""
    plotcomparisontest(sampler, n; N = 10^4, alpha = 0.05)

Run `pvaluesby(sampler, n; N = N, alpha = alpha)` and draw the three resulting
matrices (chi-squared test, G-test, Fisher's exact test) as heat maps in one
figure. The colour scale is `RdBu_r` and runs from `0` to `2*alpha`. Returns
the value of the final `suptitle` call.
"""
function plotcomparisontest(sampler, n; N = 10^4, alpha = 0.05)
    alpha, px, prob_chisq, prob_g, prob_fisher = pvaluesby(sampler, n; N = N, alpha = alpha)
    edges = [0.0; px]   # cell boundaries for pcolormesh (one more than the grid)

    # (grid position, data, panel title) for each of the three panels.
    panels = (
        ((0,0),  prob_chisq,  "\$\\chi^2\$-test"),
        ((0,12), prob_g,      "G-test"),
        ((9,0),  prob_fisher, "Fisher's exact test"),
    )

    figure(figsize=(8,6.4))

    for (origin, rates, heading) in panels
        subplot2grid((16,20), origin, rowspan=7, colspan=8)
        pcolormesh(edges, edges, rates, cmap="RdBu_r", vmin=0.0, vmax=2*alpha)
        colorbar(label="probability of p-value \$\\leqq\$ $alpha")
        title(heading)
        xlabel("marginal probability", fontsize=8)
        ylabel("marginal probability", fontsize=8)
    end

    return suptitle("sampler = $(typeof(sampler)),  n = $n,  \$\\alpha\$ = $alpha")
end

Out[4]:

plotcomparisontest (generic function with 1 method)

4×Poisson分布でサンプルを生成する場合

In [5]:

# Independent case, randPoisson sampler, n = 25: time and draw the comparison of the three tests.
@time plotcomparisontest(randPoisson, 25, N=10000, alpha=0.05)

 5.861474 seconds (15.16 M allocations: 706.183 MiB, 1.42% gc time)

Out[5]:

PyObject <matplotlib.text.Text object at 0x000000003281B5F8>

In [6]:

# Independent case, randPoisson sampler, n = 50: time and draw the comparison of the three tests.
@time plotcomparisontest(randPoisson, 50, N=10000, alpha=0.05)

 5.360726 seconds (13.12 M allocations: 599.222 MiB, 0.84% gc time)

Out[6]:

PyObject <matplotlib.text.Text object at 0x0000000032C3A5F8>

In [7]:

# Independent case, randPoisson sampler, n = 100: time and draw the comparison of the three tests.
@time plotcomparisontest(randPoisson, 100, N=10000, alpha=0.05)

 9.175158 seconds (13.12 M allocations: 599.223 MiB, 0.34% gc time)

Out[7]:

PyObject <matplotlib.text.Text object at 0x00000000332EE7F0>

In [8]:

# Independent case, randPoisson sampler, n = 200: time and draw the comparison of the three tests.
@time plotcomparisontest(randPoisson, 200, N=10000, alpha=0.05)

16.607463 seconds (13.12 M allocations: 599.223 MiB, 0.20% gc time)

Out[8]:

PyObject <matplotlib.text.Text object at 0x00000000337B84E0>

多項分布でサンプルを生成する場合

In [9]:

# Independent case, randMultinomial sampler, n = 25: time and draw the comparison of the three tests.
@time plotcomparisontest(randMultinomial, 25, N=10000, alpha=0.05)

 3.602437 seconds (13.85 M allocations: 617.143 MiB, 1.24% gc time)

Out[9]:

PyObject <matplotlib.text.Text object at 0x0000000033C20E80>

In [10]:

# Independent case, randMultinomial sampler, n = 50: time and draw the comparison of the three tests.
@time plotcomparisontest(randMultinomial, 50, N=10000, alpha=0.05)

 5.340411 seconds (13.67 M allocations: 607.617 MiB, 0.76% gc time)

Out[10]:

PyObject <matplotlib.text.Text object at 0x00000000341464A8>

In [11]:

# Independent case, randMultinomial sampler, n = 100: time and draw the comparison of the three tests.
@time plotcomparisontest(randMultinomial, 100, N=10000, alpha=0.05)

 9.183502 seconds (13.67 M allocations: 607.614 MiB, 0.43% gc time)

Out[11]:

PyObject <matplotlib.text.Text object at 0x00000000348B85F8>

In [12]:

# Independent case, randMultinomial sampler, n = 200: time and draw the comparison of the three tests.
@time plotcomparisontest(randMultinomial, 200, N=10000, alpha=0.05)

16.333810 seconds (13.67 M allocations: 607.614 MiB, 0.24% gc time)

Out[12]:

PyObject <matplotlib.text.Text object at 0x00000000318B2668>

2×二項分布でサンプルを生成する場合

In [13]:

# Independent case, randBinomial sampler, n = 25: time and draw the comparison of the three tests.
@time plotcomparisontest(randBinomial, 25, N=10000, alpha=0.05)

 3.455279 seconds (14.48 M allocations: 655.758 MiB, 1.54% gc time)

Out[13]:

PyObject <matplotlib.text.Text object at 0x0000000031D944E0>

In [14]:

# Independent case, randBinomial sampler, n = 50: time and draw the comparison of the three tests.
@time plotcomparisontest(randBinomial, 50, N=10000, alpha=0.05)

 5.334195 seconds (14.22 M allocations: 649.629 MiB, 0.89% gc time)

Out[14]:

PyObject <matplotlib.text.Text object at 0x00000000356A3A58>

In [15]:

# Independent case, randBinomial sampler, n = 100: time and draw the comparison of the three tests.
@time plotcomparisontest(randBinomial, 100, N=10000, alpha=0.05)

 9.082301 seconds (14.22 M allocations: 649.629 MiB, 0.54% gc time)

Out[15]:

PyObject <matplotlib.text.Text object at 0x000000003ABBF4A8>

In [16]:

# Independent case, randBinomial sampler, n = 200: time and draw the comparison of the three tests.
@time plotcomparisontest(randBinomial, 200, N=10000, alpha=0.05)

16.318503 seconds (14.22 M allocations: 649.629 MiB, 0.31% gc time)

Out[16]:

PyObject <matplotlib.text.Text object at 0x000000003AC6BC18>

超幾何分布でサンプルを生成する場合

In [17]:

# Independent case, randHypergeometric sampler, n = 25: time and draw the comparison of the three tests.
@time plotcomparisontest(randHypergeometric, 25, N=10000, alpha=0.05)

 3.441200 seconds (13.19 M allocations: 615.505 MiB, 1.23% gc time)

Out[17]:

PyObject <matplotlib.text.Text object at 0x000000003C671828>

In [18]:

# Independent case, randHypergeometric sampler, n = 50: time and draw the comparison of the three tests.
@time plotcomparisontest(randHypergeometric, 50, N=10000, alpha=0.05)

 5.225723 seconds (13.12 M allocations: 611.826 MiB, 0.78% gc time)

Out[18]:

PyObject <matplotlib.text.Text object at 0x000000003CC134A8>

In [19]:

# Independent case, randHypergeometric sampler, n = 100: time and draw the comparison of the three tests.
@time plotcomparisontest(randHypergeometric, 100, N=10000, alpha=0.05)

 9.184050 seconds (13.12 M allocations: 611.829 MiB, 0.44% gc time)

Out[19]:

PyObject <matplotlib.text.Text object at 0x000000003B1DA7F0>

In [20]:

# Independent case, randHypergeometric sampler, n = 200: time and draw the comparison of the three tests.
@time plotcomparisontest(randHypergeometric, 200, N=10000, alpha=0.05)

16.389502 seconds (13.12 M allocations: 611.826 MiB, 0.25% gc time)

Out[20]:

PyObject <matplotlib.text.Text object at 0x0000000032059BE0>

独立性を満たさない分布でサンプルを生成

randHypergeometric は周辺確率(行和と列和)だけを使ってサンプルを生成するので、独立性を満たさない分布には対応していない。

randMultinomial と randBinomial の場合のみを扱う。

In [21]:

"""
    randprob(n, p, q)

Return a randomly perturbed version of the independent cell probabilities
`prodprob(p, q)`. Each cell is multiplied by `1 + 0.5*u` with `u` drawn by
`rand`, and the result is renormalised to sum to one. Candidates are drawn up
to 10000 times until `chisqtest` applied to the rounded counts `n*R` gives a
p-value strictly between 0.4 and 0.6. If no candidate qualifies, the last one
drawn is returned.
"""
function randprob(n, p, q)
    base = prodprob(p, q)
    # Declared before the loop so the name is visible after it; always overwritten.
    candidate = similar(base)
    for attempt in 1:10000
        weights = base .* (1.0 .+ 0.5*rand(4))
        candidate = weights/sum(weights)
        pval = chisqtest(Int64.(round.(n*candidate)))
        (0.4 < pval < 0.6) && break
    end
    return candidate
end
    
"""
    randpvaluesby(sampler, n; N = 10^4, alpha = 0.05)

Counterpart of `pvaluesby` for distributions that do not satisfy independence.
For every pair `(px[i], py[j])` on the grid `0.05:0.05:0.50`, draw `N` tables
with `sampler(n, randprob(n, px[i], py[j]), N)` and estimate, for the
chi-squared test, the G-test and Fisher's exact test, the probability that the
p-value is at most `alpha`.

`sampler(n, p, N)` must return a matrix whose columns are 2x2 tables given as
`[a11, a12, a21, a22]`. Every grid point is simulated, because `randprob`
draws a fresh distribution for each one.

Returns `(alpha, px, prob_chisq, prob_g, prob_fisher)`, where the last three
are matrices indexed by `[i, j]`.
"""
function randpvaluesby(sampler::T, n::Int64;
        N = 10^4, alpha = 0.05) where T<:Function
    px = collect(0.05:0.05:0.50)
    py = px
    np = length(px)
    # zeros(...) works on every Julia version; the two-argument constructor
    # Array{Float64,2}(np,np) no longer exists on Julia ≥ 1.0.
    prob_chisq  = zeros(np, np)
    prob_g      = zeros(np, np)
    prob_fisher = zeros(np, np)

    for i in 1:np
        for j in 1:np
            a = sampler(n, randprob(n, px[i], py[j]), N)
            cols = 1:size(a, 2)
            prob_chisq[i,j]  = ecdf([chisqtest(a[:,k])  for k in cols], alpha)
            prob_g[i,j]      = ecdf([gtest(a[:,k])      for k in cols], alpha)
            prob_fisher[i,j] = ecdf([fishertest(a[:,k]) for k in cols], alpha)
        end
    end
    return alpha, px, prob_chisq, prob_g, prob_fisher
end

"""
    plotrandcomparisontest(sampler, n; N = 10^4, alpha = 0.05)

Run `randpvaluesby(sampler, n; N = N, alpha = alpha)` and draw the three
resulting matrices (chi-squared test, G-test, Fisher's exact test) as heat maps
in one figure. The colour scale is `RdBu_r` and runs from `0` to `2*alpha`.
Returns the value of the final `suptitle` call.
"""
function plotrandcomparisontest(sampler, n; N = 10^4, alpha = 0.05)
    alpha, px, prob_chisq, prob_g, prob_fisher = randpvaluesby(sampler, n; N = N, alpha = alpha)
    edges = vcat(0.0, px)   # cell boundaries for pcolormesh (one more than the grid)
    upper = 2*alpha
    cbarlabel = "probability of p-value \$\\leqq\$ $alpha"

    # Draw one heat-map panel at the given position of the 16x20 layout grid.
    function drawpanel(origin, rates, heading)
        subplot2grid((16,20), origin, rowspan=7, colspan=8)
        pcolormesh(edges, edges, rates, cmap="RdBu_r", vmin=0.0, vmax=upper)
        colorbar(label=cbarlabel)
        title(heading)
        xlabel("marginal probability", fontsize=8)
        ylabel("marginal probability", fontsize=8)
    end

    figure(figsize=(8,6.4))

    drawpanel((0,0),  prob_chisq,  "\$\\chi^2\$-test")
    drawpanel((0,12), prob_g,      "G-test")
    drawpanel((9,0),  prob_fisher, "Fisher's exact test")

    return suptitle("Dependent case: sampler = $(typeof(sampler)),  n = $n,  \$\\alpha\$ = $alpha")
end

Out[21]:

plotrandcomparisontest (generic function with 1 method)

In [22]:

# Spot check at (p, q) = (0.2, 0.3) with n = 100: show the independent cell
# probabilities, one perturbed vector from randprob, and the chi-squared
# p-value of the rounded counts n*r.
n = 100
@show p = prodprob(0.2,0.3)
@show r = randprob(n,0.2,0.3)
chisqtest(Int.(round.(n*r)))

p = prodprob(0.2, 0.3) = [0.06, 0.14, 0.24, 0.56]
r = randprob(n, 0.2, 0.3) = [0.0515132, 0.145921, 0.250814, 0.551752]

Out[22]:

0.5853789284609615

In [23]:

# For every (p, q) on the 10x10 grid, draw one randprob vector and compute the
# chi-squared p-value of its rounded counts; the results are shown as a 10x10 matrix.
n = 100
@time reshape([chisqtest(Int64.(round.(n*randprob(n,p,q)))) for p in 0.05:0.05:0.5 for q in 0.05:0.05:0.5],10,10)

  0.191114 seconds (97.91 k allocations: 5.224 MiB)

Out[23]:

10×10 Array{Float64,2}:
 0.562179  0.529083  0.509322  0.567551  …  0.520912  0.519297  0.402775
 0.567241  0.55026   0.479253  0.526034     0.556815  0.588994  0.546653
 0.567551  0.479253  0.455889  0.585379     0.508414  0.562818  0.429607
 0.567551  0.582804  0.48384   0.514814     0.577176  0.534985  0.519297
 0.523662  0.586003  0.461372  0.549935     0.402548  0.565545  0.488422
 0.52931   0.522691  0.422229  0.531971  …  0.446001  0.475004  0.495827
 0.567185  0.567551  0.509579  0.557358     0.586031  0.412543  0.407   
 0.587198  0.589248  0.534882  0.541886     0.503111  0.500983  0.59858 
 0.596989  0.412295  0.449468  0.451769     0.564934  0.457993  0.448403
 0.596989  0.504985  0.470044  0.491234     0.472948  0.42334   0.415858

In [24]:

# randMultinomial, n = 25: independent case first, then the dependent case for comparison.
@time plotcomparisontest(randMultinomial, 25, N=10000, alpha=0.05)
@time plotrandcomparisontest(randMultinomial, 25, N=10000, alpha=0.05)

  3.369926 seconds (13.67 M allocations: 607.614 MiB, 1.52% gc time)

  6.061108 seconds (25.22 M allocations: 1.102 GiB, 1.54% gc time)

Out[24]:

PyObject <matplotlib.text.Text object at 0x000000003EE8C198>

In [25]:

# randMultinomial, n = 50: independent case first, then the dependent case for comparison.
@time plotcomparisontest(randMultinomial, 50, N=10000, alpha=0.05)
@time plotrandcomparisontest(randMultinomial, 50, N=10000, alpha=0.05)

  5.292568 seconds (13.67 M allocations: 607.614 MiB, 0.91% gc time)

  9.411061 seconds (24.99 M allocations: 1.089 GiB, 0.93% gc time)

Out[25]:

PyObject <matplotlib.text.Text object at 0x000000003F7B0C18>

In [26]:

# randMultinomial, n = 100: independent case first, then the dependent case for comparison.
@time plotcomparisontest(randMultinomial, 100, N=10000, alpha=0.05)
@time plotrandcomparisontest(randMultinomial, 100, N=10000, alpha=0.05)

  9.085913 seconds (13.67 M allocations: 607.614 MiB, 0.53% gc time)

 15.980376 seconds (24.86 M allocations: 1.079 GiB, 0.53% gc time)

Out[26]:

PyObject <matplotlib.text.Text object at 0x0000000040387C50>

In [27]:

# randMultinomial, n = 200: independent case first, then the dependent case for comparison.
@time plotcomparisontest(randMultinomial, 200, N=10000, alpha=0.05)
@time plotrandcomparisontest(randMultinomial, 200, N=10000, alpha=0.05)

 16.529112 seconds (13.67 M allocations: 607.614 MiB, 0.31% gc time)

 28.562060 seconds (24.86 M allocations: 1.079 GiB, 0.31% gc time)

Out[27]:

PyObject <matplotlib.text.Text object at 0x000000003516C5C0>

In [28]:

# randBinomial, n = 25: independent case first, then the dependent case for comparison.
@time plotcomparisontest(randBinomial, 25, N=10000, alpha=0.05)
@time plotrandcomparisontest(randBinomial, 25, N=10000, alpha=0.05)

  3.388684 seconds (14.42 M allocations: 652.681 MiB, 1.73% gc time)

  6.063692 seconds (26.26 M allocations: 1.173 GiB, 3.07% gc time)

Out[28]:

PyObject <matplotlib.text.Text object at 0x000000004059D438>

In [29]:

# randBinomial, n = 50: independent case first, then the dependent case for comparison.
@time plotcomparisontest(randBinomial, 50, N=10000, alpha=0.05)
@time plotrandcomparisontest(randBinomial, 50, N=10000, alpha=0.05)

  5.464273 seconds (14.22 M allocations: 649.629 MiB, 0.88% gc time)

 10.123056 seconds (26.00 M allocations: 1.164 GiB, 0.84% gc time)

Out[29]:

PyObject <matplotlib.text.Text object at 0x0000000042DE00B8>

In [30]:

# randBinomial, n = 100: independent case first, then the dependent case for comparison.
@time plotcomparisontest(randBinomial, 100, N=10000, alpha=0.05)
@time plotrandcomparisontest(randBinomial, 100, N=10000, alpha=0.05)

  9.850951 seconds (14.22 M allocations: 649.629 MiB, 0.54% gc time)

 16.797204 seconds (25.87 M allocations: 1.154 GiB, 0.54% gc time)

Out[30]:

PyObject <matplotlib.text.Text object at 0x000000003510D128>

In [31]:

# randBinomial, n = 200: independent case first, then the dependent case for comparison.
@time plotcomparisontest(randBinomial, 200, N=10000, alpha=0.05)
@time plotrandcomparisontest(randBinomial, 200, N=10000, alpha=0.05)

 16.543340 seconds (14.22 M allocations: 649.629 MiB, 0.30% gc time)

 28.019486 seconds (25.86 M allocations: 1.154 GiB, 0.30% gc time)

Out[31]:

PyObject <matplotlib.text.Text object at 0x000000003FCBEC88>

In [ ]:

Table of Contents