黒木玄
2017-09-26, 2019-10-14
Consider probability distributions on a 2×2 contingency table that satisfy the following independence condition:
$$\begin{bmatrix} p_{11} & p_{12} \\ p_{21} & p_{22} \end{bmatrix} = \begin{bmatrix} pq & p(1-q) \\ (1-p)q & (1-p)(1-q) \end{bmatrix}$$
We call $p = p_{11} + p_{12}$ and $q = p_{11} + p_{21}$ the marginal probabilities.
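As a concrete instance (the same numbers appear later in this notebook as prodprob(0.2, 0.3)), taking $p = 0.2$ and $q = 0.3$ gives
$$\begin{bmatrix} p_{11} & p_{12} \\ p_{21} & p_{22} \end{bmatrix} = \begin{bmatrix} 0.06 & 0.14 \\ 0.24 & 0.56 \end{bmatrix},$$
whose row sums are $(0.2,\,0.8)$ and column sums are $(0.3,\,0.7)$.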
While varying the marginal probabilities, we plot, for three tests (the χ² test, the G-test, and Fisher's exact test), the probability that the p-value is at most α.
This probability should be close to α. Cells where it is close to α are plotted in white; the further it falls below α, the deeper the blue, and the further it exceeds α, the deeper the red. Whiter is better. Blue means that the test is conservative.
As the distributions generating the samples, we use the Poisson, multinomial, binomial (two independent binomial rows), and hypergeometric distributions, implemented below as randPoisson, randMultinomial, randBinomial, and randHypergeometric. We treat the case α = 0.05 (5% significance level).
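Before the full grid-sweep code below, here is a minimal, self-contained sketch of the quantity being plotted: a Monte Carlo estimate of P(p-value ≤ α) for the χ² test at a single choice of marginal probabilities. The function name rejection_rate_sketch and all parameter values are illustrative only, not part of the original code.

using Distributions

function rejection_rate_sketch(; n = 50, p = 0.3, q = 0.3, alpha = 0.05, N = 10^4)
    probs = [p*q, p*(1-q), (1-p)*q, (1-p)*(1-q)]   # independent 2×2 cell probabilities
    hits = 0
    for _ in 1:N
        a = rand(Multinomial(n, probs))             # one simulated table as [a11, a12, a21, a22]
        e = [(a[1]+a[2])*(a[1]+a[3]), (a[1]+a[2])*(a[2]+a[4]),
             (a[3]+a[4])*(a[1]+a[3]), (a[3]+a[4])*(a[2]+a[4])] / n
        chisq = sum((a .- e).^2 ./ e)               # Pearson χ² statistic
        pval = isnan(chisq) ? 1.0 : ccdf(Chisq(1), chisq)   # guard degenerate tables
        hits += pval ≤ alpha
    end
    return hits / N                                 # should be close to alpha
end

Calling rejection_rate_sketch() should return a value near 0.05; the functions defined below do the same sweep over a whole grid of marginal probabilities and several sampling schemes.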
Conclusions
The χ² test most consistently stays close to white.
The G-test tends to produce dark red.
Fisher's exact test produces a lot of dark blue when n is small.
Addendum: Simulation results for the case where the independence condition is not satisfied have been added near the end.
For that dependent case, I would like to design a simulation that yields more meaningful information.
Addendum (2019-10-14): A mid-p version of Fisher's exact test has been added; see the pvaluehg_corr() function for the implementation. The p-values of the mid-p Fisher test behave much like those of the χ² test.
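Concretely, writing $P(j)$ for the hypergeometric probability of the table whose upper-left cell is $j$, and $k$ for the observed cell, pvaluehg_corr below computes
$$p = \sum_{j:\,P(j) < P(k)} P(j) \;+\; a \sum_{j:\,P(j) = P(k)} P(j),$$
so that $a = 1$ gives the ordinary two-sided Fisher p-value and $a = 0.5$ gives the mid-p version.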
versioninfo()
Julia Version 0.6.4
Commit 9d11f62bcb* (2018-07-09 19:09 UTC)
Platform Info:
  OS: Windows (x86_64-w64-mingw32)
  CPU: Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=16)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, haswell)
# http://yomichi.hateblo.jp/entry/2015/09/27/151257
# https://gist.github.com/yomichi/def5921f6b81eb5f7b44
#=
Yuichi Motoyama 2015
This is distributed under the Boost Software License Version 1.0
http://www.boost.org/LICENSE_1_0.txt
=#
@enum DepwarnFlag DepwarnOff=0 DepwarnOn=1 DepwarnError=2
doc"""
- `switch_depwarn!(flag :: Bool)`
- `switch_depwarn!(flag :: DepwarnFlag)`
Enable/Disable deprecation warning.
- `DepwarnOff` or `false` : switch off deprecation warning
- `DepwarnOn` or `true` : switch on deprecation warning
- `DepwarnError` : turn deprecation warning into error
"""
switch_depwarn!(flag :: Bool) = switch_depwarn!(flag ? DepwarnOn : DepwarnOff)
function switch_depwarn!(flag :: DepwarnFlag)
old_opt = Base.JLOptions()
params = map(fieldnames(Base.JLOptions)) do name
name == :depwarn ? Int(flag) : getfield(old_opt, name)
end
new_opt = Base.JLOptions(params...)
unsafe_store!(cglobal(:jl_options, Base.JLOptions), new_opt)
flag
end
# one-liner
# unsafe_store!(cglobal(:jl_options, Base.JLOptions), Base.JLOptions(map(fieldnames(Base.JLOptions)) do name; name==:depwarn?0:getfield(Base.JLOptions(), name) end...))
switch_depwarn!(false)
DepwarnOff::DepwarnFlag = 0
using PyPlot
using Distributions
# xlog(x,y) = if x == 0 then 0 else x*log(x/y)
#
# Warning:
#
# Don't use x*log(x/y) instead of xlog(x,y).
# Because log(0) = -Inf and 0*Inf = NaN.
#
function xlog(x::Float64, y::Float64)
if x == zero(x)
return zero(x)
elseif y == zero(y)
return Inf
else
return x*log(x/y)
end
end
xlog(x,y) = xlog(Float64(x),Float64(y))
x, y = 0, 1
@show x, y
@show x*log(x/y)
@show xlog(x,y);
(x, y) = (0, 1)
x * log(x / y) = NaN
xlog(x, y) = 0.0
prodprob(p::Float64, q::Float64) = [p*q, p*(1-q), (1-p)*q, (1-p)*(1-q)]
function expected(a::AbstractArray{T,1}) where T
n = sum(a)
return [(a[1]+a[2])*(a[1]+a[3])/n, (a[1]+a[2])*(a[2]+a[4])/n,
(a[3]+a[4])*(a[1]+a[3])/n, (a[3]+a[4])*(a[2]+a[4])/n]
end
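For reference, prodprob(p, q) returns the cell probabilities [p11, p12, p21, p22] of the independent table, and expected(a) returns the expected counts (row total) × (column total) / n in the same order. A quick illustrative check (the values 0.2 and 0.3 are arbitrary): a table whose entries already satisfy independence is reproduced exactly by expected.

a = 100 * prodprob(0.2, 0.3)   # an exactly independent table: [6.0, 14.0, 24.0, 56.0]
expected(a)                    # expected counts reproduce the table: [6.0, 14.0, 24.0, 56.0]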
function chisqtest(a::AbstractArray{Int64,1})
mu = expected(a)
chisq = sum((a .- mu).^2 ./mu)
pval = ccdf(Chisq(1), chisq)
return pval
end
function gtest(a::AbstractArray{Int64,1})
mu = expected(a)
g = 2*sum(xlog.(a,mu)) # Don't use a.*log(a./mu)
pval = ccdf(Chisq(1), g)
return pval
end
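Both chisqtest and gtest take a table as the 4-vector [a11, a12, a21, a22], form the Pearson statistic Σ(O − E)²/E or the likelihood-ratio statistic G = 2 Σ O log(O/E), and compare it to the χ² distribution with 1 degree of freedom. An illustrative sanity check: a table whose counts already satisfy independence gives a statistic of 0 and hence a p-value of 1.

chisqtest([6, 14, 24, 56])   # 1.0
gtest([6, 14, 24, 56])       # 1.0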
function pvaluehg(d::Hypergeometric, k::Int64)
c = params(d)
amax = min(c[1],c[3])
p0 = pdf(d, k)
p1 = 0.0
pval = 0.0
for j in 0:amax
p1 = pdf(d, j)
pval += ifelse(p1 ≤ p0, p1, 0.0)
end
return min(pval, 1.0)
end
function fishertest(a::AbstractArray{Int64,1})
d = Hypergeometric(a[1]+a[2], a[3]+a[4], a[1]+a[3])
return pvaluehg(d, a[1])
end
fishertest (generic function with 1 method)
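pvaluehg computes the two-sided p-value of Fisher's exact test by summing the hypergeometric probabilities of all tables that are no more probable than the observed one, $p = \sum_{j:\,P(j) \le P(k)} P(j)$. An illustrative consequence: a table whose upper-left cell sits at the mode of the hypergeometric distribution gets a p-value of 1.

fishertest([6, 14, 24, 56])   # 1.0 (6 is the most probable value of Hypergeometric(20, 80, 30))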
# a is the correction parameter.
# The smaller a is, the smaller the p-value becomes.
# a = 1.0 recovers the ordinary Fisher test.
# a = 0.5 gives the mid-p version.
#
function pvaluehg_corr(d::Hypergeometric, k::Int64; a=0.5)
c = params(d)
amax = min(c[1],c[3])
p0 = pdf(d, k)
p1 = 0.0
pval = 0.0
for j in 0:amax
p1 = pdf(d, j)
pval += ifelse(p1 == p0, a*p1, ifelse(p1 < p0, p1, 0.0))
end
return min(pval, 1.0)
end
function fishertest_corr(a::AbstractArray{Int64,1})
d = Hypergeometric(a[1]+a[2], a[3]+a[4], a[1]+a[3])
return pvaluehg_corr(d, a[1])
end
fishertest_corr (generic function with 1 method)
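As a quick illustrative consistency check (the Hypergeometric parameters and the observed cell 3 are arbitrary), a = 1.0 reproduces the ordinary Fisher p-value, while the default a = 0.5 gives the smaller mid-p value:

d = Hypergeometric(20, 80, 30)
pvaluehg(d, 3)                  # ordinary Fisher p-value
pvaluehg_corr(d, 3, a=1.0)      # identical to the above
pvaluehg_corr(d, 3)             # mid-p value, smaller than the above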
ecdf(pval::AbstractArray{Float64,1}, x::Float64) = count(p -> p ≤ x, pval)/length(pval)
ecdf(pval, x) = ecdf(pval, Float64(x))
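Here ecdf(pvals, α) is simply the fraction of simulated p-values that are ≤ α, i.e. the Monte Carlo estimate of the rejection probability. An illustrative calibration check: uniformly distributed p-values give a value close to α.

ecdf(rand(10^6), 0.05)   # ≈ 0.05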
function randPoisson(n::Int64, p::AbstractArray{Float64}, N::Int64)
return [
rand(Poisson(n*p[1]),N)';
rand(Poisson(n*p[2]),N)';
rand(Poisson(n*p[3]),N)';
rand(Poisson(n*p[4]),N)';
]
end
function randMultinomial(n::Int64, p::AbstractArray{Float64}, N::Int64)
return rand(Multinomial(n,p), N)
end
function randBinomial(n::Int64, p::AbstractArray{Float64}, N::Int64)
m = Int64(round(n*(p[1]+p[2])))
q1 = p[1]/(p[1]+p[2])
q2 = p[3]/(p[3]+p[4])
return [rand(Multinomial(m, [q1, 1.0-q1]), N); rand(Multinomial(n-m, [q2, 1.0-q2]), N)]
end
function randHypergeometric(n::Int64, p::AbstractArray{Float64}, N::Int64)
m1 = Int64(round(n*(p[1]+p[2])))
m2 = Int64(round(n*(p[3]+p[4])))
n1 = Int64(round(n*(p[1]+p[3])))
a = rand(Hypergeometric(m1, m2, n1), N)'
b = m1 .- a
c = n1 .- a
d = m2 .- c
return [a; b; c; d]
end
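These four samplers generate 2×2 tables with the same expected cell proportions but under different conditioning: randPoisson fixes nothing (four independent Poisson counts with means n·p[k]), randMultinomial fixes only the total n, randBinomial fixes the row totals (two independent binomial rows), and randHypergeometric fixes both margins. Each returns a 4×N matrix whose columns are tables in the order [a11, a12, a21, a22]. An illustrative call (the arguments are arbitrary):

p = prodprob(0.3, 0.4)
randMultinomial(20, p, 3)      # 4×3 matrix: three simulated tables, one per column
randHypergeometric(20, p, 3)   # same shape, with both margins of every table fixed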
function pvaluesby(sampler::T, n::Int64;
N = 10^4, alpha = 0.05) where T<:Function
px = collect(0.05:0.05:0.50)
py = px
np = length(px)
prob_chisq = Array{Float64,2}(np,np)
prob_g = Array{Float64,2}(np,np)
prob_fisher = Array{Float64,2}(np,np)
prob_fisher_c = Array{Float64,2}(np,np)
a = Array{Float64,2}(4,N)
for i in 1:np
for j in 1:np
# Exploit the symmetry (p, q) ↔ (q, p): the table is just transposed and all of the tests
# here are invariant under transposition; randBinomial fixes row totals, so it is excluded.
if i > j && sampler != randBinomial
prob_chisq[i,j] = prob_chisq[j,i]
prob_g[i,j] = prob_g[j,i]
prob_fisher[i,j] = prob_fisher[j,i]
prob_fisher_c[i,j] = prob_fisher_c[j,i]
else
a = sampler(n, prodprob(px[i],py[j]), N)
prob_chisq[i,j] = ecdf([chisqtest(a[:,i]) for i in 1:size(a,2)], alpha)
prob_g[i,j] = ecdf([gtest(a[:,i]) for i in 1:size(a,2)], alpha)
prob_fisher[i,j] = ecdf([fishertest(a[:,i]) for i in 1:size(a,2)], alpha)
prob_fisher_c[i,j] = ecdf([fishertest_corr(a[:,i]) for i in 1:size(a,2)], alpha)
end
end
end
return alpha, px, prob_chisq, prob_g, prob_fisher, prob_fisher_c
end
function plotcomparisontest(sampler, n; N = 10^4, alpha = 0.05)
alpha, px, prob_chisq, prob_g, prob_fisher, prob_fisher_c = pvaluesby(sampler, n; N = N, alpha = alpha)
py = px
np = length(px)
ps = [0.0;px]
vmin = 0.0
vmax = 2*alpha
cmap = "RdBu_r"
figure(figsize=(8,6.4))
ax1 = subplot2grid((16,20), (0,0), rowspan=7, colspan=8)
pcolormesh(ps, ps, prob_chisq, cmap=cmap, vmin=vmin, vmax=vmax)
colorbar(label="probability of p-value \$\\leqq\$ $alpha")
title("\$\\chi^2\$-test")
xlabel("marginal probability", fontsize=8)
ylabel("marginal probability", fontsize=8)
ax2 = subplot2grid((16,20), (0,12), rowspan=7, colspan=8)
pcolormesh(ps, ps, prob_g, cmap=cmap, vmin=vmin, vmax=vmax)
colorbar(label="probability of p-value \$\\leqq\$ $alpha")
title("G-test")
xlabel("marginal probability", fontsize=8)
ylabel("marginal probability", fontsize=8)
ax3 = subplot2grid((16,20), (9,0), rowspan=7, colspan=8)
pcolormesh(ps, ps, prob_fisher, cmap=cmap, vmin=vmin, vmax=vmax)
colorbar(label="probability of p-value \$\\leqq\$ $alpha")
title("Fisher's exact test")
xlabel("marginal probability", fontsize=8)
ylabel("marginal probability", fontsize=8)
ax4 = subplot2grid((16,20), (9,12), rowspan=7, colspan=8)
pcolormesh(ps, ps, prob_fisher_c, cmap=cmap, vmin=vmin, vmax=vmax)
colorbar(label="probability of p-value \$\\leqq\$ $alpha")
title("mid-p Fisher's exact test")
xlabel("marginal probability", fontsize=8)
ylabel("marginal probability", fontsize=8)
suptitle("sampler = $(typeof(sampler)), n = $n, \$\\alpha\$ = $alpha")
end
plotcomparisontest (generic function with 1 method)
@time plotcomparisontest(randPoisson, 25, N=10000, alpha=0.05)
8.706177 seconds (18.45 M allocations: 818.336 MiB, 0.96% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randPoisson, n = 25, $\\alpha$ = 0.05')
@time plotcomparisontest(randPoisson, 50, N=10000, alpha=0.05)
9.869911 seconds (16.39 M allocations: 712.112 MiB, 0.63% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randPoisson, n = 50, $\\alpha$ = 0.05')
@time plotcomparisontest(randPoisson, 100, N=10000, alpha=0.05)
19.190928 seconds (16.39 M allocations: 712.112 MiB, 0.28% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randPoisson, n = 100, $\\alpha$ = 0.05')
@time plotcomparisontest(randPoisson, 200, N=10000, alpha=0.05)
34.817723 seconds (16.39 M allocations: 712.112 MiB, 0.15% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randPoisson, n = 200, $\\alpha$ = 0.05')
@time plotcomparisontest(randMultinomial, 25, N=10000, alpha=0.05)
6.366306 seconds (17.11 M allocations: 729.305 MiB, 1.06% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randMultinomial, n = 25, $\\alpha$ = 0.05')
@time plotcomparisontest(randMultinomial, 50, N=10000, alpha=0.05)
9.955822 seconds (16.94 M allocations: 720.503 MiB, 0.59% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randMultinomial, n = 50, $\\alpha$ = 0.05')
@time plotcomparisontest(randMultinomial, 100, N=10000, alpha=0.05)
18.203548 seconds (16.94 M allocations: 720.503 MiB, 0.32% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randMultinomial, n = 100, $\\alpha$ = 0.05')
@time plotcomparisontest(randMultinomial, 200, N=10000, alpha=0.05)
34.735788 seconds (16.94 M allocations: 720.503 MiB, 0.18% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randMultinomial, n = 200, $\\alpha$ = 0.05')
@time plotcomparisontest(randBinomial, 25, N=10000, alpha=0.05)
9.986756 seconds (31.86 M allocations: 1.357 GiB, 1.19% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randBinomial, n = 25, $\\alpha$ = 0.05')
@time plotcomparisontest(randBinomial, 50, N=10000, alpha=0.05)
16.601017 seconds (31.81 M allocations: 1.354 GiB, 0.76% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randBinomial, n = 50, $\\alpha$ = 0.05')
@time plotcomparisontest(randBinomial, 100, N=10000, alpha=0.05)
30.636706 seconds (31.81 M allocations: 1.354 GiB, 0.40% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randBinomial, n = 100, $\\alpha$ = 0.05')
@time plotcomparisontest(randBinomial, 200, N=10000, alpha=0.05)
58.076591 seconds (31.81 M allocations: 1.354 GiB, 0.21% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randBinomial, n = 200, $\\alpha$ = 0.05')
@time plotcomparisontest(randHypergeometric, 25, N=10000, alpha=0.05)
6.299768 seconds (16.48 M allocations: 729.116 MiB, 1.00% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randHypergeometric, n = 25, $\\alpha$ = 0.05')
@time plotcomparisontest(randHypergeometric, 50, N=10000, alpha=0.05)
10.138402 seconds (16.39 M allocations: 724.716 MiB, 0.57% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randHypergeometric, n = 50, $\\alpha$ = 0.05')
@time plotcomparisontest(randHypergeometric, 100, N=10000, alpha=0.05)
17.848403 seconds (16.39 M allocations: 724.716 MiB, 0.31% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randHypergeometric, n = 100, $\\alpha$ = 0.05')
@time plotcomparisontest(randHypergeometric, 200, N=10000, alpha=0.05)
35.738258 seconds (16.39 M allocations: 724.716 MiB, 0.19% gc time)
PyObject Text(0.5, 0.98, 'sampler = #randHypergeometric, n = 200, $\\alpha$ = 0.05')
function randprob(n, p, q)
P = prodprob(p,q)
R = similar(P)
for i in 1:10000
R = P .* (1.0 .+ 0.5*rand(4))
R = R/sum(R)
if 0.4 < chisqtest(Int64.(round.(n*R))) < 0.6
break
end
end
return R
end
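randprob perturbs the independent cell probabilities prodprob(p, q) with random multiplicative noise until the rounded table n·R has a χ² p-value between 0.4 and 0.6, so the returned probabilities describe a mildly dependent alternative (if no perturbation within 10000 tries satisfies the condition, the last one is returned). An illustrative way to see the departure from independence (the arguments are arbitrary):

r = randprob(100, 0.2, 0.3)
r[1] - (r[1] + r[2])*(r[1] + r[3])   # exactly 0 under independence, generally nonzero here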
function randpvaluesby(sampler::T, n::Int64;
N = 10^4, alpha = 0.05) where T<:Function
px = collect(0.05:0.05:0.50)
py = px
np = length(px)
prob_chisq = Array{Float64,2}(np,np)
prob_g = Array{Float64,2}(np,np)
prob_fisher = Array{Float64,2}(np,np)
prob_fisher_c = Array{Float64,2}(np,np)
a = Array{Float64,2}(4,N)
for i in 1:np
for j in 1:np
a = sampler(n, randprob(n, px[i], py[j]), N)
prob_chisq[i,j] = ecdf([chisqtest(a[:,i]) for i in 1:size(a,2)], alpha)
prob_g[i,j] = ecdf([gtest(a[:,i]) for i in 1:size(a,2)], alpha)
prob_fisher[i,j] = ecdf([fishertest(a[:,i]) for i in 1:size(a,2)], alpha)
prob_fisher_c[i,j] = ecdf([fishertest_corr(a[:,i]) for i in 1:size(a,2)], alpha)
end
end
return alpha, px, prob_chisq, prob_g, prob_fisher, prob_fisher_c
end
function plotrandcomparisontest(sampler, n; N = 10^4, alpha = 0.05)
alpha, px, prob_chisq, prob_g, prob_fisher, prob_fisher_c = randpvaluesby(sampler, n; N = N, alpha = alpha)
py = px
np = length(px)
ps = [0.0;px]
vmin = 0.0
vmax = 2*alpha
cmap = "RdBu_r"
figure(figsize=(8,6.4))
ax1 = subplot2grid((16,20), (0,0), rowspan=7, colspan=8)
pcolormesh(ps, ps, prob_chisq, cmap=cmap, vmin=vmin, vmax=vmax)
colorbar(label="probability of p-value \$\\leqq\$ $alpha")
title("\$\\chi^2\$-test")
xlabel("marginal probability", fontsize=8)
ylabel("marginal probability", fontsize=8)
ax2 = subplot2grid((16,20), (0,12), rowspan=7, colspan=8)
pcolormesh(ps, ps, prob_g, cmap=cmap, vmin=vmin, vmax=vmax)
colorbar(label="probability of p-value \$\\leqq\$ $alpha")
title("G-test")
xlabel("marginal probability", fontsize=8)
ylabel("marginal probability", fontsize=8)
ax3 = subplot2grid((16,20), (9,0), rowspan=7, colspan=8)
pcolormesh(ps, ps, prob_fisher, cmap=cmap, vmin=vmin, vmax=vmax)
colorbar(label="probability of p-value \$\\leqq\$ $alpha")
title("Fisher's exact test")
xlabel("marginal probability", fontsize=8)
ylabel("marginal probability", fontsize=8)
ax4 = subplot2grid((16,20), (9,12), rowspan=7, colspan=8)
pcolormesh(ps, ps, prob_fisher_c, cmap=cmap, vmin=vmin, vmax=vmax)
colorbar(label="probability of p-value \$\\leqq\$ $alpha")
title("mid-p Fisher's exact test")
xlabel("marginal probability", fontsize=8)
ylabel("marginal probability", fontsize=8)
suptitle("Dependent case: sampler = $(typeof(sampler)), n = $n, \$\\alpha\$ = $alpha")
end
plotrandcomparisontest (generic function with 1 method)
n = 100
@show p = prodprob(0.2,0.3)
@show r = randprob(n,0.2,0.3)
chisqtest(Int.(round.(n*r)))
p = prodprob(0.2, 0.3) = [0.06, 0.14, 0.24, 0.56]
r = randprob(n, 0.2, 0.3) = [0.0634539, 0.115519, 0.202137, 0.61889]
0.4334508657449359
n = 100
@time reshape([chisqtest(Int64.(round.(n*randprob(n,p,q)))) for p in 0.05:0.05:0.5 for q in 0.05:0.05:0.5],10,10)
0.131083 seconds (95.47 k allocations: 5.119 MiB)
10×10 Array{Float64,2}: the χ² p-values of the rounded tables over the 10×10 grid of (p, q); every entry lies between 0.40 and 0.60.
@time plotcomparisontest(randMultinomial, 25, N=10000, alpha=0.05)
@time plotrandcomparisontest(randMultinomial, 25, N=10000, alpha=0.05)
6.307469 seconds (16.94 M allocations: 720.502 MiB, 1.14% gc time)
10.759144 seconds (29.22 M allocations: 1.260 GiB, 1.06% gc time)
PyObject Text(0.5, 0.98, 'Dependent case: sampler = #randMultinomial, n = 25, $\\alpha$ = 0.05')
@time plotcomparisontest(randMultinomial, 50, N=10000, alpha=0.05)
@time plotrandcomparisontest(randMultinomial, 50, N=10000, alpha=0.05)
10.073406 seconds (16.94 M allocations: 720.505 MiB, 0.61% gc time)
17.681423 seconds (28.94 M allocations: 1.245 GiB, 0.55% gc time)
PyObject Text(0.5, 0.98, 'Dependent case: sampler = #randMultinomial, n = 50, $\\alpha$ = 0.05')
@time plotcomparisontest(randMultinomial, 100, N=10000, alpha=0.05)
@time plotrandcomparisontest(randMultinomial, 100, N=10000, alpha=0.05)
17.675588 seconds (16.94 M allocations: 720.502 MiB, 0.33% gc time)
31.290673 seconds (28.81 M allocations: 1.235 GiB, 0.30% gc time)
PyObject Text(0.5, 0.98, 'Dependent case: sampler = #randMultinomial, n = 100, $\\alpha$ = 0.05')
@time plotcomparisontest(randMultinomial, 200, N=10000, alpha=0.05)
@time plotrandcomparisontest(randMultinomial, 200, N=10000, alpha=0.05)
32.675120 seconds (16.94 M allocations: 720.502 MiB, 0.17% gc time)
55.494176 seconds (28.81 M allocations: 1.235 GiB, 0.17% gc time)
PyObject Text(0.5, 0.98, 'Dependent case: sampler = #randMultinomial, n = 200, $\\alpha$ = 0.05')
@time plotcomparisontest(randBinomial, 25, N=10000, alpha=0.05)
@time plotrandcomparisontest(randBinomial, 25, N=10000, alpha=0.05)
9.697588 seconds (31.81 M allocations: 1.354 GiB, 1.24% gc time)
9.989557 seconds (30.05 M allocations: 1.326 GiB, 1.15% gc time)
PyObject Text(0.5, 0.98, 'Dependent case: sampler = #randBinomial, n = 25, $\\alpha$ = 0.05')
@time plotcomparisontest(randBinomial, 50, N=10000, alpha=0.05)
@time plotrandcomparisontest(randBinomial, 50, N=10000, alpha=0.05)
17.452716 seconds (31.81 M allocations: 1.354 GiB, 0.70% gc time)
16.739506 seconds (29.94 M allocations: 1.319 GiB, 0.66% gc time)
PyObject Text(0.5, 0.98, 'Dependent case: sampler = #randBinomial, n = 50, $\\alpha$ = 0.05')
@time plotcomparisontest(randBinomial, 100, N=10000, alpha=0.05)
@time plotrandcomparisontest(randBinomial, 100, N=10000, alpha=0.05)
32.080788 seconds (31.81 M allocations: 1.354 GiB, 0.42% gc time)
30.810916 seconds (29.81 M allocations: 1.310 GiB, 0.36% gc time)
PyObject Text(0.5, 0.98, 'Dependent case: sampler = #randBinomial, n = 100, $\\alpha$ = 0.05')
@time plotcomparisontest(randBinomial, 200, N=10000, alpha=0.05)
@time plotrandcomparisontest(randBinomial, 200, N=10000, alpha=0.05)
57.443468 seconds (31.81 M allocations: 1.354 GiB, 0.22% gc time)
56.963737 seconds (29.81 M allocations: 1.309 GiB, 0.21% gc time)
PyObject Text(0.5, 0.98, 'Dependent case: sampler = #randBinomial, n = 200, $\\alpha$ = 0.05')