Isotonic Regression in Julia¶

In [1]:

using Gadfly
using DataFrames
using Isotonic

In [1]:

set_default_plot_size(21cm, 13cm);

Performance¶

Generate fake logistic regression data¶

In [2]:

function logistic_data(n::Int64)
    X = sort(randn(n))
    return X, float64(1.0 / (1.0 + exp(X)) .< rand(n))
end

Out[2]:

logistic_data (generic function with 1 method)

In [3]:

X, Y = logistic_data(200)
plot(x=X, y=Y, Geom.point)

Out[3]:

In [4]:

function timing(n::Int64, iters::Int64, regression::Function)
    gc()
    _, Y = logistic_data(n)
    weights = ones(n)
    times = zeros(iters)
    for i in 1 : iters
        t_start = time()
        scratch = copy(Y)
        regression(scratch, weights)
        t_end = time()
        times[i] = t_end - t_start
    end
    return mean(times)
end

Out[4]:

timing (generic function with 1 method)

In [8]:

xs = logspace(1, 7, 7) |> round

Out[8]:

7-element Array{Float64,1}:
     10.0  
    100.0  
   1000.0  
  10000.0  
 100000.0  
      1.0e6
      1.0e7

In [8]:

run(algorithm, fn) =  DataFrame(X=xs, Y=map(x -> timing(int64(x), 5, fn), xs), algorithm=algorithm)
df = [
    run("Active Set", active_set_isotonic_regression), 
    run("Linear PAVA", isotonic_regression), 
    run("Pooled PAVA", pooled_pava_isotonic_regression)];

In [9]:

function print_algorithm(algorithm)
    # For plotting alongside the Python implementation
    println(algorithm)
    for row in eachrow(df[df[:algorithm] .== algorithm, :])
        @printf("%d %f\n", row[:X], row[:Y])
    end
end

print_algorithm("Active Set")
print_algorithm("Linear PAVA")

Active Set
10 0.000003
100 0.000019
1000 0.000228
10000 0.003492
100000 0.077059
1000000 0.742764
10000000 10.734470
Linear PAVA
10 0.000001
100 0.000002
1000 0.000017
10000 0.000318
100000 0.003388
1000000 0.045979
10000000 0.521390

In [10]:

plot(df, x="X", y="Y", color="algorithm", Geom.line, Geom.point, Scale.x_log10, Scale.y_log10)

Out[10]: