Isotonic Regression in Julia

In [1]:
using Gadfly
using DataFrames
using Isotonic
In [1]:
set_default_plot_size(21cm, 13cm);

Performance

Generate fake logistic regression data

In [2]:
function logistic_data(n::Int64)
    X = sort(randn(n))
    return X, float64(1.0 / (1.0 + exp(X)) .< rand(n))
end
Out[2]:
logistic_data (generic function with 1 method)
In [3]:
X, Y = logistic_data(200)
plot(x=X, y=Y, Geom.point)
Out[3]:
In [4]:
function timing(n::Int64, iters::Int64, regression::Function)
    gc()
    _, Y = logistic_data(n)
    weights = ones(n)
    times = zeros(iters)
    for i in 1 : iters
        t_start = time()
        scratch = copy(Y)
        regression(scratch, weights)
        t_end = time()
        times[i] = t_end - t_start
    end
    return mean(times)
end
Out[4]:
timing (generic function with 1 method)
In [8]:
xs = logspace(1, 7, 7) |> round
Out[8]:
7-element Array{Float64,1}:
     10.0  
    100.0  
   1000.0  
  10000.0  
 100000.0  
      1.0e6
      1.0e7
In [8]:
run(algorithm, fn) =  DataFrame(X=xs, Y=map(x -> timing(int64(x), 5, fn), xs), algorithm=algorithm)
df = [
    run("Active Set", active_set_isotonic_regression), 
    run("Linear PAVA", isotonic_regression), 
    run("Pooled PAVA", pooled_pava_isotonic_regression)];
In [9]:
function print_algorithm(algorithm)
    # For plotting alongside the Python implementation
    println(algorithm)
    for row in eachrow(df[df[:algorithm] .== algorithm, :])
        @printf("%d %f\n", row[:X], row[:Y])
    end
end

print_algorithm("Active Set")
print_algorithm("Linear PAVA")
Active Set
10 0.000003
100 0.000019
1000 0.000228
10000 0.003492
100000 0.077059
1000000 0.742764
10000000 10.734470
Linear PAVA
10 0.000001
100 0.000002
1000 0.000017
10000 0.000318
100000 0.003388
1000000 0.045979
10000000 0.521390
In [10]:
plot(df, x="X", y="Y", color="algorithm", Geom.line, Geom.point, Scale.x_log10, Scale.y_log10)
Out[10]: