using Gadfly
using DataFrames
using Isotonic
set_default_plot_size(21cm, 13cm);
function logistic_data(n::Int64)
X = sort(randn(n))
return X, float64(1.0 / (1.0 + exp(X)) .< rand(n))
end
logistic_data (generic function with 1 method)
X, Y = logistic_data(200)
plot(x=X, y=Y, Geom.point)
function timing(n::Int64, iters::Int64, regression::Function)
gc()
_, Y = logistic_data(n)
weights = ones(n)
times = zeros(iters)
for i in 1 : iters
t_start = time()
scratch = copy(Y)
regression(scratch, weights)
t_end = time()
times[i] = t_end - t_start
end
return mean(times)
end
timing (generic function with 1 method)
xs = logspace(1, 7, 7) |> round
7-element Array{Float64,1}: 10.0 100.0 1000.0 10000.0 100000.0 1.0e6 1.0e7
run(algorithm, fn) = DataFrame(X=xs, Y=map(x -> timing(int64(x), 5, fn), xs), algorithm=algorithm)
df = [
run("Active Set", active_set_isotonic_regression),
run("Linear PAVA", isotonic_regression),
run("Pooled PAVA", pooled_pava_isotonic_regression)];
function print_algorithm(algorithm)
# For plotting alongside the Python implementation
println(algorithm)
for row in eachrow(df[df[:algorithm] .== algorithm, :])
@printf("%d %f\n", row[:X], row[:Y])
end
end
print_algorithm("Active Set")
print_algorithm("Linear PAVA")
Active Set 10 0.000003 100 0.000019 1000 0.000228 10000 0.003492 100000 0.077059 1000000 0.742764 10000000 10.734470 Linear PAVA 10 0.000001 100 0.000002 1000 0.000017 10000 0.000318 100000 0.003388 1000000 0.045979 10000000 0.521390
plot(df, x="X", y="Y", color="algorithm", Geom.line, Geom.point, Scale.x_log10, Scale.y_log10)