# Generate data for Huber regression.
srand(1);
n = 300;
SAMPLES = int(1.5 * n);
beta_true = 5 * randn(n);
X = randn(n, SAMPLES);
Y = zeros(SAMPLES);
v = randn(SAMPLES);

# Generate data for different values of p.
# Solve the resulting problems.
# WARNING this script takes a few minutes to run.
using Convex, SCS, Distributions
set_default_solver(SCSSolver(verbose=0));
TESTS = 50;
lsq_data = zeros(TESTS);
huber_data = zeros(TESTS);
prescient_data = zeros(TESTS);
p_vals = linspace(0, 0.15, TESTS);
for i = 1:length(p_vals)
    p = p_vals[i];
    # Generate the sign changes.
    factor = float(2 * rand(Binomial(1, 1 - p), SAMPLES) - 1);
    Y = factor .* (X' * beta_true) + v;

    # Form and solve a standard regression problem.
    beta = Variable(n);
    fit = norm(beta - beta_true) / norm(beta_true);
    cost = norm(X' * beta - Y);
    prob = minimize(cost);
    solve!(prob);
    lsq_data[i] = evaluate(fit);

    # Form and solve a prescient regression problem,
    # i.e., where the sign changes are known.
    cost = norm(factor .* (X' * beta) - Y);
    solve!(minimize(cost))
    prescient_data[i] = evaluate(fit);

    # Form and solve the Huber regression problem.
    cost = sum(huber(X' * beta - Y, 1));
    solve!(minimize(cost))
    huber_data[i] = evaluate(fit);
end

using Gadfly, DataFrames
df = DataFrame(x=p_vals, y=huber_data, label="Huber");
df = vcat(df, DataFrame(x=p_vals, y=prescient_data, label="Prescient"));
df = vcat(df, DataFrame(x=p_vals, y=lsq_data, label="Least squares"));
plot(df, x="x", y="y", color="label", Geom.line,
     Guide.XLabel("p"), Guide.YLabel("Fit"))

# Plot the relative reconstruction error for Huber and prescient regression,
# zooming in on smaller values of p.
indices = find(p_vals .<= 0.08);
df = DataFrame(x=p_vals[indices], y=huber_data[indices], label="Huber");
df = vcat(df, DataFrame(x=p_vals[indices], y=prescient_data[indices], label="Prescient"));
plot(df, x="x", y="y", color="label", Geom.line,
     Guide.XLabel("p"), Guide.YLabel("Fit"))
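
# ------------------------------------------------------------------
# The script above uses Julia 0.x-era syntax (srand, int, linspace, find,
# set_default_solver). As a rough, non-authoritative sketch, the core Huber
# fit might look as follows on Julia >= 1.0 with a recent Convex.jl and SCS
# (assumed versions); the fixed outlier probability p = 0.05 below is only
# an illustrative choice, not part of the original experiment. Run it
# separately from the script above.
using Convex, SCS, Distributions, LinearAlgebra, Random

Random.seed!(1)
n = 300
SAMPLES = round(Int, 1.5n)           # int(...) -> round(Int, ...)
beta_true = 5 * randn(n)
X = randn(n, SAMPLES)
v = randn(SAMPLES)

p = 0.05                             # illustrative outlier probability
factor = 2 .* rand(Binomial(1, 1 - p), SAMPLES) .- 1   # random sign flips
Y = factor .* (X' * beta_true) + v

beta = Variable(n)
cost = sum(huber(X' * beta - Y, 1))  # Huber penalty with threshold M = 1
prob = minimize(cost)
solve!(prob, SCS.Optimizer)          # solver is passed per call, not set globally
fit = norm(vec(evaluate(beta)) - beta_true) / norm(beta_true)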