In [2]:
using DataFrames
using RDatasets
using Convex
using SCS
In [6]:
# we'll use iris data
# predict whether the iris species is versicolor using the sepal length and width and petal length and width
iris = dataset("datasets", "iris")
## outcome variable: +1 for versicolor, -1 otherwise
iris[:Y] = [species == "versicolor" ? 1.0 : -1.0 for species in iris[:Species]]
Y = array(iris[:Y])
## create data matrix with one column for each feature (first column corresponds to offset)
X = [ones(size(iris, 1)) iris[:SepalLength] iris[:SepalWidth] iris[:PetalLength] iris[:PetalWidth]];
In [7]:
# solve the logistic regression problem
n, p = size(X)
beta = Variable(p)
problem = minimize(logisticloss(-Y.*(X*beta)))

solve!(problem, SCSSolver(verbose=false))
In [5]:
# let's see how well the model fits
using Gadfly
perm = Base.Sort.sortperm(vec(X*beta.value))
set_default_plot_size(25cm, 12cm)
plot(layer(x=1:n,y=(Y[perm]+1)/2,Geom.point),layer(x=1:n,y=logistic(X*beta.value)[perm],Geom.line))
Out[5]:
x -200 -150 -100 -50 0 50 100 150 200 250 300 350 -150 -145 -140 -135 -130 -125 -120 -115 -110 -105 -100 -95 -90 -85 -80 -75 -70 -65 -60 -55 -50 -45 -40 -35 -30 -25 -20 -15 -10 -5 0 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100 105 110 115 120 125 130 135 140 145 150 155 160 165 170 175 180 185 190 195 200 205 210 215 220 225 230 235 240 245 250 255 260 265 270 275 280 285 290 295 300 -200 0 200 400 -150 -140 -130 -120 -110 -100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 y -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 -1.00 -0.95 -0.90 -0.85 -0.80 -0.75 -0.70 -0.65 -0.60 -0.55 -0.50 -0.45 -0.40 -0.35 -0.30 -0.25 -0.20 -0.15 -0.10 -0.05 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75 0.80 0.85 0.90 0.95 1.00 1.05 1.10 1.15 1.20 1.25 1.30 1.35 1.40 1.45 1.50 1.55 1.60 1.65 1.70 1.75 1.80 1.85 1.90 1.95 2.00 -1 0 1 2 -1.0 -0.9 -0.8 -0.7 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0