# Logistic Regression¶

Implementation of logistic regression in pure Python using tools from scipy.optimize. This is a companion notebook to the blog post Numerical optimizers for Logistic Regression

### Loss function¶

The loss function, using vector notation and abusing of broadcasting is:

$$\mathcal{L}(w) = - \mathbb{1}^T \log(\phi(y * X w)) + \frac{\alpha}{2} w^T w$$

where is the component-wise product (numpy's  operator) and $\phi(t) = 1 / (1 + \exp(-t))$ is the logistic function.

In [1]:
from __future__ import print_function
import numpy as np
from scipy import linalg, optimize
from sklearn import cross_validation, metrics, linear_model
from datetime import datetime
import pytron
In [2]:
def phi(t):
# logistic function
idx = t > 0
out = np.empty(t.size, dtype=np.float)
out[idx] = 1. / (1 + np.exp(-t[idx]))
exp_t = np.exp(t[~idx])
out[~idx] = exp_t / (1. + exp_t)
return out

def loss(w, X, y, alpha):
# loss function to be optimized, it's the logistic loss
z = X.dot(w)
yz = y * z
idx = yz > 0
out = np.zeros_like(yz)
out[idx] = np.log(1 + np.exp(-yz[idx]))
out[~idx] = (-yz[~idx] + np.log(1 + np.exp(yz[~idx])))
out = out.sum() + .5 * alpha * w.dot(w)
return out

# gradient of the logistic loss
z = X.dot(w)
z = phi(y * z)
z0 = (z - 1) * y
grad = X.T.dot(z0) + alpha * w

def hessian(s, w, X, y, alpha):
z = X.dot(w)
z = phi(y * z)
d = z * (1 - z)
wa = d * X.dot(s)
Hs = X.T.dot(wa)
out = Hs + alpha * s
return  out

# gradient AND hessian of the logistic
z = X.dot(w)
z = phi(y * z)
z0 = (z - 1) * y
grad = X.T.dot(z0) + alpha * w
def Hs(s):
d = z * (1 - z)
wa = d * X.dot(s)
return X.T.dot(wa) + alpha * s

## Benchmarks¶

In [39]:
for corr in (0., 1., 10.):
n_samples, n_features = 10 ** 4, 10 ** 3
np.random.seed(0)
X = np.random.randn(n_samples, n_features)
w = np.random.randn(n_features)
y = np.sign(X.dot(w))
X += 0.8 * np.random.randn(n_samples, n_features) # add noise
X+= corr # this makes it correlated by adding a constant term
X = np.hstack((X, np.ones((X.shape[0], 1)))) # add a column of ones for intercept
alpha = 1.

print('CG')
timings_cg = []
precision_cg = []
w0 = np.zeros(X.shape[1])
start = datetime.now()
def callback(x0):
prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
precision_cg.append(prec)
timings_cg.append((datetime.now() - start).total_seconds())
callback(w0)
args=(X, y, alpha), gtol=1e-6, callback=callback, maxiter=200)

## truncated newton
print('TNC')
timings_tnc = []
precision_tnc = []
w0 = np.zeros(X.shape[1])
start = datetime.now()
def callback(x0):
prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
precision_tnc.append(prec)
timings_tnc.append((datetime.now() - start).total_seconds())
callback(w0)
args=(X, y, alpha), avextol=1e-10,
callback=callback, maxiter=40)

# L-BFGS
print('L-BFGS')
timings_lbfgs = []
precision_lbfgs = []
w0 = np.zeros(X.shape[1])
start = datetime.now()
def callback(x0):
prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
precision_lbfgs.append(prec)
timings_lbfgs.append((datetime.now() - start).total_seconds())
callback(w0)
args=(X, y, alpha), pgtol=1e-10, maxiter=200,
maxfun=250, factr=1e-30, callback=callback)

# BFGS
print('BFGS')
timings_bfgs = []
precision_bfgs = []
w0 = np.zeros(X.shape[1])
start = datetime.now()
def callback(x0):
prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
precision_bfgs.append(prec)
timings_bfgs.append((datetime.now() - start).total_seconds())
callback(w0)
args=(X, y, alpha), gtol=1e-10, maxiter=50,
callback=callback)

# Trust Region
print('PYTRON')
timings_pytron= []
precision_pytron = []
w0 = np.zeros(X.shape[1])
start = datetime.now()
def callback(x0):
prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
precision_pytron.append(prec)
timings_pytron.append((datetime.now() - start).total_seconds())
callback(w0)
w = pytron.minimize(loss, grad_hess, w0, args=(X, y, alpha),
gtol=1e-10, tol=1e-20, max_iter=100, callback=callback).x

import pylab as pl
pl.plot(timings_lbfgs, precision_lbfgs, label='L-BFGS', lw=5, color='green')
pl.scatter(timings_lbfgs, precision_lbfgs, s=100, marker='o', color='green')
pl.plot(timings_bfgs, precision_bfgs, label='BFGS', lw=5, color='red')
pl.scatter(timings_bfgs, precision_bfgs, s=100, marker='o', color='red')
pl.plot(timings_pytron, precision_pytron, label='Trust Region', lw=5, color='cyan')
pl.scatter(timings_pytron, precision_pytron, s=100, marker='o', color='cyan')
pl.plot(timings_cg, precision_cg, label='CG', lw=5, color='magenta')
pl.scatter(timings_cg, precision_cg, s=100, marker='o', color='magenta')
pl.plot(timings_tnc, precision_tnc, label='TNC', lw=5, color='blue')
pl.scatter(timings_tnc, precision_tnc, s=100, marker='o', color='blue')
pl.yscale('log')
pl.ylabel(r'$\|\|\nabla f\|\|_\infty$', fontsize=20)
pl.xlabel('Time (in seconds)', fontsize=20)
fig = matplotlib.pyplot.gcf()
fig.set_size_inches(8.5,6.5)
pl.legend(fontsize=20)
if corr == 0.:
pl.title('Numerical solvers for logistic regression\n(uncorrelated features)', fontsize=20)
pl.xlim((0, 2))
if corr == 1.:
pl.title('Numerical solvers for logistic regression\n(correlated features)', fontsize=20)
pl.xlim((0, 5))
if corr == 10.:
pl.title('Numerical solvers for logistic regression\n(highly correlated features)', fontsize=20)
pl.xlim((0, 5))
pl.ylim((10 ** -5, None))
pl.savefig('comparison_logistic_corr_%s.png' % int(corr))
pl.show()
CG
Optimization terminated successfully.
Current function value: 3848.590812
Iterations: 46
Function evaluations: 285
TNC
Optimization terminated successfully.
Current function value: 3848.590812
Iterations: 18
Function evaluations: 21
Hessian evaluations: 0
L-BFGS
BFGS
Warning: Maximum number of iterations has been exceeded.
Current function value: 3848.928087
Iterations: 50
Function evaluations: 79
PYTRON
CG
Warning: Maximum number of iterations has been exceeded.
Current function value: 3851.748082
Iterations: 200
Function evaluations: 478
TNC
Optimization terminated successfully.
Current function value: 3851.737228
Iterations: 18
Function evaluations: 19
Hessian evaluations: 0
L-BFGS
BFGS
Warning: Maximum number of iterations has been exceeded.
Current function value: 3900.870018
Iterations: 50
Function evaluations: 84
PYTRON
CG
Warning: Maximum number of iterations has been exceeded.
Current function value: 1206599.572756
Iterations: 200
Function evaluations: 860
TNC
Optimization terminated successfully.
Current function value: 3856.768202
Iterations: 19
Function evaluations: 21