from __future__ import print_function
from datetime import datetime

import numpy as np
import pylab as pl
from scipy import linalg, optimize

import pytron


def phi(t):
    # logistic (sigmoid) function, computed in a numerically stable way
    idx = t > 0
    out = np.empty(t.size, dtype=np.float64)
    out[idx] = 1. / (1 + np.exp(-t[idx]))
    exp_t = np.exp(t[~idx])
    out[~idx] = exp_t / (1. + exp_t)
    return out


def loss(w, X, y, alpha):
    # objective to be minimized: logistic loss plus L2 penalty
    z = X.dot(w)
    yz = y * z
    idx = yz > 0
    out = np.zeros_like(yz)
    out[idx] = np.log(1 + np.exp(-yz[idx]))
    out[~idx] = -yz[~idx] + np.log(1 + np.exp(yz[~idx]))
    out = out.sum() + .5 * alpha * w.dot(w)
    return out


def gradient(w, X, y, alpha):
    # gradient of the regularized logistic loss
    z = X.dot(w)
    z = phi(y * z)
    z0 = (z - 1) * y
    grad = X.T.dot(z0) + alpha * w
    return grad


def hessian(s, w, X, y, alpha):
    # Hessian-vector product H(w) @ s of the regularized logistic loss
    z = X.dot(w)
    z = phi(y * z)
    d = z * (1 - z)
    wa = d * X.dot(s)
    Hs = X.T.dot(wa)
    out = Hs + alpha * s
    return out


def grad_hess(w, X, y, alpha):
    # gradient AND a closure computing Hessian-vector products at w
    z = X.dot(w)
    z = phi(y * z)
    z0 = (z - 1) * y
    grad = X.T.dot(z0) + alpha * w

    def Hs(s):
        d = z * (1 - z)
        wa = d * X.dot(s)
        return X.T.dot(wa) + alpha * s

    return grad, Hs


for corr in (0., 1., 10.):
    # synthetic classification problem
    n_samples, n_features = 10 ** 4, 10 ** 3
    np.random.seed(0)
    X = np.random.randn(n_samples, n_features)
    w = np.random.randn(n_features)
    y = np.sign(X.dot(w))
    X += 0.8 * np.random.randn(n_samples, n_features)  # add noise
    X += corr  # adding a constant term makes the features correlated
    X = np.hstack((X, np.ones((X.shape[0], 1))))  # column of ones for the intercept
    alpha = 1.

    # conjugate gradient
    print('CG')
    timings_cg = []
    precision_cg = []
    w0 = np.zeros(X.shape[1])
    start = datetime.now()

    def callback(x0):
        prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
        precision_cg.append(prec)
        timings_cg.append((datetime.now() - start).total_seconds())

    callback(w0)
    w = optimize.fmin_cg(loss, w0, fprime=gradient, args=(X, y, alpha),
                         gtol=1e-6, callback=callback, maxiter=200)

    # truncated Newton (Newton-CG)
    print('TNC')
    timings_tnc = []
    precision_tnc = []
    w0 = np.zeros(X.shape[1])
    start = datetime.now()

    def callback(x0):
        prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
        precision_tnc.append(prec)
        timings_tnc.append((datetime.now() - start).total_seconds())

    callback(w0)
    out = optimize.fmin_ncg(loss, w0, fprime=gradient, args=(X, y, alpha),
                            avextol=1e-10, callback=callback, maxiter=40)

    # L-BFGS
    print('L-BFGS')
    timings_lbfgs = []
    precision_lbfgs = []
    w0 = np.zeros(X.shape[1])
    start = datetime.now()

    def callback(x0):
        prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
        precision_lbfgs.append(prec)
        timings_lbfgs.append((datetime.now() - start).total_seconds())

    callback(w0)
    out = optimize.fmin_l_bfgs_b(loss, w0, fprime=gradient, args=(X, y, alpha),
                                 pgtol=1e-10, maxiter=200, maxfun=250,
                                 factr=1e-30, callback=callback)

    # BFGS
    print('BFGS')
    timings_bfgs = []
    precision_bfgs = []
    w0 = np.zeros(X.shape[1])
    start = datetime.now()

    def callback(x0):
        prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
        precision_bfgs.append(prec)
        timings_bfgs.append((datetime.now() - start).total_seconds())

    callback(w0)
    out = optimize.fmin_bfgs(loss, w0, fprime=gradient, args=(X, y, alpha),
                             gtol=1e-10, maxiter=50, callback=callback)

    # trust region Newton (pytron)
    print('PYTRON')
    timings_pytron = []
    precision_pytron = []
    w0 = np.zeros(X.shape[1])
    start = datetime.now()

    def callback(x0):
        prec = linalg.norm(gradient(x0, X, y, alpha), np.inf)
        precision_pytron.append(prec)
        timings_pytron.append((datetime.now() - start).total_seconds())

    callback(w0)
    w = pytron.minimize(loss, grad_hess, w0, args=(X, y, alpha),
                        gtol=1e-10, tol=1e-20, max_iter=100,
                        callback=callback).x

    # plot precision (infinity norm of the gradient) against elapsed time
    pl.figure()  # fresh figure for each correlation level
    pl.plot(timings_lbfgs, precision_lbfgs, label='L-BFGS', lw=5, color='green')
    pl.scatter(timings_lbfgs, precision_lbfgs, s=100, marker='o', color='green')
    pl.plot(timings_bfgs, precision_bfgs, label='BFGS', lw=5, color='red')
    pl.scatter(timings_bfgs, precision_bfgs, s=100, marker='o', color='red')
    pl.plot(timings_pytron, precision_pytron, label='Trust Region', lw=5, color='cyan')
    pl.scatter(timings_pytron, precision_pytron, s=100, marker='o', color='cyan')
    pl.plot(timings_cg, precision_cg, label='CG', lw=5, color='magenta')
    pl.scatter(timings_cg, precision_cg, s=100, marker='o', color='magenta')
    pl.plot(timings_tnc, precision_tnc, label='TNC', lw=5, color='blue')
    pl.scatter(timings_tnc, precision_tnc, s=100, marker='o', color='blue')
    pl.yscale('log')
    pl.ylabel(r'$\|\nabla f\|_\infty$', fontsize=20)
    pl.xlabel('Time (in seconds)', fontsize=20)
    fig = pl.gcf()
    fig.set_size_inches(8.5, 6.5)
    pl.legend(fontsize=20)
    if corr == 0.:
        pl.title('Numerical solvers for logistic regression\n'
                 '(uncorrelated features)', fontsize=20)
        pl.xlim((0, 2))
    if corr == 1.:
        pl.title('Numerical solvers for logistic regression\n'
                 '(correlated features)', fontsize=20)
        pl.xlim((0, 5))
    if corr == 10.:
        pl.title('Numerical solvers for logistic regression\n'
                 '(highly correlated features)', fontsize=20)
        pl.xlim((0, 5))
    pl.ylim((10 ** -5, None))
    pl.savefig('comparison_logistic_corr_%s.png' % int(corr))
    pl.show()
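
# --------------------------------------------------------------------------
# Sanity check, not part of the benchmark above: a minimal sketch that
# verifies the analytical gradient and the Hessian-vector product returned
# by grad_hess against numerical approximations on a small random problem.
# The problem size, the step size eps and the variable names *_check are
# arbitrary choices made only for this illustration.
rng = np.random.RandomState(0)
n_check_samples, n_check_features = 200, 20
X_check = rng.randn(n_check_samples, n_check_features)
y_check = np.sign(rng.randn(n_check_samples))
w_check = rng.randn(n_check_features)

# analytical gradient vs. numerical gradient of the loss
err = optimize.check_grad(loss, gradient, w_check, X_check, y_check, alpha)
print('gradient error: %.2e' % err)

# Hessian-vector product vs. finite differences of the gradient
_, Hs = grad_hess(w_check, X_check, y_check, alpha)
s = rng.randn(n_check_features)
eps = 1e-6
Hs_fd = (gradient(w_check + eps * s, X_check, y_check, alpha)
         - gradient(w_check, X_check, y_check, alpha)) / eps
print('Hessian-vector product error: %.2e' % linalg.norm(Hs(s) - Hs_fd))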