Notebook

In [1]:

import numpy as np
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.svm import LinearSVC as skLinearSVC

In [2]:

class LinearSVC:
    """Linear support vector classifier trained with L-BFGS-B.

    For each binary sub-problem the objective minimised is

        0.5 * ||w||^2 + C * sum_i max(0, 1 - y_i * (w . x_i))^2

    where ``w`` includes the bias term as its last component (so the bias is
    regularised together with the feature weights) and ``y_i`` is -1 or +1.
    Two-class data is solved as a single problem; data with more than two
    classes is solved one-vs-rest, one problem per class.

    Parameters
    ----------
    C : float, default=1.0
        Weight of the squared hinge loss relative to the L2 penalty.

    Attributes (set by ``fit``)
    ---------------------------
    classes_ : ndarray
        Sorted unique labels seen during ``fit``.
    coef_ : ndarray of shape (n_problems, n_features)
        Feature weights; ``n_problems`` is 1 for two classes, otherwise the
        number of classes.
    intercept_ : ndarray of shape (n_problems,)
        Bias term of each problem.
    """

    def __init__(self, C=1.0):
        self.C = C

    def _encode(self, y):
        """Turn labels into -1/+1 targets, one column per binary problem.

        Returns ``(classes, y_train)``. ``y_train`` has one column per class,
        except in the two-class case where only the column of the second
        (larger) class is kept, so that a positive score means ``classes[1]``.
        """
        y = np.asarray(y)
        classes = np.unique(y)
        y_train = np.full((y.shape[0], len(classes)), -1)
        for i, c in enumerate(classes):
            y_train[y == c, i] = 1
        if len(classes) == 2:
            y_train = y_train[:, 1].reshape(-1, 1)
        return classes, y_train

    @staticmethod
    def _augment(X):
        """Append a column of ones to ``X`` so the bias is the last weight."""
        return np.c_[X, np.ones(X.shape[0])]

    @staticmethod
    def _objective(w, X_aug, y, C):
        """Cost and gradient for an already bias-augmented design matrix.

        Only samples with ``y * z <= 1`` (inside or on the margin) contribute
        to the loss. Because ``y`` is -1 or +1, the gradient of
        ``(1 - y*z)^2`` with respect to ``z`` simplifies to ``2 * (z - y)``.
        """
        z = np.dot(X_aug, w)
        yz = y * z
        mask = yz <= 1
        cost = C * np.sum(np.square(1 - yz[mask])) + 0.5 * np.dot(w, w)
        grad = w + 2 * C * np.dot(X_aug[mask].T, z[mask] - y[mask])
        return cost, grad

    @staticmethod
    def _cost_grad(w, X, y, C):
        """Cost and gradient of the objective at ``w`` for raw features ``X``.

        ``w`` has ``X.shape[1] + 1`` entries (weights followed by the bias) and
        ``y`` holds -1/+1 targets. Returns ``(cost, grad)``.
        """
        return LinearSVC._objective(w, LinearSVC._augment(X), y, C)

    def _solve_lbfgs(self, X, y):
        """Solve every binary problem (column of ``y``) with L-BFGS-B.

        Returns ``(coef, intercept)`` with shapes ``(n_problems, n_features)``
        and ``(n_problems,)``.
        """
        # Build the augmented matrix once instead of on every objective call.
        X_aug = self._augment(X)
        n_params = X_aug.shape[1]
        result = np.zeros((y.shape[1], n_params))
        for i in range(y.shape[1]):
            res = minimize(fun=self._objective, jac=True, x0=np.zeros(n_params),
                           args=(X_aug, y[:, i], self.C), method='L-BFGS-B')
            result[i] = res.x
        return result[:, :-1], result[:, -1]

    def fit(self, X, y):
        """Fit the classifier on samples ``X`` with labels ``y``.

        ``X`` and ``y`` may be any array-likes (lists are accepted).

        Raises
        ------
        ValueError
            If ``y`` contains fewer than two distinct classes.
        """
        X = np.asarray(X, dtype=float)
        classes, y_train = self._encode(y)
        if len(classes) < 2:
            # Without this guard a one-class fit succeeds and predict() later
            # fails with an IndexError when it looks up classes_[1].
            raise ValueError(
                "LinearSVC needs samples of at least 2 classes, got %d"
                % len(classes))
        self.classes_ = classes
        self.coef_, self.intercept_ = self._solve_lbfgs(X, y_train)
        return self

    def decision_function(self, X):
        """Return the signed scores ``X . coef_.T + intercept_``.

        The result has shape ``(n_samples,)`` when a single problem was fitted
        (two classes) and ``(n_samples, n_classes)`` otherwise.
        """
        X = np.asarray(X, dtype=float)
        scores = np.dot(X, self.coef_.T) + self.intercept_
        if scores.shape[1] == 1:
            return scores.ravel()
        else:
            return scores

    def predict(self, X):
        """Return the predicted label from ``classes_`` for each row of ``X``."""
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            # Two classes: a positive score selects classes_[1].
            indices = (scores > 0).astype(int)
        else:
            indices = np.argmax(scores, axis=1)
        return self.classes_[indices]

In [3]:

# Binary check: drop iris class 2, fit both implementations and compare them.
X, y = load_iris(return_X_y=True)
two_class_rows = y != 2
X, y = X[two_class_rows], y[two_class_rows]

clf1 = LinearSVC()
clf1.fit(X, y)
clf2 = skLinearSVC(dual=False)
clf2.fit(X, y)

# Learned parameters must agree within the absolute tolerances below.
assert np.isclose(clf1.coef_, clf2.coef_, atol=1e-2).all()
assert np.isclose(clf1.intercept_, clf2.intercept_, atol=1e-3).all()

# Decision scores on the training data (named prob*, but these are raw scores).
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.isclose(prob1, prob2, atol=1e-2).all()

# Predicted labels must match exactly.
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)

In [4]:

# Multiclass check: fit both implementations on all three iris classes.
X, y = load_iris(return_X_y=True)

clf1 = LinearSVC()
clf1.fit(X, y)
clf2 = skLinearSVC(dual=False)
clf2.fit(X, y)

# Learned parameters must agree within the absolute tolerances below.
assert np.isclose(clf1.coef_, clf2.coef_, atol=1e-1).all()
assert np.isclose(clf1.intercept_, clf2.intercept_, atol=1e-2).all()

# Per-class decision scores on the training data (raw scores, not probabilities).
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.isclose(prob1, prob2, atol=1e-1).all()

# Predicted labels must match exactly.
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)