import numpy as np
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.svm import LinearSVC as skLinearSVC
class LinearSVC():
    """Linear support vector classifier (L2 penalty, squared hinge loss).

    Every binary sub-problem minimises

        0.5 * ||w||^2 + C * sum_i max(0, 1 - y_i * (x_i . w))^2

    with L-BFGS-B, where ``w`` carries the intercept as its last component
    (so the intercept is regularised together with the weights). Targets
    with more than two classes are handled one-vs-rest: one binary problem
    per class, prediction by the highest score.

    Parameters
    ----------
    C : float, default=1.0
        Multiplier of the loss term; larger values mean weaker
        regularisation.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        Sorted unique labels seen in ``fit``.
    coef_ : ndarray of shape (1, n_features) or (n_classes, n_features)
        Feature weights, one row per binary problem.
    intercept_ : ndarray of shape (1,) or (n_classes,)
        Intercept of each binary problem.
    """

    def __init__(self, C=1.0):
        self.C = C

    def _encode(self, y):
        """Turn labels into +1/-1 targets, one column per binary problem.

        Returns ``(classes, targets)``. ``targets`` has one column per class,
        except in the two-class case where only the column of the second
        (larger) class is kept, giving shape ``(n_samples, 1)``.
        """
        y = np.asarray(y)
        classes = np.unique(y)
        # Broadcast every sample against every class in one comparison.
        y_train = np.where(y[:, None] == classes, 1, -1)
        if len(classes) == 2:
            y_train = y_train[:, 1].reshape(-1, 1)
        return classes, y_train

    @staticmethod
    def _cost_grad(w, X, y, C):
        """Return the objective value and its gradient at ``w``.

        ``w`` has ``n_features + 1`` entries (the last one is the intercept),
        ``X`` is the raw feature matrix and ``y`` holds +1/-1 targets.
        """
        X = np.asarray(X)
        # A trailing column of ones folds the intercept into the weights.
        X_train = np.c_[X, np.ones(X.shape[0])]
        z = np.dot(X_train, w)
        yz = y * z
        # Only samples inside or on the margin contribute to the loss.
        mask = yz <= 1
        cost = C * np.sum(np.square(1 - yz[mask])) + 0.5 * np.dot(w, w)
        # d/dw (1 - y*z)^2 = 2 * (z - y) * x because y is +1 or -1.
        grad = w + 2 * C * np.dot(X_train[mask].T, z[mask] - y[mask])
        return cost, grad

    def _solve_lbfgs(self, X, y):
        """Solve one binary problem per column of ``y`` with L-BFGS-B.

        Returns ``(coef, intercept)`` with shapes ``(n_problems, n_features)``
        and ``(n_problems,)``. A ``RuntimeWarning`` is issued for every
        problem on which the optimiser does not report success.
        """
        import warnings

        n_params = X.shape[1] + 1
        result = np.zeros((y.shape[1], n_params))
        for i, cur_y in enumerate(y.T):
            res = minimize(fun=self._cost_grad, jac=True, x0=np.zeros(n_params),
                           args=(X, cur_y, self.C), method='L-BFGS-B')
            if not res.success:
                # Keep the best iterate found, but do not hide the failure.
                warnings.warn(
                    f"L-BFGS-B did not converge for sub-problem {i}: "
                    f"{res.message}",
                    RuntimeWarning,
                    stacklevel=3,
                )
            result[i] = res.x
        return result[:, :-1], result[:, -1]

    def fit(self, X, y):
        """Fit the classifier and return ``self``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, ``y`` is not 1-D, their lengths differ, or
            ``y`` contains fewer than two distinct classes.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        if y.ndim != 1:
            raise ValueError(f"y must be 1-dimensional, got {y.ndim} dimension(s)")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )
        classes, y_train = self._encode(y)
        if len(classes) < 2:
            # A single class would train fine but make predict() index
            # classes_[1], which does not exist.
            raise ValueError(
                "LinearSVC needs samples of at least 2 classes, "
                f"got {len(classes)}"
            )
        self.classes_ = classes
        self.coef_, self.intercept_ = self._solve_lbfgs(X, y_train)
        return self

    def decision_function(self, X):
        """Return the signed scores of the samples in ``X``.

        The result has shape ``(n_samples,)`` for a binary model (positive
        means ``classes_[1]``) and ``(n_samples, n_classes)`` otherwise.
        """
        scores = np.dot(np.asarray(X), self.coef_.T) + self.intercept_
        if scores.shape[1] == 1:
            return scores.ravel()
        return scores

    def predict(self, X):
        """Return the predicted class label of every sample in ``X``."""
        scores = self.decision_function(X)
        if scores.ndim == 1:
            indices = (scores > 0).astype(int)
        else:
            indices = np.argmax(scores, axis=1)
        return self.classes_[indices]
# Self-check: fit the hand-written LinearSVC and scikit-learn's
# LinearSVC(dual=False) on the same iris data and require that weights,
# intercepts, decision scores and predictions agree within the tolerances.
for drop_third_class, coef_atol, intercept_atol, score_atol in (
    (True, 1e-2, 1e-3, 1e-2),    # first pass: samples labelled 2 removed
    (False, 1e-1, 1e-2, 1e-1),   # second pass: full data set
):
    X, y = load_iris(return_X_y=True)
    if drop_third_class:
        keep = y != 2
        X, y = X[keep], y[keep]

    clf1 = LinearSVC().fit(X, y)
    clf2 = skLinearSVC(dual=False).fit(X, y)

    # Learned parameters.
    assert np.allclose(clf1.coef_, clf2.coef_, atol=coef_atol)
    assert np.allclose(clf1.intercept_, clf2.intercept_, atol=intercept_atol)

    # Raw decision scores.
    prob1 = clf1.decision_function(X)
    prob2 = clf2.decision_function(X)
    assert np.allclose(prob1, prob2, atol=score_atol)

    # Hard predictions must match exactly.
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert np.array_equal(pred1, pred2)