import numpy as np
from copy import deepcopy
from scipy.special import expit
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression as skLogisticRegression
from sklearn.multiclass import OneVsOneClassifier as skOneVsOneClassifier
class OneVsOneClassifier():
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y):
        self.classes_ = np.unique(y)
        self.estimators_ = []
        # train one binary classifier per pair of classes
        for i in range(len(self.classes_)):
            for j in range(i + 1, len(self.classes_)):
                # compare against the class labels, not the loop indices,
                # so arbitrary label values are handled correctly
                cond = np.logical_or(y == self.classes_[i], y == self.classes_[j])
                X_train = X[cond]
                y_train = np.zeros(len(y[cond]))
                y_train[y[cond] == self.classes_[j]] = 1
                clf = deepcopy(self.estimator)
                clf.fit(X_train, y_train)
                self.estimators_.append(clf)
        return self
    def decision_function(self, X):
        votes = np.zeros((X.shape[0], len(self.classes_)))
        # accumulate pairwise confidences, used only to break ties in the votes
        sum_of_confidences = np.zeros((X.shape[0], len(self.classes_)))
        k = 0
        for i in range(len(self.classes_)):
            for j in range(i + 1, len(self.classes_)):
                cur_prediction = self.estimators_[k].predict(X)
                cur_confidence = self.estimators_[k].decision_function(X)
                votes[cur_prediction == 0, i] += 1
                votes[cur_prediction == 1, j] += 1
                sum_of_confidences[:, i] -= cur_confidence
                sum_of_confidences[:, j] += cur_confidence
                k += 1
        # monotonically scale the summed confidences into (-1/3, 1/3) so they
        # can break ties but never overturn the vote counts
        # (this follows the solution used in scikit-learn)
        transformed_confidences = (sum_of_confidences /
                                   (3 * (np.abs(sum_of_confidences) + 1)))
        return votes + transformed_confidences

    def predict(self, X):
        scores = self.decision_function(X)
        indices = np.argmax(scores, axis=1)
        return self.classes_[indices]
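# Hedged sanity sketch (illustrative, not part of the reference implementation):
# the transform x / (3 * (|x| + 1)) used above maps any real confidence into
# (-1/3, 1/3), so adding it to the integer vote counts can break ties between
# equally voted classes but can never overturn a one-vote lead. The _conf
# variable below is a throwaway name introduced for this check.
_conf = np.array([-1e9, -1.0, 0.0, 1.0, 1e9])
assert np.all(np.abs(_conf / (3 * (np.abs(_conf) + 1))) < 1 / 3)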
# Simplified version of LogisticRegression that only works for binary classification
class BinaryLogisticRegression():
    def __init__(self, C=1.0):
        self.C = C
    @staticmethod
    def _cost_grad(w, X, y, alpha):
        # numerically stable log(sigmoid(x)): branch on the sign of x
        # to avoid overflow in exp for large |x|
        def _log_logistic(x):
            if x > 0:
                return -np.log(1 + np.exp(-x))
            else:
                return x - np.log(1 + np.exp(x))
        # y is encoded as {-1, +1}; yz = y * (X @ coef + intercept)
        yz = y * (np.dot(X, w[:-1]) + w[-1])
        # negative log-likelihood plus an L2 penalty on the coefficients
        # (the intercept w[-1] is not penalized)
        cost = (-np.sum(np.vectorize(_log_logistic)(yz))
                + 0.5 * alpha * np.dot(w[:-1], w[:-1]))
        grad = np.zeros(len(w))
        t = (expit(yz) - 1) * y
        grad[:-1] = np.dot(X.T, t) + alpha * w[:-1]
        grad[-1] = np.sum(t)
        return cost, grad
    def _solve_lbfgs(self, X, y):
        # re-encode the labels as {-1, +1} for the cost function above
        y_train = np.full(X.shape[0], -1)
        y_train[y == 1] = 1
        w0 = np.zeros(X.shape[1] + 1)
        res = minimize(fun=self._cost_grad, jac=True, x0=w0,
                       args=(X, y_train, 1 / self.C), method='L-BFGS-B')
        return res.x[:-1], res.x[-1]

    def fit(self, X, y):
        self.coef_, self.intercept_ = self._solve_lbfgs(X, y)
        return self

    def decision_function(self, X):
        scores = np.dot(X, self.coef_) + self.intercept_
        return scores
    def predict(self, X):
        scores = self.decision_function(X)
        indices = (scores > 0).astype(int)
        return indices
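# Hedged gradient check (illustrative): compare the analytic gradient of
# _cost_grad against scipy's finite-difference approximation on small random
# data. All names below (_rng, _Xg, _yg, _w0, _err) are throwaway names
# introduced for this sketch and are not part of the implementation.
from scipy.optimize import check_grad
_rng = np.random.RandomState(0)
_Xg = _rng.randn(20, 3)
_yg = np.where(_rng.randn(20) > 0, 1.0, -1.0)  # {-1, +1} encoding expected by _cost_grad
_w0 = _rng.randn(4)
_err = check_grad(lambda w: BinaryLogisticRegression._cost_grad(w, _Xg, _yg, 1.0)[0],
                  lambda w: BinaryLogisticRegression._cost_grad(w, _Xg, _yg, 1.0)[1],
                  _w0)
assert _err < 1e-4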
for C in [0.1, 1, 10, np.inf]:
    X, y = load_iris(return_X_y=True)
    clf1 = OneVsOneClassifier(BinaryLogisticRegression(C=C)).fit(X, y)
    clf2 = skOneVsOneClassifier(skLogisticRegression(C=C, multi_class="ovr", solver="lbfgs",
                                                     # keep consistent with the scipy default
                                                     tol=1e-5, max_iter=15000)).fit(X, y)
    prob1 = clf1.decision_function(X)
    prob2 = clf2.decision_function(X)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert np.allclose(prob1, prob2)
    assert np.array_equal(pred1, pred2)
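# Additional hedged check (illustrative): the binary solver on its own should
# agree with scikit-learn's predictions on a two-class subset of iris. The
# names X_bin, y_bin, clf3, clf4 are new to this snippet; since the kept
# classes are labeled 0 and 1, the 0/1 indices returned by predict coincide
# with the original labels.
X_bin, y_bin = X[y != 2], y[y != 2]
clf3 = BinaryLogisticRegression(C=1.0).fit(X_bin, y_bin)
clf4 = skLogisticRegression(C=1.0, solver="lbfgs", tol=1e-5, max_iter=15000).fit(X_bin, y_bin)
assert np.array_equal(clf3.predict(X_bin), clf4.predict(X_bin))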