import numpy as np
from copy import deepcopy
from scipy.special import expit
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression as skLogisticRegression
from sklearn.multiclass import OneVsOneClassifier as skOneVsOneClassifier
class OneVsOneClassifier():
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y):
        self.classes_ = np.unique(y)
        self.estimators_ = []
        # train one binary classifier per pair of classes
        for i in range(len(self.classes_)):
            for j in range(i + 1, len(self.classes_)):
                # compare against the class labels, not the loop indices,
                # so arbitrary label values are handled correctly
                cond = np.logical_or(y == self.classes_[i], y == self.classes_[j])
                X_train = X[cond]
                y_train = np.zeros(len(y[cond]))
                y_train[y[cond] == self.classes_[j]] = 1
                clf = deepcopy(self.estimator)
                clf.fit(X_train, y_train)
                self.estimators_.append(clf)
        return self
    def decision_function(self, X):
        votes = np.zeros((X.shape[0], len(self.classes_)))
        # accumulate pairwise confidences, used only to break ties in the votes
        sum_of_confidences = np.zeros((X.shape[0], len(self.classes_)))
        k = 0
        for i in range(len(self.classes_)):
            for j in range(i + 1, len(self.classes_)):
                cur_prediction = self.estimators_[k].predict(X)
                cur_confidence = self.estimators_[k].decision_function(X)
                votes[cur_prediction == 0, i] += 1
                votes[cur_prediction == 1, j] += 1
                sum_of_confidences[:, i] -= cur_confidence
                sum_of_confidences[:, j] += cur_confidence
                k += 1
        # monotonically scale the summed confidences into (-1/3, 1/3) so they
        # can break ties but never overturn the vote counts
        # (this follows the solution used in scikit-learn)
        transformed_confidences = (sum_of_confidences /
                                   (3 * (np.abs(sum_of_confidences) + 1)))
        return votes + transformed_confidences

    def predict(self, X):
        scores = self.decision_function(X)
        indices = np.argmax(scores, axis=1)
        return self.classes_[indices]
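# Hedged sanity sketch (illustrative, not part of the reference implementation):
# the transform x / (3 * (|x| + 1)) used above maps any real confidence into
# (-1/3, 1/3), so adding it to the integer vote counts can break ties between
# equally voted classes but can never overturn a one-vote lead. The _conf
# variable below is a throwaway name introduced for this check.
_conf = np.array([-1e9, -1.0, 0.0, 1.0, 1e9])
assert np.all(np.abs(_conf / (3 * (np.abs(_conf) + 1))) < 1 / 3)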
# Simplified version of LogisticRegression that only works for binary classification
class BinaryLogisticRegression():
    def __init__(self, C=1.0):
        self.C = C
    @staticmethod
    def _cost_grad(w, X, y, alpha):
        # numerically stable log(sigmoid(x)): branch on the sign of x
        # to avoid overflow in exp for large |x|
        def _log_logistic(x):
            if x > 0:
                return -np.log(1 + np.exp(-x))
            else:
                return x - np.log(1 + np.exp(x))
        # y is encoded as {-1, +1}; yz = y * (X @ coef + intercept)
        yz = y * (np.dot(X, w[:-1]) + w[-1])
        # negative log-likelihood plus an L2 penalty on the coefficients
        # (the intercept w[-1] is not penalized)
        cost = (-np.sum(np.vectorize(_log_logistic)(yz))
                + 0.5 * alpha * np.dot(w[:-1], w[:-1]))
        grad = np.zeros(len(w))
        t = (expit(yz) - 1) * y
        grad[:-1] = np.dot(X.T, t) + alpha * w[:-1]
        grad[-1] = np.sum(t)
        return cost, grad
    def _solve_lbfgs(self, X, y):
        # re-encode the labels as {-1, +1} for the cost function above
        y_train = np.full(X.shape[0], -1)
        y_train[y == 1] = 1
        w0 = np.zeros(X.shape[1] + 1)
        res = minimize(fun=self._cost_grad, jac=True, x0=w0,
                       args=(X, y_train, 1 / self.C), method='L-BFGS-B')
        return res.x[:-1], res.x[-1]

    def fit(self, X, y):
        self.coef_, self.intercept_ = self._solve_lbfgs(X, y)
        return self

    def decision_function(self, X):
        scores = np.dot(X, self.coef_) + self.intercept_
        return scores
    def predict(self, X):
        scores = self.decision_function(X)
        indices = (scores > 0).astype(int)
        return indices
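# Hedged gradient check (illustrative): compare the analytic gradient of
# _cost_grad against scipy's finite-difference approximation on small random
# data. All names below (_rng, _Xg, _yg, _w0, _err) are throwaway names
# introduced for this sketch and are not part of the implementation.
from scipy.optimize import check_grad
_rng = np.random.RandomState(0)
_Xg = _rng.randn(20, 3)
_yg = np.where(_rng.randn(20) > 0, 1.0, -1.0)  # {-1, +1} encoding expected by _cost_grad
_w0 = _rng.randn(4)
_err = check_grad(lambda w: BinaryLogisticRegression._cost_grad(w, _Xg, _yg, 1.0)[0],
                  lambda w: BinaryLogisticRegression._cost_grad(w, _Xg, _yg, 1.0)[1],
                  _w0)
assert _err < 1e-4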
for C in [0.1, 1, 10, np.inf]:
    X, y = load_iris(return_X_y=True)
    clf1 = OneVsOneClassifier(BinaryLogisticRegression(C=C)).fit(X, y)
    clf2 = skOneVsOneClassifier(skLogisticRegression(C=C, multi_class="ovr", solver="lbfgs",
                                                     # keep consistent with the scipy default
                                                     tol=1e-5, max_iter=15000)).fit(X, y)
    prob1 = clf1.decision_function(X)
    prob2 = clf2.decision_function(X)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert np.allclose(prob1, prob2)
    assert np.array_equal(pred1, pred2)
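# Additional hedged check (illustrative): the binary solver on its own should
# agree with scikit-learn's predictions on a two-class subset of iris. The
# names X_bin, y_bin, clf3, clf4 are new to this snippet; since the kept
# classes are labeled 0 and 1, the 0/1 indices returned by predict coincide
# with the original labels.
X_bin, y_bin = X[y != 2], y[y != 2]
clf3 = BinaryLogisticRegression(C=1.0).fit(X_bin, y_bin)
clf4 = skLogisticRegression(C=1.0, solver="lbfgs", tol=1e-5, max_iter=15000).fit(X_bin, y_bin)
assert np.array_equal(clf3.predict(X_bin), clf4.predict(X_bin))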