import numpy as np
from copy import deepcopy
from scipy.spatial.distance import cdist
from scipy.special import expit
from scipy.optimize import minimize
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression as skLogisticRegression
from sklearn.multiclass import OutputCodeClassifier as skOutputCodeClassifier
class OutputCodeClassifier():
    """Error-correcting output-code multiclass classifier.

    Every class is assigned a random code word whose entries are -1 or +1.
    One deep copy of ``estimator`` is trained per code bit, and a sample is
    assigned to the class whose code word has the smallest Euclidean
    distance to the vector of per-bit decision scores.

    Parameters
    ----------
    estimator : object
        Binary estimator exposing ``fit(X, y)`` and ``decision_function(X)``.
        It is deep-copied for every code bit, so the instance passed in is
        never fitted itself.
    code_size : float, default=1.5
        Code length as a multiple of the number of classes; the product is
        truncated with ``int``.
    random_state : default=0
        Seed handed to ``np.random.RandomState`` to draw the code book.

    Attributes
    ----------
    classes_ : sorted unique labels seen in ``fit``.
    code_book_ : array of shape (n_classes, code length) with -1 / +1 entries.
    estimators_ : list with one fitted estimator per code bit.
    """

    def __init__(self, estimator,
                 code_size=1.5, random_state=0):
        self.estimator = estimator
        self.code_size = code_size
        self.random_state = random_state

    def fit(self, X, y):
        """Draw the code book and fit one binary estimator per code bit.

        Raises
        ------
        ValueError
            If ``int(n_classes * code_size)`` is smaller than 1, because an
            empty code book cannot distinguish any classes.

        Returns
        -------
        self
        """
        self.classes_, y_enc = np.unique(y, return_inverse=True)
        n_classes = len(self.classes_)
        code_size_ = int(n_classes * self.code_size)
        if code_size_ < 1:
            # Without this check a zero-length code is accepted silently:
            # no estimator is trained and predict() has nothing to compare.
            raise ValueError(
                "code_size=%r gives a code length of %d for %d classes; "
                "int(n_classes * code_size) must be at least 1"
                % (self.code_size, code_size_, n_classes))
        rng = np.random.RandomState(self.random_state)
        self.code_book_ = rng.random_sample((n_classes, code_size_))
        # Binarise the uniform draws: values above 0.5 become +1, the rest -1.
        self.code_book_[self.code_book_ > 0.5] = 1
        self.code_book_[self.code_book_ != 1] = -1
        # Row i is the code word of sample i's class; column j is the
        # binary target for the j-th estimator.
        y_train = self.code_book_[y_enc]
        self.estimators_ = []
        for i in range(y_train.shape[1]):
            clf = deepcopy(self.estimator)
            clf.fit(X, y_train[:, i])
            self.estimators_.append(clf)
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the class with the nearest code word."""
        scores = np.zeros((X.shape[0], len(self.estimators_)))
        for i, est in enumerate(self.estimators_):
            scores[:, i] = est.decision_function(X)
        # cdist defaults to the Euclidean metric.
        pred = cdist(scores, self.code_book_).argmin(axis=1)
        return self.classes_[pred]
# Simplified version of LogisticRegression; only works for binary classification
class BinaryLogisticRegression():
    """L2-regularised logistic regression for two-class problems.

    Samples labelled ``1`` form the positive class; every other label is
    treated as the negative class. The weights are found with SciPy's
    L-BFGS-B optimiser, starting from all zeros.

    Parameters
    ----------
    C : float, default=1.0
        Inverse regularisation strength; the L2 penalty weight is ``1 / C``.

    Attributes
    ----------
    coef_ : array of shape (n_features,) with the fitted feature weights.
    intercept_ : fitted bias term (not penalised).
    """

    def __init__(self, C=1.0):
        self.C = C

    @staticmethod
    def _cost_grad(w, X, y, alpha):
        """Return the penalised logistic loss and its gradient at ``w``.

        ``w`` stores the feature weights followed by the intercept, ``y``
        holds labels in {-1, +1} and ``alpha`` is the L2 penalty weight.
        Only the feature weights are penalised.
        """
        yz = y * (np.dot(X, w[:-1]) + w[-1])
        # -log(sigmoid(yz)) == logaddexp(0, -yz): numerically stable for
        # large |yz| and evaluated in one vectorised call instead of a
        # per-sample Python loop.
        cost = np.sum(np.logaddexp(0, -yz)) + 0.5 * alpha * np.dot(w[:-1], w[:-1])
        grad = np.zeros(len(w))
        t = (expit(yz) - 1) * y
        grad[:-1] = np.dot(X.T, t) + alpha * w[:-1]
        grad[-1] = np.sum(t)
        return cost, grad

    def _solve_lbfgs(self, X, y):
        """Minimise the loss with L-BFGS-B and return ``(coef, intercept)``."""
        # Coerce to arrays: with a plain list, ``y == 1`` is the scalar
        # False and the mask assignment below would silently mark every
        # sample as negative.
        X = np.asarray(X)
        y = np.asarray(y)
        y_train = np.full(X.shape[0], -1)
        y_train[y == 1] = 1
        w0 = np.zeros(X.shape[1] + 1)
        res = minimize(fun=self._cost_grad, jac=True, x0=w0,
                       args=(X, y_train, 1 / self.C), method='L-BFGS-B')
        return res.x[:-1], res.x[-1]

    def fit(self, X, y):
        """Fit the model to ``X`` / ``y`` and return ``self``."""
        self.coef_, self.intercept_ = self._solve_lbfgs(X, y)
        return self

    def decision_function(self, X):
        """Return the signed score ``X @ coef_ + intercept_`` per sample."""
        scores = np.dot(X, self.coef_) + self.intercept_
        return scores

    def predict(self, X):
        """Return 1 where the decision score is positive and 0 elsewhere."""
        scores = self.decision_function(X)
        indices = (scores > 0).astype(int)
        return indices
# Check that the from-scratch classifier predicts exactly like scikit-learn
# on iris for several values of C. With C=np.inf the from-scratch penalty
# weight 1 / C is 0.
X, y = load_iris(return_X_y=True)  # loop-invariant, so load it only once
for C in [0.1, 1, 10, np.inf]:
    clf1 = OutputCodeClassifier(BinaryLogisticRegression(C=C)).fit(X, y)
    # multi_class is not passed: every sub-problem is binary, for which
    # scikit-learn's default already is one-vs-rest, and the parameter is
    # deprecated in recent scikit-learn releases.
    clf2 = skOutputCodeClassifier(
        skLogisticRegression(C=C, solver="lbfgs",
                             # keep consistent with scipy default
                             tol=1e-5, max_iter=15000),
        random_state=0).fit(X, y)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert np.array_equal(pred1, pred2)