import numpy as np
from scipy.special import expit, logsumexp
from scipy.optimize import minimize
from sklearn.datasets import load_iris, load_breast_cancer
from sklearn.linear_model import LogisticRegression as skLogisticRegression
class LogisticRegression():
def __init__(self, C=1.0):
self.C = C
def _encode(self, y):
classes = np.unique(y)
y_train = np.full((y.shape[0], len(classes)), -1)
for i, c in enumerate(classes):
y_train[y == c, i] = 1
if len(classes) == 2:
y_train = y_train[:, 1].reshape(-1, 1)
return classes, y_train
@staticmethod
def _cost_grad(w, X, y, alpha):
def _log_logistic(x):
if x > 0:
return -np.log(1 + np.exp(-x))
else:
return x - np.log(1 + np.exp(x))
yz = y * (np.dot(X, w[:-1]) + w[-1])
cost = -np.sum(np.vectorize(_log_logistic)(yz)) + 0.5 * alpha * np.dot(w[:-1], w[:-1])
grad = np.zeros(len(w))
t = (expit(yz) - 1) * y
grad[:-1] = np.dot(X.T, t) + alpha * w[:-1]
grad[-1] = np.sum(t)
return cost, grad
def _solve_lbfgs(self, X, y):
result = np.zeros((y.shape[1], X.shape[1] + 1))
for i in range(y.shape[1]):
cur_y = y[:, i]
w0 = np.zeros(X.shape[1] + 1)
res = minimize(fun=self._cost_grad, jac=True, x0=w0,
args=(X, cur_y, 1 / self.C), method='L-BFGS-B')
result[i] = res.x
return result[:, :-1], result[:, -1]
def fit(self, X, y):
self.classes_, y_train = self._encode(y)
self.coef_, self.intercept_ = self._solve_lbfgs(X, y_train)
return self
def decision_function(self, X):
scores = np.dot(X, self.coef_.T) + self.intercept_
if scores.shape[1] == 1:
return scores.ravel()
else:
return scores
def predict(self, X):
scores = self.decision_function(X)
if len(scores.shape) == 1:
indices = (scores > 0).astype(int)
else:
indices = np.argmax(scores, axis=1)
return self.classes_[indices]
def predict_proba(self, X):
scores = self.decision_function(X)
prob = expit(scores)
if len(scores.shape) == 1:
prob = np.vstack((1 - prob, prob)).T
else:
prob /= np.sum(prob, axis=1)[:, np.newaxis]
return prob
# binary classification
for C in [0.1, 1, 10]:
X, y = load_breast_cancer(return_X_y = True)
clf1 = LogisticRegression(C=C).fit(X, y)
clf2 = skLogisticRegression(C=C, multi_class="ovr", solver="lbfgs",
# keep consisent with scipy default
tol=1e-5, max_iter=15000).fit(X, y)
assert clf1.coef_.shape == (1, X.shape[1])
assert np.allclose(clf1.coef_, clf2.coef_)
assert np.allclose(clf1.intercept_, clf2.intercept_)
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.allclose(prob1, prob2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
# multiclass classification
for C in [0.1, 1, 10]:
X, y = load_iris(return_X_y=True)
clf1 = LogisticRegression(C=C).fit(X, y)
clf2 = skLogisticRegression(C=C, multi_class="ovr", solver="lbfgs",
# keep consisent with scipy default
tol=1e-5, max_iter=15000).fit(X, y)
assert clf1.coef_.shape == (len(np.unique(y)), X.shape[1])
assert np.allclose(clf1.coef_, clf2.coef_)
assert np.allclose(clf1.intercept_, clf2.intercept_)
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.allclose(prob1, prob2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
# penalty = 'none'
X, y = load_iris(return_X_y=True)
clf1 = LogisticRegression(C=np.inf).fit(X, y)
clf2 = skLogisticRegression(penalty='none', multi_class="ovr", solver="lbfgs",
# keep consisent with scipy default
tol=1e-5, max_iter=15000).fit(X, y)
assert clf1.coef_.shape == (len(np.unique(y)), X.shape[1])
assert np.allclose(clf1.coef_, clf2.coef_)
assert np.allclose(clf1.intercept_, clf2.intercept_)
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.allclose(prob1, prob2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
class LogisticRegression():
def __init__(self, C=1.0):
self.C = C
def _encode(self, y):
classes = np.unique(y)
y_train = np.zeros((y.shape[0], len(classes)))
for i, c in enumerate(classes):
y_train[y == c, i] = 1
return classes, y_train
@staticmethod
def _cost_grad(w, X, y, alpha):
w = w.reshape(y.shape[1], -1)
p = np.dot(X, w[:, :-1].T) + w[:, -1]
p -= logsumexp(p, axis=1)[:, np.newaxis]
cost = -np.sum(y * p) + 0.5 * alpha * np.dot(w[:, :-1].ravel(), w[:, :-1].ravel())
grad = np.zeros_like(w)
diff = np.exp(p) - y
grad[:, :-1] = np.dot(diff.T, X) + alpha * w[:, :-1]
grad[:, -1] = np.sum(diff, axis=0)
return cost, grad.ravel()
def _solve_lbfgs(self, X, y):
w0 = np.zeros(y.shape[1] * (X.shape[1] + 1))
res = minimize(fun=self._cost_grad, jac=True, x0=w0,
args=(X, y, 1 / self.C), method='L-BFGS-B')
result = res.x.reshape(y.shape[1], -1)
if y.shape[1] == 2:
result = result[1][np.newaxis, :]
return result[:, :-1], result[:, -1]
def fit(self, X, y):
self.classes_, y_train = self._encode(y)
self.coef_, self.intercept_ = self._solve_lbfgs(X, y_train)
return self
def decision_function(self, X):
scores = np.dot(X, self.coef_.T) + self.intercept_
if scores.shape[1] == 1:
return scores.ravel()
else:
return scores
def predict(self, X):
scores = self.decision_function(X)
if len(scores.shape) == 1:
indices = (scores > 0).astype(int)
else:
indices = np.argmax(scores, axis=1)
return self.classes_[indices]
def predict_proba(self, X):
scores = self.decision_function(X)
if len(scores.shape) == 1:
scores = np.c_[-scores, scores]
scores -= np.max(scores, axis=1)[:, np.newaxis]
prob = np.exp(scores)
prob /= np.sum(prob, axis=1)[:, np.newaxis]
return prob
# binary classification
for C in [0.1, 1, 10]:
X, y = load_breast_cancer(return_X_y = True)
clf1 = LogisticRegression(C=C).fit(X, y)
clf2 = skLogisticRegression(C=C, multi_class="multinomial", solver="lbfgs",
# keep consisent with scipy default
tol=1e-5, max_iter=15000).fit(X, y)
assert clf1.coef_.shape == (1, X.shape[1])
assert np.allclose(clf1.coef_, clf2.coef_)
assert np.allclose(clf1.intercept_, clf2.intercept_)
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.allclose(prob1, prob2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
# multiclass classification
for C in [0.1, 1, 10]:
X, y = load_iris(return_X_y=True)
clf1 = LogisticRegression(C=C).fit(X, y)
clf2 = skLogisticRegression(C=C, multi_class="multinomial", solver="lbfgs",
# keep consisent with scipy default
tol=1e-5, max_iter=15000).fit(X, y)
assert clf1.coef_.shape == (len(np.unique(y)), X.shape[1])
assert np.allclose(clf1.coef_, clf2.coef_)
assert np.allclose(clf1.intercept_, clf2.intercept_)
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.allclose(prob1, prob2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
# penalty = 'none'
X, y = load_iris(return_X_y=True)
clf1 = LogisticRegression(C=np.inf).fit(X, y)
clf2 = skLogisticRegression(penalty='none', multi_class="multinomial", solver="lbfgs",
# keep consisent with scipy default
tol=1e-5, max_iter=15000).fit(X, y)
assert clf1.coef_.shape == (len(np.unique(y)), X.shape[1])
assert np.allclose(clf1.coef_, clf2.coef_)
assert np.allclose(clf1.intercept_, clf2.intercept_)
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.allclose(prob1, prob2)
prob1 = clf1.predict_proba(X)
prob2 = clf2.predict_proba(X)
assert np.allclose(prob1, prob2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)