import numpy as np
from copy import deepcopy
from sklearn.datasets import load_iris
from sklearn.ensemble import VotingClassifier as skVotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
class VotingClassifier():
    """Ensemble classifier that combines estimators by hard or soft voting.

    Parameters
    ----------
    estimators : list of (name, estimator) tuples
        Base estimators. Each one is deep-copied before fitting, so the
        objects passed in are never fitted or modified by this class.
    voting : {'hard', 'soft'}, default='hard'
        'hard' takes the majority of the predicted class labels; 'soft'
        takes the argmax of the averaged predicted probabilities.

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    estimators_ : list
        Fitted copies of the base estimators, trained on integer-encoded
        labels (indices into ``classes_``).
    """

    _VOTING_MODES = ('hard', 'soft')

    def __init__(self, estimators, voting='hard'):
        self.estimators = estimators
        self.voting = voting

    def _check_voting(self):
        """Raise ``ValueError`` if ``self.voting`` is not a supported mode."""
        if self.voting not in self._VOTING_MODES:
            raise ValueError(
                "voting must be 'hard' or 'soft'; got %r" % (self.voting,)
            )

    def fit(self, X, y):
        """Fit a deep copy of every base estimator on ``X`` and return self.

        The labels are encoded as indices into ``classes_`` before being
        handed to the estimators, so the hard-voting path can count votes
        with ``np.bincount``.
        """
        self.classes_, y_train = np.unique(y, return_inverse=True)
        self.estimators_ = [
            deepcopy(est).fit(X, y_train) for _, est in self.estimators
        ]
        return self

    def transform(self, X):
        """Return the raw per-estimator outputs for ``X``.

        Hard voting: the stacked ``predict`` outputs, transposed so that
        each row holds the votes for one sample (assuming every estimator
        returns one encoded label per sample).
        Soft voting: the stacked ``predict_proba`` outputs, with the
        estimator index as the leading axis.

        Raises
        ------
        ValueError
            If ``voting`` is neither 'hard' nor 'soft'.
        """
        self._check_voting()
        if self.voting == 'hard':
            return np.array([est.predict(X) for est in self.estimators_]).T
        return np.array([est.predict_proba(X) for est in self.estimators_])

    def predict(self, X):
        """Predict class labels for ``X``, expressed in the original labels.

        Raises
        ------
        ValueError
            If ``voting`` is neither 'hard' nor 'soft'.
        """
        outputs = self.transform(X)
        if self.voting == 'hard':
            # Count the votes in each row; argmax returns the first maximum,
            # so ties go to the lowest encoded class index.
            winners = np.apply_along_axis(
                lambda votes: np.argmax(np.bincount(votes)),
                axis=1,
                arr=outputs,
            )
        else:
            # Average over the estimator axis, then pick the likeliest class.
            winners = np.argmax(np.mean(outputs, axis=0), axis=1)
        return self.classes_[winners]

    def predict_proba(self, X):
        """Return the class probabilities averaged over all estimators.

        Raises
        ------
        AttributeError
            If ``voting`` is 'hard'; probabilities are only defined for
            soft voting.
        ValueError
            If ``voting`` is neither 'hard' nor 'soft'.
        """
        if self.voting == 'hard':
            raise AttributeError(
                "predict_proba is not available when voting='hard'"
            )
        return np.mean(self.transform(X), axis=0)
# Hard voting: VotingClassifier is built with its default voting='hard' here,
# and its per-estimator votes and final labels are compared against
# scikit-learn's VotingClassifier fitted on the same data.
X, y = load_iris(return_X_y = True)
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases -- confirm against the version this script is pinned to.
clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=15000, random_state=0)
clf2 = RandomForestClassifier(n_estimators=100, random_state=0)
eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)]).fit(X, y)
eclf2 = skVotingClassifier(estimators=[('lr', clf1), ('rf', clf2)]).fit(X, y)
# transform() yields the individual estimators' predicted labels.
prob1 = eclf1.transform(X)
prob2 = eclf2.transform(X)
assert np.allclose(prob1, prob2)
pred1 = eclf1.predict(X)
pred2 = eclf2.predict(X)
# Class labels are discrete, so compare them exactly (as the soft-voting
# label check in this file does) rather than with a float tolerance.
assert np.array_equal(pred1, pred2)
# Soft voting: both ensembles are built with voting='soft'; the stacked
# per-estimator probabilities, the averaged probabilities and the final
# labels are compared against scikit-learn's VotingClassifier.
X, y = load_iris(return_X_y=True)
clf1 = LogisticRegression(
    solver='lbfgs',
    multi_class='multinomial',
    max_iter=15000,
    random_state=0,
)
clf2 = RandomForestClassifier(n_estimators=100, random_state=0)
eclf1 = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2)],
    voting='soft',
).fit(X, y)
# flatten_transform=False asks scikit-learn for un-flattened transform output,
# which is what gets compared with this file's transform() below.
eclf2 = skVotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2)],
    voting='soft',
    flatten_transform=False,
).fit(X, y)

# Raw per-estimator probabilities.
prob1 = eclf1.transform(X)
prob2 = eclf2.transform(X)
assert np.allclose(prob1, prob2)

# Probabilities averaged across the estimators.
prob1 = eclf1.predict_proba(X)
prob2 = eclf2.predict_proba(X)
assert np.allclose(prob1, prob2)

# Final predicted labels.
pred1 = eclf1.predict(X)
pred2 = eclf2.predict(X)
assert np.array_equal(pred1, pred2)