import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.model_selection import cross_val_predict
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.ensemble import StackingRegressor as skStackingRegressor
class StackingRegressor():
    """Minimal stacked-generalization regressor.

    Each base estimator is cloned and fitted on the full training set; the
    final estimator is fitted on the base estimators' cross-validated
    predictions, one column per base estimator.

    Parameters
    ----------
    estimators : list of estimators
        Unfitted base regressors. They are cloned before fitting, so the
        objects passed in are not fitted by this class.
    final_estimator : estimator
        Regressor trained on the stacked base predictions. Cloned in `fit`.
    cv : int, cross-validation generator, iterable or None, default=None
        Passed unchanged to ``cross_val_predict`` when building the training
        features of the final estimator. ``None`` keeps scikit-learn's
        default splitting strategy.

    Attributes
    ----------
    estimators_ : list of estimators
        Clones of `estimators` fitted on the full training data (set by `fit`).
    final_estimator_ : estimator
        Clone of `final_estimator` fitted on the cross-validated base
        predictions (set by `fit`).
    """

    def __init__(self, estimators, final_estimator, cv=None):
        self.estimators = estimators
        self.final_estimator = final_estimator
        self.cv = cv

    def fit(self, X, y):
        """Fit the base estimators and the final estimator.

        Parameters
        ----------
        X : array-like
            Training features.
        y : array-like
            Training targets.

        Returns
        -------
        self : StackingRegressor
            The fitted instance, so calls can be chained.

        Raises
        ------
        ValueError
            If `estimators` is empty.
        """
        # Fail early with a clear message; otherwise np.hstack([]) below
        # raises a ValueError that does not mention the real cause.
        if not self.estimators:
            raise ValueError(
                "StackingRegressor requires at least one base estimator."
            )

        # Models used at prediction time are trained on all of the data.
        self.estimators_ = [clone(est).fit(X, y) for est in self.estimators]

        # The final estimator is trained on out-of-fold predictions so it does
        # not see base-model outputs for samples those models were fitted on.
        oof_columns = [
            cross_val_predict(est, X, y, cv=self.cv).reshape(-1, 1)
            for est in self.estimators
        ]
        X_meta = np.hstack(oof_columns)

        self.final_estimator_ = clone(self.final_estimator)
        self.final_estimator_.fit(X_meta, y)
        return self

    def transform(self, X):
        """Return the fitted base estimators' predictions for `X`.

        Parameters
        ----------
        X : array-like
            Samples to predict on.

        Returns
        -------
        numpy.ndarray
            Horizontal stack of each estimator's predictions reshaped to a
            single column, in the order of `estimators_`.
        """
        columns = [est.predict(X).reshape(-1, 1) for est in self.estimators_]
        return np.hstack(columns)

    def predict(self, X):
        """Predict targets for `X` with the final estimator.

        The final estimator receives the output of `transform(X)`.
        """
        return self.final_estimator_.predict(self.transform(X))
# Equivalence check: the hand-written StackingRegressor above must reproduce
# scikit-learn's StackingRegressor on the same data with the same base models.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the pinned scikit-learn version before running this script.
X, y = load_boston(return_X_y=True)

# Local implementation: takes bare estimators.
reg1 = StackingRegressor(
    estimators=[
        RandomForestRegressor(random_state=0),
        GradientBoostingRegressor(random_state=0),
        SVR(),
    ],
    final_estimator=Ridge(random_state=0),
)
reg1.fit(X, y)

# Reference implementation: takes (name, estimator) pairs.
reg2 = skStackingRegressor(
    estimators=[
        ("rf", RandomForestRegressor(random_state=0)),
        ("gbdt", GradientBoostingRegressor(random_state=0)),
        ("svr", SVR()),
    ],
    final_estimator=Ridge(random_state=0),
)
reg2.fit(X, y)

# Base-model prediction matrices must agree ...
trans1 = reg1.transform(X)
trans2 = reg2.transform(X)
assert np.allclose(trans1, trans2)

# ... and so must the final stacked predictions.
pred1 = reg1.predict(X)
pred2 = reg2.predict(X)
assert np.allclose(pred1, pred2)