import numpy as np
from sklearn.datasets import load_boston
from sklearn.dummy import DummyRegressor as skDummyRegressor
class DummyRegressor:
    """Baseline regressor that always predicts one constant value.

    The constant is derived from the training targets (or supplied by the
    caller) according to ``strategy``; the features ``X`` are never used
    for anything except determining how many predictions to return.

    Parameters
    ----------
    strategy : {"mean", "median", "quantile", "constant"}, default="mean"
        How the predicted constant is chosen.
    constant : scalar or array-like, default=None
        Value to predict when ``strategy="constant"``.
    quantile : float, default=None
        Quantile of ``y`` to predict when ``strategy="quantile"``.

    Attributes
    ----------
    constant_ : numpy.ndarray
        The fitted constant, reshaped to ``(1, -1)``. Set by ``fit``.
    """

    def __init__(self, strategy="mean", constant=None, quantile=None):
        self.strategy = strategy
        self.constant = constant
        self.quantile = quantile

    def fit(self, X, y):
        """Compute the constant to predict from the targets ``y``.

        Parameters
        ----------
        X : array-like
            Training features; ignored.
        y : array-like
            Training targets.

        Returns
        -------
        self : DummyRegressor
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``strategy`` is not recognised, or if ``strategy="quantile"``
            and no ``quantile`` was given.
        TypeError
            If ``strategy="constant"`` and no ``constant`` was given.
        """
        if self.strategy == "mean":
            constant = np.mean(y)
        elif self.strategy == "median":
            constant = np.median(y)
        elif self.strategy == "quantile":
            if self.quantile is None:
                raise ValueError(
                    "quantile must be specified when strategy='quantile'"
                )
            # Use the instance attribute, not a same-named global.
            constant = np.quantile(y, self.quantile)
        elif self.strategy == "constant":
            if self.constant is None:
                raise TypeError(
                    "constant must be specified when strategy='constant'"
                )
            constant = self.constant
        else:
            # Fail loudly instead of leaving constant_ unset or stale.
            raise ValueError(
                f"Unknown strategy {self.strategy!r}; expected one of "
                "'mean', 'median', 'quantile', 'constant'"
            )
        # keep consistent with scikit-learn
        self.constant_ = np.reshape(constant, (1, -1))
        return self

    def predict(self, X):
        """Return the fitted constant once per row of ``X``.

        Parameters
        ----------
        X : array-like
            Samples to predict for; only the length of its first axis is
            used.

        Returns
        -------
        numpy.ndarray
            One-dimensional array filled with the fitted constant.
        """
        # np.shape accepts nested lists as well as arrays.
        n_samples = np.shape(X)[0]
        return np.full(n_samples, self.constant_)
# Self-check: the local DummyRegressor must agree with scikit-learn's
# implementation for every supported strategy.
# NOTE: load_boston was removed in scikit-learn 1.2; running this check
# requires an older release (or swapping in another regression dataset).
X, y = load_boston(return_X_y=True)

param_sets = [
    {"strategy": "mean"},
    {"strategy": "median"},
    {"strategy": "constant", "constant": 0},
    {"strategy": "quantile", "quantile": 0.25},
    {"strategy": "quantile", "quantile": 0.75},
]

for params in param_sets:
    clf1 = DummyRegressor(**params).fit(X, y)
    clf2 = skDummyRegressor(**params).fit(X, y)
    assert np.allclose(clf1.constant_, clf2.constant_)
    # pred1 must come from the local implementation (clf1); comparing
    # clf2 against itself would make the assertion trivially true.
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert np.allclose(pred1, pred2)