import numpy as np
from sklearn.datasets import load_boston
from sklearn.tree import DecisionTreeRegressor
from sklearn.utils.stats import _weighted_percentile
from sklearn.ensemble import GradientBoostingRegressor as skGradientBoostingRegressor
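# Three from-scratch versions of gradient boosting regression follow, each checked against
# scikit-learn on the Boston housing data: squared-error loss (scikit-learn's default),
# least absolute deviation (loss="lad"), and Huber loss (loss="huber"). Note: the comparisons
# assume an older scikit-learn release in which load_boston, the presort parameter, loss="lad",
# and sklearn.utils.stats._weighted_percentile are all still available.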
class GradientBoostingRegressor():
    def __init__(self, learning_rate=0.1, n_estimators=100, max_depth=3, random_state=0):
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.random_state = random_state

    def fit(self, X, y):
        self.n_features_ = X.shape[1]
        self.estimators_ = np.empty((self.n_estimators, 1), dtype=object)
        raw_predictions = np.zeros(X.shape[0])
        rng = np.random.RandomState(0)
        for i in range(self.n_estimators):
            # negative gradient of the squared-error loss is the plain residual
            residual = y - raw_predictions
            tree = DecisionTreeRegressor(criterion="friedman_mse", max_depth=self.max_depth,
                                         random_state=rng)
            tree.fit(X, residual)
            raw_predictions += self.learning_rate * tree.predict(X)
            self.estimators_[i, 0] = tree
        return self

    def predict(self, X):
        raw_predictions = np.zeros(X.shape[0])
        for i in range(self.n_estimators):
            raw_predictions += self.learning_rate * self.estimators_[i, 0].predict(X)
        return raw_predictions

    @property
    def feature_importances_(self):
        all_importances = np.zeros(self.n_features_)
        for i in range(self.n_estimators):
            all_importances += self.estimators_[i, 0].tree_.compute_feature_importances(normalize=False)
        return all_importances / np.sum(all_importances)

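# init="zero" makes scikit-learn start boosting from an all-zero raw prediction, matching the
# np.zeros initialisation in fit above, so the two models should agree on the training data.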
X, y = load_boston(return_X_y=True)
clf1 = GradientBoostingRegressor().fit(X, y)
clf2 = skGradientBoostingRegressor(init="zero", presort=False, random_state=0).fit(X, y)
assert np.allclose(clf1.feature_importances_, clf2.feature_importances_)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.allclose(pred1, pred2)
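
# The variant below implements least absolute deviation: trees are fit to sign(y - raw_predictions)
# and each leaf is then overwritten with the median of the raw residuals in that leaf.
# scikit-learn computes this median with _weighted_percentile, which returns an element of the
# array (the lower weighted median) rather than interpolating like np.median. A minimal
# illustration with arbitrary toy values (expected: 2.0 from _weighted_percentile vs 2.5 from
# np.median, though the exact behaviour may depend on the scikit-learn version):
_demo = np.array([1.0, 2.0, 3.0, 4.0])
print(_weighted_percentile(_demo, np.ones(4)), np.median(_demo))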
class GradientBoostingRegressor():
    def __init__(self, learning_rate=0.1, n_estimators=100, max_depth=3, random_state=0):
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.random_state = random_state

    def fit(self, X, y):
        self.n_features_ = X.shape[1]
        self.estimators_ = np.empty((self.n_estimators, 1), dtype=object)
        raw_predictions = np.zeros(X.shape[0])
        rng = np.random.RandomState(0)
        for i in range(self.n_estimators):
            # negative gradient of the absolute-error loss is the sign of the residual
            residual = np.sign(y - raw_predictions)
            tree = DecisionTreeRegressor(criterion="friedman_mse", max_depth=self.max_depth,
                                         random_state=rng)
            tree.fit(X, residual)
            terminal_regions = tree.apply(X)
            # overwrite each leaf with the median of the raw residuals that fall into it
            for leaf in np.where(tree.tree_.children_left == -1)[0]:
                cur = np.where(terminal_regions == leaf)[0]
                # scikit-learn uses _weighted_percentile, whose result can differ from np.median
                tree.tree_.value[leaf, 0, 0] = _weighted_percentile(y[cur] - raw_predictions[cur],
                                                                    np.ones(cur.shape[0]))
            raw_predictions += self.learning_rate * tree.tree_.value[:, 0, 0][terminal_regions]
            self.estimators_[i, 0] = tree
        return self

    def predict(self, X):
        raw_predictions = np.zeros(X.shape[0])
        for i in range(self.n_estimators):
            raw_predictions += self.learning_rate * self.estimators_[i, 0].predict(X)
        return raw_predictions

    @property
    def feature_importances_(self):
        all_importances = np.zeros(self.n_features_)
        for i in range(self.n_estimators):
            all_importances += self.estimators_[i, 0].tree_.compute_feature_importances(normalize=False)
        return all_importances / np.sum(all_importances)

X, y = load_boston(return_X_y=True)
clf1 = GradientBoostingRegressor().fit(X, y)
clf2 = skGradientBoostingRegressor(init="zero", loss="lad", presort=False, random_state=0).fit(X, y)
assert np.allclose(clf1.feature_importances_, clf2.feature_importances_)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.allclose(pred1, pred2)
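
# The Huber variant below clips the residual at gamma, the alpha-quantile (alpha=0.9 by default)
# of |y - raw_predictions| at the current iteration; the two-mask assignment in fit is equivalent
# to np.clip(diff, -gamma, gamma). Each leaf is then set to the median of its residuals plus the
# mean of a gamma-clipped correction around that median, mirroring scikit-learn's loss="huber".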
class GradientBoostingRegressor():
    def __init__(self, learning_rate=0.1, n_estimators=100, max_depth=3,
                 random_state=0, alpha=0.9):
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.random_state = random_state
        self.alpha = alpha

    def fit(self, X, y):
        self.n_features_ = X.shape[1]
        self.estimators_ = np.empty((self.n_estimators, 1), dtype=object)
        raw_predictions = np.zeros(X.shape[0])
        rng = np.random.RandomState(0)
        for i in range(self.n_estimators):
            # Huber pseudo-residual: keep the residual inside the gamma band, clip it outside
            residual = np.zeros(X.shape[0])
            diff = y - raw_predictions
            gamma = _weighted_percentile(np.abs(diff), np.ones(diff.shape[0]), self.alpha * 100)
            gamma_mask = np.abs(diff) <= gamma
            residual[gamma_mask] = diff[gamma_mask]
            residual[~gamma_mask] = gamma * np.sign(diff[~gamma_mask])
            tree = DecisionTreeRegressor(criterion="friedman_mse", max_depth=self.max_depth,
                                         random_state=rng)
            tree.fit(X, residual)
            terminal_regions = tree.apply(X)
            # leaf update: median of the residuals plus a gamma-clipped correction around it
            for leaf in np.where(tree.tree_.children_left == -1)[0]:
                cur = np.where(terminal_regions == leaf)[0]
                diff = y[cur] - raw_predictions[cur]
                # scikit-learn uses _weighted_percentile, whose result can differ from np.median
                median = _weighted_percentile(diff, np.ones(diff.shape[0]))
                diff_minus_median = diff - median
                tree.tree_.value[leaf, 0, 0] = median + np.mean(np.sign(diff_minus_median)
                                                                * np.minimum(np.abs(diff_minus_median), gamma))
            raw_predictions += self.learning_rate * tree.tree_.value[:, 0, 0][terminal_regions]
            self.estimators_[i, 0] = tree
        return self

    def predict(self, X):
        raw_predictions = np.zeros(X.shape[0])
        for i in range(self.n_estimators):
            raw_predictions += self.learning_rate * self.estimators_[i, 0].predict(X)
        return raw_predictions

    @property
    def feature_importances_(self):
        all_importances = np.zeros(self.n_features_)
        for i in range(self.n_estimators):
            all_importances += self.estimators_[i, 0].tree_.compute_feature_importances(normalize=False)
        return all_importances / np.sum(all_importances)

X, y = load_boston(return_X_y=True)
clf1 = GradientBoostingRegressor().fit(X, y)
clf2 = skGradientBoostingRegressor(init="zero", loss="huber", presort=False, random_state=0).fit(X, y)
assert np.allclose(clf1.feature_importances_, clf2.feature_importances_)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.allclose(pred1, pred2)