class AdaBoostRegressor:
    """Boosted regressor in the style of AdaBoost.R2 with a linear loss.

    Each boosting round fits a depth-3 ``DecisionTreeRegressor`` on a
    weighted bootstrap resample of the training data, then re-weights the
    samples so that poorly predicted ones are more likely to be drawn in
    the next round. Predictions are the weighted median of the individual
    estimators' predictions.

    Parameters
    ----------
    n_estimators : int, default=50
        Maximum number of boosting rounds. Boosting may stop earlier (see
        ``fit``).
    random_state : int or None, default=0
        Seed passed to ``np.random.RandomState``; drives both the bootstrap
        resampling and the seeds handed to the individual trees.

    Attributes set by ``fit``
    -------------------------
    estimators_ : list
        The fitted trees, in boosting order.
    estimator_weights_ : ndarray of shape (n_estimators,)
        ``log(1 / beta)`` for each fitted tree; 0 for rounds never reached.
    estimator_errors_ : ndarray of shape (n_estimators,)
        Weighted average loss of each fitted tree; 1 for rounds never
        reached.
    """

    def __init__(self, n_estimators=50, random_state=0):
        self.n_estimators = n_estimators
        # Keep the caller's seed (previously hard-coded to 0, which silently
        # ignored the argument).
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the boosted ensemble on ``(X, y)``.

        ``X`` and ``y`` must expose ``.shape`` / support integer-array
        indexing (e.g. NumPy arrays), since bootstrap samples are taken with
        ``X[idx]`` and ``y[idx]``.

        Boosting stops early when a tree fits the training data perfectly
        (nothing left to re-weight) or when a tree's weighted error reaches
        0.5 (its weight ``log(1 / beta)`` would be non-positive).

        Returns
        -------
        self
        """
        n_samples = X.shape[0]
        sample_weight = np.full(n_samples, 1 / n_samples)
        self.estimators_ = []
        self.estimator_weights_ = np.zeros(self.n_estimators)
        self.estimator_errors_ = np.ones(self.n_estimators)
        max_int = np.iinfo(np.int32).max
        rng = np.random.RandomState(self.random_state)

        for i in range(self.n_estimators):
            est = DecisionTreeRegressor(
                max_depth=3,
                random_state=rng.randint(max_int),
            )

            # Weighted bootstrap: invert the normalised CDF of the sample
            # weights at uniformly drawn points.
            cdf = np.cumsum(sample_weight)
            cdf /= cdf[-1]
            uniform_samples = rng.random_sample(n_samples)
            bootstrap_idx = cdf.searchsorted(uniform_samples, side='right')
            est.fit(X[bootstrap_idx], y[bootstrap_idx])

            # Linear loss, scaled into [0, 1] by the largest absolute error.
            error_vect = np.abs(est.predict(X) - y)
            error_max = error_vect.max()
            if error_max != 0:
                # Skip the division on a perfect fit to avoid 0 / 0 -> NaN.
                error_vect /= error_max
            estimator_error = (sample_weight * error_vect).sum()

            if estimator_error <= 0:
                # Perfect fit: keep the tree with unit weight and stop,
                # because beta would be 0 and log(1 / beta) infinite.
                self.estimators_.append(est)
                self.estimator_errors_[i] = 0.0
                self.estimator_weights_[i] = 1.0
                break

            if estimator_error >= 0.5:
                # beta >= 1 would give a non-positive weight and break the
                # weighted median. Discard this tree unless it is the only
                # one, so that predict() still has something to use.
                if not self.estimators_:
                    self.estimators_.append(est)
                break

            beta = estimator_error / (1 - estimator_error)
            estimator_weight = np.log(1 / beta)

            # Well-predicted samples (small error) are shrunk the most.
            sample_weight *= np.power(beta, 1 - error_vect)
            sample_weight /= np.sum(sample_weight)

            self.estimators_.append(est)
            self.estimator_errors_[i] = estimator_error
            self.estimator_weights_[i] = estimator_weight

        return self

    def predict(self, X):
        """Return the weighted median prediction of the fitted estimators.

        For every sample the estimators' predictions are sorted, their
        weights accumulated in that order, and the prediction at which the
        cumulative weight first reaches half of the total is returned.

        Returns
        -------
        ndarray of shape (n_samples,)
        """
        # Shape (n_samples, n_fitted_estimators).
        predictions = np.array(
            [est.predict(X) for est in self.estimators_]
        ).T
        rows = np.arange(predictions.shape[0])

        sorted_idx = np.argsort(predictions, axis=1)
        # sorted_idx only contains indices of fitted estimators, so unused
        # trailing slots of estimator_weights_ are never read.
        weight_cdf = np.cumsum(self.estimator_weights_[sorted_idx], axis=1)
        half_total = 0.5 * weight_cdf[:, -1][:, np.newaxis]
        # argmax on a boolean row yields the first True position.
        median_idx = (weight_cdf >= half_total).argmax(axis=1)
        median_estimators = sorted_idx[rows, median_idx]
        return predictions[rows, median_estimators]