class GradientBoostingClassifier():
def __init__(self, learning_rate=0.1, n_estimators=100, max_depth=3, random_state=0):
self.learning_rate = learning_rate
self.n_estimators = n_estimators
self.max_depth = max_depth
self.random_state = random_state
def fit(self, X, y):
self.n_features_ = X.shape[1]
self.classes_, y = np.unique(y, return_inverse=True)
self.n_classes_ = len(self.classes_)
if self.n_classes_ == 2:
n_effective_classes = 1
else:
n_effective_classes = self.n_classes_
self.estimators_ = np.empty((self.n_estimators, n_effective_classes), dtype=np.object)
raw_predictions = np.zeros((X.shape[0], n_effective_classes))
rng = np.random.RandomState(0)
for i in range(self.n_estimators):
raw_predictions_copy = raw_predictions.copy()
for j in range(n_effective_classes):
# binary classification
if n_effective_classes == 1:
y_enc = y
residual = y_enc - expit(raw_predictions_copy.ravel())
# multiclass classification
else:
y_enc = (y == j).astype(np.int)
residual = y_enc - np.nan_to_num(np.exp(raw_predictions_copy[:, j]
- logsumexp(raw_predictions_copy, axis=1)))
tree = DecisionTreeRegressor(criterion="friedman_mse", max_depth=self.max_depth,
random_state=rng)
tree.fit(X, residual)
terminal_regions = tree.apply(X)
for leaf in np.where(tree.tree_.children_left == -1)[0]:
cur = np.where(terminal_regions == leaf)[0]
# binary classification
if n_effective_classes == 1:
numerator = np.sum(residual[cur])
denominator = np.sum((y_enc[cur] - residual[cur]) * (1 - y_enc[cur] + residual[cur]))
# multiclass classification
else:
numerator = np.sum(residual[cur])
numerator *= (self.n_classes_ - 1) / self.n_classes_
denominator = np.sum((y_enc[cur] - residual[cur]) * (1 - y_enc[cur] + residual[cur]))
if np.abs(denominator) < 1e-150:
tree.tree_.value[leaf, 0, 0] = 0
else:
tree.tree_.value[leaf, 0, 0] = numerator / denominator
raw_predictions[:, j] += self.learning_rate * tree.tree_.value[:, 0, 0][terminal_regions]
self.estimators_[i, j] = tree
return self
def _predict(self, X):
raw_predictions = np.zeros((X.shape[0], self.estimators_.shape[1]))
for i in range(self.estimators_.shape[0]):
for j in range(self.estimators_.shape[1]):
raw_predictions[:, j] += self.learning_rate * self.estimators_[i, j].predict(X)
return raw_predictions
def decision_function(self, X):
prob = self._predict(X)
if self.n_classes_ == 2:
return prob.ravel()
else:
return prob
def predict_proba(self, X):
scores = self.decision_function(X)
if len(scores.shape) == 1:
prob = expit(scores)
prob = np.vstack((1 - prob, prob)).T
else:
prob = np.nan_to_num(np.exp(scores - logsumexp(scores, axis=1)[:, np.newaxis]))
return prob
def predict(self, X):
scores = self.decision_function(X)
if len(scores.shape) == 1:
indices = (scores > 0).astype(int)
else:
indices = np.argmax(scores, axis=1)
return self.classes_[indices]
@property
def feature_importances_(self):
all_importances = np.zeros(self.n_features_)
for i in range(self.estimators_.shape[0]):
for j in range(self.estimators_.shape[1]):
all_importances += self.estimators_[i, j].tree_.compute_feature_importances(normalize=False)
return all_importances / np.sum(all_importances)