class RandomForestRegressor:
    """Average of regression trees, each fit on a bootstrap resample.

    Every tree is a ``DecisionTreeRegressor`` (defined outside this class)
    trained on the full ``X`` with per-sample weights equal to how many times
    each row was drawn in that tree's bootstrap sample.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of trees to build.
    max_depth : int or None, default=None
        Forwarded unchanged to each ``DecisionTreeRegressor``.
    max_features : default="auto"
        Forwarded unchanged to each ``DecisionTreeRegressor``.
    oob_score : bool, default=False
        If true, ``fit`` also computes ``oob_prediction_`` and ``oob_score_``.
    random_state : default=0
        Seed for the ``np.random.RandomState`` that generates one seed per
        tree.

    Attributes set by ``fit``
    -------------------------
    n_features_ : ``X.shape[1]`` of the training data.
    estimators_ : list of the fitted trees.
    oob_prediction_, oob_score_ : only when ``oob_score`` is true.
    """

    def __init__(self, n_estimators=100, max_depth=None, max_features="auto",
                 oob_score=False, random_state=0):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.oob_score = oob_score
        self.random_state = random_state

    @staticmethod
    def _bootstrap_indices(seed, n_samples):
        """Return the ``n_samples`` row indices drawn with replacement for *seed*."""
        return np.random.RandomState(seed).randint(0, n_samples, n_samples)

    def fit(self, X, y):
        """Fit ``n_estimators`` trees on bootstrap resamples of ``(X, y)``.

        ``X`` must expose ``.shape`` with at least two dimensions. Returns
        ``self``.
        """
        n_samples = X.shape[0]
        self.n_features_ = X.shape[1]
        max_seed = np.iinfo(np.int32).max
        rng = np.random.RandomState(self.random_state)
        self.estimators_ = []
        for _ in range(self.n_estimators):
            est = DecisionTreeRegressor(max_depth=self.max_depth,
                                        max_features=self.max_features,
                                        random_state=rng.randint(max_seed))
            # The bootstrap draw is seeded from the tree's own random_state so
            # that _set_oob_score can regenerate the identical sample later
            # without having to store it.
            in_bag = self._bootstrap_indices(est.random_state, n_samples)
            # Resampling is expressed as integer weights (draw counts) rather
            # than by materialising a resampled copy of X.
            sample_counts = np.bincount(in_bag, minlength=n_samples)
            est.fit(X, y, sample_weight=sample_counts)
            self.estimators_.append(est)
        if self.oob_score:
            self._set_oob_score(X, y)
        return self

    def _set_oob_score(self, X, y):
        """Compute out-of-bag predictions and their score.

        Sets ``oob_prediction_`` to, for each training row, the mean
        prediction of the trees whose bootstrap sample did not contain that
        row; rows that were in-bag for every tree get NaN. Sets
        ``oob_score_`` to ``r2_score`` (defined outside this class) evaluated
        on the rows that have an out-of-bag prediction, or NaN if there are
        none. A warning is issued when some rows have no out-of-bag
        prediction.
        """
        import warnings

        n_samples = X.shape[0]
        prediction_sums = np.zeros(n_samples)
        n_predictions = np.zeros(n_samples)
        for est in self.estimators_:
            # Regenerate exactly the bootstrap sample this tree was fit on.
            in_bag = self._bootstrap_indices(est.random_state, n_samples)
            oob_mask = np.ones(n_samples, dtype=bool)
            oob_mask[in_bag] = False
            if not oob_mask.any():
                # Nothing left out for this tree; avoid predicting on an
                # empty slice.
                continue
            prediction_sums[oob_mask] += est.predict(X[oob_mask])
            n_predictions[oob_mask] += 1

        # Divide only where at least one tree contributed; a plain
        # ``sums / counts`` would produce 0/0 for always-in-bag rows and feed
        # NaN into the scorer.
        covered = n_predictions > 0
        oob_prediction = np.full(n_samples, np.nan)
        oob_prediction[covered] = (prediction_sums[covered]
                                   / n_predictions[covered])
        self.oob_prediction_ = oob_prediction

        if not covered.all():
            warnings.warn("Some inputs do not have OOB scores. This probably "
                          "means too few trees were used to compute any "
                          "reliable OOB estimates.")
        if covered.any():
            self.oob_score_ = r2_score(np.asarray(y)[covered],
                                       oob_prediction[covered])
        else:
            self.oob_score_ = np.nan

    def predict(self, X):
        """Return the mean of the fitted trees' predictions for ``X``.

        The result is a 1-D float array of length ``X.shape[0]``.
        """
        pred = np.zeros(X.shape[0])
        # Iterate the fitted trees themselves so the average stays correct
        # even if ``n_estimators`` was modified after fitting.
        for est in self.estimators_:
            pred += est.predict(X)
        pred /= len(self.estimators_)
        return pred

    @property
    def feature_importances_(self):
        """Mean per-tree feature importances, normalised to sum to 1.

        Returns an array of length ``n_features_``. If the importances sum
        to zero (including the case of no fitted trees), an all-zero array is
        returned instead of dividing by zero.
        """
        all_importances = np.zeros(self.n_features_)
        for est in self.estimators_:
            all_importances += est.feature_importances_
        if self.estimators_:
            all_importances /= len(self.estimators_)
        total = np.sum(all_importances)
        if total == 0:
            return all_importances
        return all_importances / total