class MLPRegressor:
    """Multi-layer perceptron regressor trained with L-BFGS.

    Hidden layers use the logistic (sigmoid) activation; the output layer is
    linear.  Training minimizes mean squared error plus an L2 penalty on the
    weights via ``scipy.optimize.minimize(method='L-BFGS-B')``.

    Parameters
    ----------
    hidden_layer_sizes : tuple of int
        Number of units in each hidden layer.
    alpha : float
        L2 regularization strength.
    max_iter : int
        Maximum number of L-BFGS iterations.
    random_state : int
        Seed for the random weight initialization.
    """

    def __init__(self, hidden_layer_sizes=(100,), alpha=0.0001, max_iter=200,
                 random_state=0):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.alpha = alpha
        self.max_iter = max_iter
        self.random_state = random_state

    def _pack(self, coefs, intercepts):
        """Flatten all weight matrices and bias vectors into one 1-D array."""
        return np.hstack([cur.ravel() for cur in coefs + intercepts])

    def _unpack(self, packed_coef):
        """Restore ``self.coefs_`` / ``self.intercepts_`` from a flat vector."""
        for i in range(self.n_layers_ - 1):
            start, end, shape = self._coef_indptr[i]
            self.coefs_[i] = np.reshape(packed_coef[start:end], shape)
            start, end = self._intercept_indptr[i]
            self.intercepts_[i] = packed_coef[start:end]

    def _forward_pass(self, activations):
        """Fill ``activations[1:]`` layer by layer.

        Sigmoid on every hidden layer; the last (output) layer stays linear.
        """
        for i in range(self.n_layers_ - 1):
            activations[i + 1] = (np.dot(activations[i], self.coefs_[i])
                                  + self.intercepts_[i])
            if i + 1 != self.n_layers_ - 1:  # output layer is identity
                activations[i + 1] = expit(activations[i + 1])
        return activations

    def _compute_loss_grad(self, layer, n_samples, activations, deltas,
                           coef_grads, intercept_grads):
        """Gradient of the penalized loss for one layer's weights and bias."""
        coef_grads[layer] = np.dot(activations[layer].T, deltas[layer])
        coef_grads[layer] += self.alpha * self.coefs_[layer]
        coef_grads[layer] /= n_samples
        intercept_grads[layer] = np.mean(deltas[layer], axis=0)

    def _cost_grad(self, packed_coef, X, y_train, activations, deltas,
                   coef_grads, intercept_grads):
        """Return ``(loss, gradient)`` for a flat parameter vector.

        The gradient is packed in the same layout as ``packed_coef`` so the
        pair can be handed directly to ``minimize(..., jac=True)``.  The loss
        is ``0.5 * SSE / n_samples`` (+ L2 penalty): with this scaling the
        output-layer delta ``(prediction - target)`` divided by ``n_samples``
        is exactly the loss gradient, for any number of outputs.
        """
        self._unpack(packed_coef)
        n_samples = X.shape[0]
        # forward pass
        activations = self._forward_pass(activations)
        loss = np.sum(np.square(y_train - activations[-1])) / (2 * n_samples)
        loss += (0.5 * self.alpha
                 * sum(np.dot(c.ravel(), c.ravel()) for c in self.coefs_)
                 / n_samples)
        # backward pass: output-layer delta is (prediction - target)
        last = self.n_layers_ - 2
        deltas[last] = activations[-1] - y_train
        self._compute_loss_grad(last, n_samples, activations, deltas,
                                coef_grads, intercept_grads)
        for i in range(last, 0, -1):
            deltas[i - 1] = np.dot(deltas[i], self.coefs_[i].T)
            # derivative of the logistic activation: a * (1 - a)
            deltas[i - 1] *= activations[i] * (1 - activations[i])
            self._compute_loss_grad(i - 1, n_samples, activations, deltas,
                                    coef_grads, intercept_grads)
        return loss, self._pack(coef_grads, intercept_grads)

    def fit(self, X, y):
        """Fit the network to ``(X, y)`` and return ``self``.

        ``y`` may be 1-D of shape ``(n_samples,)`` (single output, as before)
        or 2-D of shape ``(n_samples, n_outputs)``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        y_train = y[:, np.newaxis] if y.ndim == 1 else y
        self.n_outputs_ = y_train.shape[1]
        layer_units = ([X.shape[1]] + list(self.hidden_layer_sizes)
                       + [self.n_outputs_])
        self.n_layers_ = len(layer_units)

        # Glorot-style uniform initialization, one (weights, bias) pair per
        # consecutive layer pair.
        rng = np.random.RandomState(self.random_state)
        self.coefs_, self.intercepts_ = [], []
        for n_in, n_out in zip(layer_units[:-1], layer_units[1:]):
            init_bound = np.sqrt(2 / (n_in + n_out))
            self.coefs_.append(rng.uniform(-init_bound, init_bound,
                                           (n_in, n_out)))
            self.intercepts_.append(rng.uniform(-init_bound, init_bound, n_out))

        # Work buffers reused on every cost/gradient evaluation.
        activations = [X] + [np.empty((X.shape[0], n_out))
                             for n_out in layer_units[1:]]
        deltas = [np.empty_like(a_layer) for a_layer in activations[1:]]
        coef_grads = [np.empty((n_in, n_out))
                      for n_in, n_out in zip(layer_units[:-1], layer_units[1:])]
        intercept_grads = [np.empty(n_out) for n_out in layer_units[1:]]

        # Index ranges of each parameter inside the flat packed vector;
        # coefs first, then intercepts — the order _pack produces.
        self._coef_indptr, self._intercept_indptr = [], []
        start = 0
        for coef in self.coefs_:
            end = start + coef.size
            self._coef_indptr.append((start, end, coef.shape))
            start = end
        for intercept in self.intercepts_:
            end = start + intercept.shape[0]
            self._intercept_indptr.append((start, end))
            start = end

        packed_coef = self._pack(self.coefs_, self.intercepts_)
        res = minimize(fun=self._cost_grad, jac=True, x0=packed_coef,
                       args=(X, y_train, activations, deltas, coef_grads,
                             intercept_grads),
                       method='L-BFGS-B',
                       options={"maxiter": self.max_iter})
        self._unpack(res.x)
        return self

    def _predict(self, X):
        """Forward pass; returns the raw ``(n_samples, n_outputs)`` output."""
        X = np.asarray(X, dtype=float)
        layer_units = ([X.shape[1]] + list(self.hidden_layer_sizes)
                       + [self.n_outputs_])
        activations = [X] + [np.empty((X.shape[0], n_out))
                             for n_out in layer_units[1:]]
        self._forward_pass(activations)
        return activations[-1]

    def predict(self, X):
        """Predict targets for ``X``.

        Returns a 1-D array for single-output models (unchanged behavior);
        a 2-D ``(n_samples, n_outputs)`` array for multi-output models.
        """
        y_pred = self._predict(X)
        return y_pred.ravel() if self.n_outputs_ == 1 else y_pred