import numpy as np
from scipy.special import logsumexp


class GaussianNB:
    def fit(self, X, y):
        # Estimate per-class feature means, variances, and class priors.
        self.classes_ = np.unique(y)
        n_features = X.shape[1]
        n_classes = len(self.classes_)
        self.theta_ = np.zeros((n_classes, n_features))  # per-class feature means
        self.sigma_ = np.zeros((n_classes, n_features))  # per-class feature variances
        self.class_count_ = np.zeros(n_classes)
        for i, c in enumerate(self.classes_):
            X_c = X[y == c]
            self.theta_[i] = np.mean(X_c, axis=0)
            # Small epsilon guards against zero variance (division by zero in the likelihood).
            self.sigma_[i] = np.var(X_c, axis=0) + 1e-9
            self.class_count_[i] = X_c.shape[0]
        self.class_prior_ = self.class_count_ / np.sum(self.class_count_)
        return self
    def _joint_log_likelihood(self, X):
        # For each class c: log P(c) + sum over features j of log N(x_j | theta_cj, sigma_cj).
        joint_log_likelihood = np.zeros((X.shape[0], len(self.classes_)))
        for i in range(len(self.classes_)):
            log_prior = np.log(self.class_prior_[i])
            # Feature-wise log of the Gaussian density for class i.
            log_likelihood = (-0.5 * np.log(2 * np.pi * self.sigma_[i])
                              - 0.5 * (X - self.theta_[i]) ** 2 / self.sigma_[i])
            joint_log_likelihood[:, i] = log_prior + np.sum(log_likelihood, axis=1)
        return joint_log_likelihood
    def predict(self, X):
        # Assign each sample to the class with the highest joint log-likelihood.
        joint_log_likelihood = self._joint_log_likelihood(X)
        return self.classes_[np.argmax(joint_log_likelihood, axis=1)]

    def predict_proba(self, X):
        # Normalize in log space for numerical stability, then exponentiate.
        joint_log_likelihood = self._joint_log_likelihood(X)
        log_prob = joint_log_likelihood - logsumexp(joint_log_likelihood, axis=1)[:, np.newaxis]
        return np.exp(log_prob)
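

# --- Usage sketch (not part of the original implementation) ---
# Illustrative only: fits the classifier on two synthetic Gaussian blobs and
# inspects the outputs. The dataset, seed, and printed checks are assumptions
# added here for demonstration; the class above does not require them.
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    # Two well-separated classes with 2 features each.
    X0 = rng.normal(loc=0.0, scale=1.0, size=(100, 2))
    X1 = rng.normal(loc=3.0, scale=1.0, size=(100, 2))
    X = np.vstack([X0, X1])
    y = np.array([0] * 100 + [1] * 100)

    clf = GaussianNB().fit(X, y)
    preds = clf.predict(X)
    probs = clf.predict_proba(X)

    print("training accuracy:", np.mean(preds == y))  # should be close to 1.0
    print("first rows of predict_proba:\n", probs[:3])  # each row sums to 1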