참고: 데이터 사이언스 스쿨 노트북 (Reference: Data Science School notebook)
# Build a toy dataset: 10 samples, 4 binary features, binary target.
import numpy as np
np.random.seed(0)
X = np.random.randint(2, size=(10, 4))
y = np.array([0,0,0,0,1,1,1,1,1,1])
# Display the feature matrix (recorded notebook output below).
X
array([[0, 1, 1, 0], [1, 1, 1, 1], [1, 1, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0], [0, 1, 1, 1], [1, 0, 1, 0], [1, 0, 1, 1], [0, 1, 1, 0]])
# Display the target vector: first 4 samples are class 0, last 6 are class 1.
y
array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
# Fit a multinomial naive Bayes model and inspect its learned attributes.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X, y)
# Number of training samples per class: 4 in class 0, 6 in class 1.
clf.class_count_
array([4., 6.])
# In this binary case intercept_ equals class_log_prior_[1] (same value below).
clf.intercept_
array([-0.51082562])
# Log prior per class: log(4/10) and log(6/10).
clf.class_log_prior_
array([-0.91629073, -0.51082562])
# Per-class sums of feature values over the training samples.
fc = clf.feature_count_
fc
array([[2., 4., 3., 1.], [2., 3., 5., 3.]])
# Smoothed log probability of each feature given the class
# (reproduced by hand further below).
clf.feature_log_prob_
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015], [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
# In this binary case coef_ equals feature_log_prob_[1] (same row below).
clf.coef_
array([[-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
# Sum of the X elements for each y class; `axis` selects the axis summed over.
# axis=None (default): sum of all elements.
# axis=0: sum along the y (row) axis — totals per feature across classes.
fc.sum(axis=0)
array([4., 7., 8., 4.])
# axis=1: sum along the x (column) axis — totals per class across features.
fc.sum(axis=1)
array([10., 13.])
# np.newaxis inserts a new axis of length 1; it is identical to None.
fc.sum(axis=1)[:, np.newaxis]
array([[10.], [13.]])
# Repeat the per-class totals 4 times along the repeat axis (axis=1)
# so the denominator has the same shape as fc.
denominator = np.repeat(fc.sum(axis=1)[:, np.newaxis], 4, axis=1)
denominator
array([[10., 10., 10., 10.], [13., 13., 13., 13.]])
# Unsmoothed conditional probabilities: fc / per-class totals.
fc / denominator
array([[0.2 , 0.4 , 0.3 , 0.1 ], [0.15384615, 0.23076923, 0.38461538, 0.23076923]])
# Laplace smoothing: add alpha to each count and alpha * n_features
# to each denominator; alpha is 1.0 here (output below).
clf.alpha
1.0
np.log((fc + clf.alpha) / (denominator + clf.alpha * X.shape[1]))
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015], [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
# The hand computation matches the model's own attribute exactly.
clf.feature_log_prob_
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015], [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
# Reproduce predict_proba by hand for one new sample.
x_new = np.array([1,1,0,0])
clf.predict_proba([x_new])
array([[0.55131629, 0.44868371]])
# Unnormalized log posterior per class: log prior plus the sum of
# feature log probabilities weighted by the counts in x_new.
log_p = clf.class_log_prior_ + np.sum(clf.feature_log_prob_ * x_new, axis=1)
log_p
array([-3.48635519, -3.69234566])
# Normalizing (softmax over the two scores) recovers predict_proba's output.
np.exp(log_p) / np.sum(np.exp(log_p))
array([0.55131629, 0.44868371])
# Repeat the exercise with a Bernoulli naive Bayes model.
from sklearn.naive_bayes import BernoulliNB
clf_bern = BernoulliNB().fit(X, y)
clf_bern.feature_log_prob_
array([[-0.69314718, -0.18232156, -0.40546511, -1.09861229], [-0.98082925, -0.69314718, -0.28768207, -0.69314718]])
# Bernoulli `feature_log_prob_` computed by hand: smoothed per-class
# feature frequencies, log((count + 1) / (class size + 2)).
np.log((clf_bern.feature_count_ + 1) / \
(clf_bern.class_count_.reshape(-1, 1) + 2))
array([[-0.69314718, -0.18232156, -0.40546511, -1.09861229], [-0.98082925, -0.69314718, -0.28768207, -0.69314718]])
clf_bern.predict_proba([x_new])
array([[0.72480181, 0.27519819]])
# Log probability of each feature being ABSENT: log(1 - P(feature | class)).
neg_prob = np.log(1 - np.exp(clf_bern.feature_log_prob_))
neg_prob
array([[-0.69314718, -1.79175947, -1.09861229, -0.40546511], [-0.47000363, -0.69314718, -1.38629436, -0.69314718]])
# Contribution of the features present in x_new (zeros elsewhere) ...
clf_bern.feature_log_prob_ * x_new
array([[-0.69314718, -0.18232156, -0. , -0. ], [-0.98082925, -0.69314718, -0. , -0. ]])
# ... and of the features absent from x_new.
neg_prob * (1 - x_new)
array([[-0. , -0. , -1.09861229, -0.40546511], [-0. , -0. , -1.38629436, -0.69314718]])
# Bernoulli log posterior: log prior + present-feature terms
# + absent-feature terms.
log_p_bern = clf_bern.class_log_prior_ + \
np.sum(clf_bern.feature_log_prob_ * x_new + \
neg_prob * (1 - x_new), axis=1)
log_p_bern
array([-3.29583687, -4.2642436 ])
# Normalizing recovers predict_proba's output.
np.exp(log_p_bern) / np.sum(np.exp(log_p_bern))
array([0.72480181, 0.27519819])