Naive Bayes Probability Model

Reference: Data Science School notebook

In [1]:
import numpy as np
np.random.seed(0)
# 10 samples with 4 binary features each
X = np.random.randint(2, size=(10, 4))
# class labels: 4 samples of class 0, 6 samples of class 1
y = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
In [2]:
X
Out[2]:
array([[0, 1, 1, 0],
       [1, 1, 1, 1],
       [1, 1, 1, 0],
       [0, 1, 0, 0],
       [0, 0, 0, 1],
       [0, 1, 1, 0],
       [0, 1, 1, 1],
       [1, 0, 1, 0],
       [1, 0, 1, 1],
       [0, 1, 1, 0]])
In [3]:
y
Out[3]:
array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])

Multinomial Naive Bayes

In [4]:
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X, y)
In [5]:
clf.class_count_
Out[5]:
array([4., 6.])
In [6]:
clf.intercept_
Out[6]:
array([-0.51082562])
In [7]:
clf.class_log_prior_
Out[7]:
array([-0.91629073, -0.51082562])
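class_log_prior_ is simply the log of the empirical class frequencies. A quick check (not part of the original notebook):

# log of the empirical class frequencies: log(4/10), log(6/10)
np.log(clf.class_count_ / clf.class_count_.sum())
# -> array([-0.91629073, -0.51082562])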
In [8]:
fc = clf.feature_count_
fc
Out[8]:
array([[2., 4., 3., 1.],
       [2., 3., 5., 3.]])
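feature_count_ is the per-class column sum of X. The same numbers can be reproduced directly from the training data (a quick check, not in the original notebook):

# sum the feature values of X separately for each class label
np.stack([X[y == c].sum(axis=0) for c in (0, 1)])
# -> array([[2, 4, 3, 1],
#           [2, 3, 5, 3]])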
In [9]:
clf.feature_log_prob_
Out[9]:
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015],
       [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
In [10]:
clf.coef_
Out[10]:
array([[-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
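In the scikit-learn version used here, intercept_ and coef_ are linear-model-style aliases: for a binary problem they expose class_log_prior_[1:] and feature_log_prob_[1:], which is why the two outputs above each reproduce a row of the earlier results. Newer scikit-learn releases have deprecated and removed these aliases, so class_log_prior_ and feature_log_prob_ are the attributes to rely on.

# the aliases simply mirror the positive-class rows
np.allclose(clf.coef_, clf.feature_log_prob_[1:])        # True
np.allclose(clf.intercept_, clf.class_log_prior_[1:])    # True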
In [11]:
# fc holds, for each y class, the summed counts of the X features; axis selects the axis to sum over.
# axis=None (default): sum over all elements
# axis=0: sum along the class axis (totals per feature)
fc.sum(axis=0)
Out[11]:
array([4., 7., 8., 4.])
In [12]:
# axis=1: sum along the feature axis (totals per class)
fc.sum(axis=1)
Out[12]:
array([10., 13.])
In [13]:
# np.newaxis creates a new axis of length 1; it is an alias for None.
fc.sum(axis=1)[:, np.newaxis]
Out[13]:
array([[10.],
       [13.]])
In [14]:
# Repeat the per-class totals 4 times along the feature axis (axis=1).
denominator = np.repeat(fc.sum(axis=1)[:, np.newaxis], 4, axis=1)
denominator
Out[14]:
array([[10., 10., 10., 10.],
       [13., 13., 13., 13.]])
In [15]:
fc / denominator
Out[15]:
array([[0.2       , 0.4       , 0.3       , 0.1       ],
       [0.15384615, 0.23076923, 0.38461538, 0.23076923]])
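The explicit np.repeat above is only for illustration; NumPy broadcasting yields the same conditional probabilities without materializing the repeated array:

# broadcasting divides each row of fc by its own total
fc / fc.sum(axis=1)[:, np.newaxis]
# -> same array as above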
In [16]:
# Laplace (additive) smoothing parameter
clf.alpha
Out[16]:
1.0
In [17]:
np.log((fc + clf.alpha) / (denominator + clf.alpha * X.shape[1]))
Out[17]:
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015],
       [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
In [18]:
clf.feature_log_prob_
Out[18]:
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015],
       [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
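Working one entry by hand with alpha = 1 and 4 features: the class-0, feature-0 entry is log((2 + 1) / (10 + 1 * 4)) = log(3/14), which matches feature_log_prob_[0, 0].

np.log((2 + 1) / (10 + 1 * 4))
# -> -1.5404450409471491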
In [19]:
# posterior class probabilities for a new sample
x_new = np.array([1, 1, 0, 0])
clf.predict_proba([x_new])
Out[19]:
array([[0.55131629, 0.44868371]])
In [20]:
# unnormalized log posterior: log prior + sum_i x_i * log P(x_i | y)
log_p = clf.class_log_prior_ + np.sum(clf.feature_log_prob_ * x_new, axis=1)
log_p
Out[20]:
array([-3.48635519, -3.69234566])
In [21]:
# normalize to obtain the posterior probabilities
np.exp(log_p) / np.sum(np.exp(log_p))
Out[21]:
array([0.55131629, 0.44868371])
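The hand-computed posterior matches predict_proba; the predicted label is the argmax, here class 0:

clf.predict([x_new])
# -> array([0])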

Bernoulli Naive Bayes

In [22]:
from sklearn.naive_bayes import BernoulliNB
clf_bern = BernoulliNB().fit(X, y)
In [23]:
clf_bern.feature_log_prob_
Out[23]:
array([[-0.69314718, -0.18232156, -0.40546511, -1.09861229],
       [-0.98082925, -0.69314718, -0.28768207, -0.69314718]])
In [24]:
# Recomputing the Bernoulli `feature_log_prob_` by hand
np.log((clf_bern.feature_count_ + 1) / \
       (clf_bern.class_count_.reshape(-1, 1) + 2))
Out[24]:
array([[-0.69314718, -0.18232156, -0.40546511, -1.09861229],
       [-0.98082925, -0.69314718, -0.28768207, -0.69314718]])
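Here alpha = 1 is added in the numerator and alpha * 2 in the denominator, because each binary feature has two possible values. For example, the class-0, feature-0 entry is log((2 + 1) / (4 + 2)) = log(0.5):

np.log((2 + 1) / (4 + 2))
# -> -0.6931471805599453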
In [25]:
clf_bern.predict_proba([x_new])
Out[25]:
array([[0.72480181, 0.27519819]])
In [26]:
# log P(x_i = 0 | y) = log(1 - P(x_i = 1 | y))
neg_prob = np.log(1 - np.exp(clf_bern.feature_log_prob_))
neg_prob
Out[26]:
array([[-0.69314718, -1.79175947, -1.09861229, -0.40546511],
       [-0.47000363, -0.69314718, -1.38629436, -0.69314718]])
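Since neg_prob is log P(x_i = 0 | y), each entry of exp(feature_log_prob_) and exp(neg_prob) must sum to 1 (a quick sanity check):

np.exp(clf_bern.feature_log_prob_) + np.exp(neg_prob)
# -> array of ones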
In [27]:
clf_bern.feature_log_prob_ * x_new
Out[27]:
array([[-0.69314718, -0.18232156, -0.        , -0.        ],
       [-0.98082925, -0.69314718, -0.        , -0.        ]])
In [28]:
neg_prob * (1 - x_new)
Out[28]:
array([[-0.        , -0.        , -1.09861229, -0.40546511],
       [-0.        , -0.        , -1.38629436, -0.69314718]])
In [29]:
# unnormalized log posterior: log prior + sum_i [x_i * log P(x_i=1|y) + (1 - x_i) * log P(x_i=0|y)]
log_p_bern = clf_bern.class_log_prior_ + \
np.sum(clf_bern.feature_log_prob_ * x_new + \
       neg_prob * (1 - x_new), axis=1)
log_p_bern
Out[29]:
array([-3.29583687, -4.2642436 ])
In [30]:
np.exp(log_p_bern) / np.sum(np.exp(log_p_bern))
Out[30]:
array([0.72480181, 0.27519819])
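As with the multinomial model, the hand-computed posterior matches predict_proba, and the Bernoulli classifier likewise assigns x_new to class 0:

clf_bern.predict([x_new])
# -> array([0])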