참고: 데이터 사이언스 스쿨 노트북 (Reference: Data Science School notebook)
# Build a toy dataset: 10 samples, 4 binary features, binary target.
import numpy as np
np.random.seed(0)
X = np.random.randint(2, size=(10, 4))
y = np.array([0,0,0,0,1,1,1,1,1,1])
# Display the feature matrix (recorded notebook output below).
X
array([[0, 1, 1, 0], [1, 1, 1, 1], [1, 1, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0], [0, 1, 1, 1], [1, 0, 1, 0], [1, 0, 1, 1], [0, 1, 1, 0]])
# Display the target vector: first 4 samples are class 0, last 6 are class 1.
y
array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
# Fit a multinomial naive Bayes model and inspect its learned attributes.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X, y)
# Number of training samples per class: 4 in class 0, 6 in class 1.
clf.class_count_
array([4., 6.])
# In this binary case intercept_ equals class_log_prior_[1] (same value below).
clf.intercept_
array([-0.51082562])
# Log prior per class: log(4/10) and log(6/10).
clf.class_log_prior_
array([-0.91629073, -0.51082562])
# Per-class sums of feature values over the training samples.
fc = clf.feature_count_
fc
array([[2., 4., 3., 1.], [2., 3., 5., 3.]])
# Smoothed log probability of each feature given the class
# (reproduced by hand further below).
clf.feature_log_prob_
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015], [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
# In this binary case coef_ equals feature_log_prob_[1] (same row below).
clf.coef_
array([[-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
# Sum of the X elements for each y class; `axis` selects the axis summed over.
# axis=None (default): sum of all elements.
# axis=0: sum along the y (row) axis — totals per feature across classes.
fc.sum(axis=0)
array([4., 7., 8., 4.])
# axis=1: sum along the x (column) axis — totals per class across features.
fc.sum(axis=1)
array([10., 13.])
# np.newaxis inserts a new axis of length 1; it is identical to None.
fc.sum(axis=1)[:, np.newaxis]
array([[10.], [13.]])
# Repeat the per-class totals 4 times along the repeat axis (axis=1)
# so the denominator has the same shape as fc.
denominator = np.repeat(fc.sum(axis=1)[:, np.newaxis], 4, axis=1)
denominator
array([[10., 10., 10., 10.], [13., 13., 13., 13.]])
# Unsmoothed conditional probabilities: fc / per-class totals.
fc / denominator
array([[0.2 , 0.4 , 0.3 , 0.1 ], [0.15384615, 0.23076923, 0.38461538, 0.23076923]])
# Laplace smoothing: add alpha to each count and alpha * n_features
# to each denominator; alpha is 1.0 here (output below).
clf.alpha
1.0
np.log((fc + clf.alpha) / (denominator + clf.alpha * X.shape[1]))
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015], [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
# The hand computation matches the model's own attribute exactly.
clf.feature_log_prob_
array([[-1.54044504, -1.02961942, -1.25276297, -1.94591015], [-1.73460106, -1.44691898, -1.04145387, -1.44691898]])
# Reproduce predict_proba by hand for one new sample.
x_new = np.array([1,1,0,0])
clf.predict_proba([x_new])
array([[0.55131629, 0.44868371]])
# Unnormalized log posterior per class: log prior plus the sum of
# feature log probabilities weighted by the counts in x_new.
log_p = clf.class_log_prior_ + np.sum(clf.feature_log_prob_ * x_new, axis=1)
log_p
array([-3.48635519, -3.69234566])
# Normalizing (softmax over the two scores) recovers predict_proba's output.
np.exp(log_p) / np.sum(np.exp(log_p))
array([0.55131629, 0.44868371])
# Repeat the exercise with a Bernoulli naive Bayes model.
from sklearn.naive_bayes import BernoulliNB
clf_bern = BernoulliNB().fit(X, y)
clf_bern.feature_log_prob_
array([[-0.69314718, -0.18232156, -0.40546511, -1.09861229], [-0.98082925, -0.69314718, -0.28768207, -0.69314718]])
# Bernoulli `feature_log_prob_` computed by hand: smoothed per-class
# feature frequencies, log((count + 1) / (class size + 2)).
np.log((clf_bern.feature_count_ + 1) / \
(clf_bern.class_count_.reshape(-1, 1) + 2))
array([[-0.69314718, -0.18232156, -0.40546511, -1.09861229], [-0.98082925, -0.69314718, -0.28768207, -0.69314718]])
clf_bern.predict_proba([x_new])
array([[0.72480181, 0.27519819]])
# Log probability of each feature being ABSENT: log(1 - P(feature | class)).
neg_prob = np.log(1 - np.exp(clf_bern.feature_log_prob_))
neg_prob
array([[-0.69314718, -1.79175947, -1.09861229, -0.40546511], [-0.47000363, -0.69314718, -1.38629436, -0.69314718]])
# Contribution of the features present in x_new (zeros elsewhere) ...
clf_bern.feature_log_prob_ * x_new
array([[-0.69314718, -0.18232156, -0. , -0. ], [-0.98082925, -0.69314718, -0. , -0. ]])
# ... and of the features absent from x_new.
neg_prob * (1 - x_new)
array([[-0. , -0. , -1.09861229, -0.40546511], [-0. , -0. , -1.38629436, -0.69314718]])
# Bernoulli log posterior: log prior + present-feature terms
# + absent-feature terms.
log_p_bern = clf_bern.class_log_prior_ + \
np.sum(clf_bern.feature_log_prob_ * x_new + \
neg_prob * (1 - x_new), axis=1)
log_p_bern
array([-3.29583687, -4.2642436 ])
# Normalizing recovers predict_proba's output.
np.exp(log_p_bern) / np.sum(np.exp(log_p_bern))
array([0.72480181, 0.27519819])