In [1]:
import numpy as np
import os
os.chdir('../')
import matplotlib.pyplot as plt
%matplotlib inline


### 一.简介

$$f(x)=sign(w^Tx^*)$$

$$L(w)=-\sum_{(x_i,y_i)\in M}{y_i(w^Tx_i^*)}$$

$$w=w+\eta y_ix_i^*$$

### 二.代码实现

In [2]:
class Perceptron(object):
    """Primal-form perceptron classifier.

    Decision rule: f(x) = sign(w^T x*), where x* is x augmented with a
    constant-1 feature so the bias term is folded into w.
    """

    def __init__(self, epochs=10, eta=None):
        """
        :param epochs: maximum number of passes over the training data
        :param eta: learning rate; if None it is chosen in fit() from the sample size
        """
        self.w = None
        self.epochs = epochs
        self.eta = eta

    def init_params(self, n_features):
        """
        Randomly initialize the weight vector.

        w has n_features + 1 rows: the feature weights plus the bias.
        :return: None
        """
        self.w = np.random.random(size=(n_features + 1, 1))

    def fit(self, x, y):
        """
        Train with the perceptron update w <- w + eta * y_i * x_i on misclassified points.

        :param x: ndarray of shape m x n
        :param y: ndarray of shape m (or m x 1); labels in {0, 1}, 0 is remapped to -1 internally
        :return: None
        """
        # learning rate shrinks with sample size, floored at 1e-2
        if self.eta is None:
            self.eta = max(1e-2, 1.0 / np.sqrt(x.shape[0]))
        # copy before remapping 0 -> -1 so the caller's label array is not mutated
        # (reshape returns a view, so writing through it would modify the input)
        y = np.asarray(y).reshape(-1, 1).copy()
        y[y == 0] = -1
        # initialize parameters w (bias included)
        n_samples, n_features = x.shape
        self.init_params(n_features)
        # append a constant-1 column so the bias is part of w
        x = np.c_[x, np.ones(shape=(n_samples,))]
        # keep features and labels together so they shuffle in lockstep
        x_y = np.c_[x, y]

        for _ in range(self.epochs):
            error_sum = 0
            np.random.shuffle(x_y)
            for index in range(0, n_samples):
                x_i = x_y[index, :-1]
                y_i = x_y[index, -1:]
                # misclassified point: loss gradient is -y_i * x_i
                if (x_i.dot(self.w) * y_i)[0] < 0:
                    dw = (-x_i * y_i).reshape(-1, 1)
                    self.w = self.w - self.eta * dw
                    error_sum += 1
            # no mistakes in a full pass -> converged, stop early
            if error_sum == 0:
                break

    def get_params(self):
        """
        Return the raw coefficient vector (weights + bias as last row).

        :return: w, ndarray of shape (n_features + 1, 1)
        """

        return self.w

    def predict(self, x):
        """
        Predict hard labels.

        :param x: ndarray of shape m x n
        :return: ndarray of shape m x 1 with values in {0, 1}
        """
        n_samples = x.shape[0]
        x = np.c_[x, np.ones(shape=(n_samples,))]
        return (x.dot(self.w) > 0).astype(int)

    def predict_proba(self, x):
        """
        Predict pseudo-probabilities by squashing the linear score with a sigmoid.

        :param x: ndarray of shape m x n
        :return: ndarray of shape m x 1 with values in (0, 1)
        """
        n_samples = x.shape[0]
        x = np.c_[x, np.ones(shape=(n_samples,))]
        # bug fix: original called utils.sigmoid, but `utils` was never imported
        # (NameError at runtime); compute the sigmoid with numpy directly
        return 1.0 / (1.0 + np.exp(-x.dot(self.w)))

    def plot_decision_boundary(self, x, y):
        """
        Plot the decision boundary over the first two feature dimensions.

        :param x: ndarray of shape m x n (only columns 0 and 1 are drawn)
        :param y: labels used to color the scatter points
        :return: None
        """
        weights = self.get_params()
        w1 = weights[0][0]
        w2 = weights[1][0]
        bias = weights[-1][0]
        # boundary satisfies w1*x1 + w2*x2 + bias = 0  ->  x2 = -(w1*x1 + bias)/w2
        x1 = np.arange(np.min(x), np.max(x), 0.1)
        x2 = -w1 / w2 * x1 - bias / w2
        plt.scatter(x[:, 0], x[:, 1], c=y, s=50)
        plt.plot(x1, x2, 'r')
        plt.show()


### 三.校验

In [3]:
from sklearn.datasets import make_classification

# Fixed random_state so the generated data (and every downstream result,
# e.g. the support-vector ratio at the end) is reproducible on re-run
data, target = make_classification(n_samples=200, n_features=2, n_classes=2,
                                   n_informative=1, n_redundant=0, n_repeated=0,
                                   n_clusters_per_class=1, random_state=0)

In [4]:
# Visualize the two generated classes (point color = class label)
plt.scatter(data[:, 0], data[:, 1], c=target,s=50)

Out[4]:
<matplotlib.collections.PathCollection at 0x2b52b6c7048>
In [5]:
# Train the primal-form perceptron on the synthetic data
perceptron = Perceptron()
perceptron.fit(data, target)

In [6]:
# Draw the learned linear decision boundary over the training points
perceptron.plot_decision_boundary(data,target)


### 四.感知机的另一种实现-对偶形式

$$w=\sum_{i=1}^N\alpha_i y_ix_i$$

$$\alpha_i=\alpha_i+\eta$$

def __init__(self,...,mode=None):
self.mode=mode
...
def fit(self, x, y):
if self.mode=='dual':
self._dual_fit(x, y)
else:
#继续原始的代码
...

In [7]:
   def _dual_fit(self, x, y):
"""
模型训练的对偶形式
:param x:
:param y:
:return:
"""
y = y.reshape(-1, 1)
y[y == 0] = -1

n_samples, n_features = x.shape

# 初始化参数
self.alpha = np.zeros(shape=(n_samples, 1))

x = np.c_[x, np.ones(shape=(n_samples,))]

for _ in range(self.epochs):
error_sum = 0
indices = list(range(0, n_samples))
np.random.shuffle(indices)
for index in indices:
x_i = x[index, :]
y_i = y[index]
# 更新错分点的参数，（注意需要有等号，因为初始化的alpha全为0）
if (x_i.dot(x.T.dot(self.alpha * y)) * y_i)[0] <= 0:
self.alpha[index] += self.eta
error_sum += 1
if error_sum == 0:
break
# 更新回w
self.w = x.T.dot(alpha * y)

In [8]:
# Test the library version of Perceptron, which supports the dual form via mode='dual'
from ml_models.linear_model import Perceptron
perceptron = Perceptron(mode='dual')
perceptron.fit(data, target)

In [9]:
# The resulting boundary is almost identical to the primal form's
perceptron.plot_decision_boundary(data,target)


In [10]:
# Fraction of training points that were ever misclassified (nonzero alpha),
# i.e. the samples that actually contribute to w in the dual form
np.sum(perceptron.alpha!=0)/len(perceptron.alpha)

Out[10]:
0.22

In [ ]: