Logistic regression (LR) is a classic classification method.
Regression model: $f(x) = \frac{1}{1+e^{-w^{T}x}}$
where $w^{T}x$ is a linear function of the features: $w^{T}x = w_0 x_0 + w_1 x_1 + w_2 x_2 + \dots + w_n x_n$, with $x_0 = 1$ so that $w_0$ acts as the bias term.
Classification function: $h(z)=\frac{1}{1+e^{-z}}=\frac{1}{1+e^{-w^{T}x}}$, the sigmoid applied to the linear score $z = w^{T}x$.
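As a quick numeric illustration (a minimal sketch, not part of the original notebook), the sigmoid maps any real score $z=w^{T}x$ into $(0,1)$, so its output can be read as a probability:

import numpy as np

z = np.array([-5.0, 0.0, 5.0])
print(1 / (1 + np.exp(-z)))   # approximately [0.0067, 0.5, 0.9933]: confident negative, boundary, confident positive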
For any given $x$ and $w$, we have:
$P(y=1|x,w)=h_{w}(x) $
$P(y=0|x,w)=1−h_{w}(x) $
so that $y$ follows a Bernoulli distribution:
$P(y|x,w)=(h_{w}(x))^{y}(1−h_{w}(x))^{1−y}$
Likelihood function over the $m$ training samples:
$L(w)=∏_{i=1}^{m}P(y^{(i)}|x^{(i)},w)=∏_{i=1}^{m}(h_{w}(x^{(i)}))^{y^{(i)}}(1−h_{w}(x^{(i)}))^{1−y^{(i)}}$
Log-likelihood function (maximizing it is equivalent to minimizing the binary cross-entropy loss):
$J(w)=logL(w)=∑_{i=1}^{m}(y^{(i)}logh_{w}(x^{(i)})+(1−y^{(i)})log(1−h_{w}(x^{(i)})))$
Training amounts to finding the $w$ that maximizes the log-likelihood, which we do with gradient ascent:
$w_{j}:=w_{j}+α\frac{∂J(w)}{∂w_{j}}\quad (j=0,1,2,\dots,n)$
Taking the partial derivative of $J(w)$ with respect to $w_{j}$:
$\frac{∂}{∂w_{j}}J(w) =∑_{i=1}^{m}\left(y^{(i)}\frac{1}{h_{w}(x^{(i)})}−(1−y^{(i)})\frac{1}{1−h_{w}(x^{(i)})}\right)\frac{∂h_{w}(x^{(i)})}{∂w_{j}}\\=∑_{i=1}^{m}\left(\frac{y^{(i)}}{h_{w}(x^{(i)})}−\frac{1−y^{(i)}}{1−h_{w}(x^{(i)})}\right)\frac{e^{−w^{T}x^{(i)}}}{(1+e^{−w^{T}x^{(i)}})^{2}}\frac{∂w^{T}x^{(i)}}{∂w_{j}}\\=∑_{i=1}^{m}\left(\frac{y^{(i)}}{h_{w}(x^{(i)})}−\frac{1−y^{(i)}}{1−h_{w}(x^{(i)})}\right)h_{w}(x^{(i)})\left(1−h_{w}(x^{(i)})\right)x^{(i)}_{j}\\=∑_{i=1}^{m}\left(y^{(i)}(1−h_{w}(x^{(i)}))−(1−y^{(i)})h_{w}(x^{(i)})\right)x^{(i)}_{j}\\=∑_{i=1}^{m}\left(y^{(i)}−h_{w}(x^{(i)})\right)x^{(i)}_{j}$
Gradient ascent update rule: $w_{j}:=w_{j}+α∑_{i=1}^{m}(y^{(i)}−h_{w}(x^{(i)}))x^{(i)}_{j}$
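Before implementing it, we can sanity-check the derived gradient against a finite-difference approximation of $J(w)$ (a small sketch; the toy data and names here are made up for illustration):

import numpy as np

np.random.seed(0)
X_toy = np.random.randn(5, 3)            # 5 samples, 3 features
y_toy = np.array([1., 0., 1., 1., 0.])
w = np.random.randn(3)

def J(w):
    # log-likelihood of the toy data under weights w
    h = 1 / (1 + np.exp(-X_toy @ w))
    return np.sum(y_toy * np.log(h) + (1 - y_toy) * np.log(1 - h))

analytic = X_toy.T @ (y_toy - 1 / (1 + np.exp(-X_toy @ w)))   # the derived formula
eps = 1e-6
numeric = np.array([(J(w + eps * e) - J(w - eps * e)) / (2 * eps) for e in np.eye(3)])
print(np.allclose(analytic, numeric))    # True: the closed-form gradient matches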
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# data
def create_data():
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df['label'] = iris.target
    df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
    # keep the first 100 rows (classes 0 and 1) and two features: sepal length/width
    data = np.array(df.iloc[:100, [0, 1, -1]])
    return data[:, :2], data[:, -1]
X, y = create_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X_train[:5]
array([[5.2, 3.5], [4.7, 3.2], [6.9, 3.1], [5.4, 3.4], [6.3, 3.3]])
y[:5]
array([0., 0., 0., 0., 0.])
class LogisticRegressionClassifier:
    def __init__(self, max_iter=200, learning_rate=0.01):
        self.max_iter = max_iter
        self.learning_rate = learning_rate

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def fit(self, X, y):
        # prepend a column of ones so weights[0] is the bias term (x_0 = 1)
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        self.weights = np.zeros((X.shape[1], 1))
        y = np.expand_dims(y, axis=1)
        # batch gradient ascent on the log-likelihood: w := w + alpha * X^T (y - h)
        for iter_ in range(self.max_iter):
            h = self.sigmoid(np.dot(X, self.weights))
            error = y - h
            self.weights = self.weights + self.learning_rate * np.dot(X.T, error)

    def predict(self, x):
        x = np.hstack((np.ones((x.shape[0], 1)), x))
        pred = self.sigmoid(np.dot(x, self.weights))
        # the sigmoid output is a probability, so the threshold is 0.5 (not 0)
        if pred > 0.5:
            return 1
        else:
            return 0

    def score(self, X_test, y_test):
        right = 0
        X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))
        # the logit w^T x is > 0 exactly when the predicted probability is > 0.5
        res = np.dot(X_test, self.weights)
        for (result, y) in zip(res, y_test):
            if (result > 0 and y == 1) or (result < 0 and y == 0):
                right += 1
        return right / len(X_test)
lr_clf = LogisticRegressionClassifier()
lr_clf.fit(X_train, y_train)
lr_clf.score(X_test, y_test)
1.0
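The line plotted below is the decision boundary: the logit is zero where $w_{0}+w_{1}x_{1}+w_{2}x_{2}=0$, i.e. $x_{2}=−(w_{1}x_{1}+w_{0})/w_{2}$, which is exactly the expression used for y_ in the next cell.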
x_points = np.arange(4, 8)
y_ = -(lr_clf.weights[1]*x_points + lr_clf.weights[0])/lr_clf.weights[2]
plt.plot(x_points, y_)
plt.scatter(X[:50,0],X[:50,1], label='0')
plt.scatter(X[50:,0],X[50:,1], label='1')
plt.legend()
My_X = np.array([[ 3,  3,  3],
                 [ 4,  3,  2],
                 [ 2,  1,  2],
                 [ 1,  1,  1],
                 [-1,  0,  1],
                 [ 2, -2,  1]])
My_X.shape
(6, 3)
My_y = np.array([1,1,1,0,0,0])
My_clf = LogisticRegressionClassifier()
My_clf.fit(My_X, My_y)
test_point = np.array([1,2,-2])
My_clf.predict(np.expand_dims(test_point, axis=0))
1
The solver parameter determines which optimization algorithm is used for the logistic regression loss. Four algorithms are available: liblinear (coordinate descent), newton-cg (a Newton method), lbfgs (a quasi-Newton method), and sag (stochastic average gradient descent).
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(max_iter=200)
clf.fit(X_train, y_train)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, max_iter=200, multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
clf.score(X_test, y_test)
0.9666666666666667
print(clf.coef_, clf.intercept_)
[[ 1.94474283 -3.29077674]] [-0.53064339]
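As a cross-check (a small sketch, not part of the original run), plugging these coefficients into the sigmoid by hand should reproduce sklearn's own predict_proba for class 1:

# manual sigmoid of the learned linear score vs. sklearn's predict_proba
z = X_test @ clf.coef_[0] + clf.intercept_[0]
manual_prob = 1 / (1 + np.exp(-z))
print(np.allclose(manual_prob, clf.predict_proba(X_test)[:, 1]))   # expected: True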
x_points = np.arange(4, 8)
y_ = -(clf.coef_[0][0]*x_points + clf.intercept_)/clf.coef_[0][1]
plt.plot(x_points, y_)
plt.scatter(X[:50, 0], X[:50, 1], color='blue', label='0')
plt.scatter(X[50:, 0], X[50:, 1], color='orange', label='1')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()
My_skl = LogisticRegression(max_iter=200)
My_skl.fit(My_X, My_y)
/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=200, multi_class='warn', n_jobs=None, penalty='l2', random_state=None, solver='warn', tol=0.0001, verbose=0, warm_start=False)
My_skl.predict(np.array([[1,2,-2]]))
array([1])
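To silence the FutureWarning seen above, specify the solver explicitly, as the message itself suggests (a minimal sketch; clf_lbfgs is just an illustrative name):

clf_lbfgs = LogisticRegression(solver='lbfgs', max_iter=200)
clf_lbfgs.fit(My_X, My_y)
clf_lbfgs.predict(np.array([[1, 2, -2]]))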