import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.io import loadmat
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
# Dataset 1: read the MATLAB file and flatten the label column to 1-D so it
# can be used directly as a boolean-mask source and as an sklearn target.
data = loadmat("ex6data1.mat")
X = data["X"]
y = data["y"].ravel()
print(X.shape, y.shape)
(51, 2) (51,)
# Scatter the two classes of dataset 1: label 1 in red, label 0 in blue.
plt.figure()
plt.scatter(X[y == 1, 0], X[y == 1, 1], color="red")
plt.scatter(X[y == 0, 0], X[y == 0, 1], color="blue")
plt.show()
def plotDecisionBoundary(clf, X, y, margin=1, step=0.1):
    """Plot the samples and the zero-level contour of a classifier.

    Parameters
    ----------
    clf : fitted classifier
        Must expose ``decision_function``; it is evaluated on a regular
        grid of two-feature points.
    X : ndarray
        Samples to scatter; columns 0 and 1 are used as the plot axes.
    y : ndarray
        Labels aligned with the rows of ``X``. Rows labelled 1 are drawn in
        red, rows labelled 0 in blue.
    margin : float, optional
        Padding added around the data range on both axes. Defaults to 1,
        the value that was previously hard-coded.
    step : float, optional
        Spacing of the evaluation grid. Defaults to 0.1, the value that was
        previously hard-coded.

    Raises
    ------
    ValueError
        If ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # Plot window: data extent padded by `margin` on every side.
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin

    # Evaluate the decision function on every grid node.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    X_plot = np.c_[xx.ravel(), yy.ravel()]
    y_plot = clf.decision_function(X_plot).reshape(xx.shape)

    plt.figure()
    plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1], color="red")
    plt.scatter(X[y == 0][:, 0], X[y == 0][:, 1], color="blue")
    # The decision boundary is where the decision function crosses zero.
    plt.contour(xx, yy, y_plot, levels=[0])
    plt.show()
# Linear SVM with C=1 on dataset 1; fit() returns the estimator itself.
clf = LinearSVC(C=1).fit(X, y)
plotDecisionBoundary(clf, X, y)
# Linear SVM with C=100 on dataset 1.
# With the default iteration cap liblinear stops before converging on this
# setting (it emits a ConvergenceWarning), so the plotted boundary would come
# from an unfinished optimisation. Raise the cap as the warning recommends.
clf = LinearSVC(C=100, max_iter=100000)
clf.fit(X, y)
plotDecisionBoundary(clf, X, y)
D:\ProgramData\Anaconda3\lib\site-packages\sklearn\svm\base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. "the number of iterations.", ConvergenceWarning)
# Dataset 2: read the MATLAB file; labels are flattened to a 1-D vector.
data = loadmat("ex6data2.mat")
X = data["X"]
y = data["y"].ravel()
print(X.shape, y.shape)
(863, 2) (863,)
# Scatter the two classes of dataset 2: label 1 in red, label 0 in blue.
plt.figure()
plt.scatter(X[y == 1, 0], X[y == 1, 1], color="red")
plt.scatter(X[y == 0, 0], X[y == 0, 1], color="blue")
plt.show()
def plotDecisionBoundary(clf, X, y, margin=0.1, step=0.01):
    """Plot the samples and the zero-level contour of a classifier.

    Parameters
    ----------
    clf : fitted classifier
        Must expose ``decision_function``; it is evaluated on a regular
        grid of two-feature points.
    X : ndarray
        Samples to scatter; columns 0 and 1 are used as the plot axes.
    y : ndarray
        Labels aligned with the rows of ``X``. Rows labelled 1 are drawn in
        red, rows labelled 0 in blue.
    margin : float, optional
        Padding added around the data range on both axes (default 0.1).
    step : float, optional
        Spacing of the evaluation grid (default 0.01). The previous
        hard-coded spacing of 0.1 was as large as the padding itself, so the
        contour was interpolated from very few grid nodes.

    Raises
    ------
    ValueError
        If ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # Plot window: data extent padded by `margin` on every side.
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin

    # Evaluate the decision function on every grid node.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    X_plot = np.c_[xx.ravel(), yy.ravel()]
    y_plot = clf.decision_function(X_plot).reshape(xx.shape)

    plt.figure()
    plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1], color="red")
    plt.scatter(X[y == 0][:, 0], X[y == 0][:, 1], color="blue")
    # The decision boundary is where the decision function crosses zero.
    plt.contour(xx, yy, y_plot, levels=[0])
    plt.show()
# SVC (default kernel) with C=30, gamma=30 on dataset 2; fit() returns the
# estimator itself.
clf = SVC(C=30, gamma=30).fit(X, y)
plotDecisionBoundary(clf, X, y)
# Dataset 3: the file carries both a training split (X, y) and a validation
# split (Xval, yval); label columns are flattened to 1-D vectors.
data = loadmat("ex6data3.mat")
X = data["X"]
y = data["y"].ravel()
Xval = data["Xval"]
yval = data["yval"].ravel()
print(X.shape, y.shape)
print(Xval.shape, yval.shape)
(211, 2) (211,) (200, 2) (200,)
# Scatter the training split of dataset 3: label 1 in red, label 0 in blue.
plt.figure()
plt.scatter(X[y == 1, 0], X[y == 1, 1], color="red")
plt.scatter(X[y == 0, 0], X[y == 0, 1], color="blue")
plt.show()
# Exhaustive search over (C, gamma): fit on the training split, score on the
# validation split, and remember the first pair that reaches the top score.
possible_C = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
possible_gamma = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])

best_score = -np.inf
for C in possible_C:
    for gamma in possible_gamma:
        clf = SVC(C=C, gamma=gamma).fit(X, y)
        score = clf.score(Xval, yval)
        if score <= best_score:
            # Not a strict improvement; keep the earlier winner.
            continue
        best_C, best_gamma, best_score = C, gamma, score

print(best_C)
print(best_gamma)
print(best_score)
3.0 30.0 0.965
def plotDecisionBoundary(clf, X, y, margin=0.1, step=0.01):
    """Draw labelled samples together with a classifier's decision boundary.

    Parameters
    ----------
    clf : fitted classifier
        Must expose ``decision_function``; it is evaluated on a regular
        grid of two-feature points.
    X : ndarray
        Samples to scatter; columns 0 and 1 are used as the plot axes.
    y : ndarray
        Labels aligned with the rows of ``X``. Rows labelled 1 are drawn in
        red, rows labelled 0 in blue.
    margin : float, optional
        Padding added around the data range on both axes (default 0.1).
    step : float, optional
        Spacing of the evaluation grid (default 0.01). The previous
        hard-coded spacing of 0.1 matched the padding, leaving too few grid
        nodes for a faithful contour.

    Raises
    ------
    ValueError
        If ``step`` is not strictly positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # Plot window: data extent padded by `margin` on every side.
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin

    # Evaluate the decision function on every grid node.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    X_plot = np.c_[xx.ravel(), yy.ravel()]
    y_plot = clf.decision_function(X_plot).reshape(xx.shape)

    plt.figure()
    plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1], color="red")
    plt.scatter(X[y == 0][:, 0], X[y == 0][:, 1], color="blue")
    # The decision boundary is where the decision function crosses zero.
    plt.contour(xx, yy, y_plot, levels=[0])
    plt.show()
# Refit with the hyper-parameters selected by the validation search above
# instead of re-typing them by hand, so this model always matches the search
# result even if the data or the candidate grids change.
clf = SVC(C=best_C, gamma=best_gamma)
clf.fit(X, y)
plotDecisionBoundary(clf, X, y)
# Spam training split: feature matrix plus labels flattened to 1-D.
data = loadmat("spamTrain.mat")
X_train = data["X"]
y_train = data["y"].ravel()
print(X_train.shape, y_train.shape)
(4000, 1899) (4000,)
# Spam test split: note the file uses the keys "Xtest"/"ytest".
data = loadmat("spamTest.mat")
X_test = data["Xtest"]
y_test = data["ytest"].ravel()
print(X_test.shape, y_test.shape)
(1000, 1899) (1000,)
# Train a default LinearSVC on the spam training split, then report the mean
# accuracy on the training split followed by the test split.
clf = LinearSVC().fit(X_train, y_train)
print(clf.score(X_train, y_train))
print(clf.score(X_test, y_test))
0.99975 0.98