In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn

Support Vector Machines

Example Dataset 1

In [2]:
import scipy.io
In [3]:
data1 = scipy.io.loadmat('ex6data1.mat')
In [4]:
plt.figure(figsize=(8, 6))
plt.title('Example Dataset 1')
plt.scatter(data1['X'][:, 0][data1['y'][:, 0] == 1], data1['X'][:, 1][data1['y'][:, 0] == 1], c='b', marker='+')
plt.scatter(data1['X'][:, 0][data1['y'][:, 0] == 0], data1['X'][:, 1][data1['y'][:, 0] == 0], c='y', marker='o')
plt.show()

$C=1$

In [5]:
from sklearn import svm
In [6]:
clf = svm.SVC(kernel='linear', C=1).fit(data1['X'], data1['y'].ravel())
clf.score(data1['X'], data1['y'].ravel())
Out[6]:
0.98039215686274506
In [7]:
# Recover the separating line from the fitted linear SVM: the boundary is
# w0*x1 + w1*x2 + b = 0, i.e. x2 = -(w0/w1) * x1 - b/w1.
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(0, 5)
yy = a * xx - clf.intercept_[0] / w[1]

plt.figure(figsize=(8, 6))
plt.title('SVM Decision Boundary with C = 1 (Example Dataset 1)')
plt.scatter(data1['X'][:, 0][data1['y'][:, 0] == 1], data1['X'][:, 1][data1['y'][:, 0] == 1], c='b', marker='+')
plt.scatter(data1['X'][:, 0][data1['y'][:, 0] == 0], data1['X'][:, 1][data1['y'][:, 0] == 0], c='y', marker='o')
plt.plot(xx, yy, 'k-')
plt.show()

$C=100$

In [8]:
# A much larger C penalizes margin violations more heavily, so the fit
# classifies every training point correctly (score 1.0) at the cost of a
# boundary that chases the single outlier.
clf2 = svm.SVC(kernel='linear', C=100).fit(data1['X'], data1['y'].ravel())
clf2.score(data1['X'], data1['y'].ravel())
Out[8]:
1.0
In [9]:
# Same line-recovery as for C = 1: the boundary w0*x1 + w1*x2 + b = 0
# rewritten as x2 = -(w0/w1) * x1 - b/w1.
w = clf2.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(0, 5)
yy = a * xx - clf2.intercept_[0] / w[1]

plt.figure(figsize=(8, 6))
plt.title('SVM Decision Boundary with C = 100 (Example Dataset 1)')
plt.scatter(data1['X'][:, 0][data1['y'][:, 0] == 1], data1['X'][:, 1][data1['y'][:, 0] == 1], c='b', marker='+')
plt.scatter(data1['X'][:, 0][data1['y'][:, 0] == 0], data1['X'][:, 1][data1['y'][:, 0] == 0], c='y', marker='o')
plt.plot(xx, yy, 'k-')
plt.show()

SVM with Gaussian Kernels

In [10]:
data2 = scipy.io.loadmat('ex6data2.mat')
In [11]:
plt.figure(figsize=(8, 6))
plt.title('Example Dataset 2')
plt.scatter(data2['X'][:, 0][data2['y'][:, 0] == 1], data2['X'][:, 1][data2['y'][:, 0] == 1], c='b', marker='+')
plt.scatter(data2['X'][:, 0][data2['y'][:, 0] == 0], data2['X'][:, 1][data2['y'][:, 0] == 0], c='y', marker='o')
plt.xlim([0, 1])
plt.ylim([0.4, 1])
plt.show()
In [12]:
# Gaussian (RBF) kernel. sklearn parameterizes the kernel by
# gamma = 1 / (2 * sigma^2), so gamma = 50 corresponds to sigma = 0.1.
clf3 = svm.SVC(kernel='rbf', C=1, gamma=50).fit(data2['X'], data2['y'].ravel())
clf3.score(data2['X'], data2['y'].ravel())
Out[12]:
0.98957126303592124
In [13]:
plt.figure(figsize=(8, 6))
plt.title('SVM (Gaussian Kernel) Decision Boundary (Example Dataset 2)')
plt.scatter(data2['X'][:, 0][data2['y'][:, 0] == 1], data2['X'][:, 1][data2['y'][:, 0] == 1], c='b', marker='+')
plt.scatter(data2['X'][:, 0][data2['y'][:, 0] == 0], data2['X'][:, 1][data2['y'][:, 0] == 0], c='y', marker='o')

# Dense 500x500 grid covering the plotted window [0, 1] x [0.4, 1].
xx, yy = np.meshgrid(np.linspace(0, 1, 500), np.linspace(0.4, 1, 500))

# Evaluate the SVM's signed distance to the separating surface at every
# grid point (positive on one side of the boundary, negative on the other).
Z = clf3.decision_function(np.c_[xx.ravel(), yy.ravel()])

# Reshape back to the grid and draw only the zero level set, which is the
# non-linear decision boundary learned with the RBF kernel.
Z = Z.reshape(xx.shape)
plt.contour(xx, yy, Z, cmap=plt.cm.Paired, levels=[0])

plt.xlim([0, 1])
plt.ylim([0.4, 1])
plt.show()

Example Dataset 3

In [14]:
data3 = scipy.io.loadmat('ex6data3.mat')
In [15]:
# Visualize the third dataset; the axis limits chosen here are reused by
# the decision-boundary plot below so both figures share the same window.
plt.figure(figsize=(8, 6))
plt.title('Example Dataset 3')
plt.scatter(data3['X'][:, 0][data3['y'][:, 0] == 1], data3['X'][:, 1][data3['y'][:, 0] == 1], c='b', marker='+')
plt.scatter(data3['X'][:, 0][data3['y'][:, 0] == 0], data3['X'][:, 1][data3['y'][:, 0] == 0], c='y', marker='o')
plt.xlim([-0.6, 0.3])
plt.ylim([-0.6, 0.6])
plt.show()
In [16]:
from sklearn.grid_search import GridSearchCV
In [17]:
C_range = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
delta_range = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
gamma_range = 1 / (2 *(delta_range ** 2))
params = dict(gamma=gamma_range, C=C_range)

clf4 = GridSearchCV(svm.SVC(), params, cv=3)
clf4.fit(data3['X'], data3['y'].ravel())

print 'best training score : %s' % clf4.best_score_
print clf4.best_estimator_
best training score : 0.881516587678
SVC(C=3.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
  gamma=49.999999999999993, kernel='rbf', max_iter=-1, probability=False,
  random_state=None, shrinking=True, tol=0.001, verbose=False)
In [18]:
plt.figure(figsize=(8, 6))
plt.title('SVM (Gaussian Kernel) Decision Boundary (Example Dataset 3)')
plt.scatter(data3['X'][:, 0][data3['y'][:, 0] == 1], data3['X'][:, 1][data3['y'][:, 0] == 1], c='b', marker='+')
plt.scatter(data3['X'][:, 0][data3['y'][:, 0] == 0], data3['X'][:, 1][data3['y'][:, 0] == 0], c='y', marker='o')

# Dense 500x500 grid covering the plotted window [-0.6, 0.3] x [-0.6, 0.6].
xx, yy = np.meshgrid(np.linspace(-0.6, 0.3, 500), np.linspace(-0.6, 0.6, 500))

# Signed distance to the boundary of the best estimator found by the grid
# search (GridSearchCV delegates decision_function to best_estimator_).
Z = clf4.decision_function(np.c_[xx.ravel(), yy.ravel()])

# Reshape back to the grid and draw only the zero level set — the decision
# boundary of the cross-validated RBF model.
Z = Z.reshape(xx.shape)
plt.contour(xx, yy, Z, cmap=plt.cm.Paired, levels=[0])

plt.xlim([-0.6, 0.3])
plt.ylim([-0.6, 0.6])
plt.show()
In [ ]: