import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import svm
# generate some data that is not linearly separable (the label noise makes the classes overlap)
df = pd.DataFrame(np.random.normal(0,1,(20,2)), columns=['x1', 'x2'])
y = df @ np.array([2, 5]) + np.random.normal(0, 2, df.shape[0])  # 1-d weights give a Series, no reshaping needed
df['y'] = y > y.mean()
# plot it
sns.scatterplot(x='x1', y='x2', hue='y', data = df)
# fit a support vector classifier
clf = svm.SVC(kernel='linear', C=1)
x = df.drop('y', axis=1)
y = df.y
clf.fit(x, y)
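# a quick check that is not in the original notebook: training accuracy and the
# number of support vectors per class; with noisy labels the fit is unlikely to be perfect
print('training accuracy:', clf.score(x, y))
print('support vectors per class:', clf.n_support_)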
# plot the contours of the decision boundary
# highlight the support vectors
_, _ = plt.subplots(figsize=(10, 10))
# evaluate the decision function on a grid covering the data
x1, x2 = np.meshgrid(np.linspace(-2, 2, 21), np.linspace(-2, 2, 21))
x = pd.DataFrame({'x1' : x1.ravel(), 'x2' : x2.ravel()})
y = clf.decision_function(x)
y_sh = y.reshape(x1.shape)
sns.scatterplot(x='x1', y='x2', hue='y', data = df)
plt.contour(x1, x2, y_sh, cmap='coolwarm')
plt.colorbar()
sns.scatterplot(x=clf.support_vectors_[:,0], y=clf.support_vectors_[:,1], color='black', marker='+', s=500)
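# not in the original: for a linear kernel the decision function is w.x + b,
# so the margin width is 2 / ||w||; clf.coef_ holds w for linear kernels only
w = clf.coef_[0]
print('margin width:', 2 / np.linalg.norm(w))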
# generate some data that is linearly separable (no label noise this time)
df = pd.DataFrame(np.random.normal(0,1,(20,2)), columns=['x1', 'x2'])
y = df @ np.array([2, 5])
df['y'] = y > y.mean()
# plot it
sns.scatterplot(x='x1', y='x2', hue='y', data = df)
# fit a support vector classifier
clf = svm.SVC(kernel='linear', C=1)
x = df.drop('y', axis=1)
y = df.y
clf.fit(x, y)
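# an optional comparison, not in the original: with separable data a larger C
# approximates a hard margin and typically leaves fewer support vectors
# (C=1000 is just an arbitrary "large" value, not the author's choice)
clf_hard = svm.SVC(kernel='linear', C=1000)
clf_hard.fit(x, y)
print('support vectors, C=1:   ', clf.n_support_.sum())
print('support vectors, C=1000:', clf_hard.n_support_.sum())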
# plot the contours of the decision boundary
# highlight the support vectors
_, _ = plt.subplots(figsize=(10, 10))
x1, x2 = np.meshgrid(np.linspace(-2, 2, 21), np.linspace(-2, 2, 21))
x = pd.DataFrame({'x1' : x1.ravel(), 'x2' : x2.ravel()})
y = clf.decision_function(x)
y_sh = y.reshape(x1.shape)
sns.scatterplot(x='x1', y='x2', hue='y', data = df)
plt.contour(x1, x2, y_sh, cmap='coolwarm');
plt.colorbar();
sns.scatterplot(x=clf.support_vectors_[:,0], y=clf.support_vectors_[:,1], color='black', marker='+', s=500)
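# an optional variant, not in the original: draw only the decision boundary and the two
# margin lines (decision_function values -1, 0, +1); reuses x1, x2, y_sh and df from above
_, _ = plt.subplots(figsize=(10, 10))
sns.scatterplot(x='x1', y='x2', hue='y', data=df)
plt.contour(x1, x2, y_sh, levels=[-1, 0, 1], colors='k', linestyles=['--', '-', '--'])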
# generate some data with a non-linear class boundary
df = pd.DataFrame({'x1' : np.random.normal(0,1,100), 'x2' : np.random.normal(0,1,100)})
y = df.x1 ** 2 + df.x2 ** 2
df['y'] = y > y.mean()
sns.scatterplot(x='x1', y='x2', hue='y', data=df)
# fit a support vector classifier
clf = svm.SVC(kernel='rbf', gamma=0.1, C=1000)
x = df.drop('y', axis=1)
y = df.y
clf.fit(x, y)
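# not in the original: gamma=0.1 and C=1000 were set by hand; a cross-validated grid
# search is the usual way to pick them (the grid values below are assumptions)
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 10, 100, 1000], 'gamma': [0.01, 0.1, 1, 10]}
search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=5)
search.fit(x, y)
print('best parameters:', search.best_params_)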
# plot the contours of the decision boundary
# highlight the support vectors
_, _ = plt.subplots(figsize=(10, 10))
x1, x2 = np.meshgrid(np.linspace(-2, 2, 100), np.linspace(-2, 2, 100))
x = pd.DataFrame({'x1' : x1.ravel(), 'x2' : x2.ravel()})
y = clf.decision_function(x)
y_sh = y.reshape(x1.shape)
sns.scatterplot(x='x1', y='x2', hue='y', data = df)
plt.contour(x1, x2, y_sh, cmap='coolwarm');
plt.colorbar();
sns.scatterplot(x=clf.support_vectors_[:,0], y=clf.support_vectors_[:,1], color='black', marker='+', s=500)
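# not in the original: score the rbf fit on fresh data drawn from the same circular rule,
# using the training data's threshold so the test labels follow the same definition
df_test = pd.DataFrame({'x1': np.random.normal(0, 1, 100), 'x2': np.random.normal(0, 1, 100)})
y_test = (df_test.x1 ** 2 + df_test.x2 ** 2) > (df.x1 ** 2 + df.x2 ** 2).mean()
print('test accuracy:', clf.score(df_test, y_test))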