#!/usr/bin/env python
# coding: utf-8

# > This is one of the 100 recipes of the [IPython Cookbook](http://ipython-books.github.io/), the definitive guide to high-performance scientific computing and data science in Python.

# # 8.5. Using Support Vector Machines for classification tasks

# 1. Let's do the traditional imports.

# In[ ]:

import numpy as np
import pandas as pd
import sklearn
import sklearn.datasets as ds
import sklearn.cross_validation as cv
import sklearn.grid_search as gs
import sklearn.svm as svm
import matplotlib as mpl
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# 2. We generate 2D points and assign a binary label according to a linear operation on the coordinates.

# In[ ]:

X = np.random.randn(200, 2)
y = X[:, 0] + X[:, 1] > 1

# 3. We now fit a linear **Support Vector Classifier** (SVC). This classifier tries to separate the two groups of points with a linear boundary (a line here, more generally a hyperplane).

# In[ ]:

# We train the classifier.
est = svm.LinearSVC()
est.fit(X, y);

# 4. We define a function that displays the boundaries and decision function of a trained classifier.

# In[ ]:

# We generate a grid in the square [-3, 3]^2.
xx, yy = np.meshgrid(np.linspace(-3, 3, 500),
                     np.linspace(-3, 3, 500))

# This function takes an SVM estimator as input.
def plot_decision_function(est):
    # We evaluate the decision function on the grid.
    Z = est.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    cmap = plt.cm.Blues
    # We display the decision function on the grid.
    plt.figure(figsize=(5, 5));
    plt.imshow(Z,
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               aspect='auto', origin='lower', cmap=cmap);
    # We display the boundaries.
    plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='k');
    # We display the points with their true labels.
    plt.scatter(X[:, 0], X[:, 1], s=30, c=.5 + .5 * y,
                lw=1, cmap=cmap, vmin=0, vmax=1);
    plt.axhline(0, color='k', ls='--');
    plt.axvline(0, color='k', ls='--');
    plt.xticks(());
    plt.yticks(());
    plt.axis([-3, 3, -3, 3]);

# 5. Let's take a look at the classification results with the linear SVC.

# In[ ]:

plot_decision_function(est);
plt.title("Linearly separable, linear SVC");

# The linear SVC tried to separate the points with a line and it did a pretty good job.

# 6. We now modify the labels with an *XOR* function: a point's label is 1 if its coordinates have different signs. This problem is not linearly separable, so a linear SVC fails completely.

# In[ ]:

y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# We train the classifier, with a grid search over the regularization parameter C.
est = gs.GridSearchCV(svm.LinearSVC(),
                      {'C': np.logspace(-3., 3., 10)});
est.fit(X, y);
print("Score: {0:.1f}".format(
      cv.cross_val_score(est, X, y).mean()))

# Plot the decision function.
plot_decision_function(est);
plt.title("XOR, linear SVC");

# 7. Fortunately, it is possible to use non-linear SVCs by using non-linear **kernels**. A kernel specifies a non-linear transformation of the points into a higher-dimensional space; the transformed points are expected to be more linearly separable in that space, even though they may not be in the original space. By default, the `SVC` classifier in scikit-learn uses the **Radial Basis Function** (RBF) kernel.
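
# To see concretely what such a transformation can do, here is a minimal sketch (the helper names `X3` and `est3` are arbitrary): we map the 2D points into 3D by adding the hand-crafted feature `x1 * x2`. The XOR label only depends on the sign of that product, so a plain linear SVC can separate the classes in the augmented space; this is what a non-linear kernel does implicitly, without ever computing the mapping.

# In[ ]:

# We augment the data with the product of the two coordinates.
X3 = np.c_[X, X[:, 0] * X[:, 1]]
# A linear SVC in this 3D space can separate the XOR classes.
est3 = svm.LinearSVC()
est3.fit(X3, y);
print("Score with an explicit x1*x2 feature: {0:.3f}".format(
      cv.cross_val_score(est3, X3, y).mean()))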

# In[ ]:

y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
est = gs.GridSearchCV(svm.SVC(),
                      {'C': np.logspace(-3., 3., 10),
                       'gamma': np.logspace(-3., 3., 10)});
est.fit(X, y);
print("Score: {0:.3f}".format(
      cv.cross_val_score(est, X, y).mean()))

plot_decision_function(est.best_estimator_);
plt.title("XOR, non-linear SVC");

# This time, the non-linear SVC does a pretty good job at classifying these non-linearly separable points.

# > You'll find all the explanations, figures, references, and much more in the book (to be released later this summer).

# > [IPython Cookbook](http://ipython-books.github.io/), by [Cyrille Rossant](http://cyrille.rossant.net), Packt Publishing, 2014 (500 pages).
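
# If you are curious which hyperparameters the grid search selected, one quick check (assuming `est` is still the fitted `GridSearchCV` from step 7) is to look at its `best_params_` and `best_score_` attributes.

# In[ ]:

# Hyperparameters selected by the grid search, and the corresponding
# mean cross-validation score.
print(est.best_params_)
print("Best CV score: {0:.3f}".format(est.best_score_))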