#!/usr/bin/env python
# coding: utf-8

# > This is one of the 100 recipes of the
# > [IPython Cookbook](http://ipython-books.github.io/), the definitive guide
# > to high-performance scientific computing and data science in Python.
#
# # 8.3. Learning to recognize handwritten digits with a K-nearest neighbors
# # classifier

# 1. Let's do the traditional imports.

# In[ ]:

import numpy as np
import sklearn
import sklearn.datasets as ds
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `train_test_split` now lives in `sklearn.model_selection`. The old
# module is kept as a fallback so the recipe still runs on legacy installs,
# and the `cv` alias is unchanged for the rest of the script.
try:
    import sklearn.model_selection as cv
except ImportError:
    import sklearn.cross_validation as cv
import sklearn.neighbors as nb
import matplotlib.pyplot as plt

# The inline-backend magic only exists inside IPython/Jupyter; skip it when
# this exported notebook is executed with a plain Python interpreter.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# 2. Let's load the digits dataset, part of the `datasets` module of
#    scikit-learn. This dataset contains hand-written digits that have been
#    manually labeled.

# In[ ]:

digits = ds.load_digits()
X = digits.data
y = digits.target
print((X.min(), X.max()))
print(X.shape)

# In the matrix `X`, each row contains the $8 \times 8=64$ pixels (in
# grayscale, values between 0 and 16). The pixels are ordered according to
# the row-major order.

# 3. Let's display some of the images.

# In[ ]:

nrows, ncols = 2, 5
plt.figure(figsize=(6, 3))
plt.gray()
for i in range(ncols * nrows):
    ax = plt.subplot(nrows, ncols, i + 1)
    ax.matshow(digits.images[i, ...])
    # Hide the axis ticks and label each image with its ground-truth digit.
    plt.xticks([])
    plt.yticks([])
    plt.title(digits.target[i])

# 4. Now, let's fit a K-nearest neighbors classifier on the data.

# In[ ]:

# Hold out 25% of the samples for evaluation. No random seed is fixed, so the
# split (and therefore the score below) varies from run to run.
(X_train, X_test, y_train, y_test) = cv.train_test_split(X, y, test_size=.25)

# In[ ]:

knc = nb.KNeighborsClassifier()

# In[ ]:

# The trailing semicolon suppresses the estimator repr in the notebook output.
knc.fit(X_train, y_train);

# 5. Let's evaluate the score of the trained classifier on the test dataset.

# In[ ]:

knc.score(X_test, y_test)

# 6. Now, let's see if our classifier can recognize a "hand-written" digit!

# In[ ]:

# Let's draw a 1.
one = np.zeros((8, 8))
one[1:-1, 4] = 16  # The image values are in [0, 16].
one[2, 3] = 16

# In[ ]:

plt.figure(figsize=(2, 2))
plt.imshow(one, interpolation='none')
plt.grid(False)
# Pass an empty list to actually hide the ticks, as in the digit grid above;
# calling xticks()/yticks() without arguments only queries the current ticks.
plt.xticks([])
plt.yticks([])
plt.title("One");

# In[ ]:

# Estimators expect a 2-D array of shape (n_samples, n_features); reshape the
# flattened 8x8 image into a single 64-feature sample before predicting.
knc.predict(one.reshape(1, -1))

# > You'll find all the explanations, figures, references, and much more in
# > the book (to be released later this summer).
#
# > [IPython Cookbook](http://ipython-books.github.io/), by
# > [Cyrille Rossant](http://cyrille.rossant.net), Packt Publishing, 2014
# > (500 pages).