"""Classify the scikit-learn handwritten digits with Gaussian naive Bayes.

The script (originally a notebook) does four things:

1. shows the first 64 digit images with their true labels,
2. projects the 64-dimensional data onto 2 principal components and
   scatter-plots the projection coloured by digit,
3. trains a ``GaussianNB`` classifier on a random train/test split, and
4. shows the first 64 test images labelled with the prediction (green when
   correct, red when wrong) and prints accuracy figures, a classification
   report and the confusion matrix.
"""

from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.datasets import load_digits
# RandomizedPCA was removed from scikit-learn; PCA with the randomized SVD
# solver is its replacement.
from sklearn.decomposition import PCA
# sklearn.cross_validation was removed; train_test_split now lives in
# sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Equivalent of the notebook magic ``%matplotlib inline``, written so the
# file is also valid (and runnable) as plain Python outside IPython.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass


def _plot_digit_grid(images, labels, colors=None, n_rows=8, n_cols=8):
    """Draw a grid of digit images, each annotated with its label.

    Parameters
    ----------
    images : indexable of 2-D arrays; ``images[i]`` is drawn in cell ``i``.
    labels : indexable; ``str(labels[i])`` is written in the lower-left
        corner of cell ``i``.
    colors : optional indexable of matplotlib colours for the label text;
        when ``None`` the default text colour is used.
    n_rows, n_cols : grid dimensions (8 x 8 by default).

    Returns
    -------
    The created matplotlib figure.
    """
    fig = plt.figure(figsize=(6, 6))  # figure size in inches
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1,
                        hspace=0.05, wspace=0.05)

    # Never index past the available images if fewer than a full grid exist.
    n_cells = min(n_rows * n_cols, len(images))
    for i in range(n_cells):
        ax = fig.add_subplot(n_rows, n_cols, i + 1, xticks=[], yticks=[])
        ax.imshow(images[i], cmap=plt.cm.binary, interpolation='nearest')

        # label the image with the given value
        text_kwargs = {} if colors is None else {'color': colors[i]}
        ax.text(0, 7, str(labels[i]), **text_kwargs)
    return fig


digits = load_digits()

# copied from notebook 02_representation_of_data.ipynb
# plot the digits: each image is 8x8 pixels, labelled with its target value
fig = _plot_digit_grid(digits.images, digits.target)

# Project the data onto its first two principal components.
pca = PCA(n_components=2, svd_solver='randomized')
proj = pca.fit_transform(digits.data)

# Start a fresh figure: without it the scatter would be drawn into the last
# subplot of the digit grid above when this runs as a single script.
plt.figure()
plt.scatter(proj[:, 0], proj[:, 1], c=digits.target)
plt.colorbar()

# split the data into training and validation sets
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target)

# train the model
clf = GaussianNB()
clf.fit(X_train, y_train)

# use the model to predict the labels of the test data
predicted = clf.predict(X_test)
expected = y_test

# Reshape the flat 64-feature rows back into 8x8 images once, up front.
test_images = X_test.reshape(-1, 8, 8)

# Green label for a correct prediction, red for a wrong one.
label_colors = ['green' if p == e else 'red'
                for p, e in zip(predicted, expected)]
fig = _plot_digit_grid(test_images, predicted, colors=label_colors)

# Simple accuracy figures: number correct, number tested, and the ratio.
matches = (predicted == expected)
print(matches.sum())
print(len(matches))
print(matches.sum() / float(len(matches)))

print(metrics.classification_report(expected, predicted))

print(metrics.confusion_matrix(expected, predicted))

# Display the figures when run as a plain script (harmless with the inline
# notebook backend).
plt.show()