# use Keras to import the pre-shuffled MNIST database
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print("The MNIST database has a training set of %d examples." % len(X_train))
print("The MNIST database has a test set of %d examples." % len(X_test))
The MNIST database has a training set of 60000 examples.
The MNIST database has a test set of 10000 examples.
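Each MNIST image is a 28×28 array of grayscale pixel intensities, and each label is an integer from 0 to 9. A quick shape check (added here as a sanity check, not part of the original notebook) confirms this:

# sanity check: images are 28x28 arrays, labels are integers 0-9
print(X_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)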
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.cm as cm
import numpy as np
# plot first six training images
fig = plt.figure(figsize=(20,20))
for i in range(6):
    ax = fig.add_subplot(1, 6, i+1, xticks=[], yticks=[])
    ax.imshow(X_train[i], cmap='gray')
    ax.set_title(str(y_train[i]))
def visualize_input(img, ax):
    """Plot an image with its pixel value annotated in each cell."""
    ax.imshow(img, cmap='gray')
    width, height = img.shape
    thresh = img.max()/2.5
    for x in range(width):
        for y in range(height):
            ax.annotate(str(round(img[x][y], 2)), xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center',
                        color='white' if img[x][y] < thresh else 'black')
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111)
visualize_input(X_train[0], ax)
# rescale [0,255] --> [0,1]
X_train = X_train.astype('float32')/255
X_test = X_test.astype('float32')/255
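Dividing by 255 rescales the pixel intensities from [0, 255] to [0, 1], a well-conditioned range for gradient-based training. A quick verification (an addition, not in the original notebook):

# verify the rescaling: values should now span [0.0, 1.0]
print(X_train.min(), X_train.max())  # 0.0 1.0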
from keras.utils import np_utils  # older Keras API; newer versions expose keras.utils.to_categorical directly
# print first ten (integer-valued) training labels
print('Integer-valued labels:')
print(y_train[:10])
# one-hot encode the labels
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)
# print first ten (one-hot) training labels
print('One-hot labels:')
print(y_train[:10])
Integer-valued labels:
[5 0 4 1 9 2 1 3 1 4]
One-hot labels:
[[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]]
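One-hot encoding maps the integer label k to a length-10 vector with a 1 at index k and 0 elsewhere; np.argmax inverts the encoding. A small check (added here, not in the original notebook):

# recover the integer labels from the one-hot rows (np imported above)
print(np.argmax(y_train[:10], axis=1))  # [5 0 4 1 9 2 1 3 1 4]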
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
# define the model
model = Sequential()
model.add(Flatten(input_shape=X_train.shape[1:]))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))
# summarize the model
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
flatten_2 (Flatten)          (None, 784)               0
_________________________________________________________________
dense_4 (Dense)              (None, 512)               401920
_________________________________________________________________
dropout_3 (Dropout)          (None, 512)               0
_________________________________________________________________
dense_5 (Dense)              (None, 512)               262656
_________________________________________________________________
dropout_4 (Dropout)          (None, 512)               0
_________________________________________________________________
dense_6 (Dense)              (None, 10)                5130
=================================================================
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
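Each Dense layer's parameter count is its number of inputs times its number of units (the weights), plus one bias per unit. A worked check of the summary's numbers (added here for clarity):

# parameter counts: inputs * units weights, plus one bias per unit
print(784 * 512 + 512)         # 401920 (flattened image -> first hidden layer)
print(512 * 512 + 512)         # 262656 (first -> second hidden layer)
print(512 * 10 + 10)           # 5130   (second hidden layer -> output)
print(401920 + 262656 + 5130)  # 669706 total trainable parameters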
# compile the model: categorical crossentropy matches the softmax output and one-hot labels
model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
metrics=['accuracy'])
# evaluate test accuracy before training (expect roughly 10%, i.e., chance level for 10 classes)
score = model.evaluate(X_test, y_test, verbose=0)
accuracy = 100*score[1]
# print test accuracy
print('Test accuracy: %.4f%%' % accuracy)
Test accuracy: 11.4900%
from keras.callbacks import ModelCheckpoint
# train the model
checkpointer = ModelCheckpoint(filepath='mnist.model.best.hdf5',
verbose=1, save_best_only=True)
hist = model.fit(X_train, y_train, batch_size=128, epochs=10,
validation_split=0.2, callbacks=[checkpointer],
verbose=1, shuffle=True)
Train on 48000 samples, validate on 12000 samples
Epoch 1/10
Epoch 00000: val_loss improved from inf to 0.15338, saving model to mnist.model.best.hdf5
48000/48000 [==============================] - 15s - loss: 0.2735 - acc: 0.9171 - val_loss: 0.1534 - val_acc: 0.9532
Epoch 2/10
Epoch 00001: val_loss improved from 0.15338 to 0.10635, saving model to mnist.model.best.hdf5
48000/48000 [==============================] - 15s - loss: 0.1120 - acc: 0.9660 - val_loss: 0.1064 - val_acc: 0.9688
Epoch 3/10
Epoch 00002: val_loss improved from 0.10635 to 0.09692, saving model to mnist.model.best.hdf5
48000/48000 [==============================] - 15s - loss: 0.0788 - acc: 0.9754 - val_loss: 0.0969 - val_acc: 0.9729
Epoch 4/10
Epoch 00003: val_loss did not improve
48000/48000 [==============================] - 15s - loss: 0.0615 - acc: 0.9806 - val_loss: 0.1038 - val_acc: 0.9751
Epoch 5/10
Epoch 00004: val_loss improved from 0.09692 to 0.09438, saving model to mnist.model.best.hdf5
48000/48000 [==============================] - 14s - loss: 0.0505 - acc: 0.9844 - val_loss: 0.0944 - val_acc: 0.9762
Epoch 6/10
Epoch 00005: val_loss improved from 0.09438 to 0.09354, saving model to mnist.model.best.hdf5
48000/48000 [==============================] - 17s - loss: 0.0436 - acc: 0.9865 - val_loss: 0.0935 - val_acc: 0.9783
Epoch 7/10
Epoch 00006: val_loss did not improve
48000/48000 [==============================] - 17s - loss: 0.0358 - acc: 0.9890 - val_loss: 0.0978 - val_acc: 0.9783
Epoch 8/10
Epoch 00007: val_loss improved from 0.09354 to 0.09142, saving model to mnist.model.best.hdf5
48000/48000 [==============================] - 14s - loss: 0.0359 - acc: 0.9891 - val_loss: 0.0914 - val_acc: 0.9802
Epoch 9/10
Epoch 00008: val_loss did not improve
48000/48000 [==============================] - 14s - loss: 0.0309 - acc: 0.9906 - val_loss: 0.1176 - val_acc: 0.9778
Epoch 10/10
Epoch 00009: val_loss did not improve
48000/48000 [==============================] - 12s - loss: 0.0268 - acc: 0.9919 - val_loss: 0.1140 - val_acc: 0.9775
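The History object returned by fit records per-epoch metrics under hist.history; a quick plot (an addition, not in the original notebook) shows validation loss bottoming out around epoch 8 while training loss keeps falling, a sign of mild overfitting:

# plot training vs. validation loss across epochs
plt.figure()
plt.plot(hist.history['loss'], label='training loss')
plt.plot(hist.history['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()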
# load the weights that yielded the lowest validation loss (ModelCheckpoint monitors val_loss by default)
model.load_weights('mnist.model.best.hdf5')
# evaluate test accuracy
score = model.evaluate(X_test, y_test, verbose=0)
accuracy = 100*score[1]
# print test accuracy
print('Test accuracy: %.4f%%' % accuracy)
Test accuracy: 98.3300%
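With the best checkpoint restored, the model scores far above the untrained 11.49% baseline. As a final illustration (not in the original notebook), the loaded model can classify individual test images:

# predict digits for the first five test images
predictions = model.predict(X_test[:5])
print(np.argmax(predictions, axis=1))  # predicted digits
print(np.argmax(y_test[:5], axis=1))   # true digits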