Using the Keras library to import the MNIST dataset because of its simplicity compared to other libraries (e.g. CNTK and TensorFlow). <img src="https://upload.wikimedia.org/wikipedia/commons/2/27/MnistExamples.png" width=400px>
import keras
from keras import backend as K
import matplotlib.pyplot as plt
%matplotlib inline
# Loading the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
# Input image dimensions
img_rows, img_cols = 28, 28
num_channels = 1
# Ensuring the channels are in the correct position for the backend's image data format
if K.image_data_format() == 'channels_first':
    X_train = X_train.reshape(X_train.shape[0], num_channels, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], num_channels, img_rows, img_cols)
    input_shape = (num_channels, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, num_channels)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, num_channels)
    input_shape = (img_rows, img_cols, num_channels)
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'Training samples')
print(X_test.shape[0], 'Testing samples')
Using TensorFlow backend.
X_train shape: (60000, 28, 28, 1)
60000 Training samples
10000 Testing samples
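As a quick sanity check on the load, a minimal sketch that plots the first training image with the matplotlib import above (this assumes y_train has not yet been one-hot encoded):
# Displaying the first training image and its label
plt.imshow(X_train[0].reshape(img_rows, img_cols), cmap='gray')
plt.title('Label: {}'.format(y_train[0]))
plt.show()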
import sys
import time
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import advanced_activations
from keras.layers.normalization import BatchNormalization
from keras import callbacks
from keras import backend as K
print(time.strftime('%Y/%m/%d %H:%M'))
print('OS:', sys.platform)
print('Python:', sys.version)
print('NumPy:', np.__version__)
print('Keras:', keras.__version__)
# Printing backend and GPU information
if keras.backend.backend() == 'tensorflow':
    import tensorflow as tf
    from tensorflow.python.client import device_lib
    print('Backend: TensorFlow', tf.__version__)
    local_device_protos = device_lib.list_local_devices()
    print([x for x in local_device_protos if x.device_type == 'GPU'])
    # Avoiding memory issues with the GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)
elif keras.backend.backend() == 'cntk':
    import cntk as C
    print('Backend: CNTK', C.__version__)
    print('GPU: ', C.gpu(0))
# Setting data types and normalizing the images
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'Training samples')
print(X_test.shape[0], 'Testing samples')
# Model settings
batch_size = 128
num_classes = 10
epochs = 12
# Converting class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
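As a worked example, to_categorical turns a class index into a one-hot row, so label 3 becomes a length-10 vector with a single 1:
# One-hot encoding example: class 3 of 10 becomes a row with a 1 at index 3
print(keras.utils.to_categorical([3], 10))  # [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]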
# Beginning model building
model = Sequential()
# Layer 1 - Conv (5x5)
model.add(Conv2D(32, kernel_size=(5, 5), input_shape=input_shape))
model.add(advanced_activations.LeakyReLU(alpha=0.03))
# Layer 2 - Conv (5x5) & Max Pooling
model.add(Conv2D(32, kernel_size=(5, 5)))
model.add(advanced_activations.LeakyReLU(alpha=0.03))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
# Layer 3 - Conv (3x3)
model.add(Conv2D(64, kernel_size=(3, 3)))
model.add(advanced_activations.LeakyReLU(alpha=0.03))
# Layer 4 - Conv (3x3) & Max Pooling
model.add(Conv2D(64, kernel_size=(3, 3)))  # No built-in activation; the LeakyReLU below is the activation
model.add(advanced_activations.LeakyReLU(alpha=0.03))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
# Layer 5 - FC 1024
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
# Layer 6 - FC 1024
model.add(Dense(1024, activation='relu'))
# Output Layer
model.add(Dense(num_classes, activation='softmax'))
# Defining loss function, optimizer, and metrics to report
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
# Summary of the model before fitting
model.summary()
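The shapes and parameter counts in the summary below can be checked by hand: with 'valid' padding, a k x k convolution shrinks each spatial dimension by k - 1, and a Conv2D layer holds (k * k * channels_in + 1) * filters weights. A quick check for the first layer:
# First layer: 28x28x1 input, 32 filters of 5x5, 'valid' padding
print(28 - 5 + 1)            # 24 -> output shape (24, 24, 32)
print((5 * 5 * 1 + 1) * 32)  # 832 parameters, matching model.summary()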
### Setting up callbacks
# Early stopping
earlystop = callbacks.EarlyStopping(monitor='val_loss',
                                    min_delta=0.0001,  # Amount counting as an improvement
                                    patience=5,  # Number of epochs before stopping
                                    verbose=1,
                                    mode='auto')
# Tracking the training time for each epoch
class TimeHistory(callbacks.Callback):
    '''
    Tracks training time on individual epochs for a Keras model
    '''
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, epoch, logs={}):
        self.epoch_time_start = time.time()

    def on_epoch_end(self, epoch, logs={}):
        self.times.append(time.time() - self.epoch_time_start)

time_callback = TimeHistory()  # Gives training time for all epochs
# Model checkpoints - saves the model with the best validation loss
model_filepath = 'model.val_loss{val_loss:.5f}_epoch{epoch:04d}.h5'
checkpoint = callbacks.ModelCheckpoint(model_filepath, monitor='val_loss',
                                       save_best_only=True)
# Reducing the learning rate if training loss stops improving
learning_rate_redux = callbacks.ReduceLROnPlateau(monitor='loss',
                                                  patience=3,  # Reduce after 3 epochs without improvement
                                                  verbose=1,
                                                  factor=0.3,  # Reduce to roughly 1/3
                                                  min_lr=0.00001)
# Fitting the model
model_info = model.fit(X_train, y_train,
                       epochs=epochs,
                       batch_size=batch_size, verbose=1,
                       validation_split=0.1,  # Uses last 10% of data (not shuffled) for validation
                       callbacks=[earlystop, checkpoint, learning_rate_redux, time_callback])
# Getting test information
score, acc = model.evaluate(X_test, y_test,
                            batch_size=batch_size)
print()
print('Test score:', score)
print('Test accuracy:', acc)
# Saving the model
# model.save('model.h5')
# Reporting total training time
total_training_time = round(sum(time_callback.times))
print('Total training time for {0} epochs: {1} seconds'.format(epochs, total_training_time))
2018/04/28 20:31
OS: win32
Python: 3.6.2 |Anaconda custom (64-bit)| (default, Jul 20 2017, 12:30:02) [MSC v.1900 64 bit (AMD64)]
NumPy: 1.12.1
Keras: 2.0.6
Backend: TensorFlow 1.3.0
[name: "/gpu:0" device_type: "GPU" memory_limit: 1524796620 locality { bus_id: 1 } incarnation: 17807082595901432932 physical_device_desc: "device: 0, name: GeForce GTX 680, pci bus id: 0000:01:00.0"]
X_train shape: (60000, 28, 28, 1)
60000 Training samples
10000 Testing samples
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d_1 (Conv2D)            (None, 24, 24, 32)        832
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 24, 24, 32)        0
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 20, 20, 32)        25632
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 20, 20, 32)        0
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 10, 10, 32)        0
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 10, 32)        0
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 8, 8, 64)          18496
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 8, 8, 64)          0
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 6, 6, 64)          36928
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU)    (None, 6, 6, 64)          0
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 64)          0
_________________________________________________________________
dropout_2 (Dropout)          (None, 3, 3, 64)          0
_________________________________________________________________
flatten_1 (Flatten)          (None, 576)               0
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              590848
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600
_________________________________________________________________
dense_3 (Dense)              (None, 10)                10250
=================================================================
Total params: 1,732,586
Trainable params: 1,732,586
Non-trainable params: 0
_________________________________________________________________
Train on 54000 samples, validate on 6000 samples
Epoch 1/12
54000/54000 [==============================] - 14s - loss: 0.4236 - acc: 0.8607 - val_loss: 0.0711 - val_acc: 0.9762
Epoch 2/12
54000/54000 [==============================] - 13s - loss: 0.1124 - acc: 0.9646 - val_loss: 0.0471 - val_acc: 0.9848
Epoch 3/12
54000/54000 [==============================] - 12s - loss: 0.0829 - acc: 0.9743 - val_loss: 0.0425 - val_acc: 0.9877
Epoch 4/12
54000/54000 [==============================] - 13s - loss: 0.0687 - acc: 0.9779 - val_loss: 0.0324 - val_acc: 0.9900
Epoch 5/12
54000/54000 [==============================] - 13s - loss: 0.0605 - acc: 0.9812 - val_loss: 0.0292 - val_acc: 0.9917
Epoch 6/12
54000/54000 [==============================] - 12s - loss: 0.0519 - acc: 0.9836 - val_loss: 0.0300 - val_acc: 0.9917
Epoch 7/12
54000/54000 [==============================] - 12s - loss: 0.0487 - acc: 0.9846 - val_loss: 0.0344 - val_acc: 0.9903
Epoch 8/12
54000/54000 [==============================] - 12s - loss: 0.0464 - acc: 0.9854 - val_loss: 0.0283 - val_acc: 0.9920
Epoch 9/12
54000/54000 [==============================] - 12s - loss: 0.0424 - acc: 0.9869 - val_loss: 0.0256 - val_acc: 0.9927
Epoch 10/12
54000/54000 [==============================] - 12s - loss: 0.0405 - acc: 0.9874 - val_loss: 0.0269 - val_acc: 0.9933
Epoch 11/12
54000/54000 [==============================] - 12s - loss: 0.0370 - acc: 0.9883 - val_loss: 0.0283 - val_acc: 0.9920
Epoch 12/12
54000/54000 [==============================] - 12s - loss: 0.0354 - acc: 0.9895 - val_loss: 0.0265 - val_acc: 0.9932
10000/10000 [==============================] - 0s
Test score: 0.0188353575279
Test accuracy: 0.9942
Total training time for 12 epochs: 157 seconds
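Since TimeHistory records one entry per epoch, the per-epoch timings can also be inspected directly; a small sketch:
# Per-epoch training times collected by the TimeHistory callback
for i, t in enumerate(time_callback.times, 1):
    print('Epoch {0}: {1:.1f} seconds'.format(i, t))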
def plot_model_loss_acc(model_history):
    '''
    Plots the accuracy and the loss for the training and
    validation sets by epoch
    '''
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))
    epochs_ran = len(model_history.history['acc'])
    # Accuracy plot
    axs[0].plot(range(1, epochs_ran + 1), model_history.history['acc'])
    axs[0].plot(range(1, epochs_ran + 1), model_history.history['val_acc'])
    axs[0].set_title('Model Accuracy')
    axs[0].set_ylabel('Accuracy')
    axs[0].set_xlabel('Epoch')
    axs[0].set_xticks(np.arange(1, epochs_ran + 1))
    axs[0].legend(['train', 'val'], loc='best')
    # Loss plot
    axs[1].plot(range(1, epochs_ran + 1), model_history.history['loss'])
    axs[1].plot(range(1, epochs_ran + 1), model_history.history['val_loss'])
    axs[1].set_title('Model Loss')
    axs[1].set_ylabel('Loss')
    axs[1].set_xlabel('Epoch')
    axs[1].set_xticks(np.arange(1, epochs_ran + 1))
    axs[1].legend(['train', 'val'], loc='best')
    plt.show()

plot_model_loss_acc(model_info)
Using Keras functionality for training on augmented data
from keras.preprocessing.image import ImageDataGenerator
datagenerator = ImageDataGenerator(rotation_range=360,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   rescale=None,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   fill_mode='nearest')
datagenerator.fit(X_train)
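To preview what the generator actually produces (and judge whether flips and full rotations are sensible for digits), a small sketch that displays the first few images of one augmented batch:
# Previewing one batch of augmented images
for batch_x, batch_y in datagenerator.flow(X_train, y_train, batch_size=9):
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.imshow(batch_x[i].reshape(img_rows, img_cols), cmap='gray')
        plt.axis('off')
    plt.show()
    break  # One batch is enough for a preview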
# Fit the model on the batches generated by datagen.flow().
model_info = model.fit_generator(generator=datagenerator.flow(X_train, y_train,
                                                              batch_size=batch_size,
                                                              shuffle=True),
                                 steps_per_epoch=10 * round(X_train.shape[0] / batch_size),
                                 epochs=epochs,
                                 validation_data=(X_validation, y_validation),  # From the train/validation split below
                                 verbose=1,
                                 callbacks=[time_callback,  # Gives epoch training times
                                            earlystop,
                                            callbacks.ModelCheckpoint('model.h5', save_best_only=True)])
Splitting the training dataset into training and validation sets manually; Keras only does this automatically (via validation_split) when fitting on arrays, not when fitting on a generator
from sklearn.model_selection import train_test_split
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=.2)
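A quick shape check; with test_size=.2, the 60,000 training images split 48,000/12,000:
print(X_train.shape, X_validation.shape)  # (48000, 28, 28, 1) (12000, 28, 28, 1)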
To-do:
import sys
import time
import numpy as np
import cntk as C
from cntk.layers import Convolution2D, MaxPooling, Dropout, Dense
print(time.strftime('%Y/%m/%d %H:%M'))
print('OS:', sys.platform)
print('Python:', sys.version)
print('NumPy:', np.__version__)
print('CNTK:', C.__version__)
print('GPU:', C.gpu(0))
2018/03/22 18:00
OS: win32
Python: 3.6.2 |Anaconda custom (64-bit)| (default, Jul 20 2017, 12:30:02) [MSC v.1900 64 bit (AMD64)]
NumPy: 1.13.3
CNTK: 2.3.1
GPU: GPU[0] GeForce GTX 680
# Define the data dimensions
input_dim_model = (1, 28, 28) # Images are 28 x 28 with 1 channel of color (gray)
input_dim = 28*28 # used by readers to treat input data as a vector
num_output_classes = 10
X = C.input_variable(input_dim_model)
y = C.input_variable(num_output_classes)
def create_model(input_vars, out_dims=10, dropout_prob=0.0):
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        conv1 = Convolution2D(filter_shape=(5, 5),
                              num_filters=8,
                              strides=(2, 2),
                              pad=True,
                              init_bias=0.1,
                              name='conv1')(input_vars)
        pooling1 = MaxPooling((2, 2),
                              strides=(2, 2),
                              pad=True)(conv1)
        conv2 = Convolution2D(filter_shape=(5, 5),
                              num_filters=64,
                              strides=1,
                              pad=True,
                              init_bias=0.1,
                              name='conv2')(pooling1)
        pooling2 = MaxPooling((2, 2),
                              strides=(2, 2),
                              pad=True)(conv2)
        fc1 = Dense(1024,
                    init_bias=0.1,
                    name='fc1')(pooling2)
        dropout1 = Dropout(dropout_prob)(fc1)
        output_layer = Dense(out_dims,
                             activation=None,
                             init_bias=0.1,
                             name='classify')(dropout1)
    return output_layer
# Defining a flattened input variable (unused below; the model is built on the image-shaped X above)
input_vars = C.input_variable(input_dim, np.float32)
# Create the model
output = create_model(X)
# Print the output shapes / parameters of different components
print('Output Shape of the first convolution layer:', output.conv1.shape)
print('Bias value of the last dense layer:', output.classify.b.value)
# Number of parameters in the network
C.logging.log_number_of_parameters(output)
### Setting up the trainer
# Define the label as the other input parameter of the trainer
labels = C.input_variable(num_output_classes, np.float32)
# Initialize the parameters for the trainer
train_minibatch_size = 50
# lr_schedule = C.learners.learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
momentum = 0.9
# Define the loss function
loss = C.cross_entropy_with_softmax(output, labels)
# Define the function that calculates classification error
label_error = C.classification_error(output, labels)
# Instantiate the trainer object to drive the model training
# learner = C.learners.momentum_sgd(output.parameters, lr_schedule, momentum)
# trainer = C.Trainer(output, loss, label_error, [learner])
Output Shape of the first convolution layer: (8, 14, 14)
Bias value of the last dense layer: [ 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
Training 1072922 parameters in 8 parameter tensors.
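The commented-out learner above still needs a learning-rate schedule; a minimal sketch of one way to wire it up with the CNTK 2.3-era API (the 0.01 rate is an assumption, not a tuned value):
# Sketch: completing the trainer setup; the learning rate is illustrative
lr_schedule = C.learning_rate_schedule(0.01, C.UnitType.minibatch)
learner = C.learners.momentum_sgd(output.parameters, lr_schedule, C.momentum_schedule(momentum))
trainer = C.Trainer(output, (loss, label_error), [learner])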
Below is non-working code being saved here so I can pull this notebook on other machines
LR = 0.01
MOMENTUM = 0.9
batch_size = 128
def shuffle_data(X, y):
    s = np.arange(len(X))
    np.random.shuffle(s)
    X = X[s]
    y = y[s]
    return X, y

def yield_mb(X, y, batchsize=128, shuffle=False):
    if shuffle:
        X, y = shuffle_data(X, y)
    # Only complete batches are submitted
    for i in range(len(X) // batchsize):
        yield X[i * batchsize:(i + 1) * batchsize], y[i * batchsize:(i + 1) * batchsize]
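A usage sketch for yield_mb (array shapes depend on how the earlier cells were run; the labels are assumed to be one-hot already):
# Sketch: iterating over complete minibatches for one epoch
for xb, yb in yield_mb(X_train, y_train, batchsize=128, shuffle=True):
    print(xb.shape, yb.shape)
    break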
# def create_symbol(n_classes=10):
# # Weight initialiser from uniform distribution
# # Activation (unless states) is None
# with C.layers.default_options(init = C.glorot_uniform(), activation = C.relu):
# x = Convolution2D(filter_shape=(5, 5), num_filters=32, pad=True)(features)
# x = Convolution2D(filter_shape=(5, 5), num_filters=32, pad=True)(x)
# x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
# x = Dropout(0.25)(x)
# x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)
# x = Convolution2D(filter_shape=(3, 3), num_filters=100, pad=True)(x)
# x = MaxPooling((2, 2), strides=(2, 2), pad=False)(x)
# x = Dropout(0.25)(x)
# x = Dense(512)(x)
# x = Dropout(0.5)(x)
# x = Dense(n_classes, activation=None)(x)
# return x
# X = C.input_variable(input_dim_model)
# y = C.input_variable(num_output_classes)
def create_symbol(out_dims=10, dropout_prob=0.0):
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        conv1 = Convolution2D(filter_shape=(5, 5),
                              num_filters=8,
                              strides=(2, 2),
                              pad=True,
                              init_bias=0.1,
                              name='conv1')(features)
        pooling1 = MaxPooling((2, 2),
                              strides=(2, 2),
                              pad=True)(conv1)
        conv2 = Convolution2D(filter_shape=(5, 5),
                              num_filters=64,
                              strides=1,
                              pad=True,
                              init_bias=0.1,
                              name='conv2')(pooling1)
        pooling2 = MaxPooling((2, 2),
                              strides=(2, 2),
                              pad=True)(conv2)
        fc1 = Dense(1024,
                    init_bias=0.1,
                    name='fc1')(pooling2)
        dropout1 = Dropout(dropout_prob)(fc1)
        output_layer = Dense(out_dims,
                             activation=None,
                             init_bias=0.1,
                             name='classify')(dropout1)
    return output_layer
def init_model(m, labels, lr=LR, momentum=MOMENTUM):
    # Loss (dense labels); check if support for sparse labels
    loss = C.cross_entropy_with_softmax(m, labels)
    # Momentum SGD
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_use_learners.ipynb
    # unit_gain=False: momentum_direction = momentum*old_momentum_direction + gradient
    # if unit_gain=True then ...(1-momentum)*gradient
    learner = C.momentum_sgd(m.parameters,
                             lr=C.learning_rate_schedule(lr, C.UnitType.minibatch),
                             momentum=C.momentum_schedule(momentum),
                             unit_gain=False)
    trainer = C.Trainer(m, (loss, C.classification_error(m, labels)), [learner])
    return trainer
# Placeholders
# Input batches are 28 x 28 single-channel images
# features = C.input_variable((3, 32, 32), np.float32)
features = C.input_variable((28, 28), np.float32)
labels = C.input_variable(10, np.float32)
# Load symbol
sym = create_symbol()
trainer = init_model(sym, labels)
y_train_onehot = np.eye(len(np.unique(y_train)))[y_train]
# Main training loop: 53s
for j in range(3):
    for data, label in yield_mb(X_train.astype('float32'), y_train_onehot.astype('float32'), batch_size, shuffle=True):
        trainer.train_minibatch({features: data, labels: label})
    # Log (this is just last batch in epoch, not average of batches)
    eval_error = trainer.previous_minibatch_evaluation_average
    print("Epoch %d | Accuracy: %.6f" % (j + 1, (1 - eval_error)))
# learner = C.sgd(model.parameters,
# C.learning_parameter_schedule(0.1))
# progress_writer = C.logging.ProgressPrinter(0)
# train_summary = loss.train((X_train_lr, Y_train_lr), parameter_learners=[learner],
# callbacks=[progress_writer])
To-do:
import sys
import time
import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
print(time.strftime('%Y/%m/%d %H:%M'))
print('OS:', sys.platform)
print('Python:', sys.version)
print('NumPy:', np.__version__)
print('PyTorch:', torch.__version__)
if torch.cuda.is_available():
    print('GPU:', torch.cuda.current_device())
# Checking if there is a GPU and assigning it to a variable
gpu = torch.cuda.is_available()
# Hyper Parameters
num_epochs = 5
batch_size = 100
learning_rate = 0.001
# MNIST Dataset
train_dataset = dsets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_dataset = dsets.MNIST(root='./data/',
                           train=False,
                           transform=transforms.ToTensor())
# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
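A quick sketch to inspect one batch from the loader and confirm the expected (batch, channel, height, width) layout:
# Pulling a single batch from the training loader
images, labels = next(iter(train_loader))
print(images.size(), labels.size())  # torch.Size([100, 1, 28, 28]) torch.Size([100])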
# CNN Model (2 conv layer)
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(7*7*32, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
cnn = CNN()
if gpu:
    cnn.cuda()
# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
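The 7*7*32 input size of the fully connected layer can be verified by pushing a dummy batch through the conv stack; a CPU-only sketch using a fresh CNN instance (so a GPU-resident cnn is not mixed with CPU tensors):
# Each MaxPool2d(2) halves the 28x28 input: 28 -> 14 -> 7, so layer2 outputs (N, 32, 7, 7)
net = CNN()
dummy = Variable(torch.zeros(1, 1, 28, 28))
print(net.layer2(net.layer1(dummy)).size())  # torch.Size([1, 32, 7, 7]) -> 1568 = 7*7*32 features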
# Train the Model
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        if gpu:
            images = Variable(images).cuda()
            labels = Variable(labels).cuda()
        else:
            images = Variable(images)
            labels = Variable(labels)
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.data[0]))
# Test the Model
cnn.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
correct = 0
total = 0
for images, labels in test_loader:
    if gpu:
        images = Variable(images).cuda()
        labels = labels.cuda()  # Keep labels on the same device as predictions
    else:
        images = Variable(images)
    outputs = cnn(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
print('Test Accuracy of the model on the 10000 test images: %d %%' % (100.0 * correct / total))
# Save the Trained Model
# torch.save(cnn.state_dict(), 'cnn.pkl')
2018/02/06 13:20
OS: linux
Python: 3.5.2 (default, Aug 18 2017, 17:48:00) [GCC 5.4.0 20160609]
NumPy: 1.14.0
PyTorch: 0.3.0.post4
Epoch [1/5], Iter [100/600] Loss: 0.1559
Epoch [1/5], Iter [200/600] Loss: 0.2541
Epoch [1/5], Iter [300/600] Loss: 0.1078
Epoch [1/5], Iter [400/600] Loss: 0.0419
Epoch [1/5], Iter [500/600] Loss: 0.0725
Epoch [1/5], Iter [600/600] Loss: 0.1366
Epoch [2/5], Iter [100/600] Loss: 0.0406
Epoch [2/5], Iter [200/600] Loss: 0.0233
Epoch [2/5], Iter [300/600] Loss: 0.0631
Epoch [2/5], Iter [400/600] Loss: 0.0380
Epoch [2/5], Iter [500/600] Loss: 0.0049
Epoch [2/5], Iter [600/600] Loss: 0.0495
Epoch [3/5], Iter [100/600] Loss: 0.0103
Epoch [3/5], Iter [200/600] Loss: 0.0652
Epoch [3/5], Iter [300/600] Loss: 0.0153
Epoch [3/5], Iter [400/600] Loss: 0.0650
Epoch [3/5], Iter [500/600] Loss: 0.0205
Epoch [3/5], Iter [600/600] Loss: 0.0374
Epoch [4/5], Iter [100/600] Loss: 0.0126
Epoch [4/5], Iter [200/600] Loss: 0.0348
Epoch [4/5], Iter [300/600] Loss: 0.0123
Epoch [4/5], Iter [400/600] Loss: 0.0465
Epoch [4/5], Iter [500/600] Loss: 0.0342
Epoch [4/5], Iter [600/600] Loss: 0.0205
Epoch [5/5], Iter [100/600] Loss: 0.0088
Epoch [5/5], Iter [200/600] Loss: 0.0023
Epoch [5/5], Iter [300/600] Loss: 0.0108
Epoch [5/5], Iter [400/600] Loss: 0.0845
Epoch [5/5], Iter [500/600] Loss: 0.0307
Epoch [5/5], Iter [600/600] Loss: 0.0223
Test Accuracy of the model on the 10000 test images: 98 %
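Restoring the commented-out save above is a state_dict round trip; a short sketch (the file name is illustrative):
# Sketch: saving and reloading the trained weights
torch.save(cnn.state_dict(), 'cnn.pkl')
cnn_restored = CNN()
cnn_restored.load_state_dict(torch.load('cnn.pkl'))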
import sys
import time
from datetime import datetime
import numpy as np
import tensorflow as tf
print(time.strftime('%Y/%m/%d %H:%M'))
print('OS:', sys.platform)
print('Python:', sys.version)
print('NumPy:', np.__version__)
print('TensorFlow:', tf.__version__)
# Checking tensorflow processing devices
from tensorflow.python.client import device_lib
local_device_protos = device_lib.list_local_devices()
print([x for x in local_device_protos if x.device_type == 'GPU'])
# Avoiding memory issues with the GPU
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
image_size = 28
num_labels = 10
num_channels = 1
def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
X_train, y_train = reformat(X_train, y_train)
X_validation, y_validation = reformat(X_validation, y_validation)
X_test, y_test = reformat(X_test, y_test)
print('Reformatted data shapes:')
print('Training set', X_train.shape, y_train.shape)
print('Validation set', X_validation.shape, y_validation.shape)
print('Test set', X_test.shape, y_test.shape)
# Training Parameters
learning_rate = 0.001
num_steps = y_train.shape[0] + 1  # One minibatch offset per training sample, per epoch
batch_size = 128
epochs = 12
display_step = 250 # To print progress
# Network Parameters
num_input = 784 # Data input (image shape: 28x28)
num_classes = 10 # Total classes (10 characters)
num_hidden = 1024
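As a sanity check on the model size, the one-hidden-layer network defined below (784 -> 1024 -> 10) has a predictable parameter count:
# Weights and biases: (784*1024 + 1024) + (1024*10 + 10)
print((784 * 1024 + 1024) + (1024 * 10 + 10))  # 814090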
# Defining the model and graph
sgd_hidden_graph = tf.Graph()
with sgd_hidden_graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(X_validation.astype('float32'))
    tf_test_dataset = tf.constant(X_test, name='test_data')

    w0 = tf.Variable(tf.truncated_normal([image_size * image_size, num_hidden]), name='W0')
    w1 = tf.Variable(tf.truncated_normal([num_hidden, num_labels]), name='W1')
    b0 = tf.Variable(tf.zeros([num_hidden]), name='b0')
    b1 = tf.Variable(tf.zeros([num_labels]), name='b1')

    def reluLayer(dataset):
        return tf.nn.relu(tf.matmul(dataset, w0) + b0)

    def logitLayer(dataset):
        return tf.matmul(reluLayer(dataset), w1) + b1

    sgd_hidden_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels,
                                                logits=logitLayer(tf_train_dataset)))
    sgd_hidden_optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(sgd_hidden_loss)
    sgd_hidden_train_prediction = tf.nn.softmax(logitLayer(tf_train_dataset), name='train_predictor')
    sgd_hidden_valid_prediction = tf.nn.softmax(logitLayer(tf_valid_dataset), name='validate_predictor')
    sgd_hidden_test_prediction = tf.nn.softmax(logitLayer(tf_test_dataset), name='test_predictor')
# Creating the graph and running the model
with tf.Session(graph=sgd_hidden_graph) as sgd_hidden_session:
    saver = tf.train.Saver()
    tf.global_variables_initializer().run()
    # For tracking execution time and progress
    start_time = time.time()
    total_steps = 0
    print('\nInitialized\n')
    try:
        for epoch in range(1, epochs + 1):
            print('Beginning Epoch {0} -'.format(epoch))
            for step in range(num_steps):
                offset = (step * batch_size) % (y_train.shape[0] - batch_size)
                batch_data = X_train[offset:(offset + batch_size), :]
                batch_labels = y_train[offset:(offset + batch_size), :].reshape(batch_size, num_labels)
                feed_dict = {tf_train_dataset: batch_data,
                             tf_train_labels: batch_labels}
                _, l, sgd_hidden_predictions = sgd_hidden_session.run(
                    [sgd_hidden_optimizer, sgd_hidden_loss, sgd_hidden_train_prediction],
                    feed_dict=feed_dict)
                if (step % 1000 == 0) or (step == num_steps - 1):  # range() stops at num_steps - 1
                    # Calculating percentage of completion
                    total_steps += step
                    pct_epoch = (step / float(num_steps)) * 100
                    pct_total = (total_steps / float(num_steps * (epochs + 1))) * 100  # Fix this line
                    # Printing progress
                    print('Epoch %d Step %d (%.2f%% epoch, %.2f%% total)' %
                          (epoch, step, pct_epoch, pct_total))
                    print('------------------------------------')
                    print('Minibatch loss: %f' % l)
                    print("Minibatch accuracy: %.1f%%" %
                          accuracy(sgd_hidden_predictions, batch_labels))
                    print("Validation accuracy: %.1f%%" %
                          accuracy(sgd_hidden_valid_prediction.eval(), y_validation))
                    print(datetime.now())
                    print('Total execution time: %.2f minutes' %
                          ((time.time() - start_time) / 60.))
                    print()
        print("Test accuracy: %.1f%%" % accuracy(
            sgd_hidden_test_prediction.eval(), y_test.astype('int32')))
    except KeyboardInterrupt:
        print('Training manually ended')
# saver.save(sgd_hidden_session, '{}/model'.format(Path.cwd()))
2018/03/20 17:36
OS: win32
Python: 3.6.2 |Anaconda custom (64-bit)| (default, Jul 20 2017, 12:30:02) [MSC v.1900 64 bit (AMD64)]
NumPy: 1.13.3
TensorFlow: 1.3.0
[name: "/gpu:0" device_type: "GPU" memory_limit: 1524796620 locality { bus_id: 1 } incarnation: 12763196243698338072 physical_device_desc: "device: 0, name: GeForce GTX 680, pci bus id: 0000:01:00.0"]
Reformatted data shapes:
Training set (48000, 784) (48000, 10)
Validation set (12000, 784) (12000, 10)
Test set (10000, 784) (10000, 10)
Initialized
Beginning Epoch 1 -
Epoch 1 Step 0 (0.00% epoch, 0.00% total)
------------------------------------
Minibatch loss: 62305.937500
Minibatch accuracy: 10.2%
Validation accuracy: 10.2%
2018-03-20 17:36:26.054341
Total execution time: 0.01 minutes
Epoch 1 Step 1000 (2.08% epoch, 0.16% total)
------------------------------------
Minibatch loss: 2.296496
Minibatch accuracy: 13.3%
Validation accuracy: 10.5%
2018-03-20 17:36:29.352841
Total execution time: 0.06 minutes
Epoch 1 Step 2000 (4.17% epoch, 0.48% total)
------------------------------------
Minibatch loss: 2.312936
Minibatch accuracy: 6.2%
Validation accuracy: 10.8%
2018-03-20 17:36:32.736199
Total execution time: 0.12 minutes
Epoch 1 Step 3000 (6.25% epoch, 0.96% total)
------------------------------------
Minibatch loss: 2.294709
Minibatch accuracy: 15.6%
Validation accuracy: 10.8%
2018-03-20 17:36:36.377200
Total execution time: 0.18 minutes
Epoch 1 Step 4000 (8.33% epoch, 1.60% total)
------------------------------------
Minibatch loss: 2.309429
Minibatch accuracy: 8.6%
Validation accuracy: 10.2%
2018-03-20 17:36:39.849201
Total execution time: 0.24 minutes
Epoch 1 Step 5000 (10.42% epoch, 2.40% total)
------------------------------------
Minibatch loss: 2.294353
Minibatch accuracy: 14.1%
Validation accuracy: 10.8%
2018-03-20 17:36:43.187700
Total execution time: 0.29 minutes
Epoch 1 Step 6000 (12.50% epoch, 3.37% total)
------------------------------------
Minibatch loss: 2.296662
Minibatch accuracy: 17.2%
Validation accuracy: 9.8%
2018-03-20 17:36:46.425175
Total execution time: 0.34 minutes
Epoch 1 Step 7000 (14.58% epoch, 4.49% total)
------------------------------------
Minibatch loss: 2.302536
Minibatch accuracy: 10.9%
Validation accuracy: 10.8%
2018-03-20 17:36:49.893675
Total execution time: 0.40 minutes
Training manually ended
Most of this code comes from a competition submission script and needs to be cleaned up. It also contains code for creating pickle files from images within subdirectories.
from __future__ import print_function
import math
import os
from pathlib import Path
import numpy as np
import tensorflow as tf
from scipy import ndimage
from six.moves import cPickle as pickle
# Change this line to the saved model
modelFile = 'trainedModel.meta'
image_size = 28 # Pixel width and height.
num_labels = 10
pixel_depth = 255.0 # Number of levels per pixel.
total_images = sum([len(files) for r, d, files in os.walk("./data/")])
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
def load_letter(folder, min_num_images):
    """Load the data for a single letter label."""
    image_files = list(folder.iterdir())
    dataset = np.ndarray(shape=(len(image_files), image_size, image_size),
                         dtype=np.float32)
    print(folder)
    num_images = 0
    for image in image_files:
        image_file = folder.joinpath(image)
        try:
            image_data = (ndimage.imread(image_file).astype(float) -
                          pixel_depth / 2) / pixel_depth
            if image_data.shape != (image_size, image_size):
                raise Exception('Unexpected image shape: %s' % str(image_data.shape))
            dataset[num_images, :, :] = image_data
            num_images = num_images + 1
        except IOError as e:
            print('Could not read:', image_file, ':', e, "- it's ok, skipping.")
    dataset = dataset[0:num_images, :, :]
    if num_images < min_num_images:
        raise Exception('Many fewer images than expected: %d < %d' %
                        (num_images, min_num_images))
    print('Full dataset tensor:', dataset.shape)
    print('Mean:', np.mean(dataset))
    print('Standard deviation:', np.std(dataset))
    return dataset
def maybe_pickle(data_folders, min_num_images_per_class, force=False):
    dataset_names = []
    data_folders = (i for i in data_folders if i.is_dir())
    for folder in data_folders:
        set_filename = str(folder) + '.pickle'
        dataset_names.append(set_filename)
        if os.path.exists(set_filename) and not force:
            # You may override by setting force=True.
            print('%s already present - Skipping pickling.' % set_filename)
        else:
            print('Pickling %s.' % set_filename)
            dataset = load_letter(folder, min_num_images_per_class)
            try:
                with open(set_filename, 'wb') as f:
                    pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
            except Exception as e:
                print('Unable to save data to', set_filename, ':', e)
    return dataset_names
def make_arrays(nb_rows, img_size):
    if nb_rows:
        dataset = np.ndarray((nb_rows, img_size, img_size), dtype=np.float32)
        labels = np.ndarray(nb_rows, dtype=np.int32)
    else:
        dataset, labels = None, None
    return dataset, labels

def randomize(dataset, labels):
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation, :, :]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels

def reformat(dataset, labels):
    dataset = dataset.reshape(
        labels.shape[0], image_size, image_size, 1).astype(np.float32)
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
def merge_datasets(pickle_files, dataset_size):
    """
    Merge multiple glyph pickle files into a single nd-array dataset and nd-array
    labels for model evaluation.
    Simplification from https://github.com/udacity/deep-learning
    """
    num_classes = len(pickle_files)
    dataset, labels = make_arrays(dataset_size, image_size)
    size_per_class = dataset_size // num_classes
    start_t = 0
    for label, pickle_file in enumerate(pickle_files):
        try:
            with open(pickle_file, 'rb') as f:
                letter_set = pickle.load(f)
            end_t = start_t + size_per_class
            np.random.shuffle(letter_set)
            dataset[start_t:end_t, :, :] = letter_set[0:size_per_class]
            labels[start_t:end_t] = label
            start_t = end_t
        except Exception as e:
            print('Unable to process data from', pickle_file, ':', e)
            raise
    return dataset, labels
dir_name = 'data'
glyph_dir = Path.cwd().joinpath(dir_name)
test_folders = [glyph_dir.joinpath(i) for i in glyph_dir.iterdir()]
test_datasets = maybe_pickle(test_folders, 2) # provide only 2 samples for now
test_dataset, test_labels = merge_datasets(test_datasets, total_images)
test_dataset, test_labels = randomize(test_dataset, test_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Testing size', test_dataset.shape, test_labels.shape)
def classify(test_dataset, model_filename=modelFile, checkpoint_path=None):
    """
    A sample classifier to unpickle a TensorFlow model and label the dataset.
    There are magic strings in this function derived from the model to evaluate.
    Your implementation will likely have different tags that depend upon the model
    implementation.
    We pad the input test_dataset to make it at least as large as the model batch,
    and repeat prediction on chunks of the input if it is larger than the model batch.
    Args:
        test_dataset: Expects a numpy array of shape N*28*28*1, where N is the number
            of images and 28*28 is the pixel width and height.
        model_filename: optional file name stored by a previous TensorFlow session.
        checkpoint_path: optional path for a previous TensorFlow session.
    Returns:
        The #observations by #labels nd-array labelings.
    """
    # Re-scaling the dataset to a similar scale as the training data in the notMNIST.pickle file
    pixel_depth = 255.0
    test_dataset = (test_dataset - pixel_depth / 2) / pixel_depth
    num_classes = 10
    n = int(test_dataset.shape[0])
    result = np.ndarray([n, num_classes], dtype=np.float32)
    with tf.Session() as session:
        saver = tf.train.import_meta_graph('./model/' + model_filename)
        saver.restore(session, './model/' + model_filename.split('.')[0])
        graph_predict = tf.get_default_graph()
        test_predict = graph_predict.get_tensor_by_name('Softmax_1:0')  # string from model
        m = int(graph_predict.get_tensor_by_name('Placeholder:0').shape[0])  # model batch size
        for i in range(0, int(math.ceil(n / m))):
            start = i * m
            end = min(n, ((i + 1) * m))
            x = np.zeros((m, 28, 28, 1)).astype(np.float32)  # Pad the chunk to the model batch size
            x[0:(end - start)] = test_dataset[start:end]
            result[start:end] = test_predict.eval(
                feed_dict={"Placeholder:0": x})[0:(end - start)]
    return result
def testing_accuracy(data, labels):
    """
    Generates predictions and prints the accuracy
    """
    predictions = classify(data)
    print("Test accuracy: %.1f%%" % accuracy(predictions, labels))

testing_accuracy(test_dataset, test_labels)