Contents of the lab session:
# Enable the GPU runtime and restart the kernel.
# Do this at the very beginning of the session.
# The TensorFlow version preinstalled on Colab is 1.15.0;
# install the 2.0 release with GPU support instead.
!pip install --quiet tensorflow-gpu==2.0.0
# The download is < 400MB; ignore pip dependency errors during the installation procedure.
# Import TensorFlow and check that version 2.0.0 is now active
# (if it still prints 1.15.0, restart the kernel and re-run this cell).
import tensorflow as tf
print(tf.__version__)
# Download the dataset from the "In Codice Ratio" website
!wget http://www.inf.uniroma3.it/db/icr/dataset_icr.zip
# The archive is < 5MB
# Unzip the compressed file (-q: quiet, no per-file output)
!unzip -q dataset_icr.zip
# Have a look at the dataset main folder (one entry per line)
!ls -1 dataset
# + There is a separate folder for each character (folder name = class label)
# + There are 23 classes
# + Some letters are duplicated (same letter, different shape) and there is a special "no_char" class
# If you want to read more about the project, check out the publications here:
# http://www.inf.uniroma3.it/db/icr/publications.html
import tensorflow as tf

# Real-time data augmentation: small random rotations and shifts keep the
# augmented glyphs realistic, rescale maps uint8 pixels into [0.0, 1.0],
# and 20% of the samples are held out for validation.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=5,          # random rotation, up to +/- 5 degrees
    width_shift_range=0.05,    # horizontal shift, up to +/- 5% of the width
    height_shift_range=0.05,   # vertical shift, up to +/- 5% of the height
    rescale=1.0 / 255,         # uint8 [0, 255] -> float [0.0, 1.0]
    validation_split=0.2,      # reserve 20% of the images for validation
)
# A more flexible (and more complex) pipeline could be built with tf.data.

# Concrete iterators over the 'dataset' directory (one sub-folder per class).
train_data_gen = image_generator.flow_from_directory(
    directory='dataset',
    subset='training',
    batch_size=32,
    shuffle=True,              # randomize the training order
    target_size=(56, 56),
)
test_data_gen = image_generator.flow_from_directory(
    directory='dataset',
    subset='validation',
    batch_size=32,
    shuffle=False,             # keep a fixed order for evaluation
    target_size=(56, 56),
)
"Become one with the data"
— Andrej Karpathy
(rule no. 1 of "A Recipe for Training Neural Networks")
# Plot one (original) image
import matplotlib.pyplot as plt

# Read a sample glyph straight from disk, without any augmentation
img = plt.imread('./dataset/a/1.png')
print('image shape:', img.shape)
plt.imshow(img, cmap='gray')
# Plot one (augmented) image
import matplotlib.pyplot as plt
import numpy as np

# Pull a single augmented batch out of the training generator
batch_images, batch_labels = next(train_data_gen)
augmented = batch_images[0]
label_one_hot = batch_labels[0]  # one-hot encoded
plt.imshow(augmented)
print('label (one-hot encoded):', label_one_hot)

# Map the one-hot vector back to the class (folder) name
class_dictionary = train_data_gen.class_indices
label = list(class_dictionary.keys())[np.argmax(label_one_hot)]
print('label:', label)

# Batch shape: (batch, height, width, channels)
print(batch_images.shape)
# Note: channels-last layout
# Download a pretrained neural network
from tensorflow.keras import applications

# Full VGG16 (Oxford Visual Geometry Group), trained on ImageNet:
# 224x224x3 inputs, 1000 output classes.
vgg = applications.vgg16.VGG16(weights='imagenet')
vgg.summary()

# Drop the fully-connected head (include_top=False) and adapt the input
# to our 56x56 RGB images; only the convolutional base is kept.
vgg = applications.vgg16.VGG16(weights='imagenet', include_top=False, input_shape=(56, 56, 3))
vgg.summary()

# Freeze the feature extractor so only the new classifier will be trained
for frozen_layer in vgg.layers:
    frozen_layer.trainable = False
vgg.summary()
# Non-trainable parameters: 14714688
from tensorflow.keras import models, layers
def build_model():
    """Build and compile a transfer-learning classifier.

    The (frozen) VGG16 convolutional base acts as a fixed feature
    extractor; a single trainable softmax layer on top classifies the
    23 character classes.
    """
    classifier = models.Sequential([
        vgg,                                     # pretrained feature extractor
        layers.Flatten(),                        # conv feature maps -> flat vector
        layers.Dense(23, activation='softmax'),  # linear classifier, one unit per class
    ])
    classifier.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])
    return classifier
# Instantiate the transfer-learning model and inspect its layers
model = build_model()
model.summary()
# Render the architecture as a diagram (VGG base + flatten + dense head)
tf.keras.utils.plot_model(model)
# Train the classifier head for 10 epochs.
# Model.fit accepts generators directly in TF 2.x; fit_generator is
# deprecated, so use fit for consistency with the later training calls.
history = model.fit(train_data_gen, epochs=10, shuffle=True)
# ~1.5 minutes for each epoch
# Very inefficient computing strategy. Why? (hint: the frozen VGG features
# are recomputed for every image at every epoch instead of being cached once)
# Loss is decreasing, accuracy is increasing. What about overfitting?
# Train loss curve only at the end VS while the training is running
plt.plot(history.history['accuracy'])
plt.xlabel('epoch')
plt.ylabel('accuracy')
# Measure performance on the held-out validation split
model.evaluate(test_data_gen)
# Let's start again from the beginning
model = build_model()
from tensorflow.keras import callbacks
checkpoint_callback = callbacks.ModelCheckpoint(filepath='./checkpoints/model-{epoch:02d}.ckpt',
save_weights_only=True,
save_freq='epoch',
verbose=1)
model.fit_generator(train_data_gen,
epochs=10,
shuffle=True,
callbacks=[checkpoint_callback])
!ls ./checkpoints
latest_checkpoint = tf.train.latest_checkpoint('./checkpoints')
print(latest_checkpoint)
model = build_model() # start again from scratch
model.evaluate(test_data_gen)
# Expect near-chance accuracy here:
# - Trained feature extractor (on ImageNet)
# - Untrained linear classifier
model = build_model()
# Restore the weights saved during the previous training run
model.load_weights(latest_checkpoint)
model.evaluate(test_data_gen)
# Accuracy is recovered without retraining:
# - Trained feature extractor (on ImageNet)
# - Trained linear classifier (on In Codice Ratio)
# Fresh model for a monitored training run
model = build_model()
# Install and load TensorBoard matching the TF 2.0 runtime
!pip install tensorboard==2.0.0
import tensorboard
print(tensorboard.__version__)
# Load the notebook extension (use %reload_ext if it was already loaded)
%load_ext tensorboard
#%reload_ext tensorboard
logdir = './logs'
# Start TensorBoard inside the notebook, pointing at the log directory
%tensorboard --logdir=$logdir
#!kill 3510
# Log metrics after every batch so the curves update while training runs
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir, update_freq='batch')#10
# Train with per-epoch checkpoints and live TensorBoard logging,
# this time also tracking the validation metrics
history = model.fit(train_data_gen, epochs=10, shuffle=True, validation_data=test_data_gen, callbacks=[checkpoint_callback, tensorboard_callback])
#!rm -r ./logs/*
#vgg = applications.vgg16.VGG16(include_top=False, input_shape=(56, 56, 3), weights=None)
# A much smaller network: reuse only the first VGG16 layers as
# initialization and train the whole stack end-to-end on our data.
shrinked_vgg = models.Sequential()
shrinked_vgg.add(layers.InputLayer(input_shape=(56, 56, 3)))
for pretrained_layer in vgg.layers[0:6]:
    shrinked_vgg.add(pretrained_layer)
shrinked_vgg.add(layers.MaxPool2D(pool_size=(5, 5)))
shrinked_vgg.add(layers.Flatten())
shrinked_vgg.add(layers.Dense(23, activation='softmax'))
shrinked_vgg.summary()

# Make all the layers trainable (the reused VGG layers were frozen earlier)
for trainable_layer in shrinked_vgg.layers:
    trainable_layer.trainable = True
shrinked_vgg.summary()

shrinked_vgg.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])
# Fine-tune end-to-end, with checkpointing and TensorBoard logging
history = shrinked_vgg.fit(train_data_gen,
                           epochs=10,
                           shuffle=True,
                           validation_data=test_data_gen,
                           callbacks=[checkpoint_callback, tensorboard_callback])
# Homework: define a proper architecture and try to reach the highest possible validation accuracy