# Deep learning and Dogs vs. Cats - Part 2... Now with Convolutions!¶

## Data set https://www.kaggle.com/c/dogs-vs-cats¶

(the data set should be installed in two directories just above the one where this notebook is located: train and test1)

### http://ricardodeazambuja.com/deep_learning/2017/03/05/easy-peasy_conv_deep_learning_two/¶

In [2]:
# The package 'os' will be necessary to find the current
# working directory os.getcwd() and also to list all
# files in a directory os.listdir().
import os

# https://docs.python.org/2/library/multiprocessing.html
from multiprocessing import Pool

# Only used for the final test
import urllib, cStringIO

# Yep, I don't like to import numpy as np :P
import numpy

# As I've explained somewhere, Scipy will help us
# reading (using Pillow) and resizing images.
import scipy.misc

# SOMETHING NEW HERE!!!
# For our convolution playground
from scipy.signal import convolve2d

# Pyplot is what we use for plotting
import matplotlib.pyplot as plt

# Only for the 2D convolution video
import matplotlib.patches as patches

# And here is the Keras stuff:

# We don't really need this since it would
# be easy do code something like this ourselves
from keras.utils import np_utils

# https://keras.io/getting-started/sequential-model-guide/
from keras.models import Sequential

# For visualization of what is happening inside
# http://ricardodeazambuja.com/deep_learning/2017/02/12/easy-peasy_deep_learning_one_and_a_half/
from keras.models import Model

from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Dropout

# SOMETHING NEW HERE!!!
# Things we will need for our CNN
from keras.layers import Convolution2D, MaxPooling2D, Flatten

# https://keras.io/optimizers/
from keras.optimizers import SGD

# While the method for saving a model is
# part of the model object, to load we need
# to import something else


## 1) Load the data sets¶

In [3]:
# Returns the current working directory
# (where the Python interpreter was called)
path = os.getcwd()

# The path separator used by the OS:
sep = os.path.sep

# This is the directory where the training images are located
dirname = "train"

# Generates a list of all images (actualy the filenames) from the training set,
# but it will also include the full path
imagePaths = [path+sep+dirname+sep+filename
for filename in os.listdir(path+sep+dirname)]

In [4]:
# To speed up things, I will use parallel computing!
# Pool lets me use its map method and select the number of parallel processes.

def import_training_set(args):
'''
Reads the image from imagePath, resizes
and returns it together with its label and
original index.

'''
index,imagePath,new_image_size = args

# Split will literally split a string at that character
# returning a list of strings.
# First we split according to the os.path.sep and keep
# only the last item from the list gererated ([-1]).
# This will give us the filename.
filename = imagePath.split(os.path.sep)[-1]

# Then we split it again using "."
# and extract the first item ([0]):
label = filename.split(".")[0]

# and the second item ([1]):
original_index = filename.split(".")[1]

# SOMETHING NEW HERE!!!
# Resizes the image (downsampling) to new_image_size
input_image = scipy.misc.imresize(image,new_image_size)
# Do exactly what it says: put the last axis on the first position.
# https://docs.scipy.org/doc/numpy/reference/generated/numpy.rollaxis.html
input_image = numpy.rollaxis(input_image, 2, 0)

# SOMETHING NEW HERE!!!
return (index,(original_index,label,input_image))

In [5]:
%%time
# The size of the resized image.
# After we apply the flatten method, it will become a list of 32x32x3 items (1024x3).
# (Where is the 'x3' coming from? Our images are composed of three colors!)
new_image_size = 32,32

number_of_parallel_processes = 7

# When the map is finished, we will receive a list with tuples:
# (index,(original_index,label,input_vector)) => (index,(original_index,'category',1D resized image))
#
# There's no guarantee about the ordering because they are running in parallel.
ans = Pool(number_of_parallel_processes).map(import_training_set,[(i,img,new_image_size)
for i,img in enumerate(imagePaths)])

CPU times: user 232 ms, sys: 96 ms, total: 328 ms
Wall time: 37.5 s

In [6]:
# Because import_training_set returns a tuple like this:
# (index,(original_index,label,input_vector))
# and index is unique, we can convert to a dictionary
# to solve our problem with unordered items:
training_set = dict(ans)

# Gives a hint to the garbage collector...
del ans

In [7]:
# Let's verify if we loaded all images:
len(training_set)

Out[7]:
25000
In [9]:
# I've never tried Keras with TensorFlow, but, as I understood, it will
# need to be changed to a shape (32,32,3)
training_set[0][2].shape

Out[9]:
(3, 32, 32)

## Let's load one image from training set to test some classic kernels using Scipy convolve2D¶

In [10]:
cat_img = scipy.misc.imread(path+sep+dirname+"/cat.666.jpg")/255. # /255. normalises (0 to 1)

In [11]:
plt.imshow(cat_img)
plt.show()

In [12]:
# Some kernels from:
# https://en.wikipedia.org/wiki/Kernel_(image_processing)
# Indentity
kernel_id = [
[0, 0, 0],
[0, 1, 0],
[0, 0, 0]
]
kernel_id = numpy.array(kernel_id,dtype=float)

# Edge detector
kernel_edge = [
[ 1, 0, -1],
[ 0, 0,  0],
[-1, 0,  1]
]
kernel_edge = numpy.array(kernel_edge,dtype=float)

# Blur
kernel_blur = [
[1, 1, 1],
[1, 1, 1],
[1, 1, 1]
]
kernel_blur = numpy.array(kernel_blur,dtype=float)
kernel_blur /= kernel_blur.sum() # normalises

In [13]:
cat_img.shape

Out[13]:
(333, 499, 3)
In [14]:
# First uses the indentity kernel
# (should keep the same image, since it's full of zeros and with one at the centre)
cat_img_conv_id = numpy.zeros(cat_img.shape)
for i in range(3):
cat_img_conv_id[:,:,i] = convolve2d(cat_img[:,:,i],kernel_id,
mode='same',
boundary='wrap')

# http://stackoverflow.com/a/12201744/7658422
# Simple way to convert to grayscale:
cat_img_gray = numpy.dot(cat_img[...,:3], [0.299, 0.587, 0.114])

# Now uses the edge detection (the image was converted to grayscale)
cat_img_conv_edge = convolve2d(cat_img_gray,kernel_edge,
mode='same',
boundary='wrap')

# Blur or loss-pass filter
cat_img_conv_blur = numpy.zeros(cat_img.shape)
for i in range(3):
cat_img_conv_blur[:,:,i] = convolve2d(cat_img[:,:,i],kernel_blur,
mode='same',
boundary='wrap')

In [15]:
plt.figure(figsize=(20,10))
plt.subplot(141)
plt.imshow(cat_img_conv_id)
plt.title("Identity Kernel => Original Image")

plt.subplot(142)
plt.imshow(cat_img_conv_edge,cmap='gray')
plt.title("Edge detection")

plt.subplot(143)
plt.imshow(numpy.abs(cat_img_conv_edge),cmap='gray')
plt.title("Edge detection (abs)")

plt.subplot(144)
plt.imshow(cat_img_conv_blur)
plt.title("Blurred")
plt.show()

In [16]:
# Now, let's get only the cat's face:
cat_face_gray_small = cat_img_gray[120:180,220:280]

plt.figure(figsize=(20,10))
plt.subplot(121)
plt.imshow(cat_face_gray_small,interpolation='none',cmap='gray')
plt.title("Original size")
plt.subplot(122)
plt.imshow(scipy.misc.imresize(cat_face_gray_small,(32,32)),interpolation='none',cmap='gray')
plt.title("Resized to 32x32")
plt.show()

In [17]:
def my_patch(x, y, size, alpha_in=0.3, color_in='red'):
assert size%2==1, "size must be an odd number!"
# http://matthiaseisen.com/pp/patterns/p0203/
return patches.Rectangle((x-(size/2+1)+0.5, y-(size/2+1)+0.5), size, size, alpha=alpha_in, color=color_in)

In [18]:
#
# We will apply the edge detector kernel, manually instead of using scipy
#

kernel_size = 3
kernel_centre = (int(kernel_size/2)+1)
img_resized = (32,32)

# Here I'm zero padding the original image
cat_face_gray_smaller = numpy.zeros((img_resized[0]+kernel_centre,
img_resized[1]+kernel_centre),
dtype=float)
cat_face_gray_smaller[kernel_centre/2:-kernel_centre/2,
kernel_centre/2:-kernel_centre/2] = scipy.misc.imresize(cat_face_gray_small,
img_resized)/255.

# This will store the result of our 2D convolution
img_conv_result = numpy.zeros(img_resized, dtype=float)

if not os.path.exists("animation"):
os.makedirs("animation")
x,y=0,0

for x in range(img_resized[0]):
for y in range(img_resized[1]):
img_conv_result[y,x]=(cat_face_gray_smaller[y:y+kernel_size,x:x+kernel_size]*kernel_edge).sum()

In [19]:
plt.figure(figsize=(20,10))
plt.subplot(121)
plt.imshow(img_conv_result,interpolation='none',cmap='gray', vmin=-1, vmax=1)
plt.title("Original result")
plt.subplot(122)
plt.imshow(abs(img_conv_result),interpolation='none',cmap='gray', vmin=0, vmax=1)
plt.title("Absolute value")
plt.show()

In [162]:
%%time
img_conv_result_fig = numpy.ones(img_conv_result.shape,dtype=float)
# Generate the images
for x in range(img_resized[0]):
for y in range(img_resized[1]):
x_visual = x + kernel_centre/2
y_visual = y + kernel_centre/2

plt.figure(figsize=(20,10))
plt.suptitle("http://ricardodeazambuja.com", fontsize=15)
plt.subplot2grid((3,6), (0, 0), colspan=3, rowspan=2)
plt.imshow(cat_face_gray_smaller,interpolation='none',cmap='gray',
vmin=0, vmax=1)
ax = plt.gca()
plt.scatter(x_visual, y_visual, color='red')
plt.xlim(-0.5,img_resized[0]+kernel_centre-0.5)
plt.ylim(img_resized[0]+kernel_centre-0.5,-0.5)

plt.subplot2grid((3,6), (0, 3), colspan=3, rowspan=2)
img_conv_result_fig[y,x] = img_conv_result[y,x]
plt.imshow(img_conv_result_fig,interpolation='none',cmap='gray',
vmin=0, vmax=1)
# plt.scatter(x, y, color='red',marker='s', s=100, alpha=0.5)
plt.xlim(-0.5,img_resized[0]-0.5)
plt.ylim(img_resized[0]-0.5,-0.5)
plt.title("Result of the convolution")
plt.colorbar()

plt.subplot2grid((3,6), (2, 2), colspan=2)
plt.imshow(cat_face_gray_smaller[y:y+kernel_size,x:x+kernel_size]*kernel_edge,interpolation='none',cmap='gray',
vmin=-1, vmax=1)
plt.colorbar()
plt.title("Result of kernel x current image patch")

plt.close()
#         plt.show()

CPU times: user 18min 55s, sys: 16.6 s, total: 19min 12s
Wall time: 19min 14s


The final animation is a mix between these two ideas:
http://stackoverflow.com/a/28015795/7658422
and
http://stackoverflow.com/a/14666689/7658422

See the notebook:
Matplotlib Animation on Jupyter.ipynb

## 2) Prepare the data set for Keras¶

A lot of things are just repetition from the previous posts...

In [20]:
# Let's imagine we don't know how many different labels we have.
# (we know, they are 'cat' and 'dog'...)
# A Python set can help us to create a list of unique labels.
# (i[0]=>filename index, i[1]=>label, i[2]=>1D vector)
unique_labels = set([i[1] for i in training_set.values()])

# With a list of unique labels, we will generate a dictionary
# to convert from a label (string) to a index (integer):
labels2int = {j:i for i,j in enumerate(unique_labels)}

# Creates a list with labels (ordered according to the dictionary index)
labels = [labels2int[i[1]] for i in training_set.values()]

In [21]:
# Instead of '1' and '0', the function below will transform our labels
# into vectors [0.,1.] and [1.,0.]:
labels = np_utils.to_categorical(labels, 2)

In [22]:
# First, we will create a numpy array going from zero to len(training_set):
# (dtype=int guarantees they are integers)
random_selection = numpy.arange(len(training_set),dtype=int)

# Then we create a random state object with our seed:
# (the seed is useful to be able to reproduce our experiment later)
seed = 12345
rnd = numpy.random.RandomState(seed)

# Finally we shuffle the random_selection array:
rnd.shuffle(random_selection)

In [23]:
test_size=0.25 # we will use 25% for testing purposes

# Breaking the code below to make it easier to understand:
# => training_set[i][-1]
# Returns the 1D vector from item 'i'.
# => training_set[i][-1]/255.0
# Normalizes the input values from 0. to 1.
# => random_selection[:int(len(training_set)*(1-test_size))]
# Gives us the first (1-test_size)*100% of the shuffled items
trainData = [training_set[i][-1]/255.0 for i in random_selection[:int(len(training_set)*(1-test_size))]]
trainData = numpy.array(trainData)

trainLabels = [labels[i] for i in random_selection[:int(len(training_set)*(1-test_size))]]
trainLabels = numpy.array(trainLabels)

testData = [training_set[i][-1]/255.0 for i in random_selection[:int(len(training_set)*(test_size))]]
testData = numpy.array(testData)

testLabels = [labels[i] for i in random_selection[:int(len(training_set)*(test_size))]]
testLabels = numpy.array(testLabels)


## 3) Create the Sequential model¶

https://keras.io/layers/convolutional/#convolution2d
Convolution2D(nb_filter, nb_row, nb_col, init='glorot_uniform', activation=None, weights=None, border_mode='valid', subsample=(1, 1), dim_ordering='default', W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, bias=True)

And apply MaxPooling2D (it's a pool layer that uses the maximum value):
https://keras.io/layers/pooling/#maxpooling2d
http://cs231n.github.io/convolutional-networks/#pool

In [28]:
# input layer shape (now it's made of three matrices instead of 1D vectors)
input_layer_shape = (3,new_image_size[0], new_image_size[1]) # We use '3' because it's a RGB image.

# number of convolutional filters to use
number_of_filters = 32
# convolution kernel size
kernel_size = (3, 3)
# size of pooling area for max pooling
pool_size = (2, 2)


# I MUST EXPLAIN HERE I'VE ADDED ANOTHER LAYER WHEN COMPARED WITH THE FIRST NETWORK.¶

In [177]:
use_dropout = True
dropout_perc = 0.3

# https://keras.io/getting-started/sequential-model-guide/
# define the architecture of the network
model = Sequential()

# SOMETHING NEW HERE!!!
# INPUT LAYER
# Now this is our input layer. It will receive the image without making it flat as we did.
# https://keras.io/layers/convolutional/#convolution2d
kernel_size[0],
kernel_size[1],
border_mode='same', # keeps the output the same shape as the input
input_shape=input_layer_shape,
name='ConvLayer1'))

# The output's shape of the Convolution2D is (None,nb_filters,input_layer_shape[1], input_layer_shape[2]

name='ConvLayer1Activation'))

# https://keras.io/layers/pooling/#maxpooling2d
dim_ordering='th', # 'th'=>because I'm using Theano
name='PoolingLayer1'))

# After MaxPooling2D, the outputs will be shrinked by pool_size (see Keras help link above...)

# https://keras.io/layers/core/#dropout
if use_dropout:

# SOMETHING NEW HERE!!!
# SECOND CONV LAYER
# Now this is our input layer. It will receive the image without making it flat as we did.
# https://keras.io/layers/convolutional/#convolution2d
kernel_size[0],
kernel_size[1],
border_mode='same', # keeps the output the same shape as the input
name='ConvLayer2'))

# The output's shape of the Convolution2D is (None,nb_filters,input_layer_shape[1], input_layer_shape[2]

name='ConvLayer2Activation'))

# https://keras.io/layers/pooling/#maxpooling2d
dim_ordering='th', # 'th'=>because I'm using Theano
name='PoolingLayer2'))

# https://keras.io/layers/core/#dropout
if use_dropout:

# Transforms into 1D

# A Dense layer is a fully connected NN layer (feedforward)
# https://keras.io/layers/core/#dense
# Input size is automatic
dense_output_size = 128
init="uniform",
name='HiddenLayer'))

name='HiddenLayerActivation'))

if use_dropout:

# Because we want to classify between only two classes (binary), the final output is 2


# I WANT TO VISUALIZE WHAT THE NETWORK IS GENERATING AFTER THE CONVOLUTION AND AFTER THE POOLING¶

In [178]:
# https://keras.io/getting-started/faq/#how-can-i-obtain-the-output-of-an-intermediate-layer

intermediate_layer_names = ['ConvLayer1','ConvLayer1Activation','PoolingLayer1',
'ConvLayer2','ConvLayer2Activation','PoolingLayer2']

intermediate_objects = []

for layer_name in intermediate_layer_names:
intermediate_objects.append(Model(input=model.input, output=model.get_layer(layer_name).output))

idx = 1
input_image = testData[idx]
# The method "predict" would like to receive a list of images,
# therefore we reshape our image to look like a list of images ;)

intermediate_outputs = []
for layer_object in intermediate_objects:
intermediate_outputs.append(layer_object.predict(input_image.reshape(1,
input_image.shape[0],
input_image.shape[1],
input_image.shape[2])))

In [233]:
# Compare the sizes and remember the MaxPooling2D - https://keras.io/layers/pooling/#maxpooling2d
for i,n in zip(intermediate_outputs,intermediate_layer_names):
print n + " - output shape:",i.shape

ConvLayer1 - output shape: (1, 32, 32, 32)
ConvLayer1Activation - output shape: (1, 32, 32, 32)
PoolingLayer1 - output shape: (1, 32, 16, 16)
ConvLayer2 - output shape: (1, 8, 16, 16)
ConvLayer2Activation - output shape: (1, 8, 16, 16)
PoolingLayer2 - output shape: (1, 8, 8, 8)

In [180]:
for layer_out,name in zip(intermediate_outputs,intermediate_layer_names):
plt.figure(figsize=(20,10))
plt.suptitle("Original Input and " + name + " Filters Output - randomly defined, no training", size=30)

plt.subplot2grid((4,layer_out.shape[1]/4+3), (0, 0), colspan=3, rowspan=4)
plt.imshow(numpy.rollaxis(input_image, 0,3), interpolation='none')
plt.gca().axis('off')
plt.xticks([],[]) # avoids white spaces
plt.yticks([],[]) # avoids white spaces
plt.title("Original Input")
index = numpy.arange(layer_out.shape[1]).reshape(4,layer_out.shape[1]/4)
for ri in range(4):
for ci in range(layer_out.shape[1]/4):
plt.subplot2grid((4,layer_out.shape[1]/4+3), (ri, 3+ci))
plt.imshow(layer_out.reshape(layer_out.shape[1],layer_out.shape[2],layer_out.shape[3])[index[ri,ci]],
interpolation='none', cmap='gray')
plt.gca().axis('off')
plt.xticks([],[]) # avoids white spaces
plt.yticks([],[]) # avoids white spaces
plt.show()


### Now, let's have a look at the weights:¶

In [181]:
layer_name = 'ConvLayer1'
weights_random_conv1 = numpy.copy(model.get_layer(layer_name).get_weights()[0]) # weights
bias_random_conv1 = numpy.copy(model.get_layer(layer_name).get_weights()[1]) # bias

layer_name = 'ConvLayer2'
weights_random_conv2 = numpy.copy(model.get_layer(layer_name).get_weights()[0]) # weights
bias_random_conv2 = numpy.copy(model.get_layer(layer_name).get_weights()[1]) # bias

In [182]:
weights_random_conv1.shape, bias_random_conv1.shape, weights_random_conv2.shape, bias_random_conv2.shape

Out[182]:
((32, 3, 3, 3), (32,), (8, 32, 3, 3), (8,))

## 4) Compile the model, learn and test against the 25% we saved as testing set¶

In [183]:
%%time
# learning rate=0.01
sgd = SGD(lr=0.01)
model.compile(loss="binary_crossentropy",
optimizer=sgd,
metrics=["accuracy"])
model.fit(trainData, trainLabels, nb_epoch=1000, batch_size=128, verbose=0)

CPU times: user 1h 14min 35s, sys: 1min 7s, total: 1h 15min 43s
Wall time: 1h 15min 40s

In [184]:
# model.save('my_convnet_SDG.h5')
model = load_model('my_convnet_SDG.h5') # The model was saved!

In [185]:
score = model.evaluate(testData, testLabels, batch_size=128, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

('Test score:', 0.15456351254940032)
('Test accuracy:', 0.97056000011444088)

In [186]:
# https://keras.io/getting-started/faq/#how-can-i-obtain-the-output-of-an-intermediate-layer

intermediate_layer_names = ['ConvLayer1','ConvLayer1Activation','PoolingLayer1',
'ConvLayer2','ConvLayer2Activation','PoolingLayer2']

intermediate_objects = []

for layer_name in intermediate_layer_names:
intermediate_objects.append(Model(input=model.input, output=model.get_layer(layer_name).output))

idx = 1
input_image = testData[idx]
# The method "predict" would like to receive a list of images,
# therefore we reshape our image to look like a list of images ;)

intermediate_outputs = []
for layer_object in intermediate_objects:
intermediate_outputs.append(layer_object.predict(input_image.reshape(1,
input_image.shape[0],
input_image.shape[1],
input_image.shape[2])))

In [187]:
for layer_out,name in zip(intermediate_outputs,intermediate_layer_names):
plt.figure(figsize=(20,10))
plt.suptitle("Original Input and " + name + " Filters Output - trained network", size=30)

plt.subplot2grid((4,layer_out.shape[1]/4+3), (0, 0), colspan=3, rowspan=4)
plt.imshow(numpy.rollaxis(input_image, 0,3), interpolation='none')
plt.gca().axis('off')
plt.xticks([],[]) # avoids white spaces
plt.yticks([],[]) # avoids white spaces
plt.title("Original Input")
index = numpy.arange(layer_out.shape[1]).reshape(4,layer_out.shape[1]/4)
for ri in range(4):
for ci in range(layer_out.shape[1]/4):
plt.subplot2grid((4,layer_out.shape[1]/4+3), (ri, 3+ci))
plt.imshow(layer_out.reshape(layer_out.shape[1],layer_out.shape[2],layer_out.shape[3])[index[ri,ci]],
interpolation='none', cmap='gray')
plt.gca().axis('off')
plt.xticks([],[]) # avoids white spaces
plt.yticks([],[]) # avoids white spaces