#!/usr/bin/env python # coding: utf-8 # # Deep learning and Dogs vs. Cats - Part 1 # ## Data set https://www.kaggle.com/c/dogs-vs-cats # (the data set should be installed in two directories just above the one where this notebook is located: train and test1) # ### http://ricardodeazambuja.com/deep_learning/2017/01/29/easy-peasy_deep_learning/ # In[1]: # The package 'os' will be necessary to find the current # working directory os.getcwd() and also to list all # files in a directory os.listdir(). import os # https://docs.python.org/2/library/multiprocessing.html from multiprocessing import Pool # Only used for the final test import urllib, cStringIO # Yep, I don't like to import numpy as np :P import numpy # As I've explained somewhere, Scipy will help us # reading (using Pillow) and resizing images. import scipy.misc # Pyplot is what we use for plotting import matplotlib.pyplot as plt # And here is the Keras stuff: # We don't really need this since it would # be easy do code something like this ourselves from keras.utils import np_utils # https://keras.io/getting-started/sequential-model-guide/ from keras.models import Sequential from keras.layers import Dense from keras.layers import Activation from keras.layers import Dropout # https://keras.io/optimizers/ from keras.optimizers import SGD from keras.optimizers import Adagrad # While the method for saving a model is # part of the model object, to load we need # to import something else from keras.models import load_model # ## 1) Load the data sets # In[2]: # Returns the current working directory # (where the Python interpreter was called) path = os.getcwd() # The path separator used by the OS: sep = os.path.sep # This is the directory where the training images are located dirname = "train" # Generates a list of all images (actualy the filenames) from the training set, # but it will also include the full path imagePaths = [path+sep+dirname+sep+filename for filename in os.listdir(path+sep+dirname)] # In[3]: # To speed up things, I will use parallel computing! # Pool lets me use its map method and select the number of parallel processes. def import_training_set(args): ''' Reads the image from imagePath, resizes and returns it together with its label and original index. ''' index,imagePath,new_image_size = args # Reads the image image = scipy.misc.imread(imagePath) # Split will literally split a string at that character # returning a list of strings. # First we split according to the os.path.sep and keep # only the last item from the list gererated ([-1]). # This will give us the filename. filename = imagePath.split(os.path.sep)[-1] # Then we split it again using "." # and extract the first item ([0]): label = filename.split(".")[0] # and the second item ([1]): original_index = filename.split(".")[1] # Resizes the image (downsampling) to new_image_size and # converts it to a 1D list (flatten). input_vector = scipy.misc.imresize(image,new_image_size).flatten() return (index,(original_index,label,input_vector)) # In[4]: get_ipython().run_cell_magic('time', '', "# The size of the resized image.\n# After we apply the flatten method, it will become a list of 32x32x3 items (1024x3).\n# (Where is the 'x3' coming from? Our images are composed of three colors!)\nnew_image_size = 32,32\n\nnumber_of_parallel_processes = 7\n\n# When the map is finished, we will receive a list with tuples:\n# (index,(original_index,label,input_vector)) => (index,(original_index,'category',1D resized image))\n#\n# There's no guarantee about the ordering because they are running in parallel.\nans = Pool(number_of_parallel_processes).map(import_training_set,[(i,img,new_image_size) \n for i,img in enumerate(imagePaths)])\n") # In[5]: # Because import_training_set returns a tuple like this: # (index,(original_index,label,input_vector)) # and index is unique, we can convert to a dictionary # to solve our problem with unordered items: training_set = dict(ans) # Gives a hint to the garbage collector... del ans # In[6]: # Let's verify if we loaded all images: len(training_set) # In[7]: # And now test to see what is inside: training_set[0],training_set[24999] # ## 2) Prepare the data set for Keras # In[8]: # Let's imagine we don't know how many different labels we have. # (we know, they are 'cat' and 'dog'...) # A Python set can help us to create a list of unique labels. # (i[0]=>filename index, i[1]=>label, i[2]=>1D vector) unique_labels = set([i[1] for i in training_set.values()]) # With a list of unique labels, we will generate a dictionary # to convert from a label (string) to a index (integer): labels2int = {j:i for i,j in enumerate(unique_labels)} # Creates a list with labels (ordered according to the dictionary index) labels = [labels2int[i[1]] for i in training_set.values()] # In[9]: # Instead of '1' and '0', the function below will transform our labels # into vectors [0.,1.] and [1.,0.]: labels = np_utils.to_categorical(labels, 2) # In[10]: # First, we will create a numpy array going from zero to len(training_set): # (dtype=int guarantees they are integers) random_selection = numpy.arange(len(training_set),dtype=int) # Then we create a random state object with our seed: # (the seed is useful to be able to reproduce our experiment later) seed = 12345 rnd = numpy.random.RandomState(seed) # Finally we shuffle the random_selection array: rnd.shuffle(random_selection) # In[11]: test_size=0.25 # we will use 25% for testing purposes # Breaking the code below to make it easier to understand: # => training_set[i][-1] # Returns the 1D vector from item 'i'. # => training_set[i][-1]/255.0 # Normalizes the input values from 0. to 1. # => random_selection[:int(len(training_set)*(1-test_size))] # Gives us the first (1-test_size)*100% of the shuffled items trainData = [training_set[i][-1]/255.0 for i in random_selection[:int(len(training_set)*(1-test_size))]] trainData = numpy.array(trainData) trainLabels = [labels[i] for i in random_selection[:int(len(training_set)*(1-test_size))]] trainLabels = numpy.array(trainLabels) testData = [training_set[i][-1]/255.0 for i in random_selection[:int(len(training_set)*(test_size))]] testData = numpy.array(testData) testLabels = [labels[i] for i in random_selection[:int(len(training_set)*(test_size))]] testLabels = numpy.array(testLabels) # ## 3) Create the Sequential model # In[12]: use_dropout = True dropout_perc = 0.3 # https://keras.io/getting-started/sequential-model-guide/ # define the architecture of the network model = Sequential() # input layer has size "input_dim" (new_image_size[0]*new_image_size[1]*3). # The first hidden layer will have size 768, followed by 384 and 2. # 3072=>768=>384=>2 input_len = new_image_size[0]*new_image_size[1]*3 # A Dense layer is a fully connected NN layer (feedforward) # https://keras.io/layers/core/#dense model.add(Dense(input_len/4, input_dim=input_len, init="uniform")) # https://keras.io/layers/core/#activation # https://keras.io/activations/ model.add(Activation('relu')) # https://keras.io/layers/core/#dropout if use_dropout: model.add(Dropout(dropout_perc)) # Now this layer will have output dimension of 384 model.add(Dense(input_len/8, init="uniform")) model.add(Activation('relu')) if use_dropout: model.add(Dropout(dropout_perc)) # Because we want to classify between only two classes (binary), the final output is 2 model.add(Dense(2)) model.add(Activation("softmax")) # ## 4) Compile the model, learn and test against the 25% we saved as testing set # ### DON'T FORGET TO CREATE/RECREATE YOUR MODEL BEFORE COMPILING AND TRAINING (WEIGHTS ARE RANDOMLY INITIALISED) # In[13]: get_ipython().run_cell_magic('time', '', '# learning rate=0.01\nsgd = SGD(lr=0.01)\nmodel.compile(loss="binary_crossentropy", \n optimizer=sgd, \n metrics=["accuracy"])\nmodel.fit(trainData, trainLabels, nb_epoch=100, batch_size=128, verbose=0)\n') # In[14]: score = model.evaluate(testData, testLabels, batch_size=128, verbose=0) print('Test score:', score[0]) print('Test accuracy:', score[1]) # In[15]: model.save('my_model_SDG_dropout_100_epoch.h5') # In[17]: model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy']) model.fit(trainData, trainLabels, nb_epoch=50, batch_size=128, verbose=0) # In[18]: score = model.evaluate(testData, testLabels, batch_size=128, verbose=0) print('Test score:', score[0]) print('Test accuracy:', score[1]) # In[ ]: # model.save('my_model_RMSProp.h5') # In[20]: from keras.optimizers import Adagrad ada = Adagrad(lr=0.01, epsilon=1e-08, decay=0.0) model.compile(loss="binary_crossentropy", optimizer=sgd, metrics=["accuracy"]) model.fit(trainData, trainLabels, nb_epoch=50, batch_size=128, verbose=0) # In[1]: score = model.evaluate(testData, testLabels, batch_size=128, verbose=0) print('Test score:', score[0]) print('Test accuracy:', score[1]) # In[ ]: # model.save('my_model_Adagrad.h5') # # Test the saved models using images never seen before by the neural network # In[16]: # Loads my model that achieved accuracy 97.74% model = load_model('my_97perc_acc.h5') # model.save('my_model_SDG_dropout_100_epoch.h5') # In[17]: score = model.evaluate(testData, testLabels, batch_size=128, verbose=0) print('Test score:', score[0]) print('Test accuracy:', score[1]) # In[18]: plt.figure(figsize=(10,10)) for i in range(36): # Randomly selects one image from our test set (same used for error calculation) idx = numpy.random.randint(0,testData.shape[0]) X = testData[idx].reshape(1,testData[idx].shape[0]) # They are already normalised... Y = testLabels[idx] # [1.,0.]=>Dog # [0.,1.]=>Cat pred = model.predict(X) if pred[0,0]>pred[0,1]: title = "Dog! [{:.3f},{:.3f}]".format(pred[0,0],pred[0,1]) else: title = "Cat! [{:.3f},{:.3f}]".format(pred[0,0],pred[0,1]) # Visualization # But remember, the network was fed with the same data below reshaped as a 1D vector img = training_set[random_selection[:int(len(training_set)*(test_size))][idx]][-1].reshape((32,32,3)) plt.subplot2grid((6,6),(i/6,i%6)) plt.imshow(img,interpolation='none') plt.title(title, fontdict={'fontsize': 8}) plt.axis('off') plt.tight_layout() plt.show() # ## Testing using the test1 directory # ### Images the neural network has never seen before # But first we need to load and prepare the images # In[19]: # Returns the current working directory # (where the Python interpreter was called) path = os.getcwd() # The path separator used by the OS: sep = os.path.sep # This is the directory where the training images are located dirname = "test1" # Generates a list of all images (actualy the filenames) from the training set, # but it will also include the full path imagePaths = [path+sep+dirname+sep+filename for filename in os.listdir(path+sep+dirname)] # In[20]: get_ipython().run_cell_magic('time', '', 'ans = Pool(number_of_parallel_processes).map(import_training_set,[(i,img,new_image_size) \n for i,img in enumerate(imagePaths)])\n') # In[21]: testing_set = dict(ans) del ans # this is just to help saving memory... # ### Selects a random image # In[22]: idx = numpy.random.randint(0,len(testing_set)) X = testing_set[idx][-1].reshape(1,testing_set[idx][-1].shape[0])/255.0 # /255.0=>normalisation! # In[23]: # [1.,0.]=>Dog # [0.,1.]=>Cat pred = model.predict(X) if pred[0,0]>pred[0,1]: print("Dog! [{},{}]".format(pred[0,0],pred[0,1])) else: print("Cat! [{},{}]".format(pred[0,0],pred[0,1])) # In[24]: img = testing_set[idx][-1].reshape((32,32,3)) plt.imshow(img,interpolation='none') plt.show() # In[25]: plt.figure(figsize=(10,10)) for i in range(36): # Randomly selects one image from our test set (same used for error calculation) idx = numpy.random.randint(0,len(testing_set)) X = testing_set[idx][-1].reshape(1,testing_set[idx][-1].shape[0])/255.0 # /255.0=>normalisation! # [1.,0.]=>Dog # [0.,1.]=>Cat pred = model.predict(X) if pred[0,0]>pred[0,1]: title = "Dog! [{:.3f},{:.3f}]".format(pred[0,0],pred[0,1]) else: title = "Cat! [{:.3f},{:.3f}]".format(pred[0,0],pred[0,1]) # Visualization # But remember, the network 'eats' images reshaped as 1D vectors img = testing_set[idx][-1].reshape((32,32,3)) plt.subplot2grid((6,6),(i/6,i%6)) plt.imshow(img,interpolation='none') plt.title(title, fontdict={'fontsize': 8}) plt.axis('off') plt.tight_layout() plt.show() # ## Loading an image from a URL # In[26]: def get_img_from_url(url): # http://stackoverflow.com/a/7391991 file = cStringIO.StringIO(urllib.urlopen(url).read()) return scipy.misc.imread(file) # In[27]: dog_url = "https://cdn.pixabay.com/photo/2016/02/19/15/46/dog-1210559_960_720.jpg" cat_url = "https://cdn.pixabay.com/photo/2014/03/29/09/17/cat-300572_960_720.jpg" # In[28]: image = get_img_from_url(dog_url) X = scipy.misc.imresize(image,(32,32)).flatten()/255.0 # /255.0=>normalisation! # [1.,0.]=>Dog # [0.,1.]=>Cat pred = model.predict(X.reshape((1,X.shape[0]))) if pred[0,0]>pred[0,1]: print("Dog! [{},{}]".format(pred[0,0],pred[0,1])) else: print("Cat! [{},{}]".format(pred[0,0],pred[0,1])) img = X.reshape((32,32,3)) plt.imshow(img,interpolation='none') plt.show() # In[29]: image = get_img_from_url(cat_url) X = scipy.misc.imresize(image,(32,32)).flatten()/255.0 # /255.0=>normalisation! # [1.,0.]=>Dog # [0.,1.]=>Cat pred = model.predict(X.reshape((1,X.shape[0]))) if pred[0,0]>pred[0,1]: print("Dog! [{},{}]".format(pred[0,0],pred[0,1])) else: print("Cat! [{},{}]".format(pred[0,0],pred[0,1])) img = X.reshape((32,32,3)) plt.imshow(img,interpolation='none') plt.show() # In[ ]: