% cd /home/ubuntu/courses/deeplearning1/nbs
/home/ubuntu/courses/deeplearning1/nbs
%mkdir data
% cd data
/home/ubuntu/courses/deeplearning1/nbs/data
# Install or upgrade kaggle-cli (presumably the package that provides the `kg` command used for the download).
! pip install -U kaggle-cli
Requirement already up-to-date: kaggle-cli in /home/ubuntu/anaconda2/lib/python2.7/site-packages
Requirement already up-to-date: progressbar2<3.35,>=3.34.3 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from kaggle-cli)
Requirement already up-to-date: MechanicalSoup<0.9,>=0.7.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from kaggle-cli)
Requirement already up-to-date: cliff<2.9,>=2.8.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from kaggle-cli)
Requirement already up-to-date: configparser in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from kaggle-cli)
Requirement already up-to-date: cssselect<1.1,>=1.0.1 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from kaggle-cli)
Requirement already up-to-date: lxml<4.1,>=4.0.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from kaggle-cli)
Requirement already up-to-date: python-utils>=2.1.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from progressbar2<3.35,>=3.34.3->kaggle-cli)
Requirement already up-to-date: beautifulsoup4 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from MechanicalSoup<0.9,>=0.7.0->kaggle-cli)
Requirement already up-to-date: requests>=2.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from MechanicalSoup<0.9,>=0.7.0->kaggle-cli)
Requirement already up-to-date: six>=1.4 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from MechanicalSoup<0.9,>=0.7.0->kaggle-cli)
Requirement already up-to-date: PrettyTable<0.8,>=0.7.1 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from cliff<2.9,>=2.8.0->kaggle-cli)
Requirement already up-to-date: pbr!=2.1.0,>=2.0.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from cliff<2.9,>=2.8.0->kaggle-cli)
Requirement already up-to-date: cmd2>=0.6.7 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from cliff<2.9,>=2.8.0->kaggle-cli)
Requirement already up-to-date: unicodecsv>=0.8.0; python_version < "3.0" in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from cliff<2.9,>=2.8.0->kaggle-cli)
Requirement already up-to-date: PyYAML>=3.10.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from cliff<2.9,>=2.8.0->kaggle-cli)
Requirement already up-to-date: pyparsing>=2.1.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from cliff<2.9,>=2.8.0->kaggle-cli)
Requirement already up-to-date: stevedore>=1.20.0 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from cliff<2.9,>=2.8.0->kaggle-cli)
Requirement already up-to-date: urllib3<1.23,>=1.21.1 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from requests>=2.0->MechanicalSoup<0.9,>=0.7.0->kaggle-cli)
Requirement already up-to-date: idna<2.7,>=2.5 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from requests>=2.0->MechanicalSoup<0.9,>=0.7.0->kaggle-cli)
Requirement already up-to-date: chardet<3.1.0,>=3.0.2 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from requests>=2.0->MechanicalSoup<0.9,>=0.7.0->kaggle-cli)
Requirement already up-to-date: certifi>=2017.4.17 in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from requests>=2.0->MechanicalSoup<0.9,>=0.7.0->kaggle-cli)
Requirement already up-to-date: pyperclip in /home/ubuntu/anaconda2/lib/python2.7/site-packages (from cmd2>=0.6.7->cliff<2.9,>=2.8.0->kaggle-cli)
You are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
# Make sure you accept the rules of the competition on Kaggle first.
# NOTE(review): "username" and "password" look like placeholders - substitute real
# credentials locally and avoid saving them in the notebook.
! kg download -u username -p password -c dogs-vs-cats-redux-kernels-edition
downloading https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/test.zip test.zip 100% |######################################| Time: 0:00:10 26.6 MiB/s downloading https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/train.zip train.zip 100% |#####################################| Time: 0:00:19 27.9 MiB/s downloading https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/sample_submission.csv sample_submission.csv 100% |#########################| Time: 0:00:00 479.1 KiB/s
!sudo apt-get update
! sudo apt install unzip
Hit:1 http://us-east-1.ec2.archive.ubuntu.com/ubuntu xenial InRelease Get:2 http://us-east-1.ec2.archive.ubuntu.com/ubuntu xenial-updates InRelease [102 kB] Get:3 http://us-east-1.ec2.archive.ubuntu.com/ubuntu xenial-backports InRelease [102 kB] Get:4 http://security.ubuntu.com/ubuntu xenial-security InRelease [102 kB] Get:5 http://us-east-1.ec2.archive.ubuntu.com/ubuntu xenial-updates/main amd64 Packages [644 kB] Ign:6 http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 InRelease Hit:7 http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 Release Get:8 http://us-east-1.ec2.archive.ubuntu.com/ubuntu xenial-updates/universe amd64 Packages [541 kB] Fetched 1,491 kB in 0s (4,087 kB/s) Reading package lists... Done Reading package lists... Done Building dependency tree Reading state information... Done unzip is already the newest version (6.0-20ubuntu1). 0 upgraded, 0 newly installed, 0 to remove and 262 not upgraded.
! unzip -q test.zip && unzip -q train.zip
! mkdir -p sample/test sample/train sample/valid
! mkdir -p sample/results
! mkdir results
! mkdir -p valid
import shutil, os, glob
import random
# Build the validation split and the small sample sets from the unzipped data.
#
# Each draw uses random.sample() on a single directory listing, so every file
# is picked at most once. Drawing with random.choice() per iteration can return
# the same name repeatedly; copying a duplicate just overwrites the earlier
# copy, which leaves fewer files than requested.
def _pick(folder, count):
    """Return up to `count` distinct random entry names from `folder`."""
    names = os.listdir(folder)
    return random.sample(names, min(count, len(names)))

# Move (not copy) 3000 training images into valid/ first, so that the sample
# training set below can never contain an image that is also used for validation.
for name in _pick("train/", 3000):
    shutil.move("train/" + name, "valid/")

# 1000 of the remaining training images are copied into the sample set.
for name in _pick("train/", 1000):
    shutil.copy("train/" + name, "sample/train/")

# 500 test images are copied into the sample set.
for name in _pick("test/", 500):
    shutil.copy("test/" + name, "sample/test/")

# 1000 validation images are copied into the sample set.
for name in _pick("valid/", 1000):
    shutil.copy("valid/" + name, "sample/valid/")
! mkdir -p train/dogs train/cats
! mv train/cat.*.jpg train/cats
! mv train/dog.*.jpg train/dogs
! mkdir -p valid/dogs valid/cats
! mv valid/cat.*.jpg valid/cats
! mv valid/dog.*.jpg valid/dogs
%cd sample/train
/home/ubuntu/courses/deeplearning1/nbs/data/sample/train
! mkdir cats dogs
! mv dog.*.jpg dogs/
! mv cat.*.jpg cats/
% cd /home/ubuntu/courses/deeplearning1/nbs/data/sample/valid
/home/ubuntu/courses/deeplearning1/nbs/data/sample/valid
! mkdir cats dogs
! mv dog.*.jpg dogs/
! mv cat.*.jpg cats/
% cd /home/ubuntu/courses/deeplearning1/nbs/data/
/home/ubuntu/courses/deeplearning1/nbs/data
% cd test/
/home/ubuntu/courses/deeplearning1/nbs/data/test
! mkdir unknown
! mv *.jpg unknown/
% cd /home/ubuntu/courses/deeplearning1/nbs/data/sample/test
/home/ubuntu/courses/deeplearning1/nbs/data/sample/test
! mkdir unknown
! mv *.jpg unknown/
# Root folder holding the downloaded and reorganised competition data.
DATA_HOME_DIR = "/home/ubuntu/courses/deeplearning1/nbs/data"
# Switch to the notebook folder (presumably so the local utils / vgg16 modules can be imported).
%cd /home/ubuntu/courses/deeplearning1/nbs/
/home/ubuntu/courses/deeplearning1/nbs
from utils import *
from vgg16 import Vgg16
%matplotlib inline
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103) /home/ubuntu/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5. warnings.warn(warn) Using Theano backend.
%cd $DATA_HOME_DIR
#Set path to sample/ path if desired
path = DATA_HOME_DIR + '/sample/'
test_path = path + '/test/' #We use all the test data
results_path=DATA_HOME_DIR + '/results/'
train_path=path + '/train/'
valid_path=path + '/valid/'
/home/ubuntu/courses/deeplearning1/nbs/data
# Instantiate the Vgg16 wrapper and open a stream of small training batches.
vgg = Vgg16()
batch_size = 4
no_of_epochs = 1
batches = vgg.get_batches(train_path, batch_size=batch_size)
Found 974 images belonging to 2 classes.
# Pull one batch, show the images with their labels as titles, and display
# the model's predictions for that batch as the cell result.
imgs, labels = next(batches)
plots(imgs, titles=labels)
vgg.predict(imgs, True)
(array([ 0.1868, 0.0339, 0.6409, 0.5215], dtype=float32), array([223, 159, 281, 205]), [u'schipperke', u'Rhodesian_ridgeback', u'tabby', u'flat-coated_retriever'])
# Fine-tune the model.
# Training uses batches of 64; the validation stream uses twice that size.
batch_size = 64
no_of_epochs = 1
batches = vgg.get_batches(train_path, batch_size=batch_size)
val_batches = vgg.get_batches(valid_path, batch_size=batch_size * 2)
vgg.finetune(batches)
# NOTE(review): this replaces the optimizer's lr attribute with a plain float;
# confirm it takes effect with the Keras version in use.
vgg.model.optimizer.lr = 0.01
Found 974 images belonging to 2 classes. Found 855 images belonging to 2 classes.
# The validation batches are passed to fit(), so the model is checked against
# the validation set on every epoch. After each epoch the weights are written
# to results_path under a name that carries the epoch index.
latest_weights_filename = None
for epoch in range(no_of_epochs):
    print("Running epoch: %d" % epoch)
    vgg.fit(batches, val_batches, nb_epoch=1)
    latest_weights_filename = 'ft%d.h5' % epoch
    vgg.model.save_weights(results_path + latest_weights_filename)
print("Completed %s fit operations" % no_of_epochs)
Running epoch: 0 Epoch 1/1 974/974 [==============================] - 58s - loss: 0.5535 - acc: 0.9189 - val_loss: 0.3400 - val_acc: 0.9684 Completed 1 fit operations
# Re-open the validation stream with a small batch size so a single batch can be inspected.
batch_size = 4
val_batches = vgg.get_batches(valid_path, batch_size=batch_size)
Found 855 images belonging to 2 classes.
# Pull one validation batch, show the images with their labels as titles,
# and display the fine-tuned model's predictions as the cell result.
imgs, labels = next(val_batches)
plots(imgs, titles=labels)
vgg.predict(imgs, True)
(array([ 1., 1., 1., 1.], dtype=float32), array([0, 1, 1, 1]), ['cats', 'dogs', 'dogs', 'dogs'])
from PIL import Image

# You can verify the column ordering by viewing some images:
# run the model over valid_path, then look at entry 1 - its probability row,
# its class index, its file name and finally the image itself.
val_batches, probs = vgg.test(valid_path, batch_size)
print(probs[1])
print(val_batches.classes[1])
print(val_batches.filenames[1])
Image.open(valid_path + val_batches.filenames[1])
Found 3000 images belonging to 2 classes. [ 1. 0.] 0 cats/cat.2126.jpg
from keras.preprocessing import image

filenames = val_batches.filenames
expected_labels = val_batches.classes  # 0 or 1
# Column 0 of probs is kept as the prediction score; rounding its complement
# turns it into a 0/1 label comparable with expected_labels.
our_predictions = probs[:, 0]
our_labels = np.round(1 - our_predictions)
# Helper to plot validation-set images by index.
# plots() is a helper function from utils.py.
def plots_idx(idx, titles=None):
    """Print the file name of every index in `idx`, then plot those images.

    idx    -- re-iterable collection of positions into the global `filenames`
    titles -- optional titles, forwarded unchanged to plots()
    """
    for pos in idx:
        print(filenames[pos])
    pictures = [image.load_img(valid_path + filenames[pos]) for pos in idx]
    plots(pictures, titles=titles)
# Number of images to view for each visualization task.
n_view = 4

# 1. A few correct labels at random.
correct = np.where(our_labels == expected_labels)[0]
print("Found %d correct labels" % len(correct))
idx = permutation(correct)[:n_view]
plots_idx(idx, our_predictions[idx])
Found 2950 correct labels cats/cat.3887.jpg cats/cat.611.jpg dogs/dog.10859.jpg cats/cat.5595.jpg
# 2. A few incorrect labels at random.
incorrect = np.where(our_labels != expected_labels)[0]
print("Found %d incorrect labels" % len(incorrect))
idx = permutation(incorrect)[:n_view]
plots_idx(idx, our_predictions[idx])
Found 50 incorrect labels cats/cat.4821.jpg dogs/dog.595.jpg cats/cat.7599.jpg cats/cat.2938.jpg
# 3a. The images we were most confident were cats, and are actually cats.
# Sorting the scores in descending order puts the highest column-0 values first.
correct_cats = np.where((our_labels == 0) & (our_labels == expected_labels))[0]
print("Found %d confident correct cats labels" % len(correct_cats))
most_correct_cats = np.argsort(our_predictions[correct_cats])[::-1][:n_view]
plots_idx(correct_cats[most_correct_cats],
          our_predictions[correct_cats][most_correct_cats])
Found 1499 confident correct cats labels cats/cat.6884.jpg cats/cat.7752.jpg cats/cat.8724.jpg cats/cat.557.jpg
# 3b. The images we were most confident were dogs, and are actually dogs.
# Ascending order puts the lowest column-0 scores first.
correct_dogs = np.where((our_labels == 1) & (our_labels == expected_labels))[0]
print("Found %d confident correct dogs labels" % len(correct_dogs))
most_correct_dogs = np.argsort(our_predictions[correct_dogs])[:n_view]
plots_idx(correct_dogs[most_correct_dogs],
          our_predictions[correct_dogs][most_correct_dogs])
Found 1451 confident correct dogs labels dogs/dog.8854.jpg dogs/dog.6413.jpg dogs/dog.9854.jpg dogs/dog.7749.jpg
# 4a. The images we were most confident were cats, but are actually dogs.
incorrect_cats = np.where((our_labels == 0) & (our_labels != expected_labels))[0]
print("Found %d incorrect cats" % len(incorrect_cats))
# Only plot when there is at least one such image.
if len(incorrect_cats):
    most_incorrect_cats = np.argsort(our_predictions[incorrect_cats])[::-1][:n_view]
    plots_idx(incorrect_cats[most_incorrect_cats],
              our_predictions[incorrect_cats][most_incorrect_cats])
Found 6 incorrect cats dogs/dog.1622.jpg dogs/dog.5251.jpg dogs/dog.6694.jpg dogs/dog.5642.jpg
# 4b. The images we were most confident were dogs, but are actually cats.
incorrect_dogs = np.where((our_labels == 1) & (our_labels != expected_labels))[0]
print("Found %d incorrect dogs" % len(incorrect_dogs))
# Only plot when there is at least one such image.
if len(incorrect_dogs):
    most_incorrect_dogs = np.argsort(our_predictions[incorrect_dogs])[:n_view]
    plots_idx(incorrect_dogs[most_incorrect_dogs],
              our_predictions[incorrect_dogs][most_incorrect_dogs])
Found 21 incorrect dogs cats/cat.7920.jpg cats/cat.1267.jpg cats/cat.7194.jpg cats/cat.376.jpg
# 5. The most uncertain labels (i.e. those with probability closest to 0.5).
most_uncertain = np.argsort(np.abs(our_predictions - 0.5))
# Slice the indices for both arguments so exactly one title is passed per image.
plots_idx(most_uncertain[:n_view], our_predictions[most_uncertain[:n_view]])
# NOTE(review): the visible training loop saves weights as 'ft<epoch>.h5';
# confirm that 'final.h5' exists in results_path before running this line.
vgg.model.load_weights(results_path+'final.h5')
%cd $DATA_HOME_DIR
#Set path to sample/ path if desired
path = DATA_HOME_DIR
test_path = path + '/test/' #We use all the test data
train_path=path + '/train/'
valid_path=path + '/valid/'
/home/ubuntu/courses/deeplearning1/nbs/data
Go back to the top and re-run the cells, now using the full-data paths set above.