# Install the Kaggle API credentials and download the competition data.
# -p makes the cell safe to re-run (no error if the directory exists).
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle
# Restrict the key file to its owner — fixes the "Your Kaggle API key is
# readable by other users" warning emitted by the download below.
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c dogs-vs-cats
Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /root/.kaggle/kaggle.json' Downloading dogs-vs-cats.zip to /content 99% 808M/812M [00:20<00:00, 44.5MB/s] 100% 812M/812M [00:20<00:00, 41.9MB/s]
# Extract the competition archive (yields sampleSubmission.csv, test1.zip, train.zip).
!unzip dogs-vs-cats.zip
Archive: dogs-vs-cats.zip inflating: sampleSubmission.csv inflating: test1.zip inflating: train.zip
# Quietly (-qq) extract the labelled training images into ./train
!unzip -qq train.zip
# Sanity check: the training set should contain 25,000 images.
!ls train | wc -l
25000
import os
# Cross-check the shell `wc -l` count from Python — both report 25000 files.
len(os.listdir('train'))
25000
# Delete the now-extracted archives and Colab's default sample_data/ to free disk space.
!rm -r dogs-vs-cats.zip sampleSubmission.csv test1.zip train.zip sample_data/
import os, shutil

# The path to the directory where the original dataset was uncompressed.
original_dataset_dir = '/content/train'

# The directory where we will store our smaller dataset.
base_dir = '/content/subset'
os.makedirs(base_dir, exist_ok=True)

# Top-level directories for the training, validation and test splits.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# One <split>/<class> directory per combination.
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')

# makedirs(exist_ok=True) creates each leaf directory together with its
# parent split directory, and makes the cell safe to re-run (the original
# bare os.mkdir calls raised FileExistsError on a second execution).
for _class_dir in (train_cats_dir, train_dogs_dir,
                   validation_cats_dir, validation_dogs_dir,
                   test_cats_dir, test_dogs_dir):
    os.makedirs(_class_dir, exist_ok=True)
def _copy_images(prefix, start, stop, dst_dir):
    """Copy <prefix>.<i>.jpg for i in [start, stop) from the original
    dataset directory into dst_dir."""
    for i in range(start, stop):
        fname = '{}.{}.jpg'.format(prefix, i)
        shutil.copyfile(os.path.join(original_dataset_dir, fname),
                        os.path.join(dst_dir, fname))

# Per class: images 0-999 go to training, 1000-1499 to validation,
# 1500-1999 to test — the same files and destinations as the original
# six copy-paste loops, expressed once.
for _start, _stop, _dirs in (
        (0, 1000, {'cat': train_cats_dir, 'dog': train_dogs_dir}),
        (1000, 1500, {'cat': validation_cats_dir, 'dog': validation_dogs_dir}),
        (1500, 2000, {'cat': test_cats_dir, 'dog': test_dogs_dir})):
    for _prefix, _dst in _dirs.items():
        _copy_images(_prefix, _start, _stop, _dst)
# Sanity-check the split: expect 1000 train / 500 validation / 500 test per class.
for _label, _directory in (
        ('training cat', train_cats_dir),
        ('training dog', train_dogs_dir),
        ('validation cat', validation_cats_dir),
        ('validation dog', validation_dogs_dir),
        ('test cat', test_cats_dir),
        ('test dog', test_dogs_dir)):
    print('total {} images:'.format(_label), len(os.listdir(_directory)))
total training cat images: 1000 total training dog images: 1000 total validation cat images: 500 total validation dog images: 500 total test cat images: 500 total test dog images: 500
# The 2000/1000/1000-image subset is in place; drop the full 25k-image train dir.
!rm -r train
from keras import layers
from keras import models

# Small convnet for 150x150 RGB inputs: four Conv2D/MaxPooling2D stages
# (32 -> 64 -> 128 -> 128 filters, all 3x3 kernels with ReLU), flattened
# into a 512-unit dense layer, ending in a single sigmoid unit for the
# binary cat-vs-dog decision.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])
from keras import optimizers
# Binary classification: sigmoid output pairs with binary cross-entropy loss.
# NOTE(review): the `optimizers` import above is unused — the string
# 'rmsprop' selects RMSprop at its default learning rate; confirm that is
# intended rather than e.g. optimizers.RMSprop(lr=1e-4).
model.compile(loss='binary_crossentropy',
optimizer='rmsprop',
metrics=['acc'])
from keras.preprocessing.image import ImageDataGenerator

# Both generators only rescale pixel values from [0, 255] to [0, 1];
# no augmentation is applied at this stage.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Stream images resized to 150x150 in batches of 20. class_mode='binary'
# yields 0/1 labels to match the binary_crossentropy loss.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)
Found 2000 images belonging to 2 classes. Found 1000 images belonging to 2 classes.
# Train for 30 epochs; the log below shows 100 steps/epoch (2000 images at
# batch_size=20). NOTE(review): val_loss climbs steadily after ~epoch 8
# while training accuracy approaches 1.0 — the model overfits the 2000-image
# training set; augmentation and/or dropout would be the usual next step.
history = model.fit(
train_generator,
epochs=30,
validation_data=validation_generator)
Epoch 1/30 100/100 [==============================] - 23s 110ms/step - loss: 0.7299 - acc: 0.5080 - val_loss: 0.6926 - val_acc: 0.5000 Epoch 2/30 100/100 [==============================] - 11s 105ms/step - loss: 0.6938 - acc: 0.5170 - val_loss: 0.7003 - val_acc: 0.5000 Epoch 3/30 100/100 [==============================] - 12s 118ms/step - loss: 0.6823 - acc: 0.5585 - val_loss: 0.6757 - val_acc: 0.5510 Epoch 4/30 100/100 [==============================] - 10s 103ms/step - loss: 0.6558 - acc: 0.6235 - val_loss: 0.6358 - val_acc: 0.6350 Epoch 5/30 100/100 [==============================] - 11s 106ms/step - loss: 0.6038 - acc: 0.6820 - val_loss: 0.6054 - val_acc: 0.6630 Epoch 6/30 100/100 [==============================] - 11s 107ms/step - loss: 0.5573 - acc: 0.7050 - val_loss: 0.5927 - val_acc: 0.6830 Epoch 7/30 100/100 [==============================] - 11s 107ms/step - loss: 0.5088 - acc: 0.7405 - val_loss: 0.6323 - val_acc: 0.6240 Epoch 8/30 100/100 [==============================] - 11s 107ms/step - loss: 0.4596 - acc: 0.7880 - val_loss: 0.5591 - val_acc: 0.7100 Epoch 9/30 100/100 [==============================] - 10s 97ms/step - loss: 0.4107 - acc: 0.8070 - val_loss: 0.6288 - val_acc: 0.7130 Epoch 10/30 100/100 [==============================] - 10s 101ms/step - loss: 0.3348 - acc: 0.8575 - val_loss: 0.6814 - val_acc: 0.7130 Epoch 11/30 100/100 [==============================] - 11s 106ms/step - loss: 0.2490 - acc: 0.8935 - val_loss: 0.9510 - val_acc: 0.7060 Epoch 12/30 100/100 [==============================] - 11s 105ms/step - loss: 0.1812 - acc: 0.9235 - val_loss: 0.8586 - val_acc: 0.7360 Epoch 13/30 100/100 [==============================] - 11s 106ms/step - loss: 0.1575 - acc: 0.9470 - val_loss: 1.0982 - val_acc: 0.7360 Epoch 14/30 100/100 [==============================] - 10s 105ms/step - loss: 0.0951 - acc: 0.9720 - val_loss: 1.2492 - val_acc: 0.7060 Epoch 15/30 100/100 [==============================] - 12s 119ms/step - loss: 0.0825 - acc: 0.9750 - 
val_loss: 1.4846 - val_acc: 0.7230 Epoch 16/30 100/100 [==============================] - 10s 105ms/step - loss: 0.0463 - acc: 0.9835 - val_loss: 1.8739 - val_acc: 0.7130 Epoch 17/30 100/100 [==============================] - 11s 107ms/step - loss: 0.0973 - acc: 0.9800 - val_loss: 1.8018 - val_acc: 0.7310 Epoch 18/30 100/100 [==============================] - 11s 107ms/step - loss: 0.0480 - acc: 0.9845 - val_loss: 1.6225 - val_acc: 0.7250 Epoch 19/30 100/100 [==============================] - 11s 107ms/step - loss: 0.0297 - acc: 0.9895 - val_loss: 2.1245 - val_acc: 0.7350 Epoch 20/30 100/100 [==============================] - 10s 98ms/step - loss: 0.0304 - acc: 0.9915 - val_loss: 2.0890 - val_acc: 0.7230 Epoch 21/30 100/100 [==============================] - 10s 102ms/step - loss: 0.0658 - acc: 0.9855 - val_loss: 2.4989 - val_acc: 0.7110 Epoch 22/30 100/100 [==============================] - 11s 107ms/step - loss: 0.0291 - acc: 0.9900 - val_loss: 2.0622 - val_acc: 0.7240 Epoch 23/30 100/100 [==============================] - 11s 108ms/step - loss: 0.0100 - acc: 0.9975 - val_loss: 3.7533 - val_acc: 0.6810 Epoch 24/30 100/100 [==============================] - 11s 108ms/step - loss: 0.0371 - acc: 0.9925 - val_loss: 3.0020 - val_acc: 0.6980 Epoch 25/30 100/100 [==============================] - 11s 107ms/step - loss: 0.0120 - acc: 0.9960 - val_loss: 3.6558 - val_acc: 0.7010 Epoch 26/30 100/100 [==============================] - 12s 120ms/step - loss: 0.0575 - acc: 0.9870 - val_loss: 3.1962 - val_acc: 0.7090 Epoch 27/30 100/100 [==============================] - 12s 120ms/step - loss: 0.0427 - acc: 0.9915 - val_loss: 3.7945 - val_acc: 0.7240 Epoch 28/30 100/100 [==============================] - 12s 119ms/step - loss: 0.0188 - acc: 0.9975 - val_loss: 3.1029 - val_acc: 0.7180 Epoch 29/30 100/100 [==============================] - 12s 119ms/step - loss: 0.0284 - acc: 0.9925 - val_loss: 3.6895 - val_acc: 0.7040 Epoch 30/30 100/100 [==============================] - 10s 
102ms/step - loss: 0.0594 - acc: 0.9895 - val_loss: 3.0475 - val_acc: 0.7230
import matplotlib.pyplot as plt
%matplotlib inline
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()