from theano.sandbox import cuda
cuda.use('gpu0')
Using gpu device 0: GeForce GTX TITAN X (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 4007)
%matplotlib inline
from __future__ import print_function, division
path = "data/state/"
#path = "data/state/sample/"
import utils; reload(utils)
from utils import *
from IPython.display import FileLink
Using Theano backend.
batch_size=64
batches = get_batches(path+'train', batch_size=batch_size)
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)
Found 18946 images belonging to 10 classes.
Found 3478 images belonging to 10 classes.
(val_classes, trn_classes, val_labels, trn_labels,
val_filenames, filenames, test_filenames) = get_classes(path)
Found 18946 images belonging to 10 classes.
Found 3478 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.
Rather than using batches, we could just import all the data into an array to save some processing time. (In most examples I'm using the batches, however - just because that's how I happened to start out.)
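For reference, this is what get_data in utils.py does: it iterates over a non-shuffled, batch-size-1, unlabeled generator and concatenates the images into one big numpy array.

def get_data(path):
    # shuffle=False keeps directory order; class_mode=None yields images only
    batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None)
    return np.concatenate([batches.next() for i in range(batches.nb_sample)])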
trn = get_data(path+'train')
val = get_data(path+'valid')
Found 18946 images belonging to 10 classes.
KeyboardInterrupt (the cell was interrupted manually while get_data was iterating over the training images one at a time)
save_array(path+'results/val.dat', val)
save_array(path+'results/trn.dat', trn)
val = load_array(path+'results/val.dat')
trn = load_array(path+'results/trn.dat')
We should find that everything that worked on the sample (see statefarm-sample.ipynb), works on the full dataset too. Only better! Because now we have more data. So let's see how they go - the models in this section are exact copies of the sample notebook models.
def conv1(batches):
model = Sequential([
BatchNormalization(axis=1, input_shape=(3,224,224)),
Convolution2D(32,3,3, activation='relu'),
BatchNormalization(axis=1),
MaxPooling2D((3,3)),
Convolution2D(64,3,3, activation='relu'),
BatchNormalization(axis=1),
MaxPooling2D((3,3)),
Flatten(),
Dense(200, activation='relu'),
BatchNormalization(),
Dense(10, activation='softmax')
])
model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
model.optimizer.lr = 0.001
model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
return model
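One caveat to be aware of: in Keras 1, assigning a plain Python float to model.optimizer.lr (as conv1 does above) replaces the backend learning-rate variable, so it may have no effect on a training function that has already been compiled. If a learning-rate change doesn't seem to bite, a safer pattern is to update the variable in place:

import keras.backend as K
# update the existing learning-rate variable rather than replacing it with a float
K.set_value(model.optimizer.lr, 1e-3)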
model = conv1(batches)
Epoch 1/2
18946/18946 [==============================] - 114s - loss: 0.2273 - acc: 0.9405 - val_loss: 2.4946 - val_acc: 0.2826
Epoch 2/2
18946/18946 [==============================] - 114s - loss: 0.0120 - acc: 0.9990 - val_loss: 1.5872 - val_acc: 0.5253
Epoch 1/4
18946/18946 [==============================] - 114s - loss: 0.0093 - acc: 0.9992 - val_loss: 1.4836 - val_acc: 0.5825
Epoch 2/4
18946/18946 [==============================] - 114s - loss: 0.0032 - acc: 1.0000 - val_loss: 1.3142 - val_acc: 0.6162
Epoch 3/4
18946/18946 [==============================] - 114s - loss: 0.0035 - acc: 0.9996 - val_loss: 1.5061 - val_acc: 0.5771
Epoch 4/4
18946/18946 [==============================] - 114s - loss: 0.0036 - acc: 0.9997 - val_loss: 1.4528 - val_acc: 0.5808
Interestingly, with no regularization or augmentation we're getting some reasonable results from our simple convolutional model. So with augmentation, we will hopefully see some very good results.
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05,
shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
batches = get_batches(path+'train', gen_t, batch_size=batch_size)
Found 18946 images belonging to 10 classes.
model = conv1(batches)
Epoch 1/2
18946/18946 [==============================] - 114s - loss: 1.2804 - acc: 0.5891 - val_loss: 2.0614 - val_acc: 0.3407
Epoch 2/2
18946/18946 [==============================] - 114s - loss: 0.6716 - acc: 0.7916 - val_loss: 1.3377 - val_acc: 0.6208
Epoch 1/4
18946/18946 [==============================] - 115s - loss: 0.4787 - acc: 0.8594 - val_loss: 1.2230 - val_acc: 0.6228
Epoch 2/4
18946/18946 [==============================] - 114s - loss: 0.3724 - acc: 0.8931 - val_loss: 1.3030 - val_acc: 0.6282
Epoch 3/4
18946/18946 [==============================] - 114s - loss: 0.3086 - acc: 0.9162 - val_loss: 1.1986 - val_acc: 0.7119
Epoch 4/4
18946/18946 [==============================] - 114s - loss: 0.2612 - acc: 0.9283 - val_loss: 1.4794 - val_acc: 0.5799
model.optimizer.lr = 0.0001
model.fit_generator(batches, batches.nb_sample, nb_epoch=15, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
Epoch 1/15
18946/18946 [==============================] - 114s - loss: 0.2391 - acc: 0.9361 - val_loss: 1.2511 - val_acc: 0.6886
Epoch 2/15
18946/18946 [==============================] - 114s - loss: 0.2075 - acc: 0.9430 - val_loss: 1.1327 - val_acc: 0.7294
Epoch 3/15
18946/18946 [==============================] - 114s - loss: 0.1800 - acc: 0.9529 - val_loss: 1.1099 - val_acc: 0.7294
Epoch 4/15
18946/18946 [==============================] - 114s - loss: 0.1675 - acc: 0.9557 - val_loss: 1.0660 - val_acc: 0.7363
Epoch 5/15
18946/18946 [==============================] - 114s - loss: 0.1432 - acc: 0.9625 - val_loss: 1.1585 - val_acc: 0.7073
Epoch 6/15
18946/18946 [==============================] - 114s - loss: 0.1358 - acc: 0.9627 - val_loss: 1.1389 - val_acc: 0.6947
Epoch 7/15
18946/18946 [==============================] - 114s - loss: 0.1283 - acc: 0.9665 - val_loss: 1.1329 - val_acc: 0.7369
Epoch 8/15
18946/18946 [==============================] - 114s - loss: 0.1180 - acc: 0.9686 - val_loss: 1.1817 - val_acc: 0.7194
Epoch 9/15
18946/18946 [==============================] - 114s - loss: 0.1137 - acc: 0.9704 - val_loss: 1.0923 - val_acc: 0.7142
Epoch 10/15
18946/18946 [==============================] - 114s - loss: 0.1076 - acc: 0.9720 - val_loss: 1.0983 - val_acc: 0.7358
Epoch 11/15
18946/18946 [==============================] - 114s - loss: 0.1032 - acc: 0.9736 - val_loss: 1.0206 - val_acc: 0.7458
Epoch 12/15
18946/18946 [==============================] - 114s - loss: 0.0956 - acc: 0.9740 - val_loss: 0.9039 - val_acc: 0.7809
Epoch 13/15
18946/18946 [==============================] - 114s - loss: 0.0962 - acc: 0.9740 - val_loss: 1.3386 - val_acc: 0.6587
Epoch 14/15
18946/18946 [==============================] - 114s - loss: 0.0892 - acc: 0.9777 - val_loss: 1.1150 - val_acc: 0.7470
Epoch 15/15
18946/18946 [==============================] - 114s - loss: 0.0886 - acc: 0.9773 - val_loss: 1.9190 - val_acc: 0.5802
<keras.callbacks.History at 0x7f3b6b66f610>
I'm shocked by how good these results are! We're regularly seeing 75-80% accuracy on the validation set, which puts us into the top third or better of the competition. With such a simple model and no dropout or semi-supervised learning, this really speaks to the power of this approach to data augmentation.
Unfortunately, the results are still very unstable - the validation accuracy jumps from epoch to epoch. Perhaps a deeper model with some dropout would help.
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05,
shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
batches = get_batches(path+'train', gen_t, batch_size=batch_size)
Found 18946 images belonging to 10 classes.
model = Sequential([
BatchNormalization(axis=1, input_shape=(3,224,224)),
Convolution2D(32,3,3, activation='relu'),
BatchNormalization(axis=1),
MaxPooling2D(),
Convolution2D(64,3,3, activation='relu'),
BatchNormalization(axis=1),
MaxPooling2D(),
Convolution2D(128,3,3, activation='relu'),
BatchNormalization(axis=1),
MaxPooling2D(),
Flatten(),
Dense(200, activation='relu'),
BatchNormalization(),
Dropout(0.5),
Dense(200, activation='relu'),
BatchNormalization(),
Dropout(0.5),
Dense(10, activation='softmax')
])
model.compile(Adam(lr=10e-5), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
Epoch 1/2
18946/18946 [==============================] - 159s - loss: 2.6578 - acc: 0.2492 - val_loss: 1.8681 - val_acc: 0.3844
Epoch 2/2
18946/18946 [==============================] - 158s - loss: 1.8098 - acc: 0.4334 - val_loss: 1.3152 - val_acc: 0.5670
<keras.callbacks.History at 0x7f227f103ad0>
model.optimizer.lr=0.001
model.fit_generator(batches, batches.nb_sample, nb_epoch=10, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
Epoch 1/10
18946/18946 [==============================] - 159s - loss: 1.4232 - acc: 0.5405 - val_loss: 1.0877 - val_acc: 0.6452
Epoch 2/10
18946/18946 [==============================] - 159s - loss: 1.1155 - acc: 0.6346 - val_loss: 1.2730 - val_acc: 0.6878
Epoch 3/10
18946/18946 [==============================] - 159s - loss: 0.9043 - acc: 0.7025 - val_loss: 1.1393 - val_acc: 0.6354
Epoch 4/10
18946/18946 [==============================] - 159s - loss: 0.7444 - acc: 0.7529 - val_loss: 1.1037 - val_acc: 0.7087
Epoch 5/10
18946/18946 [==============================] - 159s - loss: 0.6299 - acc: 0.7955 - val_loss: 0.9123 - val_acc: 0.7455
Epoch 6/10
18946/18946 [==============================] - 159s - loss: 0.5220 - acc: 0.8275 - val_loss: 1.0418 - val_acc: 0.7484
Epoch 7/10
18946/18946 [==============================] - 159s - loss: 0.4686 - acc: 0.8495 - val_loss: 1.2907 - val_acc: 0.6599
Epoch 8/10
18946/18946 [==============================] - 159s - loss: 0.4190 - acc: 0.8653 - val_loss: 1.1321 - val_acc: 0.6906
Epoch 9/10
18946/18946 [==============================] - 159s - loss: 0.3735 - acc: 0.8802 - val_loss: 1.1235 - val_acc: 0.7458
Epoch 10/10
18946/18946 [==============================] - 159s - loss: 0.3226 - acc: 0.8969 - val_loss: 1.2040 - val_acc: 0.7343
<keras.callbacks.History at 0x7f227f104d10>
model.optimizer.lr=0.00001
model.fit_generator(batches, batches.nb_sample, nb_epoch=10, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
Epoch 1/10
18946/18946 [==============================] - 159s - loss: 0.3183 - acc: 0.8976 - val_loss: 1.0359 - val_acc: 0.7688
Epoch 2/10
18946/18946 [==============================] - 158s - loss: 0.2788 - acc: 0.9109 - val_loss: 1.5806 - val_acc: 0.6705
Epoch 3/10
18946/18946 [==============================] - 158s - loss: 0.2810 - acc: 0.9124 - val_loss: 0.9836 - val_acc: 0.7887
Epoch 4/10
18946/18946 [==============================] - 158s - loss: 0.2403 - acc: 0.9244 - val_loss: 1.1832 - val_acc: 0.7493
Epoch 5/10
18946/18946 [==============================] - 159s - loss: 0.2195 - acc: 0.9303 - val_loss: 1.1524 - val_acc: 0.7510
Epoch 6/10
18946/18946 [==============================] - 159s - loss: 0.2085 - acc: 0.9359 - val_loss: 1.2245 - val_acc: 0.7415
Epoch 7/10
18946/18946 [==============================] - 158s - loss: 0.1961 - acc: 0.9399 - val_loss: 1.1232 - val_acc: 0.7654
Epoch 8/10
18946/18946 [==============================] - 158s - loss: 0.1851 - acc: 0.9416 - val_loss: 1.0956 - val_acc: 0.6892
Epoch 9/10
18946/18946 [==============================] - 158s - loss: 0.1798 - acc: 0.9451 - val_loss: 1.0586 - val_acc: 0.7740
Epoch 10/10
18946/18946 [==============================] - 159s - loss: 0.1669 - acc: 0.9471 - val_loss: 1.4633 - val_acc: 0.6656
<keras.callbacks.History at 0x7f227f104ed0>
This is looking quite a bit better - the accuracy is similar, but the stability is higher. There's still some way to go however...
Since we have so little data, and it is similar to ImageNet images (full-color photos), using pre-trained VGG weights is likely to be helpful - in fact, it seems likely that we won't need to fine-tune the convolutional layer weights much, if at all. So we can pre-compute the output of the last convolutional layer, as we did in lesson 3 when we experimented with dropout. (However, this means that we can't use full data augmentation, since we can't pre-compute something that changes with every image.)
vgg = Vgg16()
model=vgg.model
last_conv_idx = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_idx+1]
conv_model = Sequential(conv_layers)
# batches shuffle must be set to False when pre-computing features,
# so the saved features stay aligned with the labels from get_classes
batches = get_batches(path+'train', batch_size=batch_size, shuffle=False)
(val_classes, trn_classes, val_labels, trn_labels,
val_filenames, filenames, test_filenames) = get_classes(path)
Found 18946 images belonging to 10 classes. Found 3478 images belonging to 10 classes. Found 79726 images belonging to 1 classes.
conv_feat = conv_model.predict_generator(batches, batches.nb_sample)
conv_val_feat = conv_model.predict_generator(val_batches, val_batches.nb_sample)
conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)
save_array(path+'results/conv_val_feat.dat', conv_val_feat)
save_array(path+'results/conv_test_feat.dat', conv_test_feat)
save_array(path+'results/conv_feat.dat', conv_feat)
conv_feat = load_array(path+'results/conv_feat.dat')
conv_val_feat = load_array(path+'results/conv_val_feat.dat')
conv_val_feat.shape
(3478, 512, 14, 14)
Since we've pre-computed the output of the last convolutional layer, we need to create a network that takes that as input, and predicts our 10 classes. Let's try using a simplified version of VGG's dense layers.
def get_bn_layers(p):
return [
MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
Flatten(),
Dropout(p/2),
Dense(128, activation='relu'),
BatchNormalization(),
Dropout(p/2),
Dense(128, activation='relu'),
BatchNormalization(),
Dropout(p),
Dense(10, activation='softmax')
]
p=0.8
bn_model = Sequential(get_bn_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=1,
validation_data=(conv_val_feat, val_labels))
Train on 18946 samples, validate on 3478 samples
Epoch 1/1
18946/18946 [==============================] - 3s - loss: 1.5894 - acc: 0.5625 - val_loss: 0.7031 - val_acc: 0.7522
<keras.callbacks.History at 0x7fdfd921a690>
bn_model.optimizer.lr=0.01
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=2,
validation_data=(conv_val_feat, val_labels))
Train on 18946 samples, validate on 3478 samples
Epoch 1/2
18946/18946 [==============================] - 3s - loss: 0.2870 - acc: 0.9109 - val_loss: 0.7728 - val_acc: 0.7683
Epoch 2/2
18946/18946 [==============================] - 3s - loss: 0.1422 - acc: 0.9594 - val_loss: 0.7576 - val_acc: 0.7936
<keras.callbacks.History at 0x7fdfd921a8d0>
bn_model.save_weights(path+'models/conv8.h5')
Looking good! Let's try pre-computing 5 epochs worth of augmented data, so we can experiment with combining dropout and augmentation on the pre-trained model.
We'll use our usual data augmentation parameters:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05,
shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
da_batches = get_batches(path+'train', gen_t, batch_size=batch_size, shuffle=False)
Found 18946 images belonging to 10 classes.
We use these augmented batches to create a dataset of convolutional features 5x bigger than the training set.
da_conv_feat = conv_model.predict_generator(da_batches, da_batches.nb_sample*5)
save_array(path+'results/da_conv_feat2.dat', da_conv_feat)
da_conv_feat = load_array(path+'results/da_conv_feat2.dat')
Let's include the real training data as well in its non-augmented form.
da_conv_feat = np.concatenate([da_conv_feat, conv_feat])
Since we've now got a dataset 6x bigger than before, we'll need to copy our labels 6 times too.
da_trn_labels = np.concatenate([trn_labels]*6)
Based on some experiments, the previous model architecture works well here too, although with bigger dense layers.
def get_bn_da_layers(p):
return [
MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
Flatten(),
Dropout(p),
Dense(256, activation='relu'),
BatchNormalization(),
Dropout(p),
Dense(256, activation='relu'),
BatchNormalization(),
Dropout(p),
Dense(10, activation='softmax')
]
p=0.8
bn_model = Sequential(get_bn_da_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
Now we can train the model as usual, with pre-computed augmented data.
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=1,
validation_data=(conv_val_feat, val_labels))
Train on 113676 samples, validate on 3478 samples
Epoch 1/1
113676/113676 [==============================] - 16s - loss: 1.5848 - acc: 0.5068 - val_loss: 0.6340 - val_acc: 0.8131
<keras.callbacks.History at 0x7fdd886a7c90>
bn_model.optimizer.lr=0.01
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4,
validation_data=(conv_val_feat, val_labels))
Train on 113676 samples, validate on 3478 samples
Epoch 1/4
113676/113676 [==============================] - 16s - loss: 0.6652 - acc: 0.7785 - val_loss: 0.6343 - val_acc: 0.8082
Epoch 2/4
113676/113676 [==============================] - 16s - loss: 0.5247 - acc: 0.8318 - val_loss: 0.6951 - val_acc: 0.8085
Epoch 3/4
113676/113676 [==============================] - 16s - loss: 0.4553 - acc: 0.8544 - val_loss: 0.6067 - val_acc: 0.8189
Epoch 4/4
113676/113676 [==============================] - 16s - loss: 0.4127 - acc: 0.8686 - val_loss: 0.7701 - val_acc: 0.7915
<keras.callbacks.History at 0x7fdd88642490>
bn_model.optimizer.lr=0.0001
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4,
validation_data=(conv_val_feat, val_labels))
Train on 113676 samples, validate on 3478 samples
Epoch 1/4
113676/113676 [==============================] - 16s - loss: 0.3837 - acc: 0.8775 - val_loss: 0.6904 - val_acc: 0.8197
Epoch 2/4
113676/113676 [==============================] - 16s - loss: 0.3576 - acc: 0.8872 - val_loss: 0.6593 - val_acc: 0.8209
Epoch 3/4
113676/113676 [==============================] - 16s - loss: 0.3384 - acc: 0.8939 - val_loss: 0.7057 - val_acc: 0.8085
Epoch 4/4
113676/113676 [==============================] - 16s - loss: 0.3254 - acc: 0.8977 - val_loss: 0.6867 - val_acc: 0.8128
<keras.callbacks.History at 0x7fdd88642710>
Looks good - let's save those weights.
bn_model.save_weights(path+'models/da_conv8_1.h5')
We're going to try using a combination of pseudo labeling and knowledge distillation to allow us to use unlabeled data (i.e. do semi-supervised learning). For our initial experiment we'll use the validation set as the unlabeled data, so that we can see that it is working without using the test set. At a later date we'll try using the test set.
To do this, we simply calculate the predictions of our model (keeping the soft probabilities, rather than rounding them to one-hot labels)...
val_pseudo = bn_model.predict(conv_val_feat, batch_size=batch_size)
...concatenate them with our training labels...
comb_pseudo = np.concatenate([da_trn_labels, val_pseudo])
comb_feat = np.concatenate([da_conv_feat, conv_val_feat])
...and fine-tune our model using that data.
bn_model.load_weights(path+'models/da_conv8_1.h5')
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=1,
validation_data=(conv_val_feat, val_labels))
Train on 117154 samples, validate on 3478 samples
Epoch 1/1
117154/117154 [==============================] - 17s - loss: 0.3412 - acc: 0.8948 - val_loss: 0.7653 - val_acc: 0.8191
<keras.callbacks.History at 0x7fdd88642f50>
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4,
validation_data=(conv_val_feat, val_labels))
Train on 117154 samples, validate on 3478 samples
Epoch 1/4
117154/117154 [==============================] - 17s - loss: 0.3237 - acc: 0.9008 - val_loss: 0.7536 - val_acc: 0.8229
Epoch 2/4
117154/117154 [==============================] - 17s - loss: 0.3076 - acc: 0.9050 - val_loss: 0.7572 - val_acc: 0.8235
Epoch 3/4
117154/117154 [==============================] - 17s - loss: 0.2984 - acc: 0.9085 - val_loss: 0.7852 - val_acc: 0.8269
Epoch 4/4
117154/117154 [==============================] - 17s - loss: 0.2902 - acc: 0.9117 - val_loss: 0.7630 - val_acc: 0.8263
<keras.callbacks.History at 0x7fdd89bdd210>
bn_model.optimizer.lr=0.00001
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4,
validation_data=(conv_val_feat, val_labels))
Train on 117154 samples, validate on 3478 samples
Epoch 1/4
117154/117154 [==============================] - 17s - loss: 0.2837 - acc: 0.9134 - val_loss: 0.7901 - val_acc: 0.8200
Epoch 2/4
117154/117154 [==============================] - 17s - loss: 0.2760 - acc: 0.9155 - val_loss: 0.7648 - val_acc: 0.8275
Epoch 3/4
117154/117154 [==============================] - 17s - loss: 0.2723 - acc: 0.9183 - val_loss: 0.7382 - val_acc: 0.8358
Epoch 4/4
117154/117154 [==============================] - 17s - loss: 0.2657 - acc: 0.9191 - val_loss: 0.7227 - val_acc: 0.8329
<keras.callbacks.History at 0x7fdd89bb2890>
That's a distinct improvement - even though the validation set isn't very big. This looks encouraging for when we try this on the test set.
bn_model.save_weights(path+'models/bn-ps8.h5')
We'll find a good clipping amount using the validation set, prior to submitting. Since there are 10 classes, do_clip below clips each predicted probability into the range [(1-mx)/9, mx] - i.e. the leftover probability mass is spread evenly across the other nine classes (for mx=0.93 the floor is about 0.0078, which is why so many entries in the submission below are 0.007778).
def do_clip(arr, mx): return np.clip(arr, (1-mx)/9, mx)
# validation-set predictions, needed for the clipping experiment
val_preds = bn_model.predict(conv_val_feat, batch_size=batch_size)
keras.metrics.categorical_crossentropy(val_labels, do_clip(val_preds, 0.93)).eval()
array(0.6726388006592667)
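Rather than guessing, we can sweep a few candidate ceilings and compare validation log loss; a minimal sketch reusing the val_preds and do_clip defined above:

# try several clip ceilings and report validation log loss for each;
# the best mx is the one with the lowest loss
for mx in [0.90, 0.93, 0.95, 0.98]:
    print(mx, keras.metrics.categorical_crossentropy(
        val_labels, do_clip(val_preds, mx)).eval())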
conv_test_feat = load_array(path+'results/conv_test_feat.dat')
preds = bn_model.predict(conv_test_feat, batch_size=batch_size*2)
subm = do_clip(preds,0.93)
subm_name = path+'results/subm.gz'
classes = sorted(batches.class_indices, key=batches.class_indices.get)
submission = pd.DataFrame(subm, columns=classes)
submission.insert(0, 'img', [a[4:] for a in test_filenames])
submission.head()
| | img | c0 | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c9 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | img_68347.jpg | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.093739 | 0.815874 | 0.079049 | 0.007778 |
| 1 | img_55725.jpg | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.930000 | 0.007778 |
| 2 | img_92799.jpg | 0.007778 | 0.930000 | 0.017918 | 0.007778 | 0.007778 | 0.007778 | 0.009022 | 0.007778 | 0.007778 | 0.007778 |
| 3 | img_72170.jpg | 0.007778 | 0.007778 | 0.363869 | 0.007778 | 0.007778 | 0.007778 | 0.200521 | 0.007778 | 0.425176 | 0.007778 |
| 4 | img_59154.jpg | 0.695756 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.007778 | 0.047384 | 0.249183 |
submission.to_csv(subm_name, index=False, compression='gzip')
FileLink(subm_name)
This gets 0.534 on the leaderboard.
You can safely ignore everything from here on, because these experiments didn't really help.
for l in get_bn_layers(p): conv_model.add(l)
for l1,l2 in zip(bn_model.layers, conv_model.layers[last_conv_idx+1:]):
l2.set_weights(l1.get_weights())
for l in conv_model.layers: l.trainable = False
for l in conv_model.layers[last_conv_idx+1:]: l.trainable = True
comb = np.concatenate([trn, val])
gen_t = image.ImageDataGenerator(rotation_range=8, height_shift_range=0.04,
shear_range=0.03, channel_shift_range=10, width_shift_range=0.08)
batches = gen_t.flow(comb, comb_pseudo, batch_size=batch_size)
Exception: X (images tensor) and y (labels) should have the same length. Found: X.shape = (22424, 3, 224, 224), y.shape = (98208, 10)
(comb holds each of the 18946 + 3478 images exactly once, while comb_pseudo was built for the replicated augmented feature set, hence the length mismatch.)
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)
Found 3478 images belonging to 10 classes.
conv_model.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
conv_model.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches,
nb_val_samples=val_batches.N)
Epoch 1/1
22400/22424 [============================>.] - ETA: 0s - loss: 0.4348 - acc: 0.9200
MemoryError: Error allocating 1644167168 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).
The error was raised during the validation pass at the end of the epoch: with batch_size*2 = 128 validation images, a single conv activation of shape (128, 64, 224, 224) in float32 needs exactly those ~1.6GB, which didn't fit on the GPU.
conv_model.optimizer.lr = 0.0001
conv_model.fit_generator(batches, batches.N, nb_epoch=3, validation_data=val_batches,
nb_val_samples=val_batches.N)
for l in conv_model.layers[16:]: l.trainable = True
conv_model.optimizer.lr = 0.00001
conv_model.fit_generator(batches, batches.N, nb_epoch=8, validation_data=val_batches,
nb_val_samples=val_batches.N)
conv_model.save_weights(path+'models/conv8_ps.h5')
conv_model.load_weights(path+'models/conv8_da.h5')
val_pseudo = conv_model.predict(val, batch_size=batch_size*2)
save_array(path+'models/pseudo8_da.dat', val_pseudo)
drivers_ds = pd.read_csv(path+'driver_imgs_list.csv')
drivers_ds.head()
| | subject | classname | img |
|---|---|---|---|
| 0 | p002 | c0 | img_44733.jpg |
| 1 | p002 | c0 | img_72999.jpg |
| 2 | p002 | c0 | img_25094.jpg |
| 3 | p002 | c0 | img_69092.jpg |
| 4 | p002 | c0 | img_92629.jpg |
img2driver = drivers_ds.set_index('img')['subject'].to_dict()
driver2imgs = {k: g["img"].tolist()
for k,g in drivers_ds[['subject', 'img']].groupby("subject")}
def get_idx(driver_list):
return [i for i,f in enumerate(filenames) if img2driver[f[3:]] in driver_list]
drivers = driver2imgs.keys()
rnd_drivers = np.random.permutation(drivers)
ds1 = rnd_drivers[:len(rnd_drivers)//2]
ds2 = rnd_drivers[len(rnd_drivers)//2:]
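The point of splitting by driver is to validate across drivers rather than images, since the same driver appearing in both the training and validation sets would leak information. fit_conv itself isn't shown in this notebook; a hypothetical sketch of how such a split could index the precomputed features:

# hypothetical usage of the driver split: select feature/label subsets so
# that no driver appears in both halves (get_idx is defined above)
trn_idx, val_idx = get_idx(ds1), get_idx(ds2)
sub_trn_feat, sub_trn_labels = conv_feat[trn_idx], trn_labels[trn_idx]
sub_val_feat, sub_val_labels = conv_feat[val_idx], trn_labels[val_idx]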
models=[fit_conv([d]) for d in drivers]
models=[m for m in models if m is not None]
all_preds = np.stack([m.predict(conv_test_feat, batch_size=128) for m in models])
avg_preds = all_preds.mean(axis=0)
avg_preds = avg_preds/np.expand_dims(avg_preds.sum(axis=1), 1)
keras.metrics.categorical_crossentropy(val_labels, np.clip(avg_val_preds,0.01,0.99)).eval()
array(0.9753041572894531)
keras.metrics.categorical_accuracy(val_labels, np.clip(avg_val_preds,0.01,0.99)).eval()
array(0.6949396133422852, dtype=float32)