In [1]:
# boilerplate code
import tensorflow as tf
print(tf.__version__)
2.0.0-alpha0
In [2]:
# Only Conv2D and Layer are used directly below; everything else is reached via keras.layers
from tensorflow.keras.layers import Conv2D, Layer

import cv2  # install with: python -m pip install opencv-python
import numpy as np
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow import keras

import math
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import LearningRateScheduler
In [3]:
num_classes = 10
In [4]:
def load_cifar10_data(img_rows, img_cols):
    # Load cifar10 training and test sets
    (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

    # Resize the 32x32 CIFAR-10 images to img_rows x img_cols
    X_train = np.array([cv2.resize(img, (img_rows, img_cols)) for img in X_train])
    X_test = np.array([cv2.resize(img, (img_rows, img_cols)) for img in X_test])

    # float16 halves memory: the resized 224x224 training set alone is roughly 15 GB
    X_train = X_train.astype('float16') / 255.0
    X_test = X_test.astype('float16') / 255.0

    # Transform targets to keras compatible format
    Y_train = to_categorical(Y_train, num_classes)
    Y_test = to_categorical(Y_test, num_classes)

    print("X_train: {0}".format(X_train.shape))
    print("Y_train: {0}".format(Y_train.shape))
    print("X_test: {0}".format(X_test.shape))
    print("Y_test: {0}".format(Y_test.shape))

    return X_train, Y_train, X_test, Y_test
In [5]:
X_train, y_train, X_test, y_test = load_cifar10_data(224, 224)
X_train: (50000, 224, 224, 3)
Y_train: (50000, 10)
X_test: (10000, 224, 224, 3)
Y_test: (10000, 10)
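Holding the resized datasets in memory is expensive: even at float16, the 224x224 training set alone is roughly 15 GB. As an alternative, here is a sketch (assuming the standard tf.data API; it is not what the rest of this notebook actually runs) that resizes each batch on the fly instead:

In [ ]:
# Sketch of a memory-friendly alternative: resize batches on the fly
# rather than materializing the full 224x224 arrays up front.
def make_dataset(X, Y, batch_size=256, size=(224, 224)):
    ds = tf.data.Dataset.from_tensor_slices((X, Y))  # X: raw 32x32 uint8 images
    ds = ds.map(
        lambda img, label: (tf.image.resize(tf.cast(img, tf.float32), size) / 255.0, label),
        num_parallel_calls=tf.data.experimental.AUTOTUNE
    )
    return ds.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)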
In [6]:
# Shared initializers: Glorot (Xavier) uniform weights, small constant bias
kernel_init = tf.keras.initializers.glorot_uniform()
bias_init = tf.keras.initializers.Constant(value=0.2)
In [8]:
from functools import partial

DefaultConv2D = partial(
    Conv2D, 
    kernel_size=(3, 3), 
    strides=(1, 1),
    padding="SAME",
    kernel_initializer=kernel_init,
    bias_initializer=bias_init
)
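functools.partial only fixes default arguments; individual calls can still override them, which is exactly what the 1x1 skip-projection convolution in the next cell relies on:

In [ ]:
# partial fixes kernel_size=3 as a default, but a call may override it
conv3x3 = DefaultConv2D(64)                 # 3x3 conv, stride 1, SAME padding
conv1x1 = DefaultConv2D(64, kernel_size=1)  # override: 1x1 projection conv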
In [9]:
class ResidualUnit(Layer):
    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.activation = keras.activations.get(activation)
        # Main path: two 3x3 convolutions, each followed by batch norm
        self.main_layers = [
            DefaultConv2D(filters, strides=strides),
            keras.layers.BatchNormalization(),
            self.activation,
            DefaultConv2D(filters),
            keras.layers.BatchNormalization()
        ]

        # When the unit downsamples (strides > 1), the skip path needs a 1x1
        # convolution to match the main path's spatial size and channel count
        if strides > 1:
            self.skip_layers = [
                DefaultConv2D(filters, kernel_size=1, strides=strides),
                keras.layers.BatchNormalization()
            ]
        else:
            self.skip_layers = []

    def call(self, inputs):
        # Main path
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)

        # Skip path (identity unless downsampling)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)

        # Residual addition followed by the final activation
        return self.activation(Z + skip_Z)
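A quick sanity check on dummy data (a sketch, run eagerly) shows what a downsampling unit does: stride 2 halves the spatial dimensions while the 1x1 skip projection matches the new channel count:

In [ ]:
# Stride-2 unit: 56x56x64 input -> 28x28x128 output
unit = ResidualUnit(128, strides=2)
print(unit(tf.zeros([1, 56, 56, 64])).shape)  # expected: (1, 28, 28, 128)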
In [10]:
model = keras.models.Sequential(name='resnet_34')

# Stem: 7x7 conv with stride 2, then 3x3 max pool with stride 2, as in the ResNet paper
model.add(DefaultConv2D(
    64, kernel_size=7, strides=2, input_shape=[224, 224, 3]
))

model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Activation("relu"))
model.add(keras.layers.MaxPool2D(pool_size=3, strides=2, padding="SAME"))

# 3 + 4 + 6 + 3 = 16 residual units, as in ResNet-34; downsample (stride 2)
# whenever the filter count increases
prev_filters = 64
for filters in [64] * 3 + [128] * 4 + [256] * 6 + [512] * 3:
    strides = 1 if filters == prev_filters else 2
    model.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

model.add(keras.layers.GlobalAvgPool2D())
model.add(keras.layers.Flatten())  # no-op here: GlobalAvgPool2D already outputs (batch, 512)
model.add(keras.layers.Dense(10, activation="softmax"))
model.summary()
Model: "resnet_34"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 112, 112, 64)      9472      
_________________________________________________________________
batch_normalization_v2 (Batc (None, 112, 112, 64)      256       
_________________________________________________________________
activation (Activation)      (None, 112, 112, 64)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 56, 56, 64)        0         
_________________________________________________________________
residual_unit (ResidualUnit) (None, 56, 56, 64)        74368     
_________________________________________________________________
residual_unit_1 (ResidualUni (None, 56, 56, 64)        74368     
_________________________________________________________________
residual_unit_2 (ResidualUni (None, 56, 56, 64)        74368     
_________________________________________________________________
residual_unit_3 (ResidualUni (None, 28, 28, 128)       231296    
_________________________________________________________________
residual_unit_4 (ResidualUni (None, 28, 28, 128)       296192    
_________________________________________________________________
residual_unit_5 (ResidualUni (None, 28, 28, 128)       296192    
_________________________________________________________________
residual_unit_6 (ResidualUni (None, 28, 28, 128)       296192    
_________________________________________________________________
residual_unit_7 (ResidualUni (None, 14, 14, 256)       921344    
_________________________________________________________________
residual_unit_8 (ResidualUni (None, 14, 14, 256)       1182208   
_________________________________________________________________
residual_unit_9 (ResidualUni (None, 14, 14, 256)       1182208   
_________________________________________________________________
residual_unit_10 (ResidualUn (None, 14, 14, 256)       1182208   
_________________________________________________________________
residual_unit_11 (ResidualUn (None, 14, 14, 256)       1182208   
_________________________________________________________________
residual_unit_12 (ResidualUn (None, 14, 14, 256)       1182208   
_________________________________________________________________
residual_unit_13 (ResidualUn (None, 7, 7, 512)         3677696   
_________________________________________________________________
residual_unit_14 (ResidualUn (None, 7, 7, 512)         4723712   
_________________________________________________________________
residual_unit_15 (ResidualUn (None, 7, 7, 512)         4723712   
_________________________________________________________________
global_average_pooling2d (Gl (None, 512)               0         
_________________________________________________________________
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 10)                5130      
=================================================================
Total params: 21,315,338
Trainable params: 21,298,314
Non-trainable params: 17,024
_________________________________________________________________
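The 17,024 non-trainable parameters are the moving means and variances tracked by the BatchNormalization layers (two values per channel per BN layer); they are updated by running averages during training, not by gradient descent. A quick check:

In [ ]:
# Non-trainable parameters = BatchNormalization moving means/variances
print(sum(int(np.prod(w.shape.as_list())) for w in model.non_trainable_weights))  # 17024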
In [12]:
initial_lrate = 0.01

# Step decay: multiply the learning rate by 0.96 every 8 epochs
def decay(epoch):
    drop = 0.96
    epochs_drop = 8
    return initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))

lr_sc = LearningRateScheduler(decay, verbose=1)

# `lr` was renamed `learning_rate` in later TF releases
sgd = SGD(lr=initial_lrate, momentum=0.9, nesterov=True)

model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy']
)

epochs = 35

history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=epochs, batch_size=256, callbacks=[lr_sc]
)
Train on 50000 samples, validate on 10000 samples

Epoch 00001: LearningRateScheduler reducing learning rate to 0.01.
Epoch 1/35
50000/50000 [==============================] - 205s 4ms/sample - loss: 1.4258 - accuracy: 0.4903 - val_loss: 1.9112 - val_accuracy: 0.3711

Epoch 00002: LearningRateScheduler reducing learning rate to 0.01.
Epoch 2/35
50000/50000 [==============================] - 193s 4ms/sample - loss: 0.8504 - accuracy: 0.6988 - val_loss: 1.1408 - val_accuracy: 0.6201

Epoch 00003: LearningRateScheduler reducing learning rate to 0.01.
Epoch 3/35
50000/50000 [==============================] - 192s 4ms/sample - loss: 0.5902 - accuracy: 0.7951 - val_loss: 1.1549 - val_accuracy: 0.6433

Epoch 00004: LearningRateScheduler reducing learning rate to 0.01.
Epoch 4/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 0.4049 - accuracy: 0.8586 - val_loss: 1.1226 - val_accuracy: 0.6340

Epoch 00005: LearningRateScheduler reducing learning rate to 0.01.
Epoch 5/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 0.2547 - accuracy: 0.9108 - val_loss: 1.6556 - val_accuracy: 0.6001

Epoch 00006: LearningRateScheduler reducing learning rate to 0.01.
Epoch 6/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 0.1696 - accuracy: 0.9421 - val_loss: 1.8149 - val_accuracy: 0.6156

Epoch 00007: LearningRateScheduler reducing learning rate to 0.01.
Epoch 7/35
50000/50000 [==============================] - 189s 4ms/sample - loss: 0.1107 - accuracy: 0.9640 - val_loss: 1.5841 - val_accuracy: 0.6699

Epoch 00008: LearningRateScheduler reducing learning rate to 0.0096.
Epoch 8/35
50000/50000 [==============================] - 189s 4ms/sample - loss: 0.0691 - accuracy: 0.9771 - val_loss: 2.2737 - val_accuracy: 0.6329

Epoch 00009: LearningRateScheduler reducing learning rate to 0.0096.
Epoch 9/35
50000/50000 [==============================] - 189s 4ms/sample - loss: 0.0437 - accuracy: 0.9857 - val_loss: 2.2252 - val_accuracy: 0.6192

Epoch 00010: LearningRateScheduler reducing learning rate to 0.0096.
Epoch 10/35
50000/50000 [==============================] - 189s 4ms/sample - loss: 0.0300 - accuracy: 0.9906 - val_loss: 1.1013 - val_accuracy: 0.7675

Epoch 00011: LearningRateScheduler reducing learning rate to 0.0096.
Epoch 11/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 0.0163 - accuracy: 0.9953 - val_loss: 1.4369 - val_accuracy: 0.7379

Epoch 00012: LearningRateScheduler reducing learning rate to 0.0096.
Epoch 12/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 0.0141 - accuracy: 0.9957 - val_loss: 1.4038 - val_accuracy: 0.7509

Epoch 00013: LearningRateScheduler reducing learning rate to 0.0096.
Epoch 13/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 0.0129 - accuracy: 0.9964 - val_loss: 1.4215 - val_accuracy: 0.7438

Epoch 00014: LearningRateScheduler reducing learning rate to 0.0096.
Epoch 14/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 0.0067 - accuracy: 0.9983 - val_loss: 1.2960 - val_accuracy: 0.7695

Epoch 00015: LearningRateScheduler reducing learning rate to 0.0096.
Epoch 15/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 0.0014 - accuracy: 0.9999 - val_loss: 0.9393 - val_accuracy: 0.8182

Epoch 00016: LearningRateScheduler reducing learning rate to 0.009216.
Epoch 16/35
50000/50000 [==============================] - 191s 4ms/sample - loss: 3.6020e-04 - accuracy: 1.0000 - val_loss: 0.9105 - val_accuracy: 0.8231

Epoch 00017: LearningRateScheduler reducing learning rate to 0.009216.
Epoch 17/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 2.1701e-04 - accuracy: 1.0000 - val_loss: 0.9101 - val_accuracy: 0.8233

Epoch 00018: LearningRateScheduler reducing learning rate to 0.009216.
Epoch 18/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 1.7623e-04 - accuracy: 1.0000 - val_loss: 0.9136 - val_accuracy: 0.8238

Epoch 00019: LearningRateScheduler reducing learning rate to 0.009216.
Epoch 19/35
50000/50000 [==============================] - 191s 4ms/sample - loss: 1.5298e-04 - accuracy: 1.0000 - val_loss: 0.9170 - val_accuracy: 0.8248

Epoch 00020: LearningRateScheduler reducing learning rate to 0.009216.
Epoch 20/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 1.3303e-04 - accuracy: 1.0000 - val_loss: 0.9178 - val_accuracy: 0.8265

Epoch 00021: LearningRateScheduler reducing learning rate to 0.009216.
Epoch 21/35
50000/50000 [==============================] - 189s 4ms/sample - loss: 1.2130e-04 - accuracy: 1.0000 - val_loss: 0.9188 - val_accuracy: 0.8253

Epoch 00022: LearningRateScheduler reducing learning rate to 0.009216.
Epoch 22/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 1.0718e-04 - accuracy: 1.0000 - val_loss: 0.9209 - val_accuracy: 0.8254

Epoch 00023: LearningRateScheduler reducing learning rate to 0.009216.
Epoch 23/35
50000/50000 [==============================] - 189s 4ms/sample - loss: 9.9417e-05 - accuracy: 1.0000 - val_loss: 0.9238 - val_accuracy: 0.8266

Epoch 00024: LearningRateScheduler reducing learning rate to 0.008847359999999999.
Epoch 24/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 9.1316e-05 - accuracy: 1.0000 - val_loss: 0.9236 - val_accuracy: 0.8266

Epoch 00025: LearningRateScheduler reducing learning rate to 0.008847359999999999.
Epoch 25/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 8.8492e-05 - accuracy: 1.0000 - val_loss: 0.9260 - val_accuracy: 0.8270

Epoch 00026: LearningRateScheduler reducing learning rate to 0.008847359999999999.
Epoch 26/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 8.4004e-05 - accuracy: 1.0000 - val_loss: 0.9270 - val_accuracy: 0.8264

Epoch 00027: LearningRateScheduler reducing learning rate to 0.008847359999999999.
Epoch 27/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 7.6821e-05 - accuracy: 1.0000 - val_loss: 0.9286 - val_accuracy: 0.8264

Epoch 00028: LearningRateScheduler reducing learning rate to 0.008847359999999999.
Epoch 28/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 7.1725e-05 - accuracy: 1.0000 - val_loss: 0.9297 - val_accuracy: 0.8265

Epoch 00029: LearningRateScheduler reducing learning rate to 0.008847359999999999.
Epoch 29/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 6.9910e-05 - accuracy: 1.0000 - val_loss: 0.9298 - val_accuracy: 0.8270

Epoch 00030: LearningRateScheduler reducing learning rate to 0.008847359999999999.
Epoch 30/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 6.7411e-05 - accuracy: 1.0000 - val_loss: 0.9316 - val_accuracy: 0.8272

Epoch 00031: LearningRateScheduler reducing learning rate to 0.008847359999999999.
Epoch 31/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 6.2929e-05 - accuracy: 1.0000 - val_loss: 0.9318 - val_accuracy: 0.8275

Epoch 00032: LearningRateScheduler reducing learning rate to 0.008493465599999998.
Epoch 32/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 5.9789e-05 - accuracy: 1.0000 - val_loss: 0.9338 - val_accuracy: 0.8272

Epoch 00033: LearningRateScheduler reducing learning rate to 0.008493465599999998.
Epoch 33/35
50000/50000 [==============================] - 189s 4ms/sample - loss: 5.8887e-05 - accuracy: 1.0000 - val_loss: 0.9339 - val_accuracy: 0.8267

Epoch 00034: LearningRateScheduler reducing learning rate to 0.008493465599999998.
Epoch 34/35
50000/50000 [==============================] - 189s 4ms/sample - loss: 5.7854e-05 - accuracy: 1.0000 - val_loss: 0.9351 - val_accuracy: 0.8264

Epoch 00035: LearningRateScheduler reducing learning rate to 0.008493465599999998.
Epoch 35/35
50000/50000 [==============================] - 190s 4ms/sample - loss: 5.6941e-05 - accuracy: 1.0000 - val_loss: 0.9361 - val_accuracy: 0.8266
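The learning rates in the log match the decay function: a factor-of-0.96 drop every 8 epochs. Replaying the schedule confirms it:

In [ ]:
# Replay the step-decay schedule; values match the log above
for epoch in [0, 7, 15, 23, 31]:
    print(epoch + 1, decay(epoch))
# 1 0.01
# 8 0.0096
# 16 0.009216
# 24 0.008847359999999999
# 32 0.008493465599999998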