Notebook

Intro¶

In the previous blog post, we talked about the intuition behind GANs (Generative Adversarial Networks), how they work, and how to create a simple GAN model capable of learning how to generate images that look a lot like images from the MNIST and SVHNs datasets. Now, let us turn the wheels a little and talk about one of the most prominent applications of GANs, semi-supervised learning.

In [1]:

# Lets start by loading the necessary libraries
%matplotlib inline

import pickle as pkl
import time
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
import tensorflow as tf
import os
# Use second GPU -- change if you want to use a first one
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

In [2]:

!mkdir data

mkdir: cannot create directory ‘data’: File exists

For these first cells, we are going to download the SVHN dataset files and preprocess them.

In [3]:

from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm

data_dir = 'data/'

if not isdir(data_dir):
    raise Exception("Data directory doesn't exist!")

class DLProgress(tqdm):
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        self.total = total_size
        self.update((block_num - self.last_block) * block_size)
        self.last_block = block_num

if not isfile(data_dir + "train_32x32.mat"):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='SVHN Training Set') as pbar:
        urlretrieve(
            'http://ufldl.stanford.edu/housenumbers/train_32x32.mat',
            data_dir + 'train_32x32.mat',
            pbar.hook)

if not isfile(data_dir + "test_32x32.mat"):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='SVHN Training Set') as pbar:
        urlretrieve(
            'http://ufldl.stanford.edu/housenumbers/test_32x32.mat',
            data_dir + 'test_32x32.mat',
            pbar.hook)

In [4]:

# Load the training and testing datasets
trainset = loadmat(data_dir + 'train_32x32.mat')
testset = loadmat(data_dir + 'test_32x32.mat')
print("trainset shape:", trainset['X'].shape)
print("testset shape:", testset['X'].shape)

trainset shape: (32, 32, 3, 73257)
testset shape: (32, 32, 3, 26032)

In [5]:

# Let's just take a pick in some of the SVHN samples the network will process
idx = np.random.randint(0, trainset['X'].shape[3], size=36)
fig, axes = plt.subplots(6, 6, sharex=True, sharey=True, figsize=(5,5),)
for ii, ax in zip(idx, axes.flatten()):
    ax.imshow(trainset['X'][:,:,:,ii], aspect='equal')
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
plt.subplots_adjust(wspace=0, hspace=0)

Scales the image pixel values to the range of -1 to 1. That is necessary because the Generator net outputs values squashed between this range through the Tanh() function.

In [6]:

def scale(x, feature_range=(-1, 1)):
    # scale to (0, 1)
    x = ((x - x.min())/(255 - x.min()))
    
    # scale to feature_range
    min, max = feature_range
    x = x * (max - min) + min
    return x

In [7]:

class Dataset:
    def __init__(self, train, test, val_frac=0.5, shuffle=True, scale_func=None):
        split_idx = int(len(test['y'])*(1 - val_frac))
        self.test_x, self.valid_x = test['X'][:,:,:,:split_idx], test['X'][:,:,:,split_idx:]
        self.test_y, self.valid_y = test['y'][:split_idx], test['y'][split_idx:]
        self.train_x, self.train_y = train['X'], train['y']
        # The SVHN dataset comes with lots of labels, but for the purpose of this exercise,
        # we will pretend that there are only 1000.
        # We use this mask to say which labels we will allow ourselves to use.
        self.label_mask = np.zeros_like(self.train_y)
        self.label_mask[0:1000] = 1
        
        # Roll the specified axis backwards, until it lies in a given position.
        # From (32, 32, 3, 73257) to (73257, 32, 32, 3)
        self.train_x = np.rollaxis(self.train_x, axis=3)
        self.valid_x = np.rollaxis(self.valid_x, axis=3)
        self.test_x = np.rollaxis(self.test_x, axis=3)
        
        if scale_func is None:
            self.scaler = scale
        else:
            self.scaler = scale_func
        self.train_x = self.scaler(self.train_x)
        self.valid_x = self.scaler(self.valid_x)
        self.test_x = self.scaler(self.test_x)
        self.shuffle = shuffle
        
    def batches(self, batch_size, which_set="train"):
        x_name = which_set + "_x"
        y_name = which_set + "_y"

        # Return the value of the named attribute of object
        num_examples = len(getattr(dataset, y_name))
        if self.shuffle:
            idx = np.arange(num_examples)
            np.random.shuffle(idx)
            setattr(dataset, x_name, getattr(dataset, x_name)[idx])
            setattr(dataset, y_name, getattr(dataset, y_name)[idx])
            if which_set == "train":
                dataset.label_mask = dataset.label_mask[idx]
        
        dataset_x = getattr(dataset, x_name)
        dataset_y = getattr(dataset, y_name)
        for ii in range(0, num_examples, batch_size):
            x = dataset_x[ii:ii+batch_size]
            y = dataset_y[ii:ii+batch_size]
            
            if which_set == "train":
                # When we use the data for training, we need to include
                # the label mask, so we can pretend we don't have access
                # to some of the labels, as an exercise of our semi-supervised
                # learning ability
                # x: [BATCH_SIZE, 32, 32, 3]
                # y: [BATCH_SIZE, 1]
                # label_mask: [BATCH_SIZE, 1] whether a 0: Label Cannot be used or 1: Label can be used 
                yield x, y, self.label_mask[ii:ii+batch_size]
            else:
                yield x, y

In [8]:

def model_inputs(real_dim, z_dim):
    # placeholder for inputing the real images from the training set to the discriminator 
    inputs_real = tf.placeholder(tf.float32, (None, *real_dim), name='input_real')
    
    # placeholder for inputing random noise data into the discriminator
    inputs_z = tf.placeholder(tf.float32, (None, z_dim), name='input_z')
    
    # placeholder for inputing the real data labels (0-9 for the SVHN dataset)
    y = tf.placeholder(tf.int32, (None), name='y')
    
    # placeholder for inputing the label masks which tell the model for which 
    # sample images should it take the labels for training
    label_mask = tf.placeholder(tf.int32, (None), name='label_mask')
    
    return inputs_real, inputs_z, y, label_mask

Generator¶

For the generator we follow a very basic implementation described in the DCGAN paper. This approach consists reshaping a random vector z to have a 4D shape and then feed it to a sequence of transpose convolutions, batch normalization and leaky RELU operations that increase the spatial dimensions of the input vector while decreases the number of channels. As a result, the Network outputs a 32x32x3 tensor shape that is squashed between values of -1 and 1 through the Hyperbolic Tangent Function.

In [9]:

def generator(z, output_dim, reuse=False, alpha=0.2, training=True, size_mult=128):
    with tf.variable_scope('generator', reuse=reuse):
        # First fully connected layer
        x1 = tf.layers.dense(z, 4 * 4 * size_mult * 4)
        # Reshape it to start the convolutional stack
        x1 = tf.reshape(x1, (-1, 4, 4, size_mult * 4))
        x1 = tf.layers.batch_normalization(x1, training=training)
        x1 = tf.maximum(alpha * x1, x1)
        
        x2 = tf.layers.conv2d_transpose(x1, size_mult * 2, 5, strides=2, padding='same')
        x2 = tf.layers.batch_normalization(x2, training=training)
        x2 = tf.maximum(alpha * x2, x2)
        
        x3 = tf.layers.conv2d_transpose(x2, size_mult, 5, strides=2, padding='same')
        x3 = tf.layers.batch_normalization(x3, training=training)
        x3 = tf.maximum(alpha * x3, x3)
        
        # Output layer
        logits = tf.layers.conv2d_transpose(x3, output_dim, 5, strides=2, padding='same')
        
        out = tf.tanh(logits)
        
        return out

Discriminator¶

Here we setup an also similar DCGAN architecture in which we use a stack of strided 2 convolutions for dimensionality reduction and batch normalization for stabilizing learning (except for the first layer of the network).

The 2D convolution window (kernel or filter) is set to have a width and height of 5 across all the convolution operations. Also, note that we have some layers with dropout. It is important to understand that our discriminator behaves (in part) like any other regular classifier and because of that; it may suffers from the same problems any classifier would if not well designed.

After a series of convolutions, batch normalization, leaky RELUs and dropout, instead of directly applying a fully connected layer on top of the convolutions, we perform a Global Average Pooling (GAP) operation.

In [10]:

def discriminator(x, reuse=False, alpha=0.2, drop_rate=0., num_classes=10, size_mult=64):
    with tf.variable_scope('discriminator', reuse=reuse):
        x = tf.layers.dropout(x, rate=drop_rate/2.5)
        
        # Input layer is ?x32x32x3
        x1 = tf.layers.conv2d(x, size_mult, 3, strides=2, padding='same')
        relu1 = tf.maximum(alpha * x1, x1)
        relu1 = tf.layers.dropout(relu1, rate=drop_rate) # [?x16x16x?]

        x2 = tf.layers.conv2d(relu1, size_mult, 3, strides=2, padding='same')
        bn2 = tf.layers.batch_normalization(x2, training=True) # [?x8x8x?]
        relu2 = tf.maximum(alpha * bn2, bn2)
        
        x3 = tf.layers.conv2d(relu2, size_mult, 3, strides=2, padding='same') # [?x4x4x?]
        bn3 = tf.layers.batch_normalization(x3, training=True)
        relu3 = tf.maximum(alpha * bn3, bn3)
        relu3 = tf.layers.dropout(relu3, rate=drop_rate)
        
        x4 = tf.layers.conv2d(relu3, 2 * size_mult, 3, strides=1, padding='same') # [?x4x4x?]
        bn4 = tf.layers.batch_normalization(x4, training=True)
        relu4 = tf.maximum(alpha * bn4, bn4)
        
        x5 = tf.layers.conv2d(relu4, 2 * size_mult, 3, strides=1, padding='same') # [?x4x4x?]
        bn5 = tf.layers.batch_normalization(x5, training=True)
        relu5 = tf.maximum(alpha * bn5, bn5)

        x6 = tf.layers.conv2d(relu5, 2 * size_mult, 3, strides=2, padding='same') # [?x2x2x?]
        bn6 = tf.layers.batch_normalization(x6, training=True)
        relu6 = tf.maximum(alpha * bn6, bn6)
        relu6 = tf.layers.dropout(relu6, rate=drop_rate)
        
        x7 = tf.layers.conv2d(relu5, filters=(2 * size_mult), kernel_size=3, strides=1, padding='valid')
        # Don't use bn on this layer, because bn would set the mean of each feature
        # to the bn mu parameter.
        # This layer is used for the feature matching loss, which only works if
        # the means can be different when the discriminator is run on the data than
        # when the discriminator is run on the generator samples.
        relu7 = tf.maximum(alpha * x7, x7)
        
        # Flatten it by global average pooling
        # In global average pooling, for every feature map we take the average over all the spatial
        # domain and return a single value
        # In: [BATCH_SIZE,HEIGHT X WIDTH X CHANNELS] --> [BATCH_SIZE, CHANNELS]
        features = tf.reduce_mean(relu7, axis=[1,2])
        
        # Set class_logits to be the inputs to a softmax distribution over the different classes
        class_logits = tf.layers.dense(features, num_classes)
        
        # This function is more numerically stable than log(sum(exp(input))). 
        # It avoids overflows caused by taking the exp of large inputs and underflows 
        # caused by taking the log of small inputs.
        gan_logits = tf.reduce_logsumexp(class_logits, 1)
        
        # Get the probability that the input is real rather than fake
        out = tf.nn.softmax(class_logits) # class probabilities for the 10 real classes plus the fake class
        
        return out, class_logits, gan_logits, features

In [11]:

def model_loss(input_real, input_z, output_dim, y, num_classes, label_mask, alpha=0.2, drop_rate=0., smooth=0.1):
    """
    Get the loss for the discriminator and generator
    :param input_real: Images from the real dataset
    :param input_z: Z input random noise vector 
    :param output_dim: The number of channels in the output image
    :param y: Integer class labels
    :param num_classes: The number of classes
    :param alpha: The slope of the left half of leaky ReLU activation
    :param drop_rate: The probability of dropping a hidden unit
    :return: A tuple of (discriminator loss, generator loss)
    """
    
    # These numbers multiply the size of each layer of the generator and the discriminator,
    # respectively. You can reduce them to run your code faster for debugging purposes.
    g_size_mult = 32
    d_size_mult = 64
    
    # Here we instatiate the generator and the discriminator networks
    g_model = generator(input_z, output_dim, alpha=alpha, size_mult=g_size_mult)
    d_on_data = discriminator(input_real, alpha=alpha, drop_rate=drop_rate, size_mult=d_size_mult)
    
    # d_model_real: probability that the input is real
    # class_logits_on_data: the unnormalized log probability values for the probability of each classe
    # gan_logits_on_data: the probability of whether or not the image is real
    # data_features: features from the last layer of the discriminator to be used in the feature matching loss
    d_model_real, class_logits_on_data, gan_logits_on_data, data_features = d_on_data
    
    d_on_samples = discriminator(g_model, reuse=True, alpha=alpha, drop_rate=drop_rate, size_mult=d_size_mult)
    d_model_fake, class_logits_on_samples, gan_logits_on_samples, sample_features = d_on_samples
    
    # Here we compute `d_loss`, the loss for the discriminator.
    # This should combine two different losses:
    # 1. The loss for the GAN problem, where we minimize the cross-entropy for the binary
    #    real-vs-fake classification problem.
    real_data_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=gan_logits_on_data, 
                                                            labels=tf.ones_like(gan_logits_on_data) * (1 - smooth)))
    
    fake_data_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=gan_logits_on_samples, 
                                                         labels=tf.zeros_like(gan_logits_on_samples)))
    
    unsupervised_loss = real_data_loss + fake_data_loss
    
    #  2. The loss for the SVHN digit classification problem, where we minimize the cross-entropy
    #     for the multi-class softmax. For this one we use the labels. Don't forget to ignore
    #     use `label_mask` to ignore the examples that we are pretending are unlabeled for the
    #     semi-supervised learning problem.
    y = tf.squeeze(y)
    suppervised_loss = tf.nn.softmax_cross_entropy_with_logits(logits=class_logits_on_data,
                                                                  labels=tf.one_hot(y, num_classes, dtype=tf.float32))
    
    label_mask = tf.squeeze(tf.to_float(label_mask))
    
    # ignore the labels that we pretend does not exist for the loss
    suppervised_loss = tf.reduce_sum(tf.multiply(suppervised_loss, label_mask))
    
    # get the mean 
    suppervised_loss = suppervised_loss / tf.maximum(1.0, tf.reduce_sum(label_mask))
    d_loss = unsupervised_loss + suppervised_loss
    
    # Here we set `g_loss` to the "feature matching" loss invented by Tim Salimans at OpenAI.
    # This loss consists of minimizing the absolute difference between the expected features
    # on the data and the expected features on the generated samples.
    # This loss works better for semi-supervised learning than the tradition GAN losses.
    
    # Make the Generator output features that are on average similar to the features 
    # that are found by applying the real data to the discriminator
    
    data_moments = tf.reduce_mean(data_features, axis=0)
    sample_moments = tf.reduce_mean(sample_features, axis=0)
    g_loss = tf.reduce_mean(tf.abs(data_moments - sample_moments))

    pred_class = tf.cast(tf.argmax(class_logits_on_data, 1), tf.int32)
    eq = tf.equal(tf.squeeze(y), pred_class)
    correct = tf.reduce_sum(tf.to_float(eq))
    masked_correct = tf.reduce_sum(label_mask * tf.to_float(eq))
    
    return d_loss, g_loss, correct, masked_correct, g_model

In [12]:

def model_opt(d_loss, g_loss, learning_rate, beta1):
    """
    Get optimization operations
    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param learning_rate: Learning Rate Placeholder
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation, generator training operation)
    """
    # Get weights and biases to update. Get them separately for the discriminator and the generator
    discriminator_train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES , scope='discriminator')
    generator_train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
    
    # Minimize both players' costs simultaneously
    #update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    #with tf.control_dependencies(update_ops):
    d_train_opt = tf.train.AdamOptimizer(learning_rate, beta1, name='d_optimizer').minimize(d_loss, var_list=discriminator_train_vars)
    g_train_opt = tf.train.AdamOptimizer(learning_rate, beta1, name='g_optimizer').minimize(g_loss, var_list=generator_train_vars)

    shrink_lr = tf.assign(learning_rate, learning_rate * 0.9)
    
    return d_train_opt, g_train_opt, shrink_lr

In [13]:

class GAN:
    """
    A GAN model.
    :param real_size: The shape of the real data.
    :param z_size: The number of entries in the z code vector.
    :param learnin_rate: The learning rate to use for Adam.
    :param num_classes: The number of classes to recognize.
    :param alpha: The slope of the left half of the leaky ReLU activation
    :param beta1: The beta1 parameter for Adam.
    """
    def __init__(self, real_size, z_size, learning_rate, num_classes=10, alpha=0.2, beta1=0.5):
        tf.reset_default_graph()
        
        self.learning_rate = tf.Variable(learning_rate, trainable=False)
        inputs = model_inputs(real_size, z_size)
        self.input_real, self.input_z, self.y, self.label_mask = inputs
        self.drop_rate = tf.placeholder_with_default(.5, (), "drop_rate")
        
        loss_results = model_loss(self.input_real, self.input_z,
                                  real_size[2], self.y, num_classes,
                                  label_mask=self.label_mask,
                                  alpha=0.2,
                                  drop_rate=self.drop_rate)
        self.d_loss, self.g_loss, self.correct, self.masked_correct, self.samples = loss_results
        
        self.d_opt, self.g_opt, self.shrink_lr = model_opt(self.d_loss, self.g_loss, self.learning_rate, beta1)

In [14]:

def view_samples(epoch, samples, nrows, ncols, figsize=(5,5)):
    fig, axes = plt.subplots(figsize=figsize, nrows=nrows, ncols=ncols, 
                             sharey=True, sharex=True)
    for ax, img in zip(axes.flatten(), samples[epoch]):
        ax.axis('off')
        img = ((img - img.min())*255 / (img.max() - img.min())).astype(np.uint8)
        ax.set_adjustable('box-forced')
        im = ax.imshow(img)
   
    plt.subplots_adjust(wspace=0, hspace=0)
    return fig, axes

In [15]:

def train(net, dataset, epochs, batch_size, figsize=(5,5)):
    
    saver = tf.train.Saver()
    sample_z = np.random.normal(0, 1, size=(50, z_size))

    samples, train_accuracies, test_accuracies = [], [], []
    steps = 0

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for e in range(epochs):
            print("Epoch",e)
            
            t1e = time.time()
            num_examples = 0
            num_correct = 0
            for x, y, label_mask in dataset.batches(batch_size):
                assert 'int' in str(y.dtype)
                steps += 1
                num_examples += label_mask.sum()

                # Sample random noise for G
                batch_z = np.random.normal(0, 1, size=(batch_size, z_size))

                # Run optimizers
                t1 = time.time()
                _, _, correct = sess.run([net.d_opt, net.g_opt, net.masked_correct],
                                         feed_dict={net.input_real: x, net.input_z: batch_z,
                                                    net.y : y, net.label_mask : label_mask})
                t2 = time.time()
                num_correct += correct

            sess.run([net.shrink_lr])
            
            
            train_accuracy = num_correct / float(num_examples)
            
            print("\t\tClassifier train accuracy: ", train_accuracy)
            
            num_examples = 0
            num_correct = 0
            for x, y in dataset.batches(batch_size, which_set="test"):
                assert 'int' in str(y.dtype)
                num_examples += x.shape[0]

                correct, = sess.run([net.correct], feed_dict={net.input_real: x,
                                                   net.y : y,
                                                   net.drop_rate: 0.})
                num_correct += correct
            
            test_accuracy = num_correct / float(num_examples)
            print("\t\tClassifier test accuracy", test_accuracy)
            print("\t\tStep time: ", t2 - t1)
            t2e = time.time()
            print("\t\tEpoch time: ", t2e - t1e)
            
            
            gen_samples = sess.run(
                                   net.samples,
                                   feed_dict={net.input_z: sample_z})
            samples.append(gen_samples)
            _ = view_samples(-1, samples, 5, 10, figsize=figsize)
            plt.show()
            
            
            # Save history of accuracies to view after training
            train_accuracies.append(train_accuracy)
            test_accuracies.append(test_accuracy)
            

        saver.save(sess, './checkpoints/generator.ckpt')

    with open('samples.pkl', 'wb') as f:
        pkl.dump(samples, f)
    
    return train_accuracies, test_accuracies, samples

In [16]:

!mkdir checkpoints

mkdir: cannot create directory ‘checkpoints’: File exists

In [17]:

real_size = (32,32,3)
z_size = 100
learning_rate = 0.0003

net = GAN(real_size, z_size, learning_rate)

In [18]:

dataset = Dataset(trainset, testset)

batch_size = 128
epochs = 25
train_accuracies, test_accuracies, samples = train(net,
                                                   dataset,
                                                   epochs,
                                                   batch_size,
                                                   figsize=(10,5))

Epoch 0
		Classifier train accuracy:  0.19
		Classifier test accuracy 0.247771972956
		Step time:  0.06286406517028809
		Epoch time:  28.6306734085083

Epoch 1
		Classifier train accuracy:  0.309
		Classifier test accuracy 0.368930547019
		Step time:  0.039215087890625
		Epoch time:  28.045222759246826

Epoch 2
		Classifier train accuracy:  0.509
		Classifier test accuracy 0.505377996312
		Step time:  0.04323291778564453
		Epoch time:  28.572621822357178

Epoch 3
		Classifier train accuracy:  0.641
		Classifier test accuracy 0.562692071297
		Step time:  0.03950762748718262
		Epoch time:  28.342881679534912

Epoch 4
		Classifier train accuracy:  0.738
		Classifier test accuracy 0.547403196066
		Step time:  0.03790545463562012
		Epoch time:  26.752944469451904

Epoch 5
		Classifier train accuracy:  0.801
		Classifier test accuracy 0.613629379226
		Step time:  0.04494428634643555
		Epoch time:  27.91737198829651

Epoch 6
		Classifier train accuracy:  0.853
		Classifier test accuracy 0.656192378611
		Step time:  0.04487466812133789
		Epoch time:  29.24485492706299

Epoch 7
		Classifier train accuracy:  0.903
		Classifier test accuracy 0.656653349723
		Step time:  0.04403948783874512
		Epoch time:  29.1003315448761

Epoch 8
		Classifier train accuracy:  0.9
		Classifier test accuracy 0.65503995083
		Step time:  0.03714489936828613
		Epoch time:  28.673871517181396

Epoch 9
		Classifier train accuracy:  0.919
		Classifier test accuracy 0.688537185003
		Step time:  0.047916412353515625
		Epoch time:  29.255188941955566

Epoch 10
		Classifier train accuracy:  0.919
		Classifier test accuracy 0.688767670559
		Step time:  0.04770994186401367
		Epoch time:  29.302653551101685

Epoch 11
		Classifier train accuracy:  0.926
		Classifier test accuracy 0.696911493546
		Step time:  0.04100465774536133
		Epoch time:  28.871726512908936

Epoch 12
		Classifier train accuracy:  0.933
		Classifier test accuracy 0.691226183159
		Step time:  0.04149150848388672
		Epoch time:  29.167212963104248

Epoch 13
		Classifier train accuracy:  0.933
		Classifier test accuracy 0.696220036878
		Step time:  0.0490267276763916
		Epoch time:  29.121546745300293

Epoch 14
		Classifier train accuracy:  0.93
		Classifier test accuracy 0.696988322065
		Step time:  0.04289436340332031
		Epoch time:  28.889161825180054

Epoch 15
		Classifier train accuracy:  0.931
		Classifier test accuracy 0.690918869084
		Step time:  0.03994131088256836
		Epoch time:  27.28535747528076

Epoch 16
		Classifier train accuracy:  0.933
		Classifier test accuracy 0.681929932391
		Step time:  0.03889656066894531
		Epoch time:  26.704275846481323

Epoch 17
		Classifier train accuracy:  0.938
		Classifier test accuracy 0.683466502766
		Step time:  0.03771519660949707
		Epoch time:  26.865015745162964

Epoch 18
		Classifier train accuracy:  0.939
		Classifier test accuracy 0.68077750461
		Step time:  0.04466581344604492
		Epoch time:  26.684985399246216

Epoch 19
		Classifier train accuracy:  0.935
		Classifier test accuracy 0.685310387216
		Step time:  0.03772783279418945
		Epoch time:  26.833319425582886

Epoch 20
		Classifier train accuracy:  0.934
		Classifier test accuracy 0.685003073141
		Step time:  0.03722500801086426
		Epoch time:  26.43741750717163

Epoch 21
		Classifier train accuracy:  0.936
		Classifier test accuracy 0.67647510756
		Step time:  0.04166460037231445
		Epoch time:  26.90742778778076

Epoch 22
		Classifier train accuracy:  0.939
		Classifier test accuracy 0.678242163491
		Step time:  0.04402613639831543
		Epoch time:  26.405832052230835

Epoch 23
		Classifier train accuracy:  0.938
		Classifier test accuracy 0.672940995698
		Step time:  0.03714895248413086
		Epoch time:  26.47376036643982

Epoch 24
		Classifier train accuracy:  0.937
		Classifier test accuracy 0.67401659496
		Step time:  0.03694915771484375
		Epoch time:  26.15842580795288

In [19]:

fig, ax = plt.subplots()
plt.plot(train_accuracies, label='Train', alpha=0.5)
plt.plot(test_accuracies, label='Test', alpha=0.5)
plt.title("Accuracy")
plt.legend()

Out[19]:

<matplotlib.legend.Legend at 0x7f719f0f5320>

In [20]:

_ = view_samples(-1, samples, 5, 10, figsize=(10,5))

In [ ]: