In the previous blog post, we talked about the intuition behind GANs (Generative Adversarial Networks), how they work, and how to create a simple GAN model capable of learning to generate images that look a lot like images from the MNIST and SVHN datasets. Now, let us shift gears a little and talk about one of the most prominent applications of GANs: semi-supervised learning.
# Let's start by loading the necessary libraries
%matplotlib inline
import pickle as pkl
import time
import matplotlib.pyplot as plt
import numpy as np
from scipy.io import loadmat
import tensorflow as tf
import os
# Expose only GPU 0 (the first GPU) to TensorFlow -- change the index if you want to use a different one
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
!mkdir data
mkdir: cannot create directory ‘data’: File exists
For these first cells, we are going to download the SVHN dataset files and preprocess them.
from urllib.request import urlretrieve
from os.path import isfile, isdir
from tqdm import tqdm
# Folder where the SVHN .mat files are stored; the download cells below write into it.
data_dir = 'data/'

# Fail fast when the folder is missing. FileNotFoundError is a subclass of
# Exception, so code that catches Exception keeps working.
if not isdir(data_dir):
    raise FileNotFoundError("Data directory doesn't exist!")
class DLProgress(tqdm):
    """tqdm progress bar whose ``hook`` method is passed to ``urlretrieve`` as its report hook."""

    # Block number received on the previous call to ``hook``.
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """Advance the bar by the data received since the previous call.

        :param block_num: Number of blocks transferred so far
        :param block_size: Size of a single block
        :param total_size: Total size of the download; stored as the bar's total
        """
        self.total = total_size
        blocks_since_last_call = block_num - self.last_block
        self.update(blocks_since_last_call * block_size)
        self.last_block = block_num
# Download each SVHN split unless its .mat file is already present in data_dir.
# The progress-bar label now names the split actually being downloaded
# (the test download used to be labelled 'SVHN Training Set').
for file_name, description in (('train_32x32.mat', 'SVHN Training Set'),
                               ('test_32x32.mat', 'SVHN Testing Set')):
    if not isfile(data_dir + file_name):
        with DLProgress(unit='B', unit_scale=True, miniters=1, desc=description) as pbar:
            urlretrieve(
                'http://ufldl.stanford.edu/housenumbers/' + file_name,
                data_dir + file_name,
                pbar.hook)
# Read the two SVHN .mat files from data_dir
train_path = data_dir + 'train_32x32.mat'
test_path = data_dir + 'test_32x32.mat'
trainset = loadmat(train_path)
testset = loadmat(test_path)

# Report the shape of the image array stored under 'X' in each split
for caption, split in (("trainset shape:", trainset), ("testset shape:", testset)):
    print(caption, split['X'].shape)
trainset shape: (32, 32, 3, 73257) testset shape: (32, 32, 3, 26032)
# Let's take a peek at some of the SVHN samples the network will process:
# 36 randomly chosen training images laid out on a 6x6 grid.
num_train_images = trainset['X'].shape[3]
idx = np.random.randint(0, num_train_images, size=36)
fig, axes = plt.subplots(6, 6, sharex=True, sharey=True, figsize=(5,5),)
for ax, ii in zip(axes.flatten(), idx):
    # The sample index is the last axis of the image array
    ax.imshow(trainset['X'][:,:,:,ii], aspect='equal')
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_visible(False)
# Remove the gaps between the tiles
plt.subplots_adjust(wspace=0, hspace=0)
Scale the image pixel values to the range of -1 to 1. This is necessary because the Generator net outputs values squashed into this range by the Tanh() function.
def scale(x, feature_range=(-1, 1)):
    """Linearly rescale pixel values into ``feature_range``.

    The input is first mapped to (0, 1) using its own minimum as the lower
    bound and 255 as the upper bound, then stretched to ``feature_range``.

    :param x: Array of pixel values (anything exposing ``.min()`` and
        supporting array arithmetic)
    :param feature_range: ``(low, high)`` bounds of the output range
    :return: The rescaled array
    """
    # scale to (0, 1); 255 is used as the largest possible pixel value
    lowest = x.min()
    x = (x - lowest) / (255 - lowest)
    # scale to feature_range (local names chosen so the builtins min/max are not shadowed)
    low, high = feature_range
    return x * (high - low) + low
class Dataset:
    """Holds the SVHN train / validation / test splits and serves mini-batches.

    :param train: Dict with the training images under 'X' (sample index on
        the last axis) and the labels under 'y'
    :param test: Dict with the same layout; it is split into a test part and
        a validation part
    :param val_frac: Fraction of ``test`` that goes to the validation split
    :param shuffle: Whether ``batches`` reshuffles the requested split first
    :param scale_func: Function applied to every image array; the
        module-level ``scale`` is used when this is None
    :param num_labeled: Number of leading training examples whose labels may
        be used; the rest are treated as unlabeled
    """

    def __init__(self, train, test, val_frac=0.5, shuffle=True, scale_func=None, num_labeled=1000):
        split_idx = int(len(test['y']) * (1 - val_frac))
        self.test_x, self.valid_x = test['X'][:, :, :, :split_idx], test['X'][:, :, :, split_idx:]
        self.test_y, self.valid_y = test['y'][:split_idx], test['y'][split_idx:]
        self.train_x, self.train_y = train['X'], train['y']

        # The SVHN dataset comes with lots of labels, but for the purpose of this exercise,
        # we will pretend that there are only `num_labeled` of them.
        # We use this mask to say which labels we will allow ourselves to use.
        self.label_mask = np.zeros_like(self.train_y)
        self.label_mask[0:num_labeled] = 1

        # Move the sample axis to the front,
        # e.g. from (32, 32, 3, 73257) to (73257, 32, 32, 3)
        self.train_x = np.rollaxis(self.train_x, axis=3)
        self.valid_x = np.rollaxis(self.valid_x, axis=3)
        self.test_x = np.rollaxis(self.test_x, axis=3)

        # `scale` is only looked up when no custom scaling function is given
        self.scaler = scale if scale_func is None else scale_func
        self.train_x = self.scaler(self.train_x)
        self.valid_x = self.scaler(self.valid_x)
        self.test_x = self.scaler(self.test_x)
        self.shuffle = shuffle

    def batches(self, batch_size, which_set="train"):
        """Yield consecutive mini-batches of one split.

        When ``self.shuffle`` is set, the stored arrays of the requested
        split (and, for the training split, the label mask) are reordered
        in place with one shared permutation before batching.

        :param batch_size: Maximum number of examples per batch
        :param which_set: Prefix of the split to read: "train", "valid" or "test"
        :return: Generator of ``(x, y, label_mask)`` for the training split
            and ``(x, y)`` for the other splits
        """
        x_name = which_set + "_x"
        y_name = which_set + "_y"

        # All state is read from and written to this instance (self), not a
        # module-level `dataset` variable.
        num_examples = len(getattr(self, y_name))
        if self.shuffle:
            idx = np.arange(num_examples)
            np.random.shuffle(idx)
            setattr(self, x_name, getattr(self, x_name)[idx])
            setattr(self, y_name, getattr(self, y_name)[idx])
            if which_set == "train":
                # Keep the mask aligned with the reordered examples
                self.label_mask = self.label_mask[idx]

        dataset_x = getattr(self, x_name)
        dataset_y = getattr(self, y_name)
        for ii in range(0, num_examples, batch_size):
            x = dataset_x[ii:ii + batch_size]
            y = dataset_y[ii:ii + batch_size]

            if which_set == "train":
                # When we use the data for training, we need to include
                # the label mask, so we can pretend we don't have access
                # to some of the labels, as an exercise of our semi-supervised
                # learning ability.
                # label_mask entries: 0 = label cannot be used, 1 = label can be used
                yield x, y, self.label_mask[ii:ii + batch_size]
            else:
                yield x, y
def model_inputs(real_dim, z_dim):
    """Create the placeholders that feed the model.

    :param real_dim: Shape of a single real image, without the batch axis
    :param z_dim: Length of the noise vector
    :return: A tuple of (real images, noise, labels, label mask) placeholders
    """
    # Real training images, consumed by the discriminator; batch axis left open
    real_shape = (None,) + tuple(real_dim)
    inputs_real = tf.placeholder(tf.float32, real_shape, name='input_real')

    # Random noise vectors, consumed by the generator
    inputs_z = tf.placeholder(tf.float32, (None, z_dim), name='input_z')

    # Labels of the real images (0-9 for the SVHN dataset). The shape argument
    # is None, i.e. the placeholder's shape is left unspecified.
    y = tf.placeholder(tf.int32, None, name='y')

    # Label mask telling the model for which sample images it may use the
    # labels during training; shape left unspecified as well
    label_mask = tf.placeholder(tf.int32, None, name='label_mask')

    return inputs_real, inputs_z, y, label_mask
For the generator, we follow a very basic implementation described in the DCGAN paper. This approach consists of reshaping a random vector z into a 4D tensor and then feeding it to a sequence of transpose convolutions, batch normalization and leaky ReLU operations that increase the spatial dimensions of the input while decreasing the number of channels. As a result, the network outputs a 32x32x3 tensor whose values are squashed between -1 and 1 by the hyperbolic tangent function.
def generator(z, output_dim, reuse=False, alpha=0.2, training=True, size_mult=128):
    """Build the DCGAN-style generator.

    A dense projection of ``z`` is reshaped to a 4x4 feature map and passed
    through three stride-2 transposed convolutions; the result goes through
    tanh.

    :param z: Noise tensor
    :param output_dim: Number of channels of the generated image
    :param reuse: Passed to ``tf.variable_scope`` for the 'generator' scope
    :param alpha: The slope of the left half of the leaky ReLU activation
    :param training: Passed to every batch-normalization layer
    :param size_mult: Multiplier for the number of filters in each layer
    :return: The generated images, squashed by tanh
    """
    def leaky_relu(tensor):
        return tf.maximum(alpha * tensor, tensor)

    with tf.variable_scope('generator', reuse=reuse):
        # Fully connected projection, reshaped to start the convolutional stack
        projected = tf.layers.dense(z, 4 * 4 * size_mult * 4)
        hidden = tf.reshape(projected, (-1, 4, 4, size_mult * 4))
        hidden = leaky_relu(tf.layers.batch_normalization(hidden, training=training))

        # Two upsampling stages: transposed conv -> batch norm -> leaky ReLU
        for num_filters in (size_mult * 2, size_mult):
            hidden = tf.layers.conv2d_transpose(hidden, num_filters, 5, strides=2, padding='same')
            hidden = tf.layers.batch_normalization(hidden, training=training)
            hidden = leaky_relu(hidden)

        # Output layer: last upsampling step, no batch norm
        logits = tf.layers.conv2d_transpose(hidden, output_dim, 5, strides=2, padding='same')
        return tf.tanh(logits)
Here we also set up a similar DCGAN architecture, in which we use a stack of stride-2 convolutions for dimensionality reduction and batch normalization for stabilizing learning (except for the first layer of the network).
The 2D convolution window (kernel or filter) is set to have a width and height of 3 across all the convolution operations of the discriminator (the generator's transpose convolutions use 5). Also, note that we have some layers with dropout. It is important to understand that our discriminator behaves (in part) like any other regular classifier and, because of that, it may suffer from the same problems any classifier would if not well designed.
After a series of convolutions, batch normalization, leaky RELUs and dropout, instead of directly applying a fully connected layer on top of the convolutions, we perform a Global Average Pooling (GAP) operation.
def discriminator(x, reuse=False, alpha=0.2, drop_rate=0., num_classes=10, size_mult=64):
    """Build the discriminator, which doubles as the digit classifier.

    Changes relative to the previous version of this function:

    * Dropout is now actually applied. ``tf.layers.dropout`` defaults to
      ``training=False``, which returns its input untouched, so ``drop_rate``
      previously had no effect. Dropout layers are now created with
      ``training=True``; pass ``drop_rate=0.`` to disable them.
    * A conv / batch-norm / dropout stage whose output was never consumed has
      been removed. It did not influence any returned tensor; the
      auto-generated variable names of the later layers shift as a result.

    :param x: Batch of images (real or generated)
    :param reuse: Passed to ``tf.variable_scope`` for the 'discriminator' scope
    :param alpha: The slope of the left half of the leaky ReLU activation
    :param drop_rate: The probability of dropping a hidden unit (a Python
        float or a scalar tensor)
    :param num_classes: Number of real classes to recognize
    :param size_mult: Multiplier for the number of filters in each layer
    :return: A tuple of (class probabilities, class logits, real-vs-fake
        logits, features used for the feature matching loss)
    """
    def leaky_relu(tensor):
        return tf.maximum(alpha * tensor, tensor)

    def dropout(tensor, rate):
        # training=True is required for the layer to drop anything at all
        return tf.layers.dropout(tensor, rate=rate, training=True)

    with tf.variable_scope('discriminator', reuse=reuse):
        # Lighter dropout directly on the input images
        x = dropout(x, drop_rate / 2.5)

        # Shape comments below assume a ?x32x32x3 input.
        # First layer: no batch normalization
        x1 = tf.layers.conv2d(x, size_mult, 3, strides=2, padding='same')
        relu1 = dropout(leaky_relu(x1), drop_rate)  # [?x16x16x?]

        x2 = tf.layers.conv2d(relu1, size_mult, 3, strides=2, padding='same')
        bn2 = tf.layers.batch_normalization(x2, training=True)  # [?x8x8x?]
        relu2 = leaky_relu(bn2)

        x3 = tf.layers.conv2d(relu2, size_mult, 3, strides=2, padding='same')  # [?x4x4x?]
        bn3 = tf.layers.batch_normalization(x3, training=True)
        relu3 = dropout(leaky_relu(bn3), drop_rate)

        x4 = tf.layers.conv2d(relu3, 2 * size_mult, 3, strides=1, padding='same')  # [?x4x4x?]
        bn4 = tf.layers.batch_normalization(x4, training=True)
        relu4 = leaky_relu(bn4)

        x5 = tf.layers.conv2d(relu4, 2 * size_mult, 3, strides=1, padding='same')  # [?x4x4x?]
        bn5 = tf.layers.batch_normalization(x5, training=True)
        relu5 = leaky_relu(bn5)

        # Last convolution: 'valid' padding shrinks the 4x4 map to 2x2.
        # Don't use bn on this layer, because bn would set the mean of each feature
        # to the bn mu parameter.
        # This layer is used for the feature matching loss, which only works if
        # the means can be different when the discriminator is run on the data than
        # when the discriminator is run on the generator samples.
        x6 = tf.layers.conv2d(relu5, filters=(2 * size_mult), kernel_size=3, strides=1, padding='valid')
        relu6 = leaky_relu(x6)

        # Flatten it by global average pooling: every feature map is averaged
        # over its spatial positions, leaving one value per channel
        # [BATCH_SIZE, HEIGHT, WIDTH, CHANNELS] --> [BATCH_SIZE, CHANNELS]
        features = tf.reduce_mean(relu6, axis=[1, 2])

        # Inputs to a softmax distribution over the num_classes real classes
        class_logits = tf.layers.dense(features, num_classes)

        # Real-vs-fake logit. There is no explicit "fake" output unit:
        # sigmoid(logsumexp(class_logits)) equals sum(exp(l)) / (sum(exp(l)) + 1),
        # i.e. the fake class behaves as if its logit were fixed at 0.
        # reduce_logsumexp is more numerically stable than log(sum(exp(input))):
        # it avoids overflows caused by taking the exp of large inputs and
        # underflows caused by taking the log of small inputs.
        gan_logits = tf.reduce_logsumexp(class_logits, 1)

        # Class probabilities over the num_classes real classes
        out = tf.nn.softmax(class_logits)

        return out, class_logits, gan_logits, features
def model_loss(input_real, input_z, output_dim, y, num_classes, label_mask, alpha=0.2, drop_rate=0., smooth=0.1):
    """
    Get the loss for the discriminator and generator
    :param input_real: Images from the real dataset
    :param input_z: Z input random noise vector
    :param output_dim: The number of channels in the output image
    :param y: Integer class labels
    :param num_classes: The number of classes
    :param label_mask: 1 where the label may be used for training, 0 where the
        example is treated as unlabeled
    :param alpha: The slope of the left half of leaky ReLU activation
    :param drop_rate: The probability of dropping a hidden unit
    :param smooth: Label smoothing; real images get the target (1 - smooth)
        in the real-vs-fake loss
    :return: A tuple of (discriminator loss, generator loss, number of correct
        predictions in the batch, number of correct predictions among the
        labeled examples, generator output)
    """
    # These numbers multiply the size of each layer of the generator and the discriminator,
    # respectively. You can reduce them to run your code faster for debugging purposes.
    g_size_mult = 32
    d_size_mult = 64

    # Here we instantiate the generator and the discriminator networks.
    # num_classes is forwarded so the class logits always match the one-hot
    # labels built below.
    g_model = generator(input_z, output_dim, alpha=alpha, size_mult=g_size_mult)
    d_on_data = discriminator(input_real, alpha=alpha, drop_rate=drop_rate,
                              num_classes=num_classes, size_mult=d_size_mult)
    # The discriminator returns (class probabilities, class logits,
    # real-vs-fake logits, features for the feature matching loss)
    _, class_logits_on_data, gan_logits_on_data, data_features = d_on_data
    d_on_samples = discriminator(g_model, reuse=True, alpha=alpha, drop_rate=drop_rate,
                                 num_classes=num_classes, size_mult=d_size_mult)
    _, _, gan_logits_on_samples, sample_features = d_on_samples

    # Here we compute `d_loss`, the loss for the discriminator.
    # This combines two different losses:
    # 1. The loss for the GAN problem, where we minimize the cross-entropy for the binary
    #    real-vs-fake classification problem.
    real_data_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=gan_logits_on_data,
                                                labels=tf.ones_like(gan_logits_on_data) * (1 - smooth)))
    fake_data_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=gan_logits_on_samples,
                                                labels=tf.zeros_like(gan_logits_on_samples)))
    unsupervised_loss = real_data_loss + fake_data_loss

    # 2. The loss for the SVHN digit classification problem, where we minimize the cross-entropy
    #    for the multi-class softmax. For this one we use the labels, and `label_mask`
    #    to ignore the examples that we are pretending are unlabeled for the
    #    semi-supervised learning problem.
    # Flatten to one entry per example. Unlike tf.squeeze, reshape keeps the
    # batch axis even when the batch holds a single example.
    y = tf.reshape(y, [-1])
    label_mask = tf.reshape(tf.to_float(label_mask), [-1])
    supervised_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=class_logits_on_data,
        labels=tf.one_hot(y, num_classes, dtype=tf.float32))
    # ignore the labels that we pretend do not exist
    supervised_loss = tf.reduce_sum(tf.multiply(supervised_loss, label_mask))
    # mean over the labeled examples; the maximum guards against a batch without any
    supervised_loss = supervised_loss / tf.maximum(1.0, tf.reduce_sum(label_mask))

    d_loss = unsupervised_loss + supervised_loss

    # Here we set `g_loss` to the "feature matching" loss invented by Tim Salimans at OpenAI.
    # This loss consists of minimizing the absolute difference between the expected features
    # on the data and the expected features on the generated samples.
    # This loss works better for semi-supervised learning than the traditional GAN losses.
    # It makes the generator output samples whose features are, on average, similar to the
    # features found by applying the discriminator to the real data.
    data_moments = tf.reduce_mean(data_features, axis=0)
    sample_moments = tf.reduce_mean(sample_features, axis=0)
    g_loss = tf.reduce_mean(tf.abs(data_moments - sample_moments))

    # Classification bookkeeping on the real images
    pred_class = tf.cast(tf.argmax(class_logits_on_data, 1), tf.int32)
    eq = tf.to_float(tf.equal(y, pred_class))
    correct = tf.reduce_sum(eq)
    masked_correct = tf.reduce_sum(label_mask * eq)

    return d_loss, g_loss, correct, masked_correct, g_model
def model_opt(d_loss, g_loss, learning_rate, beta1):
    """
    Get optimization operations
    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param learning_rate: Learning rate variable; it is also the target of the
        returned decay op
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation, generator training
        operation, operation that multiplies the learning rate by 0.9)
    """
    # Collect the trainable weights and biases of each network by variable scope
    trainable_key = tf.GraphKeys.TRAINABLE_VARIABLES
    discriminator_train_vars = tf.get_collection(trainable_key, scope='discriminator')
    generator_train_vars = tf.get_collection(trainable_key, scope='generator')

    # Minimize both players' costs simultaneously, one Adam optimizer per network.
    # NOTE: the ops in tf.GraphKeys.UPDATE_OPS are not attached as control
    # dependencies here.
    d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, name='d_optimizer')
    d_train_opt = d_optimizer.minimize(d_loss, var_list=discriminator_train_vars)
    g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, name='g_optimizer')
    g_train_opt = g_optimizer.minimize(g_loss, var_list=generator_train_vars)

    # Running this op shrinks the learning rate by 10%
    decayed_rate = learning_rate * 0.9
    shrink_lr = tf.assign(learning_rate, decayed_rate)

    return d_train_opt, g_train_opt, shrink_lr
class GAN:
    """
    A GAN model.
    :param real_size: The shape of the real data.
    :param z_size: The number of entries in the z code vector.
    :param learning_rate: The learning rate to use for Adam.
    :param num_classes: The number of classes to recognize.
    :param alpha: The slope of the left half of the leaky ReLU activation
    :param beta1: The beta1 parameter for Adam.
    """
    def __init__(self, real_size, z_size, learning_rate, num_classes=10, alpha=0.2, beta1=0.5):
        # Start from an empty default graph so the model can be rebuilt
        tf.reset_default_graph()

        # Stored as a non-trainable variable so that it can be decayed by `shrink_lr`
        self.learning_rate = tf.Variable(learning_rate, trainable=False)
        inputs = model_inputs(real_size, z_size)
        self.input_real, self.input_z, self.y, self.label_mask = inputs
        # Defaults to 0.5 when not fed; feed a different value to override it
        self.drop_rate = tf.placeholder_with_default(.5, (), "drop_rate")

        # real_size[2] is passed as the generator's number of output channels.
        # The caller's `alpha` is forwarded (it used to be ignored in favour
        # of a hard-coded 0.2).
        loss_results = model_loss(self.input_real, self.input_z,
                                  real_size[2], self.y, num_classes,
                                  label_mask=self.label_mask,
                                  alpha=alpha,
                                  drop_rate=self.drop_rate)
        self.d_loss, self.g_loss, self.correct, self.masked_correct, self.samples = loss_results

        self.d_opt, self.g_opt, self.shrink_lr = model_opt(self.d_loss, self.g_loss, self.learning_rate, beta1)
def view_samples(epoch, samples, nrows, ncols, figsize=(5,5)):
    """Show the images generated at one epoch on an nrows x ncols grid.

    :param epoch: Index into ``samples`` (e.g. -1 for the latest entry)
    :param samples: Sequence whose entries are batches of generated images
    :param nrows: Number of grid rows
    :param ncols: Number of grid columns
    :param figsize: Figure size passed to ``plt.subplots``
    :return: A tuple of (figure, axes)
    """
    fig, axes = plt.subplots(figsize=figsize, nrows=nrows, ncols=ncols,
                             sharey=True, sharex=True)
    for ax, img in zip(axes.flatten(), samples[epoch]):
        ax.axis('off')

        # Stretch each image to the full 0-255 range for display. A constant
        # image has no range to stretch, so it is shown as all zeros instead
        # of dividing by zero.
        lowest = img.min()
        value_range = img.max() - lowest
        if value_range > 0:
            img = ((img - lowest) * 255 / value_range).astype(np.uint8)
        else:
            img = np.zeros_like(img, dtype=np.uint8)

        # 'box-forced' was deprecated and later removed from Matplotlib; newer
        # releases reject it with a ValueError and accept plain 'box'.
        try:
            ax.set_adjustable('box-forced')
        except ValueError:
            ax.set_adjustable('box')
        ax.imshow(img)

    plt.subplots_adjust(wspace=0, hspace=0)
    return fig, axes
def train(net, dataset, epochs, batch_size, figsize=(5,5)):
    """Train the semi-supervised GAN and track the classifier accuracy.

    After every epoch the learning rate is decayed, the classifier is
    evaluated on the test split, and a fixed set of 50 noise vectors is run
    through the generator and displayed. When training ends, a checkpoint is
    written to './checkpoints/generator.ckpt' and the generated samples are
    pickled to 'samples.pkl'.

    :param net: Model exposing the placeholders, losses and training ops
        (see ``GAN``)
    :param dataset: Object whose ``batches`` method yields the mini-batches
    :param epochs: Number of passes over the training split
    :param batch_size: Number of examples per mini-batch
    :param figsize: Figure size used when displaying the generated samples
    :return: A tuple of (train accuracies, test accuracies, generated
        samples), each with one entry per epoch
    """
    saver = tf.train.Saver()

    # Read the noise dimension from the model itself rather than from a
    # module-level `z_size`, so the noise always fits `net.input_z`.
    z_dim = net.input_z.get_shape().as_list()[1]
    # Fixed noise, reused every epoch to visualise the generator's progress
    sample_z = np.random.normal(0, 1, size=(50, z_dim))

    samples, train_accuracies, test_accuracies = [], [], []
    steps = 0

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for e in range(epochs):
            print("Epoch",e)

            t1e = time.time()
            num_examples = 0
            num_correct = 0
            for x, y, label_mask in dataset.batches(batch_size):
                assert 'int' in str(y.dtype)
                steps += 1
                # Train accuracy is measured on the labeled examples only
                num_examples += label_mask.sum()

                # Sample random noise for G
                batch_z = np.random.normal(0, 1, size=(batch_size, z_dim))

                # Run optimizers
                t1 = time.time()
                _, _, correct = sess.run([net.d_opt, net.g_opt, net.masked_correct],
                                         feed_dict={net.input_real: x, net.input_z: batch_z,
                                                    net.y : y, net.label_mask : label_mask})
                t2 = time.time()
                num_correct += correct

            # Decay the learning rate once per epoch
            sess.run([net.shrink_lr])

            train_accuracy = num_correct / float(num_examples)
            print("\t\tClassifier train accuracy: ", train_accuracy)

            num_examples = 0
            num_correct = 0
            for x, y in dataset.batches(batch_size, which_set="test"):
                assert 'int' in str(y.dtype)
                num_examples += x.shape[0]

                # Evaluate with the drop rate fed as 0
                correct, = sess.run([net.correct], feed_dict={net.input_real: x,
                                                              net.y : y,
                                                              net.drop_rate: 0.})
                num_correct += correct

            test_accuracy = num_correct / float(num_examples)
            print("\t\tClassifier test accuracy", test_accuracy)
            # Duration of the last training step of this epoch
            print("\t\tStep time: ", t2 - t1)
            t2e = time.time()
            print("\t\tEpoch time: ", t2e - t1e)

            gen_samples = sess.run(
                net.samples,
                feed_dict={net.input_z: sample_z})
            samples.append(gen_samples)
            _ = view_samples(-1, samples, 5, 10, figsize=figsize)
            plt.show()

            # Save history of accuracies to view after training
            train_accuracies.append(train_accuracy)
            test_accuracies.append(test_accuracy)

        saver.save(sess, './checkpoints/generator.ckpt')

    with open('samples.pkl', 'wb') as f:
        pkl.dump(samples, f)

    return train_accuracies, test_accuracies, samples
!mkdir checkpoints
mkdir: cannot create directory ‘checkpoints’: File exists
# Hyperparameters
real_size = (32,32,3)
z_size = 100
learning_rate = 0.0003
batch_size = 128
epochs = 25

# Build the model and the data pipeline
net = GAN(real_size, z_size, learning_rate)
dataset = Dataset(trainset, testset)

# Train, keeping the per-epoch accuracy history and the generated samples
training_results = train(net, dataset, epochs, batch_size, figsize=(10,5))
train_accuracies, test_accuracies, samples = training_results
Epoch 0 Classifier train accuracy: 0.19 Classifier test accuracy 0.247771972956 Step time: 0.06286406517028809 Epoch time: 28.6306734085083
Epoch 1 Classifier train accuracy: 0.309 Classifier test accuracy 0.368930547019 Step time: 0.039215087890625 Epoch time: 28.045222759246826
Epoch 2 Classifier train accuracy: 0.509 Classifier test accuracy 0.505377996312 Step time: 0.04323291778564453 Epoch time: 28.572621822357178
Epoch 3 Classifier train accuracy: 0.641 Classifier test accuracy 0.562692071297 Step time: 0.03950762748718262 Epoch time: 28.342881679534912
Epoch 4 Classifier train accuracy: 0.738 Classifier test accuracy 0.547403196066 Step time: 0.03790545463562012 Epoch time: 26.752944469451904
Epoch 5 Classifier train accuracy: 0.801 Classifier test accuracy 0.613629379226 Step time: 0.04494428634643555 Epoch time: 27.91737198829651
Epoch 6 Classifier train accuracy: 0.853 Classifier test accuracy 0.656192378611 Step time: 0.04487466812133789 Epoch time: 29.24485492706299
Epoch 7 Classifier train accuracy: 0.903 Classifier test accuracy 0.656653349723 Step time: 0.04403948783874512 Epoch time: 29.1003315448761
Epoch 8 Classifier train accuracy: 0.9 Classifier test accuracy 0.65503995083 Step time: 0.03714489936828613 Epoch time: 28.673871517181396
Epoch 9 Classifier train accuracy: 0.919 Classifier test accuracy 0.688537185003 Step time: 0.047916412353515625 Epoch time: 29.255188941955566
Epoch 10 Classifier train accuracy: 0.919 Classifier test accuracy 0.688767670559 Step time: 0.04770994186401367 Epoch time: 29.302653551101685
Epoch 11 Classifier train accuracy: 0.926 Classifier test accuracy 0.696911493546 Step time: 0.04100465774536133 Epoch time: 28.871726512908936
Epoch 12 Classifier train accuracy: 0.933 Classifier test accuracy 0.691226183159 Step time: 0.04149150848388672 Epoch time: 29.167212963104248
Epoch 13 Classifier train accuracy: 0.933 Classifier test accuracy 0.696220036878 Step time: 0.0490267276763916 Epoch time: 29.121546745300293
Epoch 14 Classifier train accuracy: 0.93 Classifier test accuracy 0.696988322065 Step time: 0.04289436340332031 Epoch time: 28.889161825180054
Epoch 15 Classifier train accuracy: 0.931 Classifier test accuracy 0.690918869084 Step time: 0.03994131088256836 Epoch time: 27.28535747528076
Epoch 16 Classifier train accuracy: 0.933 Classifier test accuracy 0.681929932391 Step time: 0.03889656066894531 Epoch time: 26.704275846481323
Epoch 17 Classifier train accuracy: 0.938 Classifier test accuracy 0.683466502766 Step time: 0.03771519660949707 Epoch time: 26.865015745162964
Epoch 18 Classifier train accuracy: 0.939 Classifier test accuracy 0.68077750461 Step time: 0.04466581344604492 Epoch time: 26.684985399246216
Epoch 19 Classifier train accuracy: 0.935 Classifier test accuracy 0.685310387216 Step time: 0.03772783279418945 Epoch time: 26.833319425582886
Epoch 20 Classifier train accuracy: 0.934 Classifier test accuracy 0.685003073141 Step time: 0.03722500801086426 Epoch time: 26.43741750717163
Epoch 21 Classifier train accuracy: 0.936 Classifier test accuracy 0.67647510756 Step time: 0.04166460037231445 Epoch time: 26.90742778778076
Epoch 22 Classifier train accuracy: 0.939 Classifier test accuracy 0.678242163491 Step time: 0.04402613639831543 Epoch time: 26.405832052230835
Epoch 23 Classifier train accuracy: 0.938 Classifier test accuracy 0.672940995698 Step time: 0.03714895248413086 Epoch time: 26.47376036643982
Epoch 24 Classifier train accuracy: 0.937 Classifier test accuracy 0.67401659496 Step time: 0.03694915771484375 Epoch time: 26.15842580795288
# Plot the per-epoch train and test accuracy curves on one figure
fig, ax = plt.subplots()
for accuracy_history, curve_label in ((train_accuracies, 'Train'), (test_accuracies, 'Test')):
    plt.plot(accuracy_history, label=curve_label, alpha=0.5)
plt.title("Accuracy")
plt.legend()
<matplotlib.legend.Legend at 0x7f719f0f5320>
# Display the samples from the last training epoch on a 5 x 10 grid
_ = view_samples(epoch=-1, samples=samples, nrows=5, ncols=10, figsize=(10,5))