Original code from https://github.com/eriklindernoren/Keras-GAN/blob/master/dcgan/dcgan.py under the following license:
MIT License
Copyright (c) 2017 Erik Linder-Norén
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
The author's model was based on the paper here: https://arxiv.org/pdf/1511.06434.pdf
# upgrade tensorflow to tensorflow 2
%tensorflow_version 2.x
# display matplotlib plots
%matplotlib inline
TensorFlow 2.x selected.
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout
from tensorflow.keras.layers import BatchNormalization, Activation, ZeroPadding2D
from tensorflow.keras.layers import UpSampling2D, Conv2D, LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow import test
from tensorflow import device
import matplotlib.pyplot as plt
from IPython.display import Image
import sys
import numpy as np
# Global Constants
images_dir = "dcgan_images"  # directory where checkpoint images are saved during training
img_rows = 28  # MNIST image height in pixels
img_cols = 28  # MNIST image width in pixels
channels = 1  # grayscale images, so a single channel
noise_len = 100  # length of the random noise vector fed to the generator
def build_discriminator():
    '''
    Build the discriminator: a CNN that maps a (28, 28, 1) image to a single
    sigmoid confidence that the image is real.

    returns: the (uncompiled) model object
    '''
    input_shape = (img_rows, img_cols, channels)
    # Four strided/padded conv stages, each followed by LeakyReLU and Dropout
    # (BatchNorm on all but the first), then a sigmoid classification head.
    layers = [
        Conv2D(32, kernel_size=3, strides=2, input_shape=input_shape, padding="same"),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(64, kernel_size=3, strides=2, padding="same"),
        # Pad 7x7 -> 8x8 so the next stride-2 conv halves cleanly.
        ZeroPadding2D(padding=((0, 1), (0, 1))),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(128, kernel_size=3, strides=2, padding="same"),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(256, kernel_size=3, strides=1, padding="same"),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Flatten(),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(layers)
def build_generator():
    '''
    Build the generator: maps a 1-D noise vector to a (28, 28, 1) image whose
    pixel values lie in [-1, 1] via a final tanh (-1 = black, 1 = white).

    returns: the (uncompiled) model object
    '''
    # Project the noise to a 7x7x128 feature map, then upsample twice
    # (7 -> 14 -> 28) with a conv + BatchNorm + LeakyReLU block after each.
    layers = [
        Dense(128 * 7 * 7, activation="relu", input_shape=(noise_len,)),
        Reshape((7, 7, 128)),
        UpSampling2D(interpolation="bilinear"),
        Conv2D(128, kernel_size=3, padding="same"),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        UpSampling2D(interpolation="bilinear"),
        Conv2D(64, kernel_size=3, padding="same"),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Conv2D(1, kernel_size=3, padding="same"),
        Activation("tanh"),
    ]
    return Sequential(layers)
def build_combined():
    '''
    Puts together a model that combines the discriminator and generator models.

    returns: the generator, discriminator, and combined model objects
    '''
    # Give each compiled model its own optimizer instance: a Keras optimizer
    # accumulates per-variable moment state (Adam's m/v slots), so sharing a
    # single Adam between the discriminator and the combined model would couple
    # their parameter updates. Same hyperparameters as before (lr=2e-4, beta_1=0.5).
    d_optimizer = Adam(0.0002, 0.5)
    g_optimizer = Adam(0.0002, 0.5)

    # Build and compile the discriminator
    discriminator = build_discriminator()
    discriminator.compile(loss='binary_crossentropy',
                          optimizer=d_optimizer,
                          metrics=['accuracy'])

    # Build the generator. It is never trained on its own (only through the
    # combined model), so it is not compiled here.
    generator = build_generator()

    # The generator takes noise as input and generates images
    noise = Input(shape=(noise_len,))
    img = generator(noise)

    # For the combined model we will only train the generator. The trainable
    # flag is captured when a model is compiled, so the discriminator — already
    # compiled above — still trains when called via its own train_on_batch.
    discriminator.trainable = False

    # The discriminator takes generated images as input and determines validity
    valid = discriminator(img)

    # The combined model (stacked generator and discriminator) takes
    # noise as input => generates images => determines validity
    combined = Model(inputs=noise, outputs=valid)
    combined.compile(loss='binary_crossentropy', optimizer=g_optimizer)
    return generator, discriminator, combined
def save_imgs(generator, iteration):
    '''
    Has the generator create images and saves the images in a single file that includes
    the number of iterations in the filename.

    inputs:
        generator: the generator model object returned by build_combined
        iteration: the iteration number (but can be anything that can be represented as a string)
    returns: None
    '''
    r, c = 5, 5
    noise = np.random.normal(0, 1, (r * c, noise_len))
    gen_imgs = generator.predict(noise)

    # Tile the r*c generated samples into one grid figure. Pixel values are in
    # [-1, 1] (generator tanh output), hence the fixed vmin/vmax.
    fig, axs = plt.subplots(r, c)
    for ax, img in zip(axs.flat, gen_imgs):
        ax.imshow(img[:, :, 0], cmap='gray', vmin=-1, vmax=1)
        ax.axis('off')
    fig.savefig(os.path.join(images_dir, 'mnist_{}.png'.format(iteration)))
    # Close this specific figure: a bare plt.close() only closes the *current*
    # figure, which may not be `fig` if the caller has other figures open.
    plt.close(fig)
def train(generator, discriminator, combined, iterations, batch_size=128, save_interval=50):
    '''
    Runs the alternating GAN training loop.

    generator: the generator model object returned by build_combined
    discriminator: the discriminator model object returned by build_combined
    combined: the combined model object returned by build_combined
    iterations: integer, the number of iterations to train for
    batch_size: integer, the number of training samples to use at a time
    save_interval: integer, will generate and save images when the current iteration_num % save_interval is 0
    returns: None
    '''
    # Load the dataset and rescale pixels from [0, 255] to [-1, 1] so that real
    # images match the generator's tanh output range.
    (X_train, _), (_, _) = mnist.load_data()
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    X_train = np.expand_dims(X_train, axis=3)

    half_batch = int(batch_size / 2)

    for iteration in range(iterations):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        # A random half batch of real images plus a half batch of fakes.
        sample_idx = np.random.randint(0, X_train.shape[0], half_batch)
        real_imgs = X_train[sample_idx]
        noise = np.random.normal(0, 1, (half_batch, noise_len))
        fake_imgs = generator.predict(noise)

        # Noisy labels: about 10% of the time, swap real/fake targets
        # (see https://github.com/soumith/ganhacks)
        flip = np.random.randint(0, 10) < 1
        real_label = 0.0 if flip else 1.0
        fake_label = 1.0 if flip else 0.0

        d_loss_real = discriminator.train_on_batch(
            real_imgs, np.full((half_batch, 1), real_label))
        d_loss_fake = discriminator.train_on_batch(
            fake_imgs, np.full((half_batch, 1), fake_label))

        # ---------------------
        #  Train Generator
        # ---------------------
        # The generator wants the discriminator to call its output real (1).
        noise = np.random.normal(0, 1, (batch_size, noise_len))
        g_loss = combined.train_on_batch(noise, np.ones((batch_size, 1)))

        # At each save interval, report progress and save sample images.
        if iteration % save_interval == 0:
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
            print("{} [D loss: {}, acc.: {:.2%}] [G loss: {}]".format(iteration, d_loss[0], d_loss[1], g_loss))
            save_imgs(generator, iteration)
def show_new_image(generator):
    '''
    Generates and displays a single new image from random noise.

    inputs: generator object model returned from build_combined
    returns: the matplotlib AxesImage showing the generated image
    '''
    sample = np.random.normal(0, 1, (1, noise_len))
    # predict returns a (1, 28, 28, 1) batch; take the lone grayscale plane.
    image = generator.predict(sample)[0, :, :, 0]
    return plt.imshow(image, cmap='gray', vmin=-1, vmax=1)
# Set up the directory that holds the images saved at training checkpoints.
import os

# makedirs with exist_ok replaces the isdir-then-mkdir pattern: it is a single
# race-free call and also creates any missing parent directories.
os.makedirs(images_dir, exist_ok=True)

generator, discriminator, combined = build_combined()
We can take a look at what each of the models looks like.
generator.summary()
Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_1 (Dense) (None, 6272) 633472 _________________________________________________________________ reshape (Reshape) (None, 7, 7, 128) 0 _________________________________________________________________ up_sampling2d (UpSampling2D) (None, 14, 14, 128) 0 _________________________________________________________________ conv2d_4 (Conv2D) (None, 14, 14, 128) 147584 _________________________________________________________________ batch_normalization_3 (Batch (None, 14, 14, 128) 512 _________________________________________________________________ leaky_re_lu_4 (LeakyReLU) (None, 14, 14, 128) 0 _________________________________________________________________ up_sampling2d_1 (UpSampling2 (None, 28, 28, 128) 0 _________________________________________________________________ conv2d_5 (Conv2D) (None, 28, 28, 64) 73792 _________________________________________________________________ batch_normalization_4 (Batch (None, 28, 28, 64) 256 _________________________________________________________________ leaky_re_lu_5 (LeakyReLU) (None, 28, 28, 64) 0 _________________________________________________________________ conv2d_6 (Conv2D) (None, 28, 28, 1) 577 _________________________________________________________________ activation (Activation) (None, 28, 28, 1) 0 ================================================================= Total params: 856,193 Trainable params: 855,809 Non-trainable params: 384 _________________________________________________________________
# Note that we get a warning here about weights being trainable.
# When we are training the discriminator, we want its weights to be trainable.
# When we are training the generator, we want the discriminator weights to be fixed.
# This is surprising to Keras, so it is warning us to check that this is really what we want (it is!)
discriminator.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 14, 14, 32) 320 _________________________________________________________________ leaky_re_lu (LeakyReLU) (None, 14, 14, 32) 0 _________________________________________________________________ dropout (Dropout) (None, 14, 14, 32) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 7, 7, 64) 18496 _________________________________________________________________ zero_padding2d (ZeroPadding2 (None, 8, 8, 64) 0 _________________________________________________________________ batch_normalization (BatchNo (None, 8, 8, 64) 256 _________________________________________________________________ leaky_re_lu_1 (LeakyReLU) (None, 8, 8, 64) 0 _________________________________________________________________ dropout_1 (Dropout) (None, 8, 8, 64) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 4, 4, 128) 73856 _________________________________________________________________ batch_normalization_1 (Batch (None, 4, 4, 128) 512 _________________________________________________________________ leaky_re_lu_2 (LeakyReLU) (None, 4, 4, 128) 0 _________________________________________________________________ dropout_2 (Dropout) (None, 4, 4, 128) 0 _________________________________________________________________ conv2d_3 (Conv2D) (None, 4, 4, 256) 295168 _________________________________________________________________ batch_normalization_2 (Batch (None, 4, 4, 256) 1024 _________________________________________________________________ leaky_re_lu_3 (LeakyReLU) (None, 4, 4, 256) 0 _________________________________________________________________ dropout_3 (Dropout) (None, 4, 4, 256) 0 _________________________________________________________________ flatten (Flatten) (None, 4096) 0 
_________________________________________________________________ dense (Dense) (None, 1) 4097 ================================================================= WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set `model.trainable` without calling `model.compile` after ? Total params: 786,562 Trainable params: 392,833 Non-trainable params: 393,729 _________________________________________________________________
combined.summary()
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 100)] 0 _________________________________________________________________ sequential_1 (Sequential) (None, 28, 28, 1) 856193 _________________________________________________________________ sequential (Sequential) (None, 1) 393729 ================================================================= Total params: 1,249,922 Trainable params: 855,809 Non-trainable params: 394,113 _________________________________________________________________
# Train using GPU acceleration
# (see https://colab.research.google.com/notebooks/gpu.ipynb#scrollTo=Y04m-jvKRDsJ)
# Fail fast with a helpful message if the notebook has no GPU attached.
if test.gpu_device_name() != '/device:GPU:0':
    print(
        '\n\nThis error most likely means that this notebook is not '
        'configured to use a GPU. Change this in Notebook Settings via the '
        'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
    raise SystemError('GPU device not found')

# Lower the number of iterations when you start debugging! You want a short testing cycle until you are confident
# that the code is working. Also, since the exercises involve models with less complicated patterns, you will
# likely not need as many iterations to train well.
with device('/device:GPU:0'):
    train(generator, discriminator, combined, iterations=2001, batch_size=32, save_interval=50)
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz 11493376/11490434 [==============================] - 0s 0us/step 0 [D loss: 1.1544578075408936, acc.: 21.88%] [G loss: 0.7053578495979309] 50 [D loss: 0.9345921277999878, acc.: 43.75%] [G loss: 1.245182752609253] 100 [D loss: 0.8686975836753845, acc.: 50.00%] [G loss: 1.3115715980529785] 150 [D loss: 0.5672547817230225, acc.: 68.75%] [G loss: 1.0402133464813232] 200 [D loss: 0.9006196856498718, acc.: 50.00%] [G loss: 1.1437000036239624] 250 [D loss: 0.31124305725097656, acc.: 93.75%] [G loss: 1.2584846019744873] 300 [D loss: 0.7882351875305176, acc.: 53.12%] [G loss: 0.8200423717498779] 350 [D loss: 0.4814497232437134, acc.: 75.00%] [G loss: 1.1233599185943604] 400 [D loss: 0.44962307810783386, acc.: 81.25%] [G loss: 0.6487892866134644] 450 [D loss: 0.12017085403203964, acc.: 100.00%] [G loss: 0.7795431613922119] 500 [D loss: 0.2077733874320984, acc.: 96.88%] [G loss: 0.7009087800979614] 550 [D loss: 0.07344160974025726, acc.: 100.00%] [G loss: 0.6887441873550415] 600 [D loss: 0.13227102160453796, acc.: 100.00%] [G loss: 0.8408626914024353] 650 [D loss: 0.5133113861083984, acc.: 78.12%] [G loss: 0.4815252125263214] 700 [D loss: 0.13273777067661285, acc.: 100.00%] [G loss: 0.6770761013031006] 750 [D loss: 0.06418153643608093, acc.: 100.00%] [G loss: 0.5403904914855957] 800 [D loss: 0.17853617668151855, acc.: 96.88%] [G loss: 0.8621774911880493] 850 [D loss: 0.38654857873916626, acc.: 84.38%] [G loss: 0.41380786895751953] 900 [D loss: 3.509596347808838, acc.: 0.00%] [G loss: 0.7589566707611084] 950 [D loss: 2.796586275100708, acc.: 0.00%] [G loss: 0.759162425994873] 1000 [D loss: 0.3516937792301178, acc.: 93.75%] [G loss: 0.6133664846420288] 1050 [D loss: 0.22273698449134827, acc.: 93.75%] [G loss: 0.5366196632385254] 1100 [D loss: 2.862865924835205, acc.: 0.00%] [G loss: 0.6058429479598999] 1150 [D loss: 0.07716101408004761, acc.: 100.00%] [G loss: 0.933562159538269] 1200 [D 
loss: 0.022532951086759567, acc.: 100.00%] [G loss: 1.1765646934509277] 1250 [D loss: 2.714770793914795, acc.: 0.00%] [G loss: 0.7532151937484741] 1300 [D loss: 0.2733033001422882, acc.: 96.88%] [G loss: 0.8495866060256958] 1350 [D loss: 0.31903183460235596, acc.: 84.38%] [G loss: 0.9593919515609741] 1400 [D loss: 0.06647367775440216, acc.: 100.00%] [G loss: 0.9115855097770691] 1450 [D loss: 0.05962929129600525, acc.: 100.00%] [G loss: 1.197852611541748] 1500 [D loss: 0.1420375108718872, acc.: 100.00%] [G loss: 1.102597951889038] 1550 [D loss: 0.05265570059418678, acc.: 100.00%] [G loss: 0.8590636849403381] 1600 [D loss: 0.07089543342590332, acc.: 100.00%] [G loss: 1.357068419456482] 1650 [D loss: 0.04263726994395256, acc.: 100.00%] [G loss: 1.5785088539123535] 1700 [D loss: 0.07926680147647858, acc.: 100.00%] [G loss: 1.1031678915023804] 1750 [D loss: 2.85390043258667, acc.: 0.00%] [G loss: 0.8541635274887085] 1800 [D loss: 0.13204118609428406, acc.: 100.00%] [G loss: 1.014026403427124] 1850 [D loss: 0.7211976051330566, acc.: 68.75%] [G loss: 0.2760601043701172] 1900 [D loss: 0.0855787843465805, acc.: 100.00%] [G loss: 0.9019816517829895] 1950 [D loss: 0.08599527925252914, acc.: 100.00%] [G loss: 0.9975156784057617] 2000 [D loss: 1.544213056564331, acc.: 6.25%] [G loss: 0.4589151442050934]
You can look at the saved files to see what the model output looks like after a certain number of iterations:
# Display the checkpoint images saved at iterations 0, 500, and 2000
# to see how the generator's output improved over training.
Image(filename=os.path.join(images_dir, 'mnist_0.png'))
Image(filename=os.path.join(images_dir, 'mnist_500.png'))
Image(filename=os.path.join(images_dir, 'mnist_2000.png'))
Let's have the trained model generate some images for us.
show_new_image(generator)
<matplotlib.image.AxesImage at 0x7f93cb074780>
show_new_image(generator)
<matplotlib.image.AxesImage at 0x7f93cafee358>
show_new_image(generator)
<matplotlib.image.AxesImage at 0x7f93caf4aef0>
show_new_image(generator)
<matplotlib.image.AxesImage at 0x7f93caf305f8>
Change the model so that it learns to produce 9x9 images of some simple pattern, for instance horizontal lines.
Change the model so that you can select which number you get an image of rather than always getting a random one. I highly recommend that you limit your model to only learning two or three numbers, so that you can get decent results with less training time.
You may notice the current GAN with 2000 iterations only gives so-so output. Unfortunately, if you keep training past this point, the output just gets worse. I haven't yet been able to figure out how to change the model to get better results. Can you?