%load_ext watermark
%watermark -v -p numpy,sklearn,scipy,matplotlib,tensorflow
CPython 3.6.8 IPython 7.2.0 numpy 1.15.4 sklearn 0.20.2 scipy 1.1.0 matplotlib 3.0.2 tensorflow 1.13.1
Chapter 15 – Autoencoders
This notebook contains all the sample code and the solutions to the exercises in Chapter 15.
It supports both Python 2 and Python 3. We import the common modules, configure Matplotlib so that figures are rendered inside the notebook, and prepare a function to save the generated figures:
# Support both Python 2 and Python 3
from __future__ import division, print_function, unicode_literals
# Common imports
import numpy as np
import os
import sys
# Reset the graph and seed the random generators to get stable output across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
# Matplotlib settings
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
# Korean font settings for Matplotlib
plt.rcParams['font.family'] = 'NanumBarunGothic'
plt.rcParams['axes.unicode_minus'] = False
# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "autoencoders"
def save_fig(fig_id, tight_layout=True):
    path = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID, fig_id + ".png")
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)
Utility functions to plot 28x28 grayscale images:
def plot_image(image, shape=[28, 28]):
    plt.imshow(image.reshape(shape), cmap="Greys", interpolation="nearest")
    plt.axis("off")

def plot_multiple_images(images, n_rows, n_cols, pad=2):
    images = images - images.min()  # make the minimum == 0, so the padding looks white
    w,h = images.shape[1:]
    image = np.zeros(((w+pad)*n_rows+pad, (h+pad)*n_cols+pad))
    for y in range(n_rows):
        for x in range(n_cols):
            image[(y*(h+pad)+pad):(y*(h+pad)+pad+h),(x*(w+pad)+pad):(x*(w+pad)+pad+w)] = images[y*n_cols+x]
    plt.imshow(image, cmap="Greys", interpolation="nearest")
    plt.axis("off")
Let's build a 3D dataset:
import numpy.random as rnd
rnd.seed(4)
m = 200
w1, w2 = 0.1, 0.3
noise = 0.1
angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5
data = np.empty((m, 3))
data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2
data[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2
data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * rnd.randn(m)
Standardize the data:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(data[:100])
X_test = scaler.transform(data[100:])
Now let's build the autoencoder:
import tensorflow as tf
reset_graph()
n_inputs = 3
n_hidden = 2  # codings
n_outputs = n_inputs
learning_rate = 0.01
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
hidden = tf.layers.dense(X, n_hidden)
outputs = tf.layers.dense(hidden, n_outputs)
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
init = tf.global_variables_initializer()
WARNING:tensorflow:From <ipython-input-7-5e0a8af605ba>:12: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.dense instead. WARNING:tensorflow:From /home/haesun/anaconda3/envs/handson-ml/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer.
n_iterations = 1000
codings = hidden
with tf.Session() as sess:
    init.run()
    for iteration in range(n_iterations):
        training_op.run(feed_dict={X: X_train})
    codings_val = codings.eval(feed_dict={X: X_test})
fig = plt.figure(figsize=(4,3))
plt.plot(codings_val[:,0], codings_val[:, 1], "b.")
plt.xlabel("$z_1$", fontsize=18)
plt.ylabel("$z_2$", fontsize=18, rotation=0)
save_fig("linear_autoencoder_pca_plot")
plt.show()
We will use the MNIST dataset.
Warning: since tf.examples.tutorials.mnist is deprecated and will be removed, we use tf.keras.datasets.mnist instead.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/")
Now let's build a stacked autoencoder with 3 hidden layers and 1 output layer (i.e., two autoencoders stacked together). We will use ELU activation, He initialization, and L2 regularization.
reset_graph()
from functools import partial
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150  # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0001
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
he_init = tf.variance_scaling_initializer()  # He initialization
# Equivalent to:
# he_init = lambda shape, dtype=tf.float32: tf.truncated_normal(shape, 0., stddev=np.sqrt(2/shape[0]))
l2_regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
my_dense_layer = partial(tf.layers.dense,
                         activation=tf.nn.elu,
                         kernel_initializer=he_init,
                         kernel_regularizer=l2_regularizer)
hidden1 = my_dense_layer(X, n_hidden1)
hidden2 = my_dense_layer(hidden1, n_hidden2)
hidden3 = my_dense_layer(hidden2, n_hidden3)
outputs = my_dense_layer(hidden3, n_outputs, activation=None)
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
loss = tf.add_n([reconstruction_loss] + reg_losses)
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()  # not shown in the book
WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0. For more information, please see: * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md * https://github.com/tensorflow/addons If you depend on functionality not listed there, please file an issue.
Now let's train it! Note that we do not feed target values (y_batch is never used): this is unsupervised training.
n_epochs = 5
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = len(X_train) // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")  # not shown in the book
            sys.stdout.flush()                                           # not shown
            X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_size))
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})  # not shown
        print("\r{}".format(epoch), "훈련 MSE:", loss_train)  # not shown
    saver.save(sess, "./my_model_all_layers.ckpt")  # not shown
0 훈련 MSE: 0.020764284 1 훈련 MSE: 0.011472851 2 훈련 MSE: 0.0117863845 3 훈련 MSE: 0.011655197 4 훈련 MSE: 0.01156232
This function loads the model, evaluates it on the test set (it measures the reconstruction error), then plots the original images and their reconstructions:
def show_reconstructed_digits(X, outputs, model_path = None, n_test_digits = 2):
    with tf.Session() as sess:
        if model_path:
            saver.restore(sess, model_path)
        # X_test = mnist.test.images[:n_test_digits]
        outputs_val = outputs.eval(feed_dict={X: X_test[:n_test_digits]})

    fig = plt.figure(figsize=(8, 3 * n_test_digits))
    for digit_index in range(n_test_digits):
        plt.subplot(n_test_digits, 2, digit_index * 2 + 1)
        plot_image(X_test[digit_index])
        plt.subplot(n_test_digits, 2, digit_index * 2 + 2)
        plot_image(outputs_val[digit_index])
show_reconstructed_digits(X, outputs, "./my_model_all_layers.ckpt")
save_fig("reconstruction_plot")
WARNING:tensorflow:From /home/haesun/anaconda3/envs/handson-ml/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version. Instructions for updating: Use standard file APIs to check for files with this prefix. INFO:tensorflow:Restoring parameters from ./my_model_all_layers.ckpt
It is common to tie the weights of the encoder and the decoder (weights_decoder = tf.transpose(weights_encoder)). Unfortunately this is impossible (or very tricky) to do using the tf.layers.dense() function, so we need to build the autoencoder by hand:
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150  # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0005
activation = tf.nn.elu
regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
initializer = tf.variance_scaling_initializer()
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
weights1_init = initializer([n_inputs, n_hidden1])
weights2_init = initializer([n_hidden1, n_hidden2])
weights1 = tf.Variable(weights1_init, dtype=tf.float32, name="weights1")
weights2 = tf.Variable(weights2_init, dtype=tf.float32, name="weights2")
weights3 = tf.transpose(weights2, name="weights3")  # tied weights
weights4 = tf.transpose(weights1, name="weights4")  # tied weights
biases1 = tf.Variable(tf.zeros(n_hidden1), name="biases1")
biases2 = tf.Variable(tf.zeros(n_hidden2), name="biases2")
biases3 = tf.Variable(tf.zeros(n_hidden3), name="biases3")
biases4 = tf.Variable(tf.zeros(n_outputs), name="biases4")
hidden1 = activation(tf.matmul(X, weights1) + biases1)
hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)
hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)
outputs = tf.matmul(hidden3, weights4) + biases4
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
reg_loss = regularizer(weights1) + regularizer(weights2)
loss = reconstruction_loss + reg_loss
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 5
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = len(X_train) // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_size))
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "훈련 MSE:", loss_train)
    saver.save(sess, "./my_model_tying_weights.ckpt")
0 훈련 MSE: 0.01781313 1 훈련 MSE: 0.016506404 2 훈련 MSE: 0.018105302 3 훈련 MSE: 0.018025953 4 훈련 MSE: 0.017611932
show_reconstructed_digits(X, outputs, "./my_model_tying_weights.ckpt")
INFO:tensorflow:Restoring parameters from ./my_model_tying_weights.ckpt
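As an aside (this is not how the book does it): weight tying can also be done at the layer level with a small custom tf.keras layer that reuses the transposed kernel of a given Dense layer. The DenseTranspose class below is only an illustrative sketch, assuming tf.keras is available (TensorFlow 1.13+); its name and details are not part of this chapter's code.
class DenseTranspose(tf.keras.layers.Layer):
    def __init__(self, dense, activation=None, **kwargs):
        super(DenseTranspose, self).__init__(**kwargs)
        self.dense = dense  # the encoder layer whose kernel will be reused
        self.activation = tf.keras.activations.get(activation)
    def build(self, batch_input_shape):
        # only the bias is a new variable; the kernel is borrowed (transposed) from `dense`
        self.biases = self.add_weight(name="bias",
                                      shape=[self.dense.input_shape[-1]],
                                      initializer="zeros")
        super(DenseTranspose, self).build(batch_input_shape)
    def call(self, inputs):
        z = tf.matmul(inputs, self.dense.weights[0], transpose_b=True)
        return self.activation(z + self.biases)

# usage sketch: each decoder layer shares the kernel of its encoder counterpart
dense_1 = tf.keras.layers.Dense(300, activation="elu")
dense_2 = tf.keras.layers.Dense(150, activation="elu")
tied_autoencoder = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=[28 * 28]),
    dense_1,
    dense_2,
    DenseTranspose(dense_2, activation="elu"),
    DenseTranspose(dense_1, activation="sigmoid"),
])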
There are many ways to train one autoencoder at a time. The first approach is to train each autoencoder using a separate graph, then build the stacked autoencoder by simply initializing it with the weights and biases copied from those autoencoders.
Let's write a function that trains one autoencoder and returns the transformed training set (i.e., the output of its hidden layer) along with the model parameters.
reset_graph()
from functools import partial
def train_autoencoder(X_train, n_neurons, n_epochs, batch_size,
                      learning_rate = 0.01, l2_reg = 0.0005, seed=42,
                      hidden_activation=tf.nn.elu,
                      output_activation=tf.nn.elu):
    graph = tf.Graph()
    with graph.as_default():
        tf.set_random_seed(seed)

        n_inputs = X_train.shape[1]

        X = tf.placeholder(tf.float32, shape=[None, n_inputs])

        my_dense_layer = partial(
            tf.layers.dense,
            kernel_initializer=tf.variance_scaling_initializer(),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg))

        hidden = my_dense_layer(X, n_neurons, activation=hidden_activation, name="hidden")
        outputs = my_dense_layer(hidden, n_inputs, activation=output_activation, name="outputs")

        reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))

        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss = tf.add_n([reconstruction_loss] + reg_losses)

        optimizer = tf.train.AdamOptimizer(learning_rate)
        training_op = optimizer.minimize(loss)

        init = tf.global_variables_initializer()

    with tf.Session(graph=graph) as sess:
        init.run()
        for epoch in range(n_epochs):
            n_batches = len(X_train) // batch_size
            for iteration in range(n_batches):
                print("\r{}%".format(100 * iteration // n_batches), end="")
                sys.stdout.flush()
                indices = rnd.permutation(len(X_train))[:batch_size]
                X_batch = X_train[indices]
                sess.run(training_op, feed_dict={X: X_batch})
            loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
            print("\r{}".format(epoch), "훈련 MSE:", loss_train)
        params = dict([(var.name, var.eval()) for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])
        hidden_val = hidden.eval(feed_dict={X: X_train})
        return hidden_val, params["hidden/kernel:0"], params["hidden/bias:0"], params["outputs/kernel:0"], params["outputs/bias:0"]
Now let's train two autoencoders. The first one is trained on the training data, and the second one is trained on the output of the first autoencoder's hidden layer:
hidden_output, W1, b1, W4, b4 = train_autoencoder(X_train, n_neurons=300, n_epochs=4, batch_size=150,
                                                  output_activation=None)
_, W2, b2, W3, b3 = train_autoencoder(hidden_output, n_neurons=150, n_epochs=4, batch_size=150)
0 훈련 MSE: 0.018396942 1 훈련 MSE: 0.017695118 2 훈련 MSE: 0.019602958 3 훈련 MSE: 0.019349579 0 훈련 MSE: 0.004570484 1 훈련 MSE: 0.004802368 2 훈련 MSE: 0.00477468 3 훈련 MSE: 0.004450083
Finally, we create a stacked autoencoder by simply reusing the weights and biases of the autoencoders we just trained:
reset_graph()
n_inputs = 28*28
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
hidden1 = tf.nn.elu(tf.matmul(X, W1) + b1)
hidden2 = tf.nn.elu(tf.matmul(hidden1, W2) + b2)
hidden3 = tf.nn.elu(tf.matmul(hidden2, W3) + b3)
outputs = tf.matmul(hidden3, W4) + b4
show_reconstructed_digits(X, outputs)
Another approach is to use a single graph. In this case we build the graph for the full stacked autoencoder, but we also add operations to train each autoencoder independently: phase 1 trains the bottom and top layers (i.e., the first autoencoder) and phase 2 trains the two middle layers (i.e., the second autoencoder).
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150  # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0001
activation = tf.nn.elu
regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
initializer = tf.variance_scaling_initializer()
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
weights1_init = initializer([n_inputs, n_hidden1])
weights2_init = initializer([n_hidden1, n_hidden2])
weights3_init = initializer([n_hidden2, n_hidden3])
weights4_init = initializer([n_hidden3, n_outputs])
weights1 = tf.Variable(weights1_init, dtype=tf.float32, name="weights1")
weights2 = tf.Variable(weights2_init, dtype=tf.float32, name="weights2")
weights3 = tf.Variable(weights3_init, dtype=tf.float32, name="weights3")
weights4 = tf.Variable(weights4_init, dtype=tf.float32, name="weights4")
biases1 = tf.Variable(tf.zeros(n_hidden1), name="biases1")
biases2 = tf.Variable(tf.zeros(n_hidden2), name="biases2")
biases3 = tf.Variable(tf.zeros(n_hidden3), name="biases3")
biases4 = tf.Variable(tf.zeros(n_outputs), name="biases4")
hidden1 = activation(tf.matmul(X, weights1) + biases1)
hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)
hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)
outputs = tf.matmul(hidden3, weights4) + biases4
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
optimizer = tf.train.AdamOptimizer(learning_rate)
with tf.name_scope("phase1"):
    phase1_outputs = tf.matmul(hidden1, weights4) + biases4  # bypass hidden2 and hidden3
    phase1_reconstruction_loss = tf.reduce_mean(tf.square(phase1_outputs - X))
    phase1_reg_loss = regularizer(weights1) + regularizer(weights4)
    phase1_loss = phase1_reconstruction_loss + phase1_reg_loss
    phase1_training_op = optimizer.minimize(phase1_loss)

with tf.name_scope("phase2"):
    phase2_reconstruction_loss = tf.reduce_mean(tf.square(hidden3 - hidden1))
    phase2_reg_loss = regularizer(weights2) + regularizer(weights3)
    phase2_loss = phase2_reconstruction_loss + phase2_reg_loss
    train_vars = [weights2, biases2, weights3, biases3]
    phase2_training_op = optimizer.minimize(phase2_loss, var_list=train_vars)  # freeze hidden1
init = tf.global_variables_initializer()
saver = tf.train.Saver()
training_ops = [phase1_training_op, phase2_training_op]
reconstruction_losses = [phase1_reconstruction_loss, phase2_reconstruction_loss]
n_epochs = [4, 4]
batch_sizes = [150, 150]
with tf.Session() as sess:
    init.run()
    for phase in range(2):
        print("훈련 단계 #{}".format(phase + 1))
        for epoch in range(n_epochs[phase]):
            n_batches = len(X_train) // batch_sizes[phase]
            for iteration in range(n_batches):
                print("\r{}%".format(100 * iteration // n_batches), end="")
                sys.stdout.flush()
                X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_sizes[phase]))
                sess.run(training_ops[phase], feed_dict={X: X_batch})
            loss_train = reconstruction_losses[phase].eval(feed_dict={X: X_batch})
            print("\r{}".format(epoch), "훈련 MSE:", loss_train)
        saver.save(sess, "./my_model_one_at_a_time.ckpt")
    loss_test = reconstruction_loss.eval(feed_dict={X: X_test})
    print("테스트 MSE:", loss_test)
훈련 단계 #1 0 훈련 MSE: 0.007932796 1 훈련 MSE: 0.0073406156 2 훈련 MSE: 0.007749695 3 훈련 MSE: 0.0077196443 훈련 단계 #2 0 훈련 MSE: 0.24723183 1 훈련 MSE: 0.0069409623 2 훈련 MSE: 0.0033934086 3 훈련 MSE: 0.0025785582 테스트 MSE: 0.009948011
training_ops = [phase1_training_op, phase2_training_op]
reconstruction_losses = [phase1_reconstruction_loss, phase2_reconstruction_loss]
n_epochs = [4, 4]
batch_sizes = [150, 150]
# Same training as above, except that in phase 2 the output of the frozen hidden1
# layer is computed once for the whole training set and cached, then fed directly
# (this avoids recomputing hidden1 at every iteration).
with tf.Session() as sess:
    init.run()
    for phase in range(2):
        print("훈련 단계 #{}".format(phase + 1))
        if phase == 1:
            hidden1_cache = hidden1.eval(feed_dict={X: X_train})
        for epoch in range(n_epochs[phase]):
            n_batches = len(X_train) // batch_sizes[phase]
            for iteration in range(n_batches):
                print("\r{}%".format(100 * iteration // n_batches), end="")
                sys.stdout.flush()
                if phase == 1:
                    indices = rnd.permutation(len(X_train))
                    hidden1_batch = hidden1_cache[indices[:batch_sizes[phase]]]
                    feed_dict = {hidden1: hidden1_batch}
                    sess.run(training_ops[phase], feed_dict=feed_dict)
                else:
                    X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_sizes[phase]))
                    feed_dict = {X: X_batch}
                    sess.run(training_ops[phase], feed_dict=feed_dict)
            loss_train = reconstruction_losses[phase].eval(feed_dict=feed_dict)
            print("\r{}".format(epoch), "훈련 MSE:", loss_train)
        saver.save(sess, "./my_model_cache_frozen.ckpt")
    loss_test = reconstruction_loss.eval(feed_dict={X: X_test})
    print("테스트 MSE:", loss_test)
훈련 단계 #1 0 훈련 MSE: 0.0081470255 1 훈련 MSE: 0.007710265 2 훈련 MSE: 0.007226933 3 훈련 MSE: 0.007944853 훈련 단계 #2 0 훈련 MSE: 0.27050307 1 훈련 MSE: 0.006214369 2 훈련 MSE: 0.0027293672 3 훈련 MSE: 0.0022585767 테스트 MSE: 0.00980641
n_test_digits = 2
# X_test = mnist.test.images[:n_test_digits]
with tf.Session() as sess:
    saver.restore(sess, "./my_model_one_at_a_time.ckpt")  # not shown in the book
    outputs_val = outputs.eval(feed_dict={X: X_test[:n_test_digits]})

def plot_image(image, shape=[28, 28]):
    plt.imshow(image.reshape(shape), cmap="Greys", interpolation="nearest")
    plt.axis("off")

for digit_index in range(n_test_digits):
    plt.subplot(n_test_digits, 2, digit_index * 2 + 1)
    plot_image(X_test[digit_index])
    plt.subplot(n_test_digits, 2, digit_index * 2 + 2)
    plot_image(outputs_val[digit_index])
INFO:tensorflow:Restoring parameters from ./my_model_one_at_a_time.ckpt
with tf.Session() as sess:
    saver.restore(sess, "./my_model_one_at_a_time.ckpt")  # not shown in the book
    weights1_val = weights1.eval()

for i in range(5):
    plt.subplot(1, 5, i + 1)
    plot_image(weights1_val.T[i])

save_fig("extracted_features_plot")  # not shown
plt.show()                           # not shown
INFO:tensorflow:Restoring parameters from ./my_model_one_at_a_time.ckpt
Let's build a small neural network for MNIST classification:
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150
n_outputs = 10
learning_rate = 0.01
l2_reg = 0.0005
activation = tf.nn.elu
regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
initializer = tf.variance_scaling_initializer()
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
y = tf.placeholder(tf.int32, shape=[None])
weights1_init = initializer([n_inputs, n_hidden1])
weights2_init = initializer([n_hidden1, n_hidden2])
weights3_init = initializer([n_hidden2, n_outputs])
weights1 = tf.Variable(weights1_init, dtype=tf.float32, name="weights1")
weights2 = tf.Variable(weights2_init, dtype=tf.float32, name="weights2")
weights3 = tf.Variable(weights3_init, dtype=tf.float32, name="weights3")
biases1 = tf.Variable(tf.zeros(n_hidden1), name="biases1")
biases2 = tf.Variable(tf.zeros(n_hidden2), name="biases2")
biases3 = tf.Variable(tf.zeros(n_outputs), name="biases3")
hidden1 = activation(tf.matmul(X, weights1) + biases1)
hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)
logits = tf.matmul(hidden2, weights3) + biases3
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
reg_loss = regularizer(weights1) + regularizer(weights2) + regularizer(weights3)
loss = cross_entropy + reg_loss
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
pretrain_saver = tf.train.Saver([weights1, weights2, biases1, biases2])
saver = tf.train.Saver()
Regular training (without pretraining):
n_epochs = 4
batch_size = 150
n_labeled_instances = 20000
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = n_labeled_instances // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            indices = rnd.permutation(n_labeled_instances)[:batch_size]
            X_batch, y_batch = X_train[indices], y_train[indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        print("\r{}".format(epoch), "검증 세트 정확도:", accuracy_val, end=" ")
        saver.save(sess, "./my_model_supervised.ckpt")
        test_val = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("테스트 정확도:", test_val)
0 검증 세트 정확도: 0.93333334 테스트 정확도: 0.9191 1 검증 세트 정확도: 0.97333336 테스트 정확도: 0.9371 2 검증 세트 정확도: 0.9866667 테스트 정확도: 0.9318 3 검증 세트 정확도: 0.97333336 테스트 정확도: 0.9403
Now let's reuse the first two layers of the autoencoder we pretrained:
n_epochs = 4
batch_size = 150
n_labeled_instances = 20000
#training_op = optimizer.minimize(loss, var_list=[weights3, biases3])  # Freeze layers 1 and 2 (optional)
with tf.Session() as sess:
    init.run()
    pretrain_saver.restore(sess, "./my_model_cache_frozen.ckpt")
    for epoch in range(n_epochs):
        n_batches = n_labeled_instances // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            indices = rnd.permutation(n_labeled_instances)[:batch_size]
            X_batch, y_batch = X_train[indices], y_train[indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        print("\r{}".format(epoch), "훈련 정확도:", accuracy_val, end="\t")
        saver.save(sess, "./my_model_supervised_pretrained.ckpt")
        test_val = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print("테스트 정확도:", test_val)
INFO:tensorflow:Restoring parameters from ./my_model_cache_frozen.ckpt 0 훈련 정확도: 0.96666664 테스트 정확도: 0.9231 1 훈련 정확도: 0.96 테스트 정확도: 0.9361 2 훈련 정확도: 0.96666664 테스트 정확도: 0.9369 3 훈련 정확도: 0.98 테스트 정확도: 0.933
Using Gaussian noise:
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150  # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
noise_level = 1.0
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_noisy = X + noise_level * tf.random_normal(tf.shape(X))
hidden1 = tf.layers.dense(X_noisy, n_hidden1, activation=tf.nn.relu,
                          name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu,  # not shown in the book
                          name="hidden2")                             # not shown
hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu,  # not shown
                          name="hidden3")                             # not shown
outputs = tf.layers.dense(hidden3, n_outputs, name="outputs")         # not shown
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = len(X_train) // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_size))
            sess.run(training_op, feed_dict={X: X_batch})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "훈련 MSE:", loss_train)
    saver.save(sess, "./my_model_stacked_denoising_gaussian.ckpt")
0 훈련 MSE: 0.044450462 1 훈련 MSE: 0.038666468 2 훈련 MSE: 0.043584995 3 훈련 MSE: 0.04217102 4 훈련 MSE: 0.04093006 5 훈련 MSE: 0.041999266 6 훈련 MSE: 0.042555828 7 훈련 MSE: 0.04091479 8 훈련 MSE: 0.043924328 9 훈련 MSE: 0.042779252
Using dropout:
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150  # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
dropout_rate = 0.3
training = tf.placeholder_with_default(False, shape=(), name='training')
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_drop = tf.layers.dropout(X, dropout_rate, training=training)
hidden1 = tf.layers.dense(X_drop, n_hidden1, activation=tf.nn.relu,
                          name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu,  # not shown in the book
                          name="hidden2")                             # not shown
hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu,  # not shown
                          name="hidden3")                             # not shown
outputs = tf.layers.dense(hidden3, n_outputs, name="outputs")         # not shown
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE
WARNING:tensorflow:From <ipython-input-41-0bee1b1cd27f>:6: dropout (from tensorflow.python.layers.core) is deprecated and will be removed in a future version. Instructions for updating: Use keras.layers.dropout instead. WARNING:tensorflow:From /home/haesun/anaconda3/envs/handson-ml/lib/python3.6/site-packages/tensorflow/python/keras/layers/core.py:143: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(reconstruction_loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = len(X_train) // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_size))
            sess.run(training_op, feed_dict={X: X_batch, training: True})
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "훈련 MSE:", loss_train)
    saver.save(sess, "./my_model_stacked_denoising_dropout.ckpt")
0 훈련 MSE: 0.028069168 1 훈련 MSE: 0.023578122 2 훈련 MSE: 0.025170064 3 훈련 MSE: 0.023552254 4 훈련 MSE: 0.02383423 5 훈련 MSE: 0.02311757 6 훈련 MSE: 0.02342265 7 훈련 MSE: 0.02238668 8 훈련 MSE: 0.02346741 9 훈련 MSE: 0.023901047
show_reconstructed_digits(X, outputs, "./my_model_stacked_denoising_dropout.ckpt")
INFO:tensorflow:Restoring parameters from ./my_model_stacked_denoising_dropout.ckpt
p = 0.1
q = np.linspace(0.001, 0.999, 500)
kl_div = p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))
mse = (p - q)**2
plt.plot([p, p], [0, 0.3], "k:")
plt.text(0.05, 0.32, "Target sparsity", fontsize=14)
plt.plot(q, kl_div, "b-", label="KL divergence")
plt.plot(q, mse, "r--", label="MSE")
plt.legend(loc="upper left")
plt.xlabel("Actual sparsity")
plt.ylabel("Cost", rotation=0)
plt.axis([0, 1, 0, 0.95])
save_fig("sparsity_loss_plot")
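The sparse autoencoder below penalizes the mean activation $q_j$ of each coding unit over the batch using the KL divergence plotted above. The loss implemented in the next cell is

$$\mathrm{KL}(p \,\|\, q_j) = p \log\frac{p}{q_j} + (1 - p)\log\frac{1 - p}{1 - q_j}, \qquad \mathcal{L} = \mathrm{MSE} + \lambda \sum_j \mathrm{KL}(p \,\|\, q_j)$$

where $p$ is sparsity_target and $\lambda$ is sparsity_weight.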
reset_graph()
n_inputs = 28 * 28
n_hidden1 = 1000  # sparse codings
n_outputs = n_inputs
def kl_divergence(p, q):
    # Kullback-Leibler divergence
    return p * tf.log(p / q) + (1 - p) * tf.log((1 - p) / (1 - q))
learning_rate = 0.01
sparsity_target = 0.1
sparsity_weight = 0.2
X = tf.placeholder(tf.float32, shape=[None, n_inputs])             # not shown in the book
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.sigmoid)  # not shown
outputs = tf.layers.dense(hidden1, n_outputs)                      # not shown
hidden1_mean = tf.reduce_mean(hidden1, axis=0)  # batch mean
sparsity_loss = tf.reduce_sum(kl_divergence(sparsity_target, hidden1_mean))
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X)) # MSE
loss = reconstruction_loss + sparsity_weight * sparsity_loss
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)
WARNING:tensorflow:From /home/haesun/anaconda3/envs/handson-ml/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 100
batch_size = 1000
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = len(X_train) // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_size))
            sess.run(training_op, feed_dict={X: X_batch})
        reconstruction_loss_val, sparsity_loss_val, loss_val = sess.run([reconstruction_loss, sparsity_loss, loss], feed_dict={X: X_batch})
        print("\r{}".format(epoch), "훈련 MSE:", reconstruction_loss_val, "\t희소 손실:", sparsity_loss_val, "\t전체 손실:", loss_val)
    saver.save(sess, "./my_model_sparse.ckpt")
0 훈련 MSE: 0.13764463 희소 손실: 0.9086776 전체 손실: 0.31938016 1 훈련 MSE: 0.059780523 희소 손실: 0.023853485 전체 손실: 0.06455122 2 훈련 MSE: 0.05259747 희소 손실: 0.028874451 전체 손실: 0.05837236 3 훈련 MSE: 0.05017224 희소 손실: 0.19674255 전체 손실: 0.08952075 4 훈련 MSE: 0.044575125 희소 손실: 0.0070928843 전체 손실: 0.0459937 5 훈련 MSE: 0.03982869 희소 손실: 0.35642162 전체 손실: 0.11111301 6 훈련 MSE: 0.03850078 희소 손실: 0.0489216 전체 손실: 0.048285097 7 훈련 MSE: 0.036211826 희소 손실: 0.023476806 전체 손실: 0.040907186 8 훈련 MSE: 0.03363479 희소 손실: 0.057290047 전체 손실: 0.0450928 9 훈련 MSE: 0.030880695 희소 손실: 0.042790934 전체 손실: 0.03943888 10 훈련 MSE: 0.028282983 희소 손실: 0.32261822 전체 손실: 0.09280663 11 훈련 MSE: 0.025015578 희소 손실: 0.030909345 전체 손실: 0.031197447 12 훈련 MSE: 0.023248762 희소 손실: 0.08872007 전체 손실: 0.040992774 13 훈련 MSE: 0.021769175 희소 손실: 0.066300735 전체 손실: 0.035029322 14 훈련 MSE: 0.022006068 희소 손실: 0.11473212 전체 손실: 0.04495249 15 훈련 MSE: 0.020363854 희소 손실: 0.04796897 전체 손실: 0.029957648 16 훈련 MSE: 0.017985547 희소 손실: 0.183092 전체 손실: 0.05460395 17 훈련 MSE: 0.018661922 희소 손실: 0.029089238 전체 손실: 0.02447977 18 훈련 MSE: 0.017803263 희소 손실: 0.23516889 전체 손실: 0.06483704 19 훈련 MSE: 0.017381176 희소 손실: 0.024946982 전체 손실: 0.022370573 20 훈련 MSE: 0.017425468 희소 손실: 0.086695984 전체 손실: 0.034764666 21 훈련 MSE: 0.016189655 희소 손실: 0.04122501 전체 손실: 0.024434656 22 훈련 MSE: 0.016698195 희소 손실: 0.04975678 전체 손실: 0.02664955 23 훈련 MSE: 0.015898878 희소 손실: 0.05508837 전체 손실: 0.026916552 24 훈련 MSE: 0.017254826 희소 손실: 0.077819824 전체 손실: 0.03281879 25 훈련 MSE: 0.015573333 희소 손실: 0.08851832 전체 손실: 0.033276998 26 훈련 MSE: 0.014312162 희소 손실: 0.071159236 전체 손실: 0.028544009 27 훈련 MSE: 0.0134528335 희소 손실: 0.039494216 전체 손실: 0.021351676 28 훈련 MSE: 0.014181799 희소 손실: 0.03427296 전체 손실: 0.021036392 29 훈련 MSE: 0.014233899 희소 손실: 0.091789134 전체 손실: 0.032591727 30 훈련 MSE: 0.015912356 희소 손실: 0.088046856 전체 손실: 0.033521727 31 훈련 MSE: 0.014909081 희소 손실: 0.098699436 전체 손실: 0.03464897 32 훈련 MSE: 0.012846002 희소 손실: 0.049285438 전체 손실: 0.022703089 33 훈련 MSE: 0.013661299 희소 손실: 0.09084812 전체 손실: 0.03183092 34 훈련 MSE: 0.013174698 희소 손실: 0.041957255 전체 손실: 0.021566149 35 훈련 MSE: 0.013076468 희소 손실: 0.03809423 전체 손실: 0.020695314 36 훈련 MSE: 0.013874397 희소 손실: 0.053888112 전체 손실: 0.02465202 37 훈련 MSE: 0.014134639 희소 손실: 0.08780427 전체 손실: 0.031695493 38 훈련 MSE: 0.012819655 희소 손실: 0.23281106 전체 손실: 0.05938187 39 훈련 MSE: 0.012348061 희소 손실: 0.0440427 전체 손실: 0.021156602 40 훈련 MSE: 0.012252707 희소 손실: 0.13875358 전체 손실: 0.040003423 41 훈련 MSE: 0.012996029 희소 손실: 0.12448272 전체 손실: 0.037892573 42 훈련 MSE: 0.012731824 희소 손실: 0.052207123 전체 손실: 0.02317325 43 훈련 MSE: 0.012437606 희소 손실: 0.15004784 전체 손실: 0.042447172 44 훈련 MSE: 0.0139943315 희소 손실: 0.086156726 전체 손실: 0.031225678 45 훈련 MSE: 0.013336362 희소 손실: 0.18166208 전체 손실: 0.04966878 46 훈련 MSE: 0.012504976 희소 손실: 0.17851818 전체 손실: 0.048208613 47 훈련 MSE: 0.013035382 희소 손실: 0.1348057 전체 손실: 0.03999652 48 훈련 MSE: 0.012266196 희소 손실: 0.07756756 전체 손실: 0.02777971 49 훈련 MSE: 0.01317722 희소 손실: 0.094384246 전체 손실: 0.032054067 50 훈련 MSE: 0.011335708 희소 손실: 0.055621002 전체 손실: 0.02245991 51 훈련 MSE: 0.01165347 희소 손실: 0.1803157 전체 손실: 0.04771661 52 훈련 MSE: 0.011956133 희소 손실: 0.056386083 전체 손실: 0.02323335 53 훈련 MSE: 0.01185816 희소 손실: 0.09703015 전체 손실: 0.03126419 54 훈련 MSE: 0.011784163 희소 손실: 0.08330605 전체 손실: 0.028445374 55 훈련 MSE: 0.011937046 희소 손실: 0.15825102 전체 손실: 0.043587252 56 훈련 MSE: 0.012403196 희소 손실: 0.18447801 전체 손실: 0.0492988 57 훈련 MSE: 0.01206126 희소 손실: 0.14881732 전체 손실: 0.041824725 58 훈련 MSE: 0.013030418 희소 손실: 0.20365247 전체 손실: 0.053760912 59 훈련 MSE: 0.015654955 희소 손실: 0.15615591 전체 
손실: 0.04688614 60 훈련 MSE: 0.012482262 희소 손실: 0.25862983 전체 손실: 0.064208224 61 훈련 MSE: 0.015941424 희소 손실: 0.95962346 전체 손실: 0.20786612 62 훈련 MSE: 0.01766432 희소 손실: 0.20046294 전체 손실: 0.05775691 63 훈련 MSE: 0.016196998 희소 손실: 0.5585599 전체 손실: 0.12790897 64 훈련 MSE: 0.013815206 희소 손실: 0.84754825 전체 손실: 0.18332486 65 훈련 MSE: 0.013000802 희소 손실: 0.1421502 전체 손실: 0.041430842 66 훈련 MSE: 0.015104004 희소 손실: 1.1474469 전체 손실: 0.24459338 67 훈련 MSE: 0.011953486 희소 손실: 0.26351807 전체 손실: 0.0646571 68 훈련 MSE: 0.01573701 희소 손실: 0.16915521 전체 손실: 0.04956805 69 훈련 MSE: 0.037008945 희소 손실: 0.19164175 전체 손실: 0.07533729 70 훈련 MSE: 0.013612317 희소 손실: 0.5762744 전체 손실: 0.1288672 71 훈련 MSE: 0.017147928 희소 손실: 0.17852888 전체 손실: 0.052853703 72 훈련 MSE: 0.015508033 희소 손실: 0.1816079 전체 손실: 0.051829614 73 훈련 MSE: 0.027388263 희소 손실: 0.3746488 전체 손실: 0.10231803 74 훈련 MSE: 0.02360942 희소 손실: 0.14110537 전체 손실: 0.051830493 75 훈련 MSE: 0.076053016 희소 손실: 0.8011689 전체 손실: 0.23628679 76 훈련 MSE: 0.024068106 희소 손실: 0.19580096 전체 손실: 0.063228294 77 훈련 MSE: 0.015808253 희소 손실: 0.7554366 전체 손실: 0.16689558 78 훈련 MSE: 0.017363977 희소 손실: 0.11928486 전체 손실: 0.041220948 79 훈련 MSE: 0.013585685 희소 손실: 0.27248436 전체 손실: 0.068082556 80 훈련 MSE: 0.014584755 희소 손실: 0.2480849 전체 손실: 0.064201735 81 훈련 MSE: 0.013331199 희소 손실: 0.6506817 전체 손실: 0.14346755 82 훈련 MSE: 0.019051224 희소 손실: 0.6267012 전체 손실: 0.14439146 83 훈련 MSE: 0.015483908 희소 손실: 0.1916886 전체 손실: 0.053821627 84 훈련 MSE: 0.027749855 희소 손실: 0.18579966 전체 손실: 0.064909786 85 훈련 MSE: 0.014414989 희소 손실: 0.6567534 전체 손실: 0.14576568 86 훈련 MSE: 0.015936274 희소 손실: 0.13425863 전체 손실: 0.042788 87 훈련 MSE: 0.014435577 희소 손실: 0.2481771 전체 손실: 0.064071 88 훈련 MSE: 0.034601822 희소 손실: 0.29140955 전체 손실: 0.092883736 89 훈련 MSE: 0.016649004 희소 손실: 0.43256503 전체 손실: 0.103162006 90 훈련 MSE: 0.017050372 희소 손실: 0.1723983 전체 손실: 0.051530033 91 훈련 MSE: 0.035351776 희소 손실: 0.21858487 전체 손실: 0.07906875 92 훈련 MSE: 0.02555419 희소 손실: 0.9047413 전체 손실: 0.20650245 93 훈련 MSE: 0.014923302 희소 손실: 0.07066038 전체 손실: 0.02905538 94 훈련 MSE: 0.014238221 희소 손실: 0.46209428 전체 손실: 0.10665708 95 훈련 MSE: 0.014198717 희소 손실: 0.0816534 전체 손실: 0.030529397 96 훈련 MSE: 0.019745337 희소 손실: 0.9571122 전체 손실: 0.21116778 97 훈련 MSE: 0.016251143 희소 손실: 1.0055496 전체 손실: 0.21736106 98 훈련 MSE: 0.02046022 희소 손실: 0.11733665 전체 손실: 0.04392755 99 훈련 MSE: 0.014461967 희소 손실: 0.28843147 전체 손실: 0.07214826
show_reconstructed_digits(X, outputs, "./my_model_sparse.ckpt")
INFO:tensorflow:Restoring parameters from ./my_model_sparse.ckpt
Note that the coding layer must output values in the 0 to 1 range, so we use the sigmoid activation function:
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.sigmoid)
To speed up training, you can normalize the inputs between 0 and 1 and use cross entropy instead of MSE for the reconstruction loss:
logits = tf.layers.dense(hidden1, n_outputs)
outputs = tf.nn.sigmoid(logits)
xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)
reconstruction_loss = tf.reduce_mean(xentropy)
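Putting these pieces together, here is a minimal sketch (assuming the same hyperparameters and the kl_divergence() function defined for the sparse autoencoder above) of the cross-entropy variant; only the reconstruction loss changes, the KL sparsity penalty stays the same:
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 1000  # sparse codings
n_outputs = n_inputs
learning_rate = 0.01
sparsity_target = 0.1
sparsity_weight = 0.2

X = tf.placeholder(tf.float32, shape=[None, n_inputs])
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.sigmoid)
logits = tf.layers.dense(hidden1, n_outputs)  # no activation: raw logits
outputs = tf.nn.sigmoid(logits)               # only needed to look at the reconstructions

hidden1_mean = tf.reduce_mean(hidden1, axis=0)  # batch mean activation of each coding unit
sparsity_loss = tf.reduce_sum(kl_divergence(sparsity_target, hidden1_mean))
xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)
reconstruction_loss = tf.reduce_mean(xentropy)  # cross entropy instead of MSE
loss = reconstruction_loss + sparsity_weight * sparsity_loss

optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss)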
reset_graph()
from functools import partial
n_inputs = 28 * 28
n_hidden1 = 500
n_hidden2 = 500
n_hidden3 = 20  # codings
n_hidden4 = n_hidden2
n_hidden5 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.001
initializer = tf.variance_scaling_initializer()
my_dense_layer = partial(
    tf.layers.dense,
    activation=tf.nn.elu,
    kernel_initializer=initializer)
X = tf.placeholder(tf.float32, [None, n_inputs])
hidden1 = my_dense_layer(X, n_hidden1)
hidden2 = my_dense_layer(hidden1, n_hidden2)
hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)
hidden3_sigma = my_dense_layer(hidden2, n_hidden3, activation=None)
noise = tf.random_normal(tf.shape(hidden3_sigma), dtype=tf.float32)
hidden3 = hidden3_mean + hidden3_sigma * noise
hidden4 = my_dense_layer(hidden3, n_hidden4)
hidden5 = my_dense_layer(hidden4, n_hidden5)
logits = my_dense_layer(hidden5, n_outputs, activation=None)
outputs = tf.sigmoid(logits)
xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)
reconstruction_loss = tf.reduce_sum(xentropy)
eps = 1e-10  # smoothing term to avoid computing log(0), which would return NaN
latent_loss = 0.5 * tf.reduce_sum(
    tf.square(hidden3_sigma) + tf.square(hidden3_mean)
    - 1 - tf.log(eps + tf.square(hidden3_sigma)))
loss = reconstruction_loss + latent_loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 50
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = len(X_train) // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_size))
            sess.run(training_op, feed_dict={X: X_batch})
        loss_val, reconstruction_loss_val, latent_loss_val = sess.run([loss, reconstruction_loss, latent_loss], feed_dict={X: X_batch})
        print("\r{}".format(epoch), "훈련 전체 손실:", loss_val, "\t재구성 손실:", reconstruction_loss_val, "\t잠재 손실:", latent_loss_val)
    saver.save(sess, "./my_model_variational.ckpt")
0 훈련 전체 손실: 39178.113 재구성 손실: 24655.383 잠재 손실: 14522.7295 1 훈련 전체 손실: 26709.352 재구성 손실: 21087.395 잠재 손실: 5621.9575 2 훈련 전체 손실: 27936.418 재구성 손실: 22050.492 잠재 손실: 5885.9263 3 훈련 전체 손실: 24596.094 재구성 손실: 19495.64 잠재 손실: 5100.454 4 훈련 전체 손실: 24721.943 재구성 손실: 19489.229 잠재 손실: 5232.7144 5 훈련 전체 손실: 23755.867 재구성 손실: 18741.586 잠재 손실: 5014.282 6 훈련 전체 손실: 25383.01 재구성 손실: 20195.586 잠재 손실: 5187.424 7 훈련 전체 손실: 20105.082 재구성 손실: 16139.818 잠재 손실: 3965.264 8 훈련 전체 손실: 21950.242 재구성 손실: 18033.154 잠재 손실: 3917.0881 9 훈련 전체 손실: 19577.664 재구성 손실: 16220.92 잠재 손실: 3356.7441 10 훈련 전체 손실: 16286.686 재구성 손실: 13046.722 잠재 손실: 3239.9634 11 훈련 전체 손실: 16730.045 재구성 손실: 13380.163 잠재 손실: 3349.8826 12 훈련 전체 손실: 16480.047 재구성 손실: 12957.756 잠재 손실: 3522.292 13 훈련 전체 손실: 16356.154 재구성 손실: 12876.612 잠재 손실: 3479.5417 14 훈련 전체 손실: 15729.279 재구성 손실: 12256.35 잠재 손실: 3472.93 15 훈련 전체 손실: 15998.193 재구성 손실: 12510.395 잠재 손실: 3487.799 16 훈련 전체 손실: 15667.144 재구성 손실: 12163.436 잠재 손실: 3503.708 17 훈련 전체 손실: 16446.844 재구성 손실: 13009.144 잠재 손실: 3437.7002 18 훈련 전체 손실: 15515.571 재구성 손실: 11902.668 잠재 손실: 3612.903 19 훈련 전체 손실: 15367.52 재구성 손실: 12144.882 잠재 손실: 3222.6377 20 훈련 전체 손실: 17410.598 재구성 손실: 13445.16 잠재 손실: 3965.4368 21 훈련 전체 손실: 18168.033 재구성 손실: 14566.337 잠재 손실: 3601.696 22 훈련 전체 손실: 26334.355 재구성 손실: 20546.047 잠재 손실: 5788.3096 23 훈련 전체 손실: 31021.758 재구성 손실: 23053.541 잠재 손실: 7968.217 24 훈련 전체 손실: 31426.602 재구성 손실: 23689.54 잠재 손실: 7737.0635 25 훈련 전체 손실: 26836.31 재구성 손실: 21398.799 잠재 손실: 5437.511 26 훈련 전체 손실: 29283.21 재구성 손실: 21404.012 잠재 손실: 7879.1997 27 훈련 전체 손실: 24960.898 재구성 손실: 18269.293 잠재 손실: 6691.6064 28 훈련 전체 손실: 21431.984 재구성 손실: 17372.996 잠재 손실: 4058.9873 29 훈련 전체 손실: 26868.752 재구성 손실: 19250.443 잠재 손실: 7618.3086 30 훈련 전체 손실: 26129.672 재구성 손실: 20292.867 잠재 손실: 5836.8057 31 훈련 전체 손실: 25280.848 재구성 손실: 18714.299 잠재 손실: 6566.55 32 훈련 전체 손실: 19797.805 재구성 손실: 16459.428 잠재 손실: 3338.377 33 훈련 전체 손실: 22386.838 재구성 손실: 16541.697 잠재 손실: 5845.14 34 훈련 전체 손실: 16537.502 재구성 손실: 13364.735 잠재 손실: 3172.767 35 훈련 전체 손실: 15760.508 재구성 손실: 12542.663 잠재 손실: 3217.845 36 훈련 전체 손실: 16289.242 재구성 손실: 12966.763 잠재 손실: 3322.479 37 훈련 전체 손실: 16063.729 재구성 손실: 12541.804 잠재 손실: 3521.9248 38 훈련 전체 손실: 15939.033 재구성 손실: 12414.434 잠재 손실: 3524.6 39 훈련 전체 손실: 18257.29 재구성 손실: 14400.147 잠재 손실: 3857.1418 40 훈련 전체 손실: 16188.255 재구성 손실: 12762.699 잠재 손실: 3425.556 41 훈련 전체 손실: 16118.303 재구성 손실: 12681.053 잠재 손실: 3437.2495 42 훈련 전체 손실: 15753.109 재구성 손실: 12357.85 잠재 손실: 3395.2598 43 훈련 전체 손실: 15561.574 재구성 손실: 11921.074 잠재 손실: 3640.5005 44 훈련 전체 손실: 19075.18 재구성 손실: 14498.186 잠재 손실: 4576.994 45 훈련 전체 손실: 16066.097 재구성 손실: 12526.522 잠재 손실: 3539.5745 46 훈련 전체 손실: 16024.207 재구성 손실: 12422.732 잠재 손실: 3601.4749 47 훈련 전체 손실: 15211.7 재구성 손실: 11879.954 잠재 손실: 3331.746 48 훈련 전체 손실: 15604.895 재구성 손실: 12097.195 잠재 손실: 3507.6987 49 훈련 전체 손실: 17339.074 재구성 손실: 13778.751 잠재 손실: 3560.3223
reset_graph()
from functools import partial
n_inputs = 28 * 28
n_hidden1 = 500
n_hidden2 = 500
n_hidden3 = 20  # codings
n_hidden4 = n_hidden2
n_hidden5 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.001
initializer = tf.variance_scaling_initializer()
my_dense_layer = partial(
    tf.layers.dense,
    activation=tf.nn.elu,
    kernel_initializer=initializer)
X = tf.placeholder(tf.float32, [None, n_inputs])
hidden1 = my_dense_layer(X, n_hidden1)
hidden2 = my_dense_layer(hidden1, n_hidden2)
hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)
hidden3_gamma = my_dense_layer(hidden2, n_hidden3, activation=None)
noise = tf.random_normal(tf.shape(hidden3_gamma), dtype=tf.float32)
hidden3 = hidden3_mean + tf.exp(0.5 * hidden3_gamma) * noise
hidden4 = my_dense_layer(hidden3, n_hidden4)
hidden5 = my_dense_layer(hidden4, n_hidden5)
logits = my_dense_layer(hidden5, n_outputs, activation=None)
outputs = tf.sigmoid(logits)
xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)
reconstruction_loss = tf.reduce_sum(xentropy)
latent_loss = 0.5 * tf.reduce_sum(
    tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)
loss = reconstruction_loss + latent_loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
Let's train the model and use it to generate a few random digits:
import numpy as np
n_digits = 60
n_epochs = 50
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        n_batches = len(X_train) // batch_size
        for iteration in range(n_batches):
            print("\r{}%".format(100 * iteration // n_batches), end="")  # not shown in the book
            sys.stdout.flush()                                           # not shown
            X_batch, y_batch = next(shuffle_batch(X_train, y_train, batch_size))
            sess.run(training_op, feed_dict={X: X_batch})
        loss_val, reconstruction_loss_val, latent_loss_val = sess.run([loss, reconstruction_loss, latent_loss], feed_dict={X: X_batch})  # not shown
        print("\r{}".format(epoch), "훈련 전체 손실:", loss_val, "\t재구성 손실:", reconstruction_loss_val, "\t잠재 손실:", latent_loss_val)  # not shown
        saver.save(sess, "./my_model_variational.ckpt")  # not shown

    codings_rnd = np.random.normal(size=[n_digits, n_hidden3])
    outputs_val = outputs.eval(feed_dict={hidden3: codings_rnd})
0 훈련 전체 손실: 18105.059 재구성 손실: 14386.962 잠재 손실: 3718.0962 1 훈련 전체 손실: 16242.924 재구성 손실: 12569.525 잠재 손실: 3673.3984 2 훈련 전체 손실: 16819.03 재구성 손실: 13044.595 잠재 손실: 3774.4353 3 훈련 전체 손실: 16235.683 재구성 손실: 12447.65 잠재 손실: 3788.0322 4 훈련 전체 손실: 16116.493 재구성 손실: 12329.032 잠재 손실: 3787.461 5 훈련 전체 손실: 15875.421 재구성 손실: 12192.666 잠재 손실: 3682.7546 6 훈련 전체 손실: 15730.232 재구성 손실: 12036.995 잠재 손실: 3693.2378 7 훈련 전체 손실: 15206.102 재구성 손실: 11490.0 잠재 손실: 3716.1016 8 훈련 전체 손실: 15583.899 재구성 손실: 11798.406 잠재 손실: 3785.4932 9 훈련 전체 손실: 15705.096 재구성 손실: 11796.535 잠재 손실: 3908.5605 10 훈련 전체 손실: 15000.844 재구성 손실: 11308.117 잠재 손실: 3692.727 11 훈련 전체 손실: 15789.912 재구성 손실: 11941.982 잠재 손실: 3847.9292 12 훈련 전체 손실: 15503.939 재구성 손실: 11621.28 잠재 손실: 3882.659 13 훈련 전체 손실: 15657.734 재구성 손실: 11861.404 잠재 손실: 3796.3303 14 훈련 전체 손실: 15168.749 재구성 손실: 11429.629 잠재 손실: 3739.12 15 훈련 전체 손실: 15347.62 재구성 손실: 11565.424 잠재 손실: 3782.1963 16 훈련 전체 손실: 15212.977 재구성 손실: 11465.227 잠재 손실: 3747.7502 17 훈련 전체 손실: 15717.482 재구성 손실: 11980.299 잠재 손실: 3737.1836 18 훈련 전체 손실: 14895.756 재구성 손실: 11182.533 잠재 손실: 3713.2231 19 훈련 전체 손실: 14894.215 재구성 손실: 11241.623 잠재 손실: 3652.5913 20 훈련 전체 손실: 15420.28 재구성 손실: 11587.9 잠재 손실: 3832.3801 21 훈련 전체 손실: 15472.772 재구성 손실: 11650.051 잠재 손실: 3822.7214 22 훈련 전체 손실: 15200.027 재구성 손실: 11416.722 잠재 손실: 3783.3052 23 훈련 전체 손실: 15441.727 재구성 손실: 11542.334 잠재 손실: 3899.3923 24 훈련 전체 손실: 15632.7 재구성 손실: 11813.906 잠재 손실: 3818.794 25 훈련 전체 손실: 15050.419 재구성 손실: 11339.068 잠재 손실: 3711.3508 26 훈련 전체 손실: 14997.406 재구성 손실: 11273.856 잠재 손실: 3723.5493 27 훈련 전체 손실: 15035.2295 재구성 손실: 11241.114 잠재 손실: 3794.1155 28 훈련 전체 손실: 15268.279 재구성 손실: 11524.893 잠재 손실: 3743.3865 29 훈련 전체 손실: 15192.881 재구성 손실: 11495.053 잠재 손실: 3697.8281 30 훈련 전체 손실: 14803.912 재구성 손실: 11084.303 잠재 손실: 3719.6094 31 훈련 전체 손실: 14754.975 재구성 손실: 11055.221 잠재 손실: 3699.7542 32 훈련 전체 손실: 14738.451 재구성 손실: 11028.982 잠재 손실: 3709.469 33 훈련 전체 손실: 15054.943 재구성 손실: 11339.586 잠재 손실: 3715.3572 34 훈련 전체 손실: 14546.092 재구성 손실: 10922.399 잠재 손실: 3623.692 35 훈련 전체 손실: 14226.648 재구성 손실: 10607.357 잠재 손실: 3619.2913 36 훈련 전체 손실: 14910.793 재구성 손실: 11196.432 잠재 손실: 3714.3616 37 훈련 전체 손실: 14592.447 재구성 손실: 10871.217 잠재 손실: 3721.23 38 훈련 전체 손실: 14863.799 재구성 손실: 11205.761 잠재 손실: 3658.0383 39 훈련 전체 손실: 15030.525 재구성 손실: 11274.483 잠재 손실: 3756.0425 40 훈련 전체 손실: 14906.74 재구성 손실: 11165.52 잠재 손실: 3741.2212 41 훈련 전체 손실: 14924.993 재구성 손실: 11199.85 잠재 손실: 3725.1438 42 훈련 전체 손실: 14884.349 재구성 손실: 11197.784 잠재 손실: 3686.5642 43 훈련 전체 손실: 14541.242 재구성 손실: 10911.296 잠재 손실: 3629.9463 44 훈련 전체 손실: 14616.744 재구성 손실: 11014.871 잠재 손실: 3601.8733 45 훈련 전체 손실: 14973.416 재구성 손실: 11183.248 잠재 손실: 3790.1682 46 훈련 전체 손실: 15090.039 재구성 손실: 11397.224 잠재 손실: 3692.8157 47 훈련 전체 손실: 14409.345 재구성 손실: 10769.734 잠재 손실: 3639.61 48 훈련 전체 손실: 14673.551 재구성 손실: 10999.471 잠재 손실: 3674.0803 49 훈련 전체 손실: 15285.273 재구성 손실: 11517.977 잠재 손실: 3767.2969
plt.figure(figsize=(8,50))  # not shown in the book
for iteration in range(n_digits):
    plt.subplot(n_digits, 10, iteration + 1)
    plot_image(outputs_val[iteration])
n_rows = 6
n_cols = 10
plot_multiple_images(outputs_val.reshape(-1, 28, 28), n_rows, n_cols)
save_fig("generated_digits_plot")
plt.show()
Note that the latent loss is computed differently in this version:
latent_loss = 0.5 * tf.reduce_sum(
    tf.exp(hidden3_gamma) + tf.square(hidden3_mean) - 1 - hidden3_gamma)
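In other words, the network now outputs $\gamma = \log(\sigma^2)$ (hidden3_gamma) instead of $\sigma$ (hidden3_sigma), and the two formulations of the latent loss are mathematically equivalent:

$$\mathcal{L}_{\text{latent}} = \frac{1}{2}\sum_i \left(\sigma_i^2 + \mu_i^2 - 1 - \log \sigma_i^2\right) = \frac{1}{2}\sum_i \left(e^{\gamma_i} + \mu_i^2 - 1 - \gamma_i\right)$$

where $\mu$ is hidden3_mean. Working with $\log(\sigma^2)$ avoids the explicit eps smoothing term and is generally more numerically stable.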
Encode:
n_digits = 3
# X_test, y_test = mnist.test.next_batch(batch_size)
X_test_batch, y_test_batch = next(shuffle_batch(X_test, y_test, batch_size))
codings = hidden3
with tf.Session() as sess:
    saver.restore(sess, "./my_model_variational.ckpt")
    codings_val = codings.eval(feed_dict={X: X_test_batch})
INFO:tensorflow:Restoring parameters from ./my_model_variational.ckpt
Decode:
with tf.Session() as sess:
    saver.restore(sess, "./my_model_variational.ckpt")
    outputs_val = outputs.eval(feed_dict={codings: codings_val})
INFO:tensorflow:Restoring parameters from ./my_model_variational.ckpt
Let's plot the reconstructions:
fig = plt.figure(figsize=(8, 2.5 * n_digits))
for iteration in range(n_digits):
    plt.subplot(n_digits, 2, 1 + 2 * iteration)
    plot_image(X_test_batch[iteration])
    plt.subplot(n_digits, 2, 2 + 2 * iteration)
    plot_image(outputs_val[iteration])
n_iterations = 3
n_digits = 6
codings_rnd = np.random.normal(size=[n_digits, n_hidden3])
with tf.Session() as sess:
    saver.restore(sess, "./my_model_variational.ckpt")
    target_codings = np.roll(codings_rnd, -1, axis=0)
    for iteration in range(n_iterations + 1):
        codings_interpolate = codings_rnd + (target_codings - codings_rnd) * iteration / n_iterations
        outputs_val = outputs.eval(feed_dict={codings: codings_interpolate})
        plt.figure(figsize=(11, 1.5*n_iterations))
        for digit_index in range(n_digits):
            plt.subplot(1, n_digits, digit_index + 1)
            plot_image(outputs_val[digit_index])
        plt.show()
INFO:tensorflow:Restoring parameters from ./my_model_variational.ckpt
Coming soon...