Chapter 11 – Training Deep Neural Networks
This notebook contains all the sample code and solutions to the exercises in Chapter 11.
First, let's import a few common modules, make Matplotlib plot figures inline, and prepare a function to save the figures. We also check that Python 3.5 or later is installed (Python 2.x may work, but it is deprecated, so we strongly recommend using Python 3), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0.
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)
# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"
# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"
%load_ext tensorboard
# Common imports
import numpy as np
import os
# To make this notebook's output stable across runs
np.random.seed(42)
# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "deep"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)
def sigmoid(z):  # this computes the sigmoid (the original name "logit" was a misnomer)
    return 1 / (1 + np.exp(-z))
z = np.linspace(-5, 5, 200)
plt.plot([-5, 5], [0, 0], 'k-')
plt.plot([-5, 5], [1, 1], 'k--')
plt.plot([0, 0], [-0.2, 1.2], 'k-')
plt.plot([-5, 5], [-3/4, 7/4], 'g--')
plt.plot(z, sigmoid(z), "b-", linewidth=2)
props = dict(facecolor='black', shrink=0.1)
plt.annotate('Saturating', xytext=(3.5, 0.7), xy=(5, 1), arrowprops=props, fontsize=14, ha="center")
plt.annotate('Saturating', xytext=(-3.5, 0.3), xy=(-5, 0), arrowprops=props, fontsize=14, ha="center")
plt.annotate('Linear', xytext=(2, 0.2), xy=(0, 0.5), arrowprops=props, fontsize=14, ha="center")
plt.grid(True)
plt.title("Sigmoid activation function", fontsize=14)
plt.axis([-5, 5, -0.2, 1.2])
save_fig("sigmoid_saturation_plot")
plt.show()
Saving figure sigmoid_saturation_plot
[name for name in dir(keras.initializers) if not name.startswith("_")]
['Constant', 'GlorotNormal', 'GlorotUniform', 'HeNormal', 'HeUniform', 'Identity', 'Initializer', 'LecunNormal', 'LecunUniform', 'Ones', 'Orthogonal', 'RandomNormal', 'RandomUniform', 'TruncatedNormal', 'VarianceScaling', 'Zeros', 'constant', 'deserialize', 'get', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform', 'identity', 'lecun_normal', 'lecun_uniform', 'ones', 'orthogonal', 'random_normal', 'random_uniform', 'serialize', 'truncated_normal', 'variance_scaling', 'zeros']
keras.layers.Dense(10, activation="relu", kernel_initializer="he_normal")
<keras.layers.core.Dense at 0x7f98d0126828>
init = keras.initializers.VarianceScaling(scale=2., mode='fan_avg',
distribution='uniform')
keras.layers.Dense(10, activation="relu", kernel_initializer=init)
<keras.layers.core.Dense at 0x7f98207e8240>
def leaky_relu(z, alpha=0.01):
    return np.maximum(alpha*z, z)
plt.plot(z, leaky_relu(z, 0.05), "b-", linewidth=2)
plt.plot([-5, 5], [0, 0], 'k-')
plt.plot([0, 0], [-0.5, 4.2], 'k-')
plt.grid(True)
props = dict(facecolor='black', shrink=0.1)
plt.annotate('Leak', xytext=(-3.5, 0.5), xy=(-5, -0.2), arrowprops=props, fontsize=14, ha="center")
plt.title("Leaky ReLU activation function", fontsize=14)
plt.axis([-5, 5, -0.5, 4.2])
save_fig("leaky_relu_plot")
plt.show()
Saving figure leaky_relu_plot
[m for m in dir(keras.activations) if not m.startswith("_")]
['deserialize', 'elu', 'exponential', 'gelu', 'get', 'hard_sigmoid', 'linear', 'relu', 'selu', 'serialize', 'sigmoid', 'softmax', 'softplus', 'softsign', 'swish', 'tanh']
[m for m in dir(keras.layers) if "relu" in m.lower()]
['LeakyReLU', 'PReLU', 'ReLU', 'ThresholdedReLU']
Let's train a neural network on Fashion MNIST using the LeakyReLU activation function:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, kernel_initializer="he_normal"),
keras.layers.LeakyReLU(),
keras.layers.Dense(100, kernel_initializer="he_normal"),
keras.layers.LeakyReLU(),
keras.layers.Dense(10, activation="softmax")
])
2021-10-10 01:38:52.146735: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
model.compile(loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=10,
validation_data=(X_valid, y_valid))
2021-10-10 01:38:52.524612: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
Epoch 1/10 1719/1719 [==============================] - 4s 2ms/step - loss: 1.2819 - accuracy: 0.6229 - val_loss: 0.8886 - val_accuracy: 0.7160 Epoch 2/10 1719/1719 [==============================] - 3s 2ms/step - loss: 0.7955 - accuracy: 0.7362 - val_loss: 0.7130 - val_accuracy: 0.7658 Epoch 3/10 1719/1719 [==============================] - 3s 2ms/step - loss: 0.6816 - accuracy: 0.7720 - val_loss: 0.6427 - val_accuracy: 0.7898 Epoch 4/10 1719/1719 [==============================] - 3s 2ms/step - loss: 0.6217 - accuracy: 0.7944 - val_loss: 0.5900 - val_accuracy: 0.8064 Epoch 5/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.5832 - accuracy: 0.8074 - val_loss: 0.5582 - val_accuracy: 0.8202 Epoch 6/10 1719/1719 [==============================] - 3s 2ms/step - loss: 0.5553 - accuracy: 0.8156 - val_loss: 0.5350 - val_accuracy: 0.8238 Epoch 7/10 1719/1719 [==============================] - 3s 2ms/step - loss: 0.5338 - accuracy: 0.8225 - val_loss: 0.5157 - val_accuracy: 0.8306 Epoch 8/10 1719/1719 [==============================] - 3s 2ms/step - loss: 0.5173 - accuracy: 0.8272 - val_loss: 0.5079 - val_accuracy: 0.8286 Epoch 9/10 1719/1719 [==============================] - 3s 2ms/step - loss: 0.5040 - accuracy: 0.8289 - val_loss: 0.4895 - val_accuracy: 0.8388 Epoch 10/10 1719/1719 [==============================] - 3s 2ms/step - loss: 0.4924 - accuracy: 0.8321 - val_loss: 0.4817 - val_accuracy: 0.8398
Now let's try PReLU:
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, kernel_initializer="he_normal"),
keras.layers.PReLU(),
keras.layers.Dense(100, kernel_initializer="he_normal"),
keras.layers.PReLU(),
keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=10,
validation_data=(X_valid, y_valid))
Epoch 1/10 1719/1719 [==============================] - 4s 2ms/step - loss: 1.3461 - accuracy: 0.6209 - val_loss: 0.9255 - val_accuracy: 0.7184 Epoch 2/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.8197 - accuracy: 0.7355 - val_loss: 0.7305 - val_accuracy: 0.7632 Epoch 3/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.6966 - accuracy: 0.7693 - val_loss: 0.6565 - val_accuracy: 0.7884 Epoch 4/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.6331 - accuracy: 0.7909 - val_loss: 0.6003 - val_accuracy: 0.8046 Epoch 5/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.5917 - accuracy: 0.8057 - val_loss: 0.5656 - val_accuracy: 0.8184 Epoch 6/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.5618 - accuracy: 0.8135 - val_loss: 0.5406 - val_accuracy: 0.8238 Epoch 7/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.5390 - accuracy: 0.8205 - val_loss: 0.5196 - val_accuracy: 0.8312 Epoch 8/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.5213 - accuracy: 0.8257 - val_loss: 0.5113 - val_accuracy: 0.8312 Epoch 9/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.5070 - accuracy: 0.8289 - val_loss: 0.4917 - val_accuracy: 0.8380 Epoch 10/10 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4945 - accuracy: 0.8315 - val_loss: 0.4826 - val_accuracy: 0.8396
def elu(z, alpha=1):
    return np.where(z < 0, alpha * (np.exp(z) - 1), z)
plt.plot(z, elu(z), "b-", linewidth=2)
plt.plot([-5, 5], [0, 0], 'k-')
plt.plot([-5, 5], [-1, -1], 'k--')
plt.plot([0, 0], [-2.2, 3.2], 'k-')
plt.grid(True)
plt.title(r"ELU activation function ($\alpha=1$)", fontsize=14)
plt.axis([-5, 5, -2.2, 3.2])
save_fig("elu_plot")
plt.show()
Saving figure elu_plot
Implementing ELU in TensorFlow is easy: just specify the activation function when building each layer:
keras.layers.Dense(10, activation="elu")
<keras.layers.core.Dense at 0x7f97aa71d3c8>
Günter Klambauer, Thomas Unterthiner, and Andreas Mayr introduced the SELU activation function in a great 2017 paper. During training, a neural network built exclusively from a stack of dense layers, using the SELU activation function and LeCun initialization, will self-normalize: the output of each layer tends to preserve its mean and standard deviation, which prevents the vanishing and exploding gradients problems. As a result, the SELU activation function often outperforms other activation functions for this kind of network (especially very deep ones), so you should definitely try it. Unfortunately, the self-normalizing property of SELU is easily broken: you cannot use ℓ1 or ℓ2 regularization, dropout, max-norm, skip connections, or any other non-sequential topology (so recurrent neural networks will not self-normalize). In practice, however, it works quite well with sequential CNNs. If self-normalization is broken, SELU will not outperform other activation functions.
from scipy.special import erfc
# alpha and scale to self-normalize with mean 0 and standard deviation 1
# (see equation 14 in the paper):
alpha_0_1 = -np.sqrt(2 / np.pi) / (erfc(1/np.sqrt(2)) * np.exp(1/2) - 1)
scale_0_1 = (1 - erfc(1 / np.sqrt(2)) * np.sqrt(np.e)) * np.sqrt(2 * np.pi) * (2 * erfc(np.sqrt(2))*np.e**2 + np.pi*erfc(1/np.sqrt(2))**2*np.e - 2*(2+np.pi)*erfc(1/np.sqrt(2))*np.sqrt(np.e)+np.pi+2)**(-1/2)
def selu(z, scale=scale_0_1, alpha=alpha_0_1):
    return scale * elu(z, alpha)
plt.plot(z, selu(z), "b-", linewidth=2)
plt.plot([-5, 5], [0, 0], 'k-')
plt.plot([-5, 5], [-1.758, -1.758], 'k--')
plt.plot([0, 0], [-2.2, 3.2], 'k-')
plt.grid(True)
plt.title("SELU activation function", fontsize=14)
plt.axis([-5, 5, -2.2, 3.2])
save_fig("selu_plot")
plt.show()
Saving figure selu_plot
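As a quick sanity check (not in the original notebook), the closed-form constants computed above should match the values published in the SELU paper, roughly alpha ≈ 1.67326 and scale ≈ 1.05070:
print(alpha_0_1)  # expected: roughly 1.67326
print(scale_0_1)  # expected: roughly 1.05070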
By default, the SELU hyperparameters (scale and alpha) are tuned so that each neuron's mean output stays close to 0 and its standard deviation stays close to 1 (assuming the inputs are standardized to mean 0 and standard deviation 1 as well). With this activation function, even a deep neural network with 1,000 layers keeps roughly mean 0 and standard deviation 1 across all layers, which avoids the exploding/vanishing gradients problems:
np.random.seed(42)
Z = np.random.normal(size=(500, 100)) # standardized inputs
for layer in range(1000):
    W = np.random.normal(size=(100, 100), scale=np.sqrt(1 / 100)) # LeCun initialization
    Z = selu(np.dot(Z, W))
    means = np.mean(Z, axis=0).mean()
    stds = np.std(Z, axis=0).mean()
    if layer % 100 == 0:
        print("Layer {}: mean {:.2f}, std deviation {:.2f}".format(layer, means, stds))
Layer 0: mean -0.00, std deviation 1.00 Layer 100: mean 0.02, std deviation 0.96 Layer 200: mean 0.01, std deviation 0.90 Layer 300: mean -0.02, std deviation 0.92 Layer 400: mean 0.05, std deviation 0.89 Layer 500: mean 0.01, std deviation 0.93 Layer 600: mean 0.02, std deviation 0.92 Layer 700: mean -0.02, std deviation 0.90 Layer 800: mean 0.05, std deviation 0.83 Layer 900: mean 0.02, std deviation 1.00
Using SELU is easy:
keras.layers.Dense(10, activation="selu",
kernel_initializer="lecun_normal")
<keras.layers.core.Dense at 0x7f97aa71dc50>
Let's build a neural net for Fashion MNIST with 100 hidden layers, using the SELU activation function:
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="selu",
kernel_initializer="lecun_normal"))
for layer in range(99):
    model.add(keras.layers.Dense(100, activation="selu",
                                 kernel_initializer="lecun_normal"))
model.add(keras.layers.Dense(10, activation="softmax"))
model.compile(loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
Now let's train it. Do not forget to scale the inputs to mean 0 and standard deviation 1:
pixel_means = X_train.mean(axis=0, keepdims=True)
pixel_stds = X_train.std(axis=0, keepdims=True)
X_train_scaled = (X_train - pixel_means) / pixel_stds
X_valid_scaled = (X_valid - pixel_means) / pixel_stds
X_test_scaled = (X_test - pixel_means) / pixel_stds
history = model.fit(X_train_scaled, y_train, epochs=5,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/5 1719/1719 [==============================] - 27s 14ms/step - loss: 1.2359 - accuracy: 0.5200 - val_loss: 0.8552 - val_accuracy: 0.6756 Epoch 2/5 1719/1719 [==============================] - 23s 13ms/step - loss: 0.7186 - accuracy: 0.7408 - val_loss: 0.6080 - val_accuracy: 0.7830 Epoch 3/5 1719/1719 [==============================] - 24s 14ms/step - loss: 0.6907 - accuracy: 0.7527 - val_loss: 0.6446 - val_accuracy: 0.7532 Epoch 4/5 1719/1719 [==============================] - 23s 13ms/step - loss: 0.6234 - accuracy: 0.7805 - val_loss: 0.6986 - val_accuracy: 0.7346 Epoch 5/5 1719/1719 [==============================] - 24s 14ms/step - loss: 0.6030 - accuracy: 0.7859 - val_loss: 0.7600 - val_accuracy: 0.7390
Now look at what happens if we use the ReLU activation function instead:
np.random.seed(42)
tf.random.set_seed(42)
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="relu", kernel_initializer="he_normal"))
for layer in range(99):
    model.add(keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"))
model.add(keras.layers.Dense(10, activation="softmax"))
model.compile(loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model.fit(X_train_scaled, y_train, epochs=5,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/5 1719/1719 [==============================] - 23s 12ms/step - loss: 1.8369 - accuracy: 0.2603 - val_loss: 1.3222 - val_accuracy: 0.4058 Epoch 2/5 1719/1719 [==============================] - 20s 12ms/step - loss: 1.2342 - accuracy: 0.4627 - val_loss: 1.0143 - val_accuracy: 0.5800 Epoch 3/5 1719/1719 [==============================] - 20s 12ms/step - loss: 0.9480 - accuracy: 0.5912 - val_loss: 0.8867 - val_accuracy: 0.5914 Epoch 4/5 1719/1719 [==============================] - 20s 12ms/step - loss: 1.0154 - accuracy: 0.5803 - val_loss: 0.9080 - val_accuracy: 0.6020 Epoch 5/5 1719/1719 [==============================] - 20s 11ms/step - loss: 0.8391 - accuracy: 0.6571 - val_loss: 0.7974 - val_accuracy: 0.6784
Not great at all: we clearly suffered from the vanishing/exploding gradients problem. Let's see whether Batch Normalization does better:
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.BatchNormalization(),
keras.layers.Dense(300, activation="relu"),
keras.layers.BatchNormalization(),
keras.layers.Dense(100, activation="relu"),
keras.layers.BatchNormalization(),
keras.layers.Dense(10, activation="softmax")
])
model.summary()
Model: "sequential_4" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_4 (Flatten) (None, 784) 0 _________________________________________________________________ batch_normalization (BatchNo (None, 784) 3136 _________________________________________________________________ dense_212 (Dense) (None, 300) 235500 _________________________________________________________________ batch_normalization_1 (Batch (None, 300) 1200 _________________________________________________________________ dense_213 (Dense) (None, 100) 30100 _________________________________________________________________ batch_normalization_2 (Batch (None, 100) 400 _________________________________________________________________ dense_214 (Dense) (None, 10) 1010 ================================================================= Total params: 271,346 Trainable params: 268,978 Non-trainable params: 2,368 _________________________________________________________________
bn1 = model.layers[1]
[(var.name, var.trainable) for var in bn1.variables]
[('batch_normalization/gamma:0', True), ('batch_normalization/beta:0', True), ('batch_normalization/moving_mean:0', False), ('batch_normalization/moving_variance:0', False)]
# The `updates` attribute is deprecated and will be removed in the future.
# bn1.updates
model.compile(loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=10,
validation_data=(X_valid, y_valid))
Epoch 1/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.8750 - accuracy: 0.7123 - val_loss: 0.5526 - val_accuracy: 0.8230 Epoch 2/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5753 - accuracy: 0.8032 - val_loss: 0.4725 - val_accuracy: 0.8466 Epoch 3/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5189 - accuracy: 0.8205 - val_loss: 0.4375 - val_accuracy: 0.8552 Epoch 4/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4827 - accuracy: 0.8323 - val_loss: 0.4152 - val_accuracy: 0.8598 Epoch 5/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4565 - accuracy: 0.8408 - val_loss: 0.3997 - val_accuracy: 0.8640 Epoch 6/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4398 - accuracy: 0.8473 - val_loss: 0.3867 - val_accuracy: 0.8696 Epoch 7/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4242 - accuracy: 0.8513 - val_loss: 0.3764 - val_accuracy: 0.8704 Epoch 8/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4143 - accuracy: 0.8540 - val_loss: 0.3712 - val_accuracy: 0.8736 Epoch 9/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4023 - accuracy: 0.8580 - val_loss: 0.3632 - val_accuracy: 0.8758 Epoch 10/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3914 - accuracy: 0.8623 - val_loss: 0.3573 - val_accuracy: 0.8766
Sometimes applying BN before the activation function works better as well (there is a debate on this topic). Moreover, the layer before a BatchNormalization layer does not need bias terms, since the BatchNormalization layer cancels them out anyway; those parameters would be wasted, so you can set use_bias=False when creating such layers:
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.BatchNormalization(),
keras.layers.Dense(300, use_bias=False),
keras.layers.BatchNormalization(),
keras.layers.Activation("relu"),
keras.layers.Dense(100, use_bias=False),
keras.layers.BatchNormalization(),
keras.layers.Activation("relu"),
keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=10,
validation_data=(X_valid, y_valid))
Epoch 1/10 1719/1719 [==============================] - 6s 3ms/step - loss: 1.0317 - accuracy: 0.6757 - val_loss: 0.6767 - val_accuracy: 0.7816 Epoch 2/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.6790 - accuracy: 0.7792 - val_loss: 0.5566 - val_accuracy: 0.8180 Epoch 3/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5960 - accuracy: 0.8038 - val_loss: 0.5007 - val_accuracy: 0.8360 Epoch 4/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5447 - accuracy: 0.8193 - val_loss: 0.4666 - val_accuracy: 0.8448 Epoch 5/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5109 - accuracy: 0.8279 - val_loss: 0.4434 - val_accuracy: 0.8534 Epoch 6/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4898 - accuracy: 0.8337 - val_loss: 0.4263 - val_accuracy: 0.8548 Epoch 7/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4712 - accuracy: 0.8396 - val_loss: 0.4131 - val_accuracy: 0.8570 Epoch 8/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4560 - accuracy: 0.8440 - val_loss: 0.4035 - val_accuracy: 0.8608 Epoch 9/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4441 - accuracy: 0.8474 - val_loss: 0.3943 - val_accuracy: 0.8640 Epoch 10/10 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4333 - accuracy: 0.8505 - val_loss: 0.3875 - val_accuracy: 0.8664
All Keras optimizers accept the clipnorm or clipvalue arguments:
optimizer = keras.optimizers.SGD(clipvalue=1.0)
optimizer = keras.optimizers.SGD(clipnorm=1.0)
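The difference matters: clipvalue clips each gradient component independently, which can change the gradient's direction, while clipnorm rescales the whole gradient whenever its norm exceeds the threshold, preserving its direction. A minimal sketch using TensorFlow's clipping ops directly (just an illustration, not the optimizer's internal code path):
grad = tf.constant([0.5, 10.0])
print(tf.clip_by_value(grad, -1.0, 1.0).numpy())  # [0.5 1. ] -- the direction changes
print(tf.clip_by_norm(grad, 1.0).numpy())         # roughly [0.0499 0.9988] -- direction preserved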
Let's split the Fashion MNIST training set in two:

X_train_A: all images of all items except for sandals and shirts (classes 5 and 6)
X_train_B: a much smaller training set containing only the first 200 images of sandals and shirts

The validation set and the test set are also split this way, but without restricting the number of images.
We will train a model on set A (a classification task with 8 classes) and try to reuse it to tackle set B (binary classification). We hope to transfer a little knowledge from task A to task B, since the classes in set A (sneakers, ankle boots, coats, t-shirts, etc.) are somewhat similar to the classes in set B (sandals and shirts). However, since we are using Dense layers, only patterns that occur at the same location can be reused (in contrast, convolutional layers transfer much more, since learned patterns can be detected anywhere in the image, as we will see in the CNN chapter).
def split_dataset(X, y):
    y_5_or_6 = (y == 5) | (y == 6) # sandals or shirts
    y_A = y[~y_5_or_6]
    y_A[y_A > 6] -= 2 # class indices 7, 8, 9 should be moved to 5, 6, 7
    y_B = (y[y_5_or_6] == 6).astype(np.float32) # binary classification task: is it a shirt (class 6)?
    return ((X[~y_5_or_6], y_A),
            (X[y_5_or_6], y_B))
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)
X_train_B = X_train_B[:200]
y_train_B = y_train_B[:200]
X_train_A.shape
(43986, 28, 28)
X_train_B.shape
(200, 28, 28)
y_train_A[:30]
array([4, 0, 5, 7, 7, 7, 4, 4, 3, 4, 0, 1, 6, 3, 4, 3, 2, 6, 5, 3, 4, 5, 1, 3, 4, 2, 0, 6, 7, 1], dtype=uint8)
y_train_B[:30]
array([1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.], dtype=float32)
tf.random.set_seed(42)
np.random.seed(42)
model_A = keras.models.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(n_hidden, activation="selu"))
model_A.add(keras.layers.Dense(8, activation="softmax"))
model_A.compile(loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model_A.fit(X_train_A, y_train_A, epochs=20,
validation_data=(X_valid_A, y_valid_A))
Epoch 1/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.5926 - accuracy: 0.8104 - val_loss: 0.3894 - val_accuracy: 0.8665 Epoch 2/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.3523 - accuracy: 0.8788 - val_loss: 0.3287 - val_accuracy: 0.8822 Epoch 3/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.3170 - accuracy: 0.8895 - val_loss: 0.3010 - val_accuracy: 0.8994 Epoch 4/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2973 - accuracy: 0.8976 - val_loss: 0.2894 - val_accuracy: 0.9023 Epoch 5/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2835 - accuracy: 0.9020 - val_loss: 0.2770 - val_accuracy: 0.9068 Epoch 6/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2730 - accuracy: 0.9060 - val_loss: 0.2731 - val_accuracy: 0.9068 Epoch 7/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2641 - accuracy: 0.9090 - val_loss: 0.2719 - val_accuracy: 0.9081 Epoch 8/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2573 - accuracy: 0.9125 - val_loss: 0.2587 - val_accuracy: 0.9141 Epoch 9/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2519 - accuracy: 0.9133 - val_loss: 0.2565 - val_accuracy: 0.9141 Epoch 10/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2469 - accuracy: 0.9154 - val_loss: 0.2541 - val_accuracy: 0.9158 Epoch 11/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2423 - accuracy: 0.9176 - val_loss: 0.2495 - val_accuracy: 0.9153 Epoch 12/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2382 - accuracy: 0.9189 - val_loss: 0.2510 - val_accuracy: 0.9131 Epoch 13/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2351 - accuracy: 0.9200 - val_loss: 0.2444 - val_accuracy: 0.9158 Epoch 14/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2315 - accuracy: 0.9213 - val_loss: 0.2414 - val_accuracy: 0.9175 Epoch 15/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2287 - accuracy: 0.9214 - val_loss: 0.2448 - val_accuracy: 0.9185 Epoch 16/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2255 - accuracy: 0.9225 - val_loss: 0.2384 - val_accuracy: 0.9193 Epoch 17/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2231 - accuracy: 0.9232 - val_loss: 0.2409 - val_accuracy: 0.9175 Epoch 18/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2201 - accuracy: 0.9246 - val_loss: 0.2423 - val_accuracy: 0.9145 Epoch 19/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2178 - accuracy: 0.9256 - val_loss: 0.2328 - val_accuracy: 0.9203 Epoch 20/20 1375/1375 [==============================] - 3s 2ms/step - loss: 0.2156 - accuracy: 0.9261 - val_loss: 0.2332 - val_accuracy: 0.9210
model_A.save("my_model_A.h5")
model_B = keras.models.Sequential()
model_B.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_B.add(keras.layers.Dense(n_hidden, activation="selu"))
model_B.add(keras.layers.Dense(1, activation="sigmoid"))
model_B.compile(loss="binary_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model_B.fit(X_train_B, y_train_B, epochs=20,
validation_data=(X_valid_B, y_valid_B))
Epoch 1/20 7/7 [==============================] - 0s 28ms/step - loss: 0.9573 - accuracy: 0.4650 - val_loss: 0.6314 - val_accuracy: 0.6004 Epoch 2/20 7/7 [==============================] - 0s 10ms/step - loss: 0.5692 - accuracy: 0.7450 - val_loss: 0.4784 - val_accuracy: 0.8529 Epoch 3/20 7/7 [==============================] - 0s 10ms/step - loss: 0.4503 - accuracy: 0.8650 - val_loss: 0.4102 - val_accuracy: 0.8945 Epoch 4/20 7/7 [==============================] - 0s 11ms/step - loss: 0.3879 - accuracy: 0.8950 - val_loss: 0.3647 - val_accuracy: 0.9178 Epoch 5/20 7/7 [==============================] - 0s 10ms/step - loss: 0.3435 - accuracy: 0.9250 - val_loss: 0.3300 - val_accuracy: 0.9320 Epoch 6/20 7/7 [==============================] - 0s 10ms/step - loss: 0.3081 - accuracy: 0.9300 - val_loss: 0.3019 - val_accuracy: 0.9402 Epoch 7/20 7/7 [==============================] - 0s 10ms/step - loss: 0.2800 - accuracy: 0.9350 - val_loss: 0.2804 - val_accuracy: 0.9422 Epoch 8/20 7/7 [==============================] - 0s 11ms/step - loss: 0.2564 - accuracy: 0.9450 - val_loss: 0.2606 - val_accuracy: 0.9473 Epoch 9/20 7/7 [==============================] - 0s 10ms/step - loss: 0.2362 - accuracy: 0.9550 - val_loss: 0.2428 - val_accuracy: 0.9523 Epoch 10/20 7/7 [==============================] - 0s 11ms/step - loss: 0.2188 - accuracy: 0.9600 - val_loss: 0.2281 - val_accuracy: 0.9544 Epoch 11/20 7/7 [==============================] - 0s 10ms/step - loss: 0.2036 - accuracy: 0.9700 - val_loss: 0.2150 - val_accuracy: 0.9584 Epoch 12/20 7/7 [==============================] - 0s 10ms/step - loss: 0.1898 - accuracy: 0.9700 - val_loss: 0.2036 - val_accuracy: 0.9584 Epoch 13/20 7/7 [==============================] - 0s 10ms/step - loss: 0.1773 - accuracy: 0.9750 - val_loss: 0.1931 - val_accuracy: 0.9615 Epoch 14/20 7/7 [==============================] - 0s 10ms/step - loss: 0.1668 - accuracy: 0.9800 - val_loss: 0.1838 - val_accuracy: 0.9635 Epoch 15/20 7/7 [==============================] - 0s 10ms/step - loss: 0.1570 - accuracy: 0.9900 - val_loss: 0.1746 - val_accuracy: 0.9686 Epoch 16/20 7/7 [==============================] - 0s 10ms/step - loss: 0.1481 - accuracy: 0.9900 - val_loss: 0.1674 - val_accuracy: 0.9686 Epoch 17/20 7/7 [==============================] - 0s 10ms/step - loss: 0.1406 - accuracy: 0.9900 - val_loss: 0.1604 - val_accuracy: 0.9706 Epoch 18/20 7/7 [==============================] - 0s 11ms/step - loss: 0.1334 - accuracy: 0.9900 - val_loss: 0.1539 - val_accuracy: 0.9706 Epoch 19/20 7/7 [==============================] - 0s 10ms/step - loss: 0.1268 - accuracy: 0.9900 - val_loss: 0.1482 - val_accuracy: 0.9716 Epoch 20/20 7/7 [==============================] - 0s 14ms/step - loss: 0.1208 - accuracy: 0.9900 - val_loss: 0.1431 - val_accuracy: 0.9716
model_B.summary()
Model: "sequential_7" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_7 (Flatten) (None, 784) 0 _________________________________________________________________ dense_224 (Dense) (None, 300) 235500 _________________________________________________________________ dense_225 (Dense) (None, 100) 30100 _________________________________________________________________ dense_226 (Dense) (None, 50) 5050 _________________________________________________________________ dense_227 (Dense) (None, 50) 2550 _________________________________________________________________ dense_228 (Dense) (None, 50) 2550 _________________________________________________________________ dense_229 (Dense) (None, 1) 51 ================================================================= Total params: 275,801 Trainable params: 275,801 Non-trainable params: 0 _________________________________________________________________
model_A = keras.models.load_model("my_model_A.h5")
model_B_on_A = keras.models.Sequential(model_A.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))
Note that model_B_on_A and model_A now share layers, so training one updates both models. To avoid that, build model_B_on_A on top of a clone of model_A:
model_A_clone = keras.models.clone_model(model_A)
model_A_clone.set_weights(model_A.get_weights())
model_B_on_A = keras.models.Sequential(model_A_clone.layers[:-1])
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False
model_B_on_A.compile(loss="binary_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,
validation_data=(X_valid_B, y_valid_B))
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True
model_B_on_A.compile(loss="binary_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,
validation_data=(X_valid_B, y_valid_B))
Epoch 1/4 7/7 [==============================] - 0s 27ms/step - loss: 0.2649 - accuracy: 0.9400 - val_loss: 0.2792 - val_accuracy: 0.9260 Epoch 2/4 7/7 [==============================] - 0s 9ms/step - loss: 0.2552 - accuracy: 0.9400 - val_loss: 0.2697 - val_accuracy: 0.9300 Epoch 3/4 7/7 [==============================] - 0s 10ms/step - loss: 0.2459 - accuracy: 0.9400 - val_loss: 0.2610 - val_accuracy: 0.9331 Epoch 4/4 7/7 [==============================] - 0s 10ms/step - loss: 0.2374 - accuracy: 0.9450 - val_loss: 0.2528 - val_accuracy: 0.9351 Epoch 1/16 7/7 [==============================] - 0s 27ms/step - loss: 0.2124 - accuracy: 0.9500 - val_loss: 0.2046 - val_accuracy: 0.9635 Epoch 2/16 7/7 [==============================] - 0s 10ms/step - loss: 0.1699 - accuracy: 0.9550 - val_loss: 0.1722 - val_accuracy: 0.9716 Epoch 3/16 7/7 [==============================] - 0s 10ms/step - loss: 0.1409 - accuracy: 0.9700 - val_loss: 0.1495 - val_accuracy: 0.9817 Epoch 4/16 7/7 [==============================] - 0s 10ms/step - loss: 0.1199 - accuracy: 0.9800 - val_loss: 0.1327 - val_accuracy: 0.9817 Epoch 5/16 7/7 [==============================] - 0s 10ms/step - loss: 0.1047 - accuracy: 0.9900 - val_loss: 0.1203 - val_accuracy: 0.9848 Epoch 6/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0930 - accuracy: 0.9950 - val_loss: 0.1103 - val_accuracy: 0.9858 Epoch 7/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0838 - accuracy: 0.9950 - val_loss: 0.1022 - val_accuracy: 0.9858 Epoch 8/16 7/7 [==============================] - 0s 11ms/step - loss: 0.0763 - accuracy: 0.9950 - val_loss: 0.0955 - val_accuracy: 0.9878 Epoch 9/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0699 - accuracy: 0.9950 - val_loss: 0.0894 - val_accuracy: 0.9878 Epoch 10/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0641 - accuracy: 0.9950 - val_loss: 0.0845 - val_accuracy: 0.9888 Epoch 11/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0596 - accuracy: 0.9950 - val_loss: 0.0801 - val_accuracy: 0.9888 Epoch 12/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0554 - accuracy: 1.0000 - val_loss: 0.0763 - val_accuracy: 0.9878 Epoch 13/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0517 - accuracy: 1.0000 - val_loss: 0.0730 - val_accuracy: 0.9878 Epoch 14/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0485 - accuracy: 1.0000 - val_loss: 0.0702 - val_accuracy: 0.9878 Epoch 15/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0459 - accuracy: 1.0000 - val_loss: 0.0677 - val_accuracy: 0.9878 Epoch 16/16 7/7 [==============================] - 0s 10ms/step - loss: 0.0435 - accuracy: 1.0000 - val_loss: 0.0653 - val_accuracy: 0.9878
So, what's the final verdict?
model_B.evaluate(X_test_B, y_test_B)
63/63 [==============================] - 0s 1ms/step - loss: 0.1408 - accuracy: 0.9705
[0.1408407837152481, 0.9704999923706055]
model_B_on_A.evaluate(X_test_B, y_test_B)
63/63 [==============================] - 0s 1ms/step - loss: 0.0563 - accuracy: 0.9940
[0.056250184774398804, 0.9940000176429749]
Great! We got quite a bit of transfer: the error rate dropped by a factor of 4.9!
(100 - 97.05) / (100 - 99.40)
4.916666666666718
# Momentum optimization
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)
# Nesterov accelerated gradient
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
# AdaGrad
optimizer = keras.optimizers.Adagrad(learning_rate=0.001)
# RMSProp
optimizer = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)
# Adam optimization
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
# AdaMax optimization
optimizer = keras.optimizers.Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
# Nadam optimization
optimizer = keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
Power scheduling: lr = lr0 / (1 + steps / s)**c. Keras uses c=1 and s = 1 / decay:
optimizer = keras.optimizers.SGD(learning_rate=0.01, decay=1e-4)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
n_epochs = 25
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/25 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4898 - accuracy: 0.8266 - val_loss: 0.4064 - val_accuracy: 0.8608 Epoch 2/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3792 - accuracy: 0.8654 - val_loss: 0.3731 - val_accuracy: 0.8720 Epoch 3/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3468 - accuracy: 0.8774 - val_loss: 0.3744 - val_accuracy: 0.8728 Epoch 4/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3259 - accuracy: 0.8848 - val_loss: 0.3509 - val_accuracy: 0.8792 Epoch 5/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3108 - accuracy: 0.8897 - val_loss: 0.3449 - val_accuracy: 0.8778 Epoch 6/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2973 - accuracy: 0.8941 - val_loss: 0.3417 - val_accuracy: 0.8846 Epoch 7/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2871 - accuracy: 0.8981 - val_loss: 0.3379 - val_accuracy: 0.8828 Epoch 8/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2779 - accuracy: 0.9014 - val_loss: 0.3421 - val_accuracy: 0.8798 Epoch 9/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2697 - accuracy: 0.9030 - val_loss: 0.3289 - val_accuracy: 0.8868 Epoch 10/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2624 - accuracy: 0.9058 - val_loss: 0.3282 - val_accuracy: 0.8858 Epoch 11/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2564 - accuracy: 0.9088 - val_loss: 0.3264 - val_accuracy: 0.8876 Epoch 12/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2504 - accuracy: 0.9113 - val_loss: 0.3337 - val_accuracy: 0.8816 Epoch 13/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2448 - accuracy: 0.9135 - val_loss: 0.3245 - val_accuracy: 0.8910 Epoch 14/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2407 - accuracy: 0.9144 - val_loss: 0.3283 - val_accuracy: 0.8858 Epoch 15/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2354 - accuracy: 0.9166 - val_loss: 0.3225 - val_accuracy: 0.8882 Epoch 16/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2314 - accuracy: 0.9185 - val_loss: 0.3204 - val_accuracy: 0.8904 Epoch 17/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2275 - accuracy: 0.9191 - val_loss: 0.3243 - val_accuracy: 0.8888 Epoch 18/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2235 - accuracy: 0.9212 - val_loss: 0.3189 - val_accuracy: 0.8924 Epoch 19/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2202 - accuracy: 0.9225 - val_loss: 0.3226 - val_accuracy: 0.8900 Epoch 20/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2169 - accuracy: 0.9242 - val_loss: 0.3203 - val_accuracy: 0.8906 Epoch 21/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2132 - accuracy: 0.9255 - val_loss: 0.3201 - val_accuracy: 0.8894 Epoch 22/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2107 - accuracy: 0.9267 - val_loss: 0.3181 - val_accuracy: 0.8896 Epoch 23/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2078 - accuracy: 0.9273 - val_loss: 0.3201 - val_accuracy: 0.8926 Epoch 24/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2049 - accuracy: 0.9292 - val_loss: 0.3202 - val_accuracy: 0.8904 Epoch 25/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2027 
- accuracy: 0.9295 - val_loss: 0.3196 - val_accuracy: 0.8894
import math
learning_rate = 0.01
decay = 1e-4
batch_size = 32
n_steps_per_epoch = math.ceil(len(X_train) / batch_size)
epochs = np.arange(n_epochs)
lrs = learning_rate / (1 + decay * epochs * n_steps_per_epoch)
plt.plot(epochs, lrs, "o-")
plt.axis([0, n_epochs - 1, 0, 0.01])
plt.xlabel("Epoch")
plt.ylabel("Learning Rate")
plt.title("Power Scheduling", fontsize=14)
plt.grid(True)
plt.show()
Exponential scheduling: lr = lr0 * 0.1**(epoch / s)
def exponential_decay_fn(epoch):
    return 0.01 * 0.1**(epoch / 20)
def exponential_decay(lr0, s):
    def exponential_decay_fn(epoch):
        return lr0 * 0.1**(epoch / s)
    return exponential_decay_fn
exponential_decay_fn = exponential_decay(lr0=0.01, s=20)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 25
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid),
callbacks=[lr_scheduler])
Epoch 1/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.8326 - accuracy: 0.7621 - val_loss: 1.0349 - val_accuracy: 0.7256 Epoch 2/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.7964 - accuracy: 0.7646 - val_loss: 0.6616 - val_accuracy: 0.8154 Epoch 3/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.6590 - accuracy: 0.8045 - val_loss: 0.9604 - val_accuracy: 0.7426 Epoch 4/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5828 - accuracy: 0.8258 - val_loss: 0.5403 - val_accuracy: 0.8402 Epoch 5/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5404 - accuracy: 0.8358 - val_loss: 0.6116 - val_accuracy: 0.8410 Epoch 6/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5186 - accuracy: 0.8456 - val_loss: 0.5160 - val_accuracy: 0.8506 Epoch 7/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4688 - accuracy: 0.8583 - val_loss: 0.5551 - val_accuracy: 0.8354 Epoch 8/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4353 - accuracy: 0.8694 - val_loss: 0.5710 - val_accuracy: 0.8452 Epoch 9/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3989 - accuracy: 0.8770 - val_loss: 0.5819 - val_accuracy: 0.8142 Epoch 10/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3863 - accuracy: 0.8810 - val_loss: 0.4887 - val_accuracy: 0.8764 Epoch 11/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3576 - accuracy: 0.8898 - val_loss: 0.4841 - val_accuracy: 0.8690 Epoch 12/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3277 - accuracy: 0.8957 - val_loss: 0.5037 - val_accuracy: 0.8642 Epoch 13/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3250 - accuracy: 0.9001 - val_loss: 0.4737 - val_accuracy: 0.8728 Epoch 14/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2895 - accuracy: 0.9073 - val_loss: 0.4703 - val_accuracy: 0.8732 Epoch 15/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2711 - accuracy: 0.9122 - val_loss: 0.5017 - val_accuracy: 0.8750 Epoch 16/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2598 - accuracy: 0.9159 - val_loss: 0.4757 - val_accuracy: 0.8800 Epoch 17/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2483 - accuracy: 0.9202 - val_loss: 0.4981 - val_accuracy: 0.8772 Epoch 18/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2329 - accuracy: 0.9248 - val_loss: 0.4908 - val_accuracy: 0.8756 Epoch 19/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2218 - accuracy: 0.9282 - val_loss: 0.5164 - val_accuracy: 0.8840 Epoch 20/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2108 - accuracy: 0.9311 - val_loss: 0.5471 - val_accuracy: 0.8772 Epoch 21/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1995 - accuracy: 0.9352 - val_loss: 0.5695 - val_accuracy: 0.8814 Epoch 22/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1921 - accuracy: 0.9369 - val_loss: 0.5697 - val_accuracy: 0.8826 Epoch 23/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1832 - accuracy: 0.9413 - val_loss: 0.5817 - val_accuracy: 0.8774 Epoch 24/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1763 - accuracy: 0.9431 - val_loss: 0.5968 - val_accuracy: 0.8806 Epoch 25/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1683 
- accuracy: 0.9457 - val_loss: 0.5972 - val_accuracy: 0.8812
plt.plot(history.epoch, history.history["lr"], "o-")
plt.axis([0, n_epochs - 1, 0, 0.011])
plt.xlabel("Epoch")
plt.ylabel("Learning Rate")
plt.title("Exponential Scheduling", fontsize=14)
plt.grid(True)
plt.show()
The schedule function can also take the current learning rate as a second argument:
def exponential_decay_fn(epoch, lr):
    return lr * 0.1**(1 / 20)
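Since this variant receives the previous epoch's learning rate, it applies the decay relative to the optimizer's own initial learning rate rather than a hard-coded lr0. Wiring it up is the same as before; a minimal sketch (assuming the model and data defined above):
lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)
# history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
#                     validation_data=(X_valid_scaled, y_valid),
#                     callbacks=[lr_scheduler])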
If you want to update the learning rate at each iteration rather than at each epoch, you need to write your own callback class:
K = keras.backend

class ExponentialDecay(keras.callbacks.Callback):
    def __init__(self, s=40000):
        super().__init__()
        self.s = s

    def on_batch_begin(self, batch, logs=None):
        # Note: the `batch` argument is reset at each epoch
        lr = K.get_value(self.model.optimizer.lr)
        K.set_value(self.model.optimizer.lr, lr * 0.1**(1 / self.s))

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(10, activation="softmax")
])
lr0 = 0.01
optimizer = keras.optimizers.Nadam(learning_rate=lr0)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
n_epochs = 25
s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)
exp_decay = ExponentialDecay(s)
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid),
callbacks=[exp_decay])
Epoch 1/25 1719/1719 [==============================] - 6s 3ms/step - loss: 0.7978 - accuracy: 0.7641 - val_loss: 0.8657 - val_accuracy: 0.7200 Epoch 2/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.6872 - accuracy: 0.7914 - val_loss: 0.6638 - val_accuracy: 0.8124 Epoch 3/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5742 - accuracy: 0.8197 - val_loss: 3.4342 - val_accuracy: 0.6938 Epoch 4/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5214 - accuracy: 0.8386 - val_loss: 0.6039 - val_accuracy: 0.8280 Epoch 5/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4828 - accuracy: 0.8483 - val_loss: 0.4606 - val_accuracy: 0.8646 Epoch 6/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4235 - accuracy: 0.8639 - val_loss: 0.4625 - val_accuracy: 0.8518 Epoch 7/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3910 - accuracy: 0.8737 - val_loss: 0.4413 - val_accuracy: 0.8616 Epoch 8/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3583 - accuracy: 0.8818 - val_loss: 0.4679 - val_accuracy: 0.8604 Epoch 9/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3352 - accuracy: 0.8899 - val_loss: 0.4638 - val_accuracy: 0.8672 Epoch 10/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3269 - accuracy: 0.8919 - val_loss: 0.4391 - val_accuracy: 0.8788 Epoch 11/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2916 - accuracy: 0.9012 - val_loss: 0.4256 - val_accuracy: 0.8782 Epoch 12/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2686 - accuracy: 0.9071 - val_loss: 0.4297 - val_accuracy: 0.8746 Epoch 13/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2467 - accuracy: 0.9145 - val_loss: 0.4410 - val_accuracy: 0.8784 Epoch 14/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2308 - accuracy: 0.9209 - val_loss: 0.4280 - val_accuracy: 0.8788 Epoch 15/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2147 - accuracy: 0.9267 - val_loss: 0.3936 - val_accuracy: 0.8848 Epoch 16/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1965 - accuracy: 0.9324 - val_loss: 0.4200 - val_accuracy: 0.8892 Epoch 17/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1856 - accuracy: 0.9365 - val_loss: 0.4689 - val_accuracy: 0.8822 Epoch 18/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1709 - accuracy: 0.9419 - val_loss: 0.5048 - val_accuracy: 0.8876 Epoch 19/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1573 - accuracy: 0.9469 - val_loss: 0.5060 - val_accuracy: 0.8930 Epoch 20/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1475 - accuracy: 0.9497 - val_loss: 0.5078 - val_accuracy: 0.8900 Epoch 21/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1358 - accuracy: 0.9539 - val_loss: 0.5430 - val_accuracy: 0.8878 Epoch 22/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1288 - accuracy: 0.9567 - val_loss: 0.5410 - val_accuracy: 0.8924 Epoch 23/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1220 - accuracy: 0.9590 - val_loss: 0.5399 - val_accuracy: 0.8882 Epoch 24/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1141 - accuracy: 0.9626 - val_loss: 0.5942 - val_accuracy: 0.8914 Epoch 25/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.1076 
- accuracy: 0.9643 - val_loss: 0.6073 - val_accuracy: 0.8882
n_steps = n_epochs * len(X_train) // 32
steps = np.arange(n_steps)
lrs = lr0 * 0.1**(steps / s)
plt.plot(steps, lrs, "-", linewidth=2)
plt.axis([0, n_steps - 1, 0, lr0 * 1.1])
plt.xlabel("Batch")
plt.ylabel("Learning Rate")
plt.title("Exponential Scheduling (per batch)", fontsize=14)
plt.grid(True)
plt.show()
def piecewise_constant_fn(epoch):
    if epoch < 5:
        return 0.01
    elif epoch < 15:
        return 0.005
    else:
        return 0.001
def piecewise_constant(boundaries, values):
    boundaries = np.array([0] + boundaries)
    values = np.array(values)
    def piecewise_constant_fn(epoch):
        return values[np.argmax(boundaries > epoch) - 1]
    return piecewise_constant_fn
piecewise_constant_fn = piecewise_constant([5, 15], [0.01, 0.005, 0.001])
lr_scheduler = keras.callbacks.LearningRateScheduler(piecewise_constant_fn)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 25
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid),
callbacks=[lr_scheduler])
Epoch 1/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.8454 - accuracy: 0.7551 - val_loss: 0.9107 - val_accuracy: 0.7354 Epoch 2/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.8216 - accuracy: 0.7619 - val_loss: 0.6475 - val_accuracy: 0.7948 Epoch 3/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.8380 - accuracy: 0.7576 - val_loss: 1.1238 - val_accuracy: 0.7248 Epoch 4/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.8244 - accuracy: 0.7562 - val_loss: 1.2028 - val_accuracy: 0.6642 Epoch 5/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.9605 - accuracy: 0.7073 - val_loss: 0.8275 - val_accuracy: 0.7606 Epoch 6/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.6131 - accuracy: 0.8073 - val_loss: 0.6293 - val_accuracy: 0.8160 Epoch 7/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5336 - accuracy: 0.8332 - val_loss: 0.6297 - val_accuracy: 0.8248 Epoch 8/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.5495 - accuracy: 0.8390 - val_loss: 0.6212 - val_accuracy: 0.8224 Epoch 9/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4908 - accuracy: 0.8481 - val_loss: 0.6210 - val_accuracy: 0.8492 Epoch 10/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4786 - accuracy: 0.8547 - val_loss: 0.6047 - val_accuracy: 0.8458 Epoch 11/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4746 - accuracy: 0.8554 - val_loss: 0.7762 - val_accuracy: 0.8494 Epoch 12/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4605 - accuracy: 0.8611 - val_loss: 0.5888 - val_accuracy: 0.8454 Epoch 13/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4830 - accuracy: 0.8597 - val_loss: 0.9160 - val_accuracy: 0.8108 Epoch 14/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4645 - accuracy: 0.8629 - val_loss: 0.7827 - val_accuracy: 0.8480 Epoch 15/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4433 - accuracy: 0.8641 - val_loss: 0.6753 - val_accuracy: 0.8426 Epoch 16/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3245 - accuracy: 0.8943 - val_loss: 0.5471 - val_accuracy: 0.8680 Epoch 17/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2980 - accuracy: 0.9018 - val_loss: 0.5652 - val_accuracy: 0.8676 Epoch 18/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2890 - accuracy: 0.9054 - val_loss: 0.5596 - val_accuracy: 0.8708 Epoch 19/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2783 - accuracy: 0.9087 - val_loss: 0.6174 - val_accuracy: 0.8742 Epoch 20/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2711 - accuracy: 0.9113 - val_loss: 0.6321 - val_accuracy: 0.8706 Epoch 21/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2635 - accuracy: 0.9133 - val_loss: 0.6899 - val_accuracy: 0.8728 Epoch 22/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2590 - accuracy: 0.9156 - val_loss: 0.6208 - val_accuracy: 0.8774 Epoch 23/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2516 - accuracy: 0.9180 - val_loss: 0.6517 - val_accuracy: 0.8708 Epoch 24/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2451 - accuracy: 0.9195 - val_loss: 0.6852 - val_accuracy: 0.8732 Epoch 25/25 1719/1719 [==============================] - 5s 3ms/step - loss: 0.2421 
- accuracy: 0.9206 - val_loss: 0.6683 - val_accuracy: 0.8754
plt.plot(history.epoch, [piecewise_constant_fn(epoch) for epoch in history.epoch], "o-")
plt.axis([0, n_epochs - 1, 0, 0.011])
plt.xlabel("Epoch")
plt.ylabel("Learning Rate")
plt.title("Piecewise Constant Scheduling", fontsize=14)
plt.grid(True)
plt.show()
For performance scheduling, use the ReduceLROnPlateau callback; here it multiplies the learning rate by 0.5 whenever the validation loss does not improve for 5 consecutive epochs:
tf.random.set_seed(42)
np.random.seed(42)
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(10, activation="softmax")
])
optimizer = keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
n_epochs = 25
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid),
callbacks=[lr_scheduler])
Epoch 1/25 1719/1719 [==============================] - 4s 2ms/step - loss: 0.5889 - accuracy: 0.8079 - val_loss: 0.4879 - val_accuracy: 0.8516 Epoch 2/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.4959 - accuracy: 0.8388 - val_loss: 0.6396 - val_accuracy: 0.8240 Epoch 3/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.5203 - accuracy: 0.8412 - val_loss: 0.5057 - val_accuracy: 0.8576 Epoch 4/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.5169 - accuracy: 0.8459 - val_loss: 0.4907 - val_accuracy: 0.8578 Epoch 5/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.5305 - accuracy: 0.8484 - val_loss: 0.5726 - val_accuracy: 0.8306 Epoch 6/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.5184 - accuracy: 0.8537 - val_loss: 0.5930 - val_accuracy: 0.8454 Epoch 7/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3131 - accuracy: 0.8913 - val_loss: 0.3942 - val_accuracy: 0.8722 Epoch 8/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2618 - accuracy: 0.9037 - val_loss: 0.3978 - val_accuracy: 0.8732 Epoch 9/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2385 - accuracy: 0.9118 - val_loss: 0.3797 - val_accuracy: 0.8828 Epoch 10/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2240 - accuracy: 0.9181 - val_loss: 0.3976 - val_accuracy: 0.8916 Epoch 11/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2106 - accuracy: 0.9227 - val_loss: 0.3871 - val_accuracy: 0.8896 Epoch 12/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2015 - accuracy: 0.9250 - val_loss: 0.4809 - val_accuracy: 0.8680 Epoch 13/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.1890 - accuracy: 0.9299 - val_loss: 0.4645 - val_accuracy: 0.8830 Epoch 14/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.1777 - accuracy: 0.9339 - val_loss: 0.4485 - val_accuracy: 0.8768 Epoch 15/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.1216 - accuracy: 0.9527 - val_loss: 0.4029 - val_accuracy: 0.8922 Epoch 16/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.1057 - accuracy: 0.9593 - val_loss: 0.4127 - val_accuracy: 0.8952 Epoch 17/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0984 - accuracy: 0.9618 - val_loss: 0.4463 - val_accuracy: 0.8926 Epoch 18/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0895 - accuracy: 0.9653 - val_loss: 0.4632 - val_accuracy: 0.8898 Epoch 19/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0854 - accuracy: 0.9671 - val_loss: 0.4770 - val_accuracy: 0.8938 Epoch 20/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0636 - accuracy: 0.9765 - val_loss: 0.4725 - val_accuracy: 0.8928 Epoch 21/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0572 - accuracy: 0.9797 - val_loss: 0.4881 - val_accuracy: 0.8928 Epoch 22/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0540 - accuracy: 0.9807 - val_loss: 0.5021 - val_accuracy: 0.8934 Epoch 23/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0506 - accuracy: 0.9826 - val_loss: 0.5114 - val_accuracy: 0.8912 Epoch 24/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0472 - accuracy: 0.9838 - val_loss: 0.5236 - val_accuracy: 0.8942 Epoch 25/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.0393 
- accuracy: 0.9875 - val_loss: 0.5303 - val_accuracy: 0.8932
plt.plot(history.epoch, history.history["lr"], "bo-")
plt.xlabel("Epoch")
plt.ylabel("Learning Rate", color='b')
plt.tick_params('y', colors='b')
plt.gca().set_xlim(0, n_epochs - 1)
plt.grid(True)
ax2 = plt.gca().twinx()
ax2.plot(history.epoch, history.history["val_loss"], "r^-")
ax2.set_ylabel('Validation Loss', color='r')
ax2.tick_params('y', colors='r')
plt.title("Reduce LR on Plateau", fontsize=14)
plt.show()
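참고로 위 그래프의 학습률 곡선은 예를 들어 다음과 같은 ReduceLROnPlateau 콜백으로 얻을 수 있습니다. 간단한 스케치이며 factor=0.5, patience=5는 설명을 위해 가정한 값입니다:
# 검증 손실이 정체되면 학습률을 절반으로 줄이는 콜백 (하이퍼파라미터 값은 가정입니다)
lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=5)
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
                    validation_data=(X_valid_scaled, y_valid),
                    callbacks=[lr_scheduler])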
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(10, activation="softmax")
])
s = 20 * len(X_train) // 32 # number of steps in 20 epochs (batch size = 32)
learning_rate = keras.optimizers.schedules.ExponentialDecay(0.01, s, 0.1)
optimizer = keras.optimizers.SGD(learning_rate)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
n_epochs = 25
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.4894 - accuracy: 0.8277 - val_loss: 0.4096 - val_accuracy: 0.8592 Epoch 2/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3820 - accuracy: 0.8652 - val_loss: 0.3740 - val_accuracy: 0.8700 Epoch 3/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3487 - accuracy: 0.8766 - val_loss: 0.3735 - val_accuracy: 0.8688 Epoch 4/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3264 - accuracy: 0.8837 - val_loss: 0.3494 - val_accuracy: 0.8796 Epoch 5/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.3104 - accuracy: 0.8896 - val_loss: 0.3431 - val_accuracy: 0.8792 Epoch 6/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2958 - accuracy: 0.8951 - val_loss: 0.3415 - val_accuracy: 0.8808 Epoch 7/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2853 - accuracy: 0.8987 - val_loss: 0.3356 - val_accuracy: 0.8814 Epoch 8/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2760 - accuracy: 0.9016 - val_loss: 0.3368 - val_accuracy: 0.8814 Epoch 9/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2677 - accuracy: 0.9052 - val_loss: 0.3266 - val_accuracy: 0.8854 Epoch 10/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2607 - accuracy: 0.9067 - val_loss: 0.3243 - val_accuracy: 0.8862 Epoch 11/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2550 - accuracy: 0.9086 - val_loss: 0.3253 - val_accuracy: 0.8866 Epoch 12/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2496 - accuracy: 0.9126 - val_loss: 0.3305 - val_accuracy: 0.8808 Epoch 13/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2449 - accuracy: 0.9138 - val_loss: 0.3222 - val_accuracy: 0.8864 Epoch 14/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2415 - accuracy: 0.9148 - val_loss: 0.3225 - val_accuracy: 0.8860 Epoch 15/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2375 - accuracy: 0.9167 - val_loss: 0.3212 - val_accuracy: 0.8880 Epoch 16/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2343 - accuracy: 0.9182 - val_loss: 0.3187 - val_accuracy: 0.8884 Epoch 17/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2316 - accuracy: 0.9183 - val_loss: 0.3201 - val_accuracy: 0.8896 Epoch 18/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2291 - accuracy: 0.9197 - val_loss: 0.3171 - val_accuracy: 0.8900 Epoch 19/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2270 - accuracy: 0.9206 - val_loss: 0.3200 - val_accuracy: 0.8898 Epoch 20/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2250 - accuracy: 0.9220 - val_loss: 0.3173 - val_accuracy: 0.8900 Epoch 21/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2229 - accuracy: 0.9223 - val_loss: 0.3183 - val_accuracy: 0.8910 Epoch 22/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2216 - accuracy: 0.9224 - val_loss: 0.3167 - val_accuracy: 0.8912 Epoch 23/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2201 - accuracy: 0.9234 - val_loss: 0.3175 - val_accuracy: 0.8898 Epoch 24/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2188 - accuracy: 0.9239 - val_loss: 0.3170 - val_accuracy: 0.8898 Epoch 25/25 1719/1719 [==============================] - 3s 2ms/step - loss: 0.2179 
- accuracy: 0.9241 - val_loss: 0.3169 - val_accuracy: 0.8908
구간별 고정 스케줄링에는 다음과 같이 PiecewiseConstantDecay 스케줄을 사용하세요:
n_steps_per_epoch = len(X_train) // 32  # 배치 크기 32일 때 에포크당 스텝 수
learning_rate = keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[5. * n_steps_per_epoch, 15. * n_steps_per_epoch],
    values=[0.01, 0.005, 0.001])
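앞의 ExponentialDecay와 마찬가지로 이 스케줄 객체를 옵티마이저의 학습률로 전달하면 됩니다. 예를 들어 같은 패턴으로 다음과 같이 사용할 수 있습니다:
optimizer = keras.optimizers.SGD(learning_rate)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])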
K = keras.backend
class ExponentialLearningRate(keras.callbacks.Callback):
def __init__(self, factor):
self.factor = factor
self.rates = []
self.losses = []
def on_batch_end(self, batch, logs):
self.rates.append(K.get_value(self.model.optimizer.lr))
self.losses.append(logs["loss"])
K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)
import math  # find_learning_rate()에서 math.ceil()을 사용합니다

def find_learning_rate(model, X, y, epochs=1, batch_size=32, min_rate=10**-5, max_rate=10):
    init_weights = model.get_weights()
    iterations = math.ceil(len(X) / batch_size) * epochs
    factor = np.exp(np.log(max_rate / min_rate) / iterations)
    init_lr = K.get_value(model.optimizer.lr)
    K.set_value(model.optimizer.lr, min_rate)
    exp_lr = ExponentialLearningRate(factor)
    history = model.fit(X, y, epochs=epochs, batch_size=batch_size,
                        callbacks=[exp_lr])
    K.set_value(model.optimizer.lr, init_lr)
    model.set_weights(init_weights)
    return exp_lr.rates, exp_lr.losses
def plot_lr_vs_loss(rates, losses):
    plt.plot(rates, losses)
    plt.gca().set_xscale('log')
    plt.hlines(min(losses), min(rates), max(rates))
    plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 2])
    plt.xlabel("Learning rate")
    plt.ylabel("Loss")
경고: on_batch_end() 메서드는 원래 logs["loss"]로 배치 손실을 받았지만, 텐서플로 2.2.0부터는 (에포크 시작 이후의) 평균 손실로 바뀌었습니다. 텐서플로 2.2 이상을 사용한다면 이런 이유로 아래 그래프가 이전보다 훨씬 부드럽습니다. 또한 배치 손실이 폭주하기 시작하는 시점과 그것이 그래프에 뚜렷하게 나타나는 시점 사이에 지연이 생기므로, 배치 손실을 직접 볼 때보다 조금 더 작은 학습률을 선택해야 합니다. 다음과 같이 ExponentialLearningRate 콜백을 조금 바꾸어 (현재 평균 손실과 이전 평균 손실을 기반으로) 배치 손실을 다시 계산할 수도 있습니다:
class ExponentialLearningRate(keras.callbacks.Callback):
    def __init__(self, factor):
        self.factor = factor
        self.rates = []
        self.losses = []
    def on_epoch_begin(self, epoch, logs=None):
        self.prev_loss = 0
    def on_batch_end(self, batch, logs=None):
        # logs["loss"]는 에포크 시작 이후의 평균 손실이므로
        # 평균의 차이로부터 현재 배치의 손실을 역산합니다
        batch_loss = logs["loss"] * (batch + 1) - self.prev_loss * batch
        self.prev_loss = logs["loss"]
        self.rates.append(K.get_value(self.model.optimizer.lr))
        self.losses.append(batch_loss)
        K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy",
optimizer=keras.optimizers.SGD(learning_rate=1e-3),
metrics=["accuracy"])
batch_size = 128
rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)
plot_lr_vs_loss(rates, losses)
430/430 [==============================] - 2s 3ms/step - loss: nan - accuracy: 0.3861
class OneCycleScheduler(keras.callbacks.Callback):
    def __init__(self, iterations, max_rate, start_rate=None,
                 last_iterations=None, last_rate=None):
        self.iterations = iterations
        self.max_rate = max_rate
        self.start_rate = start_rate or max_rate / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = last_rate or self.start_rate / 1000
        self.iteration = 0
    def _interpolate(self, iter1, iter2, rate1, rate2):
        return ((rate2 - rate1) * (self.iteration - iter1)
                / (iter2 - iter1) + rate1)
    def on_batch_begin(self, batch, logs):
        if self.iteration < self.half_iteration:
            # 1단계: start_rate에서 max_rate까지 선형 증가
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.iteration < 2 * self.half_iteration:
            # 2단계: 다시 start_rate까지 선형 감소
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,
                                     self.max_rate, self.start_rate)
        else:
            # 3단계: 마지막 반복들에서 last_rate까지 더 낮춥니다
            rate = self._interpolate(2 * self.half_iteration, self.iterations,
                                     self.start_rate, self.last_rate)
        self.iteration += 1
        K.set_value(self.model.optimizer.lr, rate)
n_epochs = 25
onecycle = OneCycleScheduler(math.ceil(len(X_train) / batch_size) * n_epochs, max_rate=0.05)
history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,
validation_data=(X_valid_scaled, y_valid),
callbacks=[onecycle])
Epoch 1/25 430/430 [==============================] - 1s 3ms/step - loss: 0.6572 - accuracy: 0.7739 - val_loss: 0.4872 - val_accuracy: 0.8336 Epoch 2/25 430/430 [==============================] - 1s 3ms/step - loss: 0.4581 - accuracy: 0.8396 - val_loss: 0.4275 - val_accuracy: 0.8520 Epoch 3/25 430/430 [==============================] - 1s 3ms/step - loss: 0.4122 - accuracy: 0.8547 - val_loss: 0.4117 - val_accuracy: 0.8582 Epoch 4/25 430/430 [==============================] - 1s 3ms/step - loss: 0.3837 - accuracy: 0.8642 - val_loss: 0.3869 - val_accuracy: 0.8682 Epoch 5/25 430/430 [==============================] - 1s 3ms/step - loss: 0.3640 - accuracy: 0.8717 - val_loss: 0.3767 - val_accuracy: 0.8678 Epoch 6/25 430/430 [==============================] - 1s 3ms/step - loss: 0.3457 - accuracy: 0.8772 - val_loss: 0.3743 - val_accuracy: 0.8712 Epoch 7/25 430/430 [==============================] - 1s 3ms/step - loss: 0.3331 - accuracy: 0.8810 - val_loss: 0.3638 - val_accuracy: 0.8712 Epoch 8/25 430/430 [==============================] - 1s 3ms/step - loss: 0.3185 - accuracy: 0.8858 - val_loss: 0.3958 - val_accuracy: 0.8606 Epoch 9/25 430/430 [==============================] - 1s 3ms/step - loss: 0.3066 - accuracy: 0.8890 - val_loss: 0.3487 - val_accuracy: 0.8764 Epoch 10/25 430/430 [==============================] - 1s 3ms/step - loss: 0.2944 - accuracy: 0.8922 - val_loss: 0.3401 - val_accuracy: 0.8802 Epoch 11/25 430/430 [==============================] - 1s 3ms/step - loss: 0.2840 - accuracy: 0.8963 - val_loss: 0.3459 - val_accuracy: 0.8808 Epoch 12/25 430/430 [==============================] - 1s 3ms/step - loss: 0.2710 - accuracy: 0.9023 - val_loss: 0.3656 - val_accuracy: 0.8704 Epoch 13/25 430/430 [==============================] - 1s 3ms/step - loss: 0.2538 - accuracy: 0.9085 - val_loss: 0.3360 - val_accuracy: 0.8834 Epoch 14/25 430/430 [==============================] - 1s 3ms/step - loss: 0.2406 - accuracy: 0.9136 - val_loss: 0.3459 - val_accuracy: 0.8808 Epoch 15/25 430/430 [==============================] - 1s 3ms/step - loss: 0.2280 - accuracy: 0.9183 - val_loss: 0.3254 - val_accuracy: 0.8852 Epoch 16/25 430/430 [==============================] - 1s 3ms/step - loss: 0.2160 - accuracy: 0.9233 - val_loss: 0.3296 - val_accuracy: 0.8832 Epoch 17/25 430/430 [==============================] - 1s 3ms/step - loss: 0.2062 - accuracy: 0.9267 - val_loss: 0.3344 - val_accuracy: 0.8858 Epoch 18/25 430/430 [==============================] - 1s 3ms/step - loss: 0.1979 - accuracy: 0.9297 - val_loss: 0.3242 - val_accuracy: 0.8904 Epoch 19/25 430/430 [==============================] - 1s 3ms/step - loss: 0.1892 - accuracy: 0.9339 - val_loss: 0.3234 - val_accuracy: 0.8896 Epoch 20/25 430/430 [==============================] - 1s 3ms/step - loss: 0.1821 - accuracy: 0.9371 - val_loss: 0.3226 - val_accuracy: 0.8924 Epoch 21/25 430/430 [==============================] - 1s 3ms/step - loss: 0.1752 - accuracy: 0.9400 - val_loss: 0.3219 - val_accuracy: 0.8912 Epoch 22/25 430/430 [==============================] - 1s 3ms/step - loss: 0.1700 - accuracy: 0.9419 - val_loss: 0.3180 - val_accuracy: 0.8954 Epoch 23/25 430/430 [==============================] - 1s 3ms/step - loss: 0.1654 - accuracy: 0.9439 - val_loss: 0.3185 - val_accuracy: 0.8940 Epoch 24/25 430/430 [==============================] - 1s 3ms/step - loss: 0.1626 - accuracy: 0.9457 - val_loss: 0.3175 - val_accuracy: 0.8938 Epoch 25/25 430/430 [==============================] - 1s 3ms/step - loss: 0.1609 - accuracy: 0.9463 - val_loss: 0.3168 - 
val_accuracy: 0.8952
layer = keras.layers.Dense(100, activation="elu",
kernel_initializer="he_normal",
kernel_regularizer=keras.regularizers.l2(0.01))
# or l1(0.1) for ℓ1 regularization with a factor of 0.1
# or l1_l2(0.1, 0.01) for both ℓ1 and ℓ2 regularization, with factors 0.1 and 0.01 respectively
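예를 들어 위 주석처럼 ℓ1과 ℓ2 규제를 함께 적용하려면 다음과 같이 쓸 수 있습니다:
layer = keras.layers.Dense(100, activation="elu",
                           kernel_initializer="he_normal",
                           kernel_regularizer=keras.regularizers.l1_l2(0.1, 0.01))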
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dense(300, activation="elu",
kernel_initializer="he_normal",
kernel_regularizer=keras.regularizers.l2(0.01)),
keras.layers.Dense(100, activation="elu",
kernel_initializer="he_normal",
kernel_regularizer=keras.regularizers.l2(0.01)),
keras.layers.Dense(10, activation="softmax",
kernel_regularizer=keras.regularizers.l2(0.01))
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/2 1719/1719 [==============================] - 6s 3ms/step - loss: 1.5956 - accuracy: 0.8124 - val_loss: 0.7169 - val_accuracy: 0.8340 Epoch 2/2 1719/1719 [==============================] - 5s 3ms/step - loss: 0.7197 - accuracy: 0.8274 - val_loss: 0.6850 - val_accuracy: 0.8376
from functools import partial
RegularizedDense = partial(keras.layers.Dense,
activation="elu",
kernel_initializer="he_normal",
kernel_regularizer=keras.regularizers.l2(0.01))
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
RegularizedDense(300),
RegularizedDense(100),
RegularizedDense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/2 1719/1719 [==============================] - 5s 3ms/step - loss: 1.6313 - accuracy: 0.8113 - val_loss: 0.7218 - val_accuracy: 0.8310 Epoch 2/2 1719/1719 [==============================] - 5s 3ms/step - loss: 0.7187 - accuracy: 0.8273 - val_loss: 0.6826 - val_accuracy: 0.8382
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.Dropout(rate=0.2),
keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"),
keras.layers.Dropout(rate=0.2),
keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
keras.layers.Dropout(rate=0.2),
keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/2 1719/1719 [==============================] - 6s 3ms/step - loss: 0.5838 - accuracy: 0.7997 - val_loss: 0.3730 - val_accuracy: 0.8644 Epoch 2/2 1719/1719 [==============================] - 5s 3ms/step - loss: 0.4209 - accuracy: 0.8442 - val_loss: 0.3396 - val_accuracy: 0.8720
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
keras.layers.AlphaDropout(rate=0.2),
keras.layers.Dense(300, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.AlphaDropout(rate=0.2),
keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
keras.layers.AlphaDropout(rate=0.2),
keras.layers.Dense(10, activation="softmax")
])
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
n_epochs = 20
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/20 1719/1719 [==============================] - 5s 3ms/step - loss: 0.6641 - accuracy: 0.7594 - val_loss: 0.5788 - val_accuracy: 0.8446 Epoch 2/20 1719/1719 [==============================] - 4s 3ms/step - loss: 0.5565 - accuracy: 0.7947 - val_loss: 0.5192 - val_accuracy: 0.8522 Epoch 3/20 1719/1719 [==============================] - 4s 3ms/step - loss: 0.5206 - accuracy: 0.8075 - val_loss: 0.4896 - val_accuracy: 0.8598 Epoch 4/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.5084 - accuracy: 0.8124 - val_loss: 0.4880 - val_accuracy: 0.8596 Epoch 5/20 1719/1719 [==============================] - 4s 3ms/step - loss: 0.4949 - accuracy: 0.8188 - val_loss: 0.4237 - val_accuracy: 0.8694 Epoch 6/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4831 - accuracy: 0.8192 - val_loss: 0.4572 - val_accuracy: 0.8634 Epoch 7/20 1719/1719 [==============================] - 4s 3ms/step - loss: 0.4720 - accuracy: 0.8252 - val_loss: 0.4702 - val_accuracy: 0.8632 Epoch 8/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4610 - accuracy: 0.8273 - val_loss: 0.4265 - val_accuracy: 0.8668 Epoch 9/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4618 - accuracy: 0.8278 - val_loss: 0.4330 - val_accuracy: 0.8750 Epoch 10/20 1719/1719 [==============================] - 4s 3ms/step - loss: 0.4563 - accuracy: 0.8308 - val_loss: 0.4402 - val_accuracy: 0.8616 Epoch 11/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4491 - accuracy: 0.8323 - val_loss: 0.4245 - val_accuracy: 0.8722 Epoch 12/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4451 - accuracy: 0.8347 - val_loss: 0.5396 - val_accuracy: 0.8552 Epoch 13/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4384 - accuracy: 0.8377 - val_loss: 0.4285 - val_accuracy: 0.8770 Epoch 14/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4364 - accuracy: 0.8393 - val_loss: 0.4391 - val_accuracy: 0.8664 Epoch 15/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4326 - accuracy: 0.8386 - val_loss: 0.4343 - val_accuracy: 0.8696 Epoch 16/20 1719/1719 [==============================] - 4s 3ms/step - loss: 0.4316 - accuracy: 0.8390 - val_loss: 0.4204 - val_accuracy: 0.8776 Epoch 17/20 1719/1719 [==============================] - 4s 3ms/step - loss: 0.4279 - accuracy: 0.8400 - val_loss: 0.5390 - val_accuracy: 0.8598 Epoch 18/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4267 - accuracy: 0.8404 - val_loss: 0.4812 - val_accuracy: 0.8732 Epoch 19/20 1719/1719 [==============================] - 4s 3ms/step - loss: 0.4251 - accuracy: 0.8409 - val_loss: 0.4696 - val_accuracy: 0.8738 Epoch 20/20 1719/1719 [==============================] - 4s 2ms/step - loss: 0.4184 - accuracy: 0.8431 - val_loss: 0.4388 - val_accuracy: 0.8740
model.evaluate(X_test_scaled, y_test)
313/313 [==============================] - 0s 1ms/step - loss: 0.4765 - accuracy: 0.8596
[0.4765377938747406, 0.8596000075340271]
model.evaluate(X_train_scaled, y_train)
1719/1719 [==============================] - 2s 1ms/step - loss: 0.3489 - accuracy: 0.8833
[0.3489045798778534, 0.8833272457122803]
history = model.fit(X_train_scaled, y_train)
1719/1719 [==============================] - 4s 2ms/step - loss: 0.4226 - accuracy: 0.8425
tf.random.set_seed(42)
np.random.seed(42)
y_probas = np.stack([model(X_test_scaled, training=True)
for sample in range(100)])
y_proba = y_probas.mean(axis=0)
y_std = y_probas.std(axis=0)
np.round(model.predict(X_test_scaled[:1]), 2)
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)
np.round(y_probas[:, :1], 2)
array([[[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.65, 0. , 0.33]], [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.89, 0. , 0.09]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.99]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.06, 0. , 0.93]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.27, 0. , 0.72]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.53, 0. , 0.46]], [[0. , 0. , 0. , 0. , 0. , 0.2 , 0. , 0.31, 0. , 0.49]], [[0. , 0. , 0. , 0. , 0. , 0.13, 0. , 0.31, 0. , 0.56]], [[0. , 0. , 0. , 0. , 0. , 0.38, 0. , 0.07, 0. , 0.55]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.05, 0. , 0.94]], [[0. , 0. , 0. , 0. , 0. , 0.43, 0. , 0.25, 0. , 0.31]], [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.11, 0. , 0.86]], [[0. , 0. , 0. , 0. , 0. , 0.1 , 0. , 0.1 , 0. , 0.79]], [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.31, 0. , 0.61]], [[0. , 0. , 0. , 0. , 0. , 0.37, 0. , 0.08, 0. , 0.55]], [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0. , 0. , 0.96]], [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.49, 0. , 0.47]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.15, 0. , 0.84]], [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0. , 0. , 0.96]], [[0. , 0. , 0. , 0. , 0. , 0.93, 0.01, 0. , 0. , 0.06]], [[0. , 0. , 0. , 0. , 0. , 0.08, 0. , 0.02, 0. , 0.89]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.09, 0. , 0.91]], [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.03, 0. , 0.94]], [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.79, 0. , 0.17]], [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.06, 0. , 0.91]], [[0. , 0. , 0. , 0. , 0. , 0.23, 0. , 0.33, 0. , 0.44]], [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.05, 0. , 0.9 ]], [[0. , 0. , 0. , 0. , 0. , 0.25, 0. , 0.5 , 0. , 0.25]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]], [[0. , 0. , 0. , 0. , 0. , 0.65, 0. , 0.01, 0. , 0.34]], [[0. , 0. , 0. , 0. , 0. , 0.18, 0. , 0.74, 0. , 0.08]], [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.08, 0. , 0.9 ]], [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.77, 0. , 0.16]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.98]], [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.13, 0. , 0.84]], [[0. , 0. , 0. , 0. , 0. , 0.08, 0. , 0.23, 0. , 0.69]], [[0. , 0. , 0. , 0. , 0. , 0.48, 0. , 0.03, 0. , 0.49]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.07, 0. , 0.92]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.98]], [[0. , 0. , 0. , 0. , 0. , 0.5 , 0. , 0.29, 0. , 0.21]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.02, 0. , 0.97]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]], [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.08, 0. , 0.8 ]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.47, 0. , 0.53]], [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.2 , 0. , 0.78]], [[0. , 0. , 0. , 0. , 0. , 0.42, 0. , 0.25, 0. , 0.33]], [[0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.83, 0. , 0.1 ]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]], [[0. , 0. , 0. , 0. , 0. , 0.19, 0. , 0.04, 0. , 0.77]], [[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.01, 0. , 0.88]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.98]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.15, 0. , 0.85]], [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.15, 0. , 0.82]], [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.23, 0. , 0.72]], [[0. , 0. , 0. , 0. , 0. , 0.22, 0. , 0.2 , 0. , 0.57]], [[0. , 0. , 0. , 0. , 0. , 0.13, 0. , 0.02, 0. , 0.84]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.15, 0. , 0.84]], [[0. , 0. , 0. , 0. , 0. , 0.04, 0. 
, 0.01, 0. , 0.95]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.12, 0. , 0.87]], [[0. , 0. , 0. , 0. , 0. , 0.36, 0. , 0.55, 0. , 0.09]], [[0. , 0. , 0. , 0. , 0. , 0.41, 0. , 0.43, 0. , 0.16]], [[0. , 0. , 0. , 0. , 0. , 0.14, 0. , 0.47, 0. , 0.39]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.99]], [[0. , 0. , 0. , 0. , 0. , 0.09, 0. , 0.54, 0. , 0.37]], [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.52, 0. , 0.41]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]], [[0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.08, 0. , 0.89]], [[0. , 0. , 0. , 0. , 0. , 0.07, 0. , 0.12, 0. , 0.81]], [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.09, 0. , 0.89]], [[0. , 0. , 0. , 0. , 0. , 0.12, 0. , 0.26, 0. , 0.62]], [[0. , 0. , 0. , 0. , 0. , 0.26, 0. , 0.01, 0. , 0.72]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.07, 0. , 0.92]], [[0. , 0. , 0. , 0. , 0. , 0.06, 0. , 0.65, 0. , 0.29]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.27, 0. , 0.72]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.08, 0. , 0.91]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.96]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.01, 0. , 0.98]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1. ]], [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.04, 0. , 0.94]], [[0. , 0. , 0. , 0. , 0. , 0.05, 0. , 0.41, 0. , 0.54]], [[0. , 0. , 0. , 0. , 0. , 0.96, 0. , 0.02, 0. , 0.01]], [[0. , 0. , 0. , 0. , 0. , 0.13, 0. , 0.02, 0. , 0.85]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.97]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.98]], [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.06, 0. , 0.9 ]], [[0. , 0. , 0. , 0. , 0. , 0.04, 0. , 0.08, 0. , 0.87]], [[0. , 0. , 0. , 0. , 0. , 0.24, 0. , 0.56, 0. , 0.2 ]], [[0. , 0. , 0. , 0. , 0. , 0.31, 0. , 0.14, 0. , 0.56]], [[0. , 0. , 0. , 0. , 0. , 0.02, 0. , 0.08, 0. , 0.9 ]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0. , 0. , 0.99]], [[0. , 0. , 0. , 0. , 0. , 0.01, 0. , 0.27, 0. , 0.72]], [[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.03, 0. , 0.97]], [[0. , 0. , 0. , 0. , 0. , 0.28, 0. , 0.19, 0. , 0.52]], [[0. , 0. , 0. , 0. , 0. , 0.26, 0. , 0.17, 0. , 0.57]]], dtype=float32)
np.round(y_proba[:1], 2)
array([[0. , 0. , 0. , 0. , 0. , 0.11, 0. , 0.18, 0. , 0.71]], dtype=float32)
y_std = y_probas.std(axis=0)
np.round(y_std[:1], 2)
array([[0. , 0. , 0. , 0. , 0. , 0.18, 0. , 0.22, 0. , 0.29]], dtype=float32)
y_pred = np.argmax(y_proba, axis=1)
accuracy = np.sum(y_pred == y_test) / len(y_test)
accuracy
0.8627
class MCDropout(keras.layers.Dropout):
    def call(self, inputs):
        return super().call(inputs, training=True)

class MCAlphaDropout(keras.layers.AlphaDropout):
    def call(self, inputs):
        return super().call(inputs, training=True)
tf.random.set_seed(42)
np.random.seed(42)
mc_model = keras.models.Sequential([
MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer
for layer in model.layers
])
mc_model.summary()
Model: "sequential_21" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_18 (Flatten) (None, 784) 0 _________________________________________________________________ mc_alpha_dropout (MCAlphaDro (None, 784) 0 _________________________________________________________________ dense_263 (Dense) (None, 300) 235500 _________________________________________________________________ mc_alpha_dropout_1 (MCAlphaD (None, 300) 0 _________________________________________________________________ dense_264 (Dense) (None, 100) 30100 _________________________________________________________________ mc_alpha_dropout_2 (MCAlphaD (None, 100) 0 _________________________________________________________________ dense_265 (Dense) (None, 10) 1010 ================================================================= Total params: 266,610 Trainable params: 266,610 Non-trainable params: 0 _________________________________________________________________
optimizer = keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
mc_model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
mc_model.set_weights(model.get_weights())
이제 MC 드롭아웃을 모델에 사용할 수 있습니다:
np.round(np.mean([mc_model.predict(X_test_scaled[:1]) for sample in range(100)], axis=0), 2)
array([[0. , 0. , 0. , 0. , 0. , 0.16, 0. , 0.22, 0. , 0.62]], dtype=float32)
layer = keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal",
kernel_constraint=keras.constraints.max_norm(1.))
MaxNormDense = partial(keras.layers.Dense,
activation="selu", kernel_initializer="lecun_normal",
kernel_constraint=keras.constraints.max_norm(1.))
model = keras.models.Sequential([
keras.layers.Flatten(input_shape=[28, 28]),
MaxNormDense(300),
MaxNormDense(100),
keras.layers.Dense(10, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="nadam", metrics=["accuracy"])
n_epochs = 2
history = model.fit(X_train_scaled, y_train, epochs=n_epochs,
validation_data=(X_valid_scaled, y_valid))
Epoch 1/2 1719/1719 [==============================] - 6s 3ms/step - loss: 0.4747 - accuracy: 0.8329 - val_loss: 0.3831 - val_accuracy: 0.8564 Epoch 2/2 1719/1719 [==============================] - 5s 3ms/step - loss: 0.3536 - accuracy: 0.8716 - val_loss: 0.3676 - val_accuracy: 0.8670
부록 A 참조.
문제: 100개의 뉴런을 가진 은닉층 20개로 심층 신경망을 만들어보세요(너무 많은 것 같지만 이 연습문제의 핵심입니다). He 초기화와 ELU 활성화 함수를 사용하세요.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 activation="elu",
                                 kernel_initializer="he_normal"))
문제: Nadam 옵티마이저와 조기 종료를 사용하여 CIFAR10 데이터셋에 이 네트워크를 훈련하세요. keras.datasets.cifar10.load_data()를 사용하여 데이터를 적재할 수 있습니다. 이 데이터셋은 10개의 클래스와 32×32 크기의 컬러 이미지 60,000개로 구성됩니다(50,000개는 훈련, 10,000개는 테스트). 따라서 10개의 뉴런과 소프트맥스 활성화 함수를 사용하는 출력층이 필요합니다. 모델 구조와 하이퍼파라미터를 바꿀 때마다 적절한 학습률을 다시 찾아야 한다는 것을 기억하세요.
모델에 출력층을 추가합니다:
model.add(keras.layers.Dense(10, activation="softmax"))
학습률 5e-5인 Nadam 옵티마이저를 사용해 보죠. 학습률 1e-5, 3e-5, 1e-4, 3e-4, 1e-3, 3e-3, 1e-2를 테스트하고 10번의 에포크 동안 (아래 텐서보드 콜백으로) 학습 곡선을 비교해 보았습니다. 학습률 3e-5와 1e-4가 꽤 좋았기 때문에 5e-5를 시도해 보았고 조금 더 나은 결과를 냈습니다.
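이런 학습률 비교 실험은 예를 들어 다음과 같이 수행할 수 있습니다. 간단한 스케치이며, 아래에서 적재하는 CIFAR10 데이터(X_train, X_valid 등)와 위와 같은 구조의 모델을 가정합니다(build_model은 설명을 위해 도입한 가상의 헬퍼 함수입니다):
def build_model(n_hidden=20):
    # 위와 동일한 구조의 모델을 만드는 (가정한) 헬퍼 함수
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(100, activation="elu",
                                     kernel_initializer="he_normal"))
    model.add(keras.layers.Dense(10, activation="softmax"))
    return model

for lr in (1e-5, 3e-5, 1e-4, 3e-4, 1e-3, 3e-3, 1e-2):
    keras.backend.clear_session()
    tf.random.set_seed(42)
    np.random.seed(42)
    model = build_model()
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=keras.optimizers.Nadam(learning_rate=lr),
                  metrics=["accuracy"])
    # 학습률마다 별도의 로그 디렉터리에 기록해 텐서보드에서 곡선을 비교합니다
    run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "lr_{:.0e}".format(lr))
    tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
    model.fit(X_train, y_train, epochs=10,
              validation_data=(X_valid, y_valid),
              callbacks=[tensorboard_cb])
이제 선택한 학습률 5e-5로 옵티마이저를 만들고 모델을 컴파일합니다: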
optimizer = keras.optimizers.Nadam(learning_rate=5e-5)
model.compile(loss="sparse_categorical_crossentropy",
optimizer=optimizer,
metrics=["accuracy"])
CIFAR10 데이터셋을 로드하죠. 조기 종료를 사용하기 때문에 검증 세트가 필요합니다. 원본 훈련 세트에서 처음 5,000개를 검증 세트로 사용하겠습니다:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()
X_train = X_train_full[5000:]
y_train = y_train_full[5000:]
X_valid = X_train_full[:5000]
y_valid = y_train_full[:5000]
이제 콜백을 만들고 모델을 훈련합니다:
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_model.h5", save_best_only=True)
run_index = 1 # 모델을 훈련할 때마다 증가시킴
run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "run_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]
2021-10-10 02:00:32.263585: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing. 2021-10-10 02:00:32.263638: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started. 2021-10-10 02:00:32.263724: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
코랩에서 아래 셀에 있는 %tensorboard 명령을 실행하면 텐서보드 화면을 볼 수 있습니다.
%tensorboard --logdir=./my_cifar10_logs --port=6006
model.fit(X_train, y_train, epochs=100,
validation_data=(X_valid, y_valid),
callbacks=callbacks)
Epoch 1/100 22/1407 [..............................] - ETA: 16s - loss: 55.1513 - accuracy: 0.1207
2021-10-10 02:00:37.792046: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing. 2021-10-10 02:00:37.792093: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started. 2021-10-10 02:00:37.801048: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data. 2021-10-10 02:00:37.806955: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down. 2021-10-10 02:00:37.817439: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37 2021-10-10 02:00:37.823185: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37/instance-1.trace.json.gz 2021-10-10 02:00:37.833949: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37 2021-10-10 02:00:37.834168: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for memory_profile.json.gz to ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37/instance-1.memory_profile.json.gz 2021-10-10 02:00:37.834914: I tensorflow/core/profiler/rpc/client/capture_profile.cc:251] Creating directory: ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37 Dumped tool data for xplane.pb to ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37/instance-1.xplane.pb Dumped tool data for overview_page.pb to ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37/instance-1.overview_page.pb Dumped tool data for input_pipeline.pb to ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37/instance-1.input_pipeline.pb Dumped tool data for tensorflow_stats.pb to ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37/instance-1.tensorflow_stats.pb Dumped tool data for kernel_stats.pb to ./my_cifar10_logs/run_001/train/plugins/profile/2021_10_10_02_00_37/instance-1.kernel_stats.pb
1407/1407 [==============================] - 12s 6ms/step - loss: 4.2185 - accuracy: 0.1574 - val_loss: 2.1635 - val_accuracy: 0.2170 Epoch 2/100 1407/1407 [==============================] - 9s 6ms/step - loss: 2.0720 - accuracy: 0.2463 - val_loss: 2.0470 - val_accuracy: 0.2470 Epoch 3/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.9429 - accuracy: 0.2905 - val_loss: 1.9534 - val_accuracy: 0.2886 Epoch 4/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.8597 - accuracy: 0.3232 - val_loss: 1.8771 - val_accuracy: 0.3340 Epoch 5/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.8019 - accuracy: 0.3426 - val_loss: 1.8094 - val_accuracy: 0.3466 Epoch 6/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.7501 - accuracy: 0.3665 - val_loss: 1.7618 - val_accuracy: 0.3708 Epoch 7/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.7086 - accuracy: 0.3802 - val_loss: 1.7529 - val_accuracy: 0.3648 Epoch 8/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.6742 - accuracy: 0.3963 - val_loss: 1.6654 - val_accuracy: 0.3974 Epoch 9/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.6430 - accuracy: 0.4063 - val_loss: 1.6337 - val_accuracy: 0.4082 Epoch 10/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.6197 - accuracy: 0.4140 - val_loss: 1.6689 - val_accuracy: 0.4024 Epoch 11/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.5987 - accuracy: 0.4226 - val_loss: 1.6639 - val_accuracy: 0.4046 Epoch 12/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.5758 - accuracy: 0.4297 - val_loss: 1.6391 - val_accuracy: 0.4022 Epoch 13/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.5559 - accuracy: 0.4358 - val_loss: 1.6196 - val_accuracy: 0.4108 Epoch 14/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.5429 - accuracy: 0.4430 - val_loss: 1.6304 - val_accuracy: 0.4172 Epoch 15/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.5268 - accuracy: 0.4497 - val_loss: 1.5864 - val_accuracy: 0.4298 Epoch 16/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.5095 - accuracy: 0.4554 - val_loss: 1.5616 - val_accuracy: 0.4438 Epoch 17/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.4956 - accuracy: 0.4614 - val_loss: 1.5776 - val_accuracy: 0.4400 Epoch 18/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.4803 - accuracy: 0.4684 - val_loss: 1.6018 - val_accuracy: 0.4248 Epoch 19/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.4687 - accuracy: 0.4696 - val_loss: 1.5597 - val_accuracy: 0.4402 Epoch 20/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.4562 - accuracy: 0.4734 - val_loss: 1.5343 - val_accuracy: 0.4492 Epoch 21/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.4422 - accuracy: 0.4806 - val_loss: 1.5665 - val_accuracy: 0.4384 Epoch 22/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.4297 - accuracy: 0.4844 - val_loss: 1.5450 - val_accuracy: 0.4450 Epoch 23/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.4207 - accuracy: 0.4896 - val_loss: 1.5538 - val_accuracy: 0.4478 Epoch 24/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.4067 - accuracy: 0.4929 - val_loss: 1.5521 - val_accuracy: 0.4400 Epoch 25/100 1407/1407 [==============================] - 8s 6ms/step 
- loss: 1.4008 - accuracy: 0.4956 - val_loss: 1.5262 - val_accuracy: 0.4514 Epoch 26/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3853 - accuracy: 0.4983 - val_loss: 1.5717 - val_accuracy: 0.4388 Epoch 27/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3736 - accuracy: 0.5059 - val_loss: 1.5212 - val_accuracy: 0.4598 Epoch 28/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3686 - accuracy: 0.5076 - val_loss: 1.5759 - val_accuracy: 0.4458 Epoch 29/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3565 - accuracy: 0.5109 - val_loss: 1.4968 - val_accuracy: 0.4686 Epoch 30/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3459 - accuracy: 0.5141 - val_loss: 1.5707 - val_accuracy: 0.4494 Epoch 31/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3368 - accuracy: 0.5203 - val_loss: 1.5303 - val_accuracy: 0.4542 Epoch 32/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3293 - accuracy: 0.5209 - val_loss: 1.5137 - val_accuracy: 0.4608 Epoch 33/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3219 - accuracy: 0.5219 - val_loss: 1.5409 - val_accuracy: 0.4592 Epoch 34/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.3153 - accuracy: 0.5272 - val_loss: 1.5710 - val_accuracy: 0.4514 Epoch 35/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.3043 - accuracy: 0.5298 - val_loss: 1.5353 - val_accuracy: 0.4576 Epoch 36/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2980 - accuracy: 0.5328 - val_loss: 1.5232 - val_accuracy: 0.4652 Epoch 37/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.2887 - accuracy: 0.5342 - val_loss: 1.5187 - val_accuracy: 0.4662 Epoch 38/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.2783 - accuracy: 0.5399 - val_loss: 1.5217 - val_accuracy: 0.4688 Epoch 39/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2728 - accuracy: 0.5413 - val_loss: 1.5294 - val_accuracy: 0.4650 Epoch 40/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2647 - accuracy: 0.5430 - val_loss: 1.5035 - val_accuracy: 0.4694 Epoch 41/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.2586 - accuracy: 0.5484 - val_loss: 1.5070 - val_accuracy: 0.4670 Epoch 42/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2520 - accuracy: 0.5488 - val_loss: 1.5238 - val_accuracy: 0.4670 Epoch 43/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2403 - accuracy: 0.5514 - val_loss: 1.5230 - val_accuracy: 0.4648 Epoch 44/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2356 - accuracy: 0.5552 - val_loss: 1.5581 - val_accuracy: 0.4540 Epoch 45/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2231 - accuracy: 0.5590 - val_loss: 1.5044 - val_accuracy: 0.4722 Epoch 46/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2191 - accuracy: 0.5606 - val_loss: 1.5136 - val_accuracy: 0.4720 Epoch 47/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.2086 - accuracy: 0.5625 - val_loss: 1.5134 - val_accuracy: 0.4690 Epoch 48/100 1407/1407 [==============================] - 8s 6ms/step - loss: 1.2042 - accuracy: 0.5661 - val_loss: 1.5057 - val_accuracy: 0.4742 Epoch 49/100 1407/1407 [==============================] - 9s 6ms/step - loss: 1.1936 - accuracy: 0.5680 - val_loss: 
1.5303 - val_accuracy: 0.4734
<keras.callbacks.History at 0x7f97b20c00b8>
model = keras.models.load_model("my_cifar10_model.h5")
model.evaluate(X_valid, y_valid)
157/157 [==============================] - 1s 2ms/step - loss: 1.4968 - accuracy: 0.4686
[1.4967584609985352, 0.46860000491142273]
가장 낮은 검증 손실을 내는 모델은 검증 세트에서 약 46.9% 정확도를 얻었습니다. 이 검증 점수에 도달하는 데 29번의 에포크가 걸렸고, (GPU가 없는) 제 노트북에서 에포크당 약 8~9초 정도 걸렸습니다. 배치 정규화를 사용해 성능을 올릴 수 있는지 확인해 보죠.
문제: 배치 정규화를 추가하고 학습 곡선을 비교해보세요. 이전보다 빠르게 수렴하나요? 더 좋은 모델이 만들어지나요? 훈련 속도에는 어떤 영향을 미치나요?
다음 코드는 위의 코드와 매우 비슷하지만 몇 가지 다른 점이 있습니다. 출력층을 제외한 각 Dense 층 다음에 (활성화 함수 전에) BN 층을 추가했고, 첫 번째 은닉층 전에도 BN 층을 추가했습니다. 또한 학습률을 5e-4로 바꾸었습니다.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
model.add(keras.layers.BatchNormalization())
for _ in range(20):
    model.add(keras.layers.Dense(100, kernel_initializer="he_normal"))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation("elu"))
model.add(keras.layers.Dense(10, activation="softmax"))
optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(loss="sparse_categorical_crossentropy",
optimizer=optimizer,
metrics=["accuracy"])
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_bn_model.h5", save_best_only=True)
run_index = 1 # 모델을 훈련할 때마다 증가시킴
run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "run_bn_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]
model.fit(X_train, y_train, epochs=100,
validation_data=(X_valid, y_valid),
callbacks=callbacks)
model = keras.models.load_model("my_cifar10_bn_model.h5")
model.evaluate(X_valid, y_valid)
2021-10-10 02:07:32.842926: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing. 2021-10-10 02:07:32.843104: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started. 2021-10-10 02:07:32.843403: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
Epoch 1/100 2/1407 [..............................] - ETA: 5:02 - loss: 2.8693 - accuracy: 0.1094
2021-10-10 02:07:40.561943: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing. 2021-10-10 02:07:40.562214: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
8/1407 [..............................] - ETA: 2:55 - loss: 2.7177 - accuracy: 0.1016
2021-10-10 02:07:41.055928: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data. 2021-10-10 02:07:41.078270: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down. 2021-10-10 02:07:41.108257: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41 2021-10-10 02:07:41.122373: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41/instance-1.trace.json.gz 2021-10-10 02:07:41.155742: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41 2021-10-10 02:07:41.156074: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for memory_profile.json.gz to ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41/instance-1.memory_profile.json.gz 2021-10-10 02:07:41.158809: I tensorflow/core/profiler/rpc/client/capture_profile.cc:251] Creating directory: ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41 Dumped tool data for xplane.pb to ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41/instance-1.xplane.pb Dumped tool data for overview_page.pb to ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41/instance-1.overview_page.pb Dumped tool data for input_pipeline.pb to ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41/instance-1.input_pipeline.pb Dumped tool data for tensorflow_stats.pb to ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41/instance-1.tensorflow_stats.pb Dumped tool data for kernel_stats.pb to ./my_cifar10_logs/run_bn_001/train/plugins/profile/2021_10_10_02_07_41/instance-1.kernel_stats.pb
1407/1407 [==============================] - 25s 13ms/step - loss: 1.8375 - accuracy: 0.3419 - val_loss: 1.6449 - val_accuracy: 0.4120 Epoch 2/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.6605 - accuracy: 0.4096 - val_loss: 1.6076 - val_accuracy: 0.4172 Epoch 3/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.5923 - accuracy: 0.4328 - val_loss: 1.5143 - val_accuracy: 0.4638 Epoch 4/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.5420 - accuracy: 0.4536 - val_loss: 1.5096 - val_accuracy: 0.4654 Epoch 5/100 1407/1407 [==============================] - 16s 11ms/step - loss: 1.4995 - accuracy: 0.4678 - val_loss: 1.4309 - val_accuracy: 0.4936 Epoch 6/100 1407/1407 [==============================] - 16s 11ms/step - loss: 1.4651 - accuracy: 0.4808 - val_loss: 1.4100 - val_accuracy: 0.4954 Epoch 7/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.4308 - accuracy: 0.4934 - val_loss: 1.4097 - val_accuracy: 0.4982 Epoch 8/100 1407/1407 [==============================] - 16s 11ms/step - loss: 1.4024 - accuracy: 0.5018 - val_loss: 1.3888 - val_accuracy: 0.5028 Epoch 9/100 1407/1407 [==============================] - 16s 11ms/step - loss: 1.3789 - accuracy: 0.5106 - val_loss: 1.3670 - val_accuracy: 0.5172 Epoch 10/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.3578 - accuracy: 0.5190 - val_loss: 1.3578 - val_accuracy: 0.5166 Epoch 11/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.3384 - accuracy: 0.5264 - val_loss: 1.3728 - val_accuracy: 0.5106 Epoch 12/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.3142 - accuracy: 0.5364 - val_loss: 1.3836 - val_accuracy: 0.5076 Epoch 13/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.2976 - accuracy: 0.5418 - val_loss: 1.3877 - val_accuracy: 0.5080 Epoch 14/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.2772 - accuracy: 0.5473 - val_loss: 1.3546 - val_accuracy: 0.5262 Epoch 15/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.2586 - accuracy: 0.5564 - val_loss: 1.3646 - val_accuracy: 0.5232 Epoch 16/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.2498 - accuracy: 0.5575 - val_loss: 1.3733 - val_accuracy: 0.5278 Epoch 17/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.2277 - accuracy: 0.5647 - val_loss: 1.3282 - val_accuracy: 0.5286 Epoch 18/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.2111 - accuracy: 0.5727 - val_loss: 1.3356 - val_accuracy: 0.5336 Epoch 19/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.1970 - accuracy: 0.5799 - val_loss: 1.3403 - val_accuracy: 0.5324 Epoch 20/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.1867 - accuracy: 0.5828 - val_loss: 1.3695 - val_accuracy: 0.5220 Epoch 21/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.1739 - accuracy: 0.5862 - val_loss: 1.3694 - val_accuracy: 0.5206 Epoch 22/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.1538 - accuracy: 0.5933 - val_loss: 1.3414 - val_accuracy: 0.5270 Epoch 23/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.1470 - accuracy: 0.5964 - val_loss: 1.3346 - val_accuracy: 0.5382 Epoch 24/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.1348 - accuracy: 0.6004 - val_loss: 1.3432 - val_accuracy: 0.5392 Epoch 25/100 1407/1407 
[==============================] - 16s 12ms/step - loss: 1.1244 - accuracy: 0.6039 - val_loss: 1.3435 - val_accuracy: 0.5370 Epoch 26/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.1108 - accuracy: 0.6087 - val_loss: 1.3529 - val_accuracy: 0.5326 Epoch 27/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0964 - accuracy: 0.6130 - val_loss: 1.3500 - val_accuracy: 0.5292 Epoch 28/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0934 - accuracy: 0.6170 - val_loss: 1.3525 - val_accuracy: 0.5360 Epoch 29/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0825 - accuracy: 0.6184 - val_loss: 1.3644 - val_accuracy: 0.5272 Epoch 30/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0681 - accuracy: 0.6236 - val_loss: 1.3699 - val_accuracy: 0.5306 Epoch 31/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0545 - accuracy: 0.6249 - val_loss: 1.3717 - val_accuracy: 0.5376 Epoch 32/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0445 - accuracy: 0.6323 - val_loss: 1.3760 - val_accuracy: 0.5412 Epoch 33/100 1407/1407 [==============================] - 16s 12ms/step - loss: 1.0319 - accuracy: 0.6372 - val_loss: 1.3725 - val_accuracy: 0.5388 Epoch 34/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0282 - accuracy: 0.6355 - val_loss: 1.3553 - val_accuracy: 0.5488 Epoch 35/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0174 - accuracy: 0.6413 - val_loss: 1.4069 - val_accuracy: 0.5312 Epoch 36/100 1407/1407 [==============================] - 17s 12ms/step - loss: 1.0103 - accuracy: 0.6444 - val_loss: 1.3772 - val_accuracy: 0.5446 Epoch 37/100 1407/1407 [==============================] - 16s 12ms/step - loss: 0.9954 - accuracy: 0.6481 - val_loss: 1.3570 - val_accuracy: 0.5406 157/157 [==============================] - 1s 3ms/step - loss: 1.3282 - accuracy: 0.5286
[1.328158974647522, 0.5285999774932861]
문제: 배치 정규화를 SELU로 바꾸어보세요. 네트워크가 자기 정규화하기 위해 필요한 변경 사항을 적용해보세요(즉, 입력 특성 표준화, 르쿤 정규분포 초기화, 완전 연결 층만 순차적으로 쌓은 심층 신경망 등).
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation="selu"))
model.add(keras.layers.Dense(10, activation="softmax"))
optimizer = keras.optimizers.Nadam(learning_rate=7e-4)
model.compile(loss="sparse_categorical_crossentropy",
optimizer=optimizer,
metrics=["accuracy"])
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_selu_model.h5", save_best_only=True)
run_index = 1 # 모델을 훈련할 때마다 증가시킴
run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "run_selu_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds
model.fit(X_train_scaled, y_train, epochs=100,
validation_data=(X_valid_scaled, y_valid),
callbacks=callbacks)
model = keras.models.load_model("my_cifar10_selu_model.h5")
model.evaluate(X_valid_scaled, y_valid)
2021-10-10 02:17:56.621633: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing. 2021-10-10 02:17:56.621804: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started. 2021-10-10 02:17:56.622160: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down.
Epoch 1/100 2/1407 [..............................] - ETA: 1:47 - loss: 3.0440 - accuracy: 0.1094
2021-10-10 02:18:02.441434: I tensorflow/core/profiler/lib/profiler_session.cc:131] Profiler session initializing. 2021-10-10 02:18:02.441637: I tensorflow/core/profiler/lib/profiler_session.cc:146] Profiler session started.
28/1407 [..............................] - ETA: 28s - loss: 2.4112 - accuracy: 0.1864
2021-10-10 02:18:02.724666: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data. 2021-10-10 02:18:02.732012: I tensorflow/core/profiler/lib/profiler_session.cc:164] Profiler session tear down. 2021-10-10 02:18:02.743009: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02 2021-10-10 02:18:02.748681: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02/instance-1.trace.json.gz 2021-10-10 02:18:02.759983: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02 2021-10-10 02:18:02.760264: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for memory_profile.json.gz to ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02/instance-1.memory_profile.json.gz 2021-10-10 02:18:02.761347: I tensorflow/core/profiler/rpc/client/capture_profile.cc:251] Creating directory: ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02 Dumped tool data for xplane.pb to ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02/instance-1.xplane.pb Dumped tool data for overview_page.pb to ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02/instance-1.overview_page.pb Dumped tool data for input_pipeline.pb to ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02/instance-1.input_pipeline.pb Dumped tool data for tensorflow_stats.pb to ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02/instance-1.tensorflow_stats.pb Dumped tool data for kernel_stats.pb to ./my_cifar10_logs/run_selu_001/train/plugins/profile/2021_10_10_02_18_02/instance-1.kernel_stats.pb
1407/1407 [==============================] - 11s 6ms/step - loss: 1.9366 - accuracy: 0.3096 - val_loss: 1.8654 - val_accuracy: 0.3362
Epoch 2/100
1407/1407 [==============================] - 8s 6ms/step - loss: 1.7306 - accuracy: 0.3857 - val_loss: 1.8635 - val_accuracy: 0.3384
Epoch 3/100
1407/1407 [==============================] - 8s 6ms/step - loss: 1.6275 - accuracy: 0.4276 - val_loss: 1.6944 - val_accuracy: 0.3836
Epoch 4/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.5621 - accuracy: 0.4501 - val_loss: 1.6325 - val_accuracy: 0.4224
Epoch 5/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.5064 - accuracy: 0.4725 - val_loss: 1.6295 - val_accuracy: 0.4146
Epoch 6/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.4631 - accuracy: 0.4894 - val_loss: 1.5299 - val_accuracy: 0.4708
Epoch 7/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.4156 - accuracy: 0.5050 - val_loss: 1.5704 - val_accuracy: 0.4500
Epoch 8/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.3811 - accuracy: 0.5176 - val_loss: 1.4958 - val_accuracy: 0.4738
Epoch 9/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.3505 - accuracy: 0.5323 - val_loss: 1.5240 - val_accuracy: 0.4626
Epoch 10/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.3216 - accuracy: 0.5419 - val_loss: 1.5021 - val_accuracy: 0.4892
Epoch 11/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.2884 - accuracy: 0.5514 - val_loss: 1.5091 - val_accuracy: 0.4750
Epoch 12/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.2561 - accuracy: 0.5650 - val_loss: 1.4831 - val_accuracy: 0.4900
Epoch 13/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.2300 - accuracy: 0.5751 - val_loss: 1.5019 - val_accuracy: 0.4966
Epoch 14/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.2116 - accuracy: 0.5787 - val_loss: 1.5095 - val_accuracy: 0.4994
Epoch 15/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1869 - accuracy: 0.5916 - val_loss: 1.5340 - val_accuracy: 0.4886
Epoch 16/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1557 - accuracy: 0.6028 - val_loss: 1.5245 - val_accuracy: 0.5026
Epoch 17/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1434 - accuracy: 0.6089 - val_loss: 1.4797 - val_accuracy: 0.5054
Epoch 18/100
1407/1407 [==============================] - 8s 6ms/step - loss: 1.1211 - accuracy: 0.6151 - val_loss: 1.4863 - val_accuracy: 0.4960
Epoch 19/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1024 - accuracy: 0.6194 - val_loss: 1.5406 - val_accuracy: 0.5066
Epoch 20/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0803 - accuracy: 0.6310 - val_loss: 1.5287 - val_accuracy: 0.5106
Epoch 21/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0606 - accuracy: 0.6384 - val_loss: 1.5305 - val_accuracy: 0.5068
Epoch 22/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0349 - accuracy: 0.6486 - val_loss: 1.5436 - val_accuracy: 0.4980
Epoch 23/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0217 - accuracy: 0.6529 - val_loss: 1.5507 - val_accuracy: 0.4948
Epoch 24/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0082 - accuracy: 0.6585 - val_loss: 1.5921 - val_accuracy: 0.5016
Epoch 25/100
1407/1407 [==============================] - 9s 6ms/step - loss: 0.9880 - accuracy: 0.6668 - val_loss: 1.5627 - val_accuracy: 0.5180
Epoch 26/100
1407/1407 [==============================] - 9s 6ms/step - loss: 0.9745 - accuracy: 0.6697 - val_loss: 1.5463 - val_accuracy: 0.5080
Epoch 27/100
1407/1407 [==============================] - 9s 6ms/step - loss: 0.9713 - accuracy: 0.6698 - val_loss: 1.5078 - val_accuracy: 0.5074
Epoch 28/100
1407/1407 [==============================] - 9s 6ms/step - loss: 0.9500 - accuracy: 0.6792 - val_loss: 1.5613 - val_accuracy: 0.5008
Epoch 29/100
1407/1407 [==============================] - 8s 6ms/step - loss: 45.0468 - accuracy: 0.6433 - val_loss: 1.6315 - val_accuracy: 0.4506
Epoch 30/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.3280 - accuracy: 0.5452 - val_loss: 1.5685 - val_accuracy: 0.4696
Epoch 31/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.2027 - accuracy: 0.5827 - val_loss: 1.5454 - val_accuracy: 0.4902
Epoch 32/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1413 - accuracy: 0.6045 - val_loss: 1.5691 - val_accuracy: 0.4882
Epoch 33/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1030 - accuracy: 0.6172 - val_loss: 1.5414 - val_accuracy: 0.5010
Epoch 34/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0693 - accuracy: 0.6291 - val_loss: 1.5601 - val_accuracy: 0.4992
Epoch 35/100
1407/1407 [==============================] - 8s 6ms/step - loss: 1.0419 - accuracy: 0.6390 - val_loss: 1.6308 - val_accuracy: 0.4934
Epoch 36/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0214 - accuracy: 0.6486 - val_loss: 1.6348 - val_accuracy: 0.4984
Epoch 37/100
1407/1407 [==============================] - 9s 6ms/step - loss: 0.9975 - accuracy: 0.6578 - val_loss: 1.5899 - val_accuracy: 0.4958
157/157 [==============================] - 0s 2ms/step - loss: 1.4797 - accuracy: 0.5054
[1.4796942472457886, 0.5054000020027161]
model = keras.models.load_model("my_cifar10_selu_model.h5")
model.evaluate(X_valid_scaled, y_valid)
157/157 [==============================] - 0s 2ms/step - loss: 1.4797 - accuracy: 0.5054
[1.4796942472457886, 0.5054000020027161]
We get about 50.5% validation accuracy. That is better than the original model (47.6%), but still not as good as the model using batch normalization (54.0%). However, this model converged about as fast as the BN model, and each epoch took only about 9 seconds, so it is the fastest model so far.
Exercise: Regularize the model with alpha dropout. Then, without retraining the model, see if you can achieve better accuracy using MC Dropout.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation="selu"))
model.add(keras.layers.AlphaDropout(rate=0.1))
model.add(keras.layers.Dense(10, activation="softmax"))
optimizer = keras.optimizers.Nadam(learning_rate=5e-4)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_alpha_dropout_model.h5", save_best_only=True)
run_index = 1 # increment this every time you train the model
run_logdir = os.path.join(os.curdir, "my_cifar10_logs", "run_alpha_dropout_{:03d}".format(run_index))
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)
callbacks = [early_stopping_cb, model_checkpoint_cb, tensorboard_cb]
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds
model.fit(X_train_scaled, y_train, epochs=100,
          validation_data=(X_valid_scaled, y_valid),
          callbacks=callbacks)
model = keras.models.load_model("my_cifar10_alpha_dropout_model.h5")
model.evaluate(X_valid_scaled, y_valid)
Epoch 1/100
1407/1407 [==============================] - 12s 7ms/step - loss: 1.8827 - accuracy: 0.3335 - val_loss: 1.8141 - val_accuracy: 0.3422
Epoch 2/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.6604 - accuracy: 0.4151 - val_loss: 1.6295 - val_accuracy: 0.4204
Epoch 3/100
1407/1407 [==============================] - 9s 7ms/step - loss: 1.5713 - accuracy: 0.4498 - val_loss: 1.6646 - val_accuracy: 0.4162
Epoch 4/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.5043 - accuracy: 0.4716 - val_loss: 1.6436 - val_accuracy: 0.4452
Epoch 5/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.4497 - accuracy: 0.4917 - val_loss: 1.5975 - val_accuracy: 0.4644
Epoch 6/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.3983 - accuracy: 0.5140 - val_loss: 1.4979 - val_accuracy: 0.4880
Epoch 7/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.3535 - accuracy: 0.5312 - val_loss: 1.5254 - val_accuracy: 0.4744
Epoch 8/100
1407/1407 [==============================] - 8s 6ms/step - loss: 1.3149 - accuracy: 0.5426 - val_loss: 1.4812 - val_accuracy: 0.5004
Epoch 9/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.2799 - accuracy: 0.5561 - val_loss: 1.5204 - val_accuracy: 0.4882
Epoch 10/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.2509 - accuracy: 0.5674 - val_loss: 1.4942 - val_accuracy: 0.5012
Epoch 11/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.2206 - accuracy: 0.5800 - val_loss: 1.5644 - val_accuracy: 0.4970
Epoch 12/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1914 - accuracy: 0.5904 - val_loss: 1.5452 - val_accuracy: 0.5000
Epoch 13/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1565 - accuracy: 0.5999 - val_loss: 1.6069 - val_accuracy: 0.5040
Epoch 14/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1325 - accuracy: 0.6092 - val_loss: 1.5100 - val_accuracy: 0.5094
Epoch 15/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.1061 - accuracy: 0.6182 - val_loss: 1.6162 - val_accuracy: 0.5102
Epoch 16/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0858 - accuracy: 0.6301 - val_loss: 1.6036 - val_accuracy: 0.5164
Epoch 17/100
1407/1407 [==============================] - 8s 6ms/step - loss: 1.0617 - accuracy: 0.6362 - val_loss: 1.6463 - val_accuracy: 0.5018
Epoch 18/100
1407/1407 [==============================] - 9s 6ms/step - loss: 1.0394 - accuracy: 0.6424 - val_loss: 1.6183 - val_accuracy: 0.5084
Epoch 19/100
1407/1407 [==============================] - 8s 6ms/step - loss: 1.0227 - accuracy: 0.6516 - val_loss: 1.6803 - val_accuracy: 0.5202
Epoch 20/100
1407/1407 [==============================] - 8s 6ms/step - loss: 0.9968 - accuracy: 0.6591 - val_loss: 1.6436 - val_accuracy: 0.5026
Epoch 21/100
1407/1407 [==============================] - 8s 6ms/step - loss: 0.9775 - accuracy: 0.6673 - val_loss: 1.7502 - val_accuracy: 0.5114
Epoch 22/100
1407/1407 [==============================] - 8s 6ms/step - loss: 0.9560 - accuracy: 0.6764 - val_loss: 1.7188 - val_accuracy: 0.5170
Epoch 23/100
1407/1407 [==============================] - 8s 6ms/step - loss: 0.9427 - accuracy: 0.6808 - val_loss: 1.7112 - val_accuracy: 0.5120
Epoch 24/100
1407/1407 [==============================] - 8s 6ms/step - loss: 0.9330 - accuracy: 0.6839 - val_loss: 1.6890 - val_accuracy: 0.5194
Epoch 25/100
1407/1407 [==============================] - 8s 6ms/step - loss: 0.9064 - accuracy: 0.6920 - val_loss: 1.7430 - val_accuracy: 0.5184
Epoch 26/100
1407/1407 [==============================] - 9s 6ms/step - loss: 0.8920 - accuracy: 0.6994 - val_loss: 1.7455 - val_accuracy: 0.5002
Epoch 27/100
1407/1407 [==============================] - 9s 6ms/step - loss: 0.8743 - accuracy: 0.7047 - val_loss: 1.8365 - val_accuracy: 0.5138
Epoch 28/100
1407/1407 [==============================] - 8s 6ms/step - loss: 0.8566 - accuracy: 0.7108 - val_loss: 1.7643 - val_accuracy: 0.5056
157/157 [==============================] - 0s 2ms/step - loss: 1.4812 - accuracy: 0.5004
[1.481205701828003, 0.5004000067710876]
The model reaches about 50.0% accuracy on the validation set. That is a bit better than without dropout (47.6%). With an extensive hyperparameter search it might be possible to do a little better (I tried dropout rates of 5%, 10%, 20%, and 40%, and learning rates of 1e-4, 3e-4, 5e-4, and 1e-3), but probably not much better in this case.
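For reference, such a search can be organized as in the following minimal sketch. It only illustrates the idea and is not the exact search that was run; the build_model helper, the short 10-epoch budget per candidate, and selecting on best validation accuracy are all assumptions added here:
def build_model(dropout_rate, learning_rate):
    # Same architecture as above, parameterized by the two hyperparameters
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
    for _ in range(20):
        model.add(keras.layers.Dense(100,
                                     kernel_initializer="lecun_normal",
                                     activation="selu"))
    model.add(keras.layers.AlphaDropout(rate=dropout_rate))
    model.add(keras.layers.Dense(10, activation="softmax"))
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=keras.optimizers.Nadam(learning_rate=learning_rate),
                  metrics=["accuracy"])
    return model
best_val_acc, best_params = 0.0, None
for dropout_rate in (0.05, 0.1, 0.2, 0.4):
    for learning_rate in (1e-4, 3e-4, 5e-4, 1e-3):
        # Use a separate variable so the trained `model` above is not clobbered
        search_model = build_model(dropout_rate, learning_rate)
        history = search_model.fit(X_train_scaled, y_train, epochs=10,
                                   validation_data=(X_valid_scaled, y_valid),
                                   verbose=0)
        val_acc = max(history.history["val_accuracy"])
        if val_acc > best_val_acc:
            best_val_acc, best_params = val_acc, (dropout_rate, learning_rate)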
Now let's use MC Dropout. We will copy the MCAlphaDropout class we used earlier:
class MCAlphaDropout(keras.layers.AlphaDropout):
    def call(self, inputs):
        return super().call(inputs, training=True)
Let's create a new model that is identical to the one we just trained (and shares the same weights), but uses MCAlphaDropout layers instead of AlphaDropout layers:
mc_model = keras.models.Sequential([
    MCAlphaDropout(layer.rate) if isinstance(layer, keras.layers.AlphaDropout) else layer
    for layer in model.layers
])
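As a quick sanity check (an addition, not in the original notebook), two forward passes over the same inputs should now give different outputs, since MCAlphaDropout keeps dropout active at inference time:
probas_1 = mc_model.predict(X_valid_scaled[:1])  # first stochastic forward pass
probas_2 = mc_model.predict(X_valid_scaled[:1])  # second pass over the same input
np.allclose(probas_1, probas_2)  # expected: False, the predictions differ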
Next, let's add a couple of utility functions. The first one runs the model many times (10 by default) and returns the averaged predicted class probabilities. The second one uses these averaged probabilities to predict each instance's class:
def mc_dropout_predict_probas(mc_model, X, n_samples=10):
    Y_probas = [mc_model.predict(X) for _ in range(n_samples)]
    return np.mean(Y_probas, axis=0)

def mc_dropout_predict_classes(mc_model, X, n_samples=10):
    Y_probas = mc_dropout_predict_probas(mc_model, X, n_samples)
    return np.argmax(Y_probas, axis=1)
Now let's make predictions on all the instances in the validation set and compute the accuracy:
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
y_pred = mc_dropout_predict_classes(mc_model, X_valid_scaled)
accuracy = np.mean(y_pred == y_valid[:, 0])
accuracy
0.5008
In this case there is essentially no accuracy improvement (50.1%, versus 50.0% without MC Dropout).
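Even without an accuracy gain, MC Dropout still gives us something the plain model does not: the spread of the sampled predictions is a rough measure of the model's uncertainty. A minimal sketch (an illustration added here, not part of the original notebook):
# 10 stochastic forward passes for the first validation instance
Y_probas = np.stack([mc_model.predict(X_valid_scaled[:1]) for _ in range(10)])
Y_probas.mean(axis=0).round(3)  # averaged class probabilities
Y_probas.std(axis=0).round(3)   # per-class std: larger means less confident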
So the best model we got in this exercise is still the batch normalization model.
Exercise: Retrain your model using 1cycle scheduling and see if it improves training speed and model accuracy.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation="selu"))
model.add(keras.layers.AlphaDropout(rate=0.1))
model.add(keras.layers.Dense(10, activation="softmax"))
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])
batch_size = 128
rates, losses = find_learning_rate(model, X_train_scaled, y_train, epochs=1, batch_size=batch_size)
plot_lr_vs_loss(rates, losses)
plt.axis([min(rates), max(rates), min(losses), (losses[0] + min(losses)) / 1.4])
352/352 [==============================] - 3s 8ms/step - loss: nan - accuracy: 0.1378
(9.999999747378752e-06, 9.615227699279785, 2.6294026374816895, 3.9444747992924283)
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[32, 32, 3]))
for _ in range(20):
    model.add(keras.layers.Dense(100,
                                 kernel_initializer="lecun_normal",
                                 activation="selu"))
model.add(keras.layers.AlphaDropout(rate=0.1))
model.add(keras.layers.Dense(10, activation="softmax"))
optimizer = keras.optimizers.SGD(learning_rate=1e-2)
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=optimizer,
              metrics=["accuracy"])
n_epochs = 15
onecycle = OneCycleScheduler(len(X_train_scaled) // batch_size * n_epochs, max_rate=0.05)
history = model.fit(X_train_scaled, y_train, epochs=n_epochs, batch_size=batch_size,
                    validation_data=(X_valid_scaled, y_valid),
                    callbacks=[onecycle])
Epoch 1/15
352/352 [==============================] - 4s 9ms/step - loss: 2.0630 - accuracy: 0.2825 - val_loss: 1.8195 - val_accuracy: 0.3674
Epoch 2/15
352/352 [==============================] - 3s 8ms/step - loss: 1.7607 - accuracy: 0.3766 - val_loss: 1.6499 - val_accuracy: 0.4174
Epoch 3/15
352/352 [==============================] - 3s 8ms/step - loss: 1.6213 - accuracy: 0.4257 - val_loss: 1.6189 - val_accuracy: 0.4332
Epoch 4/15
352/352 [==============================] - 3s 8ms/step - loss: 1.5391 - accuracy: 0.4540 - val_loss: 1.6567 - val_accuracy: 0.4244
Epoch 5/15
352/352 [==============================] - 3s 8ms/step - loss: 1.4839 - accuracy: 0.4726 - val_loss: 1.6156 - val_accuracy: 0.4482
Epoch 6/15
352/352 [==============================] - 3s 8ms/step - loss: 1.4409 - accuracy: 0.4889 - val_loss: 1.5545 - val_accuracy: 0.4616
Epoch 7/15
352/352 [==============================] - 3s 8ms/step - loss: 1.4074 - accuracy: 0.5002 - val_loss: 1.5639 - val_accuracy: 0.4598
Epoch 8/15
352/352 [==============================] - 3s 8ms/step - loss: 1.3381 - accuracy: 0.5242 - val_loss: 1.4700 - val_accuracy: 0.4948
Epoch 9/15
352/352 [==============================] - 3s 8ms/step - loss: 1.2623 - accuracy: 0.5527 - val_loss: 1.5123 - val_accuracy: 0.4796
Epoch 10/15
352/352 [==============================] - 3s 8ms/step - loss: 1.1919 - accuracy: 0.5756 - val_loss: 1.5519 - val_accuracy: 0.4826
Epoch 11/15
352/352 [==============================] - 3s 8ms/step - loss: 1.1223 - accuracy: 0.6001 - val_loss: 1.5353 - val_accuracy: 0.4968
Epoch 12/15
352/352 [==============================] - 3s 8ms/step - loss: 1.0540 - accuracy: 0.6239 - val_loss: 1.5265 - val_accuracy: 0.5006
Epoch 13/15
352/352 [==============================] - 3s 8ms/step - loss: 0.9837 - accuracy: 0.6476 - val_loss: 1.5636 - val_accuracy: 0.5146
Epoch 14/15
352/352 [==============================] - 3s 8ms/step - loss: 0.9180 - accuracy: 0.6714 - val_loss: 1.5781 - val_accuracy: 0.5114
Epoch 15/15
352/352 [==============================] - 3s 8ms/step - loss: 0.8779 - accuracy: 0.6862 - val_loss: 1.6093 - val_accuracy: 0.5112
We trained the model for 15 epochs with the 1cycle approach, and each epoch took only about 3 seconds (thanks to the larger batch size). This is several times faster than the fastest model we trained so far. Model performance also improved (from 47.6% to about 51.5% validation accuracy). The batch normalization model still performs a bit better (54%), but it is much slower to train.
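To see the shape of the schedule this relies on, the sketch below plots the general 1cycle idea: the learning rate ramps linearly up to max_rate over roughly the first half of training, then back down. This is only a conceptual sketch under that assumption; the OneCycleScheduler defined earlier in the notebook may differ in its exact details (for example, a final low-rate tail):
# Conceptual plot of a 1cycle learning rate schedule (sketch only)
n_iterations = len(X_train_scaled) // batch_size * n_epochs
half = n_iterations // 2
start_rate = 0.05 / 10  # a common heuristic: start at max_rate / 10
lr_up = np.linspace(start_rate, 0.05, half)
lr_down = np.linspace(0.05, start_rate, n_iterations - half)
plt.plot(np.concatenate([lr_up, lr_down]))
plt.xlabel("Iteration")
plt.ylabel("Learning rate")
plt.title("1cycle schedule (sketch)")
plt.grid(True)
plt.show()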