# coding: utf-8
import sys, os
import math
from collections import OrderedDict  # used for self.layers below

import numpy as np
import matplotlib.pyplot as plt

sys.path.append(os.pardir)
from deeplink.mnist import *
from deeplink.networks import *
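# NOTE: deeplink.mnist and deeplink.networks are course-local modules whose
# source is not shown here; they are assumed to provide mnist_data,
# MultiLayerNet, Affine, BatchNormalization, Dropout,
# SoftmaxWithCrossEntropyLoss, and the `optimizers`, `initializers`, and
# `activation_layers` registries used below.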
class MultiLayerNetExtended(MultiLayerNet):
    def __init__(self, input_size, hidden_size_list, output_size, activation='ReLU', initializer='N2',
                 optimizer='AdaGrad', learning_rate=0.01,
                 use_batch_normalization=False,
                 use_weight_decay=False, weight_decay_lambda=0.0,
                 use_dropout=False, dropout_ratio_list=None):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)

        self.use_batch_normalization = use_batch_normalization

        self.use_weight_decay = use_weight_decay
        self.weight_decay_lambda = weight_decay_lambda

        self.use_dropout = use_dropout
        self.dropout_ratio_list = dropout_ratio_list

        # Weight initialization
        self.params = {}
        self.weight_initialization(initializer)

        # Layering
        self.layers = OrderedDict()
        self.last_layer = None
        self.layering(activation)

        # Optimization method
        self.optimizer = optimizers[optimizer](lr=learning_rate)
    def weight_initialization(self, initializer):
        params_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        initializer_obj = initializers[initializer](self.params,
                                                    params_size_list,
                                                    self.use_batch_normalization)
        initializer_obj.initialize_params()
    def layering(self, activation):
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
            if self.use_batch_normalization:
                self.layers['Batch_Normalization' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)],
                                                                                   self.params['beta' + str(idx)])
            self.layers['Activation' + str(idx)] = activation_layers[activation]()
            if self.use_dropout:
                self.layers['Dropout' + str(idx)] = Dropout(self.dropout_ratio_list[idx - 1])

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
        self.last_layer = SoftmaxWithCrossEntropyLoss()
    def predict(self, x, is_train=False):
        for key, layer in self.layers.items():
            # Batch normalization and dropout behave differently at training and
            # inference time, so those layers need the is_train flag. Note the
            # key check must match the 'Batch_Normalization' layer names used in
            # layering(); the substring "BatchNorm" would never match.
            if "Batch_Normalization" in key or "Dropout" in key:
                x = layer.forward(x, is_train)
            else:
                x = layer.forward(x)
        return x
    def loss(self, x, t, is_train=False):
        y = self.predict(x, is_train)

        if self.use_weight_decay:
            # L2 regularization: add 0.5 * lambda * ||W||^2 for every weight matrix.
            weight_decay = 0.0
            for idx in range(1, self.hidden_layer_num + 2):
                W = self.params['W' + str(idx)]
                weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W ** 2)
            return self.last_layer.forward(y, t) + weight_decay
        else:
            return self.last_layer.forward(y, t)
    def accuracy(self, x, t):
        y = self.predict(x, is_train=False)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    def backpropagation_gradient(self, x, t):
        # Forward pass
        self.loss(x, t, is_train=True)

        # Backward pass
        din = 1
        din = self.last_layer.backward(din)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            din = layer.backward(din)

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            if self.use_weight_decay:
                # The gradient of the L2 penalty 0.5 * lambda * ||W||^2 is lambda * W.
                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)]
            else:
                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

            if self.use_batch_normalization and idx <= self.hidden_layer_num:
                grads['gamma' + str(idx)] = self.layers['Batch_Normalization' + str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['Batch_Normalization' + str(idx)].dbeta

        return grads
    def learning(self, x_batch, t_batch):
        grads = self.backpropagation_gradient(x_batch, t_batch)
        self.optimizer.update(self.params, grads)
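# For reference, a minimal dropout layer compatible with the
# forward(x, is_train) / backward(dout) interface used in predict() might look
# like the sketch below. This is an illustrative assumption; the actual Dropout
# class lives in deeplink.networks and is not shown here.
class DropoutSketch:
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, is_train=True):
        if is_train:
            # Drop each unit independently with probability dropout_ratio.
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # At inference, keep all units but scale by the keep probability.
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # Gradients flow only through the units that were kept in forward().
        return dout * self.mask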
data = mnist_data("/Users/yhhan/git/aiclass/0.Professor/data/MNIST_data/.")
(img_train, label_train), (img_validation, label_validation), (img_test, label_test) = data.load_mnist(flatten=True, normalize=True, one_hot_label=True)
# Drastically reduce the number of training samples to induce overfitting
img_train = img_train[:200]
label_train = label_train[:200]
# To induce overfitting, make the network deep and greatly increase the number of parameters
input_size=784
hidden_layer1_size=128
hidden_layer2_size=128
hidden_layer3_size=128
hidden_layer4_size=128
hidden_layer5_size=128
hidden_layer6_size=128
output_size=10
num_epochs = 200
train_size = img_train.shape[0]
batch_size = 100
learning_rate = 0.1
markers = {"N2, AdaGrad, No_Batch_Norm, No_Weight_Decay, No_dropout": "x", "N2, AdaGrad, No_Batch_Norm, No_Weight_Decay, Dropout": "o"}
networks = {}
train_errors = {}
validation_errors = {}
test_accuracy_values = {}
max_test_accuracy_epoch = {}
max_test_accuracy_value = {}
for key in markers.keys():
    if key == "N2, AdaGrad, No_Batch_Norm, No_Weight_Decay, No_dropout":
        networks[key] = MultiLayerNetExtended(input_size,
                                              [hidden_layer1_size, hidden_layer2_size, hidden_layer3_size, hidden_layer4_size, hidden_layer5_size, hidden_layer6_size],
                                              output_size,
                                              activation='ReLU',
                                              initializer='N2',
                                              optimizer='AdaGrad', learning_rate=learning_rate,
                                              use_batch_normalization=False,
                                              use_weight_decay=False, weight_decay_lambda=0.0,
                                              use_dropout=False, dropout_ratio_list=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
    elif key == "N2, AdaGrad, No_Batch_Norm, No_Weight_Decay, Dropout":
        networks[key] = MultiLayerNetExtended(input_size,
                                              [hidden_layer1_size, hidden_layer2_size, hidden_layer3_size, hidden_layer4_size, hidden_layer5_size, hidden_layer6_size],
                                              output_size,
                                              activation='ReLU',
                                              initializer='N2',
                                              optimizer='AdaGrad', learning_rate=learning_rate,
                                              use_batch_normalization=False,
                                              use_weight_decay=False, weight_decay_lambda=0.0,
                                              use_dropout=True, dropout_ratio_list=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5])
    train_errors[key] = []
    validation_errors[key] = []
    test_accuracy_values[key] = []
    max_test_accuracy_epoch[key] = 0
    max_test_accuracy_value[key] = 0.0
epoch_list = []
num_batch = math.ceil(train_size / batch_size)
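# With train_size = 200 and batch_size = 100, num_batch is 2, so each epoch
# performs only two parameter updates per network.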
for i in range(num_epochs):
    epoch_list.append(i)
    for key in markers.keys():
        for k in range(num_batch):
            x_batch = img_train[k * batch_size : k * batch_size + batch_size]
            t_batch = label_train[k * batch_size : k * batch_size + batch_size]
            networks[key].learning(x_batch, t_batch)

        train_loss = networks[key].loss(x_batch, t_batch, is_train=True)
        train_errors[key].append(train_loss)

        validation_loss = networks[key].loss(img_validation, label_validation, is_train=False)
        validation_errors[key].append(validation_loss)

        test_accuracy = networks[key].accuracy(img_test, label_test)
        test_accuracy_values[key].append(test_accuracy)
        if test_accuracy > max_test_accuracy_value[key]:
            max_test_accuracy_epoch[key] = i
            max_test_accuracy_value[key] = test_accuracy

        # print("{0:26s}-Epoch:{1:3d}, Train Err.:{2:7.5f}, Validation Err.:{3:7.5f}, Test Accuracy:{4:7.5f}, Max Test Accuracy:{5:7.5f}".format(
        #     key,
        #     i,
        #     train_loss,
        #     validation_loss,
        #     test_accuracy,
        #     max_test_accuracy_value[key]
        # ))
    print(i, end=", ")
[Output: epoch indices 0, 1, 2, ..., 199; a RuntimeWarning is raised from ../common/functions.py:56 ("divide by zero encountered in log") during the cross-entropy computation]
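# The divide-by-zero warning above comes from evaluating log(0) when the
# softmax assigns an exact zero to the target class. A common guard is to add
# a tiny epsilon inside the log; the sketch below illustrates the idea (an
# assumption here, since the actual common/functions.py source is not shown).
def cross_entropy_error_sketch(y, t):
    # y: softmax outputs of shape (batch_size, num_classes); t: integer labels
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size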
f, axarr = plt.subplots(2, 2, figsize=(20, 12))

for key in markers.keys():
    axarr[0, 0].plot(epoch_list[1:], train_errors[key][1:], marker=markers[key], markevery=2, label=key)
axarr[0, 0].set_ylabel('Train - Total Error')
axarr[0, 0].set_xlabel('Epochs')
axarr[0, 0].grid(True)
axarr[0, 0].set_title('Train Error')
axarr[0, 0].legend(loc='upper right')

for key in markers.keys():
    axarr[0, 1].plot(epoch_list[1:], validation_errors[key][1:], marker=markers[key], markevery=2, label=key)
axarr[0, 1].set_ylabel('Validation - Total Error')
axarr[0, 1].set_xlabel('Epochs')
axarr[0, 1].grid(True)
axarr[0, 1].set_title('Validation Error')
axarr[0, 1].legend(loc='upper right')

for key in markers.keys():
    axarr[1, 0].plot(epoch_list[1:], train_errors[key][1:], marker=markers[key], markevery=2, label=key)
axarr[1, 0].set_ylabel('Train - Total Error')
axarr[1, 0].set_xlabel('Epochs')
axarr[1, 0].grid(True)
axarr[1, 0].set_ylim(0.8, 2.4)
axarr[1, 0].set_title('Train Error (0.8 ~ 2.4)')
axarr[1, 0].legend(loc='upper right')

for key in markers.keys():
    axarr[1, 1].plot(epoch_list[1:], validation_errors[key][1:], marker=markers[key], markevery=2, label=key)
axarr[1, 1].set_ylabel('Validation - Total Error')
axarr[1, 1].set_xlabel('Epochs')
axarr[1, 1].grid(True)
axarr[1, 1].set_ylim(2.25, 2.4)
axarr[1, 1].set_title('Validation Error (2.25 ~ 2.4)')
axarr[1, 1].legend(loc='upper right')

f.subplots_adjust(hspace=0.3)
plt.show()
f, axarr = plt.subplots(2, 1, figsize=(15, 10))

for key in markers.keys():
    axarr[0].plot(epoch_list[1:], test_accuracy_values[key][1:], marker=markers[key], markevery=1, label=key)
axarr[0].set_ylabel('Test Accuracy')
axarr[0].set_xlabel('Epochs')
axarr[0].grid(True)
axarr[0].set_title('Test Accuracy')
axarr[0].legend(loc='lower right')

for key in markers.keys():
    axarr[1].plot(epoch_list[1:], test_accuracy_values[key][1:], marker=markers[key], markevery=1, label=key)
axarr[1].set_ylabel('Test Accuracy')
axarr[1].set_xlabel('Epochs')
axarr[1].grid(True)
axarr[1].set_ylim(0.30, 0.50)
axarr[1].set_title('Test Accuracy (0.30 ~ 0.50)')
axarr[1].legend(loc='lower right')

f.subplots_adjust(hspace=0.3)
plt.show()
for key in markers.keys():
    print("{0:26s} - Epoch:{1:3d}, Max Test Accuracy: {2:7.5f}".format(key, max_test_accuracy_epoch[key], max_test_accuracy_value[key]))
N2, AdaGrad, No_Batch_Norm, No_Weight_Decay, No_dropout - Epoch: 13, Max Test Accuracy: 0.15370
N2, AdaGrad, No_Batch_Norm, No_Weight_Decay, Dropout    - Epoch:161, Max Test Accuracy: 0.44620