#!/usr/bin/env python
# coding: utf-8

# # MNIST-Overfit-Dropout

# In[1]:

# coding: utf-8
import sys, os
import numpy as np
import matplotlib.pyplot as plt
import math
sys.path.append(os.pardir)
from deeplink.mnist import *
from deeplink.networks import *


# ## Multilayer Neural Network Model (Six Hidden Layers) and Learning/Validation

# ### Multi Layer Model Class

# In[2]:

class MultiLayerNetExtended(MultiLayerNet):
    def __init__(self, input_size, hidden_size_list, output_size, activation='ReLU', initializer='N2',
                 optimizer='AdaGrad', learning_rate=0.01,
                 use_batch_normalization=False,
                 use_weight_decay=False, weight_decay_lambda=0.0,
                 use_dropout=False, dropout_rate_list=None):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)

        self.use_batch_normalization = use_batch_normalization

        self.use_weight_decay = use_weight_decay
        self.weight_decay_lambda = weight_decay_lambda

        # Dropout settings are only stored here; layering() in this cell does not insert
        # dropout layers (see the sketch in the next cell).
        self.use_dropout = use_dropout
        self.dropout_rate_list = dropout_rate_list

        # Weight Initialization
        self.params = {}
        self.weight_initialization(initializer)

        # Layering
        self.layers = OrderedDict()
        self.last_layer = None
        self.layering(activation)

        # Optimization Method
        self.optimizer = optimizers[optimizer](lr=learning_rate)

    def weight_initialization(self, initializer):
        params_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        initializer_obj = initializers[initializer](self.params, params_size_list, self.use_batch_normalization)
        initializer_obj.initialize_params()

    def layering(self, activation):
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
            if self.use_batch_normalization:
                self.layers['Batch_Normalization' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)],
                                                                                    self.params['beta' + str(idx)])
            self.layers['Activation' + str(idx)] = activation_layers[activation]()

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])

        self.last_layer = SoftmaxWithCrossEntropyLoss()

    def predict(self, x, is_train=False):
        for key, layer in self.layers.items():
            # Batch normalization behaves differently at train and test time
            if "Batch_Normalization" in key:
                x = layer.forward(x, is_train)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t, is_train=False):
        y = self.predict(x, is_train)

        if self.use_weight_decay:
            weight_decay = 0.0
            for idx in range(1, self.hidden_layer_num + 2):
                W = self.params['W' + str(idx)]
                weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)
            return self.last_layer.forward(y, t) + weight_decay
        else:
            return self.last_layer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x, is_train=False)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def backpropagation_gradient(self, x, t):
        # forward
        self.loss(x, t, is_train=True)

        # backward
        din = 1
        din = self.last_layer.backward(din)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            din = layer.backward(din)

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            if self.use_weight_decay:
                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)]
            else:
                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

            if self.use_batch_normalization and idx <= self.hidden_layer_num:
                grads['gamma' + str(idx)] = self.layers['Batch_Normalization' + str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['Batch_Normalization' + str(idx)].dbeta

        return grads

    def learning(self, x_batch, t_batch):
        grads = self.backpropagation_gradient(x_batch, t_batch)
        self.optimizer.update(self.params, grads)
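# The notebook is titled "MNIST-Overfit-Dropout" and the constructor above accepts
# use_dropout / dropout_rate_list, but layering() never wires dropout into the network.
# The cell below is a minimal sketch of how that could look, assuming one dropout rate per
# hidden layer; the Dropout class and the MultiLayerNetWithDropout subclass are illustrative
# additions, not part of the original deeplink package.

# In[ ]:

class Dropout:
    def __init__(self, dropout_rate=0.5):
        self.dropout_rate = dropout_rate
        self.mask = None

    def forward(self, x, is_train=True):
        if is_train:
            # During training, drop each unit with probability dropout_rate
            self.mask = np.random.rand(*x.shape) > self.dropout_rate
            return x * self.mask
        else:
            # At test time, scale activations to keep their expected magnitude unchanged
            return x * (1.0 - self.dropout_rate)

    def backward(self, din):
        # Gradients flow only through the units kept in the forward pass
        return din * self.mask


class MultiLayerNetWithDropout(MultiLayerNetExtended):
    def layering(self, activation):
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
            if self.use_batch_normalization:
                self.layers['Batch_Normalization' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)],
                                                                                    self.params['beta' + str(idx)])
            self.layers['Activation' + str(idx)] = activation_layers[activation]()
            if self.use_dropout:
                # One dropout layer after each hidden activation
                self.layers['Dropout' + str(idx)] = Dropout(self.dropout_rate_list[idx - 1])

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
        self.last_layer = SoftmaxWithCrossEntropyLoss()

    def predict(self, x, is_train=False):
        for key, layer in self.layers.items():
            # Both batch normalization and dropout need to know whether this is a training pass
            if "Batch_Normalization" in key or "Dropout" in key:
                x = layer.forward(x, is_train)
            else:
                x = layer.forward(x)
        return x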
# ### Training and Evaluation

# In[ ]:

data = mnist_data("/Users/yhhan/git/aiclass/0.Professor/data/MNIST_data/.")
(img_train, label_train), (img_validation, label_validation), (img_test, label_test) = data.load_mnist(flatten=True, normalize=True, one_hot_label=True)

# Drastically reduce the number of training samples to induce overfitting
img_train = img_train[:200]
label_train = label_train[:200]

# Make the network deep and heavily parameterized to induce overfitting
input_size = 784
hidden_layer1_size = 128
hidden_layer2_size = 128
hidden_layer3_size = 128
hidden_layer4_size = 128
hidden_layer5_size = 128
hidden_layer6_size = 128
output_size = 10

num_epochs = 200
train_size = img_train.shape[0]
batch_size = 100
learning_rate = 0.1

markers = {
    "N2, AdaGrad, No_Batch_Norm, lambda=0.0": "x",
    "N2, AdaGrad, No_Batch_Norm, lambda=0.1": "o"
}

networks = {}
train_errors = {}
validation_errors = {}
test_accuracy_values = {}
max_test_accuracy_epoch = {}
max_test_accuracy_value = {}

for key in markers.keys():
    if key == "N2, AdaGrad, No_Batch_Norm, lambda=0.0":
        networks[key] = MultiLayerNetExtended(input_size,
                                              [hidden_layer1_size, hidden_layer2_size, hidden_layer3_size,
                                               hidden_layer4_size, hidden_layer5_size, hidden_layer6_size],
                                              output_size,
                                              activation='ReLU', initializer='N2',
                                              optimizer='AdaGrad', learning_rate=learning_rate,
                                              use_batch_normalization=False,
                                              use_weight_decay=False, weight_decay_lambda=0.0)
    elif key == "N2, AdaGrad, No_Batch_Norm, lambda=0.1":
        # use_weight_decay must be enabled here, otherwise lambda=0.1 would have no effect
        networks[key] = MultiLayerNetExtended(input_size,
                                              [hidden_layer1_size, hidden_layer2_size, hidden_layer3_size,
                                               hidden_layer4_size, hidden_layer5_size, hidden_layer6_size],
                                              output_size,
                                              activation='ReLU', initializer='N2',
                                              optimizer='AdaGrad', learning_rate=learning_rate,
                                              use_batch_normalization=False,
                                              use_weight_decay=True, weight_decay_lambda=0.1)
    train_errors[key] = []
    validation_errors[key] = []
    test_accuracy_values[key] = []
    max_test_accuracy_epoch[key] = 0
    max_test_accuracy_value[key] = 0.0


# In[ ]:

epoch_list = []

num_batch = math.ceil(train_size / batch_size)

for i in range(num_epochs):
    epoch_list.append(i)
    for key in markers.keys():
        for k in range(num_batch):
            x_batch = img_train[k * batch_size : k * batch_size + batch_size]
            t_batch = label_train[k * batch_size : k * batch_size + batch_size]
            networks[key].learning(x_batch, t_batch)

        train_loss = networks[key].loss(x_batch, t_batch, is_train=True)
        train_errors[key].append(train_loss)

        validation_loss = networks[key].loss(img_validation, label_validation, is_train=False)
        validation_errors[key].append(validation_loss)

        test_accuracy = networks[key].accuracy(img_test, label_test)
        test_accuracy_values[key].append(test_accuracy)
        if test_accuracy > max_test_accuracy_value[key]:
            max_test_accuracy_epoch[key] = i
            max_test_accuracy_value[key] = test_accuracy
        # print("{0:26s}-Epoch:{1:3d}, Train Err.:{2:7.5f}, Validation Err.:{3:7.5f}, Test Accuracy:{4:7.5f}, Max Test Accuracy:{5:7.5f}".format(
        #     key,
        #     i,
        #     train_loss,
        #     validation_loss,
        #     test_accuracy,
        #     max_test_accuracy_value[key]
        # ))
    print(i, end=", ")
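# Usage sketch (not part of the runs above): a dropout-regularized network for the same
# experiment could be constructed as below, with one dropout rate per hidden layer. The
# object is only built here for illustration and is not trained or plotted in this notebook.

# In[ ]:

network_with_dropout = MultiLayerNetWithDropout(
    input_size,
    [hidden_layer1_size, hidden_layer2_size, hidden_layer3_size,
     hidden_layer4_size, hidden_layer5_size, hidden_layer6_size],
    output_size,
    activation='ReLU', initializer='N2', optimizer='AdaGrad', learning_rate=learning_rate,
    use_batch_normalization=False,
    use_weight_decay=False, weight_decay_lambda=0.0,
    use_dropout=True, dropout_rate_list=[0.5] * 6)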
# In[ ]:

f, axarr = plt.subplots(2, 2, figsize=(20, 12))

for key in markers.keys():
    axarr[0, 0].plot(epoch_list[1:], train_errors[key][1:], marker=markers[key], markevery=2, label=key)
axarr[0, 0].set_ylabel('Train - Total Error')
axarr[0, 0].set_xlabel('Epochs')
axarr[0, 0].grid(True)
axarr[0, 0].set_title('Train Error')
axarr[0, 0].legend(loc='upper right')

for key in markers.keys():
    axarr[0, 1].plot(epoch_list[1:], validation_errors[key][1:], marker=markers[key], markevery=2, label=key)
axarr[0, 1].set_ylabel('Validation - Total Error')
axarr[0, 1].set_xlabel('Epochs')
axarr[0, 1].grid(True)
axarr[0, 1].set_title('Validation Error')
axarr[0, 1].legend(loc='upper right')

for key in markers.keys():
    axarr[1, 0].plot(epoch_list[1:], train_errors[key][1:], marker=markers[key], markevery=2, label=key)
axarr[1, 0].set_ylabel('Train - Total Error')
axarr[1, 0].set_xlabel('Epochs')
axarr[1, 0].grid(True)
axarr[1, 0].set_ylim(2.25, 2.4)
axarr[1, 0].set_title('Train Error (2.25 ~ 2.4)')
axarr[1, 0].legend(loc='upper right')

for key in markers.keys():
    axarr[1, 1].plot(epoch_list[1:], validation_errors[key][1:], marker=markers[key], markevery=2, label=key)
axarr[1, 1].set_ylabel('Validation - Total Error')
axarr[1, 1].set_xlabel('Epochs')
axarr[1, 1].grid(True)
axarr[1, 1].set_ylim(2.25, 2.4)
axarr[1, 1].set_title('Validation Error (2.25 ~ 2.4)')
axarr[1, 1].legend(loc='upper right')

f.subplots_adjust(hspace=0.3)

plt.show()


# In[ ]:

f, axarr = plt.subplots(2, 1, figsize=(15, 10))

for key in markers.keys():
    axarr[0].plot(epoch_list[1:], test_accuracy_values[key][1:], marker=markers[key], markevery=1, label=key)
axarr[0].set_ylabel('Test Accuracy')
axarr[0].set_xlabel('Epochs')
axarr[0].grid(True)
axarr[0].set_title('Test Accuracy')
axarr[0].legend(loc='lower right')

for key in markers.keys():
    axarr[1].plot(epoch_list[1:], test_accuracy_values[key][1:], marker=markers[key], markevery=1, label=key)
axarr[1].set_ylabel('Test Accuracy')
axarr[1].set_xlabel('Epochs')
axarr[1].grid(True)
axarr[1].set_ylim(0.94, 0.99)
axarr[1].set_title('Test Accuracy (0.94 ~ 0.99)')
axarr[1].legend(loc='lower right')

f.subplots_adjust(hspace=0.3)

plt.show()


# In[ ]:

for key in markers.keys():
    print("{0:26s} - Epoch:{1:3d}, Max Test Accuracy: {2:7.5f}".format(key, max_test_accuracy_epoch[key], max_test_accuracy_value[key]))