# coding: utf-8
import sys, os
import numpy as np
import matplotlib.pyplot as plt
import math
from collections import OrderedDict
class Affine:
    """Fully connected layer: ``out = x . W + b``.

    ``forward`` caches its input so that ``backward`` can compute the
    parameter gradients, which are stored on ``dW`` and ``db``.
    """

    def __init__(self, W, b):
        self.W, self.b = W, b
        # Populated by forward() / backward().
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, din):
        """Store ``dW`` and ``db`` for upstream gradient ``din``; return the input gradient."""
        grad_input = np.dot(din, self.W.T)
        self.dW = np.dot(self.x.T, din)
        self.db = np.sum(din, axis=0)  # bias is shared across rows, so sum over axis 0
        return grad_input
class Sigmoid:
    """Element-wise logistic activation ``1 / (1 + exp(-x))``."""

    def __init__(self):
        # Last forward output; backward() needs it for the derivative.
        self.out = None

    def forward(self, x):
        """Return the sigmoid of ``x`` and remember it for ``backward``."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, din):
        """Return ``din * out * (1 - out)`` using the cached forward output."""
        scaled = din * self.out
        return scaled * (1.0 - self.out)
class Relu:
    """Element-wise rectified linear activation ``max(x, 0)``."""

    def __init__(self):
        # Boolean mask of the inputs that were <= 0 in the last forward pass.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every non-positive entry set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, din):
        """Return ``din`` with the gradient blocked where the forward input was <= 0.

        The caller's ``din`` is left untouched: the result is a new array.
        Zeroing ``din`` in place would silently overwrite the upstream
        gradient that the caller may still want to inspect.
        """
        dx = din.copy()
        dx[self.mask] = 0
        return dx
def _layer_statistics(values_by_layer, num_layers, show_plot, banner):
    """Return ``[(mean, std), ...]`` in layer order; optionally draw one histogram per layer."""
    axes = None
    if show_plot:
        print(banner)
        # squeeze=False keeps a 2-D axes array even when there is a single layer.
        fig, axarr = plt.subplots(1, num_layers, figsize=(25, 5), squeeze=False)
        fig.subplots_adjust(wspace=0.5)
        axes = axarr[0]

    stats = []
    # Walk by layer index rather than dict order: the backward pass fills its
    # dict from the last layer to the first.
    for i in range(num_layers):
        flat = np.asarray(values_by_layer[i]).ravel()
        mean, std = np.mean(flat), np.std(flat)
        stats.append((mean, std))
        if show_plot:
            std_text = "{:5.4f}".format(std)
            axes[i].hist(flat, 20)
            # Raw strings: "\s" is not a valid escape in an ordinary literal.
            if i % 2 == 0:
                axes[i].set_title(str(i + 1) + r"-Layer ($\sigma=$" + std_text + ")")
            else:
                axes[i].set_title(r"ReLU Layer ($\sigma=$" + std_text + ")")

    if show_plot:
        plt.show()
    return stats


def activation_value_histogram(layers, num_epoch, forward_plt=False, backward_plt=False,
                               describe=False, learning_rate=0.01):
    """Run forward/backward passes through ``layers`` and record per-layer statistics.

    A fixed random input of shape (1000, 100) and an all-ones upstream
    gradient of the same shape are fed through the network once per epoch.
    After each pass, every even-indexed layer is updated with plain gradient
    descent (``W -= learning_rate * dW`` and likewise for ``b``), so those
    layers must expose ``W``, ``b``, ``dW`` and ``db``.

    Args:
        layers: mapping or sequence indexable by ``0 .. len(layers) - 1``;
            each item provides ``forward(x)`` and ``backward(din)``.
        num_epoch: number of forward/backward/update rounds.
        forward_plt: show a histogram of each layer's forward output per epoch.
        backward_plt: show a histogram of each layer's backward output per epoch.
        describe: print the recorded mean/std of every layer and epoch at the end.
        learning_rate: step size of the parameter update.

    Returns:
        ``(forward, backward)``: two dicts mapping epoch -> list of
        ``(mean, std)`` tuples, where list position equals the layer index.
    """
    x = np.random.randn(1000, 100)  # 1000 x 100
    y = np.ones((1000, 100))
    num_layers = len(layers)

    forward = {}
    backward = {}

    for epoch in range(num_epoch):
        if forward_plt:
            print("Epoch:", epoch)

        # Forward: restart from the original input every epoch instead of
        # reusing whatever the previous epoch left behind.
        activations = {}
        signal = x
        for i in range(num_layers):
            signal = layers[i].forward(signal)
            activations[i] = signal

        # Backward: restart from the all-ones gradient every epoch. Each layer
        # gets its own copy in case its backward() modifies the argument in
        # place, which would otherwise corrupt `y` or the recorded gradients.
        backward_activations = {}
        grad = y
        for i in reversed(range(num_layers)):
            grad = layers[i].backward(grad.copy())
            backward_activations[i] = grad

        # Parameter update (even slots are the parameterised layers).
        for i in range(0, num_layers, 2):
            layers[i].W = layers[i].W - learning_rate * layers[i].dW
            layers[i].b = layers[i].b - learning_rate * layers[i].db

        forward[epoch] = _layer_statistics(
            activations, num_layers, forward_plt, "Forward Activations")
        backward[epoch] = _layer_statistics(
            backward_activations, num_layers, backward_plt, "Backward Activations")

    if describe:
        for epoch in range(num_epoch):
            print("[Forward Epoch {:d}]".format(epoch))
            for layer in range(num_layers):
                print("Layer {:d}, mean {:f}, std {:f}".format(
                    layer, forward[epoch][layer][0], forward[epoch][layer][1]))
            print("[Backward Epoch {:d}]".format(epoch))
            for layer in range(num_layers):
                print("Layer {:d}, mean {:f}, std {:f}".format(
                    layer, backward[epoch][layer][0], backward[epoch][layer][1]))
            print()

    return forward, backward
# Matplotlib marker used for each weight-initialisation scheme in the plots.
markers = {
    # fixed-scale baselines
    "zero": "h", "normal1": "_", "normal2": "|", "trunc_normal": "v",
    # LeCun
    "lecun_normal": "x", "lecun_uniform": "s",
    # Xavier
    "xavier_normal": "+", "xavier_uniform": "d",
    # He
    "he_normal": "*", "he_uniform": "o",
}
# Per-initialiser results: scheme name -> statistics returned by
# activation_value_histogram().
forward_dic = OrderedDict()
backward_dic = OrderedDict()

# Experiment "zero": every Affine weight starts at 0.
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 Affine layers + 3 activation layers
num_epochs = 5

layers = OrderedDict()
for i in range(num_layers):
    if i % 2:
        # Odd slots hold the activation that follows each Affine layer.
        layers[i] = Relu()
        continue
    w = np.zeros((node_num, node_num))
    b = np.zeros(node_num)
    layers[i] = Affine(w, b)

forward, backward = activation_value_histogram(
    layers,
    num_epochs,
    forward_plt=False,
    backward_plt=False,
    describe=False,
)
forward_dic['zero'] = forward
backward_dic['zero'] = backward
# Experiment "normal1": weights drawn from a standard normal (std = 1).
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5

layers = OrderedDict()
for i in range(num_layers):
    if i % 2:
        # Odd slots hold the activation that follows each Affine layer.
        layers[i] = Relu()
        continue
    w = np.random.randn(node_num, node_num)
    b = np.zeros(node_num)
    layers[i] = Affine(w, b)

forward, backward = activation_value_histogram(
    layers,
    num_epochs,
    forward_plt=False,
    backward_plt=False,
    describe=False,
)
forward_dic['normal1'] = forward
backward_dic['normal1'] = backward
# Experiment "normal2": normal weights scaled down to std = 0.01.
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5
std = 0.01

layers = OrderedDict()
for i in range(num_layers):
    if i % 2:
        # Odd slots hold the activation that follows each Affine layer.
        layers[i] = Relu()
        continue
    w = np.random.randn(node_num, node_num) * std
    b = np.zeros(node_num)
    layers[i] = Affine(w, b)

forward, backward = activation_value_histogram(
    layers,
    num_epochs,
    forward_plt=False,
    backward_plt=False,
    describe=False,
)
forward_dic['normal2'] = forward
backward_dic['normal2'] = backward
def get_truncated_normal(shape, mean=0, sd=1, low=0, upp=10):
    """Sample an array of ``shape`` from a normal(mean, sd) truncated to [low, upp].

    Relies on ``scipy.stats.truncnorm`` being importable at module level.
    """
    # truncnorm expects its bounds expressed relative to loc/scale, i.e. as
    # (bound - mean) / sd.
    lower = (low - mean) / sd
    upper = (upp - mean) / sd
    distribution = truncnorm(a=lower, b=upper, loc=mean, scale=sd)

    # Draw a flat vector with one value per element, then fold it into shape.
    count = int(np.prod(shape))
    samples = distribution.rvs(count)
    return samples.reshape(shape)
from scipy.stats import truncnorm

# Experiment "trunc_normal": normal(0, 0.01) weights clipped to one
# standard deviation on either side of the mean.
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5
std = 0.01

layers = OrderedDict()
for i in range(num_layers):
    if i % 2:
        # Odd slots hold the activation that follows each Affine layer.
        layers[i] = Relu()
        continue
    w = get_truncated_normal(
        shape=(node_num, node_num), mean=0.0, sd=std, low=-std, upp=std)
    b = np.zeros(node_num)
    layers[i] = Affine(w, b)

forward, backward = activation_value_histogram(
    layers,
    num_epochs,
    forward_plt=False,
    backward_plt=False,
    describe=False,
)
forward_dic['trunc_normal'] = forward
backward_dic['trunc_normal'] = backward
from scipy.stats import truncnorm

# Experiment "lecun_normal": truncated normal with sd = sqrt(1 / fan_in).
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5

# The scale is the same for every layer, so compute it once.
sd = math.sqrt(1.0 / node_num)

layers = OrderedDict()
for i in range(num_layers):
    if i % 2 == 0:
        # Use the LeCun scale computed above. Passing the leftover global
        # `std` (0.01) here would make this run a duplicate of "trunc_normal".
        # NOTE(review): truncating at +/- 1 sd mirrors the "trunc_normal"
        # experiment; Keras truncates at +/- 2 sd - confirm which is intended.
        w = get_truncated_normal(
            shape=(node_num, node_num), mean=0.0, sd=sd, low=-sd, upp=sd)
        b = np.zeros(node_num)
        layers[i] = Affine(w, b)
    else:
        layers[i] = Relu()

forward, backward = activation_value_histogram(
    layers, num_epochs, forward_plt=False, backward_plt=False, describe=False)
forward_dic['lecun_normal'] = forward
backward_dic['lecun_normal'] = backward
# Unused in this section; kept from the original notebook cell.
from scipy.stats import truncnorm

# Experiment "lecun_uniform": uniform weights in [-sd, sd], sd = sqrt(1 / fan_in).
# NOTE(review): the usual LeCun-uniform limit is sqrt(3 / fan_in) - confirm
# that the smaller range used here is intentional.
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5
sd = math.sqrt(1.0 / node_num)

layers = OrderedDict()
for i in range(num_layers):
    if i % 2:
        # Odd slots hold the activation that follows each Affine layer.
        layers[i] = Relu()
        continue
    w = np.random.uniform(low=-sd, high=sd, size=(node_num, node_num))
    b = np.zeros(node_num)
    layers[i] = Affine(w, b)

forward, backward = activation_value_histogram(
    layers,
    num_epochs,
    forward_plt=False,
    backward_plt=False,
    describe=False,
)
forward_dic['lecun_uniform'] = forward
backward_dic['lecun_uniform'] = backward
from scipy.stats import truncnorm

# Experiment "xavier_normal": truncated normal with
# sd = sqrt(1 / (fan_in + fan_out)).
# NOTE(review): the usual Glorot/Xavier normal scale is
# sqrt(2 / (fan_in + fan_out)) - confirm the factor used here is intended.
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5

# The scale is the same for every layer, so compute it once.
sd = math.sqrt(1.0 / (node_num + node_num))

layers = OrderedDict()
for i in range(num_layers):
    if i % 2 == 0:
        # Use the Xavier scale computed above. Passing the leftover global
        # `std` (0.01) here would make this run a duplicate of "trunc_normal".
        w = get_truncated_normal(
            shape=(node_num, node_num), mean=0.0, sd=sd, low=-sd, upp=sd)
        b = np.zeros(node_num)
        layers[i] = Affine(w, b)
    else:
        layers[i] = Relu()

forward, backward = activation_value_histogram(
    layers, num_epochs, forward_plt=False, backward_plt=False, describe=False)
forward_dic['xavier_normal'] = forward
backward_dic['xavier_normal'] = backward
# Unused in this section; kept from the original notebook cell.
from scipy.stats import truncnorm

# Experiment "xavier_uniform": uniform weights in [-sd, sd],
# sd = sqrt(1 / (fan_in + fan_out)).
# NOTE(review): the usual Glorot/Xavier uniform limit is
# sqrt(6 / (fan_in + fan_out)) - confirm the range used here is intended.
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5
sd = math.sqrt(1.0 / (node_num + node_num))

layers = OrderedDict()
for i in range(num_layers):
    if i % 2:
        # Odd slots hold the activation that follows each Affine layer.
        layers[i] = Relu()
        continue
    w = np.random.uniform(low=-sd, high=sd, size=(node_num, node_num))
    b = np.zeros(node_num)
    layers[i] = Affine(w, b)

forward, backward = activation_value_histogram(
    layers,
    num_epochs,
    forward_plt=False,
    backward_plt=False,
    describe=False,
)
forward_dic['xavier_uniform'] = forward
backward_dic['xavier_uniform'] = backward
from scipy.stats import truncnorm

# Experiment "he_normal": truncated normal with
# sd = sqrt(2 / (fan_in + fan_out)).
# NOTE(review): the usual He normal scale is sqrt(2 / fan_in) - confirm the
# denominator used here is intended.
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5

# The scale is the same for every layer, so compute it once.
sd = math.sqrt(2.0 / (node_num + node_num))

layers = OrderedDict()
for i in range(num_layers):
    if i % 2 == 0:
        # Use the He scale computed above. Passing the leftover global
        # `std` (0.01) here would make this run a duplicate of "trunc_normal".
        w = get_truncated_normal(
            shape=(node_num, node_num), mean=0.0, sd=sd, low=-sd, upp=sd)
        b = np.zeros(node_num)
        layers[i] = Affine(w, b)
    else:
        layers[i] = Relu()

forward, backward = activation_value_histogram(
    layers, num_epochs, forward_plt=False, backward_plt=False, describe=False)
forward_dic['he_normal'] = forward
backward_dic['he_normal'] = backward
# Unused in this section; kept from the original notebook cell.
from scipy.stats import truncnorm

# Experiment "he_uniform": uniform weights in [-sd, sd],
# sd = sqrt(2 / (fan_in + fan_out)).
# NOTE(review): the usual He uniform limit is sqrt(6 / fan_in) - confirm the
# range used here is intended.
node_num = 100   # number of nodes in each hidden layer
num_layers = 6   # 3 hidden layers + 3 activation layers
num_epochs = 5
sd = math.sqrt(2.0 / (node_num + node_num))

layers = OrderedDict()
for i in range(num_layers):
    if i % 2:
        # Odd slots hold the activation that follows each Affine layer.
        layers[i] = Relu()
        continue
    w = np.random.uniform(low=-sd, high=sd, size=(node_num, node_num))
    b = np.zeros(node_num)
    layers[i] = Affine(w, b)

forward, backward = activation_value_histogram(
    layers,
    num_epochs,
    forward_plt=False,
    backward_plt=False,
    describe=False,
)
forward_dic['he_uniform'] = forward
backward_dic['he_uniform'] = backward
# Enable inline figures when running under IPython/Jupyter. The bare
# "%matplotlib inline" magic is a syntax error in a plain Python file.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass  # plain Python interpreter: nothing to configure

# Mean of the forward activations per epoch: one subplot per layer, one line
# per initialisation scheme.
epoch_list = np.arange(num_epochs)
f, axarr = plt.subplots(1, num_layers, figsize=(30, 10))
for layer in range(num_layers):
    for key, forward in forward_dic.items():
        # "zero" and "normal1" are left out of the comparison
        # (presumably because their scale would dominate the axis - confirm).
        if key in ("zero", "normal1"):
            continue
        mean_per_layer = [forward[epoch][layer][0] for epoch in range(num_epochs)]
        axarr[layer].plot(epoch_list, mean_per_layer, marker=markers[key], markevery=2, label=key)
    axarr[layer].set_ylabel('Mean')
    axarr[layer].set_xlabel('Epochs')
    axarr[layer].grid(True)
    axarr[layer].set_title('Mean: layer' + str(layer))
    axarr[layer].legend(loc='upper left')
f.subplots_adjust(hspace=0.7)
plt.show()
# Standard deviation of the forward activations per epoch. The top row uses
# automatic y-limits; the bottom row shows the same curves with fixed limits.
epoch_list = np.arange(num_epochs)
f, axarr = plt.subplots(2, 6, figsize=(30, 20))
for layer in range(6):
    top, bottom = axarr[0, layer], axarr[1, layer]

    for key, forward in forward_dic.items():
        if key in ("zero", "normal1"):
            continue
        std_per_layer = [forward[epoch][layer][1] for epoch in range(num_epochs)]
        # Same series on both rows; only the y-range differs.
        for ax in (top, bottom):
            ax.plot(epoch_list, std_per_layer, marker=markers[key], markevery=2, label=key)

    for ax in (top, bottom):
        ax.set_ylabel('STD.')
        ax.set_xlabel('Epochs')
        ax.grid(True)
        ax.set_title('STD: layer' + str(layer))
        ax.legend(loc='upper left')

    # Layer 4 gets a wider fixed window than the others.
    bottom.set_ylim(0.0, 10.0 if layer == 4 else 0.5)
plt.show()