This notebook can be viewed with the Jupyter notebook viewer (nbviewer.jupyter.org) or run on Google Colab (colab.research.google.com).
Running this notebook requires TensorFlow 2.0.0-alpha0 or later.
import numpy as np

class MultiClassNetwork:

    def __init__(self, units=10, batch_size=32, learning_rate=0.1, l1=0, l2=0):
        self.units = units            # number of neurons in the hidden layer
        self.batch_size = batch_size  # mini-batch size
        self.w1 = None                # hidden-layer weights
        self.b1 = None                # hidden-layer bias
        self.w2 = None                # output-layer weights
        self.b2 = None                # output-layer bias
        self.a1 = None                # hidden-layer activation output
        self.losses = []              # training losses
        self.val_losses = []          # validation losses
        self.lr = learning_rate      # learning rate
        self.l1 = l1                  # L1 regularization hyperparameter
        self.l2 = l2                  # L2 regularization hyperparameter

    def forpass(self, x):
        z1 = np.dot(x, self.w1) + self.b1        # linear combination of the first layer
        self.a1 = self.sigmoid(z1)               # apply the activation function
        z2 = np.dot(self.a1, self.w2) + self.b2  # linear combination of the second layer
        return z2

    def backprop(self, x, err):
        m = len(x)  # number of samples
        # Gradients for the output-layer weights and bias.
        w2_grad = np.dot(self.a1.T, err) / m
        b2_grad = np.sum(err, axis=0) / m  # sum over samples only, keeping one value per class
        # Propagate the gradient back through the sigmoid function.
        err_to_hidden = np.dot(err, self.w2.T) * self.a1 * (1 - self.a1)
        # Gradients for the hidden-layer weights and bias.
        w1_grad = np.dot(x.T, err_to_hidden) / m
        b1_grad = np.sum(err_to_hidden, axis=0) / m
        return w1_grad, b1_grad, w2_grad, b2_grad

    def sigmoid(self, z):
        z = np.clip(z, -100, None)  # clip so np.exp() stays in a safe range
        a = 1 / (1 + np.exp(-z))    # sigmoid
        return a

    def softmax(self, z):
        # Softmax function.
        z = np.clip(z, -100, None)  # clip so np.exp() stays in a safe range
        exp_z = np.exp(z)
        return exp_z / np.sum(exp_z, axis=1).reshape(-1, 1)

    def init_weights(self, n_features, n_classes):
        self.w1 = np.random.normal(0, 1,
                                   (n_features, self.units))  # (n_features, hidden size)
        self.b1 = np.zeros(self.units)                        # hidden size
        self.w2 = np.random.normal(0, 1,
                                   (self.units, n_classes))   # (hidden size, n_classes)
        self.b2 = np.zeros(n_classes)

    def fit(self, x, y, epochs=100, x_val=None, y_val=None):
        np.random.seed(42)
        self.init_weights(x.shape[1], y.shape[1])  # initialize hidden- and output-layer weights
        # Repeat for the requested number of epochs.
        for i in range(epochs):
            loss = 0
            print('.', end='')
            # Iterate over the mini-batches yielded by the generator function.
            for x_batch, y_batch in self.gen_batch(x, y):
                a = self.training(x_batch, y_batch)
                # Clip so the log computation is safe.
                a = np.clip(a, 1e-10, 1-1e-10)
                # Accumulate the cross-entropy loss.
                loss += np.sum(-y_batch*np.log(a))
            # Add the regularization loss and append the average to the list.
            self.losses.append((loss + self.reg_loss()) / len(x))
            # Compute the loss on the validation set.
            self.update_val_loss(x_val, y_val)

    # Mini-batch generator function.
    def gen_batch(self, x, y):
        length = len(x)
        bins = length // self.batch_size  # number of mini-batches
        if length % self.batch_size:
            bins += 1                     # one more when it does not divide evenly
        indexes = np.random.permutation(np.arange(len(x)))  # shuffle the indices
        x = x[indexes]
        y = y[indexes]
        for i in range(bins):
            start = self.batch_size * i
            end = self.batch_size * (i + 1)
            yield x[start:end], y[start:end]  # slice out batch_size samples at a time

    def training(self, x, y):
        m = len(x)           # number of samples
        z = self.forpass(x)  # forward pass
        a = self.softmax(z)  # apply the activation function
        err = -(y - a)       # compute the error
        # Backpropagate the error to compute the gradients.
        w1_grad, b1_grad, w2_grad, b2_grad = self.backprop(x, err)
        # Add the derivative of the penalty term to the gradients.
        w1_grad += (self.l1 * np.sign(self.w1) + self.l2 * self.w1) / m
        w2_grad += (self.l1 * np.sign(self.w2) + self.l2 * self.w2) / m
        # Update the hidden-layer weights and bias.
        self.w1 -= self.lr * w1_grad
        self.b1 -= self.lr * b1_grad
        # Update the output-layer weights and bias.
        self.w2 -= self.lr * w2_grad
        self.b2 -= self.lr * b2_grad
        return a

    def predict(self, x):
        z = self.forpass(x)          # forward pass
        return np.argmax(z, axis=1)  # return the index of the largest value

    def score(self, x, y):
        # Compare predictions with the targets and return the fraction that match.
        return np.mean(self.predict(x) == np.argmax(y, axis=1))

    def reg_loss(self):
        # Regularization applied to the hidden- and output-layer weights.
        return self.l1 * (np.sum(np.abs(self.w1)) + np.sum(np.abs(self.w2))) + \
               self.l2 / 2 * (np.sum(self.w1**2) + np.sum(self.w2**2))

    def update_val_loss(self, x_val, y_val):
        z = self.forpass(x_val)         # forward pass
        a = self.softmax(z)             # apply the activation function
        a = np.clip(a, 1e-10, 1-1e-10)  # clip the output values
        # Add the cross-entropy loss and the regularization loss, then append to the list.
        val_loss = np.sum(-y_val*np.log(a))
        self.val_losses.append((val_loss + self.reg_loss()) / len(y_val))
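softmax() reuses the same lower clip at -100 that protects the sigmoid, but np.exp(z) can still overflow for large positive logits. A common alternative, shown here only as a sketch and not used by the class above, is the max-subtraction trick, which shifts each row so its largest logit is 0 without changing the result:

def stable_softmax(z):
    # Shift each row so its largest logit becomes 0; exp() then never overflows,
    # and softmax is invariant to adding a constant per row.
    z = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)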
import tensorflow as tf
tf.__version__
'2.6.0'
(x_train_all, y_train_all), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
print(x_train_all.shape, y_train_all.shape)
(60000, 28, 28) (60000,)
import matplotlib.pyplot as plt
plt.imshow(x_train_all[0], cmap='gray')
plt.show()
print(y_train_all[:10])
[9 0 0 3 0 2 7 2 5 5]
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
print(class_names[y_train_all[0]])
Ankle boot
np.bincount(y_train_all)
array([6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000, 6000])
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(x_train_all, y_train_all, stratify=y_train_all,
test_size=0.2, random_state=42)
np.bincount(y_train)
array([4800, 4800, 4800, 4800, 4800, 4800, 4800, 4800, 4800, 4800])
np.bincount(y_val)
array([1200, 1200, 1200, 1200, 1200, 1200, 1200, 1200, 1200, 1200])
x_train = x_train / 255
x_val = x_val / 255
x_train = x_train.reshape(-1, 784)
x_val = x_val.reshape(-1, 784)
print(x_train.shape, x_val.shape)
(48000, 784) (12000, 784)
from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer()
lb.fit_transform([0, 1, 3, 1])
array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0]])
tf.keras.utils.to_categorical([0, 1, 3])
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.]], dtype=float32)
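Note that both utilities infer the output width from the labels they see: LabelBinarizer found three distinct labels above, while to_categorical used max label + 1 = 4 columns. When a batch might not contain every class, the width can be pinned with to_categorical's num_classes argument (a small sketch, not a cell from the original notebook):

tf.keras.utils.to_categorical([0, 1, 3], num_classes=10)  # always 10 columns, one per Fashion MNIST class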
y_train_encoded = tf.keras.utils.to_categorical(y_train)
y_val_encoded = tf.keras.utils.to_categorical(y_val)
print(y_train_encoded.shape, y_val_encoded.shape)
(48000, 10) (12000, 10)
print(y_train[0], y_train_encoded[0])
6 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
fc = MultiClassNetwork(units=100, batch_size=256)
fc.fit(x_train, y_train_encoded,
x_val=x_val, y_val=y_val_encoded, epochs=40)
........................................
plt.plot(fc.losses)
plt.plot(fc.val_losses)
plt.ylabel('loss')
plt.xlabel('iteration')
plt.legend(['train_loss', 'val_loss'])
plt.show()
fc.score(x_val, y_val_encoded)
0.8150833333333334
np.random.permutation(np.arange(12000)%10)
array([4, 6, 3, ..., 0, 6, 6])
np.sum(y_val == np.random.permutation(np.arange(12000)%10)) / 12000
0.10325
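These two cells sanity-check the 81.5% score against chance level: shuffled labels spread evenly over 10 classes match the true labels about 10% of the time. A minimal cross-check (not in the original notebook) using the balanced class counts computed earlier:

# Each class holds exactly 1200 of the 12000 validation labels (see np.bincount(y_val)),
# so always predicting any single fixed class is right 1200/12000 = 10% of the time.
print(np.mean(y_val == 0))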
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
model = Sequential()
model.add(Dense(100, activation='sigmoid', input_shape=(784,)))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='sgd', loss='categorical_crossentropy',
metrics=['accuracy'])
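As a quick structural check (not a cell in the original notebook), model.summary() would list both Dense layers; each layer's parameter count follows from (inputs + 1) × units:

model.summary()
# Dense(100): (784 + 1) * 100 = 78,500 parameters
# Dense(10):  (100 + 1) * 10  =  1,010 parameters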
history = model.fit(x_train, y_train_encoded, epochs=40,
validation_data=(x_val, y_val_encoded))
Epoch 1/40
1500/1500 [==============================] - 7s 3ms/step - loss: 1.3560 - accuracy: 0.6576 - val_loss: 0.9579 - val_accuracy: 0.7210
Epoch 2/40
1500/1500 [==============================] - 5s 3ms/step - loss: 0.8419 - accuracy: 0.7402 - val_loss: 0.7535 - val_accuracy: 0.7562
Epoch 3/40
1500/1500 [==============================] - 5s 3ms/step - loss: 0.7128 - accuracy: 0.7647 - val_loss: 0.6663 - val_accuracy: 0.7745
...
Epoch 39/40
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3903 - accuracy: 0.8619 - val_loss: 0.3880 - val_accuracy: 0.8635
Epoch 40/40
1500/1500 [==============================] - 5s 3ms/step - loss: 0.3886 - accuracy: 0.8624 - val_loss: 0.3865 - val_accuracy: 0.8630
print(history.history.keys())
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train_loss', 'val_loss'])
plt.show()
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train_accuracy', 'val_accuracy'])
plt.show()
loss, accuracy = model.evaluate(x_val, y_val_encoded, verbose=0)
print(accuracy)
0.8629999756813049
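The test set loaded at the start is never touched above. A minimal sketch of a final test-set evaluation, mirroring the preprocessing applied to the training and validation data (the variable names here are illustrative):

x_test_scaled = (x_test / 255).reshape(-1, 784)         # scale and flatten, as for x_train/x_val
y_test_encoded = tf.keras.utils.to_categorical(y_test)  # one-hot encode the labels
loss, accuracy = model.evaluate(x_test_scaled, y_test_encoded, verbose=0)
print(accuracy)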