CNN 的創始者 Yann LeCun 第一個 CNN 例子就是用來辨識手寫、印刷的 0-9 數字。這裡用 LeCun 原版資料:
每個手寫圖檔都是 28x28 大小的, 輸出原本是 0-9 的數字, 但我們想改成: 辨識出資料是 0 就要輸出
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
是 3 就是
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
等等。
另外我們用最傳統的 Fully Connected Feedforward 架構, 而不是用 CNN:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
Using Theano backend.
其中的 `keras` 是我們選用的 deep learning 套件, 非常方便, 它甚至幫我們準備好了 MNIST 這類有名的資料庫。
# Load MNIST; unpack into (train images, train labels), (test images, test labels).
(x0_train, y0_train), (x0_test, y0_test) = mnist.load_data()
最親切的是它還分好訓練資料和測試資料, 我們來看看各有多少筆資料。
print("訓練資料筆數: ", len(x0_train))
print("測試資料筆數: ", len(x0_test))
訓練資料筆數: 60000 測試資料筆數: 10000
我們來看看資料的內容長什麼樣子。
ind = 777
X = x0_train[ind]
X
array([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 59, 156, 239, 254, 254, 254, 255, 246, 109, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 147, 220, 253, 254, 253, 253, 253, 245, 254, 253, 129, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, 138, 211, 253, 253, 253, 254, 216, 144, 78, 48, 101, 92, 24, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 181, 254, 253, 253, 216, 138, 57, 10, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 253, 254, 245, 126, 5, 0, 0, 0, 0, 0, 0, 121, 239, 135, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 254, 255, 246, 71, 0, 0, 0, 0, 0, 85, 254, 255, 249, 60, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 114, 254, 253, 241, 69, 0, 0, 10, 118, 250, 253, 249, 65, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 253, 253, 241, 101, 138, 211, 253, 253, 222, 60, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 149, 253, 253, 253, 254, 253, 240, 198, 34, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 253, 253, 253, 254, 207, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 239, 254, 254, 254, 255, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 95, 239, 253, 247, 236, 253, 254, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 107, 253, 254, 247, 84, 40, 253, 254, 135, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 58, 237, 253, 207, 32, 0, 24, 230, 254, 135, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 175, 253, 253, 60, 0, 0, 152, 253, 254, 135, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0], [ 0, 0, 0, 0, 0, 0, 0, 20, 254, 254, 155, 0, 0, 49, 246, 254, 241, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 20, 253, 253, 162, 13, 73, 226, 253, 253, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 14, 229, 253, 253, 224, 253, 253, 253, 185, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 81, 227, 253, 254, 253, 253, 201, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 155, 216, 245, 126, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
plt.imshow(X, cmap='Greys')
<matplotlib.image.AxesImage at 0x108edb048>
如果堅持不要外框, 可以把座標軸關掉:
# Draw the sample on an explicit Axes with its frame and ticks hidden.
fig, ax = plt.subplots()
ax.axis('off')
ax.imshow(X, cmap='Greys')
<matplotlib.image.AxesImage at 0x10ac13cf8>
看一下「正確答案」。
y0_train[ind]
8
from ipywidgets import interact
def show(ind):
    """Print the label of training sample ``ind`` and draw its image.

    ind: integer index into ``x0_train`` / ``y0_train``; the ``interact``
    slider in this notebook supplies values from 0 to 59999.
    """
    print("數字: ", y0_train[ind])
    # Use an explicit Axes so the frame and ticks can be hidden.
    fig, ax = plt.subplots()
    ax.set_axis_off()
    ax.imshow(x0_train[ind], cmap='Greys')
interact(show, ind=(0,59999))
數字: 7
現在每個訓練資料都是 28x28 的矩陣, 但我們常用的輸入是「拉平」的向量, 也就是每筆輸入是長度 784 (=28x28) 的向量。
# Flatten every 28x28 image into a 784-element row. The row count comes from
# the data itself rather than a hard-coded 60000, so the same expression
# also works for arrays with a different number of samples.
x_train = x0_train.reshape(len(x0_train), 28*28)
# Display the first flattened sample.
x_train[0]
array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18, 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253, 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253, 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 219, 253, 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, 156, 107, 253, 253, 205, 11, 0, 43, 154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 1, 154, 253, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 253, 190, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 190, 253, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 241, 225, 160, 108, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 240, 253, 253, 119, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 186, 253, 253, 150, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 93, 252, 253, 187, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 253, 249, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 130, 183, 253, 253, 207, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 148, 229, 253, 253, 253, 250, 182, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 114, 221, 253, 253, 253, 253, 201, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 23, 66, 213, 253, 253, 253, 253, 198, 81, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 171, 219, 253, 253, 253, 253, 195, 80, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 172, 226, 253, 253, 253, 253, 244, 133, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 253, 253, 253, 212, 135, 132, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)
`x0_test` 也改成「平的」向量

輸出原本是 0-9 的數字, 但我們想改成: 辨識出資料是 0 就要輸出
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
是 3 就是
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
等等。
y0_train[0]
5
# NOTE(review): newer Keras releases expose to_categorical directly as
# keras.utils.to_categorical and no longer ship np_utils — confirm against
# the installed version before running.
from keras.utils import np_utils
# One-hot encode the integer labels into vectors of length 10.
y_train = np_utils.to_categorical(y0_train,10)
# Display the encoded label of the first training sample.
y_train[0]
array([ 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])
ind = 10
print("改變前: ", y0_train[ind])
print("改變後: ", y_train[ind])
改變前: 3 改變後: [ 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
`y0_test` 也修改過來