%matplotlib inline
import gluonbook as gb
from mxnet.gluon import data as gdata
import sys
import time
Gluon's data package provides convenient access to Fashion-MNIST; train=True loads the training set and train=False the test set.

mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)
len(mnist_train), len(mnist_test)
(60000, 10000)
We can access any example by indexing into the data set. The image of the first example has height and width 28 with a single grayscale channel, stored as uint8, and its label is an int32 scalar.

feature, label = mnist_train[0]
print(feature.shape, feature.dtype)
print(label, label.shape, label.dtype)
(28, 28, 1) <class 'numpy.uint8'>
2 () int32
There are 10 categories in Fashion-MNIST. The following function converts a numeric label into the corresponding text label.

def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
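As a quick check of the helper just defined, labels 0, 1, and 9 map to the following text labels:

get_fashion_mnist_labels([0, 1, 9])  # ['t-shirt', 'trouser', 'ankle boot']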
Next we define a function that draws several images and their labels in a single row.

def show_fashion_mnist(images, labels):
    gb.use_svg_display()
    _, figs = gb.plt.subplots(1, len(images), figsize=(12, 12))
    for f, img, lbl in zip(figs, images, labels):
        f.imshow(img.reshape((28, 28)).asnumpy())
        f.set_title(lbl)
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)
Let us take a look at the images and text labels of the first nine examples in the training set.

X, y = mnist_train[0:9]
show_fashion_mnist(X, get_fashion_mnist_labels(y))
Reading data is often a significant performance bottleneck during training, especially when the model is simple or the computing hardware is fast. A handy feature of Gluon's DataLoader is the ability to use multiple processes to speed up data reading (this is not currently supported on Windows); the number of worker processes is set through the num_workers parameter.

In addition, we use the ToTensor class to convert the image data from uint8 to 32-bit floating point format and divide all values by 255, so that every pixel lies between 0 and 1. Through the transform_first function of the data set, we apply the transformation of ToTensor to the first element of each data example, the image, leaving the label untouched.

batch_size = 256
transformer = gdata.vision.transforms.ToTensor()
if sys.platform.startswith('win'):
    num_workers = 0  # 0 means no additional processes are needed to speed up the reading of data.
else:
    num_workers = 4
train_iter = gdata.DataLoader(
    dataset=mnist_train.transform_first(transformer),
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers
)
test_iter = gdata.DataLoader(
    dataset=mnist_test.transform_first(transformer),
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers
)
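As a quick sanity check (not part of the original pipeline), we can apply the transformer to a single image and confirm the conversion that ToTensor performs: the uint8 image of shape (28, 28, 1) becomes a float32 array of shape (1, 28, 28) with values in [0, 1].

feature, label = mnist_train[0]
f = transformer(feature)  # apply ToTensor to one image
print(f.shape, f.dtype)   # expected: (1, 28, 28) <class 'numpy.float32'>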
The logic for obtaining and reading the Fashion-MNIST data set is encapsulated in the gluonbook.load_data_fashion_mnist function, which we will call in later chapters. The function returns the two iterators train_iter and test_iter. Let us look at the shapes of the first few mini-batches and measure how long one pass over the training data takes.
start = time.time()
idx = 0
for X, y in train_iter:
    if idx < 5:
        print(type(X), X.shape, type(y), y.shape)
    idx += 1
'%.2f sec' % (time.time() - start)
<class 'mxnet.ndarray.ndarray.NDArray'> (256, 1, 28, 28) <class 'mxnet.ndarray.ndarray.NDArray'> (256,)
<class 'mxnet.ndarray.ndarray.NDArray'> (256, 1, 28, 28) <class 'mxnet.ndarray.ndarray.NDArray'> (256,)
<class 'mxnet.ndarray.ndarray.NDArray'> (256, 1, 28, 28) <class 'mxnet.ndarray.ndarray.NDArray'> (256,)
<class 'mxnet.ndarray.ndarray.NDArray'> (256, 1, 28, 28) <class 'mxnet.ndarray.ndarray.NDArray'> (256,)
<class 'mxnet.ndarray.ndarray.NDArray'> (256, 1, 28, 28) <class 'mxnet.ndarray.ndarray.NDArray'> (256,)
'1.12 sec'
Now that we can work with the Fashion-MNIST data set, we are ready to implement softmax regression from scratch. First we import the packages needed in this section.

%matplotlib inline
import gluonbook as gb
from mxnet import autograd, nd
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
Each example is a 28 × 28 grayscale image, which we flatten into a vector of length 28 × 28 = 784; since Fashion-MNIST has 10 categories, the weight matrix W has shape (784, 10) and the bias vector b has length 10. As usual, we attach gradients to the model parameters.

num_inputs = 784
num_outputs = 10
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)
W.attach_grad()
b.attach_grad()
Before implementing the softmax operation, let us briefly review how the sum operator behaves along a specific axis of an NDArray. Given a matrix X, we can sum the elements of each column (axis=0) or of each row (axis=1). With keepdims=True, the summed axis is retained with size 1, so the result keeps the original number of axes; with keepdims=False, the summed axis is dropped.
X = nd.array([[1, 2, 3], [4, 5, 6]])
print(X.sum(axis=0, keepdims=True))
print(X.sum(axis=0, keepdims=False))
print(X.sum(axis=1, keepdims=True))
print(X.sum(axis=1, keepdims=False))
[[5. 7. 9.]]
<NDArray 1x3 @cpu(0)>
[5. 7. 9.]
<NDArray 3 @cpu(0)>
[[ 6.]
 [15.]]
<NDArray 2x1 @cpu(0)>
[ 6. 15.]
<NDArray 2 @cpu(0)>
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition  # The broadcast mechanism is applied here.
X = nd.random.normal(shape=(2, 5))
print(X)
X_prob = softmax(X)
print(X_prob)
print(X_prob.sum(axis=1))
[[-0.39319903 -0.01926271  2.0030491  -0.2795944   0.08167122]
 [-0.9285665  -0.59975994 -0.55876654 -1.2474236   0.38869882]]
<NDArray 2x5 @cpu(0)>
[[0.06186795 0.08992165 0.6794275  0.06931123 0.09947164]
 [0.12052648 0.16744854 0.17445546 0.08762027 0.44994926]]
<NDArray 2x5 @cpu(0)>
[1. 1.]
<NDArray 2 @cpu(0)>
def net(X):
    return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b)
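As a quick shape check (a sketch, not part of the original notebook), the model flattens each batch of images from (batch_size, 1, 28, 28) into (batch_size, 784) and returns one probability distribution per example:

for X, y in train_iter:
    print(net(X).shape)  # (256, 10): one row of 10 class probabilities per example
    break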
To implement the cross-entropy loss, we need the predicted probability of the correct category for each example. Rather than looping over the predictions in Python, we can use the nd.pick function, which gathers from each row of y_hat the entry indexed by the corresponding label in y. In the example below, y_hat contains the predicted probabilities of 2 examples over 3 categories, and y holds their labels.

y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = nd.array([0, 2])
nd.pick(y_hat, y)
[0.1 0.5] <NDArray 2 @cpu(0)>
def cross_entropy(y_hat, y):
    return - nd.pick(y_hat, y).log()
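Applied to the toy y_hat and y defined above, the loss picks out the probabilities 0.1 and 0.5 and takes their negative logs:

cross_entropy(y_hat, y)  # [2.3025851 0.6931472], i.e. -log(0.1) and -log(0.5)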
Given a predicted probability distribution y_hat, the predicted category is the one with the highest probability, i.e. the argmax of each row. The accuracy function returns the fraction of predictions that match the labels y.

def accuracy(y_hat, y):
    return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()
accuracy(y_hat, y)
0.5

The first example's highest predicted probability is for category 2 while its label is 0, and the second example's prediction matches its label 2, so the accuracy is 0.5.
Similarly, we can evaluate the accuracy of a model net over an entire data set data_iter.

def evaluate_accuracy(data_iter, net):
    acc = 0
    for X, y in data_iter:
        acc += accuracy(net(X), y)
    return acc / len(data_iter)
evaluate_accuracy(test_iter, net)
0.087109375

Because we initialized the parameters randomly, the accuracy is close to random guessing over 10 categories, i.e. about 0.1.
num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum = 0
        train_acc_sum = 0
        for X, y in train_iter:
            with autograd.record():
                y_hat = net(X)
                l = loss(y_hat, y)
            l.backward()
            if trainer is None:
                gb.sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)  # This will be illustrated in the next section.
            train_l_sum += l.mean().asscalar()
            train_acc_sum += accuracy(y_hat, y)
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch {0}, loss {1:.4f}, train acc {2:.3f}, test acc {3:.3f}'.format(
            epoch + 1, train_l_sum / len(train_iter),
            train_acc_sum / len(train_iter), test_acc))
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr, None)
epoch 1, loss 0.7879, train acc 0.748, test acc 0.802
epoch 2, loss 0.5732, train acc 0.812, test acc 0.824
epoch 3, loss 0.5293, train acc 0.823, test acc 0.827
epoch 4, loss 0.5048, train acc 0.831, test acc 0.837
epoch 5, loss 0.4884, train acc 0.836, test acc 0.840
Now that training is complete, we can classify some test images and compare the true labels (first line of each title) with the model's predictions (second line).

for X, y in test_iter:
    break
true_labels = gb.get_fashion_mnist_labels(y.asnumpy())
pred_labels = gb.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
titles = [truelabel + '\n' + predlabel for truelabel, predlabel in zip(true_labels, pred_labels)]
gb.show_fashion_mnist(X[0:9], titles[0:9])
Finally, we implement the same model far more concisely with Gluon. Again we import the packages needed in this section.

%matplotlib inline
import gluonbook as gb
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
batch_size = 256
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
The output layer of softmax regression is a fully connected (Dense) layer with 10 outputs, one per category, so we add a Dense layer with 10 outputs and initialize its weights randomly with zero mean and standard deviation 0.01.

net = nn.Sequential()
net.add(nn.Dense(10))
net.initialize(init.Normal(sigma=0.01))
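Note that we never specified the input dimension of the Dense layer; Gluon infers it on the first forward pass (deferred initialization). A small sketch, assuming the train_iter defined above, to inspect the inferred shape:

for X, y in train_iter:
    net(X)  # the first forward pass triggers shape inference
    break
print(net[0].weight.shape)  # (10, 784)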
Computing the softmax and the cross-entropy loss separately, as we did in the previous section, can run into numerical stability problems: when some entries of the linear output (the logits) $z_1, z_2, \ldots, z_n$ are very large, $e^{z_j}$ can overflow. A standard remedy is to subtract the largest logit,

$$z^* = \max \left\{ z_1, z_2, \ldots, z_n \right\},$$

from every $z_j$ before exponentiating; this leaves the softmax output unchanged while keeping the exponents bounded above by zero. Combining the softmax with the cross-entropy loss then gives

$$\log \hat{y}_j = z_j - z^* - \log \left( \sum_{i=1}^n e^{z_i - z^*} \right),$$

which never computes the potentially huge values $e^{z_j}$ explicitly. Gluon's SoftmaxCrossEntropyLoss performs this combined, numerically stable computation, which is why the network above outputs the raw linear values rather than softmax probabilities.
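A minimal numerical sketch of this trick (illustrative values, not from the original text): summing exponentials of large logits overflows 32-bit floats, while the shifted computation recovers the correct log-sum.

from mxnet import nd

z = nd.array([[100., 10., 1.]])
print(z.exp().sum(axis=1).log())  # [inf]: exp(100) overflows float32
z_star = z.max(axis=1, keepdims=True)
stable = (z - z_star).exp().sum(axis=1, keepdims=True).log() + z_star
print(stable)                     # [[100.]]: the log-sum-exp, computed stably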
loss = gloss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
num_epochs = 5
gb.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
epoch 1, loss 0.7903, train acc 0.746, test acc 0.803
epoch 2, loss 0.5733, train acc 0.811, test acc 0.823
epoch 3, loss 0.5285, train acc 0.824, test acc 0.835
epoch 4, loss 0.5048, train acc 0.830, test acc 0.836
epoch 5, loss 0.4898, train acc 0.834, test acc 0.840