import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import nn
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    # Use autograd to determine whether the current mode is training mode or
    # prediction mode.
    if not autograd.is_training():
        # In prediction mode, directly use the mean and variance obtained from
        # the incoming moving averages.
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # For a fully connected layer, calculate the mean and variance on
            # the feature dimension.
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # For a two-dimensional convolutional layer, calculate the mean and
            # variance on the channel dimension (axis=1). Here we need to
            # maintain the shape of X, so that the broadcast operation can be
            # carried out later.
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
        # In training mode, the current mean and variance are used for the
        # standardization.
        X_hat = (X - mean) / nd.sqrt(var + eps)
        # Update the moving averages of the mean and variance.
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # Scale and shift.
    return Y, moving_mean, moving_var
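Before wiring this function into a layer, we can sanity-check it on a toy input (a minimal sketch that is not part of the original text; the values are made up for illustration). Inside an autograd.record() scope, autograd.is_training() returns True, so the training branch is exercised:

X = nd.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
gamma, beta = nd.ones((1, 2)), nd.zeros((1, 2))
moving_mean, moving_var = nd.zeros((1, 2)), nd.zeros((1, 2))
with autograd.record():  # Training mode: batch statistics are used.
    Y, moving_mean, moving_var = batch_norm(
        X, gamma, beta, moving_mean, moving_var, eps=1e-5, momentum=0.9)
print(Y.mean(axis=0))  # Approximately zero mean per feature.
print(moving_mean)     # Moved 10% of the way toward the batch mean [3, 4].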
The custom BatchNorm layer retains the scale parameter gamma and the shift parameter beta involved in gradient computation and iteration, and it also maintains the moving mean and moving variance so that they can be used during prediction.
class BatchNorm(nn.Block):
    def __init__(self, num_features, num_dims, **kwargs):
        super(BatchNorm, self).__init__(**kwargs)
        if num_dims == 2:
            shape = (1, num_features)
        else:
            shape = (1, num_features, 1, 1)
        # The scale parameter and the shift parameter involved in gradient
        # computation and iteration are initialized to 1 and 0 respectively.
        self.gamma = self.params.get('gamma', shape=shape, init=init.One())
        self.beta = self.params.get('beta', shape=shape, init=init.Zero())
        # The variables not involved in gradient computation and iteration are
        # initialized to 0 on the CPU.
        self.moving_mean = nd.zeros(shape)
        self.moving_var = nd.zeros(shape)
    def forward(self, X):
        # If X is not on the CPU, copy moving_mean and moving_var to the
        # device where X is located.
        if self.moving_mean.context != X.context:
            self.moving_mean = self.moving_mean.copyto(X.context)
            self.moving_var = self.moving_var.copyto(X.context)
        # Save the updated moving_mean and moving_var.
        Y, self.moving_mean, self.moving_var = batch_norm(
            X,
            self.gamma.data(),
            self.beta.data(),
            self.moving_mean,
            self.moving_var,
            eps=1e-5,
            momentum=0.9
        )
        return Y
The num_features parameter required by the BatchNorm instance is the number of outputs for a fully connected layer and the number of output channels for a convolutional layer. The num_dims parameter, also required by this instance, is 2 for a fully connected layer and 4 for a convolutional layer. Next, we use our BatchNorm layer in a LeNet-style network.
net = nn.Sequential()
net.add(
    nn.Conv2D(6, kernel_size=5),
    BatchNorm(6, num_dims=4),
    nn.Activation('sigmoid'),
    nn.MaxPool2D(pool_size=2, strides=2),
    nn.Conv2D(16, kernel_size=5),
    BatchNorm(16, num_dims=4),
    nn.Activation('sigmoid'),
    nn.MaxPool2D(pool_size=2, strides=2),
    nn.Dense(120),
    BatchNorm(120, num_dims=2),
    nn.Activation('sigmoid'),
    nn.Dense(84),
    BatchNorm(84, num_dims=2),
    nn.Activation('sigmoid'),
    nn.Dense(10)
)
lr = 1.0
num_epochs = 5
batch_size = 256
ctx = gb.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier(), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
training on cpu(0)
epoch 1, loss 0.6653, train acc 0.761, test acc 0.806, time 142.5 sec
epoch 2, loss 0.3917, train acc 0.859, test acc 0.847, time 141.8 sec
epoch 3, loss 0.3493, train acc 0.874, test acc 0.854, time 142.9 sec
epoch 4, loss 0.3212, train acc 0.884, test acc 0.872, time 138.8 sec
epoch 5, loss 0.3033, train acc 0.889, test acc 0.861, time 136.9 sec
net[1].gamma.data().reshape((-1,)), net[1].beta.data().reshape((-1,))
(
[1.3520054  1.3801662  1.8764832  1.4937813  0.93755937 1.8829043 ]
<NDArray 6 @cpu(0)>,
[ 0.6523215   0.2318771   0.2659682   0.7197848  -0.51127845 -2.0419025 ]
<NDArray 6 @cpu(0)>)
net = nn.Sequential()
net.add(
    nn.Conv2D(6, kernel_size=5),
    nn.BatchNorm(),
    nn.Activation('sigmoid'),
    nn.MaxPool2D(pool_size=2, strides=2),
    nn.Conv2D(16, kernel_size=5),
    nn.BatchNorm(),
    nn.Activation('sigmoid'),
    nn.MaxPool2D(pool_size=2, strides=2),
    nn.Dense(120),
    nn.BatchNorm(),
    nn.Activation('sigmoid'),
    nn.Dense(84),
    nn.BatchNorm(),
    nn.Activation('sigmoid'),
    nn.Dense(10)
)
lr = 1.0
num_epochs = 5
batch_size = 256
ctx = gb.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier(), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
During model training, batch normalization continuously adjusts the intermediate outputs of the network using the mean and standard deviation of each mini-batch, so that the intermediate outputs of every layer are more stable throughout training.
Like dropout layers, batch normalization layers behave differently in training mode and in prediction mode.
Batch normalization has many beneficial side effects, primarily that of regularization.
On the other hand, the original motivation of reducing internal covariate shift does not seem to be a valid explanation.
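To make the difference between the two modes concrete, here is a small illustration (a sketch added for this edition, not part of the original text): the same input is normalized with the batch statistics in training mode and with the moving statistics in prediction mode, so the outputs differ.

bn = nn.BatchNorm()
bn.initialize()
X = nd.random.uniform(shape=(2, 3, 4, 4))
with autograd.record():  # Training mode: uses the statistics of this batch.
    Y_train = bn(X)
Y_pred = bn(X)  # Prediction mode: uses the moving statistics.
print((Y_train - Y_pred).abs().max())  # Nonzero: the two modes disagree.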
Can we add new layers to a neural network so that the fully trained model reduces training error more effectively?
In practice, however, adding too many layers can cause the training error to increase rather than decrease.
Function Classes
Kaiming He and his colleagues proposed ResNet to address this problem.
Papers
He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770-778). https://arxiv.org/abs/1512.03385
He, K., Zhang, X., Ren, S., & Sun, J. (2016, October). Identity mappings in deep residual networks. In European Conference on Computer Vision (pp. 630-645). Springer, Cham. https://arxiv.org/abs/1603.05027
ResNet won the ImageNet Large Scale Visual Recognition Challenge in 2015 and had a profound influence on the design of subsequent deep neural networks.
At the heart of ResNet is the idea that *every additional layer should contain the identity function as one of its elements*.
These considerations are rather profound, but they led to a surprisingly simple solution: the *residual block*.
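Formally, if $f(\mathbf{x})$ denotes the desired underlying mapping, a residual block lets its stacked layers fit the residual mapping $g(\mathbf{x}) = f(\mathbf{x}) - \mathbf{x}$ and outputs

$$f(\mathbf{x}) = g(\mathbf{x}) + \mathbf{x}.$$

If the identity mapping is the desired mapping, the block only needs to drive the weights of $g$ toward zero, which is easier than fitting the identity through a stack of nonlinear layers.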
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import nn
class Residual(nn.Block):  # This class is part of the gluonbook package.
    def __init__(self, num_channels, use_1x1conv=False, strides=1, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1,
                               strides=strides)
        self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()
        if use_1x1conv:
            self.conv3 = nn.Conv2D(num_channels, kernel_size=1,
                                   strides=strides)
        else:
            self.conv3 = None

    def forward(self, X):
        Y = nd.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return nd.relu(Y + X)
When use_1x1conv=True, the block adjusts the number of channels and the resolution by means of a $1 \times 1$ convolution before the addition. First, let us look at the case where the input and output have the same shape:
blk = Residual(num_channels=3)
blk.initialize()
X = nd.random.uniform(shape=(4, 3, 6, 6))
blk(X).shape
(4, 3, 6, 6)
blk = Residual(num_channels=6, use_1x1conv=True, strides=2)
blk.initialize()
X = nd.random.uniform(shape=(4, 3, 6, 6))
blk(X).shape
(4, 6, 3, 3)
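Here the first $3 \times 3$ convolution uses strides=2 and halves the height and width, while the $1 \times 1$ convolution on the shortcut (also with strides=2) maps the 3 input channels to 6 and halves the resolution as well, so the two paths have matching shapes and can be added.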
net = nn.Sequential()
net.add(
    nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
    nn.BatchNorm(),
    nn.Activation('relu'),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)
X = nd.random.uniform(shape=(1, 1, 28, 28))
net.initialize()
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)
conv52 output shape:     (1, 64, 14, 14)
batchnorm45 output shape:        (1, 64, 14, 14)
relu1 output shape:      (1, 64, 14, 14)
pool4 output shape:      (1, 64, 7, 7)
net.add(
    # Since a max-pooling layer with a stride of 2 has already been used, it is
    # not necessary to reduce the height and width in the first residual block.
    Residual(num_channels=64),
    Residual(num_channels=64),
    Residual(num_channels=64),
    Residual(num_channels=128, use_1x1conv=True, strides=2),  # Height and width are halved.
    Residual(num_channels=128),
    Residual(num_channels=128),
    Residual(num_channels=256, use_1x1conv=True, strides=2),  # Height and width are halved.
    Residual(num_channels=256),
    Residual(num_channels=256),
    Residual(num_channels=512, use_1x1conv=True, strides=2),  # Height and width are halved.
    Residual(num_channels=512),
    Residual(num_channels=512)
)
net.add(
    nn.GlobalAvgPool2D(),
    nn.Dense(10)
)
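Each residual block contains two $3 \times 3$ convolutional layers, so counting the first $7 \times 7$ convolution and the final dense layer (and ignoring the $1 \times 1$ shortcut convolutions), this network has $1 + 12 \times 2 + 1 = 26$ weight layers.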
X = nd.random.uniform(shape=(1, 1, 28, 28))
net.initialize(force_reinit=True)
for layer in net:
    X = layer(X)
    print(layer.name, 'output shape:\t', X.shape)
conv52 output shape:     (1, 64, 14, 14)
batchnorm45 output shape:        (1, 64, 14, 14)
relu1 output shape:      (1, 64, 14, 14)
pool4 output shape:      (1, 64, 7, 7)
residual22 output shape:         (1, 64, 7, 7)
residual23 output shape:         (1, 64, 7, 7)
residual24 output shape:         (1, 64, 7, 7)
residual25 output shape:         (1, 128, 4, 4)
residual26 output shape:         (1, 128, 4, 4)
residual27 output shape:         (1, 128, 4, 4)
residual28 output shape:         (1, 256, 2, 2)
residual29 output shape:         (1, 256, 2, 2)
residual30 output shape:         (1, 256, 2, 2)
residual31 output shape:         (1, 512, 1, 1)
residual32 output shape:         (1, 512, 1, 1)
residual33 output shape:         (1, 512, 1, 1)
pool5 output shape:      (1, 512, 1, 1)
dense3 output shape:     (1, 10)
lr = 0.05
num_epochs = 5
batch_size = 256
ctx = gb.try_gpu()
net.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size)
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
training on cpu(0)
epoch 1, loss 0.8086, train acc 0.760, test acc 0.861, time 1187.2 sec
epoch 2, loss 0.3464, train acc 0.872, test acc 0.863, time 1174.8 sec
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import nn
def conv_block(num_channels):
    blk = nn.Sequential()
    blk.add(
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.Conv2D(num_channels, kernel_size=3, padding=1)
    )
    return blk
class DenseBlock(nn.Block):
    def __init__(self, num_convs, num_channels, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.net = nn.Sequential()
        for _ in range(num_convs):
            self.net.add(conv_block(num_channels))

    def forward(self, X):
        for blk in self.net:
            Y = blk(X)
            print("X.shape: {0} --> Y.shape: {1}".format(X.shape, Y.shape))
            # Concatenate the input and output of each block on the channel
            # dimension.
            X = nd.concat(X, Y, dim=1)
        return X
blk = DenseBlock(num_convs=2, num_channels=10)
blk.initialize(force_reinit=True)
X = nd.random.uniform(shape=(4, 3, 8, 8))
Y = blk(X)
Y.shape
X.shape: (4, 3, 8, 8) --> Y.shape: (4, 10, 8, 8)
X.shape: (4, 13, 8, 8) --> Y.shape: (4, 10, 8, 8)
(4, 23, 8, 8)
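The number of output channels is $3 + 2 \times 10 = 23$: each convolution block adds its num_channels (here 10) output channels to its input. Because the channel count grows by this amount with every block, num_channels is also referred to as the growth rate.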
def transition_block(num_channels):
    blk = nn.Sequential()
    blk.add(
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.Conv2D(num_channels, kernel_size=1),
        nn.AvgPool2D(pool_size=2, strides=2)
    )
    return blk
blk = transition_block(num_channels=10)
blk.initialize(force_reinit=True)
blk(Y).shape
(4, 10, 4, 4)
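As expected, the $1 \times 1$ convolution reduces the 23 channels to 10, and the average pooling layer halves the height and width from $8 \times 8$ to $4 \times 4$.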
net = nn.Sequential()
net.add(
    nn.Conv2D(64, kernel_size=7, strides=2, padding=3),
    nn.BatchNorm(),
    nn.Activation('relu'),
    nn.MaxPool2D(pool_size=3, strides=2, padding=1)
)
num_channels = 64  # The current number of channels.
growth_rate = 32
num_convs_in_dense_blocks = [4, 4, 4, 4]
for i, num_convs in enumerate(num_convs_in_dense_blocks):
    net.add(DenseBlock(num_convs=num_convs, num_channels=growth_rate))
    # Add this dense block's growth to the accumulated channel count; note
    # that num_channels is not reassigned after the transition layers below.
    num_channels += num_convs * growth_rate
    # A transition layer with half of the accumulated channel count is added
    # between the dense blocks.
    if i != len(num_convs_in_dense_blocks) - 1:
        net.add(transition_block(num_channels // 2))
net.add(
    nn.BatchNorm(),
    nn.Activation('relu'),
    nn.GlobalAvgPool2D(),
    nn.Dense(10)
)
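For reference, the transition layers constructed here have 96, 160, and 224 output channels (half of the accumulated counts 192, 320, and 448), and 352 channels enter the global average pooling layer; these numbers can be checked against the shape printout produced during training below.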
lr = 0.1
num_epochs = 5
batch_size = 256
ctx = gb.try_gpu()
net.initialize(ctx=ctx, init=init.Xavier(), force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
train_iter, test_iter = gb.load_data_fashion_mnist(batch_size, resize=96)
gb.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)
training on cpu(0)
X.shape: (256, 64, 24, 24) --> Y.shape: (256, 32, 24, 24)
X.shape: (256, 96, 24, 24) --> Y.shape: (256, 32, 24, 24)
X.shape: (256, 128, 24, 24) --> Y.shape: (256, 32, 24, 24)
X.shape: (256, 160, 24, 24) --> Y.shape: (256, 32, 24, 24)
X.shape: (256, 96, 12, 12) --> Y.shape: (256, 32, 12, 12)
X.shape: (256, 128, 12, 12) --> Y.shape: (256, 32, 12, 12)
X.shape: (256, 160, 12, 12) --> Y.shape: (256, 32, 12, 12)
X.shape: (256, 192, 12, 12) --> Y.shape: (256, 32, 12, 12)
X.shape: (256, 160, 6, 6) --> Y.shape: (256, 32, 6, 6)
X.shape: (256, 192, 6, 6) --> Y.shape: (256, 32, 6, 6)
X.shape: (256, 224, 6, 6) --> Y.shape: (256, 32, 6, 6)
X.shape: (256, 256, 6, 6) --> Y.shape: (256, 32, 6, 6)
X.shape: (256, 224, 3, 3) --> Y.shape: (256, 32, 3, 3)
X.shape: (256, 256, 3, 3) --> Y.shape: (256, 32, 3, 3)
X.shape: (256, 288, 3, 3) --> Y.shape: (256, 32, 3, 3)
X.shape: (256, 320, 3, 3) --> Y.shape: (256, 32, 3, 3)
... (the same shape printout repeats for every mini-batch)