Gluon
import mxnet as mx
from mxnet import autograd
from mxnet import gluon
from mxnet import ndarray as nd
import numpy as np
import random
# Seed both MXNet's and Python's random number generators.
mx.random.seed(1)
random.seed(1)
# Generate synthetic linear-regression data:
#   y = true_w[0] * x0 + true_w[1] * x1 + true_b + noise
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
# Feature matrix of shape (num_examples, num_inputs).
X = nd.random_normal(scale=1, shape=(num_examples, num_inputs))
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b
# Perturb the labels with random noise scaled by 0.01.
y += .01 * nd.random_normal(scale=1, shape=y.shape)
# Pair features and labels so a DataLoader can draw mini-batches from them.
dataset = gluon.data.ArrayDataset(X, y)
# Model: a Sequential container holding a single Dense layer with one output.
net = gluon.nn.Sequential()
net.add(gluon.nn.Dense(1))
# Loss used both for training and for the recorded loss curve.
square_loss = gluon.loss.L2Loss()
# IPython magic: only valid inside a notebook/IPython session, not plain Python.
%matplotlib inline
import matplotlib as mpl
# Set the resolution of rendered figures to 120 dpi.
mpl.rcParams['figure.dpi']= 120
import matplotlib.pyplot as plt
def train(batch_size, lr, epochs, period):
    """Train the linear model with mini-batch SGD and plot the loss curve.

    Re-initializes the module-level ``net``, then makes ``epochs`` passes
    over ``dataset``. The mean ``square_loss`` over the full data
    ``(X, y)`` is recorded once before training and again each time another
    ``period`` examples have been processed within an epoch. From the third
    epoch on, the learning rate is multiplied by 0.1 at the start of each
    epoch. After training, the learned weight and bias are printed and the
    recorded losses are drawn on a log-scaled y-axis.

    Args:
        batch_size: Number of examples per mini-batch.
        lr: Initial learning rate handed to the SGD trainer.
        epochs: Number of passes over the dataset.
        period: Number of examples between two loss recordings. Must be at
            least ``batch_size`` and a multiple of it.

    Raises:
        AssertionError: If ``period`` is smaller than ``batch_size`` or is
            not a multiple of it.
    """
    assert period >= batch_size and period % batch_size == 0
    # force_reinit allows train() to be called repeatedly on the same network.
    net.collect_params().initialize(mx.init.Normal(sigma=1), force_reinit=True)
    # SGD.
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr})
    data_iter = gluon.data.DataLoader(dataset, batch_size, shuffle=True)
    # Loss of the freshly initialized model: the curve's value at epoch 0.
    total_loss = [np.mean(square_loss(net(X), y).asnumpy())]

    for epoch in range(1, epochs + 1):
        # Decay learning rate.
        if epoch > 2:
            trainer.set_learning_rate(trainer.learning_rate * 0.1)
        for batch_i, (data, label) in enumerate(data_iter):
            with autograd.record():
                output = net(data)
                loss = square_loss(output, label)
            loss.backward()
            # Normalize by the number of examples actually in this batch so
            # a shorter final batch is not under-weighted.
            trainer.step(data.shape[0])
            # batch_i is zero-based, so (batch_i + 1) batches have been
            # processed here; record once every `period` examples. This also
            # makes the last recording of an epoch fall on the epoch's end,
            # matching the evenly spaced x-axis used for the plot below.
            if (batch_i + 1) * batch_size % period == 0:
                total_loss.append(np.mean(square_loss(net(X), y).asnumpy()))
        print("Batch size %d, Learning rate %f, Epoch %d, loss %.4e" %
              (batch_size, trainer.learning_rate, epoch, total_loss[-1]))

    print('w:', np.reshape(net[0].weight.data().asnumpy(), (1, -1)),
          'b:', net[0].bias.data().asnumpy()[0], '\n')
    # Spread the recorded losses evenly over [0, epochs] for the x-axis.
    x_axis = np.linspace(0, epochs, len(total_loss), endpoint=True)
    plt.semilogy(x_axis, total_loss)
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.show()
# Stochastic gradient descent: every parameter update uses a single example.
train(batch_size=1, lr=0.2, epochs=3, period=10)
Batch size 1, Learning rate 0.200000, Epoch 1, loss 5.5937e-05 Batch size 1, Learning rate 0.200000, Epoch 2, loss 8.0472e-05 Batch size 1, Learning rate 0.020000, Epoch 3, loss 4.9757e-05 w: [[ 1.99949276 -3.39981604]] b: 4.19997
# Batch size equals num_examples (1000), so each epoch is one full-batch
# gradient descent step.
train(batch_size=1000, lr=0.999, epochs=3, period=1000)
Batch size 1000, Learning rate 0.999000, Epoch 1, loss 1.1561e-01 Batch size 1000, Learning rate 0.999000, Epoch 2, loss 8.4421e-04 Batch size 1000, Learning rate 0.099900, Epoch 3, loss 6.9547e-04 w: [[ 2.00893021 -3.36536145]] b: 4.19384
# Mini-batch SGD: 10 examples per parameter update.
train(batch_size=10, lr=0.2, epochs=3, period=10)
Batch size 10, Learning rate 0.200000, Epoch 1, loss 4.9184e-05 Batch size 10, Learning rate 0.200000, Epoch 2, loss 4.9389e-05 Batch size 10, Learning rate 0.020000, Epoch 3, loss 4.8990e-05 w: [[ 1.99998689 -3.39983392]] b: 4.20028
# Same mini-batch size with a much larger learning rate; the recorded run
# below ends with nan loss and parameters.
train(batch_size=10, lr=5, epochs=3, period=10)
Batch size 10, Learning rate 5.000000, Epoch 1, loss nan Batch size 10, Learning rate 5.000000, Epoch 2, loss nan Batch size 10, Learning rate 0.500000, Epoch 3, loss nan w: [[ nan nan]] b: nan
# Same mini-batch size with a much smaller learning rate; in the recorded run
# below the parameters are still far from true_w and true_b after 3 epochs.
train(batch_size=10, lr=0.002, epochs=3, period=10)
Batch size 10, Learning rate 0.002000, Epoch 1, loss 9.1293e+00 Batch size 10, Learning rate 0.002000, Epoch 2, loss 6.1059e+00 Batch size 10, Learning rate 0.000200, Epoch 3, loss 5.8656e+00 w: [[ 0.9720636 -1.67973948]] b: 1.42253
For whinges or inquiries, open an issue on GitHub.