%matplotlib inline
import d2l
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import nn, data as gdata, loss as gloss
import numpy as np
import time
def get_data_ch7():
data = np.genfromtxt('./airfoil_self_noise.dat', delimiter='\t')
data = (data - data.mean(axis=0)) / data.std(axis=0)
return nd.array(data[:1500, :-1]), nd.array(data[:1500, -1])
features, labels = get_data_ch7()
features.shape
(1500, 5)
def sgd(params, states, hyperparams):
for p in params:
p[:] -= hyperparams['lr'] * p.grad
The training function
def train_ch7(trainer_fn, states, hyperparams, features, labels,
batch_size=10, num_epochs=2):
net, loss = d2l.linreg, d2l.squared_loss
w = nd.random.normal(scale=0.01, shape=(features.shape[1], 1))
b = nd.zeros(1)
w.attach_grad()
b.attach_grad()
def eval_loss():
return loss(net(features, w, b), labels).mean().asscalar()
ls, ts = [eval_loss()], [0,]
data_iter = gdata.DataLoader(
gdata.ArrayDataset(features, labels), batch_size, shuffle=True)
start = time.time()
for _ in range(num_epochs):
for batch_i, (X, y) in enumerate(data_iter):
with autograd.record():
l = loss(net(X, w, b), y).mean()
l.backward()
trainer_fn([w, b], states, hyperparams)
if (batch_i + 1) * batch_size % 10 == 0:
ts.append(time.time() - start + ts[-1])
ls.append(eval_loss())
start = time.time()
print('loss: %f, %f sec per epoch' % (ls[-1], ts[-1]/num_epochs))
d2l.set_figsize()
d2l.plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
d2l.plt.xlabel('epoch')
d2l.plt.ylabel('loss')
return ts, ls
Gradient descent
def train_sgd(lr, batch_size, num_epochs=2):
return train_ch7(sgd, None, {'lr': lr}, features, labels, batch_size, num_epochs)
gd_res = train_sgd(1, 1500, 6)
loss: 0.243936, 0.011909 sec per epoch
SGD
sgd_res = train_sgd(0.005, 1)
loss: 0.242317, 1.111633 sec per epoch
Mini-batch with batch size 100.
mini1_res = train_sgd(.4, 100)
loss: 0.250650, 0.029515 sec per epoch
Mini-batch with batch size 10.
mini2_res = train_sgd(.05, 10)
loss: 0.243461, 0.131726 sec per epoch
Compare time versus loss
d2l.set_figsize([6, 3])
for res in [gd_res, sgd_res, mini1_res, mini2_res]:
d2l.plt.plot(res[0], res[1])
d2l.plt.xlabel('time (sec)')
d2l.plt.ylabel('loss')
d2l.plt.xscale('log')
d2l.plt.xlim([1e-3, 1])
d2l.plt.legend(['gd', 'sgd', 'batch size=100', 'batch size=10']);
def train_gluon_ch7(trainer_name, trainer_hyperparams, features, labels,
batch_size=10, num_epochs=2):
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(init.Normal(sigma=0.01))
loss = gloss.L2Loss()
def eval_loss():
return loss(net(features), labels).mean().asscalar()
ls = [eval_loss()]
data_iter = gdata.DataLoader(
gdata.ArrayDataset(features, labels), batch_size, shuffle=True)
trainer = gluon.Trainer(
net.collect_params(), trainer_name, trainer_hyperparams)
for _ in range(num_epochs):
start = time.time()
for batch_i, (X, y) in enumerate(data_iter):
with autograd.record():
l = loss(net(X), y)
l.backward()
trainer.step(batch_size)
if (batch_i + 1) * batch_size % 100 == 0:
ls.append(eval_loss())
print('loss: %f, %f sec per epoch' % (ls[-1], time.time() - start))
d2l.set_figsize()
d2l.plt.plot(np.linspace(0, num_epochs, len(ls)), ls)
d2l.plt.xlabel('epoch')
d2l.plt.ylabel('loss')
Repeat the last experiment.
train_gluon_ch7('sgd', {'learning_rate': 0.05}, features, labels, 10)
loss: 0.248125, 0.128904 sec per epoch