import mxnet as mx
from mxnet import gluon, autograd, ndarray
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
data = load_boston()
df = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target
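Note: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so the cell above requires an older scikit-learn. On newer versions, a rough sketch of an equivalent load (assuming network access; the OpenML copy uses the same feature names) is:

```python
from sklearn.datasets import fetch_openml

# Boston housing from OpenML; CHAS and RAD arrive as categoricals,
# so coerce every column to a numeric dtype before normalizing
boston = fetch_openml(name="boston", version=1, as_frame=True)
df = boston.data.apply(pd.to_numeric)
y = boston.target.to_numpy()
```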
# Mean-normalize each feature: subtract its mean, divide by its range
df_norm = (df - df.mean()) / (df.max() - df.min())
df_norm.head()
|   | CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD | TAX | PTRATIO | B | LSTAT |
|---|------|----|-------|------|-----|----|-----|-----|-----|-----|---------|---|-------|
| 0 | -0.040322 | 0.066364 | -0.323562 | -0.06917 | -0.034352 | 0.055636 | -0.034757 | 0.026822 | -0.371713 | -0.214193 | -0.335695 | 0.101432 | -0.211729 |
| 1 | -0.040086 | -0.113636 | -0.149075 | -0.06917 | -0.176327 | 0.026129 | 0.106335 | 0.106581 | -0.328235 | -0.317246 | -0.069738 | 0.101432 | -0.096939 |
| 2 | -0.040086 | -0.113636 | -0.149075 | -0.06917 | -0.176327 | 0.172517 | -0.076981 | 0.106581 | -0.328235 | -0.317246 | -0.069738 | 0.091169 | -0.237943 |
| 3 | -0.040029 | -0.113636 | -0.328328 | -0.06917 | -0.198961 | 0.136686 | -0.234551 | 0.206163 | -0.284757 | -0.355414 | 0.026007 | 0.095708 | -0.268021 |
| 4 | -0.039617 | -0.113636 | -0.328328 | -0.06917 | -0.198961 | 0.165236 | -0.148042 | 0.206163 | -0.284757 | -0.355414 | 0.026007 | 0.101432 | -0.202071 |
X_train, X_test, y_train, y_test = train_test_split(df_norm, y,
                                                    test_size=0.2,
                                                    random_state=1111)
BATCH_SIZE = 32
LEARNING_R = 0.01
EPOCHS = 300
# DataFrame.as_matrix() was removed in pandas 1.0; use .to_numpy() instead
train_dataset = mx.gluon.data.ArrayDataset(X_train.to_numpy(), y_train)
test_dataset = mx.gluon.data.ArrayDataset(X_test.to_numpy(), y_test)
train_data = mx.gluon.data.DataLoader(train_dataset,
                                      batch_size=BATCH_SIZE, shuffle=True)
test_data = mx.gluon.data.DataLoader(test_dataset,
                                     batch_size=BATCH_SIZE, shuffle=False)
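As a quick sanity check (a sketch, not part of the original pipeline), you can pull a single batch from the loader and confirm its shape: with 13 features and BATCH_SIZE = 32, the first training batch should come out as (32, 13).

```python
# Inspect one batch from the training DataLoader
for X_batch, y_batch in train_data:
    print(X_batch.shape, y_batch.shape)  # expect (32, 13) (32,)
    break
```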
net = gluon.nn.Sequential()
# Define the model architecture
with net.name_scope():
    net.add(gluon.nn.Dense(16, activation="relu"))
    net.add(gluon.nn.BatchNorm())
    net.add(gluon.nn.Dense(8, activation="relu"))
    net.add(gluon.nn.BatchNorm())
    net.add(gluon.nn.Dense(1))
# Initialize the parameters of the model
net.collect_params().initialize(mx.init.Uniform())
# Use the L2 (squared-error) loss for regression
l2loss = gluon.loss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'learning_rate': LEARNING_R})
for e in range(EPOCHS):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(mx.cpu()).astype('float32')
        label = label.as_in_context(mx.cpu()).astype('float32')
        with autograd.record():  # start recording the derivatives
            output = net(data)   # the forward pass
            loss = l2loss(output, label)
        loss.backward()
        trainer.step(data.shape[0])  # update weights, scaled by batch size
    # Report the mean loss of the epoch's final batch every 20 epochs
    curr_loss = ndarray.mean(loss).asscalar()
    if e % 20 == 0:
        print("Epoch {}. Current Loss: {}.".format(e, curr_loss))
    # if curr_loss < 2.0:
    #     break
Epoch 0. Current Loss: 245.78768920898438.
Epoch 20. Current Loss: 7.980735778808594.
Epoch 40. Current Loss: 4.9582600593566895.
Epoch 60. Current Loss: 6.980565547943115.
Epoch 80. Current Loss: 0.960381031036377.
Epoch 100. Current Loss: 7.814993381500244.
Epoch 120. Current Loss: 9.498841285705566.
Epoch 140. Current Loss: 17.053531646728516.
Epoch 160. Current Loss: 5.0172576904296875.
Epoch 180. Current Loss: 7.0766472816467285.
Epoch 200. Current Loss: 2.061584949493408.
Epoch 220. Current Loss: 4.6772260665893555.
Epoch 240. Current Loss: 4.503413200378418.
Epoch 260. Current Loss: 6.620635986328125.
Epoch 280. Current Loss: 3.283313751220703.
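Because the reported value is the loss on a single shuffled batch rather than an average over the whole epoch, it fluctuates from print to print instead of decreasing monotonically.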
y_pred = np.array([])
for data, label in test_data:
    data = data.as_in_context(mx.cpu()).astype('float32')
    output = net(data)
    y_pred = np.append(y_pred, output.asnumpy())
mean_squared_error(y_test, y_pred)
5.9256329511598826
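Taking the square root of this test MSE gives an RMSE of about 2.43; since the target is the median home value in units of $1000s, the model's typical prediction error is on the order of $2,400.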