import mxnet as mx
from mxnet import gluon, autograd, ndarray
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
data = load_boston()
df = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target
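Note: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so the cell above requires an older scikit-learn. On newer versions, a rough sketch of an equivalent load (assuming network access; the OpenML copy uses the same feature names) is:

```python
from sklearn.datasets import fetch_openml

# Boston housing from OpenML; CHAS and RAD arrive as categoricals,
# so coerce every column to a numeric dtype before normalizing
boston = fetch_openml(name="boston", version=1, as_frame=True)
df = boston.data.apply(pd.to_numeric)
y = boston.target.to_numpy()
```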
# Mean-normalize each feature: subtract its mean, divide by its range
df_norm = (df - df.mean()) / (df.max() - df.min())
df_norm.head()
|   | CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD | TAX | PTRATIO | B | LSTAT |
|---|------|----|-------|------|-----|----|-----|-----|-----|-----|---------|---|-------|
| 0 | -0.040322 | 0.066364 | -0.323562 | -0.06917 | -0.034352 | 0.055636 | -0.034757 | 0.026822 | -0.371713 | -0.214193 | -0.335695 | 0.101432 | -0.211729 |
| 1 | -0.040086 | -0.113636 | -0.149075 | -0.06917 | -0.176327 | 0.026129 | 0.106335 | 0.106581 | -0.328235 | -0.317246 | -0.069738 | 0.101432 | -0.096939 |
| 2 | -0.040086 | -0.113636 | -0.149075 | -0.06917 | -0.176327 | 0.172517 | -0.076981 | 0.106581 | -0.328235 | -0.317246 | -0.069738 | 0.091169 | -0.237943 |
| 3 | -0.040029 | -0.113636 | -0.328328 | -0.06917 | -0.198961 | 0.136686 | -0.234551 | 0.206163 | -0.284757 | -0.355414 | 0.026007 | 0.095708 | -0.268021 |
| 4 | -0.039617 | -0.113636 | -0.328328 | -0.06917 | -0.198961 | 0.165236 | -0.148042 | 0.206163 | -0.284757 | -0.355414 | 0.026007 | 0.101432 | -0.202071 |
X_train, X_test, y_train, y_test = train_test_split(df_norm, y,
                                                    test_size=0.2,
                                                    random_state=1111)
BATCH_SIZE = 32
LEARNING_R = 0.01
EPOCHS = 300
# DataFrame.as_matrix() was removed in pandas 1.0; use .to_numpy() instead
train_dataset = mx.gluon.data.ArrayDataset(X_train.to_numpy(), y_train)
test_dataset = mx.gluon.data.ArrayDataset(X_test.to_numpy(), y_test)
train_data = mx.gluon.data.DataLoader(train_dataset,
                                      batch_size=BATCH_SIZE, shuffle=True)
test_data = mx.gluon.data.DataLoader(test_dataset,
                                     batch_size=BATCH_SIZE, shuffle=False)
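As a quick sanity check (a sketch, not part of the original pipeline), you can pull a single batch from the loader and confirm its shape: with 13 features and BATCH_SIZE = 32, the first training batch should come out as (32, 13).

```python
# Inspect one batch from the training DataLoader
for X_batch, y_batch in train_data:
    print(X_batch.shape, y_batch.shape)  # expect (32, 13) (32,)
    break
```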
net = gluon.nn.Sequential()
# Define the model architecture
with net.name_scope():
    net.add(gluon.nn.Dense(16, activation="relu"))
    net.add(gluon.nn.BatchNorm())
    net.add(gluon.nn.Dense(8, activation="relu"))
    net.add(gluon.nn.BatchNorm())
    net.add(gluon.nn.Dense(1))
# Initialize the parameters of the model
net.collect_params().initialize(mx.init.Uniform())
# Use the L2 (squared-error) loss for regression
l2loss = gluon.loss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'learning_rate': LEARNING_R})
for e in range(EPOCHS):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(mx.cpu()).astype('float32')
        label = label.as_in_context(mx.cpu()).astype('float32')
        with autograd.record():  # start recording the derivatives
            output = net(data)   # the forward pass
            loss = l2loss(output, label)
        loss.backward()
        trainer.step(data.shape[0])  # update weights, scaled by batch size
    # Report the mean loss of the epoch's final batch every 20 epochs
    curr_loss = ndarray.mean(loss).asscalar()
    if e % 20 == 0:
        print("Epoch {}. Current Loss: {}.".format(e, curr_loss))
    # if curr_loss < 2.0:
    #     break
Epoch 0. Current Loss: 245.78768920898438.
Epoch 20. Current Loss: 7.980735778808594.
Epoch 40. Current Loss: 4.9582600593566895.
Epoch 60. Current Loss: 6.980565547943115.
Epoch 80. Current Loss: 0.960381031036377.
Epoch 100. Current Loss: 7.814993381500244.
Epoch 120. Current Loss: 9.498841285705566.
Epoch 140. Current Loss: 17.053531646728516.
Epoch 160. Current Loss: 5.0172576904296875.
Epoch 180. Current Loss: 7.0766472816467285.
Epoch 200. Current Loss: 2.061584949493408.
Epoch 220. Current Loss: 4.6772260665893555.
Epoch 240. Current Loss: 4.503413200378418.
Epoch 260. Current Loss: 6.620635986328125.
Epoch 280. Current Loss: 3.283313751220703.
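Because the reported value is the loss on a single shuffled batch rather than an average over the whole epoch, it fluctuates from print to print instead of decreasing monotonically.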
y_pred = np.array([])
for data, label in test_data:
    data = data.as_in_context(mx.cpu()).astype('float32')
    output = net(data)
    y_pred = np.append(y_pred, output.asnumpy())
mean_squared_error(y_test, y_pred)
5.9256329511598826
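Taking the square root of this test MSE gives an RMSE of about 2.43; since the target is the median home value in units of $1000s, the model's typical prediction error is on the order of $2,400.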