import numpy as np
import pandas as pd
import mxnet as mx
from sklearn.metrics import mean_squared_error
from sklearn.metrics import log_loss
from sklearn.metrics import roc_auc_score
def mx_log_loss(actual, predicted):
    """Return sklearn's log-loss (cross-entropy) for the given labels/probabilities.

    Presumably intended as a custom eval-metric callable for MXNet
    (`mx.metric.np`-style ``(label, pred)`` signature) — it is not called
    anywhere in this chunk; confirm against the rest of the notebook.

    Parameters
    ----------
    actual : array-like of shape (n_samples,)
        Ground-truth class labels.
    predicted : array-like
        Predicted class probabilities.

    Returns
    -------
    float
        Negative log-likelihood of ``predicted`` w.r.t. ``actual``.
    """
    # Fix: the pasted source lost this line's indentation (SyntaxError).
    return log_loss(actual, predicted)
# Turn on verbose root logging so mx.model.fit() prints per-epoch progress.
import logging

root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)

# Boston housing data: 13 numeric features, continuous price target.
# NOTE(review): load_boston was deprecated and removed in scikit-learn 1.2 —
# this script presumably targets an older sklearn; confirm before running.
from sklearn.datasets import load_boston

boston = load_boston()
x = pd.DataFrame(data=boston.data, columns=boston.feature_names)
y = boston.target
print((x.shape, y.shape))
# pasted interpreter output:
# ((506, 13), (506,))
# Random ~70/30 train/validation split over the rows.
flg_train = np.random.choice([False, True], len(y), replace=True, p=[0.3, 0.7])
flg_valid = ~flg_train

batch_size = 100

# Minibatch iterators; only the training stream is shuffled.
train_iter = mx.io.NDArrayIter(
    x[flg_train].values, y[flg_train], batch_size,
    shuffle=True, label_name='label',
)
valid_iter = mx.io.NDArrayIter(
    x[flg_valid].values, y[flg_valid], batch_size,
    label_name='label',
)
test_iter = mx.io.NDArrayIter(x[flg_valid].values)

# 13 -> 3 (relu) -> 1 network with the built-in squared-error output head.
data = mx.sym.Variable('data')
label = mx.sym.Variable('label')
hidden = mx.sym.Activation(mx.sym.FullyConnected(data, num_hidden=3),
                           act_type='relu')
fco = mx.sym.FullyConnected(hidden, num_hidden=1)
net1 = mx.sym.LinearRegressionOutput(fco, label)

model1 = mx.model.FeedForward(symbol=net1, num_epoch=15, learning_rate=0.1)
model1.fit(X=train_iter, eval_data=valid_iter, eval_metric='MSE')
# pasted training log (output, not code):
# INFO:root:Start training with [cpu(0)]
# INFO:root:Epoch[0] Resetting Data Iterator INFO:root:Epoch[0] Time cost=0.017 INFO:root:Epoch[0] Validation-mse=1395687.562500
# INFO:root:Epoch[1] Resetting Data Iterator INFO:root:Epoch[1] Time cost=0.016 INFO:root:Epoch[1] Validation-mse=600322.187500
# INFO:root:Epoch[2] Resetting Data Iterator INFO:root:Epoch[2] Time cost=0.014 INFO:root:Epoch[2] Validation-mse=258119.859375
# INFO:root:Epoch[3] Resetting Data Iterator INFO:root:Epoch[3] Time cost=0.037 INFO:root:Epoch[3] Validation-mse=110928.750000
# INFO:root:Epoch[4] Resetting Data Iterator INFO:root:Epoch[4] Time cost=0.008 INFO:root:Epoch[4] Validation-mse=47643.732422
# INFO:root:Epoch[5] Resetting Data Iterator INFO:root:Epoch[5] Time cost=0.007 INFO:root:Epoch[5] Validation-mse=20451.433594
# INFO:root:Epoch[6] Resetting Data Iterator INFO:root:Epoch[6] Time cost=0.006 INFO:root:Epoch[6] Validation-mse=8778.733398
# INFO:root:Epoch[7] Resetting Data Iterator INFO:root:Epoch[7] Time cost=0.006 INFO:root:Epoch[7] Validation-mse=3775.464355
# INFO:root:Epoch[8] Resetting Data Iterator INFO:root:Epoch[8] Time cost=0.005 INFO:root:Epoch[8] Validation-mse=1635.793518
# INFO:root:Epoch[9] Resetting Data Iterator INFO:root:Epoch[9] Time cost=0.005 INFO:root:Epoch[9] Validation-mse=723.968018
# INFO:root:Epoch[10] Resetting Data Iterator INFO:root:Epoch[10] Time cost=0.005 INFO:root:Epoch[10] Validation-mse=337.514572
# INFO:root:Epoch[11] Resetting Data Iterator INFO:root:Epoch[11] Time cost=0.005 INFO:root:Epoch[11] Validation-mse=175.133366
# INFO:root:Epoch[12] Resetting Data Iterator INFO:root:Epoch[12] Time cost=0.019 INFO:root:Epoch[12] Validation-mse=107.841146
# INFO:root:Epoch[13] Resetting Data Iterator INFO:root:Epoch[13] Time cost=0.007 INFO:root:Epoch[13] Validation-mse=80.584860
# INFO:root:Epoch[14] Resetting Data Iterator INFO:root:Epoch[14] Time cost=0.007 INFO:root:Epoch[14] Validation-mse=69.974375
# Hold-out predictions from the regression net; report sklearn's MSE and
# render the symbol graph.
pred = model1.predict(test_iter)
holdout_mse = mean_squared_error(y[flg_valid], pred)
print(holdout_mse)
mx.visualization.plot_network(net1, node_attrs={'fixedsize': 'false'})
# pasted output:
# 82.7316956117
# Same regression net, but with a hand-rolled squared-error loss wrapped in
# MakeLoss instead of the LinearRegressionOutput head.  MakeLoss minimizes
# the symbol it wraps, and its forward output IS the per-sample loss value.
residual = mx.sym.Reshape(fco, shape=(-1,)) - label
net2 = mx.sym.MakeLoss(mx.sym.square(residual))

model2 = mx.model.FeedForward(symbol=net2, num_epoch=15, learning_rate=0.1)
model2.fit(X=train_iter, eval_data=valid_iter, eval_metric='MSE')
# pasted training log (output, not code):
# INFO:root:Start training with [cpu(0)]
# INFO:root:Epoch[0] Resetting Data Iterator INFO:root:Epoch[0] Time cost=0.011 INFO:root:Epoch[0] Validation-mse=123389273633193984.000000
# INFO:root:Epoch[1] Resetting Data Iterator INFO:root:Epoch[1] Time cost=0.007 INFO:root:Epoch[1] Validation-mse=3472040824668160.000000
# INFO:root:Epoch[2] Resetting Data Iterator INFO:root:Epoch[2] Time cost=0.007 INFO:root:Epoch[2] Validation-mse=97658467254272.000000
# INFO:root:Epoch[3] Resetting Data Iterator INFO:root:Epoch[3] Time cost=0.007 INFO:root:Epoch[3] Validation-mse=2744262066176.000000
# INFO:root:Epoch[4] Resetting Data Iterator INFO:root:Epoch[4] Time cost=0.006 INFO:root:Epoch[4] Validation-mse=76978479104.000000
# INFO:root:Epoch[5] Resetting Data Iterator INFO:root:Epoch[5] Time cost=0.007 INFO:root:Epoch[5] Validation-mse=2157003904.000000
# INFO:root:Epoch[6] Resetting Data Iterator INFO:root:Epoch[6] Time cost=0.007 INFO:root:Epoch[6] Validation-mse=61591550.000000
# INFO:root:Epoch[7] Resetting Data Iterator INFO:root:Epoch[7] Time cost=0.006 INFO:root:Epoch[7] Validation-mse=2112167.125000
# INFO:root:Epoch[8] Resetting Data Iterator INFO:root:Epoch[8] Time cost=0.006 INFO:root:Epoch[8] Validation-mse=172325.460938
# INFO:root:Epoch[9] Resetting Data Iterator INFO:root:Epoch[9] Time cost=0.006 INFO:root:Epoch[9] Validation-mse=50338.280273
# INFO:root:Epoch[10] Resetting Data Iterator INFO:root:Epoch[10] Time cost=0.007 INFO:root:Epoch[10] Validation-mse=29807.058594
# INFO:root:Epoch[11] Resetting Data Iterator INFO:root:Epoch[11] Time cost=0.009 INFO:root:Epoch[11] Validation-mse=24232.414062
# INFO:root:Epoch[12] Resetting Data Iterator INFO:root:Epoch[12] Time cost=0.009 INFO:root:Epoch[12] Validation-mse=22382.235352
# INFO:root:Epoch[13] Resetting Data Iterator INFO:root:Epoch[13] Time cost=0.010 INFO:root:Epoch[13] Validation-mse=21697.045898
# INFO:root:Epoch[14] Resetting Data Iterator INFO:root:Epoch[14] Time cost=0.007 INFO:root:Epoch[14] Validation-mse=21428.656250
# predict() on a MakeLoss net returns the loss output itself; with no label
# bound at predict time this is presumably (fco - 0)**2, so the square root
# recovers |fco| as the prediction.
# NOTE(review): confirm that an unbound label defaults to zeros in NDArrayIter.
pred = np.sqrt(model2.predict(test_iter))
holdout_mse = mean_squared_error(y[flg_valid], pred)
print(holdout_mse)
mx.visualization.plot_network(net2, node_attrs={'fixedsize': 'false'})
# pasted output:
# 76.778488739
# Re-use the same features, but binarize the target for classification:
# 1 when the price is at or above the mean, else 0.
x = pd.DataFrame(data=boston.data, columns=boston.feature_names)
price_is_high = boston.target >= boston.target.mean()
y = price_is_high.astype(np.int8)
print((x.shape, y.shape))
# pasted interpreter output:
# ((506, 13), (506,))
# Fresh ~70/30 split for the classification task.  The softmax head created
# below is named 'sm', so its label array must be fed under 'sm_label'.
flg_train = np.random.choice([False, True], len(y), replace=True, p=[0.3, 0.7])
flg_valid = ~flg_train

batch_size = 100
train_iter = mx.io.NDArrayIter(
    x[flg_train].values, y[flg_train], batch_size,
    shuffle=True, label_name='sm_label',
)
valid_iter = mx.io.NDArrayIter(
    x[flg_valid].values, y[flg_valid], batch_size,
    label_name='sm_label',
)
test_iter = mx.io.NDArrayIter(x[flg_valid].values)

# 13 -> 3 (relu) -> 2-way softmax classifier.
data = mx.sym.Variable('data')
# SoftmaxOutput(name='sm') binds its own 'sm_label'; this explicit symbol is
# otherwise unused in this segment.
label = mx.sym.Variable('sm_label')
hidden = mx.sym.Activation(mx.sym.FullyConnected(data, num_hidden=3),
                           act_type='relu')
fco = mx.sym.FullyConnected(hidden, num_hidden=2)
net1 = mx.sym.SoftmaxOutput(fco, name='sm')

model1 = mx.model.FeedForward(symbol=net1, num_epoch=15, learning_rate=0.1)
model1.fit(X=train_iter, eval_data=valid_iter, eval_metric='CE')
# pasted training log (output, not code):
# INFO:root:Start training with [cpu(0)]
# INFO:root:Epoch[0] Resetting Data Iterator INFO:root:Epoch[0] Time cost=0.007 INFO:root:Epoch[0] Validation-cross-entropy=0.694896
# INFO:root:Epoch[1] Resetting Data Iterator INFO:root:Epoch[1] Time cost=0.005 INFO:root:Epoch[1] Validation-cross-entropy=0.691014
# INFO:root:Epoch[2] Resetting Data Iterator INFO:root:Epoch[2] Time cost=0.006 INFO:root:Epoch[2] Validation-cross-entropy=0.688830
# INFO:root:Epoch[3] Resetting Data Iterator INFO:root:Epoch[3] Time cost=0.008 INFO:root:Epoch[3] Validation-cross-entropy=0.687697
# INFO:root:Epoch[4] Resetting Data Iterator INFO:root:Epoch[4] Time cost=0.007 INFO:root:Epoch[4] Validation-cross-entropy=0.687203
# INFO:root:Epoch[5] Resetting Data Iterator INFO:root:Epoch[5] Time cost=0.006 INFO:root:Epoch[5] Validation-cross-entropy=0.687085
# INFO:root:Epoch[6] Resetting Data Iterator INFO:root:Epoch[6] Time cost=0.005 INFO:root:Epoch[6] Validation-cross-entropy=0.687178
# INFO:root:Epoch[7] Resetting Data Iterator INFO:root:Epoch[7] Time cost=0.006 INFO:root:Epoch[7] Validation-cross-entropy=0.687380
# INFO:root:Epoch[8] Resetting Data Iterator INFO:root:Epoch[8] Time cost=0.006 INFO:root:Epoch[8] Validation-cross-entropy=0.687629
# INFO:root:Epoch[9] Resetting Data Iterator INFO:root:Epoch[9] Time cost=0.007 INFO:root:Epoch[9] Validation-cross-entropy=0.687889
# INFO:root:Epoch[10] Resetting Data Iterator INFO:root:Epoch[10] Time cost=0.005 INFO:root:Epoch[10] Validation-cross-entropy=0.688140
# INFO:root:Epoch[11] Resetting Data Iterator INFO:root:Epoch[11] Time cost=0.011 INFO:root:Epoch[11] Validation-cross-entropy=0.688370
# INFO:root:Epoch[12] Resetting Data Iterator INFO:root:Epoch[12] Time cost=0.008 INFO:root:Epoch[12] Validation-cross-entropy=0.688575
# INFO:root:Epoch[13] Resetting Data Iterator INFO:root:Epoch[13] Time cost=0.008 INFO:root:Epoch[13] Validation-cross-entropy=0.688754
# INFO:root:Epoch[14] Resetting Data Iterator INFO:root:Epoch[14] Time cost=0.008 INFO:root:Epoch[14] Validation-cross-entropy=0.688909
# Column 1 of the softmax output is the probability of class 1.
proba = model1.predict(test_iter)
pred = proba[:, 1]
print(log_loss(y[flg_valid], pred))
print(roc_auc_score(y[flg_valid], pred))
mx.visualization.plot_network(net1, node_attrs={'fixedsize': 'false'})
# pasted output (two prints fused onto one line):
# 0.687112347189
# 0.5
# Hand-rolled logistic regression via MakeLoss (compare against the
# SoftmaxOutput model above).
train_iter = mx.io.NDArrayIter(x[flg_train].values, y[flg_train],
                               batch_size, shuffle=True,
                               label_name='label')
valid_iter = mx.io.NDArrayIter(x[flg_valid].values, y[flg_valid],
                               batch_size,
                               label_name='label')
test_iter = mx.io.NDArrayIter(x[flg_valid].values)

# 13 -> 3 (relu) -> 1 logit, squashed to a probability.
data = mx.sym.Variable('data')
label = mx.sym.Variable('label')
fc = mx.sym.FullyConnected(data, num_hidden=3)
act = mx.sym.Activation(fc, act_type='relu')
fco = mx.sym.FullyConnected(act, num_hidden=1)
p = mx.sym.Activation(fco, act_type='sigmoid')

# Clip probabilities away from 0/1 so log() stays finite.
eps = 1e-6
p = mx.sym.minimum(mx.sym.maximum(p, eps), 1.0 - eps)
q = 1.0 - p
lp = mx.sym.Reshape(mx.sym.log(p), shape=(-1,))
lq = mx.sym.Reshape(mx.sym.log(q), shape=(-1,))

# BUG FIX: mx.sym.MakeLoss *minimizes* the wrapped symbol.  The original
# code wrapped the log-likelihood  label*lp + (1-label)*lq  un-negated, so
# the optimizer drove the likelihood DOWN (the pasted log below shows the
# validation metric frozen at 8.105288 for all 15 epochs, and a final
# log-loss of ~6.07 vs ~0.69 for the softmax model).  Minimize the
# negative log-likelihood instead.
loss = -(label * lp + (1.0 - label) * lq)
net2 = mx.sym.MakeLoss(loss)

model2 = mx.model.FeedForward(
    symbol=net2,
    num_epoch=15,
    learning_rate=0.1
)
# NOTE(review): 'MAE' compares the loss output against the raw labels, so
# the reported validation number is not a meaningful MAE; kept as-is.
model2.fit(
    X=train_iter,
    eval_data=valid_iter,
    eval_metric='MAE'
)
# pre-fix pasted training log (condensed): Validation-mae=8.105288 for all
# 15 epochs — the frozen metric was a symptom of the sign bug fixed above.

# predict() on a MakeLoss net returns the loss output; with no label bound
# the loss evaluates as -log(1 - p), so recover  p = 1 - exp(-output).
# (The original np.exp(output) recovered 1-p, i.e. inverted probabilities —
# hence the pasted AUC of exactly 0.5.)
# NOTE(review): assumes an unbound label defaults to zeros — confirm.
pred = 1.0 - np.exp(-model2.predict(test_iter))
print(log_loss(y[flg_valid], pred))
print(roc_auc_score(y[flg_valid], pred))
mx.visualization.plot_network(net2, node_attrs={'fixedsize': 'false'})
# pre-fix pasted output: 6.0696948799  0.5