In [1]:
%load_ext watermark
%watermark -a 'Christian Schuhegger' -u -d -v -p numpy,scipy,pandas,matplotlib,seaborn,mxnet
Christian Schuhegger 
last updated: 2019-02-27 

CPython 3.6.8
IPython 7.3.0

numpy 1.14.6
scipy 1.2.0
pandas 0.24.1
matplotlib 3.0.2
seaborn 0.9.0
mxnet 1.3.1
In [2]:
%matplotlib inline
import numpy as np, scipy, scipy.stats as stats, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import sklearn, sklearn.pipeline, sklearn.model_selection, sklearn.preprocessing, sklearn.metrics
import logging, time, datetime, tqdm
import mxnet as mx
from mxnet import gluon, nd, autograd, metric
import sys,os,subprocess,glob,multiprocessing


pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# pd.set_option('display.float_format', lambda x: '%.2f' % x)
np.set_printoptions(edgeitems=10)
np.set_printoptions(linewidth=1000)
np.set_printoptions(suppress=True)
np.core.arrayprint._line_width = 180

sns.set()
In [3]:
def get_gpu_name():
    try:
        out_str = subprocess.run(["nvidia-smi", "--query-gpu=gpu_name", "--format=csv"], stdout=subprocess.PIPE).stdout
        out_list = out_str.decode("utf-8").split('\n')
        out_list = out_list[1:-1]
        return out_list
    except Exception as e:
        print(e)


def get_cuda_version():
    """Get CUDA version"""
    if sys.platform == 'win32':
        raise NotImplementedError("Implement this!")
        # This breaks on linux:
        #cuda=!ls "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
        #path = "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\" + str(cuda[0]) +"\\version.txt"
    elif sys.platform == 'linux' or sys.platform == 'darwin':
        path = '/usr/local/cuda/version.txt'
    else:
        raise ValueError("Not in Windows, Linux or Mac")
    if os.path.isfile(path):
        with open(path, 'r') as f:
            data = f.read().replace('\n','')
        return data
    else:
        return "No CUDA in this machine"

def get_cudnn_version():
    """Get CUDNN version"""
    if sys.platform == 'win32':
        raise NotImplementedError("Implement this!")
        # This breaks on linux:
        #cuda=!ls "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
        #candidates = ["C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\" + str(cuda[0]) +"\\include\\cudnn.h"]
    elif sys.platform == 'linux':
        candidates = ['/usr/include/x86_64-linux-gnu/cudnn_v[0-99].h',
                      '/usr/local/cuda/include/cudnn.h',
                      '/usr/include/cudnn.h']
    elif sys.platform == 'darwin':
        candidates = ['/usr/local/cuda/include/cudnn.h',
                      '/usr/include/cudnn.h']
    else:
        raise ValueError("Not in Windows, Linux or Mac")
    for c in candidates:
        file = glob.glob(c)
        if file: break
    if file:
        with open(file[0], 'r') as f:
            version = ''
            for line in f:
                if "#define CUDNN_MAJOR" in line:
                    version = line.split()[-1]
                if "#define CUDNN_MINOR" in line:
                    version += '.' + line.split()[-1]
                if "#define CUDNN_PATCHLEVEL" in line:
                    version += '.' + line.split()[-1]
        if version:
            return version
        else:
            return "Cannot find CUDNN version"
    else:
        return "No CUDNN in this machine"
In [4]:
print("OS: ", sys.platform)
print("Python: ", sys.version)
print("MXNet: ", mx.__version__)
print("Numpy: ", np.__version__)
print("GPU: ", get_gpu_name())
print("CPU cores:", multiprocessing.cpu_count())
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())
OS:  linux
Python:  3.6.8 |Anaconda, Inc.| (default, Dec 30 2018, 01:22:34) 
[GCC 7.3.0]
MXNet:  1.3.1
Numpy:  1.14.6
GPU:  ['GeForce GTX 1080']
CPU cores: 12
CUDA Version 9.0.176
CuDNN Version  7.4.2
In [5]:
from IPython.display import display, HTML

from IPython.display import display_html
def display_side_by_side(*args):
    html_str = ''
    for df in args:
        if isinstance(df, np.ndarray):
            df = pd.DataFrame(df)
        html_str += df.to_html()
    # restyle only the opening tags; a bare replace('table', ...) would also mangle </table>
    html_str = html_str.replace('<table', '<table style="display:inline"')
    display_html(html_str, raw=True)

CSS = """
.output {
    flex-direction: row;
}
"""

def display_graphs_side_by_side(*args):
    html_str='<table><tr>'
    for g in args:
        html_str += '<td>'
        html_str += g._repr_svg_()
        html_str += '</td>'
    html_str += '</tr></table>'
    display_html(html_str,raw=True)
    

display(HTML("<style>.container { width:70% !important; }</style>"))
In [6]:
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(name)s:%(levelname)s: %(message)s')
log = logging.getLogger('std')
In [7]:
N_in       = 1000
N_subjects = 100000  # reduce to e.g. 260 * 10 for a quick experiment
W = stats.norm(loc=0, scale=1).rvs(size=(2,N_in), random_state=np.random.RandomState(42))
X = stats.norm(loc=0, scale=1).rvs(size=(N_subjects,N_in), random_state=np.random.RandomState(43))
y = np.sum(W[1:,:] * X + W[0,:], axis=1)
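The generated target is linear in the inputs: row 1 of `W` holds the weights and row 0 a per-feature offset, so `y` is equivalently a dot product plus a constant intercept. A quick sanity check (a sketch using the arrays above):

# sum_j (w_j * x_ij + b_j) == X . w + sum_j b_j
y_alt = X @ W[1, :] + W[0, :].sum()
assert np.allclose(y, y_alt)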
In [8]:
y.shape, X.shape
Out[8]:
((100000,), (100000, 1000))
In [9]:
pd.Series(y).describe()
Out[9]:
count    100000.000000
mean         19.234805
std          31.516783
min        -147.380827
25%          -2.102370
50%          19.273695
75%          40.475011
max         147.253509
dtype: float64
In [10]:
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size = 0.1, random_state = 42)
In [11]:
# Maximize training performance with Gluon data loader workers
# https://aws.amazon.com/blogs/machine-learning/maximize-training-performance-with-gluon-data-loader-workers/
def to_gluon_iter(x_in, y_in, batch_size=256, workers=(multiprocessing.cpu_count()//2)):
    x_nd = nd.array(x_in)
    y_nd = nd.array(y_in)
    dataset = mx.gluon.data.ArrayDataset(x_nd, y_nd)

    itr = mx.gluon.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=workers)  # last_batch='rollover' would be another option
    return itr

def to_sym_iter(x_in, y_in, batch_size=256):
    itr = mx.io.NDArrayIter(x_in, y_in, batch_size, shuffle=False, label_name='lin_reg_label')
    return itr
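Both helpers wrap the same arrays, but they feed the two APIs differently: the Gluon `DataLoader` yields plain `(data, label)` NDArray tuples, optionally prepared by `num_workers` child processes, while `NDArrayIter` yields `DataBatch` objects for the Module API. A minimal sketch of the two batch formats, using the helpers defined above on a small slice:

# DataLoader: plain (data, label) tuples
x_b, y_b = next(iter(to_gluon_iter(X[:8], y[:8], batch_size=4, workers=0)))
print(x_b.shape, y_b.shape)                       # (4, 1000) (4,)

# NDArrayIter: DataBatch objects carrying lists of NDArrays
batch = next(to_sym_iter(X[:8], y[:8], batch_size=4))
print(batch.data[0].shape, batch.label[0].shape)  # (4, 1000) (4,)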
In [12]:
class DataIterLoader():
    def __init__(self, data_iter):
        self.data_iter = data_iter

    def __iter__(self):
        self.data_iter.reset()
        return self

    def __next__(self):
        batch = self.data_iter.__next__()
        assert len(batch.data) == len(batch.label) == 1
        # print('len(batch_data): {}; batch.data[0].shape: {}'.format(len(batch.data), batch.data[0].shape))
        data = batch.data[0]
        label = batch.label[0]
        return data, label

    def next(self):
        return self.__next__() # for Python 2

# See:
#  Appendix: Upgrading from Module DataIter to Gluon DataLoader
#  https://mxnet.incubator.apache.org/versions/master/tutorials/gluon/datasets.html
batch_size = 256
gluon_train_iter = to_gluon_iter(X_train, y_train, batch_size=batch_size)
gluon_valid_iter = to_gluon_iter(X_test , y_test , batch_size=batch_size)

sym_train_iter  = to_sym_iter(X_train, y_train, batch_size=batch_size)
sym_valid_iter  = to_sym_iter(X_test , y_test , batch_size=batch_size)

# deliberately overwrite the Gluon loaders: the num_workers=0 baseline below
# feeds Gluon from the Module-style NDArrayIter via the DataIterLoader adapter
gluon_train_iter = DataIterLoader(sym_train_iter)
gluon_valid_iter = DataIterLoader(sym_valid_iter)
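The adapter resets the wrapped `DataIter` in `__iter__` and unwraps each `DataBatch` into the `(data, label)` tuple a Gluon training loop expects, so it can be re-iterated once per epoch. A quick smoke test (a sketch):

x_b, y_b = next(iter(gluon_train_iter))
print(x_b.shape, y_b.shape)   # (256, 1000) (256,)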
In [13]:
def create_aux():
    epochs=20
    model_ctx=mx.cpu()
    loss_function = mx.gluon.loss.L2Loss()
    init_function = mx.init.Xavier()
    optimizer     = mx.optimizer.Adam()
    return epochs, model_ctx, loss_function, init_function, optimizer
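One detail worth remembering for the numbers below: Gluon's `L2Loss` computes ½(ŷ − y)², so the per-batch losses printed in the Gluon training loop are half of the plain `mse` metric that `Module.fit` reports later. A one-line check (a sketch):

print(mx.gluon.loss.L2Loss()(nd.array([2.0]), nd.array([0.0])))  # [2.] == 0.5 * (2 - 0)^2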
In [14]:
def create_gluon_model():
    ACTIVATION = 'relu'
    net = mx.gluon.nn.HybridSequential(prefix='MLP_')
    with net.name_scope():
        net.add(
            mx.gluon.nn.Dense(300, activation=ACTIVATION, prefix='fc-1_'),
            mx.gluon.nn.Dense(100, activation=ACTIVATION, prefix='fc-2_'),
            mx.gluon.nn.Dense(1  , activation=None       , prefix='predictions_')
        )
    return net
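`Dense` layers defer parameter allocation until the first forward pass, which is why the network can be declared without knowing `N_in`. A shape probe on a throwaway copy of the model (a sketch; `probe` is just an illustrative name):

probe = create_gluon_model()
probe.initialize()                 # deferred: weight shapes are still unknown here
out = probe(nd.ones((2, N_in)))    # first forward pass infers 1000 -> 300 -> 100 -> 1
print(out.shape)                   # (2, 1)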
In [15]:
def create_sym_model():
    ACTIVATION = 'relu'

    data = mx.sym.Variable('data')
    Y    = mx.sym.Variable('lin_reg_label')
    fc1  = mx.sym.FullyConnected(data, name='fc1', num_hidden=300)
    act1 = mx.sym.Activation(fc1, name='relu1', act_type=ACTIVATION)
    fc2  = mx.sym.FullyConnected(act1, name='fc2', num_hidden=100)
    act2 = mx.sym.Activation(fc2, name='relu2', act_type=ACTIVATION)
    fc3  = mx.sym.FullyConnected(act2, name='fc3', num_hidden=1)
    lro  = mx.sym.LinearRegressionOutput(data=fc3, label=Y, name="lro")
    
    return lro    
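The symbolic graph mirrors the Gluon MLP (1000 → 300 → 100 → 1) with the regression head attached. Its free inputs can be listed before anything is bound (a sketch):

print(create_sym_model().list_arguments())
# ['data', 'fc1_weight', 'fc1_bias', 'fc2_weight', 'fc2_bias', 'fc3_weight', 'fc3_bias', 'lin_reg_label']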
In [16]:
epochs, model_ctx, loss_function, init_function, optimizer = create_aux()
In [17]:
sym_train_iter = to_sym_iter(X_train, y_train, batch_size=batch_size)
sym_valid_iter  = to_sym_iter(X_test, y_test, batch_size=batch_size)
gluon_model = create_gluon_model()
# gluon_model.hybridize()
# static_alloc/static_shape let the cached graph reuse buffers and skip
# shape re-inference across forward passes
gluon_model.hybridize(static_shape=True, static_alloc=True)
gluon_model.collect_params().initialize(init_function, ctx=model_ctx)

trainer = gluon.Trainer(gluon_model.collect_params(), optimizer)

nr_batches = len(X_train) // batch_size
total = epochs * (nr_batches + 1)  # e.g. for a tqdm progress bar (unused below)

def train(num_workers):
    mx.random.seed(1)

    if num_workers > 0:
        # multi-process Gluon DataLoader pipeline
        gluon_train_iter = to_gluon_iter(X_train, y_train, batch_size=batch_size, workers=num_workers)
        gluon_valid_iter = to_gluon_iter(X_test , y_test , batch_size=batch_size, workers=num_workers)
    else:
        # baseline: feed Gluon from the Module-style NDArrayIter via the adapter
        gluon_train_iter = DataIterLoader(sym_train_iter)
        gluon_valid_iter = DataIterLoader(sym_valid_iter)

    time1 = time.time()
    for e in range(epochs):
        for i, (x_, y_) in enumerate(gluon_train_iter):
            x_ = x_.as_in_context(model_ctx)
            y_ = y_.as_in_context(model_ctx)
            if num_workers > 0:
                nd.waitall()  # drain the async engine so queued work does not skew the timing
            with autograd.record():
                output = gluon_model(x_)
                loss = loss_function(output, y_)

            loss.backward()
            last_batch_loss = nd.mean(loss).asscalar()  # asscalar() forces a synchronization point
            trainer.step(x_.shape[0])
        if num_workers > 0:
            nd.waitall()
        t = time.time()
        print([t-time1, e, last_batch_loss])

    time2 = time.time()
    print('workers: {}; time: {}'.format(num_workers, time2-time1))

for workers in range(0, 6, 2):
    train(num_workers=workers)
[1.0114946365356445, 0, 2.688013]
[2.0326144695281982, 1, 0.9665312]
[3.0164339542388916, 2, 0.48773876]
[3.9574451446533203, 3, 0.28398213]
[4.913841009140015, 4, 0.19583277]
[5.915135383605957, 5, 0.15492727]
[6.879735946655273, 6, 0.14247555]
[7.859589576721191, 7, 0.18341143]
[8.832339763641357, 8, 0.8992266]
[9.830835580825806, 9, 3.629622]
[10.792657375335693, 10, 1.9046185]
[11.763952732086182, 11, 1.7743697]
[12.71415376663208, 12, 0.5882967]
[13.694055318832397, 13, 1.5400267]
[14.645780086517334, 14, 1.0786914]
[15.640945672988892, 15, 1.2148588]
[16.607487201690674, 16, 1.0880853]
[17.586325645446777, 17, 0.6401208]
[18.567651748657227, 18, 0.7938393]
[19.549959897994995, 19, 0.47012243]
workers: 0; time: 19.550041675567627
[2.080505847930908, 0, 0.46424496]
[3.9421205520629883, 1, 0.48203924]
[6.162583112716675, 2, 0.49990627]
[8.28538727760315, 3, 0.3972701]
[10.105726480484009, 4, 0.4251598]
[11.934930562973022, 5, 0.70309675]
[13.798903703689575, 6, 1.5159985]
[15.691807985305786, 7, 2.4252243]
[17.546621561050415, 8, 1.616506]
[19.324228763580322, 9, 3.8505468]
[21.208154916763306, 10, 1.9401183]
[23.062612056732178, 11, 1.0572857]
[24.97093892097473, 12, 0.78012973]
[26.971498727798462, 13, 0.39198607]
[28.860776901245117, 14, 0.2522775]
[30.770732879638672, 15, 0.4382887]
[32.62504196166992, 16, 0.74195063]
[34.49631190299988, 17, 0.6759308]
[36.76907277107239, 18, 1.1210217]
[38.66449570655823, 19, 0.9971001]
workers: 2; time: 38.66468834877014
[1.8759772777557373, 0, 0.5457086]
[3.9425806999206543, 1, 0.27232322]
[5.816941976547241, 2, 0.14699128]
[8.007057428359985, 3, 0.10915594]
[10.081457376480103, 4, 0.12385155]
[11.983157634735107, 5, 0.17104238]
[13.958155155181885, 6, 0.23911487]
[15.885420083999634, 7, 0.3623786]
[17.843082189559937, 8, 0.5243365]
[19.805946111679077, 9, 0.45486805]
[21.74500060081482, 10, 1.5060887]
[23.67216157913208, 11, 1.9099464]
[25.843996047973633, 12, 0.714357]
[27.72542119026184, 13, 0.28021395]
[29.729376316070557, 14, 0.17045738]
[31.75821566581726, 15, 0.110555805]
[33.6689875125885, 16, 0.17122094]
[35.82129526138306, 17, 0.42039445]
[37.803547620773315, 18, 0.34069037]
[39.806894302368164, 19, 0.5471134]
workers: 4; time: 39.807068824768066
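On this 12-core machine the single-process NDArrayIter path wins by roughly a factor of two (≈19.6 s versus ≈38.7 s and ≈39.8 s with 2 and 4 workers). The arrays are already materialized as in-memory NDArrays, so worker processes mostly add inter-process copying, and the extra `nd.waitall()` barriers in the multi-worker branches serialize the async engine on top of that; worker processes pay off when batch preparation itself is expensive (decoding, augmentation), not here. Note also that all three calls keep updating the same `gluon_model` and `trainer`, which is why the workers: 2 run already starts from a much lower first-epoch loss.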
In [18]:
gluon_predict_iter = mx.gluon.data.DataLoader(mx.gluon.data.ArrayDataset(nd.array(X_test)), batch_size=batch_size)
y_gluon_pred  = nd.zeros(X_test.shape[0])
# fill the prediction vector batch by batch (the last batch may be short)
for i, data in enumerate(gluon_predict_iter):
    data   = data.as_in_context(model_ctx)
    output = gluon_model(data)
    y_gluon_pred[i * batch_size : i * batch_size + output.shape[0]] = output[:, 0]

s = sklearn.metrics.mean_squared_error(y_test, y_gluon_pred.asnumpy())
s
Out[18]:
2.737390181114425
In [19]:
sklearn.metrics.explained_variance_score(y_test, y_gluon_pred.asnumpy())
Out[19]:
0.9976410425625697
In [20]:
epochs, model_ctx, loss_function, init_function, optimizer = create_aux()
In [22]:
sym_train_iter = to_sym_iter(X_train, y_train, batch_size=batch_size)
sym_valid_iter  = to_sym_iter(X_test, y_test, batch_size=batch_size)

sym_model = create_sym_model()

sym_model_module = mx.mod.Module(symbol = sym_model, data_names = ['data'], label_names = ['lin_reg_label'], context = model_ctx)

freq = int((len(X_train) * epochs / batch_size) // 10)  # log roughly ten times over the whole run
batch_end_callback = mx.callback.Speedometer(batch_size, frequent=freq, auto_reset=False)

time1 = time.time()

sym_model_module.fit(sym_train_iter, 
                     sym_valid_iter,
                     optimizer=optimizer,
                     initializer=init_function,
                     num_epoch=epochs,
                     eval_metric='mse',
                     batch_end_callback=batch_end_callback
                    )
time2 = time.time()
/home/local/cs/local/install/anaconda3-5.3.1-Linux-x86_64/envs/mxnet/lib/python3.6/site-packages/mxnet/module/base_module.py:504: UserWarning: Optimizer created manually outside Module but rescale_grad is not normalized to 1.0/batch_size/num_workers (1.0 vs. 0.00390625). Is this intended?
  optimizer_params=optimizer_params)
2019-02-27 14:51:46,594:root:INFO: Epoch[0] Train-mse=151.881052
2019-02-27 14:51:46,595:root:INFO: Epoch[0] Time cost=1.018
2019-02-27 14:51:46,664:root:INFO: Epoch[0] Validation-mse=4.889890
2019-02-27 14:51:47,685:root:INFO: Epoch[1] Train-mse=2.949202
2019-02-27 14:51:47,686:root:INFO: Epoch[1] Time cost=1.021
2019-02-27 14:51:47,746:root:INFO: Epoch[1] Validation-mse=3.650367
2019-02-27 14:51:48,753:root:INFO: Epoch[2] Train-mse=1.537151
2019-02-27 14:51:48,754:root:INFO: Epoch[2] Time cost=1.007
2019-02-27 14:51:48,835:root:INFO: Epoch[2] Validation-mse=3.233742
2019-02-27 14:51:49,828:root:INFO: Epoch[3] Train-mse=0.924725
2019-02-27 14:51:49,829:root:INFO: Epoch[3] Time cost=0.993
2019-02-27 14:51:49,911:root:INFO: Epoch[3] Validation-mse=3.063358
2019-02-27 14:51:50,909:root:INFO: Epoch[4] Train-mse=0.610477
2019-02-27 14:51:50,910:root:INFO: Epoch[4] Time cost=0.998
2019-02-27 14:51:50,992:root:INFO: Epoch[4] Validation-mse=3.018220
2019-02-27 14:51:51,994:root:INFO: Epoch[5] Train-mse=0.441097
2019-02-27 14:51:51,995:root:INFO: Epoch[5] Time cost=1.001
2019-02-27 14:51:52,069:root:INFO: Epoch[5] Validation-mse=3.035564
2019-02-27 14:51:53,068:root:INFO: Epoch[6] Train-mse=0.355614
2019-02-27 14:51:53,069:root:INFO: Epoch[6] Time cost=0.998
2019-02-27 14:51:53,134:root:INFO: Epoch[6] Validation-mse=3.111674
2019-02-27 14:51:54,152:root:INFO: Epoch[7] Train-mse=0.355230
2019-02-27 14:51:54,153:root:INFO: Epoch[7] Time cost=1.018
2019-02-27 14:51:54,230:root:INFO: Epoch[7] Validation-mse=3.327117
2019-02-27 14:51:55,202:root:INFO: Epoch[8] Train-mse=0.669648
2019-02-27 14:51:55,203:root:INFO: Epoch[8] Time cost=0.972
2019-02-27 14:51:55,273:root:INFO: Epoch[8] Validation-mse=4.119022
2019-02-27 14:51:56,318:root:INFO: Epoch[9] Train-mse=2.828806
2019-02-27 14:51:56,319:root:INFO: Epoch[9] Time cost=1.046
2019-02-27 14:51:56,380:root:INFO: Epoch[9] Validation-mse=5.946182
2019-02-27 14:51:57,537:root:INFO: Epoch[10] Train-mse=4.074934
2019-02-27 14:51:57,538:root:INFO: Epoch[10] Time cost=1.157
2019-02-27 14:51:57,601:root:INFO: Epoch[10] Validation-mse=3.578255
2019-02-27 14:51:58,611:root:INFO: Epoch[11] Train-mse=2.713380
2019-02-27 14:51:58,612:root:INFO: Epoch[11] Time cost=1.011
2019-02-27 14:51:58,681:root:INFO: Epoch[11] Validation-mse=2.832748
2019-02-27 14:51:59,680:root:INFO: Epoch[12] Train-mse=2.544748
2019-02-27 14:51:59,681:root:INFO: Epoch[12] Time cost=0.999
2019-02-27 14:51:59,740:root:INFO: Epoch[12] Validation-mse=3.092040
2019-02-27 14:52:00,819:root:INFO: Epoch[13] Train-mse=2.752240
2019-02-27 14:52:00,820:root:INFO: Epoch[13] Time cost=1.080
2019-02-27 14:52:00,894:root:INFO: Epoch[13] Validation-mse=2.507709
2019-02-27 14:52:01,868:root:INFO: Epoch[14] Train-mse=2.272493
2019-02-27 14:52:01,869:root:INFO: Epoch[14] Time cost=0.974
2019-02-27 14:52:01,939:root:INFO: Epoch[14] Validation-mse=2.545414
2019-02-27 14:52:02,992:root:INFO: Epoch[15] Train-mse=1.962754
2019-02-27 14:52:02,993:root:INFO: Epoch[15] Time cost=1.053
2019-02-27 14:52:03,052:root:INFO: Epoch[15] Validation-mse=3.936031
2019-02-27 14:52:04,065:root:INFO: Epoch[16] Train-mse=2.390086
2019-02-27 14:52:04,065:root:INFO: Epoch[16] Time cost=1.013
2019-02-27 14:52:04,142:root:INFO: Epoch[16] Validation-mse=4.206038
2019-02-27 14:52:05,148:root:INFO: Epoch[17] Train-mse=3.163517
2019-02-27 14:52:05,149:root:INFO: Epoch[17] Time cost=1.006
2019-02-27 14:52:05,209:root:INFO: Epoch[17] Validation-mse=4.414568
2019-02-27 14:52:06,207:root:INFO: Epoch[18] Train-mse=3.450250
2019-02-27 14:52:06,207:root:INFO: Epoch[18] Time cost=0.997
2019-02-27 14:52:06,273:root:INFO: Epoch[18] Validation-mse=5.286100
2019-02-27 14:52:07,255:root:INFO: Epoch[19] Train-mse=2.802456
2019-02-27 14:52:07,256:root:INFO: Epoch[19] Time cost=0.982
2019-02-27 14:52:07,329:root:INFO: Epoch[19] Validation-mse=3.315309
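The UserWarning at the top of the log is expected here: `create_aux` instantiates `mx.optimizer.Adam()` with its default `rescale_grad=1.0`, while `Module` would normally rescale gradients by `1.0/batch_size` (0.00390625 for a batch of 256). In the Gluon run the same normalization happened through `trainer.step(x_.shape[0])` instead.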
In [23]:
print(time2-time1)
21.75580096244812
In [24]:
y_sym_pred = sym_model_module.predict(sym_valid_iter)
s = sklearn.metrics.mean_squared_error(y_test, y_sym_pred.asnumpy())
s
Out[24]:
3.3060569400329425
In [25]:
sklearn.metrics.explained_variance_score(y_test, y_sym_pred.asnumpy())
Out[25]:
0.9966796086836532
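Taking the notebook's own numbers together: the hybridized Gluon loop needed ≈19.6 s for 20 epochs in its fastest (workers: 0) configuration and ends at a held-out MSE of ≈2.74 (explained variance ≈0.9976), while `Module.fit` needed ≈21.8 s with MSE ≈3.31 (explained variance ≈0.9967). Bear in mind that the Gluon model had accumulated 60 epochs across the three benchmark runs before it was evaluated; the two front ends land in the same ballpark on this synthetic regression, with the data pipeline rather than the front end dominating the timing differences.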