from mxnet import gluon, nd
from mxnet.gluon import nn
class CenteredLayer(nn.Block):
    """A custom Gluon layer that subtracts the mean from its input.

    The layer has no trainable parameters; it simply recenters the
    incoming NDArray so that the output has zero mean.
    """

    def __init__(self, **kwargs):
        super(CenteredLayer, self).__init__(**kwargs)

    def forward(self, x):
        # Shift every element by the global mean of the input.
        centered = x - x.mean()
        return centered
# Sanity-check the custom layer on a small vector: the output should be
# the input shifted to zero mean.
layer = CenteredLayer()
X = nd.array([1, 2, 3, 4, 5])
layer(X)
[-2. -1. 0. 1. 2.] <NDArray 5 @cpu(0)>
# A custom layer composes with built-in layers inside Sequential.
net = nn.Sequential()
net.add(
    nn.Dense(128),
    CenteredLayer()
)
net.initialize()
X = nd.random.uniform(shape=(4, 8))
y = net(X)
print(y.shape)
# The mean of the output should be ~0, up to float32 rounding error.
print(y.mean())
(4, 128) [-1.2560122e-09] <NDArray 1 @cpu(0)>
The `Parameter` class and the `ParameterDict` dictionary provide some basic
housekeeping functionality. Each `Block` comes with a parameter dictionary of
type `ParameterDict`, whose entries are of type `Parameter`. We can obtain a
`Parameter` instance from a `ParameterDict` via its `get` function.

param_dict = gluon.ParameterDict()
# get() creates (or retrieves) a Parameter named 'param2' with the given shape.
param = param_dict.get('param2', shape=(2, 3))
param
Parameter param2 (shape=(2, 3), dtype=<class 'numpy.float32'>)
param_dict
( Parameter param2 (shape=(2, 3), dtype=<class 'numpy.float32'>) )
class MyDense(nn.Block):
    """A fully connected layer with ReLU activation, built from raw Parameters.

    units: the number of outputs of this layer.
    in_units: the number of inputs accepted by this layer.
    """

    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        # Registering through self.params lets initialize() and
        # save_parameters() manage these tensors automatically.
        self.weight = self.params.get('weight', shape=(in_units, units))
        self.bias = self.params.get('bias', shape=(units,))

    def forward(self, x):
        # Affine transform followed by a ReLU non-linearity.
        affine = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(affine)
# Instantiate the custom layer and inspect the parameters it registered.
dense = MyDense(units=3, in_units=5)
dense.params
mydense1_ ( Parameter mydense1_weight (shape=(5, 3), dtype=<class 'numpy.float32'>) Parameter mydense1_bias (shape=(3,), dtype=<class 'numpy.float32'>) )
# Allocate and randomly initialize weight/bias, then run a forward pass.
dense.initialize()
X = nd.random.uniform(shape=(2, 5))
dense(X)
[[0.00618806 0.06494527 0.12089312] [0.04054129 0.06180677 0.07008321]] <NDArray 2x3 @cpu(0)>
# Custom layers stack inside Sequential just like built-in ones.
net = nn.Sequential()
net.add(
    MyDense(8, in_units=64),
    MyDense(1, in_units=8)
)
net.initialize()
X = nd.random.uniform(shape=(2, 64))
net(X)
[[0. ] [0.01760728]] <NDArray 2x1 @cpu(0)>
from mxnet import nd
from mxnet.gluon import nn
# Save a single NDArray to disk and load it back.
x = nd.arange(4)
nd.save('x-file.dat', x)
x2 = nd.load('x-file.dat')
x2
[ [0. 1. 2. 3.] <NDArray 4 @cpu(0)>]
# A list of NDArrays round-trips through a single save/load call.
y = nd.zeros(4)
nd.save('x-files.dat', [x, y])
x2, y2 = nd.load('x-files.dat')
(x2, y2)
( [0. 1. 2. 3.] <NDArray 4 @cpu(0)>, [0. 0. 0. 0.] <NDArray 4 @cpu(0)>)
# Dictionaries mapping strings to NDArrays can be saved and restored too.
mydict = {'x': x, 'y': y}
nd.save('mydict.dat', mydict)
mydict2 = nd.load('mydict.dat')
mydict2
{'x': [0. 1. 2. 3.] <NDArray 4 @cpu(0)>, 'y': [0. 0. 0. 0.] <NDArray 4 @cpu(0)>}
class MLP(nn.Block):
    """A two-layer perceptron: a 256-unit ReLU hidden layer feeding a
    10-unit output layer."""

    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')
        self.output = nn.Dense(10)

    def forward(self, x):
        hidden_rep = self.hidden(x)
        return self.output(hidden_rep)
# Run a forward pass, persist the parameters, restore them into a fresh
# model, and verify both models produce identical outputs.
net = MLP()
net.initialize()
x = nd.random.uniform(shape=(2, 20))
y = net(x)
net.save_parameters('mlp.params')
clone = MLP()
clone.load_parameters('mlp.params')
yclone = clone(x)
# Every element should compare equal (printed as all 1s).
print(yclone == y)
[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] <NDArray 2x10 @cpu(0)>
pip uninstall mxnet
pip install mxnet-cu90

Note that `mx.cpu()` (or `mx.cpu(i)` with any integer in the parentheses)
represents *all physical CPUs and memory*, whereas `mx.gpu()` represents only
one graphics card and its corresponding graphics memory.
import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn
# Handles for the available computation devices (contexts).
mx.cpu(), mx.gpu(), mx.gpu(1)
(cpu(0), gpu(0), gpu(1))
By default, NDArray objects are created on the CPU, which is why we see the
@cpu(0) identifier each time we print an NDArray.

x = nd.array([1, 2, 3])
x
[1. 2. 3.] <NDArray 3 @cpu(0)>
We can use the `context` property of an NDArray to view the device where the
NDArray is located.

x.context
cpu(0)
We can specify the storage device with the `ctx` parameter when creating an
NDArray.
# Allocate directly on the first GPU.
x = nd.ones((2, 3), ctx=mx.gpu())
x
y = nd.random.uniform(shape=(2, 3), ctx=mx.gpu(0))
y
# Explicit cross-device copy onto the second GPU.
z = x.copyto(mx.gpu(1))
print(x)
print(z)
# Arithmetic requires both operands to live on the same device.
y + z
Another way to transfer data between devices is `as_in_context()`.

z = x.as_in_context(mx.gpu(1))
z

If the context of the source and target variables is the same, the
`as_in_context` function does not copy anything — it returns the source
variable itself.

y.as_in_context(mx.gpu(1)) is y

By contrast, the `copyto` function always creates new memory for the target
variable.

y.copyto(mx.gpu()) is y
When we print NDArray data or convert NDArrays to NumPy format, if the data is
not in main memory, MXNet will copy it to main memory first, resulting in
additional transmission overhead.

Similar to NDArrays, a Gluon model can specify a device through the `ctx`
parameter during initialization.

net = nn.Sequential()
net.add(nn.Dense(1))
# Place the model parameters on the GPU at initialization time.
net.initialize(ctx=mx.gpu())
# The weight NDArray now resides on gpu(0).
net[0].weight.data()