Neural Networks in PyTorch

If you are planning to use PyTorch on the workstations installed in the Department of Computer Science, you might need to execute this command:

export PYTHONPATH=$PYTHONPATH:/usr/local/anaconda/lib/python3.8/site-packages/

A better solution is to add that export line to a startup script, such as your ~/.bashrc, so it is set automatically in every new shell.

A more complete example of implementing a convolutional network in PyTorch is here.

Now, let's build up a set of classes to define:

  • NeuralNetworkTorch to implement a neural network to do regression
  • NeuralNetworkClassifierTorch to implement a neural network to do classification
  • NeuralNetworkClassifierConvolutionalTorch to implement a convolutional neural network to do classification

We can reuse a lot of the class functions if we inherit each class from the previous one:

NeuralNetworkTorch --> NeuralNetworkClassifierTorch --> NeuralNetworkClassifierConvolutionalTorch
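In code, that chain is just three class definitions, each inheriting from the previous one. Here is a bare skeleton of what we implement below:

class NeuralNetworkTorch(torch.nn.Module):                                       # regression
    ...

class NeuralNetworkClassifierTorch(NeuralNetworkTorch):                          # classification
    ...

class NeuralNetworkClassifierConvolutionalTorch(NeuralNetworkClassifierTorch):   # convolutional classification
    ...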

NeuralNetworkTorch

First, here is an implementation of NeuralNetworkTorch.

In [8]:
import numpy as np
import torch
import matplotlib.pyplot as plt
In [19]:
######################################################################
## NeuralNetworkTorch for regression
######################################################################

class NeuralNetworkTorch(torch.nn.Module):

    def __init__(self, n_inputs, n_hiddens_list, n_outputs, device='cpu'):

        super().__init__()

        self.n_inputs = n_inputs
        self.n_hiddens_list = n_hiddens_list
        self.n_outputs = n_outputs
        self.device = device

        self.n_layers = len(n_hiddens_list) + 1
        
        self.layers = torch.nn.ModuleList()
        for n_units in n_hiddens_list:
            self.layers.append(self._make_tanh_layer(n_inputs, n_units))
            n_inputs = n_units
        self.layers.append(torch.nn.Linear(n_inputs, n_outputs))

        self.stand_params = None
        self.error_trace = []
        self.error_trace_val = []

    def _make_tanh_layer(self, n_inputs, n_units):
        return torch.nn.Sequential(torch.nn.Linear(n_inputs, n_units),
                                   torch.nn.Tanh())

    def __repr__(self):
        return f'NeuralNetworkTorch({self.n_inputs}, {self.n_hiddens_list}, {self.n_outputs}, device={self.device})'

    def forward(self, Xst):
        Ys = [Xst]
        for layer in self.layers:
            Ys.append(layer(Ys[-1]))
        return Ys[1:]  # remove X from Ys

    def train(self, Xtrain, Ttrain, n_epochs=10, learning_rate=0.01, method='adam', verbose=True, Xval=None, Tval=None):

        if isinstance(Xtrain, np.ndarray):
            Xtrain = torch.from_numpy(Xtrain.astype(np.float32))
        if isinstance(Ttrain, np.ndarray):
            Ttrain = torch.from_numpy(Ttrain.astype(np.float32))

        if Xval is not None:
            if isinstance(Xval, np.ndarray):
                Xval = torch.from_numpy(Xval.astype(np.float32))
            if isinstance(Tval, np.ndarray):
                Tval = torch.from_numpy(Tval.astype(np.float32))

        self.stand_params = self.calc_standardize_parameters(Xtrain, Ttrain)
        Xtrain = self.standardize_X(Xtrain)
        Ttrain = self.standardize_T(Ttrain)
        if Xval is not None:
            Xval = self.standardize_X(Xval)
            Tval = self.standardize_T(Tval)

        if method == 'sgd':
            optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate)
        elif method == 'adam':
            optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        else:
            raise ValueError("train: method must be 'sgd' or 'adam'.")

        error_f = torch.nn.MSELoss()

        # If validation data is given, remember the weights with the lowest
        # validation error seen so far, so they can be restored after training.
        self.best_epoch = None
        best_mse = None
        best_weights = self.get_all_weights()
        
        for epoch in range(n_epochs):

            Ytrain = self.forward(Xtrain)[-1]  # to get last layer output
            mse = error_f(Ytrain, Ttrain)

            optimizer.zero_grad()
            mse.backward()
            optimizer.step()

            self.error_trace.append(mse.sqrt())
            
            if Xval is not None:
                Yval = self.forward(Xval)[-1]
                mse_val = error_f(Yval, Tval)
                self.error_trace_val.append(mse_val.sqrt())
                
                if best_mse is None or mse_val < best_mse:
                    best_mse = mse_val
                    best_weights = self.get_all_weights()
                    self.best_epoch = epoch

            if verbose and ((epoch+1) % (n_epochs // 10) == 0 or epoch == n_epochs - 1):
                if Xval is not None:
                    print(f'Epoch {epoch+1} RMSE train {self.error_trace[-1]:.4f} val {self.error_trace_val[-1]:.4f}')
                else:
                    print(f'Epoch {epoch+1} RMSE {self.error_trace[-1]:.4f}')
                
        if Xval is not None:
            self.set_all_weights(best_weights)

        return self

    def use(self, X, return_hidden_layer_outputs=False):
        if isinstance(X, np.ndarray):
            X = torch.from_numpy(X.astype(np.float32))

        Xst = self.standardize_X(X)
        Ys = self.forward(Xst)
        Y = Ys[-1]
        Y = self.unstandardize_T(Y)
        Zs = Ys[:-1]
        Y = Y.detach().cpu().numpy()
        Zs = [Z.detach().cpu().numpy() for Z in Zs]
        return (Y, Zs) if return_hidden_layer_outputs else Y

    def get_error_trace(self):
        return self.error_trace

    def get_error_traces(self):
        return self.error_trace, self.error_trace_val, self.best_epoch

    def calc_standardize_parameters(self, X, T):
        Xmeans = X.mean(axis=0)
        Xstds = X.std(axis=0)
        Xstds[Xstds == 0] = Xstds[Xstds > 0].mean(axis=0)
        if T is None:
            return {'Xmeans': Xmeans, 'Xstds': Xstds}
        else:
            Tmeans = T.mean(axis=0)
            Tstds = T.std(axis=0)
            return {'Xmeans': Xmeans, 'Xstds': Xstds, 'Tmeans': Tmeans, 'Tstds': Tstds}

    def standardize_X(self, X):
        return (X - self.stand_params['Xmeans']) / self.stand_params['Xstds']

    def unstandardize_X(self, Xst):
        return Xst * self.stand_params['Xstds'] + self.stand_params['Xmeans']

    def standardize_T(self, T):
        return (T - self.stand_params['Tmeans']) / self.stand_params['Tstds']

    def unstandardize_T(self, Tst):
        return Tst * self.stand_params['Tstds'] + self.stand_params['Tmeans']

    def get_Ws(self):
        Ws = []
        for layer in self.layers:
            W_and_bias = list(layer.parameters())
            W = W_and_bias[0].detach().numpy()
            Wbias = W_and_bias[1].detach().numpy().T.reshape(1, -1)
            if W.ndim == 4:
                W = np.moveaxis(W, 0, 3)  # first dim is units. Move it to last, fourth, dim
                n_units = Wbias.shape[-1]
                W = W.reshape(-1, n_units)
            else:
                W = W.T
            Ws.append(np.vstack((Wbias, W)))
        return Ws
        
    def get_all_weights(self):
        return torch.nn.utils.parameters_to_vector(self.parameters())

    def set_all_weights(self, all_weights):
        torch.nn.utils.vector_to_parameters(all_weights, self.parameters())
In [20]:
nnet = NeuralNetworkTorch(2, [20, 10], 3)
nnet
Out[20]:
NeuralNetworkTorch(2, [20, 10], 3, device=cpu)
In [21]:
len(nnet.get_Ws())
Out[21]:
3
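Three weight matrices: one for each of the two hidden layers and one for the output layer. get_Ws places each layer's bias weights in the first row of its matrix.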

Let's test it with some toy data.

In [22]:
n_samples = 10
Xtrain = np.linspace(0., 20.0, n_samples).reshape((n_samples, 1))
Ttrain = 0.2 + 0.05 * (Xtrain) + 0.4 * np.sin(Xtrain / 2) + 0.2 * np.random.normal(size=(n_samples, 1))
Xtest = Xtrain + 0.1 * np.random.normal(size=(n_samples, 1))
Ttest = 0.2 + 0.05 * (Xtest) + 0.4 * np.sin(Xtest / 2) + 0.2 * np.random.normal(size=(n_samples, 1))


n_inputs = Xtrain.shape[1]
n_hiddens_list = [10, 10, 10]
# n_hiddens_list = []  
n_outputs = Ttrain.shape[1]

n_epochs = 2000
learning_rate = 0.01

nnet = NeuralNetworkTorch(n_inputs, n_hiddens_list, n_outputs)
nnet.train(Xtrain, Ttrain, n_epochs, learning_rate, method='adam')

def rmse(Y, T):
    error = T - Y
    return np.sqrt(np.mean(error ** 2))

Ytrain = nnet.use(Xtrain)
rmse_train = rmse(Ytrain, Ttrain)
Ytest = nnet.use(Xtest)
rmse_test = rmse(Ytest, Ttest)

print(f'RMSE: Train {rmse_train:.2f} Test {rmse_test:.2f}')

nnet
Epoch 200 RMSE 0.2440
Epoch 400 RMSE 0.0060
Epoch 600 RMSE 0.0003
Epoch 800 RMSE 0.0002
Epoch 1000 RMSE 0.0010
Epoch 1200 RMSE 0.0001
Epoch 1400 RMSE 0.0000
Epoch 1600 RMSE 0.0095
Epoch 1800 RMSE 0.0000
Epoch 2000 RMSE 0.0020
RMSE: Train 0.00 Test 0.19
Out[22]:
NeuralNetworkTorch(1, [10, 10, 10], 1, device=cpu)
In [23]:
plt.figure(figsize=(10, 10))

n_plot_rows = nnet.n_layers + 1
ploti = 0

ploti += 1
plt.subplot(n_plot_rows, 1, ploti)
plt.plot(nnet.get_error_trace())
plt.xlabel('Epoch')
plt.ylabel('RMSE')

ploti += 1
plt.subplot(n_plot_rows, 1, ploti)
plt.plot(Xtrain, Ttrain, 'o', label='Training Data')
plt.plot(Xtest, Ttest, 'o', label='Testing Data')
X_for_plot = np.linspace(0, 20, 100).reshape(-1, 1)
Y, Zs = nnet.use(X_for_plot, return_hidden_layer_outputs=True)
plt.plot(X_for_plot, Y, label='Neural Net Output')
plt.legend()
plt.xlabel('X')
plt.ylabel('Y')

for layeri in range(nnet.n_layers - 2, -1, -1):
    ploti += 1
    plt.subplot(n_plot_rows, 1, ploti)
    plt.plot(X_for_plot, Zs[layeri])
    plt.xlabel('X')
    plt.ylabel(f'Outputs from Layer {layeri + 1}')

Okay. That looks good. Now let's move on to our neural network classifier.

NeuralNetworkClassifierTorch

By extending NeuralNetworkTorch we only have to implement the constructor, the __repr__, train, and use functions!
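The main differences are the LogSoftmax module appended to the final layer and the negative log likelihood loss used in train. As a quick reminder (a minimal sketch with made-up values, separate from the class below), NLLLoss applied to log-softmax outputs gives the same value as CrossEntropyLoss applied directly to the unnormalized outputs:

logits = torch.randn(4, 3)             # 4 samples, 3 classes, made-up values
labels = torch.tensor([0, 2, 1, 2])    # integer class labels

log_probs = torch.nn.LogSoftmax(dim=1)(logits)
nll = torch.nn.NLLLoss()(log_probs, labels)
ce = torch.nn.CrossEntropyLoss()(logits, labels)
print(torch.isclose(nll, ce))          # tensor(True)

Keeping the LogSoftmax inside the network means forward returns log class probabilities, which use converts back to probabilities with exp.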

In [24]:
######################################################################
## NeuralNetworkClassifierTorch
######################################################################

class NeuralNetworkClassifierTorch(NeuralNetworkTorch):

    def __init__(self, n_inputs, n_hiddens_list, n_outputs, device='cpu'):

        if isinstance(n_inputs, list) or isinstance(n_inputs, tuple):
            # n_inputs might be n_channels, n_rows, n_cols in 2-d input sample
            # if defined for use in a Convolutional network
            super().__init__(np.prod(n_inputs), n_hiddens_list, n_outputs, device)
        else:
            super().__init__(n_inputs, n_hiddens_list, n_outputs, device)

        last_layer = self.layers[-1]
        new_last_layer = torch.nn.Sequential(last_layer,
                                             torch.nn.LogSoftmax(dim=1))  # across columns
        self.layers = self.layers[:-1]
        self.layers.append(new_last_layer)
        # self.layers = self.layers[:-1] + new_last_layer

    def __repr__(self):
        return f'NeuralNetworkClassifierTorch({self.n_inputs}, {self.n_hiddens_list}, {self.n_outputs}, device={self.device})'

    def train(self, Xtrain, Ttrain, n_epochs=10, learning_rate=0.01, method='adam', verbose=True, Xval=None, Tval=None):

        if isinstance(Xtrain, np.ndarray):
            Xtrain = torch.from_numpy(Xtrain.astype(np.float32))
        if isinstance(Ttrain, np.ndarray):
            Ttrain = torch.from_numpy(Ttrain.astype(np.float32))

        if Xval is not None:
            if isinstance(Xval, np.ndarray):
                Xval = torch.from_numpy(Xval.astype(np.float32))
            if isinstance(Tval, np.ndarray):
                Tval = torch.from_numpy(Tval.astype(np.float32))

        Ttrain = Ttrain.long().reshape(-1)  # PyTorch expects class labels as a 1-D integer tensor
        if Xval is not None:
            Tval = Tval.long().reshape(-1)
        
        self.stand_params = self.calc_standardize_parameters(Xtrain, None)
        Xtrain = self.standardize_X(Xtrain)
        if Xval is not None:
            Xval = self.standardize_X(Xval)

        self.classes, counts = Ttrain.unique(return_counts=True)
        self.classes = self.classes.numpy()
        # self.most_common_class = self.classes[np.argmax(counts)]  # not used

        if method == 'sgd':
            optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate)
        elif method == 'adam':
            optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        else:
            raise ValueError("train: method must be 'sgd' or 'adam'.")

        neg_log_likelihood = torch.nn.NLLLoss()

        self.best_epoch = None
        best_nll = None
        best_weights = self.get_all_weights()
        
        for epoch in range(n_epochs):

            Ytrain = self.forward(Xtrain)[-1]  # to get last layer output as log(softmax(Y))
            nll = neg_log_likelihood(Ytrain, Ttrain)

            optimizer.zero_grad()
            nll.backward()
            optimizer.step()

            self.error_trace.append((-nll).exp())  # store the likelihood, exp(-NLL), which approaches 1 as the fit improves
            
            if Xval is not None:
                Yval = self.forward(Xval)[-1]
                nll_val = neg_log_likelihood(Yval, Tval)
                self.error_trace_val.append((-nll_val).exp())
                
                if best_nll is None or nll_val < best_nll:
                    best_nll = nll_val
                    best_weights = self.get_all_weights()
                    # print(f'epoch {epoch} first w {best_weights[0]}, nll train {self.error_trace[-1]} val {self.error_trace_val[-1]}')
                    self.best_epoch = epoch

            if verbose and ((epoch+1) % (n_epochs // 10) == 0 or epoch == n_epochs - 1):
                if Xval is not None:
                    print(f'Epoch {epoch+1} Likelihood train {self.error_trace[-1]:.4f} val {self.error_trace_val[-1]:.4f}')
                else:
                    print(f'Epoch {epoch+1} Likelihood {self.error_trace[-1]:.4f}')

        if Xval is not None:
            # print(f'done epoch {epoch} first w {self.get_all_weights()[0]}')
            self.set_all_weights(best_weights)
            # print(f'after done epoch {epoch} first w {self.get_all_weights()[0]}')

        return self

    def use(self, X, return_hidden_layer_outputs=False):
        if isinstance(X, np.ndarray):
            X = torch.from_numpy(X.astype(np.float32))

        Xst = self.standardize_X(X)
        Ys = self.forward(Xst)
        Y = Ys[-1]
        Zs = Ys[:-1]
        probs = Y.detach().exp().cpu().numpy()  # exp() because the network output is log(softmax())
        Zs = [Z.detach().cpu().numpy() for Z in Zs]
        classes = self.classes[np.argmax(probs, axis=1)].reshape(-1, 1)
        return (classes, probs, Zs) if return_hidden_layer_outputs else (classes, probs)

And now we can test our classifier.

Here is a simple classification problem. Classify integers from 0 to 99 into Class 1 if the value is between 30 and 70, and Class 0 otherwise.

In [25]:
def calc_T(X):
    return np.logical_and(30 < X, X < 70).astype(int)

X = np.arange(100).reshape((-1, 1))
T = calc_T(X)

plt.plot(X, T, '.-')
plt.xlabel('Sample Value')
plt.ylabel('Target Class Label')
Out[25]:
Text(0, 0.5, 'Target Class Label')
In [26]:
nnet = NeuralNetworkClassifierTorch(1, [5, 5], 2)
nnet.train(X, T, 100, 0.1)
Epoch 10 Likelihood 0.5284
Epoch 20 Likelihood 0.7944
Epoch 30 Likelihood 0.9510
Epoch 40 Likelihood 0.9780
Epoch 50 Likelihood 0.9848
Epoch 60 Likelihood 0.9879
Epoch 70 Likelihood 0.9895
Epoch 80 Likelihood 0.9907
Epoch 90 Likelihood 0.9917
Epoch 100 Likelihood 0.9925
Out[26]:
NeuralNetworkClassifierTorch(1, [5, 5], 2, device=cpu)
In [28]:
plt.figure(figsize=(10, 12))
grid = plt.GridSpec(3, 1)  #, wspace=0.4, hspace=0.3)

plt.subplot(grid[0, :])
plt.plot(nnet.get_error_trace())

plt.subplot(grid[1:, :])
plt.plot(X, T, 'o')
classes, probs = nnet.use(X)
plt.plot(X, classes, '^')
plt.plot(X, probs[:, 1], alpha=0.5)
plt.ylabel('Train Data')
plt.plot([0, 100], [0.5, 0.5], 'r');

NeuralNetworkClassifierConvolutionalTorch

Now for the convolutional net. Some of its implementation is shown here. You must complete it for A5.

In [53]:
torch.nn.Conv2d?
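The ? above brings up the documentation for torch.nn.Conv2d. Before completing the class, it can also help to try a Conv2d layer on its own. Here is a minimal sketch (the layer and tensor here are made up for illustration) showing how the kernel size and stride determine the output shape for a batch of 20 x 20, single-channel images:

conv = torch.nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=2)
dummy_batch = torch.zeros(5, 1, 20, 20)   # (n_samples, n_channels, n_rows, n_cols)
print(conv(dummy_batch).shape)            # torch.Size([5, 3, 9, 9])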
In [ ]:
######################################################################
## NeuralNetworkClassifierConvolutionalTorch
######################################################################

class NeuralNetworkClassifierConvolutionalTorch(NeuralNetworkClassifierTorch):

    def __init__(self, n_inputs, n_conv_list, n_fc_list, n_outputs, device='cpu'):
        '''n_inputs: n_channels X n_rows X n_cols
        n_conv_list: list of tuples of (n_units, kernel_size, stride)'''

        # Call the parent constructor, then discard the layers it made and build new ones below
        super().__init__(n_inputs, n_fc_list, n_outputs, device)

        self.n_channels, self.n_input_rows, self.n_input_cols = n_inputs
        self.n_conv_list = n_conv_list
        self.n_fc_list = n_fc_list
        self.n_outputs = n_outputs
        self.device = device

        self.layers = torch.nn.ModuleList()
        n_in_channels = self.n_channels
        
        output_rows, output_cols, output_channels = ( . . . )  # COMPLETE THIS
        
        for conv in n_conv_list:
            n_units, kernel_size, stride = conv
            (kernel_rows, kernel_cols) = (kernel_size, kernel_size) if np.isscalar(kernel_size) else kernel_size
            (stride_rows, stride_cols) = (stride, stride) if np.isscalar(stride) else stride

            self.layers.append(self._make_conv_layer(n_in_channels, n_units, kernel_size, stride))
            n_in_channels = n_units

            output_rows, output_cols, output_channels = ( . . . )  # COMPLETE THIS
            
        # Add Flatten to output of last convolutional layer
        self.layers[-1].add_module('flatten', torch.nn.Flatten())
        
        # Calculate the total number of inputs entering each unit in the first fully-connected layer
        n_inputs = . . .   # COMPLETE THIS

        for n_units in n_fc_list:
            self.layers.append(self._make_tanh_layer(n_inputs, n_units))
            n_inputs = n_units
            
        self.layers.append(torch.nn.Sequential(torch.nn.Linear(n_inputs, n_outputs),
                                               torch.nn.LogSoftmax(dim=1)))  # across columns

    def _make_conv_layer(self, n_in_channels, n_units, kernel_size, stride):
        return torch.nn.Sequential(torch.nn.Conv2d(n_in_channels, n_units, kernel_size, stride),
                                   torch.nn.Tanh())

    def __repr__(self):
        return f'''NeuralNetworkClassifierConvolutionalTorch(n_inputs={self.n_inputs}, n_conv_list={self.n_conv_list}, n_fc_list={self.n_fc_list}, n_outputs={self.n_outputs}, device={self.device})'''

And now to test it. We will use the square and diamond images shown in previous lecture notes.

In [30]:
from A5mysolution import *
In [31]:
def makeImages(nEach):
    images = np.zeros((nEach * 2, 1, 20, 20))  # nSamples, nChannels, rows, columns
    radii = 3 + np.random.randint(10 - 5, size=(nEach * 2, 1))
    centers = np.zeros((nEach * 2, 2))
    for i in range(nEach * 2):
        r = radii[i, 0]
        centers[i, :] = r + 1 + np.random.randint(18 - 2 * r, size=(1, 2))
        x = int(centers[i, 0])
        y = int(centers[i, 1])
        if i < nEach:
            # squares
            images[i, 0, x - r:x + r, y + r] = 1.0
            images[i, 0, x - r:x + r, y - r] = 1.0
            images[i, 0, x - r, y - r:y + r] = 1.0
            images[i, 0, x + r, y - r:y + r + 1] = 1.0
        else:
            # diamonds
            images[i, 0, range(x - r, x), range(y, y + r)] = 1.0
            images[i, 0, range(x - r, x), range(y, y - r, -1)] = 1.0
            images[i, 0, range(x, x + r + 1), range(y + r, y - 1, -1)] = 1.0
            images[i, 0, range(x, x + r), range(y - r, y)] = 1.0
            # images += np.random.randn(*images.shape) * 0.5
    T = np.zeros((nEach * 2, 1))
    T[nEach:] = 1
    return images, T

nEach = 100
Xtrain, Ttrain = makeImages(nEach)
Xtest, Ttest = makeImages(10)
Xtrain.shape, Ttrain.shape, Xtest.shape, Ttest.shape
Out[31]:
((200, 1, 20, 20), (200, 1), (20, 1, 20, 20), (20, 1))
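Note the shape of Xtrain: (n_samples, n_channels, n_rows, n_cols). This channels-first layout is what torch.nn.Conv2d expects as input. To see what the samples look like, something like the following sketch (the variable names and indices here are just for illustration) displays one square and one diamond:

Ximages, Timages = makeImages(5)   # 5 squares followed by 5 diamonds
plt.subplot(1, 2, 1)
plt.imshow(Ximages[0, 0])          # a square, class 0
plt.subplot(1, 2, 2)
plt.imshow(Ximages[5, 0])          # a diamond, class 1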

Let's try a network with two convolutional layers followed by two fully-connected layers.

For the convolutional layers, the first will use 3 units, each with a kernel size of 3 x 3 and a stride of 2 x 2. The second will use 4 units, each with a kernel size of 5 x 5 and a stride of 2 x 2.

The shape of the input images is (1, 20, 20). The first value is the number of channels. We only have one channel, because we have only one intensity per pixel. The next two values are the number of rows and the number of columns in each image. We made them as 20 x 20 images.

How do we define the convolutional layers? We need to include the number of units, the size of the kernel, and the stride. The size of the kernel is the shape of each sub-image. Let's use 3 units each with a 3 x 3 kernel, and stride them by 2 along rows and columns. So for the first layer we would use (3, (3, 3), (2, 2)).

For the second convolutional layer, let's use 4 units with kernel size of 5 x 5 and stride of 2 x 2, so its specification will be (4, (5, 5), (2, 2)).

In [32]:
n_inputs = [1, 20, 20]
n_conv_list = [(3, (3, 3), (2, 2)), (4, (5, 5), (2, 2))]
n_fc_list = [5, 5]

nnet = NeuralNetworkClassifierConvolutionalTorch(n_inputs, n_conv_list, n_fc_list, 2)
nnet
Out[32]:
NeuralNetworkClassifierConvolutionalTorch(n_inputs=400, n_conv_list=[(3, (3, 3), (2, 2)), (4, (5, 5), (2, 2))], n_fc_list=[5, 5], n_outputs=2, device=cpu)
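Notice that the repr shows n_inputs=400. The parent constructor was given the list [1, 20, 20] and stored np.prod([1, 20, 20]) = 400, the number of pixel values in each flattened image.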

Now, to train it and test it.

In [33]:
nnet.train(Xtrain, Ttrain, 400, 0.01)
Epoch 40 Likelihood 0.9873
Epoch 80 Likelihood 0.9977
Epoch 120 Likelihood 0.9986
Epoch 160 Likelihood 0.9990
Epoch 200 Likelihood 0.9992
Epoch 240 Likelihood 0.9994
Epoch 280 Likelihood 0.9995
Epoch 320 Likelihood 0.9996
Epoch 360 Likelihood 0.9997
Epoch 400 Likelihood 0.9997
Out[33]:
NeuralNetworkClassifierConvolutionalTorch(n_inputs=400, n_conv_list=[(3, (3, 3), (2, 2)), (4, (5, 5), (2, 2))], n_fc_list=[5, 5], n_outputs=2, device=cpu)
In [34]:
plt.plot(nnet.get_error_trace());

And test it by applying it to train and test data.

In [35]:
def percent_correct(Y, T):
    return np.mean(Y == T) * 100
In [36]:
Classes_train, Probs_train = nnet.use(Xtrain)
Classes_test, Probs_test = nnet.use(Xtest)
pc_train = percent_correct(Classes_train, Ttrain)
pc_test = percent_correct(Classes_test, Ttest)
print(f'Percent correct: Train {pc_train:.2f} Test {pc_test:.2f}')
Percent correct: Train 100.00 Test 95.00
In [37]:
nnet.get_Ws()
Out[37]:
[array([[ 0.11485263,  0.1158149 ,  0.1267491 ],
        [-0.2515384 , -0.04769806, -0.45377913],
        [ 0.5172608 ,  0.04799262,  0.15637438],
        [-0.05658079,  0.019784  ,  0.18814231],
        [-0.01561673,  0.02355211, -0.17559147],
        [-0.00865345,  0.5657233 , -0.46249714],
        [ 0.46235758, -0.13314217, -0.01158859],
        [ 0.47132447,  0.11674959, -0.26989627],
        [-0.11273972, -0.23081599, -0.08162393],
        [-0.20350884,  0.54734224,  0.04405316]], dtype=float32),
 array([[ 0.00819783,  0.06776461,  0.00767366, -0.0928953 ],
        [-0.12337035, -0.54885715, -0.15410621,  0.09375736],
        [-0.1731644 , -0.35000652, -0.2710134 , -0.23908873],
        [-0.15853782, -0.37635416, -0.22914116,  0.17172478],
        [-0.07995473, -0.08656137, -0.27375627,  0.12266738],
        [-0.1206286 ,  0.31954396, -0.35256645,  0.35868365],
        [-0.46286178, -0.30266723, -0.21780035, -0.23155367],
        [-0.21982358, -0.28607386, -0.11144164, -0.18098177],
        [-0.03447698,  0.08264611, -0.26059076, -0.05831757],
        [-0.37708098,  0.27671057, -0.23057936, -0.10786955],
        [ 0.01445049, -0.15032704,  0.02833546, -0.11438721],
        [-0.1544289 , -0.42082354, -0.19381991, -0.16426381],
        [ 0.0060949 , -0.13229568, -0.3957024 ,  0.14806296],
        [-0.2261085 ,  0.01878671, -0.16486776, -0.07932416],
        [ 0.05148206, -0.363763  , -0.02944275, -0.11372262],
        [-0.45233613, -0.24975221, -0.19870089, -0.02278008],
        [-0.01234807, -0.0548207 , -0.34826452, -0.01864016],
        [-0.17528436, -0.13396947, -0.23584099,  0.42656246],
        [ 0.05131888, -0.29386047, -0.06639653,  0.10102137],
        [-0.19841112,  0.1917951 , -0.13553399, -0.21752807],
        [-0.26689824,  0.04515814, -0.30587676,  0.01054448],
        [-0.12177621, -0.22819032, -0.15522662, -0.2705444 ],
        [-0.304581  , -0.09785741, -0.17337565, -0.3229492 ],
        [-0.18091917, -0.14647049, -0.03951858, -0.04825447],
        [-0.16105385, -0.19373728, -0.25032422, -0.07096113],
        [-0.09640659, -0.17987305, -0.15868688, -0.38541043],
        [-0.27452305, -0.17177477, -0.17711933, -0.11325402],
        [-0.21407719,  0.00442731, -0.02454768, -0.2947394 ],
        [-0.21134895, -0.0415016 , -0.22274493, -0.11419667],
        [-0.39611408, -0.31030247, -0.15952216, -0.33398828],
        [-0.47156945,  0.04211433, -0.23507042,  0.00692368],
        [-0.1799398 , -0.23578218, -0.12236536, -0.11778076],
        [-0.22223796, -0.03868366, -0.36321428, -0.10110889],
        [ 0.04014106,  0.1164107 , -0.13764642,  0.14948583],
        [ 0.03966717, -0.2027683 ,  0.03749636,  0.08915117],
        [-0.08812845, -0.405878  , -0.03033206,  0.01757926],
        [-0.30259973, -0.29688823, -0.23302554, -0.21058206],
        [-0.29549527,  0.27329707, -0.0014407 ,  0.10619262],
        [ 0.18097872, -0.09246252,  0.0024341 ,  0.03891686],
        [ 0.00308982, -0.27963525, -0.1671531 ,  0.13170676],
        [-0.08399162, -0.13635972, -0.35603285,  0.1619834 ],
        [ 0.24067357, -0.1129398 ,  0.01004686, -0.27016988],
        [-0.32271647, -0.17981426, -0.16830005, -0.06778584],
        [-0.07903849, -0.1495426 ,  0.09480797,  0.11847766],
        [-0.13848226,  0.06303025, -0.44931227,  0.0847558 ],
        [-0.03869504,  0.19065091, -0.3477858 ,  0.03373846],
        [-0.4176106 ,  0.17932218, -0.20930202, -0.18466659],
        [-0.07146617, -0.1336694 , -0.02051045,  0.31644812],
        [-0.25809866, -0.34144855, -0.25544032,  0.23393974],
        [-0.24218802, -0.20217338, -0.28619975,  0.12123585],
        [-0.4624091 , -0.186879  , -0.2644728 , -0.21513148],
        [-0.1110317 , -0.13670862,  0.06551373, -0.06226679],
        [ 0.18003376, -0.25065696, -0.0231601 ,  0.28699818],
        [-0.07908481, -0.37958997,  0.03243966, -0.05391359],
        [ 0.02980414,  0.17952241,  0.3684739 ,  0.3331037 ],
        [ 0.30445358, -0.05357276,  0.01933519,  0.5112109 ],
        [ 0.01492959, -0.1380745 ,  0.13697171,  0.07013752],
        [-0.21750379, -0.21750203, -0.01760901, -0.3948566 ],
        [-0.00567524, -0.25191134,  0.10900261, -0.00248792],
        [ 0.09694801, -0.35594606,  0.1566155 , -0.23837109],
        [-0.11850618,  0.02335254, -0.10962736,  0.00671214],
        [ 0.50414896,  0.13350141,  0.03031527,  0.12097074],
        [-0.00793819, -0.20381275, -0.0226971 , -0.23572847],
        [ 0.12456775, -0.14083204,  0.2160484 , -0.16515477],
        [-0.03725023, -0.15711257, -0.03606196,  0.05488919],
        [-0.16292685, -0.33615148, -0.05258921, -0.35524082],
        [ 0.11248331,  0.0046334 ,  0.00162024, -0.14834537],
        [-0.21547319, -0.09234294,  0.08226028,  0.25238982],
        [ 0.29170498, -0.2743039 ,  0.02645135, -0.06022469],
        [ 0.09474684, -0.00907688, -0.02272175, -0.02563222],
        [ 0.24650803, -0.10601389,  0.34791988, -0.05811856],
        [-0.18873274, -0.30799308,  0.1733022 , -0.012615  ],
        [ 0.10184078, -0.12630841, -0.10942606, -0.34323844],
        [-0.24545588,  0.26433766,  0.19138315, -0.2544481 ],
        [ 0.3955612 , -0.40784383,  0.13334854, -0.2983113 ],
        [ 0.21519285, -0.29798654,  0.31472692, -0.21363056]],
       dtype=float32),
 array([[-2.6501538e-02,  7.6009423e-02, -1.0627344e-02, -7.9353176e-02,
          7.2750013e-04],
        [-2.7921993e-01, -3.0999896e-01,  1.2959968e-01, -3.3488119e-01,
         -2.6011997e-01],
        [-3.8453451e-01, -2.1517934e-01,  3.7768579e-01, -4.0736684e-01,
         -2.4982694e-01],
        [ 3.6382320e-04,  9.5333077e-02,  5.1971134e-02,  3.8284231e-02,
         -2.0014669e-01],
        [-2.3009714e-01, -1.3121540e-02,  2.1432818e-01, -1.0005577e-01,
         -2.8989446e-01],
        [ 1.3038306e-01,  1.6644511e-01, -4.5228931e-01,  4.2555100e-01,
          3.7086320e-01],
        [-1.4480515e-01, -1.2917656e-01, -7.2048187e-02, -2.1295635e-01,
          3.0089952e-02],
        [-2.7943039e-01, -1.5385626e-01,  1.8732803e-01, -1.0547520e-01,
         -2.1670181e-01],
        [-3.2891160e-01, -3.9441589e-01,  6.6661397e-03, -1.2617126e-01,
         -4.0969348e-01],
        [-2.3672122e-01, -1.8349411e-01,  5.4737079e-01, -4.3037763e-01,
         -5.0951082e-01],
        [ 1.5367481e-01,  9.6237689e-02, -2.0931249e-02, -7.7931434e-02,
         -1.1535822e-01],
        [-2.2574949e-01, -1.3516858e-01,  1.3618995e-01, -1.7824568e-01,
         -6.4311712e-03],
        [-2.2831234e-01, -2.4026658e-01,  2.8771067e-01, -4.3407577e-01,
         -2.9390872e-01],
        [-4.1149300e-01, -3.7105709e-01,  1.6762991e-01, -1.1985406e-01,
         -3.8178992e-01],
        [ 7.7549212e-02,  2.7197561e-01, -1.3003260e-01,  1.8344732e-01,
          6.4640380e-02],
        [-1.1184795e-01, -3.5025635e-01,  2.8557891e-01, -3.6283359e-01,
         -1.5463081e-01],
        [-4.0680858e-01, -3.4462869e-01,  2.9105300e-01, -4.2851990e-01,
         -5.3411293e-01],
        [-4.8453910e-03,  1.4872539e-02,  4.4344157e-02, -1.3585833e-01,
         -2.5176984e-01],
        [-4.5986855e-01, -3.4550014e-01,  2.8219151e-01, -2.8002197e-01,
         -2.2549529e-01],
        [-1.7210251e-01, -3.4922656e-01,  4.2070955e-01, -2.2658229e-01,
         -3.8522163e-01],
        [ 1.0290636e-01,  4.1382890e-02,  2.7880302e-02, -1.4347462e-01,
         -8.5682996e-02],
        [-1.8898721e-01, -1.8823159e-01,  2.6973101e-01, -2.2399613e-01,
         -9.6830748e-02],
        [-1.1519485e-01, -2.0168947e-01,  6.6510156e-02, -2.4583511e-01,
         -2.8523120e-01],
        [-2.2049499e-01, -7.5475566e-02,  5.3553283e-02, -1.5333251e-02,
         -1.3980000e-01],
        [-3.1157124e-01, -4.2635152e-01,  1.6057600e-01, -4.5095477e-01,
         -3.3225989e-01],
        [-1.0598533e-01, -3.4058279e-01,  3.6469242e-01, -4.3378633e-01,
         -3.7405005e-01],
        [-2.3192866e-01, -2.0492680e-01,  3.9555225e-01, -1.9865525e-01,
         -2.4988170e-01],
        [-3.2120913e-01, -3.9694232e-01,  2.9584652e-01, -2.2025499e-01,
         -3.6630669e-01],
        [-1.2060175e-01, -3.5562414e-01,  1.7332451e-01, -3.2639369e-01,
         -2.7535233e-01],
        [-2.7336469e-01, -3.2325843e-01,  3.7863624e-01, -5.0369734e-01,
         -2.8213078e-01],
        [-2.4336830e-01, -3.2441148e-01,  2.7930376e-01, -1.5475513e-01,
         -4.2493808e-01],
        [-2.1760896e-01, -1.8101612e-02,  2.7504921e-01, -3.5092369e-02,
         -3.5210004e-01],
        [ 3.3764991e-01,  3.2406476e-01, -3.6991709e-01,  1.6440605e-01,
          2.3922206e-01],
        [-1.8486442e-01,  3.3861767e-03,  2.4195710e-01, -5.1889114e-02,
         -1.4517438e-01],
        [ 3.0172953e-01,  1.2691617e-01, -1.7727873e-01,  3.1793672e-01,
          5.1454637e-02],
        [-1.9969739e-01, -2.5362071e-01,  2.0770369e-01,  5.8242492e-02,
         -1.1190960e-01],
        [ 3.2291240e-01,  2.7845022e-01, -3.2698515e-01,  1.6366513e-01,
          4.4123572e-01]], dtype=float32),
 array([[ 0.26676834,  0.00728482, -0.00285054, -0.1461152 ,  0.09963264],
        [-0.11402499, -0.5634181 , -0.63458645,  0.28971758, -0.26953155],
        [-0.8352964 , -0.89830506, -0.73621505,  0.70818186, -0.78922045],
        [ 0.82725924,  0.15371536,  0.48443234, -0.8227662 ,  0.7208096 ],
        [-0.20121162, -0.17843205, -0.64368296,  0.24769948, -0.46094772],
        [-0.4818274 , -0.80634284, -0.43846112,  0.85539085, -0.31235638]],
       dtype=float32),
 array([[-0.16243261, -0.10252033],
        [-0.72352964,  0.76084346],
        [-0.73958087,  0.9099615 ],
        [-0.98955375,  0.84909   ],
        [ 0.452683  , -0.91716033],
        [-0.9436207 ,  0.9745404 ]], dtype=float32)]
In [38]:
W = nnet.get_Ws()[0]  # first layer
W.shape
Out[38]:
(10, 3)
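The first layer is convolutional, with 3 units that each have a single-channel 3 x 3 kernel. get_Ws reshapes each kernel into a column of 9 weights and stacks the unit's bias weight on top, giving 10 rows and one column per unit.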
In [39]:
W[1:, 0].reshape(3, 3)
Out[39]:
array([[-0.2515384 ,  0.5172608 , -0.05658079],
       [-0.01561673, -0.00865345,  0.46235758],
       [ 0.47132447, -0.11273972, -0.20350884]], dtype=float32)
In [40]:
for i in range(3):
    plt.subplot(1, 3, i + 1)
    plt.imshow(W[1:, i].reshape(3, 3))
    plt.colorbar()
plt.tight_layout()