19 Convolutional Neural Networks in PyTorch

If you are planning to use PyTorch on the workstations installed in the Department of Computer Science, you must first execute this command:

export PYTHONPATH=$PYTHONPATH:/usr/local/anaconda/lib/python3.6/site-packages/

A better solution is to add this line to your shell startup script, such as ~/.bashrc.
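For example, assuming bash is your login shell, you could append the line above to the end of ~/.bashrc:

echo 'export PYTHONPATH=$PYTHONPATH:/usr/local/anaconda/lib/python3.6/site-packages/' >> ~/.bashrc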

In [1]:
# Modified from https://github.com/vinhkhuc/PyTorch-Mini-Tutorials

import numpy as np

import torch
from torch.autograd import Variable

import time

import gzip
import pickle

import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
class ConvNet(torch.nn.Module):
    def __init__(self, output_dim, relu=True):
        super().__init__()

        self.activation = torch.relu if relu else torch.tanh
        
        self.conv_1 = torch.nn.Conv2d(1, 10, kernel_size=5, stride=1)
        self.conv_2 = torch.nn.Conv2d(10, 20, kernel_size=5, stride=2)

        # Calculate number of inputs to next linear layer. Result of
        # flattening the outputs from the second convolutional layer
        image_hw = 28
        kernel = 5
        stride = 1
        conv_1_hw = (image_hw - kernel) // stride + 1
        kernel = 5
        stride = 2
        n_units = 20
        conv_2_hw = (conv_1_hw - kernel) // stride + 1
        n_inputs = conv_2_hw ** 2 * n_units
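        # For example, for a 28x28 MNIST image:
        #   conv_1 output: (28 - 5) // 1 + 1 = 24, so shape is 10 x 24 x 24
        #   conv_2 output: (24 - 5) // 2 + 1 = 10, so shape is 20 x 10 x 10
        #   flattened inputs to fc_1: 10 * 10 * 20 = 2000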

        self.fc_1 = torch.nn.Linear(n_inputs, 20)
        self.fc_2 = torch.nn.Linear(20, output_dim)

    def forward_all_outputs(self, x):
        n_samples = x.shape[0]
        Y_conv_1 = self.activation(self.conv_1(x))
        Y_conv_2 = self.activation(self.conv_2(Y_conv_1))

        Y_fc_1 = self.activation(self.fc_1(Y_conv_2.reshape(n_samples, -1)))
        Y = self.fc_2(Y_fc_1)
        return Y_conv_1, Y_conv_2, Y_fc_1, Y

    def forward(self, x):
        Y_each_layer = self.forward_all_outputs(x)
        return Y_each_layer[-1]
    
    def train(self, Xtrain, Ttrain, Xtest, Ttest, n_epochs, batch_size, learning_rate):
        # Note: this overrides torch.nn.Module.train(), which normally just
        # switches the module into training mode.
        
        start_time = time.time()

        loss = torch.nn.CrossEntropyLoss(reduction='mean')
        optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate, momentum=0.9)
        # optimizer = torch.optim.Adam(self.parameters(), lr=0.01)

        n_examples = Xtrain.shape[0]
        for i in range(n_epochs):
            cost = 0.
            num_batches = n_examples // batch_size
            for k in range(num_batches):
                start, end = k * batch_size, (k + 1) * batch_size
                Xtrain_batch = Xtrain[start:end, ...]  # rather than typing [start:end, :, :, :]
                Ttrain_batch = Ttrain[start:end, ...]
                # Variable is a thin, deprecated wrapper in recent PyTorch
                # versions; plain tensors work the same way here.
                Xtrain_batch = Variable(Xtrain_batch, requires_grad=False)
                Ttrain_batch = Variable(Ttrain_batch, requires_grad=False)

                # Reset gradient
                optimizer.zero_grad()

                # Forward
                Y = self.forward(Xtrain_batch)
                output = loss(Y, Ttrain_batch)

                # Backward
                output.backward()

                # Update parameters
                optimizer.step()

                cost += output.item()
                
            Ytest = self.forward(Xtest)
            Ytest_class = Ytest.data.cpu().numpy().argmax(axis=1)
            print('Epoch {:d}, cost = {:.4f}, acc = {:.2f}'.format(i + 1, cost / num_batches, 100. * np.mean(Ytest_class == Ttest)))

        print('Took {:.2f} seconds'.format(time.time() - start_time))
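As a quick sanity check of the size calculation in __init__, we can push a dummy batch through the two convolutional layers and confirm that the flattened size matches n_inputs. This is just a minimal sketch; the variable names below are illustrative.

dummy = torch.zeros(1, 1, 28, 28)       # one 28x28 grayscale image
check_net = ConvNet(output_dim=10)
with torch.no_grad():
    y1 = check_net.conv_1(dummy)        # expected shape (1, 10, 24, 24)
    y2 = check_net.conv_2(y1)           # expected shape (1, 20, 10, 10)
print(y1.shape, y2.shape, y2.reshape(1, -1).shape)   # flattened: (1, 2000)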
In [3]:
device = 'cpu'
if torch.cuda.is_available():
    y_or_n = input('Would you like to run on the GPU? (y or n): ')
    if y_or_n.lower() in ('y', 'yes'):
        device = torch.device('cuda:0')
print('Running on', device)
Running on cuda:0
In [5]:
!nvidia-smi
Thu Apr  4 17:14:39 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.93       Driver Version: 410.93       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  TITAN Xp            Off  | 00000000:02:00.0 Off |                  N/A |
| 23%   42C    P5    13W / 250W |     10MiB / 12196MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
In [6]:
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')

Xtrain = train_set[0].reshape((-1, 1, 28, 28))
Ttrain = train_set[1] 
Xtest = test_set[0].reshape((-1, 1, 28, 28))
Ttest = test_set[1] 

Xtrain = torch.from_numpy(Xtrain).float().to(device)
Xtest = torch.from_numpy(Xtest).float().to(device)
Ttrain = torch.from_numpy(Ttrain).long().to(device)
# Do not need Ttest as torch.Tensor because it is only used in numpy calculation
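Before training, it can help to confirm the tensor shapes and devices. This is a small optional check; for the standard mnist.pkl.gz split there are 50,000 training and 10,000 test images of size 28x28.

print(Xtrain.shape, Ttrain.shape, Xtest.shape, Ttest.shape)
print(Xtrain.device, Xtrain.dtype, Ttrain.dtype)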
In [15]:
n_classes = 10
nnet = ConvNet(output_dim=n_classes)
nnet.to(device)
n_epochs = 40
batch_size = 100
learning_rate = 0.001
nnet.train(Xtrain, Ttrain, Xtest, Ttest, n_epochs, batch_size, learning_rate)
Epoch 1, cost = 1.5180, acc = 86.98
Epoch 2, cost = 0.4250, acc = 90.05
Epoch 3, cost = 0.3569, acc = 91.27
Epoch 4, cost = 0.3091, acc = 92.19
Epoch 5, cost = 0.2695, acc = 93.09
Epoch 6, cost = 0.2414, acc = 93.87
Epoch 7, cost = 0.2201, acc = 94.28
Epoch 8, cost = 0.2022, acc = 94.98
Epoch 9, cost = 0.1862, acc = 95.38
Epoch 10, cost = 0.1719, acc = 95.76
Epoch 11, cost = 0.1588, acc = 96.05
Epoch 12, cost = 0.1471, acc = 96.31
Epoch 13, cost = 0.1369, acc = 96.52
Epoch 14, cost = 0.1280, acc = 96.68
Epoch 15, cost = 0.1201, acc = 96.81
Epoch 16, cost = 0.1133, acc = 97.02
Epoch 17, cost = 0.1071, acc = 97.21
Epoch 18, cost = 0.1014, acc = 97.35
Epoch 19, cost = 0.0962, acc = 97.44
Epoch 20, cost = 0.0914, acc = 97.50
Epoch 21, cost = 0.0869, acc = 97.55
Epoch 22, cost = 0.0828, acc = 97.58
Epoch 23, cost = 0.0790, acc = 97.62
Epoch 24, cost = 0.0755, acc = 97.72
Epoch 25, cost = 0.0723, acc = 97.75
Epoch 26, cost = 0.0693, acc = 97.77
Epoch 27, cost = 0.0665, acc = 97.88
Epoch 28, cost = 0.0638, acc = 97.86
Epoch 29, cost = 0.0613, acc = 97.85
Epoch 30, cost = 0.0590, acc = 97.83
Epoch 31, cost = 0.0569, acc = 97.82
Epoch 32, cost = 0.0549, acc = 97.82
Epoch 33, cost = 0.0530, acc = 97.82
Epoch 34, cost = 0.0512, acc = 97.88
Epoch 35, cost = 0.0495, acc = 97.95
Epoch 36, cost = 0.0478, acc = 97.92
Epoch 37, cost = 0.0463, acc = 97.97
Epoch 38, cost = 0.0449, acc = 97.93
Epoch 39, cost = 0.0435, acc = 97.99
Epoch 40, cost = 0.0422, acc = 97.97
Took 46.19 seconds
In [21]:
x = Xtest[20:21]
conv1, conv2, fc1, fc2 = nnet.forward_all_outputs(x)
layer1weights = list(nnet.children())[0].weight.data

plt.subplot(6, 4, 1)
plt.imshow(x.squeeze().cpu(), cmap='gray')
plt.axis('off')

ploti = 4
for i in range(10):
    ploti += 1
    plt.subplot(6, 4, ploti)
    plt.imshow(layer1weights[i].squeeze().cpu(), cmap='gray')
    plt.axis('off')
    ploti += 1
    plt.subplot(6, 4, ploti)
    plt.imshow(conv1[0, i, :, :].detach().squeeze().cpu(), cmap='gray')
    plt.axis('off')
In [22]:
layer2weights = list(nnet.children())[1].weight.data

plt.subplot(5, 5, 1)
plt.imshow(x.squeeze().cpu(), cmap='gray')
plt.axis('off')

ploti = 1
for i in range(20):
    ploti += 1
    plt.subplot(5, 5, ploti)
    plt.imshow(conv2[0, i, :, :].detach().squeeze().cpu(), cmap='gray')
    plt.axis('off')
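The kernels in conv_2 each have 10 input channels (the weight tensor has shape (20, 10, 5, 5)), so unlike the first layer they cannot be drawn directly as single grayscale images. One possibility, sketched here, is to plot a single input-channel slice of each kernel:

plt.figure(figsize=(10, 8))
for i in range(20):
    plt.subplot(4, 5, i + 1)
    plt.imshow(layer2weights[i, 0].cpu(), cmap='gray')  # slice for input channel 0
    plt.axis('off')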
In [24]:
x = Xtest[0:10]
conv1, conv2, fc1, fc2 = nnet.forward_all_outputs(x)
print(fc2.shape)

plt.figure(figsize=(15, 15))
ploti = 0
for i in range(10):
    ploti += 1
    plt.subplot(10, 2, ploti)
    plt.imshow(x[i].squeeze().cpu(), cmap='gray')
    plt.axis('off')
    ploti += 1
    plt.subplot(10, 2, ploti)
    # Compute the softmax probabilities myself with numpy
    Y = fc2[i, :].cpu().detach().numpy()
    Yprobs = np.exp(Y) / np.sum(np.exp(Y))
    plt.plot(Yprobs)
torch.Size([10, 10])
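The softmax probabilities computed by hand with numpy above can also be obtained with PyTorch's built-in softmax; a small equivalent check:

Yprobs_all = torch.softmax(fc2, dim=1).detach().cpu().numpy()   # one probability row per test image
print(Yprobs_all.shape, Yprobs_all.sum(axis=1))                 # each row sums to 1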