Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka CPython 3.6.8 IPython 7.2.0 torch 1.0.0
Based on the Network in Network (NiN) architecture (Lin et al., 2013).
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
# Make cuDNN kernel selection deterministic so GPU runs are reproducible
# (may be slower; only takes effect when CUDA is available).
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
##########################
### SETTINGS
##########################

# Hyperparameters
RANDOM_SEED = 1        # seed for torch.manual_seed (reproducible weight init)
LEARNING_RATE = 0.001  # Adam step size
BATCH_SIZE = 256
NUM_EPOCHS = 10

# Architecture
NUM_CLASSES = 10       # CIFAR-10 has 10 classes

# Other
DEVICE = "cuda:0"      # NOTE(review): hard-coded GPU id; training will fail without CUDA
GRAYSCALE = False      # unused in the visible cells -- presumably kept for other notebooks
The following code cell that implements the ResNet-34 architecture is a derivative of the code provided at https://pytorch.org/docs/0.4.0/_modules/torchvision/models/resnet.html.
##########################
### CIFAR-10 Dataset
##########################

# transforms.ToTensor() converts PIL images to float tensors in the 0-1 range.
tensorize = transforms.ToTensor()

train_dataset = datasets.CIFAR10(
    root='data', train=True, transform=tensorize, download=True)

test_dataset = datasets.CIFAR10(
    root='data', train=False, transform=tensorize)

# Shuffle only the training split; 8 worker processes feed each loader.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=8, shuffle=True)

test_loader = DataLoader(
    dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=8, shuffle=False)
# Checking the dataset: pull a single batch to confirm tensor shapes.
# (The original cell was duplicated verbatim; the two identical checks
# were merged into one.)
images, labels = next(iter(train_loader))
print('Image batch dimensions:', images.shape)
print('Image label dimensions:', labels.shape)
Files already downloaded and verified Image batch dimensions: torch.Size([256, 3, 32, 32]) Image label dimensions: torch.Size([256]) Image batch dimensions: torch.Size([256, 3, 32, 32]) Image label dimensions: torch.Size([256])
##########################
### MODEL
##########################


class NiN(nn.Module):
    """Network in Network (Lin et al., 2013) classifier for 32x32 RGB images.

    Three conv "mlpconv" blocks (a spatial conv followed by 1x1 convs),
    ending in one channel per class and global average pooling, so the
    (N, num_classes, 1, 1) output flattens directly into logits.
    """

    def __init__(self, num_classes):
        super(NiN, self).__init__()
        self.num_classes = num_classes
        self.classifier = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 160, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(160, 96, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),  # 32x32 -> 16x16
            nn.Dropout(0.5),
            # Block 2
            nn.Conv2d(96, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(kernel_size=3, stride=2, padding=1),  # 16x16 -> 8x8
            nn.Dropout(0.5),
            # Block 3: one channel per class, then global average pooling
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            # generalized: was hard-coded to 10 output channels
            nn.Conv2d(192, num_classes, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(kernel_size=8, stride=1, padding=0),  # 8x8 -> 1x1
        )

    def forward(self, x):
        """Return (logits, probas), each of shape (N, num_classes)."""
        x = self.classifier(x)
        logits = x.view(x.size(0), self.num_classes)
        # Fix: softmax must be taken over the flattened logits, not over the
        # raw 4-D (N, C, 1, 1) feature map, so probas matches logits' shape.
        probas = F.softmax(logits, dim=1)
        return logits, probas
# Seed before instantiation so the weight initialization is reproducible,
# then move the model to the configured device.
torch.manual_seed(RANDOM_SEED)

model = NiN(NUM_CLASSES)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
def compute_accuracy(model, data_loader, device):
    """Return classification accuracy over *data_loader* as a percentage.

    The model is expected to return a (logits, probas) tuple; predictions
    are the argmax over the probability dimension.

    Returns a plain Python float (the original returned a device tensor,
    and its ``int.float()`` call crashed on an empty loader).
    """
    correct_pred, num_examples = 0, 0
    for features, targets in data_loader:
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        # .item() keeps the running count as a plain int on the host
        correct_pred += (predicted_labels == targets).sum().item()
    if num_examples == 0:  # guard: empty loader would divide by zero
        return 0.0
    return correct_pred / num_examples * 100
# Standard training loop: for each epoch, run one pass of minibatch SGD
# (Adam) over train_loader, then evaluate train accuracy with gradients off.
start_time = time.time()
for epoch in range(NUM_EPOCHS):

    model.train()  # enable dropout
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()  # clear grads accumulated by the previous step

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        # log every 150th batch (including batch 0)
        if not batch_idx % 150:
            print ('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                   %(epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    model.eval()  # disable dropout for evaluation
    with torch.set_grad_enabled(False): # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

with torch.set_grad_enabled(False): # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))

print('Total Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 0000/0196 | Cost: 2.6021 Epoch: 001/010 | Batch 0150/0196 | Cost: 1.3961 Epoch: 001/010 | Train: 45.084% Time elapsed: 0.26 min Epoch: 002/010 | Batch 0000/0196 | Cost: 1.1228 Epoch: 002/010 | Batch 0150/0196 | Cost: 1.0426 Epoch: 002/010 | Train: 56.166% Time elapsed: 0.52 min Epoch: 003/010 | Batch 0000/0196 | Cost: 0.9980 Epoch: 003/010 | Batch 0150/0196 | Cost: 0.8279 Epoch: 003/010 | Train: 66.702% Time elapsed: 0.80 min Epoch: 004/010 | Batch 0000/0196 | Cost: 0.6384 Epoch: 004/010 | Batch 0150/0196 | Cost: 0.7103 Epoch: 004/010 | Train: 65.330% Time elapsed: 1.08 min Epoch: 005/010 | Batch 0000/0196 | Cost: 0.6308 Epoch: 005/010 | Batch 0150/0196 | Cost: 0.5913 Epoch: 005/010 | Train: 79.636% Time elapsed: 1.36 min Epoch: 006/010 | Batch 0000/0196 | Cost: 0.4409 Epoch: 006/010 | Batch 0150/0196 | Cost: 0.5557 Epoch: 006/010 | Train: 76.456% Time elapsed: 1.62 min Epoch: 007/010 | Batch 0000/0196 | Cost: 0.4778 Epoch: 007/010 | Batch 0150/0196 | Cost: 0.4815 Epoch: 007/010 | Train: 65.890% Time elapsed: 1.89 min Epoch: 008/010 | Batch 0000/0196 | Cost: 0.3782 Epoch: 008/010 | Batch 0150/0196 | Cost: 0.4339 Epoch: 008/010 | Train: 85.200% Time elapsed: 2.16 min Epoch: 009/010 | Batch 0000/0196 | Cost: 0.3083 Epoch: 009/010 | Batch 0150/0196 | Cost: 0.3290 Epoch: 009/010 | Train: 78.108% Time elapsed: 2.42 min Epoch: 010/010 | Batch 0000/0196 | Cost: 0.2229 Epoch: 010/010 | Batch 0150/0196 | Cost: 0.1945 Epoch: 010/010 | Train: 87.384% Time elapsed: 2.70 min Total Training Time: 2.70 min Test accuracy: 70.67% Total Time: 2.71 min
##########################
### CIFAR-10 Dataset
##########################

# Note transforms.ToTensor() scales input images
# to 0-1 range
# NOTE(review): this cell re-creates the datasets/loaders from the earlier
# cell, but with pin_memory=True (page-locked host memory for faster
# host->GPU copies) instead of num_workers=8 -- presumably to compare
# data-loading configurations.
train_dataset = datasets.CIFAR10(root='data',
                                 train=True,
                                 transform=transforms.ToTensor(),
                                 download=True)

test_dataset = datasets.CIFAR10(root='data',
                                train=False,
                                transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          pin_memory=True,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         pin_memory=True,
                         shuffle=False)
# Checking the dataset: pull a single batch to confirm tensor shapes.
# (The original cell was duplicated verbatim; the two identical checks
# were merged into one.)
images, labels = next(iter(train_loader))
print('Image batch dimensions:', images.shape)
print('Image label dimensions:', labels.shape)
Files already downloaded and verified Image batch dimensions: torch.Size([256, 3, 32, 32]) Image label dimensions: torch.Size([256]) Image batch dimensions: torch.Size([256, 3, 32, 32]) Image label dimensions: torch.Size([256])
# Seed before instantiation so the weight initialization is reproducible.
torch.manual_seed(RANDOM_SEED)

# NOTE(review): resnet34 is not defined anywhere in this file -- it is
# presumably defined in a notebook cell missing from this export (the
# markdown above cites the torchvision ResNet implementation). Verify the
# defining cell exists before running.
model = resnet34(NUM_CLASSES)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
def compute_accuracy(model, data_loader, device):
    """Return classification accuracy over *data_loader* as a percentage.

    The model is expected to return a (logits, probas) tuple; predictions
    are the argmax over the probability dimension.

    Returns a plain Python float (the original returned a device tensor,
    and its ``int.float()`` call crashed on an empty loader).
    """
    correct_pred, num_examples = 0, 0
    for features, targets in data_loader:
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        # .item() keeps the running count as a plain int on the host
        correct_pred += (predicted_labels == targets).sum().item()
    if num_examples == 0:  # guard: empty loader would divide by zero
        return 0.0
    return correct_pred / num_examples * 100
# Standard training loop (identical in structure to the earlier NiN run):
# per epoch, one pass of Adam minibatch updates, then a no-grad evaluation.
start_time = time.time()
for epoch in range(NUM_EPOCHS):

    model.train()  # enable training-mode layers (dropout/batchnorm)
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()  # clear grads accumulated by the previous step

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        # log every 150th batch (including batch 0)
        if not batch_idx % 150:
            print ('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                   %(epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    model.eval()  # switch to inference-mode layers for evaluation
    with torch.set_grad_enabled(False): # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

with torch.set_grad_enabled(False): # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))

print('Total Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 0000/0196 | Cost: 2.6021 Epoch: 001/010 | Batch 0150/0196 | Cost: 1.3961 Epoch: 001/010 | Train: 45.084% Time elapsed: 0.39 min Epoch: 002/010 | Batch 0000/0196 | Cost: 1.1228 Epoch: 002/010 | Batch 0150/0196 | Cost: 1.0426 Epoch: 002/010 | Train: 56.166% Time elapsed: 0.77 min Epoch: 003/010 | Batch 0000/0196 | Cost: 0.9980 Epoch: 003/010 | Batch 0150/0196 | Cost: 0.8279 Epoch: 003/010 | Train: 66.702% Time elapsed: 1.16 min Epoch: 004/010 | Batch 0000/0196 | Cost: 0.6384 Epoch: 004/010 | Batch 0150/0196 | Cost: 0.7103 Epoch: 004/010 | Train: 65.330% Time elapsed: 1.55 min Epoch: 005/010 | Batch 0000/0196 | Cost: 0.6308 Epoch: 005/010 | Batch 0150/0196 | Cost: 0.5913 Epoch: 005/010 | Train: 79.636% Time elapsed: 1.94 min Epoch: 006/010 | Batch 0000/0196 | Cost: 0.4409 Epoch: 006/010 | Batch 0150/0196 | Cost: 0.5557 Epoch: 006/010 | Train: 76.456% Time elapsed: 2.33 min Epoch: 007/010 | Batch 0000/0196 | Cost: 0.4778 Epoch: 007/010 | Batch 0150/0196 | Cost: 0.4815 Epoch: 007/010 | Train: 65.890% Time elapsed: 2.71 min Epoch: 008/010 | Batch 0000/0196 | Cost: 0.3782 Epoch: 008/010 | Batch 0150/0196 | Cost: 0.4339 Epoch: 008/010 | Train: 85.200% Time elapsed: 3.10 min Epoch: 009/010 | Batch 0000/0196 | Cost: 0.3083 Epoch: 009/010 | Batch 0150/0196 | Cost: 0.3290 Epoch: 009/010 | Train: 78.108% Time elapsed: 3.49 min Epoch: 010/010 | Batch 0000/0196 | Cost: 0.2229 Epoch: 010/010 | Batch 0150/0196 | Cost: 0.1945 Epoch: 010/010 | Train: 87.384% Time elapsed: 3.88 min Total Training Time: 3.88 min Test accuracy: 70.67% Total Time: 3.91 min
%watermark -iv
numpy 1.15.4 pandas 0.23.4 torch 1.0.0 PIL.Image 5.3.0