Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.

This notebook trains the classic AlexNet architecture (Krizhevsky et al., 2012) on the CIFAR-10 image-classification dataset, with the 32x32 input images upscaled to 64x64 pixels.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka

CPython 3.6.8
IPython 7.2.0

torch 1.0.1.post2
References

Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). ImageNet classification with deep convolutional neural networks. In Advances in Neural Information Processing Systems 25 (NIPS 2012), pp. 1097-1105.
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
##########################
### SETTINGS
##########################
# Hyperparameters
RANDOM_SEED = 1
LEARNING_RATE = 0.0001
BATCH_SIZE = 256
NUM_EPOCHS = 20
# Architecture
NUM_CLASSES = 10
# Other
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
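For fully comparable reruns, the seeding calls scattered through the notebook (np.random.seed inside the split helper below, torch.manual_seed before model creation) can be consolidated in one place. A minimal sketch, assuming a helper name (set_all_seeds) that is not part of the original notebook:

import random

def set_all_seeds(seed):
    random.seed(seed)               # Python's built-in RNG
    np.random.seed(seed)            # NumPy RNG (drives the train/valid split)
    torch.manual_seed(seed)         # PyTorch CPU RNG
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # PyTorch CUDA RNGs

set_all_seeds(RANDOM_SEED)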
def get_train_valid_loader(data_dir,
                           batch_size,
                           train_transform,
                           valid_transform,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True,
                           num_workers=4):

    train_dataset = datasets.CIFAR10(root=data_dir,
                                     train=True,
                                     download=True,
                                     transform=train_transform)

    valid_dataset = datasets.CIFAR10(root=data_dir,
                                     train=True,
                                     download=False,
                                     transform=valid_transform)

    num_train = len(train_dataset)
    indices = np.arange(num_train)
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)

    return (train_loader, valid_loader)
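Note that two CIFAR10 objects are constructed above so that the training and validation splits can carry different transforms. For comparison only, a shorter sketch using torch.utils.data.random_split, which cannot assign separate transforms to the two parts; the variable names here are illustrative:

from torch.utils.data import random_split

full_train = datasets.CIFAR10(root='data', train=True, download=True,
                              transform=transforms.ToTensor())
valid_len = int(0.1 * len(full_train))   # same 10% validation share as above
torch.manual_seed(RANDOM_SEED)           # random_split draws from the global RNG here
# (newer PyTorch versions also accept a seeded generator= argument)
train_part, valid_part = random_split(
    full_train, [len(full_train) - valid_len, valid_len])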
def get_test_loader(data_dir,
                    batch_size,
                    test_transform,
                    num_workers=4):

    dataset = datasets.CIFAR10(root=data_dir,
                               train=False,
                               download=False,
                               transform=test_transform)

    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
                                              num_workers=num_workers)

    return data_loader
##########################
### CIFAR-10 Dataset
##########################
custom_transform = transforms.Compose([transforms.Resize((64, 64)),
                                       transforms.ToTensor()])
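The transform above only resizes the images and converts them to tensors. A common refinement, sketched here, adds light augmentation for the training split and per-channel normalization for all splits. The CIFAR-10 channel means and standard deviations below are widely quoted values, not computed in this notebook:

train_transform_aug = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),   # flips half of the training images
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),    # commonly quoted
                         (0.2470, 0.2435, 0.2616))])  # CIFAR-10 statistics

valid_transform_norm = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2470, 0.2435, 0.2616))])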
train_loader, valid_loader = get_train_valid_loader(
    data_dir='data',
    batch_size=BATCH_SIZE,
    train_transform=custom_transform,
    valid_transform=custom_transform,
    random_seed=RANDOM_SEED,
    valid_size=0.1,
    shuffle=True,
    num_workers=4)
test_loader = get_test_loader(
    data_dir='data',
    batch_size=BATCH_SIZE,
    test_transform=custom_transform,
    num_workers=4)
# Checking the dataset
print('Training Set:\n')
for images, labels in train_loader:
    print('Image batch dimensions:', images.size())
    print('Image label dimensions:', labels.size())
    break
# Checking the dataset
print('\nValidation Set:')
for images, labels in valid_loader:
    print('Image batch dimensions:', images.size())
    print('Image label dimensions:', labels.size())
    break
# Checking the dataset
print('\nTesting Set:')
for images, labels in test_loader:
    print('Image batch dimensions:', images.size())
    print('Image label dimensions:', labels.size())
    break
Files already downloaded and verified
Training Set:

Image batch dimensions: torch.Size([256, 3, 64, 64])
Image label dimensions: torch.Size([256])

Validation Set:
Image batch dimensions: torch.Size([256, 3, 64, 64])
Image label dimensions: torch.Size([256])

Testing Set:
Image batch dimensions: torch.Size([256, 3, 64, 64])
Image label dimensions: torch.Size([256])
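Since matplotlib is already imported, a quick plot of the batch still held in images/labels from the loop above can catch transform mistakes early. A minimal sketch:

# Show the first five images of the most recently loaded batch.
# permute() moves the channel axis last (HWC), as matplotlib expects.
fig, axes = plt.subplots(1, 5, figsize=(12, 3))
for ax, img, label in zip(axes, images, labels):
    ax.imshow(img.permute(1, 2, 0).numpy())
    ax.set_title('label: %d' % int(label))
    ax.axis('off')
plt.show()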
##########################
### MODEL
##########################
class AlexNet(nn.Module):

    def __init__(self, num_classes):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        logits = self.classifier(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
torch.manual_seed(RANDOM_SEED)
model = AlexNet(NUM_CLASSES)
model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
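A cheap sanity check before training: run one dummy batch through the untrained model and confirm that both outputs come back as [batch_size, NUM_CLASSES]. A minimal sketch:

with torch.no_grad():
    dummy = torch.randn(2, 3, 64, 64).to(DEVICE)  # two fake 64x64 RGB images
    logits, probas = model(dummy)
    print(logits.shape)    # expected: torch.Size([2, 10])
    print(probas.shape)    # expected: torch.Size([2, 10]), rows sum to 1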
def compute_accuracy(model, data_loader, device):
    correct_pred, num_examples = 0, 0
    model.eval()
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        assert predicted_labels.size() == targets.size()
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
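compute_accuracy reports a single aggregate number and relies on its caller to disable gradient tracking. A per-class breakdown is often informative on CIFAR-10; the helper below is a sketch with a hypothetical name, not part of the original notebook:

def compute_per_class_accuracy(model, data_loader, device, num_classes):
    # Hypothetical helper: tallies hits and counts per class so weak
    # classes stand out. Assumes every class occurs in the loader.
    correct = torch.zeros(num_classes)
    total = torch.zeros(num_classes)
    model.eval()
    with torch.no_grad():
        for features, targets in data_loader:
            features, targets = features.to(device), targets.to(device)
            logits, probas = model(features)
            preds = torch.argmax(probas, dim=1)
            for c in range(num_classes):
                mask = targets == c
                total[c] += mask.sum().item()
                correct[c] += (preds[mask] == c).sum().item()
    return 100. * correct / total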
start_time = time.time()
for epoch in range(NUM_EPOCHS):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 150:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    model.eval()
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%% | Valid: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE),
              compute_accuracy(model, valid_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
with torch.set_grad_enabled(False):  # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))

print('Total Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/020 | Batch 0000/0176 | Cost: 2.3027
Epoch: 001/020 | Batch 0150/0176 | Cost: 1.5725
Epoch: 001/020 | Train: 36.202% | Valid: 35.380%
Time elapsed: 0.20 min
Epoch: 002/020 | Batch 0000/0176 | Cost: 1.6128
Epoch: 002/020 | Batch 0150/0176 | Cost: 1.4558
Epoch: 002/020 | Train: 46.247% | Valid: 46.040%
Time elapsed: 0.39 min
Epoch: 003/020 | Batch 0000/0176 | Cost: 1.4093
Epoch: 003/020 | Batch 0150/0176 | Cost: 1.1841
Epoch: 003/020 | Train: 53.956% | Valid: 52.540%
Time elapsed: 0.58 min
Epoch: 004/020 | Batch 0000/0176 | Cost: 1.2018
Epoch: 004/020 | Batch 0150/0176 | Cost: 1.1820
Epoch: 004/020 | Train: 57.549% | Valid: 55.160%
Time elapsed: 0.78 min
Epoch: 005/020 | Batch 0000/0176 | Cost: 1.0970
Epoch: 005/020 | Batch 0150/0176 | Cost: 1.1353
Epoch: 005/020 | Train: 62.196% | Valid: 58.820%
Time elapsed: 0.98 min
Epoch: 006/020 | Batch 0000/0176 | Cost: 0.9871
Epoch: 006/020 | Batch 0150/0176 | Cost: 1.0954
Epoch: 006/020 | Train: 62.413% | Valid: 58.780%
Time elapsed: 1.18 min
Epoch: 007/020 | Batch 0000/0176 | Cost: 1.0950
Epoch: 007/020 | Batch 0150/0176 | Cost: 0.9522
Epoch: 007/020 | Train: 68.949% | Valid: 62.860%
Time elapsed: 1.37 min
Epoch: 008/020 | Batch 0000/0176 | Cost: 0.8985
Epoch: 008/020 | Batch 0150/0176 | Cost: 0.8718
Epoch: 008/020 | Train: 67.489% | Valid: 60.260%
Time elapsed: 1.57 min
Epoch: 009/020 | Batch 0000/0176 | Cost: 0.9852
Epoch: 009/020 | Batch 0150/0176 | Cost: 0.8458
Epoch: 009/020 | Train: 74.444% | Valid: 64.880%
Time elapsed: 1.77 min
Epoch: 010/020 | Batch 0000/0176 | Cost: 0.7606
Epoch: 010/020 | Batch 0150/0176 | Cost: 0.7054
Epoch: 010/020 | Train: 77.271% | Valid: 66.240%
Time elapsed: 1.96 min
Epoch: 011/020 | Batch 0000/0176 | Cost: 0.6426
Epoch: 011/020 | Batch 0150/0176 | Cost: 0.7225
Epoch: 011/020 | Train: 78.740% | Valid: 65.580%
Time elapsed: 2.16 min
Epoch: 012/020 | Batch 0000/0176 | Cost: 0.6353
Epoch: 012/020 | Batch 0150/0176 | Cost: 0.7038
Epoch: 012/020 | Train: 81.960% | Valid: 66.500%
Time elapsed: 2.36 min
Epoch: 013/020 | Batch 0000/0176 | Cost: 0.5080
Epoch: 013/020 | Batch 0150/0176 | Cost: 0.5161
Epoch: 013/020 | Train: 85.880% | Valid: 68.320%
Time elapsed: 2.56 min
Epoch: 014/020 | Batch 0000/0176 | Cost: 0.4111
Epoch: 014/020 | Batch 0150/0176 | Cost: 0.4817
Epoch: 014/020 | Train: 86.342% | Valid: 68.480%
Time elapsed: 2.76 min
Epoch: 015/020 | Batch 0000/0176 | Cost: 0.3986
Epoch: 015/020 | Batch 0150/0176 | Cost: 0.5016
Epoch: 015/020 | Train: 86.967% | Valid: 67.040%
Time elapsed: 2.96 min
Epoch: 016/020 | Batch 0000/0176 | Cost: 0.3665
Epoch: 016/020 | Batch 0150/0176 | Cost: 0.4297
Epoch: 016/020 | Train: 89.004% | Valid: 66.100%
Time elapsed: 3.16 min
Epoch: 017/020 | Batch 0000/0176 | Cost: 0.3275
Epoch: 017/020 | Batch 0150/0176 | Cost: 0.2950
Epoch: 017/020 | Train: 92.491% | Valid: 68.980%
Time elapsed: 3.35 min
Epoch: 018/020 | Batch 0000/0176 | Cost: 0.2108
Epoch: 018/020 | Batch 0150/0176 | Cost: 0.2200
Epoch: 018/020 | Train: 93.887% | Valid: 68.340%
Time elapsed: 3.55 min
Epoch: 019/020 | Batch 0000/0176 | Cost: 0.1458
Epoch: 019/020 | Batch 0150/0176 | Cost: 0.1814
Epoch: 019/020 | Train: 93.922% | Valid: 67.540%
Time elapsed: 3.75 min
Epoch: 020/020 | Batch 0000/0176 | Cost: 0.2147
Epoch: 020/020 | Batch 0150/0176 | Cost: 0.1742
Epoch: 020/020 | Train: 91.998% | Valid: 66.900%
Time elapsed: 3.95 min
Total Training Time: 3.95 min
Test accuracy: 66.63%
Total Time: 3.96 min
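The gap between training accuracy (roughly 92-94%) and validation accuracy (roughly 67-69%) in the log above indicates overfitting, so a checkpoint saved at the best validation epoch may generalize better than the final weights. The snippet below only sketches the save/restore mechanics; the file name is arbitrary:

torch.save(model.state_dict(), 'alexnet_cifar10.pt')   # weights only

model_restored = AlexNet(NUM_CLASSES)
model_restored.load_state_dict(
    torch.load('alexnet_cifar10.pt', map_location=DEVICE))
model_restored.to(DEVICE)
model_restored.eval()   # switch off dropout for inference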
%watermark -iv
numpy 1.15.4
pandas 0.23.4
torch 1.0.1.post2
PIL.Image 5.3.0