Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka CPython 3.7.3
IPython 7.6.1

torch 1.2.0
This notebook implements the classic LeNet-5 convolutional network [1] and applies it to MNIST digit classification. The basic architecture is shown in the figure below:
LeNet-5 is commonly regarded as the pioneer of convolutional neural networks, and its architecture is very simple by modern standards. In total, LeNet-5 consists of only 7 layers. Three of these 7 layers are convolutional layers (C1, C3, C5), which are connected by two average pooling layers (S2 & S4). The penultimate layer is a fully connected layer (F6), which is followed by the final output layer. As reported in [1], LeNet-5 was able to achieve an error rate below 1% on the MNIST dataset, which was very close to the state of the art at the time (produced by a boosted ensemble of three LeNet-4 networks).
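To make the layer dimensions concrete, the short sketch below (not part of the original notebook) traces the feature-map sizes of the classic architecture for a 32x32 input; the helper conv_out is hypothetical and simply applies the valid-convolution formula.

def conv_out(size, kernel=5, stride=1):
    # valid convolution: output = (input - kernel) / stride + 1
    return (size - kernel) // stride + 1

size = 32              # 32x32 input image
size = conv_out(size)  # C1: 6 feature maps of size 28x28
size = size // 2       # S2: pooling/subsampling -> 14x14
size = conv_out(size)  # C3: 16 feature maps of size 10x10
size = size // 2       # S4: pooling/subsampling -> 5x5
print(16 * size * size)  # 400 flattened features feeding C5 (120), F6 (84), output (10)

The resulting 16*5*5 = 400 flattened features are exactly the input size of the first fully connected layer in the PyTorch implementation further below.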
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
##########################
### SETTINGS
##########################
# Hyperparameters
RANDOM_SEED = 1
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 10
# Architecture
NUM_FEATURES = 32*32
NUM_CLASSES = 10
# Other
DEVICE = "cuda:0"
GRAYSCALE = True
##########################
### MNIST DATASET
##########################
resize_transform = transforms.Compose([transforms.Resize((32, 32)),
                                       transforms.ToTensor()])

# Note transforms.ToTensor() scales input images
# to 0-1 range

train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=resize_transform,
                               download=True)

test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=resize_transform)

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)
# Checking the dataset
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Image batch dimensions: torch.Size([128, 1, 32, 32])
Image label dimensions: torch.Size([128])
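As a quick follow-up to the note about transforms.ToTensor(), the hedged check below (not part of the original notebook) confirms that the loaded batches are scaled to the 0-1 range; it reuses the images batch from the loop above.

# Sanity check (assumption: `images` still holds the batch from the loop above);
# ToTensor() maps uint8 pixel values 0-255 to floats in [0, 1].
print('Min pixel value:', images.min().item())
print('Max pixel value:', images.max().item())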
device = torch.device(DEVICE)
torch.manual_seed(0)
for epoch in range(2):

    for batch_idx, (x, y) in enumerate(train_loader):

        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])

        x = x.to(device)
        y = y.to(device)

        break
Epoch: 1 | Batch index: 0 | Batch size: 128
Epoch: 2 | Batch index: 0 | Batch size: 128
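Because shuffle=True draws its shuffling order from PyTorch's global RNG when no explicit generator (and no worker processes) are used, re-seeding before creating a new iterator should reproduce the same batch order. The snippet below is a hedged sketch of that check and is not part of the original notebook.

# Assumption: the DataLoader uses num_workers=0 and no explicit generator,
# so the shuffle order is drawn from the global torch RNG.
torch.manual_seed(0)
labels_a = next(iter(train_loader))[1]

torch.manual_seed(0)
labels_b = next(iter(train_loader))[1]

print('Same first batch after re-seeding:', torch.equal(labels_a, labels_b))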
##########################
### MODEL
##########################
class LeNet5(nn.Module):

    def __init__(self, num_classes, grayscale=False):
        super(LeNet5, self).__init__()

        self.grayscale = grayscale
        self.num_classes = num_classes

        if self.grayscale:
            in_channels = 1
        else:
            in_channels = 3

        # Note: this implementation uses max pooling where the original
        # LeNet-5 used average pooling (S2 & S4)
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 6, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.MaxPool2d(kernel_size=2)
        )

        # 16 feature maps of size 5x5 -> 16*5*5 = 400 flattened features
        self.classifier = nn.Sequential(
            nn.Linear(16*5*5, 120),
            nn.Linear(120, 84),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
torch.manual_seed(RANDOM_SEED)
model = LeNet5(NUM_CLASSES, GRAYSCALE)
model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
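To double-check that the 16*5*5 flatten size in the classifier matches the 32x32 inputs, the hedged snippet below (not part of the original notebook) runs a small random batch through the network.

# Assumption-based sanity check: a random 32x32 grayscale batch should yield
# 16 feature maps of size 5x5 before flattening, and 10 logits per image.
with torch.no_grad():
    dummy = torch.randn(2, 1, 32, 32).to(DEVICE)
    feature_maps = model.features(dummy)
    logits, probas = model(dummy)
print(feature_maps.shape)  # expected: torch.Size([2, 16, 5, 5])
print(logits.shape)        # expected: torch.Size([2, 10])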
def compute_accuracy(model, data_loader, device):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):

        features = features.to(device)
        targets = targets.to(device)

        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
start_time = time.time()
for epoch in range(NUM_EPOCHS):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        # F.cross_entropy expects raw logits (it applies log-softmax internally)
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    model.eval()
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 0000/0469 | Cost: 2.3053
Epoch: 001/010 | Batch 0050/0469 | Cost: 0.3979
Epoch: 001/010 | Batch 0100/0469 | Cost: 0.4244
Epoch: 001/010 | Batch 0150/0469 | Cost: 0.3412
Epoch: 001/010 | Batch 0200/0469 | Cost: 0.1983
Epoch: 001/010 | Batch 0250/0469 | Cost: 0.1813
Epoch: 001/010 | Batch 0300/0469 | Cost: 0.1212
Epoch: 001/010 | Batch 0350/0469 | Cost: 0.1913
Epoch: 001/010 | Batch 0400/0469 | Cost: 0.1882
Epoch: 001/010 | Batch 0450/0469 | Cost: 0.0847
Epoch: 001/010 | Train: 96.217%
Time elapsed: 0.28 min
Epoch: 002/010 | Batch 0000/0469 | Cost: 0.0662
Epoch: 002/010 | Batch 0050/0469 | Cost: 0.1241
Epoch: 002/010 | Batch 0100/0469 | Cost: 0.1321
Epoch: 002/010 | Batch 0150/0469 | Cost: 0.1922
Epoch: 002/010 | Batch 0200/0469 | Cost: 0.0792
Epoch: 002/010 | Batch 0250/0469 | Cost: 0.1133
Epoch: 002/010 | Batch 0300/0469 | Cost: 0.0899
Epoch: 002/010 | Batch 0350/0469 | Cost: 0.1914
Epoch: 002/010 | Batch 0400/0469 | Cost: 0.0523
Epoch: 002/010 | Batch 0450/0469 | Cost: 0.0852
Epoch: 002/010 | Train: 97.377%
Time elapsed: 0.56 min
Epoch: 003/010 | Batch 0000/0469 | Cost: 0.1620
Epoch: 003/010 | Batch 0050/0469 | Cost: 0.0408
Epoch: 003/010 | Batch 0100/0469 | Cost: 0.0585
Epoch: 003/010 | Batch 0150/0469 | Cost: 0.0846
Epoch: 003/010 | Batch 0200/0469 | Cost: 0.0987
Epoch: 003/010 | Batch 0250/0469 | Cost: 0.0556
Epoch: 003/010 | Batch 0300/0469 | Cost: 0.0338
Epoch: 003/010 | Batch 0350/0469 | Cost: 0.1036
Epoch: 003/010 | Batch 0400/0469 | Cost: 0.0523
Epoch: 003/010 | Batch 0450/0469 | Cost: 0.0518
Epoch: 003/010 | Train: 97.980%
Time elapsed: 0.83 min
Epoch: 004/010 | Batch 0000/0469 | Cost: 0.1061
Epoch: 004/010 | Batch 0050/0469 | Cost: 0.0525
Epoch: 004/010 | Batch 0100/0469 | Cost: 0.0204
Epoch: 004/010 | Batch 0150/0469 | Cost: 0.0181
Epoch: 004/010 | Batch 0200/0469 | Cost: 0.0773
Epoch: 004/010 | Batch 0250/0469 | Cost: 0.0625
Epoch: 004/010 | Batch 0300/0469 | Cost: 0.0195
Epoch: 004/010 | Batch 0350/0469 | Cost: 0.0538
Epoch: 004/010 | Batch 0400/0469 | Cost: 0.1013
Epoch: 004/010 | Batch 0450/0469 | Cost: 0.1290
Epoch: 004/010 | Train: 98.055%
Time elapsed: 1.10 min
Epoch: 005/010 | Batch 0000/0469 | Cost: 0.0314
Epoch: 005/010 | Batch 0050/0469 | Cost: 0.0594
Epoch: 005/010 | Batch 0100/0469 | Cost: 0.0376
Epoch: 005/010 | Batch 0150/0469 | Cost: 0.0188
Epoch: 005/010 | Batch 0200/0469 | Cost: 0.0705
Epoch: 005/010 | Batch 0250/0469 | Cost: 0.0088
Epoch: 005/010 | Batch 0300/0469 | Cost: 0.1651
Epoch: 005/010 | Batch 0350/0469 | Cost: 0.1475
Epoch: 005/010 | Batch 0400/0469 | Cost: 0.0748
Epoch: 005/010 | Batch 0450/0469 | Cost: 0.0384
Epoch: 005/010 | Train: 98.560%
Time elapsed: 1.38 min
Epoch: 006/010 | Batch 0000/0469 | Cost: 0.0602
Epoch: 006/010 | Batch 0050/0469 | Cost: 0.0209
Epoch: 006/010 | Batch 0100/0469 | Cost: 0.0559
Epoch: 006/010 | Batch 0150/0469 | Cost: 0.0880
Epoch: 006/010 | Batch 0200/0469 | Cost: 0.0270
Epoch: 006/010 | Batch 0250/0469 | Cost: 0.1370
Epoch: 006/010 | Batch 0300/0469 | Cost: 0.0395
Epoch: 006/010 | Batch 0350/0469 | Cost: 0.0251
Epoch: 006/010 | Batch 0400/0469 | Cost: 0.0182
Epoch: 006/010 | Batch 0450/0469 | Cost: 0.1376
Epoch: 006/010 | Train: 98.378%
Time elapsed: 1.65 min
Epoch: 007/010 | Batch 0000/0469 | Cost: 0.0289
Epoch: 007/010 | Batch 0050/0469 | Cost: 0.0444
Epoch: 007/010 | Batch 0100/0469 | Cost: 0.0306
Epoch: 007/010 | Batch 0150/0469 | Cost: 0.0165
Epoch: 007/010 | Batch 0200/0469 | Cost: 0.0278
Epoch: 007/010 | Batch 0250/0469 | Cost: 0.0328
Epoch: 007/010 | Batch 0300/0469 | Cost: 0.0976
Epoch: 007/010 | Batch 0350/0469 | Cost: 0.0204
Epoch: 007/010 | Batch 0400/0469 | Cost: 0.0470
Epoch: 007/010 | Batch 0450/0469 | Cost: 0.0103
Epoch: 007/010 | Train: 98.673%
Time elapsed: 1.94 min
Epoch: 008/010 | Batch 0000/0469 | Cost: 0.0072
Epoch: 008/010 | Batch 0050/0469 | Cost: 0.0657
Epoch: 008/010 | Batch 0100/0469 | Cost: 0.0068
Epoch: 008/010 | Batch 0150/0469 | Cost: 0.0435
Epoch: 008/010 | Batch 0200/0469 | Cost: 0.0176
Epoch: 008/010 | Batch 0250/0469 | Cost: 0.0286
Epoch: 008/010 | Batch 0300/0469 | Cost: 0.0362
Epoch: 008/010 | Batch 0350/0469 | Cost: 0.0562
Epoch: 008/010 | Batch 0400/0469 | Cost: 0.0150
Epoch: 008/010 | Batch 0450/0469 | Cost: 0.0709
Epoch: 008/010 | Train: 98.795%
Time elapsed: 2.21 min
Epoch: 009/010 | Batch 0000/0469 | Cost: 0.0103
Epoch: 009/010 | Batch 0050/0469 | Cost: 0.0119
Epoch: 009/010 | Batch 0100/0469 | Cost: 0.0293
Epoch: 009/010 | Batch 0150/0469 | Cost: 0.0351
Epoch: 009/010 | Batch 0200/0469 | Cost: 0.0435
Epoch: 009/010 | Batch 0250/0469 | Cost: 0.0296
Epoch: 009/010 | Batch 0300/0469 | Cost: 0.0249
Epoch: 009/010 | Batch 0350/0469 | Cost: 0.0527
Epoch: 009/010 | Batch 0400/0469 | Cost: 0.0516
Epoch: 009/010 | Batch 0450/0469 | Cost: 0.0383
Epoch: 009/010 | Train: 98.588%
Time elapsed: 2.49 min
Epoch: 010/010 | Batch 0000/0469 | Cost: 0.0725
Epoch: 010/010 | Batch 0050/0469 | Cost: 0.0565
Epoch: 010/010 | Batch 0100/0469 | Cost: 0.0473
Epoch: 010/010 | Batch 0150/0469 | Cost: 0.0497
Epoch: 010/010 | Batch 0200/0469 | Cost: 0.0648
Epoch: 010/010 | Batch 0250/0469 | Cost: 0.0264
Epoch: 010/010 | Batch 0300/0469 | Cost: 0.0192
Epoch: 010/010 | Batch 0350/0469 | Cost: 0.0259
Epoch: 010/010 | Batch 0400/0469 | Cost: 0.0371
Epoch: 010/010 | Batch 0450/0469 | Cost: 0.0061
Epoch: 010/010 | Train: 99.042%
Time elapsed: 2.76 min
Total Training Time: 2.76 min
with torch.set_grad_enabled(False):  # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))
Test accuracy: 98.47%
for batch_idx, (features, targets) in enumerate(test_loader):
    features = features
    targets = targets
    break
nhwc_img = np.transpose(features[0], axes=(1, 2, 0))
nhw_img = np.squeeze(nhwc_img.numpy(), axis=2)
plt.imshow(nhw_img, cmap='Greys');
model.eval()
logits, probas = model(features.to(device)[0, None])
print('Probability 7 %.2f%%' % (probas[0][7]*100))
Probability 7 100.00%
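To inspect more than a single class probability, the hedged snippet below (not part of the original notebook) lists the model's top-3 predictions for the same test image using torch.topk.

# Assumption: `probas` still holds the softmax output for the first test image.
top_probas, top_classes = torch.topk(probas[0], k=3)
for p, c in zip(top_probas, top_classes):
    print('Class %d: %.4f%%' % (c.item(), p.item()*100))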
%watermark -iv
matplotlib  3.1.0
torchvision 0.4.0a0+6b959ee
PIL.Image   6.0.0
pandas      0.24.2
torch       1.2.0
numpy       1.16.4