Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka CPython 3.6.8 IPython 7.2.0 torch 1.0.0
The network in this notebook is an implementation of the ResNet-34 [1] architecture on the MNIST digits dataset (http://yann.lecun.com/exdb/mnist/) to train a handwritten digit classifier.
References
[1] He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770-778). (CVPR Link)
The following figure illustrates residual blocks with skip connections such that the input passed via the shortcut matches the dimensions of the main path's output, which allows the network to learn identity functions.
The ResNet-34 architecture actually uses residual blocks with skip connections such that the input passed via the shortcut is resized to match the dimensions of the main path's output. Such a residual block is illustrated below:
For a more detailed explanation see the other notebook, resnet-ex-1.ipynb.
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
# Make cuDNN pick deterministic algorithms so reruns on GPU are reproducible.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
##########################
### SETTINGS
##########################
# Hyperparameters
RANDOM_SEED = 1  # seed for torch.manual_seed -> reproducible weight init
LEARNING_RATE = 0.001  # Adam step size
BATCH_SIZE = 128  # samples per mini-batch
NUM_EPOCHS = 10  # full passes over the training set
# Architecture
NUM_FEATURES = 28*28  # flattened MNIST image size (28x28); not used directly below
NUM_CLASSES = 10  # digits 0-9
# Other
DEVICE = "cuda:2"  # NOTE(review): hard-coded GPU index; adjust for your machine
GRAYSCALE = True  # MNIST images are single-channel
##########################
### MNIST DATASET
##########################

# transforms.ToTensor() converts the PIL images to float tensors
# scaled to the [0, 1] range.
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)

# Sanity check: inspect the shape of one batch
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Image batch dimensions: torch.Size([128, 1, 28, 28]) Image label dimensions: torch.Size([128])
device = torch.device(DEVICE)

# Verify the data loader reshuffles across epochs and that batches
# move to the target device without errors.
torch.manual_seed(0)
for epoch in range(2):
    for batch_idx, (x, y) in enumerate(train_loader):
        print('Epoch:', epoch+1, end='')
        print(' | Batch index:', batch_idx, end='')
        print(' | Batch size:', y.size()[0])
        x = x.to(device)
        y = y.to(device)
        break
Epoch: 1 | Batch index: 0 | Batch size: 128 Epoch: 2 | Batch index: 0 | Batch size: 128
The following code cell that implements the ResNet-34 architecture is a derivative of the code provided at https://pytorch.org/docs/0.4.0/_modules/torchvision/models/resnet.html.
##########################
### MODEL
##########################


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution; padding=1 keeps the spatial size at stride 1."""
    return nn.Conv2d(in_planes, out_planes,
                     kernel_size=3, padding=1,
                     stride=stride, bias=False)
class BasicBlock(nn.Module):
    """Basic ResNet residual block: two 3x3 convs plus a skip connection.

    When `downsample` is given, it projects the shortcut so its shape
    matches the main path's output before the two are added.
    """

    expansion = 1  # output channels = planes * expansion

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Main path: conv -> bn -> relu -> conv -> bn
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        # Optional shortcut projection (1x1 conv + bn from the caller)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Shortcut: identity, or the projection when shapes differ
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet (He et al., 2016) adapted here for 28x28 inputs.

    forward() returns a (logits, probas) tuple. Note that self.avgpool
    is constructed but deliberately skipped in forward(), because for
    28x28 MNIST inputs the feature map is already 1x1 at that point.
    """

    def __init__(self, block, layers, num_classes, grayscale):
        self.inplanes = 64
        in_dim = 1 if grayscale else 3
        super(ResNet, self).__init__()
        # Stem: 7x7 stride-2 conv followed by 3x3 stride-2 max-pooling
        self.conv1 = nn.Conv2d(in_dim, 64, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four residual stages; stages 2-4 halve the spatial resolution
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # He-style init for conv weights; BatchNorm starts as identity
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, (2. / fan_out)**.5)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack `blocks` residual blocks; only the first may downsample."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # 1x1 projection so the shortcut matches the main path's shape
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        layers.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        # Skip self.avgpool: the MNIST feature map is already 1x1 here
        x = x.view(x.size(0), -1)
        logits = self.fc(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
def resnet34(num_classes, grayscale=GRAYSCALE):
    """Construct a ResNet-34 model (layer plan [3, 4, 6, 3] of BasicBlocks).

    Bug fix: the original version ignored its `num_classes` argument and
    always used the NUM_CLASSES global; the argument is now honored.
    `grayscale` defaults to the notebook-level GRAYSCALE setting, so
    existing calls behave exactly as before.

    Args:
        num_classes: size of the final classification layer.
        grayscale: if True, the stem expects 1-channel input, else 3.

    Returns:
        An un-trained ResNet instance.
    """
    model = ResNet(block=BasicBlock,
                   layers=[3, 4, 6, 3],
                   num_classes=num_classes,
                   grayscale=grayscale)
    return model
# Seed before constructing the model so the random weight init is reproducible.
torch.manual_seed(RANDOM_SEED)
model = resnet34(NUM_CLASSES)
model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
def compute_accuracy(model, data_loader, device):
    """Return the classification accuracy (in percent) of `model` on `data_loader`.

    Assumes `model(features)` returns a `(logits, probas)` tuple, as the
    models in this notebook do. Fixes over the original: runs under
    `torch.no_grad()` so callers need not disable autograd themselves,
    accumulates counts as plain Python ints instead of tensors, and
    returns 0.0 for an empty loader (the original crashed with
    `AttributeError: 'int' object has no attribute 'float'`).

    Args:
        model: module returning (logits, probas).
        data_loader: iterable of (features, targets) batches.
        device: torch.device (or device string) to run inference on.

    Returns:
        Accuracy as a float in [0, 100].
    """
    correct_pred, num_examples = 0, 0
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        for features, targets in data_loader:
            features = features.to(device)
            targets = targets.to(device)
            logits, probas = model(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            # .item() keeps the accumulator a plain int (no graph, no device sync later)
            correct_pred += (predicted_labels == targets).sum().item()
    if num_examples == 0:
        return 0.0  # empty loader: define accuracy as 0 rather than crashing
    return correct_pred / num_examples * 100
start_time = time.time()

for epoch in range(NUM_EPOCHS):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if batch_idx % 50 == 0:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, NUM_EPOCHS, batch_idx,
                     len(train_loader), cost))

    # Per-epoch evaluation on the training set (gradients disabled)
    model.eval()
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%%' % (
              epoch+1, NUM_EPOCHS,
              compute_accuracy(model, train_loader, device=DEVICE)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 0000/0469 | Cost: 2.8909 Epoch: 001/010 | Batch 0050/0469 | Cost: 0.2777 Epoch: 001/010 | Batch 0100/0469 | Cost: 0.0824 Epoch: 001/010 | Batch 0150/0469 | Cost: 0.1159 Epoch: 001/010 | Batch 0200/0469 | Cost: 0.1098 Epoch: 001/010 | Batch 0250/0469 | Cost: 0.2297 Epoch: 001/010 | Batch 0300/0469 | Cost: 0.0692 Epoch: 001/010 | Batch 0350/0469 | Cost: 0.0762 Epoch: 001/010 | Batch 0400/0469 | Cost: 0.0318 Epoch: 001/010 | Batch 0450/0469 | Cost: 0.0387 Epoch: 001/010 | Train: 96.232% Time elapsed: 1.35 min Epoch: 002/010 | Batch 0000/0469 | Cost: 0.1717 Epoch: 002/010 | Batch 0050/0469 | Cost: 0.0508 Epoch: 002/010 | Batch 0100/0469 | Cost: 0.1568 Epoch: 002/010 | Batch 0150/0469 | Cost: 0.0505 Epoch: 002/010 | Batch 0200/0469 | Cost: 0.0380 Epoch: 002/010 | Batch 0250/0469 | Cost: 0.0550 Epoch: 002/010 | Batch 0300/0469 | Cost: 0.0708 Epoch: 002/010 | Batch 0350/0469 | Cost: 0.0737 Epoch: 002/010 | Batch 0400/0469 | Cost: 0.0399 Epoch: 002/010 | Batch 0450/0469 | Cost: 0.0172 Epoch: 002/010 | Train: 96.182% Time elapsed: 2.69 min Epoch: 003/010 | Batch 0000/0469 | Cost: 0.1397 Epoch: 003/010 | Batch 0050/0469 | Cost: 0.0358 Epoch: 003/010 | Batch 0100/0469 | Cost: 0.0052 Epoch: 003/010 | Batch 0150/0469 | Cost: 0.0488 Epoch: 003/010 | Batch 0200/0469 | Cost: 0.0314 Epoch: 003/010 | Batch 0250/0469 | Cost: 0.0550 Epoch: 003/010 | Batch 0300/0469 | Cost: 0.0239 Epoch: 003/010 | Batch 0350/0469 | Cost: 0.0566 Epoch: 003/010 | Batch 0400/0469 | Cost: 0.0222 Epoch: 003/010 | Batch 0450/0469 | Cost: 0.0515 Epoch: 003/010 | Train: 98.672% Time elapsed: 3.97 min Epoch: 004/010 | Batch 0000/0469 | Cost: 0.0364 Epoch: 004/010 | Batch 0050/0469 | Cost: 0.0026 Epoch: 004/010 | Batch 0100/0469 | Cost: 0.0321 Epoch: 004/010 | Batch 0150/0469 | Cost: 0.0180 Epoch: 004/010 | Batch 0200/0469 | Cost: 0.0851 Epoch: 004/010 | Batch 0250/0469 | Cost: 0.0142 Epoch: 004/010 | Batch 0300/0469 | Cost: 0.0362 Epoch: 004/010 | Batch 0350/0469 | Cost: 0.0563 Epoch: 
004/010 | Batch 0400/0469 | Cost: 0.0512 Epoch: 004/010 | Batch 0450/0469 | Cost: 0.0353 Epoch: 004/010 | Train: 98.750% Time elapsed: 5.22 min Epoch: 005/010 | Batch 0000/0469 | Cost: 0.0242 Epoch: 005/010 | Batch 0050/0469 | Cost: 0.0092 Epoch: 005/010 | Batch 0100/0469 | Cost: 0.0055 Epoch: 005/010 | Batch 0150/0469 | Cost: 0.0129 Epoch: 005/010 | Batch 0200/0469 | Cost: 0.0259 Epoch: 005/010 | Batch 0250/0469 | Cost: 0.0256 Epoch: 005/010 | Batch 0300/0469 | Cost: 0.0082 Epoch: 005/010 | Batch 0350/0469 | Cost: 0.0493 Epoch: 005/010 | Batch 0400/0469 | Cost: 0.0026 Epoch: 005/010 | Batch 0450/0469 | Cost: 0.0212 Epoch: 005/010 | Train: 98.642% Time elapsed: 6.48 min Epoch: 006/010 | Batch 0000/0469 | Cost: 0.0437 Epoch: 006/010 | Batch 0050/0469 | Cost: 0.0071 Epoch: 006/010 | Batch 0100/0469 | Cost: 0.0274 Epoch: 006/010 | Batch 0150/0469 | Cost: 0.0300 Epoch: 006/010 | Batch 0200/0469 | Cost: 0.0169 Epoch: 006/010 | Batch 0250/0469 | Cost: 0.0176 Epoch: 006/010 | Batch 0300/0469 | Cost: 0.0036 Epoch: 006/010 | Batch 0350/0469 | Cost: 0.0473 Epoch: 006/010 | Batch 0400/0469 | Cost: 0.0090 Epoch: 006/010 | Batch 0450/0469 | Cost: 0.0848 Epoch: 006/010 | Train: 97.143% Time elapsed: 7.73 min Epoch: 007/010 | Batch 0000/0469 | Cost: 0.0441 Epoch: 007/010 | Batch 0050/0469 | Cost: 0.0150 Epoch: 007/010 | Batch 0100/0469 | Cost: 0.0407 Epoch: 007/010 | Batch 0150/0469 | Cost: 0.0082 Epoch: 007/010 | Batch 0200/0469 | Cost: 0.0643 Epoch: 007/010 | Batch 0250/0469 | Cost: 0.0132 Epoch: 007/010 | Batch 0300/0469 | Cost: 0.0054 Epoch: 007/010 | Batch 0350/0469 | Cost: 0.0046 Epoch: 007/010 | Batch 0400/0469 | Cost: 0.0143 Epoch: 007/010 | Batch 0450/0469 | Cost: 0.0397 Epoch: 007/010 | Train: 99.555% Time elapsed: 8.98 min Epoch: 008/010 | Batch 0000/0469 | Cost: 0.0115 Epoch: 008/010 | Batch 0050/0469 | Cost: 0.0036 Epoch: 008/010 | Batch 0100/0469 | Cost: 0.0046 Epoch: 008/010 | Batch 0150/0469 | Cost: 0.0028 Epoch: 008/010 | Batch 0200/0469 | Cost: 0.0080 Epoch: 
008/010 | Batch 0250/0469 | Cost: 0.0143 Epoch: 008/010 | Batch 0300/0469 | Cost: 0.0091 Epoch: 008/010 | Batch 0350/0469 | Cost: 0.0122 Epoch: 008/010 | Batch 0400/0469 | Cost: 0.0372 Epoch: 008/010 | Batch 0450/0469 | Cost: 0.0093 Epoch: 008/010 | Train: 99.615% Time elapsed: 10.24 min Epoch: 009/010 | Batch 0000/0469 | Cost: 0.0032 Epoch: 009/010 | Batch 0050/0469 | Cost: 0.0009 Epoch: 009/010 | Batch 0100/0469 | Cost: 0.0091 Epoch: 009/010 | Batch 0150/0469 | Cost: 0.0601 Epoch: 009/010 | Batch 0200/0469 | Cost: 0.0274 Epoch: 009/010 | Batch 0250/0469 | Cost: 0.0127 Epoch: 009/010 | Batch 0300/0469 | Cost: 0.0147 Epoch: 009/010 | Batch 0350/0469 | Cost: 0.0501 Epoch: 009/010 | Batch 0400/0469 | Cost: 0.0198 Epoch: 009/010 | Batch 0450/0469 | Cost: 0.0020 Epoch: 009/010 | Train: 99.357% Time elapsed: 11.50 min Epoch: 010/010 | Batch 0000/0469 | Cost: 0.0073 Epoch: 010/010 | Batch 0050/0469 | Cost: 0.0077 Epoch: 010/010 | Batch 0100/0469 | Cost: 0.0079 Epoch: 010/010 | Batch 0150/0469 | Cost: 0.0546 Epoch: 010/010 | Batch 0200/0469 | Cost: 0.0021 Epoch: 010/010 | Batch 0250/0469 | Cost: 0.0211 Epoch: 010/010 | Batch 0300/0469 | Cost: 0.0018 Epoch: 010/010 | Batch 0350/0469 | Cost: 0.0042 Epoch: 010/010 | Batch 0400/0469 | Cost: 0.0078 Epoch: 010/010 | Batch 0450/0469 | Cost: 0.0017 Epoch: 010/010 | Train: 99.532% Time elapsed: 12.75 min Total Training Time: 12.75 min
# Final held-out evaluation; no gradients needed for inference.
with torch.no_grad():
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))
Test accuracy: 99.04%
# Grab the first test batch for a qualitative spot check.
features, targets = next(iter(test_loader))

# CHW -> HWC, then drop the single channel axis for matplotlib.
nhwc_img = np.transpose(features[0], axes=(1, 2, 0))
nhw_img = np.squeeze(nhwc_img.numpy(), axis=2)
plt.imshow(nhw_img, cmap='Greys');

# Predict on that one image ([0, None] keeps the batch dimension).
model.eval()
logits, probas = model(features.to(device)[0, None])
print('Probability 7 %.2f%%' % (probas[0][7]*100))
Probability 7 100.00%
%watermark -iv
numpy 1.15.4 pandas 0.23.4 torch 1.0.0 PIL.Image 5.3.0