Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka CPython 3.6.8 IPython 7.2.0 torch 1.0.1.post2
Please note that this example does not implement a truly deep ResNet as described in the literature; rather, it illustrates how the residual blocks described in He et al. [1] can be implemented in PyTorch.
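Conceptually, a residual block does not learn a target mapping H(x) directly; it learns the residual F(x) = H(x) - x and adds the input back through a skip connection, so the block's output is F(x) + x. A minimal sketch of this idea (illustrative only, not the notebook's implementation; main_path stands in for the block's convolutional layers):
def residual_forward(x, main_path):
    # skip connection: add the unchanged input back onto the main path's output;
    # the element-wise addition requires that both tensors have matching shapes
    return main_path(x) + x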
import time
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
##########################
### SETTINGS
##########################
# Device
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
# Hyperparameters
random_seed = 123
learning_rate = 0.01
num_epochs = 10
batch_size = 128
# Architecture
num_classes = 10
##########################
### MNIST DATASET
##########################
# Note transforms.ToTensor() scales input images
# to 0-1 range
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

# Checking the dataset
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Image batch dimensions: torch.Size([128, 1, 28, 28]) Image label dimensions: torch.Size([128])
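To confirm the 0-1 scaling performed by transforms.ToTensor() (mentioned in the comment above), one can optionally inspect the pixel range of the batch fetched in the loop; this quick check is illustrative and not part of the original notebook:
# Illustrative check: ToTensor() maps raw pixel values into the [0, 1] range
print('Min pixel value: %.1f | Max pixel value: %.1f'
      % (images.min().item(), images.max().item()))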
The following code implements residual blocks with skip connections such that the input passed via the shortcut already matches the dimensions of the main path's output, which allows the network to learn an identity function. Such a residual block is illustrated below:
##########################
### MODEL
##########################

class ConvNet(torch.nn.Module):

    def __init__(self, num_classes):
        super(ConvNet, self).__init__()

        #########################
        ### 1st residual block
        #########################
        # 28x28x1 => 28x28x4
        self.conv_1 = torch.nn.Conv2d(in_channels=1,
                                      out_channels=4,
                                      kernel_size=(1, 1),
                                      stride=(1, 1),
                                      padding=0)
        self.conv_1_bn = torch.nn.BatchNorm2d(4)

        # 28x28x4 => 28x28x1
        self.conv_2 = torch.nn.Conv2d(in_channels=4,
                                      out_channels=1,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=1)
        self.conv_2_bn = torch.nn.BatchNorm2d(1)

        #########################
        ### 2nd residual block
        #########################
        # 28x28x1 => 28x28x4
        self.conv_3 = torch.nn.Conv2d(in_channels=1,
                                      out_channels=4,
                                      kernel_size=(1, 1),
                                      stride=(1, 1),
                                      padding=0)
        self.conv_3_bn = torch.nn.BatchNorm2d(4)

        # 28x28x4 => 28x28x1
        self.conv_4 = torch.nn.Conv2d(in_channels=4,
                                      out_channels=1,
                                      kernel_size=(3, 3),
                                      stride=(1, 1),
                                      padding=1)
        self.conv_4_bn = torch.nn.BatchNorm2d(1)

        #########################
        ### Fully connected
        #########################
        self.linear_1 = torch.nn.Linear(28*28*1, num_classes)

    def forward(self, x):

        #########################
        ### 1st residual block
        #########################
        shortcut = x
        out = self.conv_1(x)
        out = self.conv_1_bn(out)
        out = F.relu(out)
        out = self.conv_2(out)
        out = self.conv_2_bn(out)
        out += shortcut
        out = F.relu(out)

        #########################
        ### 2nd residual block
        #########################
        shortcut = out
        out = self.conv_3(out)
        out = self.conv_3_bn(out)
        out = F.relu(out)
        out = self.conv_4(out)
        out = self.conv_4_bn(out)
        out += shortcut
        out = F.relu(out)

        #########################
        ### Fully connected
        #########################
        logits = self.linear_1(out.view(-1, 28*28*1))
        probas = F.softmax(logits, dim=1)
        return logits, probas
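As a quick sanity check (illustrative; not part of the original notebook), a dummy forward pass confirms that both residual blocks preserve the 28x28x1 input shape, so the shortcut additions are valid:
# Illustrative check: the identity shortcuts require that the main path
# preserves the input dimensions; the logits should have shape [batch, 10]
_dummy = torch.randn(2, 1, 28, 28)
_logits, _probas = ConvNet(num_classes=10)(_dummy)
print(_logits.shape)  # expected: torch.Size([2, 10])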
torch.manual_seed(random_seed)
model = ConvNet(num_classes=num_classes)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def compute_accuracy(model, data_loader):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
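A side note on the prediction step above: softmax is a monotonic transformation, so taking the argmax over the logits yields the same predicted labels as taking it over the probabilities. The following snippet (illustrative, not a change to the function above) demonstrates this equivalence:
# Equivalent alternative: argmax over logits equals argmax over probabilities,
# because softmax does not change the ordering of the class scores
_logits = torch.tensor([[2.0, 0.5, -1.0]])
assert torch.max(_logits, 1)[1] == torch.max(F.softmax(_logits, dim=1), 1)[1]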
start_time = time.time()
for epoch in range(num_epochs):
    model = model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch+1, num_epochs, batch_idx,
                     len(train_loader), cost))

    model = model.eval()  # eval mode so batchnorm uses (and doesn't update) its running statistics during inference
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, num_epochs,
              compute_accuracy(model, train_loader)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 000/469 | Cost: 2.5157
Epoch: 001/010 | Batch 050/469 | Cost: 0.5106
Epoch: 001/010 | Batch 100/469 | Cost: 0.2353
Epoch: 001/010 | Batch 150/469 | Cost: 0.2672
Epoch: 001/010 | Batch 200/469 | Cost: 0.3670
Epoch: 001/010 | Batch 250/469 | Cost: 0.2920
Epoch: 001/010 | Batch 300/469 | Cost: 0.3122
Epoch: 001/010 | Batch 350/469 | Cost: 0.2697
Epoch: 001/010 | Batch 400/469 | Cost: 0.4273
Epoch: 001/010 | Batch 450/469 | Cost: 0.3696
Epoch: 001/010 training accuracy: 92.21%
Time elapsed: 0.25 min
Epoch: 002/010 | Batch 000/469 | Cost: 0.2612
Epoch: 002/010 | Batch 050/469 | Cost: 0.4460
Epoch: 002/010 | Batch 100/469 | Cost: 0.2881
Epoch: 002/010 | Batch 150/469 | Cost: 0.4010
Epoch: 002/010 | Batch 200/469 | Cost: 0.2376
Epoch: 002/010 | Batch 250/469 | Cost: 0.2598
Epoch: 002/010 | Batch 300/469 | Cost: 0.1649
Epoch: 002/010 | Batch 350/469 | Cost: 0.2331
Epoch: 002/010 | Batch 400/469 | Cost: 0.2897
Epoch: 002/010 | Batch 450/469 | Cost: 0.4034
Epoch: 002/010 training accuracy: 92.73%
Time elapsed: 0.51 min
Epoch: 003/010 | Batch 000/469 | Cost: 0.2406
Epoch: 003/010 | Batch 050/469 | Cost: 0.3472
Epoch: 003/010 | Batch 100/469 | Cost: 0.2030
Epoch: 003/010 | Batch 150/469 | Cost: 0.2327
Epoch: 003/010 | Batch 200/469 | Cost: 0.2796
Epoch: 003/010 | Batch 250/469 | Cost: 0.2485
Epoch: 003/010 | Batch 300/469 | Cost: 0.1806
Epoch: 003/010 | Batch 350/469 | Cost: 0.2239
Epoch: 003/010 | Batch 400/469 | Cost: 0.4661
Epoch: 003/010 | Batch 450/469 | Cost: 0.2216
Epoch: 003/010 training accuracy: 93.16%
Time elapsed: 0.76 min
Epoch: 004/010 | Batch 000/469 | Cost: 0.4196
Epoch: 004/010 | Batch 050/469 | Cost: 0.2219
Epoch: 004/010 | Batch 100/469 | Cost: 0.1649
Epoch: 004/010 | Batch 150/469 | Cost: 0.2900
Epoch: 004/010 | Batch 200/469 | Cost: 0.2729
Epoch: 004/010 | Batch 250/469 | Cost: 0.2085
Epoch: 004/010 | Batch 300/469 | Cost: 0.3587
Epoch: 004/010 | Batch 350/469 | Cost: 0.2085
Epoch: 004/010 | Batch 400/469 | Cost: 0.2656
Epoch: 004/010 | Batch 450/469 | Cost: 0.1630
Epoch: 004/010 training accuracy: 93.64%
Time elapsed: 1.01 min
Epoch: 005/010 | Batch 000/469 | Cost: 0.2607
Epoch: 005/010 | Batch 050/469 | Cost: 0.2885
Epoch: 005/010 | Batch 100/469 | Cost: 0.4115
Epoch: 005/010 | Batch 150/469 | Cost: 0.1415
Epoch: 005/010 | Batch 200/469 | Cost: 0.1815
Epoch: 005/010 | Batch 250/469 | Cost: 0.2137
Epoch: 005/010 | Batch 300/469 | Cost: 0.0949
Epoch: 005/010 | Batch 350/469 | Cost: 0.2109
Epoch: 005/010 | Batch 400/469 | Cost: 0.2047
Epoch: 005/010 | Batch 450/469 | Cost: 0.3176
Epoch: 005/010 training accuracy: 93.86%
Time elapsed: 1.26 min
Epoch: 006/010 | Batch 000/469 | Cost: 0.2820
Epoch: 006/010 | Batch 050/469 | Cost: 0.1209
Epoch: 006/010 | Batch 100/469 | Cost: 0.2926
Epoch: 006/010 | Batch 150/469 | Cost: 0.2950
Epoch: 006/010 | Batch 200/469 | Cost: 0.1879
Epoch: 006/010 | Batch 250/469 | Cost: 0.2352
Epoch: 006/010 | Batch 300/469 | Cost: 0.2423
Epoch: 006/010 | Batch 350/469 | Cost: 0.1898
Epoch: 006/010 | Batch 400/469 | Cost: 0.3582
Epoch: 006/010 | Batch 450/469 | Cost: 0.2269
Epoch: 006/010 training accuracy: 93.86%
Time elapsed: 1.51 min
Epoch: 007/010 | Batch 000/469 | Cost: 0.2327
Epoch: 007/010 | Batch 050/469 | Cost: 0.1684
Epoch: 007/010 | Batch 100/469 | Cost: 0.1441
Epoch: 007/010 | Batch 150/469 | Cost: 0.1740
Epoch: 007/010 | Batch 200/469 | Cost: 0.1402
Epoch: 007/010 | Batch 250/469 | Cost: 0.2488
Epoch: 007/010 | Batch 300/469 | Cost: 0.2436
Epoch: 007/010 | Batch 350/469 | Cost: 0.2196
Epoch: 007/010 | Batch 400/469 | Cost: 0.1210
Epoch: 007/010 | Batch 450/469 | Cost: 0.1820
Epoch: 007/010 training accuracy: 94.19%
Time elapsed: 1.76 min
Epoch: 008/010 | Batch 000/469 | Cost: 0.1494
Epoch: 008/010 | Batch 050/469 | Cost: 0.1392
Epoch: 008/010 | Batch 100/469 | Cost: 0.2526
Epoch: 008/010 | Batch 150/469 | Cost: 0.1961
Epoch: 008/010 | Batch 200/469 | Cost: 0.2890
Epoch: 008/010 | Batch 250/469 | Cost: 0.2019
Epoch: 008/010 | Batch 300/469 | Cost: 0.3335
Epoch: 008/010 | Batch 350/469 | Cost: 0.2250
Epoch: 008/010 | Batch 400/469 | Cost: 0.1983
Epoch: 008/010 | Batch 450/469 | Cost: 0.2136
Epoch: 008/010 training accuracy: 94.40%
Time elapsed: 2.01 min
Epoch: 009/010 | Batch 000/469 | Cost: 0.3670
Epoch: 009/010 | Batch 050/469 | Cost: 0.1793
Epoch: 009/010 | Batch 100/469 | Cost: 0.3003
Epoch: 009/010 | Batch 150/469 | Cost: 0.1713
Epoch: 009/010 | Batch 200/469 | Cost: 0.2957
Epoch: 009/010 | Batch 250/469 | Cost: 0.2260
Epoch: 009/010 | Batch 300/469 | Cost: 0.1860
Epoch: 009/010 | Batch 350/469 | Cost: 0.2632
Epoch: 009/010 | Batch 400/469 | Cost: 0.2249
Epoch: 009/010 | Batch 450/469 | Cost: 0.2512
Epoch: 009/010 training accuracy: 94.61%
Time elapsed: 2.26 min
Epoch: 010/010 | Batch 000/469 | Cost: 0.1599
Epoch: 010/010 | Batch 050/469 | Cost: 0.2204
Epoch: 010/010 | Batch 100/469 | Cost: 0.1528
Epoch: 010/010 | Batch 150/469 | Cost: 0.1847
Epoch: 010/010 | Batch 200/469 | Cost: 0.1767
Epoch: 010/010 | Batch 250/469 | Cost: 0.1473
Epoch: 010/010 | Batch 300/469 | Cost: 0.1407
Epoch: 010/010 | Batch 350/469 | Cost: 0.1406
Epoch: 010/010 | Batch 400/469 | Cost: 0.3001
Epoch: 010/010 | Batch 450/469 | Cost: 0.2306
Epoch: 010/010 training accuracy: 93.22%
Time elapsed: 2.51 min
Total Training Time: 2.51 min
print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
Test accuracy: 92.20%
The following code implements residual blocks with skip connections such that the input passed via the shortcut is resized (via 1x1 convolutions with a stride of 2) to match the dimensions of the main path's output. Such a residual block is illustrated below:
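The dimension matching can be verified in isolation. The following sketch (illustrative shapes; assumes the imports above and is not part of the original notebook) shows how a 1x1 convolution with a stride of 2 maps a 28x28x1 shortcut onto the 14x14x8 output of the main path:
# Illustrative check: a 1x1 convolution with stride 2 halves the spatial
# dimensions and adjusts the channel count of the shortcut, making the
# element-wise addition with the main path's output well-defined
_x = torch.randn(1, 1, 28, 28)
_shortcut_conv = torch.nn.Conv2d(in_channels=1, out_channels=8,
                                 kernel_size=(1, 1), stride=(2, 2), padding=0)
print(_shortcut_conv(_x).shape)  # expected: torch.Size([1, 8, 14, 14])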
##########################
### MODEL
##########################

class ConvNet(torch.nn.Module):

    def __init__(self, num_classes):
        super(ConvNet, self).__init__()

        #########################
        ### 1st residual block
        #########################
        # 28x28x1 => 14x14x4
        self.conv_1 = torch.nn.Conv2d(in_channels=1,
                                      out_channels=4,
                                      kernel_size=(3, 3),
                                      stride=(2, 2),
                                      padding=1)
        self.conv_1_bn = torch.nn.BatchNorm2d(4)

        # 14x14x4 => 14x14x8
        self.conv_2 = torch.nn.Conv2d(in_channels=4,
                                      out_channels=8,
                                      kernel_size=(1, 1),
                                      stride=(1, 1),
                                      padding=0)
        self.conv_2_bn = torch.nn.BatchNorm2d(8)

        # 28x28x1 => 14x14x8
        self.conv_shortcut_1 = torch.nn.Conv2d(in_channels=1,
                                               out_channels=8,
                                               kernel_size=(1, 1),
                                               stride=(2, 2),
                                               padding=0)
        self.conv_shortcut_1_bn = torch.nn.BatchNorm2d(8)

        #########################
        ### 2nd residual block
        #########################
        # 14x14x8 => 7x7x16
        self.conv_3 = torch.nn.Conv2d(in_channels=8,
                                      out_channels=16,
                                      kernel_size=(3, 3),
                                      stride=(2, 2),
                                      padding=1)
        self.conv_3_bn = torch.nn.BatchNorm2d(16)

        # 7x7x16 => 7x7x32
        self.conv_4 = torch.nn.Conv2d(in_channels=16,
                                      out_channels=32,
                                      kernel_size=(1, 1),
                                      stride=(1, 1),
                                      padding=0)
        self.conv_4_bn = torch.nn.BatchNorm2d(32)

        # 14x14x8 => 7x7x32
        self.conv_shortcut_2 = torch.nn.Conv2d(in_channels=8,
                                               out_channels=32,
                                               kernel_size=(1, 1),
                                               stride=(2, 2),
                                               padding=0)
        self.conv_shortcut_2_bn = torch.nn.BatchNorm2d(32)

        #########################
        ### Fully connected
        #########################
        self.linear_1 = torch.nn.Linear(7*7*32, num_classes)

    def forward(self, x):

        #########################
        ### 1st residual block
        #########################
        shortcut = x

        out = self.conv_1(x)  # 28x28x1 => 14x14x4
        out = self.conv_1_bn(out)
        out = F.relu(out)

        out = self.conv_2(out)  # 14x14x4 => 14x14x8
        out = self.conv_2_bn(out)

        # match up dimensions using a linear function (no relu)
        shortcut = self.conv_shortcut_1(shortcut)
        shortcut = self.conv_shortcut_1_bn(shortcut)

        out += shortcut
        out = F.relu(out)

        #########################
        ### 2nd residual block
        #########################
        shortcut = out

        out = self.conv_3(out)  # 14x14x8 => 7x7x16
        out = self.conv_3_bn(out)
        out = F.relu(out)

        out = self.conv_4(out)  # 7x7x16 => 7x7x32
        out = self.conv_4_bn(out)

        # match up dimensions using a linear function (no relu)
        shortcut = self.conv_shortcut_2(shortcut)
        shortcut = self.conv_shortcut_2_bn(shortcut)

        out += shortcut
        out = F.relu(out)

        #########################
        ### Fully connected
        #########################
        logits = self.linear_1(out.view(-1, 7*7*32))
        probas = F.softmax(logits, dim=1)
        return logits, probas
torch.manual_seed(random_seed)
model = ConvNet(num_classes=num_classes)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def compute_accuracy(model, data_loader):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
for epoch in range(num_epochs):
    model = model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch+1, num_epochs, batch_idx,
                     len(train_loader), cost))

    model = model.eval()  # eval mode so batchnorm uses (and doesn't update) its running statistics during inference
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, num_epochs,
              compute_accuracy(model, train_loader)))
Epoch: 001/010 | Batch 000/469 | Cost: 2.3318
Epoch: 001/010 | Batch 050/469 | Cost: 0.1491
Epoch: 001/010 | Batch 100/469 | Cost: 0.2615
Epoch: 001/010 | Batch 150/469 | Cost: 0.0847
Epoch: 001/010 | Batch 200/469 | Cost: 0.1427
Epoch: 001/010 | Batch 250/469 | Cost: 0.1739
Epoch: 001/010 | Batch 300/469 | Cost: 0.1558
Epoch: 001/010 | Batch 350/469 | Cost: 0.0684
Epoch: 001/010 | Batch 400/469 | Cost: 0.0717
Epoch: 001/010 | Batch 450/469 | Cost: 0.0785
Epoch: 001/010 training accuracy: 97.90%
Epoch: 002/010 | Batch 000/469 | Cost: 0.0582
Epoch: 002/010 | Batch 050/469 | Cost: 0.1199
Epoch: 002/010 | Batch 100/469 | Cost: 0.0918
Epoch: 002/010 | Batch 150/469 | Cost: 0.0247
Epoch: 002/010 | Batch 200/469 | Cost: 0.0314
Epoch: 002/010 | Batch 250/469 | Cost: 0.0759
Epoch: 002/010 | Batch 300/469 | Cost: 0.0280
Epoch: 002/010 | Batch 350/469 | Cost: 0.0391
Epoch: 002/010 | Batch 400/469 | Cost: 0.0431
Epoch: 002/010 | Batch 450/469 | Cost: 0.0455
Epoch: 002/010 training accuracy: 98.16%
Epoch: 003/010 | Batch 000/469 | Cost: 0.0303
Epoch: 003/010 | Batch 050/469 | Cost: 0.0433
Epoch: 003/010 | Batch 100/469 | Cost: 0.0465
Epoch: 003/010 | Batch 150/469 | Cost: 0.0243
Epoch: 003/010 | Batch 200/469 | Cost: 0.0258
Epoch: 003/010 | Batch 250/469 | Cost: 0.0403
Epoch: 003/010 | Batch 300/469 | Cost: 0.1024
Epoch: 003/010 | Batch 350/469 | Cost: 0.0241
Epoch: 003/010 | Batch 400/469 | Cost: 0.0299
Epoch: 003/010 | Batch 450/469 | Cost: 0.0354
Epoch: 003/010 training accuracy: 98.08%
Epoch: 004/010 | Batch 000/469 | Cost: 0.0471
Epoch: 004/010 | Batch 050/469 | Cost: 0.0954
Epoch: 004/010 | Batch 100/469 | Cost: 0.0073
Epoch: 004/010 | Batch 150/469 | Cost: 0.0531
Epoch: 004/010 | Batch 200/469 | Cost: 0.0493
Epoch: 004/010 | Batch 250/469 | Cost: 0.1070
Epoch: 004/010 | Batch 300/469 | Cost: 0.0205
Epoch: 004/010 | Batch 350/469 | Cost: 0.0270
Epoch: 004/010 | Batch 400/469 | Cost: 0.0817
Epoch: 004/010 | Batch 450/469 | Cost: 0.0182
Epoch: 004/010 training accuracy: 98.70%
Epoch: 005/010 | Batch 000/469 | Cost: 0.0691
Epoch: 005/010 | Batch 050/469 | Cost: 0.0326
Epoch: 005/010 | Batch 100/469 | Cost: 0.0041
Epoch: 005/010 | Batch 150/469 | Cost: 0.0774
Epoch: 005/010 | Batch 200/469 | Cost: 0.1223
Epoch: 005/010 | Batch 250/469 | Cost: 0.0329
Epoch: 005/010 | Batch 300/469 | Cost: 0.0479
Epoch: 005/010 | Batch 350/469 | Cost: 0.0696
Epoch: 005/010 | Batch 400/469 | Cost: 0.0504
Epoch: 005/010 | Batch 450/469 | Cost: 0.0736
Epoch: 005/010 training accuracy: 98.38%
Epoch: 006/010 | Batch 000/469 | Cost: 0.0318
Epoch: 006/010 | Batch 050/469 | Cost: 0.0303
Epoch: 006/010 | Batch 100/469 | Cost: 0.0267
Epoch: 006/010 | Batch 150/469 | Cost: 0.0912
Epoch: 006/010 | Batch 200/469 | Cost: 0.0131
Epoch: 006/010 | Batch 250/469 | Cost: 0.0164
Epoch: 006/010 | Batch 300/469 | Cost: 0.0109
Epoch: 006/010 | Batch 350/469 | Cost: 0.0699
Epoch: 006/010 | Batch 400/469 | Cost: 0.0030
Epoch: 006/010 | Batch 450/469 | Cost: 0.0237
Epoch: 006/010 training accuracy: 98.74%
Epoch: 007/010 | Batch 000/469 | Cost: 0.0214
Epoch: 007/010 | Batch 050/469 | Cost: 0.0097
Epoch: 007/010 | Batch 100/469 | Cost: 0.0292
Epoch: 007/010 | Batch 150/469 | Cost: 0.0648
Epoch: 007/010 | Batch 200/469 | Cost: 0.0044
Epoch: 007/010 | Batch 250/469 | Cost: 0.0557
Epoch: 007/010 | Batch 300/469 | Cost: 0.0139
Epoch: 007/010 | Batch 350/469 | Cost: 0.0809
Epoch: 007/010 | Batch 400/469 | Cost: 0.0285
Epoch: 007/010 | Batch 450/469 | Cost: 0.0050
Epoch: 007/010 training accuracy: 98.82%
Epoch: 008/010 | Batch 000/469 | Cost: 0.0890
Epoch: 008/010 | Batch 050/469 | Cost: 0.0685
Epoch: 008/010 | Batch 100/469 | Cost: 0.0274
Epoch: 008/010 | Batch 150/469 | Cost: 0.0187
Epoch: 008/010 | Batch 200/469 | Cost: 0.0268
Epoch: 008/010 | Batch 250/469 | Cost: 0.1681
Epoch: 008/010 | Batch 300/469 | Cost: 0.0167
Epoch: 008/010 | Batch 350/469 | Cost: 0.0518
Epoch: 008/010 | Batch 400/469 | Cost: 0.0138
Epoch: 008/010 | Batch 450/469 | Cost: 0.0270
Epoch: 008/010 training accuracy: 99.08%
Epoch: 009/010 | Batch 000/469 | Cost: 0.0458
Epoch: 009/010 | Batch 050/469 | Cost: 0.0039
Epoch: 009/010 | Batch 100/469 | Cost: 0.0597
Epoch: 009/010 | Batch 150/469 | Cost: 0.0120
Epoch: 009/010 | Batch 200/469 | Cost: 0.0580
Epoch: 009/010 | Batch 250/469 | Cost: 0.0280
Epoch: 009/010 | Batch 300/469 | Cost: 0.0570
Epoch: 009/010 | Batch 350/469 | Cost: 0.0831
Epoch: 009/010 | Batch 400/469 | Cost: 0.0732
Epoch: 009/010 | Batch 450/469 | Cost: 0.0327
Epoch: 009/010 training accuracy: 99.05%
Epoch: 010/010 | Batch 000/469 | Cost: 0.0312
Epoch: 010/010 | Batch 050/469 | Cost: 0.0130
Epoch: 010/010 | Batch 100/469 | Cost: 0.0052
Epoch: 010/010 | Batch 150/469 | Cost: 0.0188
Epoch: 010/010 | Batch 200/469 | Cost: 0.0362
Epoch: 010/010 | Batch 250/469 | Cost: 0.1085
Epoch: 010/010 | Batch 300/469 | Cost: 0.0004
Epoch: 010/010 | Batch 350/469 | Cost: 0.0299
Epoch: 010/010 | Batch 400/469 | Cost: 0.0769
Epoch: 010/010 | Batch 450/469 | Cost: 0.0247
Epoch: 010/010 training accuracy: 98.87%
print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
Test accuracy: 97.91%
This is the same network as above, but it uses a reusable ResidualBlock helper class so the two residual blocks don't have to be written out by hand.
class ResidualBlock(torch.nn.Module):

    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.conv_1 = torch.nn.Conv2d(in_channels=channels[0],
                                      out_channels=channels[1],
                                      kernel_size=(3, 3),
                                      stride=(2, 2),
                                      padding=1)
        self.conv_1_bn = torch.nn.BatchNorm2d(channels[1])

        self.conv_2 = torch.nn.Conv2d(in_channels=channels[1],
                                      out_channels=channels[2],
                                      kernel_size=(1, 1),
                                      stride=(1, 1),
                                      padding=0)
        self.conv_2_bn = torch.nn.BatchNorm2d(channels[2])

        self.conv_shortcut_1 = torch.nn.Conv2d(in_channels=channels[0],
                                               out_channels=channels[2],
                                               kernel_size=(1, 1),
                                               stride=(2, 2),
                                               padding=0)
        self.conv_shortcut_1_bn = torch.nn.BatchNorm2d(channels[2])

    def forward(self, x):
        shortcut = x

        out = self.conv_1(x)
        out = self.conv_1_bn(out)
        out = F.relu(out)

        out = self.conv_2(out)
        out = self.conv_2_bn(out)

        # match up dimensions using a linear function (no relu)
        shortcut = self.conv_shortcut_1(shortcut)
        shortcut = self.conv_shortcut_1_bn(shortcut)

        out += shortcut
        out = F.relu(out)
        return out
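A quick shape check of the helper class (illustrative; not part of the original notebook): each block halves the spatial resolution and maps channels[0] input channels to channels[2] output channels:
# Illustrative check: with channels=[1, 4, 8], a 28x28x1 input batch
# comes out as 14x14x8 (spatial size halved, channel count expanded)
_block = ResidualBlock(channels=[1, 4, 8])
print(_block(torch.randn(2, 1, 28, 28)).shape)  # expected: torch.Size([2, 8, 14, 14])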
##########################
### MODEL
##########################

class ConvNet(torch.nn.Module):

    def __init__(self, num_classes):
        super(ConvNet, self).__init__()

        self.residual_block_1 = ResidualBlock(channels=[1, 4, 8])
        self.residual_block_2 = ResidualBlock(channels=[8, 16, 32])

        self.linear_1 = torch.nn.Linear(7*7*32, num_classes)

    def forward(self, x):

        out = self.residual_block_1(x)   # call the module directly (not .forward) so hooks are respected
        out = self.residual_block_2(out)

        logits = self.linear_1(out.view(-1, 7*7*32))
        probas = F.softmax(logits, dim=1)
        return logits, probas
torch.manual_seed(random_seed)
model = ConvNet(num_classes=num_classes)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
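Optionally (this check is not part of the original notebook), one can verify that the helper-based model is equivalent to the hand-written version above by comparing the number of trainable parameters:
# Optional sanity check: the ResidualBlock-based model should have the same
# number of trainable parameters as the equivalent hand-written network
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Number of trainable parameters: %d' % num_params)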
def compute_accuracy(model, data_loader):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
for epoch in range(num_epochs):
    model = model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch+1, num_epochs, batch_idx,
                     len(train_dataset)//batch_size, cost))

    model = model.eval()  # eval mode so batchnorm uses (and doesn't update) its running statistics during inference
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, num_epochs,
              compute_accuracy(model, train_loader)))
Epoch: 001/010 | Batch 000/468 | Cost: 2.3318
Epoch: 001/010 | Batch 050/468 | Cost: 0.1491
Epoch: 001/010 | Batch 100/468 | Cost: 0.2615
Epoch: 001/010 | Batch 150/468 | Cost: 0.0847
Epoch: 001/010 | Batch 200/468 | Cost: 0.1427
Epoch: 001/010 | Batch 250/468 | Cost: 0.1739
Epoch: 001/010 | Batch 300/468 | Cost: 0.1558
Epoch: 001/010 | Batch 350/468 | Cost: 0.0684
Epoch: 001/010 | Batch 400/468 | Cost: 0.0717
Epoch: 001/010 | Batch 450/468 | Cost: 0.0785
Epoch: 001/010 training accuracy: 97.90%
Epoch: 002/010 | Batch 000/468 | Cost: 0.0582
Epoch: 002/010 | Batch 050/468 | Cost: 0.1199
Epoch: 002/010 | Batch 100/468 | Cost: 0.0918
Epoch: 002/010 | Batch 150/468 | Cost: 0.0247
Epoch: 002/010 | Batch 200/468 | Cost: 0.0314
Epoch: 002/010 | Batch 250/468 | Cost: 0.0759
Epoch: 002/010 | Batch 300/468 | Cost: 0.0280
Epoch: 002/010 | Batch 350/468 | Cost: 0.0391
Epoch: 002/010 | Batch 400/468 | Cost: 0.0431
Epoch: 002/010 | Batch 450/468 | Cost: 0.0455
Epoch: 002/010 training accuracy: 98.16%
Epoch: 003/010 | Batch 000/468 | Cost: 0.0303
Epoch: 003/010 | Batch 050/468 | Cost: 0.0433
Epoch: 003/010 | Batch 100/468 | Cost: 0.0465
Epoch: 003/010 | Batch 150/468 | Cost: 0.0243
Epoch: 003/010 | Batch 200/468 | Cost: 0.0258
Epoch: 003/010 | Batch 250/468 | Cost: 0.0403
Epoch: 003/010 | Batch 300/468 | Cost: 0.1024
Epoch: 003/010 | Batch 350/468 | Cost: 0.0241
Epoch: 003/010 | Batch 400/468 | Cost: 0.0299
Epoch: 003/010 | Batch 450/468 | Cost: 0.0354
Epoch: 003/010 training accuracy: 98.08%
Epoch: 004/010 | Batch 000/468 | Cost: 0.0471
Epoch: 004/010 | Batch 050/468 | Cost: 0.0954
Epoch: 004/010 | Batch 100/468 | Cost: 0.0073
Epoch: 004/010 | Batch 150/468 | Cost: 0.0531
Epoch: 004/010 | Batch 200/468 | Cost: 0.0493
Epoch: 004/010 | Batch 250/468 | Cost: 0.1070
Epoch: 004/010 | Batch 300/468 | Cost: 0.0205
Epoch: 004/010 | Batch 350/468 | Cost: 0.0270
Epoch: 004/010 | Batch 400/468 | Cost: 0.0817
Epoch: 004/010 | Batch 450/468 | Cost: 0.0182
Epoch: 004/010 training accuracy: 98.70%
Epoch: 005/010 | Batch 000/468 | Cost: 0.0691
Epoch: 005/010 | Batch 050/468 | Cost: 0.0326
Epoch: 005/010 | Batch 100/468 | Cost: 0.0041
Epoch: 005/010 | Batch 150/468 | Cost: 0.0774
Epoch: 005/010 | Batch 200/468 | Cost: 0.1223
Epoch: 005/010 | Batch 250/468 | Cost: 0.0329
Epoch: 005/010 | Batch 300/468 | Cost: 0.0479
Epoch: 005/010 | Batch 350/468 | Cost: 0.0696
Epoch: 005/010 | Batch 400/468 | Cost: 0.0504
Epoch: 005/010 | Batch 450/468 | Cost: 0.0736
Epoch: 005/010 training accuracy: 98.38%
Epoch: 006/010 | Batch 000/468 | Cost: 0.0318
Epoch: 006/010 | Batch 050/468 | Cost: 0.0303
Epoch: 006/010 | Batch 100/468 | Cost: 0.0267
Epoch: 006/010 | Batch 150/468 | Cost: 0.0912
Epoch: 006/010 | Batch 200/468 | Cost: 0.0131
Epoch: 006/010 | Batch 250/468 | Cost: 0.0164
Epoch: 006/010 | Batch 300/468 | Cost: 0.0109
Epoch: 006/010 | Batch 350/468 | Cost: 0.0699
Epoch: 006/010 | Batch 400/468 | Cost: 0.0030
Epoch: 006/010 | Batch 450/468 | Cost: 0.0237
Epoch: 006/010 training accuracy: 98.74%
Epoch: 007/010 | Batch 000/468 | Cost: 0.0214
Epoch: 007/010 | Batch 050/468 | Cost: 0.0097
Epoch: 007/010 | Batch 100/468 | Cost: 0.0292
Epoch: 007/010 | Batch 150/468 | Cost: 0.0648
Epoch: 007/010 | Batch 200/468 | Cost: 0.0044
Epoch: 007/010 | Batch 250/468 | Cost: 0.0557
Epoch: 007/010 | Batch 300/468 | Cost: 0.0139
Epoch: 007/010 | Batch 350/468 | Cost: 0.0809
Epoch: 007/010 | Batch 400/468 | Cost: 0.0285
Epoch: 007/010 | Batch 450/468 | Cost: 0.0050
Epoch: 007/010 training accuracy: 98.82%
Epoch: 008/010 | Batch 000/468 | Cost: 0.0890
Epoch: 008/010 | Batch 050/468 | Cost: 0.0685
Epoch: 008/010 | Batch 100/468 | Cost: 0.0274
Epoch: 008/010 | Batch 150/468 | Cost: 0.0187
Epoch: 008/010 | Batch 200/468 | Cost: 0.0268
Epoch: 008/010 | Batch 250/468 | Cost: 0.1681
Epoch: 008/010 | Batch 300/468 | Cost: 0.0167
Epoch: 008/010 | Batch 350/468 | Cost: 0.0518
Epoch: 008/010 | Batch 400/468 | Cost: 0.0138
Epoch: 008/010 | Batch 450/468 | Cost: 0.0270
Epoch: 008/010 training accuracy: 99.08%
Epoch: 009/010 | Batch 000/468 | Cost: 0.0458
Epoch: 009/010 | Batch 050/468 | Cost: 0.0039
Epoch: 009/010 | Batch 100/468 | Cost: 0.0597
Epoch: 009/010 | Batch 150/468 | Cost: 0.0120
Epoch: 009/010 | Batch 200/468 | Cost: 0.0580
Epoch: 009/010 | Batch 250/468 | Cost: 0.0280
Epoch: 009/010 | Batch 300/468 | Cost: 0.0570
Epoch: 009/010 | Batch 350/468 | Cost: 0.0831
Epoch: 009/010 | Batch 400/468 | Cost: 0.0732
Epoch: 009/010 | Batch 450/468 | Cost: 0.0327
Epoch: 009/010 training accuracy: 99.05%
Epoch: 010/010 | Batch 000/468 | Cost: 0.0312
Epoch: 010/010 | Batch 050/468 | Cost: 0.0130
Epoch: 010/010 | Batch 100/468 | Cost: 0.0052
Epoch: 010/010 | Batch 150/468 | Cost: 0.0188
Epoch: 010/010 | Batch 200/468 | Cost: 0.0362
Epoch: 010/010 | Batch 250/468 | Cost: 0.1085
Epoch: 010/010 | Batch 300/468 | Cost: 0.0004
Epoch: 010/010 | Batch 350/468 | Cost: 0.0299
Epoch: 010/010 | Batch 400/468 | Cost: 0.0769
Epoch: 010/010 | Batch 450/468 | Cost: 0.0247
Epoch: 010/010 training accuracy: 98.87%
print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
Test accuracy: 97.91%
%watermark -iv
numpy 1.15.4 torch 1.0.1.post2