Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka CPython 3.7.3 IPython 7.9.0 torch 1.3.1
This notebook implements the classic LeNet-5 convolutional network [1]. The architecture was originally developed for MNIST digit classification; here, the same network is applied to a subset of Google's Quickdraw dataset (described below). The basic architecture is shown in the figure below:
LeNet-5 is commonly regarded as the pioneer of convolutional neural networks, and its architecture is very simple by modern standards. In total, LeNet-5 consists of only 7 layers. Three of these are convolutional layers (C1, C3, C5), which are connected by two average pooling layers (S2 & S4). The penultimate layer is a fully connected layer (F6), which is followed by the final output layer. With this architecture, the authors achieved an error rate below 1% on the MNIST dataset, which was very close to the state of the art at the time (produced by a boosted ensemble of three LeNet-4 networks).
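As a concrete illustration of these dimensions, the feature-map sizes for a 28x28 grayscale input can be traced through the two convolution/pooling stages used in the implementation below (a small sketch, not part of the original notebook; note that the implementation uses max pooling in place of the original average pooling):
# trace feature-map sizes through LeNet-5 for a 28x28 grayscale input:
# 5x5 convolutions without padding, each followed by 2x2 pooling
size = 28
for layer in ['conv1 (5x5)', 'pool1 (2x2)', 'conv2 (5x5)', 'pool2 (2x2)']:
    if 'conv' in layer:
        size = size - 5 + 1   # valid convolution: out = in - kernel + 1
    else:
        size = size // 2      # 2x2 pooling halves the spatial resolution
    print('%s -> %dx%d' % (layer, size, size))
# conv1 -> 24x24, pool1 -> 12x12, conv2 -> 8x8, pool2 -> 4x4,
# which explains the 16*4*4 input units of the first fully connected layer.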
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
if torch.cuda.is_available():
torch.backends.cudnn.deterministic = True
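For stricter run-to-run reproducibility, cuDNN's autotuning can also be disabled (an optional addition, not part of the original notebook):
if torch.cuda.is_available():
    # benchmark mode selects algorithms adaptively and can be non-deterministic
    torch.backends.cudnn.benchmark = False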
##########################
### SETTINGS
##########################
# Hyperparameters
RANDOM_SEED = 1
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_EPOCHS = 10
# Architecture
NUM_FEATURES = 28*28
NUM_CLASSES = 10
# Other
DEVICE = "cuda:1"
GRAYSCALE = True
This notebook is based on Google's Quickdraw dataset (https://quickdraw.withgoogle.com). In particular, we will be working with an arbitrary subset of 10 categories in PNG format:
label_dict = {
"lollipop": 0,
"binoculars": 1,
"mouse": 2,
"basket": 3,
"penguin": 4,
"washing machine": 5,
"canoe": 6,
"eyeglasses": 7,
"beach": 8,
"screwdriver": 9,
}
(The class labels 0-9 can be ignored in this notebook).
For more details on obtaining and preparing the dataset, please see the accompanying data-preparation notebook.
df = pd.read_csv('quickdraw_png_set1_train.csv', index_col=0)
df.head()
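Out of interest, the class balance of the training split can be checked via the Label column (a quick sketch, assuming the same CSV layout that the Dataset class below relies on):
# number of training examples per class label and total size of the split
print(df['Label'].value_counts())
print('Total examples:', df.shape[0])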
main_dir = 'quickdraw-png_set1/'
img = Image.open(os.path.join(main_dir, df.index[99]))
img = np.asarray(img, dtype=np.uint8)
print(img.shape)
plt.imshow(np.array(img), cmap='binary')
plt.show()
(28, 28)
class QuickdrawDataset(Dataset):
"""Custom Dataset for loading Quickdraw images"""
def __init__(self, txt_path, img_dir, transform=None):
df = pd.read_csv(txt_path, sep=",", index_col=0)
self.img_dir = img_dir
self.txt_path = txt_path
self.img_names = df.index.values
self.y = df['Label'].values
self.transform = transform
def __getitem__(self, index):
img = Image.open(os.path.join(self.img_dir,
self.img_names[index]))
if self.transform is not None:
img = self.transform(img)
label = self.y[index]
return img, label
def __len__(self):
return self.y.shape[0]
# Note that transforms.ToTensor()
# already divides pixels by 255. internally
BATCH_SIZE = 128
custom_transform = transforms.Compose([#transforms.Lambda(lambda x: x/255.),
transforms.ToTensor()])
train_dataset = QuickdrawDataset(txt_path='quickdraw_png_set1_train.csv',
img_dir='quickdraw-png_set1/',
transform=custom_transform)
train_loader = DataLoader(dataset=train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
num_workers=4)
valid_dataset = QuickdrawDataset(txt_path='quickdraw_png_set1_valid.csv',
img_dir='quickdraw-png_set1/',
transform=custom_transform)
valid_loader = DataLoader(dataset=valid_dataset,
batch_size=BATCH_SIZE,
shuffle=False,
num_workers=4)
# NOTE: this reuses the training CSV as a stand-in test split;
# substitute a dedicated held-out test CSV here if one is available
test_dataset = QuickdrawDataset(txt_path='quickdraw_png_set1_train.csv',
img_dir='quickdraw-png_set1/',
transform=custom_transform)
test_loader = DataLoader(dataset=test_dataset,
batch_size=BATCH_SIZE,
shuffle=False,
num_workers=4)
device = torch.device(DEVICE if torch.cuda.is_available() else "cpu")
torch.manual_seed(0)
num_epochs = 2
for epoch in range(num_epochs):
for batch_idx, (x, y) in enumerate(train_loader):
print('Epoch:', epoch+1, end='')
print(' | Batch index:', batch_idx, end='')
print(' | Batch size:', y.size()[0])
x = x.to(device)
y = y.to(device)
break
Epoch: 1 | Batch index: 0 | Batch size: 128 Epoch: 2 | Batch index: 0 | Batch size: 128
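As an additional sanity check (not part of the original notebook), one can verify that transforms.ToTensor() indeed produced float tensors scaled to [0, 1] with shape (batch_size, 1, 28, 28):
# inspect one training batch: dtype, shape, and pixel value range
features, labels = next(iter(train_loader))
print('Type:', features.dtype)     # expected: torch.float32
print('Shape:', features.shape)    # expected: torch.Size([128, 1, 28, 28])
print('Min/Max:', features.min().item(), features.max().item())  # expected: within [0, 1]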
##########################
### MODEL
##########################
class LeNet5(nn.Module):
def __init__(self, num_classes, grayscale=False):
super(LeNet5, self).__init__()
self.grayscale = grayscale
self.num_classes = num_classes
if self.grayscale:
in_channels = 1
else:
in_channels = 3
self.features = nn.Sequential(
nn.Conv2d(in_channels, 6, kernel_size=5),
nn.Tanh(),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(6, 16, kernel_size=5),
nn.Tanh(),
nn.MaxPool2d(kernel_size=2)
)
self.classifier = nn.Sequential(
nn.Linear(16*4*4, 120),
nn.Tanh(),
nn.Linear(120, 84),
nn.Tanh(),
nn.Linear(84, num_classes),
)
def forward(self, x):
x = self.features(x)
x = torch.flatten(x, 1)
logits = self.classifier(x)
probas = F.softmax(logits, dim=1)
return logits, probas
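Before training, a forward pass on a dummy batch is a cheap way to confirm that the tensor shapes line up and that the softmax outputs are valid probabilities (a small sketch, not part of the original notebook):
# sanity check: run a random batch of 4 grayscale 28x28 images through a fresh model
tmp_model = LeNet5(NUM_CLASSES, GRAYSCALE)
tmp_logits, tmp_probas = tmp_model(torch.randn(4, 1, 28, 28))
print(tmp_logits.shape)        # expected: torch.Size([4, 10])
print(tmp_probas.sum(dim=1))   # each row should sum to ~1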
torch.manual_seed(RANDOM_SEED)
model = LeNet5(NUM_CLASSES, GRAYSCALE)
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
def print_sizes(module, input, output):
    # forward hook: receives the module, its (tuple of) inputs, and its output
    print('Inside ' + module.__class__.__name__ + ' forward')
    print('input size:', input[0].size())
    print('output size:', output.data.size())
## Debugging
"""
model.features[0].register_forward_hook(print_sizes)
model.features[1].register_forward_hook(print_sizes)
model.features[2].register_forward_hook(print_sizes)
model.features[3].register_forward_hook(print_sizes)
model.classifier[0].register_forward_hook(print_sizes)
model.classifier[1].register_forward_hook(print_sizes)
model.classifier[2].register_forward_hook(print_sizes)
"""
def compute_accuracy(model, data_loader, device):
correct_pred, num_examples = 0, 0
for i, (features, targets) in enumerate(data_loader):
features = features.to(device)
targets = targets.to(device)
logits, probas = model(features)
_, predicted_labels = torch.max(probas, 1)
num_examples += targets.size(0)
correct_pred += (predicted_labels == targets).sum()
return correct_pred.float()/num_examples * 100
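A per-class breakdown can help spot categories the network confuses with each other; the following is a sketch along the same lines as compute_accuracy (not part of the original notebook):
def compute_per_class_accuracy(model, data_loader, device, num_classes=10):
    # count correct predictions and totals separately for each class
    correct = torch.zeros(num_classes)
    total = torch.zeros(num_classes)
    with torch.no_grad():
        for features, targets in data_loader:
            features, targets = features.to(device), targets.to(device)
            logits, probas = model(features)
            preds = torch.argmax(probas, dim=1)
            for c in range(num_classes):
                mask = targets == c
                total[c] += mask.sum().item()
                correct[c] += (preds[mask] == c).sum().item()
    return correct / total * 100  # accuracy in percent per class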
start_time = time.time()
for epoch in range(NUM_EPOCHS):
model.train()
for batch_idx, (features, targets) in enumerate(train_loader):
features = features.to(DEVICE)
targets = targets.to(DEVICE)
### FORWARD AND BACK PROP
logits, probas = model(features)
cost = F.cross_entropy(logits, targets)
optimizer.zero_grad()
cost.backward()
### UPDATE MODEL PARAMETERS
optimizer.step()
### LOGGING
if not batch_idx % 500:
print ('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
%(epoch+1, NUM_EPOCHS, batch_idx,
len(train_loader), cost))
model.eval()
with torch.set_grad_enabled(False): # save memory during inference
print('Epoch: %03d/%03d | Train: %.3f%% | Validation: %.3f%%' % (
epoch+1, NUM_EPOCHS,
compute_accuracy(model, train_loader, device=DEVICE),
compute_accuracy(model, valid_loader, device=DEVICE) ))
print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))
print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 0000/8290 | Cost: 2.3096 Epoch: 001/010 | Batch 0500/8290 | Cost: 0.6812 Epoch: 001/010 | Batch 1000/8290 | Cost: 0.4123 Epoch: 001/010 | Batch 1500/8290 | Cost: 0.2931 Epoch: 001/010 | Batch 2000/8290 | Cost: 0.3878 Epoch: 001/010 | Batch 2500/8290 | Cost: 0.2494 Epoch: 001/010 | Batch 3000/8290 | Cost: 0.3749 Epoch: 001/010 | Batch 3500/8290 | Cost: 0.3393 Epoch: 001/010 | Batch 4000/8290 | Cost: 0.4072 Epoch: 001/010 | Batch 4500/8290 | Cost: 0.2639 Epoch: 001/010 | Batch 5000/8290 | Cost: 0.4709 Epoch: 001/010 | Batch 5500/8290 | Cost: 0.3594 Epoch: 001/010 | Batch 6000/8290 | Cost: 0.4542 Epoch: 001/010 | Batch 6500/8290 | Cost: 0.2887 Epoch: 001/010 | Batch 7000/8290 | Cost: 0.3441 Epoch: 001/010 | Batch 7500/8290 | Cost: 0.2771 Epoch: 001/010 | Batch 8000/8290 | Cost: 0.4163 Epoch: 001/010 | Train: 90.619% | Validation: 90.339% Time elapsed: 3.36 min Epoch: 002/010 | Batch 0000/8290 | Cost: 0.3799 Epoch: 002/010 | Batch 0500/8290 | Cost: 0.2720 Epoch: 002/010 | Batch 1000/8290 | Cost: 0.3350 Epoch: 002/010 | Batch 1500/8290 | Cost: 0.3859 Epoch: 002/010 | Batch 2000/8290 | Cost: 0.2861 Epoch: 002/010 | Batch 2500/8290 | Cost: 0.4202 Epoch: 002/010 | Batch 3000/8290 | Cost: 0.3077 Epoch: 002/010 | Batch 3500/8290 | Cost: 0.3045 Epoch: 002/010 | Batch 4000/8290 | Cost: 0.1604 Epoch: 002/010 | Batch 4500/8290 | Cost: 0.2022 Epoch: 002/010 | Batch 5000/8290 | Cost: 0.2315 Epoch: 002/010 | Batch 5500/8290 | Cost: 0.2880 Epoch: 002/010 | Batch 6000/8290 | Cost: 0.2055 Epoch: 002/010 | Batch 6500/8290 | Cost: 0.5247 Epoch: 002/010 | Batch 7000/8290 | Cost: 0.4131 Epoch: 002/010 | Batch 7500/8290 | Cost: 0.2302 Epoch: 002/010 | Batch 8000/8290 | Cost: 0.2234 Epoch: 002/010 | Train: 91.823% | Validation: 91.486% Time elapsed: 5.58 min Epoch: 003/010 | Batch 0000/8290 | Cost: 0.3333 Epoch: 003/010 | Batch 0500/8290 | Cost: 0.3250 Epoch: 003/010 | Batch 1000/8290 | Cost: 0.2323 Epoch: 003/010 | Batch 1500/8290 | Cost: 0.2834 Epoch: 003/010 | Batch 2000/8290 | Cost: 0.3315 Epoch: 003/010 | Batch 2500/8290 | Cost: 0.3029 Epoch: 003/010 | Batch 3000/8290 | Cost: 0.2193 Epoch: 003/010 | Batch 3500/8290 | Cost: 0.1904 Epoch: 003/010 | Batch 4000/8290 | Cost: 0.2865 Epoch: 003/010 | Batch 4500/8290 | Cost: 0.2746 Epoch: 003/010 | Batch 5000/8290 | Cost: 0.3442 Epoch: 003/010 | Batch 5500/8290 | Cost: 0.2003 Epoch: 003/010 | Batch 6000/8290 | Cost: 0.3828 Epoch: 003/010 | Batch 6500/8290 | Cost: 0.2139 Epoch: 003/010 | Batch 7000/8290 | Cost: 0.2914 Epoch: 003/010 | Batch 7500/8290 | Cost: 0.2799 Epoch: 003/010 | Batch 8000/8290 | Cost: 0.2144 Epoch: 003/010 | Train: 92.152% | Validation: 91.699% Time elapsed: 7.79 min Epoch: 004/010 | Batch 0000/8290 | Cost: 0.1746 Epoch: 004/010 | Batch 0500/8290 | Cost: 0.3684 Epoch: 004/010 | Batch 1000/8290 | Cost: 0.3992 Epoch: 004/010 | Batch 1500/8290 | Cost: 0.3352 Epoch: 004/010 | Batch 2000/8290 | Cost: 0.2877 Epoch: 004/010 | Batch 2500/8290 | Cost: 0.2366 Epoch: 004/010 | Batch 3000/8290 | Cost: 0.3215 Epoch: 004/010 | Batch 3500/8290 | Cost: 0.1784 Epoch: 004/010 | Batch 4000/8290 | Cost: 0.3136 Epoch: 004/010 | Batch 4500/8290 | Cost: 0.3379 Epoch: 004/010 | Batch 5000/8290 | Cost: 0.3069 Epoch: 004/010 | Batch 5500/8290 | Cost: 0.1735 Epoch: 004/010 | Batch 6000/8290 | Cost: 0.1910 Epoch: 004/010 | Batch 6500/8290 | Cost: 0.3131 Epoch: 004/010 | Batch 7000/8290 | Cost: 0.2566 Epoch: 004/010 | Batch 7500/8290 | Cost: 0.2888 Epoch: 004/010 | Batch 8000/8290 | Cost: 0.3298 Epoch: 004/010 | Train: 92.251% | Validation: 91.693% Time 
elapsed: 10.01 min Epoch: 005/010 | Batch 0000/8290 | Cost: 0.2621 Epoch: 005/010 | Batch 0500/8290 | Cost: 0.1341 Epoch: 005/010 | Batch 1000/8290 | Cost: 0.2740 Epoch: 005/010 | Batch 1500/8290 | Cost: 0.2190 Epoch: 005/010 | Batch 2000/8290 | Cost: 0.2355 Epoch: 005/010 | Batch 2500/8290 | Cost: 0.2771 Epoch: 005/010 | Batch 3000/8290 | Cost: 0.3470 Epoch: 005/010 | Batch 3500/8290 | Cost: 0.1613 Epoch: 005/010 | Batch 4000/8290 | Cost: 0.3326 Epoch: 005/010 | Batch 4500/8290 | Cost: 0.2114 Epoch: 005/010 | Batch 5000/8290 | Cost: 0.3249 Epoch: 005/010 | Batch 5500/8290 | Cost: 0.2614 Epoch: 005/010 | Batch 6000/8290 | Cost: 0.2974 Epoch: 005/010 | Batch 6500/8290 | Cost: 0.2653 Epoch: 005/010 | Batch 7000/8290 | Cost: 0.1659 Epoch: 005/010 | Batch 7500/8290 | Cost: 0.3587 Epoch: 005/010 | Batch 8000/8290 | Cost: 0.1271 Epoch: 005/010 | Train: 92.575% | Validation: 91.995% Time elapsed: 12.21 min Epoch: 006/010 | Batch 0000/8290 | Cost: 0.1457 Epoch: 006/010 | Batch 0500/8290 | Cost: 0.2908 Epoch: 006/010 | Batch 1000/8290 | Cost: 0.3151 Epoch: 006/010 | Batch 1500/8290 | Cost: 0.3322 Epoch: 006/010 | Batch 2000/8290 | Cost: 0.2056 Epoch: 006/010 | Batch 2500/8290 | Cost: 0.2625 Epoch: 006/010 | Batch 3000/8290 | Cost: 0.2600 Epoch: 006/010 | Batch 3500/8290 | Cost: 0.3253 Epoch: 006/010 | Batch 4000/8290 | Cost: 0.1884 Epoch: 006/010 | Batch 4500/8290 | Cost: 0.2553 Epoch: 006/010 | Batch 5000/8290 | Cost: 0.3106 Epoch: 006/010 | Batch 5500/8290 | Cost: 0.1887 Epoch: 006/010 | Batch 6000/8290 | Cost: 0.2765 Epoch: 006/010 | Batch 6500/8290 | Cost: 0.1896 Epoch: 006/010 | Batch 7000/8290 | Cost: 0.2351 Epoch: 006/010 | Batch 7500/8290 | Cost: 0.1942 Epoch: 006/010 | Batch 8000/8290 | Cost: 0.2452 Epoch: 006/010 | Train: 92.768% | Validation: 92.084% Time elapsed: 14.44 min Epoch: 007/010 | Batch 0000/8290 | Cost: 0.2731 Epoch: 007/010 | Batch 0500/8290 | Cost: 0.1256 Epoch: 007/010 | Batch 1000/8290 | Cost: 0.2282 Epoch: 007/010 | Batch 1500/8290 | Cost: 0.2288 Epoch: 007/010 | Batch 2000/8290 | Cost: 0.1315 Epoch: 007/010 | Batch 2500/8290 | Cost: 0.2518 Epoch: 007/010 | Batch 3000/8290 | Cost: 0.3285 Epoch: 007/010 | Batch 3500/8290 | Cost: 0.2102 Epoch: 007/010 | Batch 4000/8290 | Cost: 0.1955 Epoch: 007/010 | Batch 4500/8290 | Cost: 0.1690 Epoch: 007/010 | Batch 5000/8290 | Cost: 0.1595 Epoch: 007/010 | Batch 5500/8290 | Cost: 0.2186 Epoch: 007/010 | Batch 6000/8290 | Cost: 0.2465 Epoch: 007/010 | Batch 6500/8290 | Cost: 0.2922 Epoch: 007/010 | Batch 7000/8290 | Cost: 0.2836 Epoch: 007/010 | Batch 7500/8290 | Cost: 0.1863 Epoch: 007/010 | Batch 8000/8290 | Cost: 0.1654 Epoch: 007/010 | Train: 92.966% | Validation: 92.307% Time elapsed: 16.70 min Epoch: 008/010 | Batch 0000/8290 | Cost: 0.2479 Epoch: 008/010 | Batch 0500/8290 | Cost: 0.2505 Epoch: 008/010 | Batch 1000/8290 | Cost: 0.3280 Epoch: 008/010 | Batch 1500/8290 | Cost: 0.3119 Epoch: 008/010 | Batch 2000/8290 | Cost: 0.3892 Epoch: 008/010 | Batch 2500/8290 | Cost: 0.3371 Epoch: 008/010 | Batch 3000/8290 | Cost: 0.3909 Epoch: 008/010 | Batch 3500/8290 | Cost: 0.2831 Epoch: 008/010 | Batch 4000/8290 | Cost: 0.2730 Epoch: 008/010 | Batch 4500/8290 | Cost: 0.1258 Epoch: 008/010 | Batch 5000/8290 | Cost: 0.2155 Epoch: 008/010 | Batch 5500/8290 | Cost: 0.2419 Epoch: 008/010 | Batch 6000/8290 | Cost: 0.2309 Epoch: 008/010 | Batch 6500/8290 | Cost: 0.2843 Epoch: 008/010 | Batch 7000/8290 | Cost: 0.2820 Epoch: 008/010 | Batch 7500/8290 | Cost: 0.1245 Epoch: 008/010 | Batch 8000/8290 | Cost: 0.3503 Epoch: 008/010 | Train: 92.978% | 
Validation: 92.270% Time elapsed: 18.94 min Epoch: 009/010 | Batch 0000/8290 | Cost: 0.2116 Epoch: 009/010 | Batch 0500/8290 | Cost: 0.3477 Epoch: 009/010 | Batch 1000/8290 | Cost: 0.1537 Epoch: 009/010 | Batch 1500/8290 | Cost: 0.2932 Epoch: 009/010 | Batch 2000/8290 | Cost: 0.2075 Epoch: 009/010 | Batch 2500/8290 | Cost: 0.2520 Epoch: 009/010 | Batch 3000/8290 | Cost: 0.1347 Epoch: 009/010 | Batch 3500/8290 | Cost: 0.1800 Epoch: 009/010 | Batch 4000/8290 | Cost: 0.2365 Epoch: 009/010 | Batch 4500/8290 | Cost: 0.2445 Epoch: 009/010 | Batch 5000/8290 | Cost: 0.1622 Epoch: 009/010 | Batch 5500/8290 | Cost: 0.1989 Epoch: 009/010 | Batch 6000/8290 | Cost: 0.1404 Epoch: 009/010 | Batch 6500/8290 | Cost: 0.1281 Epoch: 009/010 | Batch 7000/8290 | Cost: 0.3659 Epoch: 009/010 | Batch 7500/8290 | Cost: 0.2559 Epoch: 009/010 | Batch 8000/8290 | Cost: 0.2351 Epoch: 009/010 | Train: 93.070% | Validation: 92.308% Time elapsed: 21.15 min Epoch: 010/010 | Batch 0000/8290 | Cost: 0.1964 Epoch: 010/010 | Batch 0500/8290 | Cost: 0.1686 Epoch: 010/010 | Batch 1000/8290 | Cost: 0.2819 Epoch: 010/010 | Batch 1500/8290 | Cost: 0.1610 Epoch: 010/010 | Batch 2000/8290 | Cost: 0.1473 Epoch: 010/010 | Batch 2500/8290 | Cost: 0.2996 Epoch: 010/010 | Batch 3000/8290 | Cost: 0.2584 Epoch: 010/010 | Batch 3500/8290 | Cost: 0.3147 Epoch: 010/010 | Batch 4000/8290 | Cost: 0.1333 Epoch: 010/010 | Batch 4500/8290 | Cost: 0.2588 Epoch: 010/010 | Batch 5000/8290 | Cost: 0.1896 Epoch: 010/010 | Batch 5500/8290 | Cost: 0.3248 Epoch: 010/010 | Batch 6000/8290 | Cost: 0.3710 Epoch: 010/010 | Batch 6500/8290 | Cost: 0.3223 Epoch: 010/010 | Batch 7000/8290 | Cost: 0.1774 Epoch: 010/010 | Batch 7500/8290 | Cost: 0.3240 Epoch: 010/010 | Batch 8000/8290 | Cost: 0.2755 Epoch: 010/010 | Train: 93.128% | Validation: 92.364% Time elapsed: 23.37 min Total Training Time: 23.37 min
with torch.set_grad_enabled(False): # save memory during inference
print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader, device=DEVICE)))
Test accuracy: 93.13%
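The notebook does not persist the trained weights; if desired, the usual PyTorch pattern is to store and restore the model's state_dict (a hedged sketch; the file name is arbitrary):
# save the learned parameters
torch.save(model.state_dict(), 'lenet5_quickdraw.pt')
# restore them later into a freshly constructed model
model_restored = LeNet5(NUM_CLASSES, GRAYSCALE)
model_restored.load_state_dict(torch.load('lenet5_quickdraw.pt', map_location='cpu'))
model_restored.eval()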
for batch_idx, (features, targets) in enumerate(test_loader):
features = features
targets = targets
break
# visualize a test image (the first image of the batch, which is also classified below)
nhwc_img = np.transpose(features[0], axes=(1, 2, 0))
nhw_img = np.squeeze(nhwc_img.numpy(), axis=2)
plt.imshow(nhw_img, cmap='Greys');
model.eval()
logits, probas = model(features.to(device)[0, None])
print('Probability Washing Machine %.2f%%' % (probas[0][4]*100))
Probability Washing Machine 99.83%
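To see how confident the model is beyond a single class index, the most probable classes can be listed with torch.topk (a small sketch based on the probas tensor computed above):
# show the three most probable class indices for this example
top_probas, top_indices = torch.topk(probas[0], k=3)
for p, idx in zip(top_probas, top_indices):
    print('class %d: %.2f%%' % (idx.item(), p.item() * 100))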
%watermark -iv
torch 1.3.1 numpy 1.17.4 PIL.Image 6.2.1 torchvision 0.4.2 matplotlib 3.1.0 pandas 0.24.2