Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Author: Sebastian Raschka

Python implementation: CPython
Python version       : 3.8.12
IPython version      : 8.0.1

torch: 1.10.1
Implementation of the VGG-16 [1] architecture on the CelebA face dataset [2] to train a smile classifier.
References

[1] Simonyan, K., & Zisserman, A. (2014). Very Deep Convolutional Networks for Large-Scale Image Recognition. arXiv:1409.1556.
[2] Liu, Z., Luo, P., Wang, X., & Tang, X. (2015). Deep Learning Face Attributes in the Wild. Proceedings of the IEEE International Conference on Computer Vision (ICCV).
The following table (taken from Simonyan & Zisserman referenced above) summarizes the VGG19 architecture:
Note that the CelebA images are 218 x 178, not 256 x 256. We resize to 128 x 128 instead.

BATCH_SIZE = 256
NUM_EPOCHS = 25
LEARNING_RATE = 0.001
NUM_WORKERS = 4
import os
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
import matplotlib.pyplot as plt
from PIL import Image
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
Note that the approx. 200,000-image CelebA face dataset is relatively large (approx. 1.3 GB). If the automatic download below does not work (e.g., it returns a BadZipFile: File is not a zip file error), this is usually due to rate-limit restrictions by the provider hosting the dataset. You can try to download the dataset manually via the download link provided by the author on the official CelebA website at http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html.

Alternatively, you can download the dataset from here: https://drive.google.com/file/d/1m8-EBPgi5MRubrm6iQjafK2QMHDBMSfJ/view?.

If you download the dataset manually, first delete the celeba folder with the partially downloaded files. After the manual download, the celeba folder should contain the img_align_celeba.zip archive along with the accompanying attribute and partition text files. Unzip the img_align_celeba.zip archive inside the celeba folder and then run get_dataloaders_celeba below with download=False.
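For reference, a sketch of the expected folder layout after the manual download (the file names follow torchvision's CelebA loader; treat this listing as an assumption if your torchvision version differs):

celeba/
    img_align_celeba.zip            # unzip in place to obtain img_align_celeba/
    identity_CelebA.txt
    list_attr_celeba.txt
    list_bbox_celeba.txt
    list_eval_partition.txt
    list_landmarks_align_celeba.txt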
##########################
### Dataset
##########################
custom_transforms = transforms.Compose([
    transforms.CenterCrop((160, 160)),
    transforms.Resize([128, 128]),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
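As a quick sanity check (a minimal sketch using a synthetic PIL image rather than a real CelebA sample), the pipeline above maps any input image to a 3 x 128 x 128 tensor with values in [-1, 1]:

# synthetic 218x178 RGB image standing in for a CelebA sample
dummy_img = Image.fromarray(np.uint8(np.random.rand(218, 178, 3) * 255))
out = custom_transforms(dummy_img)
print(out.shape)                          # torch.Size([3, 128, 128])
print(out.min() >= -1, out.max() <= 1)    # tensor(True) tensor(True) after Normalize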
def get_dataloaders_celeba(batch_size, num_workers=0,
                           train_transforms=None,
                           test_transforms=None,
                           download=True):

    if train_transforms is None:
        train_transforms = transforms.ToTensor()

    if test_transforms is None:
        test_transforms = transforms.ToTensor()

    # extract the 'Smiling' entry (index 31) from the 40-dim attribute vector
    get_smile = lambda attr: attr[31]

    train_dataset = datasets.CelebA(root='.',
                                    split='train',
                                    transform=train_transforms,
                                    target_type='attr',
                                    target_transform=get_smile,
                                    download=download)

    valid_dataset = datasets.CelebA(root='.',
                                    split='valid',
                                    target_type='attr',
                                    target_transform=get_smile,
                                    transform=test_transforms)

    test_dataset = datasets.CelebA(root='.',
                                   split='test',
                                   target_type='attr',
                                   target_transform=get_smile,
                                   transform=test_transforms)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              shuffle=True)

    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              shuffle=False)

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             num_workers=num_workers,
                             shuffle=False)

    return train_loader, valid_loader, test_loader
train_loader, valid_loader, test_loader = get_dataloaders_celeba(
    batch_size=BATCH_SIZE,
    train_transforms=custom_transforms,
    test_transforms=custom_transforms,
    download=False,
    num_workers=NUM_WORKERS)
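To verify the loaders before training, a quick sketch that pulls a single batch and prints its dimensions (the shapes in the comments assume the settings above):

for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)  # torch.Size([256, 3, 128, 128])
    print('Label dimensions:', labels.shape)        # torch.Size([256]), entries are 0 or 1
    break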
Note that the target vectors of the CelebA dataset contain the following 40 attributes:
00 - 5_o_Clock_Shadow
01 - Arched_Eyebrows
02 - Attractive
03 - Bags_Under_Eyes
04 - Bald
05 - Bangs
06 - Big_Lips
07 - Big_Nose
08 - Black_Hair
09 - Blond_Hair
10 - Blurry
11 - Brown_Hair
12 - Bushy_Eyebrows
13 - Chubby
14 - Double_Chin
15 - Eyeglasses
16 - Goatee
17 - Gray_Hair
18 - Heavy_Makeup
19 - High_Cheekbones
20 - Male
21 - Mouth_Slightly_Open
22 - Mustache
23 - Narrow_Eyes
24 - No_Beard
25 - Oval_Face
26 - Pale_Skin
27 - Pointy_Nose
28 - Receding_Hairline
29 - Rosy_Cheeks
30 - Sideburns
31 - Smiling
32 - Straight_Hair
33 - Wavy_Hair
34 - Wearing_Earrings
35 - Wearing_Hat
36 - Wearing_Lipstick
37 - Wearing_Necklace
38 - Wearing_Necktie
39 - Young
Via the custom get_smile function above, which selects index 31 of this attribute vector, we fetch the Smiling label.
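To illustrate what the target transform does, here is a minimal sketch with a synthetic attribute vector (not a real CelebA target):

get_smile = lambda attr: attr[31]          # same lambda as in get_dataloaders_celeba

attr = torch.zeros(40, dtype=torch.int64)  # synthetic 40-dim attribute vector
attr[31] = 1                               # mark the face as 'Smiling'
print(get_smile(attr))                     # tensor(1)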
##########################
### SETTINGS
##########################
# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Hyperparameters
random_seed = 1
learning_rate = 0.001
num_epochs = 3
# Architecture
num_features = 128*128
num_classes = 2
##########################
### MODEL
##########################
class VGG16(torch.nn.Module):

    def __init__(self, num_features, num_classes):
        super(VGG16, self).__init__()

        # calculate same padding:
        # (w - k + 2*p)/s + 1 = o
        # => p = (s(o-1) - w + k)/2

        self.block_1 = nn.Sequential(
            # (1(32-1) - 32 + 3)/2 = 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        )

        self.block_2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        )

        self.block_3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        )

        self.block_4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        )

        self.block_5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        )

        self.classifier = nn.Sequential(
            nn.Linear(512*4*4, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes)
        )

        # simple Gaussian weight initialization
        for m in self.modules():
            if isinstance(m, torch.nn.Conv2d):
                #n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                #m.weight.data.normal_(0, np.sqrt(2. / n))
                m.weight.detach().normal_(0, 0.05)
                if m.bias is not None:
                    m.bias.detach().zero_()
            elif isinstance(m, torch.nn.Linear):
                m.weight.detach().normal_(0, 0.05)
                m.bias.detach().zero_()

    def forward(self, x):
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        x = self.block_5(x)
        logits = self.classifier(x.view(-1, 512*4*4))
        probas = F.softmax(logits, dim=1)
        return logits, probas
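Each of the five max-pooling layers halves the spatial resolution, so a 128 x 128 input shrinks as 128 → 64 → 32 → 16 → 8 → 4, which is why the first linear layer expects 512*4*4 input features. A minimal forward-pass sketch with a synthetic batch (random weights, CPU):

sketch_model = VGG16(num_features=128*128, num_classes=2)
dummy_batch = torch.randn(2, 3, 128, 128)    # synthetic images, not CelebA data
logits, probas = sketch_model(dummy_batch)
print(logits.shape, probas.shape)            # torch.Size([2, 2]) torch.Size([2, 2])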
torch.manual_seed(random_seed)
model = VGG16(num_features=num_features,
num_classes=num_classes)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def compute_accuracy(model, data_loader):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
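Since softmax is monotonic, taking the argmax over the raw logits yields the same predicted labels; the softmax is only needed when the probabilities themselves are of interest. A minimal self-contained illustration:

example_logits = torch.tensor([[2.0, -1.0], [0.5, 1.5]])       # logits for two samples
print(torch.argmax(example_logits, dim=1))                     # tensor([0, 1])
print(torch.argmax(F.softmax(example_logits, dim=1), dim=1))   # same result: tensor([0, 1])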
start_time = time.time()

for epoch in range(num_epochs):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                  % (epoch+1, num_epochs, batch_idx,
                     len(train_loader), cost))

    model.eval()
    with torch.set_grad_enabled(False):  # save memory during inference
        print('Epoch: %03d/%03d | Train: %.3f%% | Valid: %.3f%%' % (
              epoch+1, num_epochs,
              compute_accuracy(model, train_loader),
              compute_accuracy(model, valid_loader)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/003 | Batch 0000/0636 | Cost: 5911.8281
Epoch: 001/003 | Batch 0050/0636 | Cost: 0.6971
Epoch: 001/003 | Batch 0100/0636 | Cost: 0.6431
Epoch: 001/003 | Batch 0150/0636 | Cost: 0.5499
Epoch: 001/003 | Batch 0200/0636 | Cost: 0.4563
Epoch: 001/003 | Batch 0250/0636 | Cost: 0.3367
Epoch: 001/003 | Batch 0300/0636 | Cost: 0.3535
Epoch: 001/003 | Batch 0350/0636 | Cost: 0.2126
Epoch: 001/003 | Batch 0400/0636 | Cost: 0.3210
Epoch: 001/003 | Batch 0450/0636 | Cost: 0.2475
Epoch: 001/003 | Batch 0500/0636 | Cost: 0.2211
Epoch: 001/003 | Batch 0550/0636 | Cost: 0.3318
Epoch: 001/003 | Batch 0600/0636 | Cost: 0.2987
Epoch: 001/003 | Train: 88.431% | Valid: 88.363%
Time elapsed: 34.48 min
Epoch: 002/003 | Batch 0000/0636 | Cost: 0.2720
Epoch: 002/003 | Batch 0050/0636 | Cost: 0.2869
Epoch: 002/003 | Batch 0100/0636 | Cost: 0.2454
Epoch: 002/003 | Batch 0150/0636 | Cost: 0.2355
Epoch: 002/003 | Batch 0200/0636 | Cost: 0.2351
Epoch: 002/003 | Batch 0250/0636 | Cost: 0.1884
Epoch: 002/003 | Batch 0300/0636 | Cost: 0.2546
Epoch: 002/003 | Batch 0350/0636 | Cost: 0.2552
Epoch: 002/003 | Batch 0400/0636 | Cost: 0.2022
Epoch: 002/003 | Batch 0450/0636 | Cost: 0.2270
Epoch: 002/003 | Batch 0500/0636 | Cost: 0.2203
Epoch: 002/003 | Batch 0550/0636 | Cost: 0.2608
Epoch: 002/003 | Batch 0600/0636 | Cost: 0.2683
Epoch: 002/003 | Train: 90.173% | Valid: 89.771%
Time elapsed: 68.96 min
Epoch: 003/003 | Batch 0000/0636 | Cost: 0.2337
Epoch: 003/003 | Batch 0050/0636 | Cost: 0.1952
Epoch: 003/003 | Batch 0100/0636 | Cost: 0.2413
Epoch: 003/003 | Batch 0150/0636 | Cost: 0.2554
Epoch: 003/003 | Batch 0200/0636 | Cost: 0.1685
Epoch: 003/003 | Batch 0250/0636 | Cost: 0.2230
Epoch: 003/003 | Batch 0300/0636 | Cost: 0.2488
Epoch: 003/003 | Batch 0350/0636 | Cost: 0.2356
Epoch: 003/003 | Batch 0400/0636 | Cost: 0.1960
Epoch: 003/003 | Batch 0450/0636 | Cost: 0.2415
Epoch: 003/003 | Batch 0500/0636 | Cost: 0.2152
Epoch: 003/003 | Batch 0550/0636 | Cost: 0.2269
Epoch: 003/003 | Batch 0600/0636 | Cost: 0.2204
Epoch: 003/003 | Train: 90.878% | Valid: 90.512%
Time elapsed: 104.14 min
Total Training Time: 104.14 min
with torch.set_grad_enabled(False):  # save memory during inference
    print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
Test accuracy: 90.51%
from helper_data import UnNormalize
# fetch a single batch from the test loader for visualization
for batch_idx, (features, targets) in enumerate(test_loader):
    break
unnormalizer = UnNormalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
plt.imshow(np.transpose(unnormalizer(features[3]), (1, 2, 0)))
<matplotlib.image.AxesImage at 0x7f8d8ab00f40>
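Here, helper_data is a local helper script from the repository. A minimal sketch of what such an UnNormalize helper can look like, assuming it simply inverts transforms.Normalize channel-wise (this is an assumption, not the repository's exact code):

class UnNormalizeSketch:
    # hypothetical stand-in for helper_data.UnNormalize
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # invert Normalize in place: channel * std + mean
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor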
model.eval()
logits, probas = model(features.to(device)[3, None])
print('Probability Smile %.2f%%' % (probas[0][1]*100))
Probability Smile 95.69%
%watermark -iv
torchvision: 0.11.2
matplotlib : 3.3.4
PIL        : 9.0.1
pandas     : 1.2.5
torch      : 1.10.1
numpy      : 1.22.2