Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka

CPython 3.6.8
IPython 7.2.0

torch 1.0.0
import time
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
##########################
### SETTINGS
##########################
# Device
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
# Hyperparameters
random_seed = 1
learning_rate = 0.1
num_epochs = 10
batch_size = 64
# Architecture
num_features = 784
num_hidden_1 = 128
num_hidden_2 = 256
num_classes = 10
##########################
### MNIST DATASET
##########################
# Note transforms.ToTensor() scales input images
# to 0-1 range
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)
# Checking the dataset
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Image batch dimensions: torch.Size([64, 1, 28, 28])
Image label dimensions: torch.Size([64])
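As noted in the dataset cell above, transforms.ToTensor() is expected to scale the raw pixel values from the integer range [0, 255] to floats in [0, 1]. A minimal sanity check, reusing the images batch from the loop above:

# Sanity check (sketch): ToTensor() should yield float pixel values in [0, 1]
print('Pixel value range: [%g, %g]' % (images.min().item(), images.max().item()))
assert images.min().item() >= 0. and images.max().item() <= 1.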
##########################
### MODEL
##########################
class MultilayerPerceptron(torch.nn.Module):

    def __init__(self, num_features, num_classes):
        super(MultilayerPerceptron, self).__init__()

        self.net = torch.nn.Sequential(
            torch.nn.Linear(num_features, num_hidden_1),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(num_hidden_1, num_hidden_2),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(num_hidden_2, num_classes)
        )

    def forward(self, x):
        logits = self.net(x)
        probas = F.log_softmax(logits, dim=1)
        return logits, probas
torch.manual_seed(random_seed)
model = MultilayerPerceptron(num_features=num_features,
                             num_classes=num_classes)
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
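As a quick sanity check of the 784-128-256-10 architecture, we can count the trainable parameters; a minimal sketch:

# Sanity check (sketch): count trainable parameters
# Expected: (784*128 + 128) + (128*256 + 256) + (256*10 + 10) = 136,074
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Number of trainable parameters:', num_params)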
def compute_accuracy(net, data_loader):
    net.eval()
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.view(-1, 28*28).to(device)
            targets = targets.to(device)
            logits, probas = net(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.view(-1, 28*28).to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch+1, num_epochs, batch_idx,
                     len(train_loader), cost))

    # no gradients needed for the per-epoch evaluation
    with torch.set_grad_enabled(False):
        print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
              epoch+1, num_epochs,
              compute_accuracy(model, train_loader)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 000/938 | Cost: 2.3075 Epoch: 001/010 | Batch 050/938 | Cost: 1.7222 Epoch: 001/010 | Batch 100/938 | Cost: 0.7172 Epoch: 001/010 | Batch 150/938 | Cost: 0.5022 Epoch: 001/010 | Batch 200/938 | Cost: 0.3913 Epoch: 001/010 | Batch 250/938 | Cost: 0.4786 Epoch: 001/010 | Batch 300/938 | Cost: 0.4177 Epoch: 001/010 | Batch 350/938 | Cost: 0.1392 Epoch: 001/010 | Batch 400/938 | Cost: 0.2752 Epoch: 001/010 | Batch 450/938 | Cost: 0.2991 Epoch: 001/010 | Batch 500/938 | Cost: 0.3828 Epoch: 001/010 | Batch 550/938 | Cost: 0.2604 Epoch: 001/010 | Batch 600/938 | Cost: 0.2135 Epoch: 001/010 | Batch 650/938 | Cost: 0.3743 Epoch: 001/010 | Batch 700/938 | Cost: 0.1834 Epoch: 001/010 | Batch 750/938 | Cost: 0.2983 Epoch: 001/010 | Batch 800/938 | Cost: 0.1182 Epoch: 001/010 | Batch 850/938 | Cost: 0.1066 Epoch: 001/010 | Batch 900/938 | Cost: 0.3104 Epoch: 001/010 training accuracy: 92.76% Time elapsed: 0.21 min Epoch: 002/010 | Batch 000/938 | Cost: 0.2066 Epoch: 002/010 | Batch 050/938 | Cost: 0.1977 Epoch: 002/010 | Batch 100/938 | Cost: 0.1766 Epoch: 002/010 | Batch 150/938 | Cost: 0.3247 Epoch: 002/010 | Batch 200/938 | Cost: 0.2914 Epoch: 002/010 | Batch 250/938 | Cost: 0.3427 Epoch: 002/010 | Batch 300/938 | Cost: 0.0698 Epoch: 002/010 | Batch 350/938 | Cost: 0.1212 Epoch: 002/010 | Batch 400/938 | Cost: 0.1271 Epoch: 002/010 | Batch 450/938 | Cost: 0.1743 Epoch: 002/010 | Batch 500/938 | Cost: 0.0741 Epoch: 002/010 | Batch 550/938 | Cost: 0.2402 Epoch: 002/010 | Batch 600/938 | Cost: 0.2013 Epoch: 002/010 | Batch 650/938 | Cost: 0.1400 Epoch: 002/010 | Batch 700/938 | Cost: 0.1400 Epoch: 002/010 | Batch 750/938 | Cost: 0.4599 Epoch: 002/010 | Batch 800/938 | Cost: 0.2720 Epoch: 002/010 | Batch 850/938 | Cost: 0.1009 Epoch: 002/010 | Batch 900/938 | Cost: 0.2104 Epoch: 002/010 training accuracy: 95.64% Time elapsed: 0.42 min Epoch: 003/010 | Batch 000/938 | Cost: 0.1309 Epoch: 003/010 | Batch 050/938 | Cost: 0.0440 Epoch: 003/010 | Batch 100/938 | Cost: 0.0876 Epoch: 003/010 | Batch 150/938 | Cost: 0.1927 Epoch: 003/010 | Batch 200/938 | Cost: 0.1592 Epoch: 003/010 | Batch 250/938 | Cost: 0.1010 Epoch: 003/010 | Batch 300/938 | Cost: 0.1311 Epoch: 003/010 | Batch 350/938 | Cost: 0.2633 Epoch: 003/010 | Batch 400/938 | Cost: 0.2272 Epoch: 003/010 | Batch 450/938 | Cost: 0.2475 Epoch: 003/010 | Batch 500/938 | Cost: 0.1742 Epoch: 003/010 | Batch 550/938 | Cost: 0.0937 Epoch: 003/010 | Batch 600/938 | Cost: 0.2019 Epoch: 003/010 | Batch 650/938 | Cost: 0.1171 Epoch: 003/010 | Batch 700/938 | Cost: 0.1200 Epoch: 003/010 | Batch 750/938 | Cost: 0.1760 Epoch: 003/010 | Batch 800/938 | Cost: 0.0595 Epoch: 003/010 | Batch 850/938 | Cost: 0.1174 Epoch: 003/010 | Batch 900/938 | Cost: 0.1585 Epoch: 003/010 training accuracy: 96.93% Time elapsed: 0.63 min Epoch: 004/010 | Batch 000/938 | Cost: 0.0601 Epoch: 004/010 | Batch 050/938 | Cost: 0.0644 Epoch: 004/010 | Batch 100/938 | Cost: 0.1762 Epoch: 004/010 | Batch 150/938 | Cost: 0.2237 Epoch: 004/010 | Batch 200/938 | Cost: 0.0488 Epoch: 004/010 | Batch 250/938 | Cost: 0.0304 Epoch: 004/010 | Batch 300/938 | Cost: 0.1097 Epoch: 004/010 | Batch 350/938 | Cost: 0.1154 Epoch: 004/010 | Batch 400/938 | Cost: 0.2170 Epoch: 004/010 | Batch 450/938 | Cost: 0.0193 Epoch: 004/010 | Batch 500/938 | Cost: 0.0457 Epoch: 004/010 | Batch 550/938 | Cost: 0.0845 Epoch: 004/010 | Batch 600/938 | Cost: 0.0482 Epoch: 004/010 | Batch 650/938 | Cost: 0.0267 Epoch: 004/010 | Batch 700/938 | Cost: 0.1988 Epoch: 004/010 | Batch 750/938 | Cost: 0.0505 
Epoch: 004/010 | Batch 800/938 | Cost: 0.2189 Epoch: 004/010 | Batch 850/938 | Cost: 0.0378 Epoch: 004/010 | Batch 900/938 | Cost: 0.1241 Epoch: 004/010 training accuracy: 97.57% Time elapsed: 0.84 min Epoch: 005/010 | Batch 000/938 | Cost: 0.0834 Epoch: 005/010 | Batch 050/938 | Cost: 0.1044 Epoch: 005/010 | Batch 100/938 | Cost: 0.0275 Epoch: 005/010 | Batch 150/938 | Cost: 0.0497 Epoch: 005/010 | Batch 200/938 | Cost: 0.1309 Epoch: 005/010 | Batch 250/938 | Cost: 0.1043 Epoch: 005/010 | Batch 300/938 | Cost: 0.0290 Epoch: 005/010 | Batch 350/938 | Cost: 0.0926 Epoch: 005/010 | Batch 400/938 | Cost: 0.0186 Epoch: 005/010 | Batch 450/938 | Cost: 0.1377 Epoch: 005/010 | Batch 500/938 | Cost: 0.0227 Epoch: 005/010 | Batch 550/938 | Cost: 0.0664 Epoch: 005/010 | Batch 600/938 | Cost: 0.0825 Epoch: 005/010 | Batch 650/938 | Cost: 0.0761 Epoch: 005/010 | Batch 700/938 | Cost: 0.0321 Epoch: 005/010 | Batch 750/938 | Cost: 0.0946 Epoch: 005/010 | Batch 800/938 | Cost: 0.0219 Epoch: 005/010 | Batch 850/938 | Cost: 0.0287 Epoch: 005/010 | Batch 900/938 | Cost: 0.1176 Epoch: 005/010 training accuracy: 97.35% Time elapsed: 1.04 min Epoch: 006/010 | Batch 000/938 | Cost: 0.0235 Epoch: 006/010 | Batch 050/938 | Cost: 0.0705 Epoch: 006/010 | Batch 100/938 | Cost: 0.0529 Epoch: 006/010 | Batch 150/938 | Cost: 0.0226 Epoch: 006/010 | Batch 200/938 | Cost: 0.0313 Epoch: 006/010 | Batch 250/938 | Cost: 0.0317 Epoch: 006/010 | Batch 300/938 | Cost: 0.0393 Epoch: 006/010 | Batch 350/938 | Cost: 0.0317 Epoch: 006/010 | Batch 400/938 | Cost: 0.0341 Epoch: 006/010 | Batch 450/938 | Cost: 0.0347 Epoch: 006/010 | Batch 500/938 | Cost: 0.0658 Epoch: 006/010 | Batch 550/938 | Cost: 0.0223 Epoch: 006/010 | Batch 600/938 | Cost: 0.1233 Epoch: 006/010 | Batch 650/938 | Cost: 0.0330 Epoch: 006/010 | Batch 700/938 | Cost: 0.0122 Epoch: 006/010 | Batch 750/938 | Cost: 0.0728 Epoch: 006/010 | Batch 800/938 | Cost: 0.1590 Epoch: 006/010 | Batch 850/938 | Cost: 0.0982 Epoch: 006/010 | Batch 900/938 | Cost: 0.0298 Epoch: 006/010 training accuracy: 98.46% Time elapsed: 1.25 min Epoch: 007/010 | Batch 000/938 | Cost: 0.0400 Epoch: 007/010 | Batch 050/938 | Cost: 0.1568 Epoch: 007/010 | Batch 100/938 | Cost: 0.0724 Epoch: 007/010 | Batch 150/938 | Cost: 0.2265 Epoch: 007/010 | Batch 200/938 | Cost: 0.0221 Epoch: 007/010 | Batch 250/938 | Cost: 0.0142 Epoch: 007/010 | Batch 300/938 | Cost: 0.0837 Epoch: 007/010 | Batch 350/938 | Cost: 0.1274 Epoch: 007/010 | Batch 400/938 | Cost: 0.0372 Epoch: 007/010 | Batch 450/938 | Cost: 0.0902 Epoch: 007/010 | Batch 500/938 | Cost: 0.0803 Epoch: 007/010 | Batch 550/938 | Cost: 0.0229 Epoch: 007/010 | Batch 600/938 | Cost: 0.0453 Epoch: 007/010 | Batch 650/938 | Cost: 0.0195 Epoch: 007/010 | Batch 700/938 | Cost: 0.1837 Epoch: 007/010 | Batch 750/938 | Cost: 0.0499 Epoch: 007/010 | Batch 800/938 | Cost: 0.0406 Epoch: 007/010 | Batch 850/938 | Cost: 0.0500 Epoch: 007/010 | Batch 900/938 | Cost: 0.0717 Epoch: 007/010 training accuracy: 98.65% Time elapsed: 1.46 min Epoch: 008/010 | Batch 000/938 | Cost: 0.0179 Epoch: 008/010 | Batch 050/938 | Cost: 0.0589 Epoch: 008/010 | Batch 100/938 | Cost: 0.0335 Epoch: 008/010 | Batch 150/938 | Cost: 0.0211 Epoch: 008/010 | Batch 200/938 | Cost: 0.0545 Epoch: 008/010 | Batch 250/938 | Cost: 0.0219 Epoch: 008/010 | Batch 300/938 | Cost: 0.0395 Epoch: 008/010 | Batch 350/938 | Cost: 0.1509 Epoch: 008/010 | Batch 400/938 | Cost: 0.1123 Epoch: 008/010 | Batch 450/938 | Cost: 0.0262 Epoch: 008/010 | Batch 500/938 | Cost: 0.1050 Epoch: 008/010 | Batch 550/938 | 
Cost: 0.0804 Epoch: 008/010 | Batch 600/938 | Cost: 0.0080 Epoch: 008/010 | Batch 650/938 | Cost: 0.0510 Epoch: 008/010 | Batch 700/938 | Cost: 0.0269 Epoch: 008/010 | Batch 750/938 | Cost: 0.0175 Epoch: 008/010 | Batch 800/938 | Cost: 0.0942 Epoch: 008/010 | Batch 850/938 | Cost: 0.0452 Epoch: 008/010 | Batch 900/938 | Cost: 0.0179 Epoch: 008/010 training accuracy: 98.79% Time elapsed: 1.67 min Epoch: 009/010 | Batch 000/938 | Cost: 0.0745 Epoch: 009/010 | Batch 050/938 | Cost: 0.0414 Epoch: 009/010 | Batch 100/938 | Cost: 0.1068 Epoch: 009/010 | Batch 150/938 | Cost: 0.0644 Epoch: 009/010 | Batch 200/938 | Cost: 0.0175 Epoch: 009/010 | Batch 250/938 | Cost: 0.0171 Epoch: 009/010 | Batch 300/938 | Cost: 0.0626 Epoch: 009/010 | Batch 350/938 | Cost: 0.1016 Epoch: 009/010 | Batch 400/938 | Cost: 0.0094 Epoch: 009/010 | Batch 450/938 | Cost: 0.0147 Epoch: 009/010 | Batch 500/938 | Cost: 0.0191 Epoch: 009/010 | Batch 550/938 | Cost: 0.0259 Epoch: 009/010 | Batch 600/938 | Cost: 0.0519 Epoch: 009/010 | Batch 650/938 | Cost: 0.0041 Epoch: 009/010 | Batch 700/938 | Cost: 0.0307 Epoch: 009/010 | Batch 750/938 | Cost: 0.0121 Epoch: 009/010 | Batch 800/938 | Cost: 0.0308 Epoch: 009/010 | Batch 850/938 | Cost: 0.0094 Epoch: 009/010 | Batch 900/938 | Cost: 0.0168 Epoch: 009/010 training accuracy: 99.11% Time elapsed: 1.87 min Epoch: 010/010 | Batch 000/938 | Cost: 0.0393 Epoch: 010/010 | Batch 050/938 | Cost: 0.0156 Epoch: 010/010 | Batch 100/938 | Cost: 0.0285 Epoch: 010/010 | Batch 150/938 | Cost: 0.0080 Epoch: 010/010 | Batch 200/938 | Cost: 0.0148 Epoch: 010/010 | Batch 250/938 | Cost: 0.0367 Epoch: 010/010 | Batch 300/938 | Cost: 0.0511 Epoch: 010/010 | Batch 350/938 | Cost: 0.0230 Epoch: 010/010 | Batch 400/938 | Cost: 0.0563 Epoch: 010/010 | Batch 450/938 | Cost: 0.0435 Epoch: 010/010 | Batch 500/938 | Cost: 0.0626 Epoch: 010/010 | Batch 550/938 | Cost: 0.0835 Epoch: 010/010 | Batch 600/938 | Cost: 0.1073 Epoch: 010/010 | Batch 650/938 | Cost: 0.0313 Epoch: 010/010 | Batch 700/938 | Cost: 0.0279 Epoch: 010/010 | Batch 750/938 | Cost: 0.0343 Epoch: 010/010 | Batch 800/938 | Cost: 0.1145 Epoch: 010/010 | Batch 850/938 | Cost: 0.0085 Epoch: 010/010 | Batch 900/938 | Cost: 0.0067 Epoch: 010/010 training accuracy: 99.33% Time elapsed: 2.08 min Total Training Time: 2.08 min
print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
Test accuracy: 97.82%
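To inspect an individual prediction, we can run a single test image through the trained model; a minimal sketch, reusing the model and test_dataset defined above:

# Sketch: predict the class of the first test image
model.eval()
with torch.no_grad():
    example_img, example_label = test_dataset[0]
    logits, probas = model(example_img.view(-1, 28*28).to(device))
    predicted = torch.argmax(probas, dim=1)
print('True label:', example_label, '| Predicted label:', predicted.item())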
One disadvantage of the Sequential wrapper is that it does not let us readily access (or "print") intermediate values. However, we can register custom hooks for that purpose. For reference, the order of operations in our Sequential wrapper is as follows:
model.net
Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU(inplace)
  (2): Linear(in_features=128, out_features=256, bias=True)
  (3): ReLU(inplace)
  (4): Linear(in_features=256, out_features=10, bias=True)
)
If we want to get the output of the second Linear layer (index 2 in the Sequential above) during the forward pass, we can register a forward hook as follows:
outputs = []
def hook(module, input, output):
    outputs.append(output)

model.net[2].register_forward_hook(hook)
<torch.utils.hooks.RemovableHandle at 0x7f659c6685c0>
Now, if we call the model on some inputs, it will save the intermediate results in the "outputs" list:
_ = model(features)
print(outputs)
[tensor([[0.5341, 1.0513, 2.3542,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.6676, 0.6620,  ..., 0.0000, 0.0000, 2.4056],
        [1.1520, 0.0000, 0.0000,  ..., 2.5860, 0.8992, 0.9642],
        ...,
        [0.0000, 0.1076, 0.0000,  ..., 1.8367, 0.0000, 2.5203],
        [0.5415, 0.0000, 0.0000,  ..., 2.7968, 0.8244, 1.6335],
        [1.0710, 0.9805, 3.0103,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:3', grad_fn=<ThresholdBackward1>)]
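Note that register_forward_hook returns a RemovableHandle (as shown in the cell output above). If we only need the hook temporarily, it is worth storing that handle so the hook can be detached again; a minimal sketch of the pattern (written as a stand-alone registration rather than the one above):

# Sketch: keep the RemovableHandle so the hook can be detached later
handle = model.net[2].register_forward_hook(hook)
# ... run the forward passes that should be recorded ...
handle.remove()  # subsequent forward passes no longer append to `outputs`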
%watermark -iv
numpy 1.15.4
torch 1.0.0