Deep Learning Models -- A collection of various deep learning architectures, models, and tips for TensorFlow and PyTorch in Jupyter Notebooks.
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p torch
Sebastian Raschka CPython 3.6.8 IPython 7.2.0 torch 1.0.0
import time
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
if torch.cuda.is_available():
    # Make cuDNN deterministic for reproducible results on GPU
    torch.backends.cudnn.deterministic = True
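Side note: torch.backends.cudnn.deterministic = True only disables non-deterministic cuDNN kernels. For fully reproducible GPU runs it is usually also necessary to seed all random number generators and disable the cuDNN autotuner. A minimal sketch follows (the helper below is hypothetical and not used elsewhere in this notebook; the exact flags required can vary across PyTorch versions):

import random

def set_deterministic(seed=1):
    # Hypothetical helper: seeds the Python, NumPy, and PyTorch RNGs
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False  # disable the cuDNN autotuner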
##########################
### SETTINGS
##########################
# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Hyperparameters
random_seed = 1
learning_rate = 0.1
num_epochs = 10
batch_size = 64
# Architecture
num_features = 784
num_hidden_1 = 128
num_hidden_2 = 256
num_classes = 10
##########################
### MNIST DATASET
##########################
# Note transforms.ToTensor() scales input images
# to 0-1 range
train_dataset = datasets.MNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='data',
                              train=False,
                              transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)
# Checking the dataset
for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
Image batch dimensions: torch.Size([64, 1, 28, 28])
Image label dimensions: torch.Size([64])
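As a quick sanity check of the ToTensor() note above, we can inspect the pixel range of the batch fetched in the previous cell (a small sketch, not part of the original notebook; it reuses the images variable from the loop above). For MNIST this should print a range of 0.0 to 1.0:

# Confirm that transforms.ToTensor() scaled the pixels to [0, 1]
print('Pixel value range: %.1f to %.1f' % (images.min(), images.max()))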
##########################
### MODEL
##########################
class MultilayerPerceptron(torch.nn.Module):

    def __init__(self, num_features, num_classes):
        super(MultilayerPerceptron, self).__init__()

        ### 1st hidden layer
        self.linear_1 = torch.nn.Linear(num_features, num_hidden_1)
        # The following two lines are not necessary,
        # but used here to demonstrate how to access the weights
        # and use a different weight initialization.
        # By default, PyTorch uses Kaiming (He) initialization
        # for linear layers, which is usually a sensible default.
        self.linear_1.weight.detach().normal_(0.0, 0.1)
        self.linear_1.bias.detach().zero_()
        self.linear_1_bn = torch.nn.BatchNorm1d(num_hidden_1)

        ### 2nd hidden layer
        self.linear_2 = torch.nn.Linear(num_hidden_1, num_hidden_2)
        self.linear_2.weight.detach().normal_(0.0, 0.1)
        self.linear_2.bias.detach().zero_()
        self.linear_2_bn = torch.nn.BatchNorm1d(num_hidden_2)

        ### Output layer
        self.linear_out = torch.nn.Linear(num_hidden_2, num_classes)
        self.linear_out.weight.detach().normal_(0.0, 0.1)
        self.linear_out.bias.detach().zero_()

    def forward(self, x):
        out = self.linear_1(x)
        # Note that, as in the original batchnorm paper,
        # batchnorm is placed before the activation
        out = self.linear_1_bn(out)
        out = F.relu(out)
        out = self.linear_2(out)
        out = self.linear_2_bn(out)
        out = F.relu(out)
        logits = self.linear_out(out)
        probas = F.softmax(logits, dim=1)
        return logits, probas
torch.manual_seed(random_seed)
model = MultilayerPerceptron(num_features=num_features,
                             num_classes=num_classes)
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
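The .detach().normal_() calls in __init__ above demonstrate direct access to the weight tensors. An equivalent, arguably more idiomatic way to achieve the same custom initialization is via torch.nn.init (shown here only as an illustrative sketch; the notebook itself does not use it):

# Alternative to the manual .detach().normal_() calls above
with torch.no_grad():
    for layer in (model.linear_1, model.linear_2, model.linear_out):
        torch.nn.init.normal_(layer.weight, mean=0.0, std=0.1)
        torch.nn.init.constant_(layer.bias, 0.0)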
def compute_accuracy(net, data_loader):
    net.eval()
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.view(-1, 28*28).to(device)
            targets = targets.to(device)
            logits, probas = net(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
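Note that net.eval() matters here mainly because of the BatchNorm layers: in evaluation mode they normalize with their running mean/variance estimates rather than with per-batch statistics, so predictions do not depend on how a batch happens to be composed. A minimal illustration (a sketch, not part of the original notebook):

# BatchNorm behaves differently in train vs. eval mode
bn = torch.nn.BatchNorm1d(num_hidden_1)
x = torch.randn(batch_size, num_hidden_1)
bn.train()
out_train = bn(x)  # normalized with this batch's mean/variance
bn.eval()
out_eval = bn(x)   # normalized with the running estimates
print(torch.allclose(out_train, out_eval))  # typically False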
start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.view(-1, 28*28).to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        if not batch_idx % 50:
            print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                  % (epoch+1, num_epochs, batch_idx,
                     len(train_loader), cost))

    print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
          epoch+1, num_epochs,
          compute_accuracy(model, train_loader)))

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))
Epoch: 001/010 | Batch 000/938 | Cost: 2.6465
Epoch: 001/010 | Batch 050/938 | Cost: 1.0305
Epoch: 001/010 | Batch 100/938 | Cost: 0.5404
Epoch: 001/010 | Batch 150/938 | Cost: 0.4430
Epoch: 001/010 | Batch 200/938 | Cost: 0.3235
Epoch: 001/010 | Batch 250/938 | Cost: 0.1927
Epoch: 001/010 | Batch 300/938 | Cost: 0.4007
Epoch: 001/010 | Batch 350/938 | Cost: 0.3802
Epoch: 001/010 | Batch 400/938 | Cost: 0.2528
Epoch: 001/010 | Batch 450/938 | Cost: 0.2257
Epoch: 001/010 | Batch 500/938 | Cost: 0.1454
Epoch: 001/010 | Batch 550/938 | Cost: 0.2160
Epoch: 001/010 | Batch 600/938 | Cost: 0.3425
Epoch: 001/010 | Batch 650/938 | Cost: 0.2175
Epoch: 001/010 | Batch 700/938 | Cost: 0.2307
Epoch: 001/010 | Batch 750/938 | Cost: 0.3723
Epoch: 001/010 | Batch 800/938 | Cost: 0.2452
Epoch: 001/010 | Batch 850/938 | Cost: 0.1285
Epoch: 001/010 | Batch 900/938 | Cost: 0.1302
Epoch: 001/010 training accuracy: 95.63%
Time elapsed: 0.22 min
Epoch: 002/010 | Batch 000/938 | Cost: 0.2137
Epoch: 002/010 | Batch 050/938 | Cost: 0.1923
Epoch: 002/010 | Batch 100/938 | Cost: 0.1739
Epoch: 002/010 | Batch 150/938 | Cost: 0.0742
Epoch: 002/010 | Batch 200/938 | Cost: 0.2186
Epoch: 002/010 | Batch 250/938 | Cost: 0.1424
Epoch: 002/010 | Batch 300/938 | Cost: 0.1131
Epoch: 002/010 | Batch 350/938 | Cost: 0.0575
Epoch: 002/010 | Batch 400/938 | Cost: 0.1232
Epoch: 002/010 | Batch 450/938 | Cost: 0.2385
Epoch: 002/010 | Batch 500/938 | Cost: 0.1344
Epoch: 002/010 | Batch 550/938 | Cost: 0.0950
Epoch: 002/010 | Batch 600/938 | Cost: 0.1565
Epoch: 002/010 | Batch 650/938 | Cost: 0.1312
Epoch: 002/010 | Batch 700/938 | Cost: 0.0859
Epoch: 002/010 | Batch 750/938 | Cost: 0.1722
Epoch: 002/010 | Batch 800/938 | Cost: 0.0630
Epoch: 002/010 | Batch 850/938 | Cost: 0.2606
Epoch: 002/010 | Batch 900/938 | Cost: 0.1681
Epoch: 002/010 training accuracy: 96.94%
Time elapsed: 0.45 min
Epoch: 003/010 | Batch 000/938 | Cost: 0.0676
Epoch: 003/010 | Batch 050/938 | Cost: 0.1975
Epoch: 003/010 | Batch 100/938 | Cost: 0.1241
Epoch: 003/010 | Batch 150/938 | Cost: 0.1723
Epoch: 003/010 | Batch 200/938 | Cost: 0.2233
Epoch: 003/010 | Batch 250/938 | Cost: 0.2249
Epoch: 003/010 | Batch 300/938 | Cost: 0.1027
Epoch: 003/010 | Batch 350/938 | Cost: 0.0369
Epoch: 003/010 | Batch 400/938 | Cost: 0.1460
Epoch: 003/010 | Batch 450/938 | Cost: 0.0430
Epoch: 003/010 | Batch 500/938 | Cost: 0.0821
Epoch: 003/010 | Batch 550/938 | Cost: 0.1188
Epoch: 003/010 | Batch 600/938 | Cost: 0.0424
Epoch: 003/010 | Batch 650/938 | Cost: 0.2548
Epoch: 003/010 | Batch 700/938 | Cost: 0.1219
Epoch: 003/010 | Batch 750/938 | Cost: 0.0623
Epoch: 003/010 | Batch 800/938 | Cost: 0.0557
Epoch: 003/010 | Batch 850/938 | Cost: 0.0999
Epoch: 003/010 | Batch 900/938 | Cost: 0.0595
Epoch: 003/010 training accuracy: 97.93%
Time elapsed: 0.66 min
Epoch: 004/010 | Batch 000/938 | Cost: 0.1017
Epoch: 004/010 | Batch 050/938 | Cost: 0.0885
Epoch: 004/010 | Batch 100/938 | Cost: 0.0252
Epoch: 004/010 | Batch 150/938 | Cost: 0.1987
Epoch: 004/010 | Batch 200/938 | Cost: 0.0377
Epoch: 004/010 | Batch 250/938 | Cost: 0.1986
Epoch: 004/010 | Batch 300/938 | Cost: 0.1076
Epoch: 004/010 | Batch 350/938 | Cost: 0.0270
Epoch: 004/010 | Batch 400/938 | Cost: 0.1977
Epoch: 004/010 | Batch 450/938 | Cost: 0.0623
Epoch: 004/010 | Batch 500/938 | Cost: 0.1706
Epoch: 004/010 | Batch 550/938 | Cost: 0.0296
Epoch: 004/010 | Batch 600/938 | Cost: 0.0899
Epoch: 004/010 | Batch 650/938 | Cost: 0.0479
Epoch: 004/010 | Batch 700/938 | Cost: 0.0615
Epoch: 004/010 | Batch 750/938 | Cost: 0.0633
Epoch: 004/010 | Batch 800/938 | Cost: 0.0348
Epoch: 004/010 | Batch 850/938 | Cost: 0.0710
Epoch: 004/010 | Batch 900/938 | Cost: 0.1097
Epoch: 004/010 training accuracy: 98.49%
Time elapsed: 0.88 min
Epoch: 005/010 | Batch 000/938 | Cost: 0.0251
Epoch: 005/010 | Batch 050/938 | Cost: 0.0213
Epoch: 005/010 | Batch 100/938 | Cost: 0.0694
Epoch: 005/010 | Batch 150/938 | Cost: 0.1481
Epoch: 005/010 | Batch 200/938 | Cost: 0.1333
Epoch: 005/010 | Batch 250/938 | Cost: 0.0117
Epoch: 005/010 | Batch 300/938 | Cost: 0.0978
Epoch: 005/010 | Batch 350/938 | Cost: 0.0204
Epoch: 005/010 | Batch 400/938 | Cost: 0.0517
Epoch: 005/010 | Batch 450/938 | Cost: 0.0371
Epoch: 005/010 | Batch 500/938 | Cost: 0.0337
Epoch: 005/010 | Batch 550/938 | Cost: 0.1566
Epoch: 005/010 | Batch 600/938 | Cost: 0.1280
Epoch: 005/010 | Batch 650/938 | Cost: 0.1210
Epoch: 005/010 | Batch 700/938 | Cost: 0.1570
Epoch: 005/010 | Batch 750/938 | Cost: 0.0531
Epoch: 005/010 | Batch 800/938 | Cost: 0.0136
Epoch: 005/010 | Batch 850/938 | Cost: 0.1199
Epoch: 005/010 | Batch 900/938 | Cost: 0.0485
Epoch: 005/010 training accuracy: 98.75%
Time elapsed: 1.10 min
Epoch: 006/010 | Batch 000/938 | Cost: 0.0548
Epoch: 006/010 | Batch 050/938 | Cost: 0.0178
Epoch: 006/010 | Batch 100/938 | Cost: 0.0137
Epoch: 006/010 | Batch 150/938 | Cost: 0.0555
Epoch: 006/010 | Batch 200/938 | Cost: 0.1317
Epoch: 006/010 | Batch 250/938 | Cost: 0.0326
Epoch: 006/010 | Batch 300/938 | Cost: 0.0615
Epoch: 006/010 | Batch 350/938 | Cost: 0.0594
Epoch: 006/010 | Batch 400/938 | Cost: 0.0780
Epoch: 006/010 | Batch 450/938 | Cost: 0.0451
Epoch: 006/010 | Batch 500/938 | Cost: 0.1128
Epoch: 006/010 | Batch 550/938 | Cost: 0.0465
Epoch: 006/010 | Batch 600/938 | Cost: 0.0719
Epoch: 006/010 | Batch 650/938 | Cost: 0.0286
Epoch: 006/010 | Batch 700/938 | Cost: 0.0323
Epoch: 006/010 | Batch 750/938 | Cost: 0.0246
Epoch: 006/010 | Batch 800/938 | Cost: 0.0303
Epoch: 006/010 | Batch 850/938 | Cost: 0.0532
Epoch: 006/010 | Batch 900/938 | Cost: 0.0584
Epoch: 006/010 training accuracy: 98.99%
Time elapsed: 1.33 min
Epoch: 007/010 | Batch 000/938 | Cost: 0.0348
Epoch: 007/010 | Batch 050/938 | Cost: 0.0086
Epoch: 007/010 | Batch 100/938 | Cost: 0.0448
Epoch: 007/010 | Batch 150/938 | Cost: 0.0301
Epoch: 007/010 | Batch 200/938 | Cost: 0.0218
Epoch: 007/010 | Batch 250/938 | Cost: 0.0705
Epoch: 007/010 | Batch 300/938 | Cost: 0.0957
Epoch: 007/010 | Batch 350/938 | Cost: 0.0849
Epoch: 007/010 | Batch 400/938 | Cost: 0.0368
Epoch: 007/010 | Batch 450/938 | Cost: 0.0423
Epoch: 007/010 | Batch 500/938 | Cost: 0.0450
Epoch: 007/010 | Batch 550/938 | Cost: 0.0101
Epoch: 007/010 | Batch 600/938 | Cost: 0.0460
Epoch: 007/010 | Batch 650/938 | Cost: 0.0290
Epoch: 007/010 | Batch 700/938 | Cost: 0.0351
Epoch: 007/010 | Batch 750/938 | Cost: 0.0317
Epoch: 007/010 | Batch 800/938 | Cost: 0.0574
Epoch: 007/010 | Batch 850/938 | Cost: 0.0758
Epoch: 007/010 | Batch 900/938 | Cost: 0.0172
Epoch: 007/010 training accuracy: 99.31%
Time elapsed: 1.55 min
Epoch: 008/010 | Batch 000/938 | Cost: 0.0331
Epoch: 008/010 | Batch 050/938 | Cost: 0.0113
Epoch: 008/010 | Batch 100/938 | Cost: 0.0890
Epoch: 008/010 | Batch 150/938 | Cost: 0.0309
Epoch: 008/010 | Batch 200/938 | Cost: 0.0391
Epoch: 008/010 | Batch 250/938 | Cost: 0.0567
Epoch: 008/010 | Batch 300/938 | Cost: 0.0330
Epoch: 008/010 | Batch 350/938 | Cost: 0.0342
Epoch: 008/010 | Batch 400/938 | Cost: 0.0904
Epoch: 008/010 | Batch 450/938 | Cost: 0.0247
Epoch: 008/010 | Batch 500/938 | Cost: 0.0359
Epoch: 008/010 | Batch 550/938 | Cost: 0.0544
Epoch: 008/010 | Batch 600/938 | Cost: 0.0428
Epoch: 008/010 | Batch 650/938 | Cost: 0.0105
Epoch: 008/010 | Batch 700/938 | Cost: 0.0986
Epoch: 008/010 | Batch 750/938 | Cost: 0.0188
Epoch: 008/010 | Batch 800/938 | Cost: 0.0153
Epoch: 008/010 | Batch 850/938 | Cost: 0.0095
Epoch: 008/010 | Batch 900/938 | Cost: 0.0464
Epoch: 008/010 training accuracy: 99.36%
Time elapsed: 1.76 min
Epoch: 009/010 | Batch 000/938 | Cost: 0.0491
Epoch: 009/010 | Batch 050/938 | Cost: 0.0390
Epoch: 009/010 | Batch 100/938 | Cost: 0.1674
Epoch: 009/010 | Batch 150/938 | Cost: 0.0409
Epoch: 009/010 | Batch 200/938 | Cost: 0.0664
Epoch: 009/010 | Batch 250/938 | Cost: 0.0775
Epoch: 009/010 | Batch 300/938 | Cost: 0.0383
Epoch: 009/010 | Batch 350/938 | Cost: 0.0214
Epoch: 009/010 | Batch 400/938 | Cost: 0.0217
Epoch: 009/010 | Batch 450/938 | Cost: 0.0254
Epoch: 009/010 | Batch 500/938 | Cost: 0.0369
Epoch: 009/010 | Batch 550/938 | Cost: 0.0154
Epoch: 009/010 | Batch 600/938 | Cost: 0.0524
Epoch: 009/010 | Batch 650/938 | Cost: 0.0727
Epoch: 009/010 | Batch 700/938 | Cost: 0.0718
Epoch: 009/010 | Batch 750/938 | Cost: 0.0279
Epoch: 009/010 | Batch 800/938 | Cost: 0.0238
Epoch: 009/010 | Batch 850/938 | Cost: 0.0236
Epoch: 009/010 | Batch 900/938 | Cost: 0.0147
Epoch: 009/010 training accuracy: 99.46%
Time elapsed: 1.98 min
Epoch: 010/010 | Batch 000/938 | Cost: 0.0172
Epoch: 010/010 | Batch 050/938 | Cost: 0.0071
Epoch: 010/010 | Batch 100/938 | Cost: 0.0308
Epoch: 010/010 | Batch 150/938 | Cost: 0.0047
Epoch: 010/010 | Batch 200/938 | Cost: 0.0716
Epoch: 010/010 | Batch 250/938 | Cost: 0.0162
Epoch: 010/010 | Batch 300/938 | Cost: 0.0614
Epoch: 010/010 | Batch 350/938 | Cost: 0.0308
Epoch: 010/010 | Batch 400/938 | Cost: 0.0571
Epoch: 010/010 | Batch 450/938 | Cost: 0.0050
Epoch: 010/010 | Batch 500/938 | Cost: 0.0548
Epoch: 010/010 | Batch 550/938 | Cost: 0.0269
Epoch: 010/010 | Batch 600/938 | Cost: 0.0378
Epoch: 010/010 | Batch 650/938 | Cost: 0.0120
Epoch: 010/010 | Batch 700/938 | Cost: 0.0298
Epoch: 010/010 | Batch 750/938 | Cost: 0.0781
Epoch: 010/010 | Batch 800/938 | Cost: 0.0251
Epoch: 010/010 | Batch 850/938 | Cost: 0.0693
Epoch: 010/010 | Batch 900/938 | Cost: 0.0499
Epoch: 010/010 training accuracy: 99.61%
Time elapsed: 2.20 min
Total Training Time: 2.20 min
print('Test accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
Test accuracy: 97.82%
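To use the trained model for a prediction on an individual example, the same flatten-and-forward steps apply. A short inference sketch (not part of the original notebook; it uses the first test image purely as an example):

# Predict the label of a single test image
model.eval()
with torch.no_grad():
    image, label = test_dataset[0]
    logits, probas = model(image.view(-1, 28*28).to(device))
    predicted_label = torch.argmax(probas, dim=1).item()
print('True label: %d | Predicted label: %d' % (label, predicted_label))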
%watermark -iv
numpy 1.15.4 torch 1.0.0