%matplotlib inline
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
#use_gpu = torch.cuda.is_available()
The parameter-summary function below comes from Stack Overflow:
https://stackoverflow.com/questions/42480111/model-summary-in-pytorch
'''
Note 1. _addindent (from torch.nn.modules.module) indents every line of a
multi-line string except the first:

def _addindent(s_, numSpaces):
    s = s_.split('\n')
    # don't do anything for single-line stuff
    if len(s) == 1:
        return s_
    first = s.pop(0)
    s = [(numSpaces * ' ') + line for line in s]
    s = '\n'.join(s)
    s = first + '\n' + s
    return s

Note 2. np.prod: returns the product of array elements over a given axis.
'''
# Function 1
from torch.nn.modules.module import _addindent
def torch_summarize(model, show_weights=True, show_parameters=True):
    """Summarizes a torch model by showing trainable parameters and weights."""
    total_params = 0
    tmpstr = model.__class__.__name__ + ' :\n'  # print the class name
    for key, module in model._modules.items():
        # if the module contains layers, call recursively to get params and weights
        if type(module) in [
            torch.nn.modules.container.Container,
            torch.nn.modules.container.Sequential
        ]:
            modstr = torch_summarize(module)
        else:
            modstr = module.__repr__()
        modstr = _addindent(modstr, 2)  # Note 1
        params = sum([np.prod(p.size()) for p in module.parameters()])  # Note 2
        weights = tuple([tuple(p.size()) for p in module.parameters()])
        total_params = total_params + params
        tmpstr += '  (' + key + '): ' + modstr
        if show_weights:
            tmpstr += ', weights={}'.format(weights)
        if show_parameters:
            tmpstr += ', parameters={}'.format(params)
        tmpstr += '\n------------------------------------------------------\n'
    tmpstr = tmpstr + ')'
    tmpstr = tmpstr + ' \n##Total Parameters = {} '.format(total_params)
    return tmpstr
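As a sanity check on Note 2, the per-module count that torch_summarize reports can be reproduced by hand. A small sketch for the 3x3 convolution used in the shallow model below:

# Conv2d(1, 105, 3) holds a (105, 1, 3, 3) weight tensor and a (105,) bias,
# so np.prod over each parameter's size gives 105*1*3*3 + 105 = 1050.
conv = nn.Conv2d(in_channels=1, out_channels=105, kernel_size=3)
print(sum(np.prod(p.size()) for p in conv.parameters()))  # 1050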
# Function 2
def show_batch(batch):
    '''Show the images of one batch.'''
    im = torchvision.utils.make_grid(batch)
    plt.imshow(np.transpose(im.numpy(), (1, 2, 0)))
BATCH_SIZE = 200 # setting batch size
transform = transforms.ToTensor() # Transform them to tensors
# Load and transform data
trainset = torchvision.datasets.MNIST('./mnist', train=True, download=True, transform=transform)
#testset = torchvision.datasets.MNIST('./mnist', train=False, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
#testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
dataiter = iter(trainloader)
images, labels = next(dataiter)
#print('Labels: ', list(labels))
print('Batch shape: ', images.size())
show_batch(images)
Batch shape: torch.Size([200, 1, 28, 28])
How to compute the size of the output feature map:
Output H = 1 + (Input H + 2 * Padding - Filter H) / Stride
Output W = 1 + (Input W + 2 * Padding - Filter W) / Stride
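For example, the shallow model below feeds 28x28 MNIST images through a 3x3 convolution (stride 1, no padding) and a 2x2 max-pool (stride 2):

conv_h = 1 + (28 + 2 * 0 - 3) // 1   # 26
pool_h = 1 + (26 + 2 * 0 - 2) // 2   # 13
# hence the fully connected layer sees 13 * 13 * 105 = 17745 features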
class Shallow_model(nn.Module):
    def __init__(self):
        super(Shallow_model, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=105, kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # fully connected layer
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=13*13*105, out_features=10))

    def forward(self, x):
        out = self.layer1(x)
        out = out.view(out.size(0), -1)  # flatten to (batch, 13*13*105)
        out = self.fc1(out)
        return F.log_softmax(out, dim=1)  # log-probabilities over the 10 classes
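Before training, a quick shape check is useful. This is just a sketch that pushes a random dummy batch through a fresh, untrained model:

# Shape sanity check (sketch): a random "batch" of four 28x28 grayscale images
dummy = Variable(torch.randn(4, 1, 28, 28))
out = Shallow_model()(dummy)
print(out.size())  # expected: torch.Size([4, 10]) -- one log-probability per class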
# Setting the model & loss function & optimizer
SNet = Shallow_model()
# The model already returns log-probabilities, so use NLLLoss here;
# CrossEntropyLoss would apply log-softmax a second time.
loss_func = nn.NLLLoss(size_average=False)
optimizer = torch.optim.Adam(SNet.parameters(), lr=1e-3, betas=(0.9, 0.99))
print(torch_summarize(SNet))
Shallow_model :
  (layer1): Sequential :
    (0): Conv2d(1, 105, kernel_size=(3, 3), stride=(1, 1)), weights=((105, 1, 3, 3), (105,)), parameters=1050
    ------------------------------------------------------
    (1): ReLU(inplace), weights=(), parameters=0
    ------------------------------------------------------
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False), weights=(), parameters=0
    ------------------------------------------------------
  )
  ##Total Parameters = 1050 , weights=((105, 1, 3, 3), (105,)), parameters=1050
  ------------------------------------------------------
  (fc1): Sequential :
    (0): Linear(in_features=17745, out_features=10, bias=True), weights=((10, 17745), (10,)), parameters=177460
    ------------------------------------------------------
  )
  ##Total Parameters = 177460 , weights=((10, 17745), (10,)), parameters=177460
  ------------------------------------------------------
)
##Total Parameters = 178510
# Running the model
num_of_epoch = 10
iters = 0
for epoch in range(num_of_epoch):
    correct = 0
    for i, (images, labels) in enumerate(trainloader):
        # convert tensors to Variables
        images = Variable(images)
        labels = Variable(labels)
        # clear gradients w.r.t. parameters
        optimizer.zero_grad()
        # forward pass
        outputs = SNet(images)
        # calculate loss
        loss = loss_func(outputs, labels)
        # get gradients w.r.t. parameters
        loss.backward()
        # update parameters
        optimizer.step()
        iters += 1
        # running accuracy over the batches seen so far this epoch
        pred = outputs.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(labels.data).cpu().sum()
        Acc = correct / ((i + 1) * BATCH_SIZE) * 100
        if iters % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accumulation batch acc={:.1f}%'.format(
                epoch + 1, i * len(images), len(trainloader.dataset),
                100. * i / len(trainloader), loss.data[0], Acc))
        if Acc >= 97:
            break
    if Acc >= 97:  # the inner break only exits the batch loop, so stop the epochs too
        break
Train Epoch: 1 [3800/60000 (6%)]    Loss: 124.736107, Accumulation batch acc=60.8%
Train Epoch: 1 [7800/60000 (13%)]   Loss: 76.771278, Accumulation batch acc=74.0%
Train Epoch: 1 [11800/60000 (20%)]  Loss: 76.151733, Accumulation batch acc=79.6%
Train Epoch: 1 [15800/60000 (26%)]  Loss: 47.040329, Accumulation batch acc=82.9%
Train Epoch: 1 [19800/60000 (33%)]  Loss: 44.093231, Accumulation batch acc=85.0%
Train Epoch: 1 [23800/60000 (40%)]  Loss: 39.303097, Accumulation batch acc=86.5%
Train Epoch: 1 [27800/60000 (46%)]  Loss: 29.995150, Accumulation batch acc=87.7%
Train Epoch: 1 [31800/60000 (53%)]  Loss: 46.388893, Accumulation batch acc=88.6%
Train Epoch: 1 [35800/60000 (60%)]  Loss: 27.591980, Accumulation batch acc=89.4%
Train Epoch: 1 [39800/60000 (66%)]  Loss: 39.556881, Accumulation batch acc=90.0%
Train Epoch: 1 [43800/60000 (73%)]  Loss: 29.579845, Accumulation batch acc=90.5%
Train Epoch: 1 [47800/60000 (80%)]  Loss: 36.751682, Accumulation batch acc=91.0%
Train Epoch: 1 [51800/60000 (86%)]  Loss: 25.697157, Accumulation batch acc=91.4%
Train Epoch: 1 [55800/60000 (93%)]  Loss: 30.895018, Accumulation batch acc=91.8%
Train Epoch: 1 [59800/60000 (100%)] Loss: 25.055357, Accumulation batch acc=92.1%
Train Epoch: 2 [3800/60000 (6%)]    Loss: 13.464671, Accumulation batch acc=97.5%
Train Epoch: 2 [7800/60000 (13%)]   Loss: 17.068445, Accumulation batch acc=97.5%
Train Epoch: 2 [11800/60000 (20%)]  Loss: 15.550008, Accumulation batch acc=97.6%
Train Epoch: 2 [15800/60000 (26%)]  Loss: 24.389906, Accumulation batch acc=97.5%
Train Epoch: 2 [19800/60000 (33%)]  Loss: 22.837128, Accumulation batch acc=97.5%
Train Epoch: 2 [23800/60000 (40%)]  Loss: 16.299644, Accumulation batch acc=97.4%
Train Epoch: 2 [27800/60000 (46%)]  Loss: 10.810911, Accumulation batch acc=97.5%
Train Epoch: 2 [31800/60000 (53%)]  Loss: 13.949410, Accumulation batch acc=97.5%
Train Epoch: 2 [35800/60000 (60%)]  Loss: 17.886375, Accumulation batch acc=97.5%
Train Epoch: 2 [39800/60000 (66%)]  Loss: 12.107552, Accumulation batch acc=97.5%
Train Epoch: 2 [43800/60000 (73%)]  Loss: 7.118986, Accumulation batch acc=97.6%
Train Epoch: 2 [47800/60000 (80%)]  Loss: 21.743937, Accumulation batch acc=97.6%
Train Epoch: 2 [51800/60000 (86%)]  Loss: 21.522108, Accumulation batch acc=97.6%
Train Epoch: 2 [55800/60000 (93%)]  Loss: 14.585538, Accumulation batch acc=97.7%
Train Epoch: 2 [59800/60000 (100%)] Loss: 10.544326, Accumulation batch acc=97.7%
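The test set is commented out above; if you uncomment testset and testloader, a minimal evaluation sketch (using the same pre-0.4 Variable API as the rest of this notebook) could look like this:

# Evaluation sketch -- assumes the commented-out testset/testloader above are enabled
correct = 0
for images, labels in testloader:
    outputs = SNet(Variable(images, volatile=True))  # volatile: no autograd bookkeeping
    pred = outputs.data.max(1)[1]
    correct += pred.eq(labels).cpu().sum()
print('Test accuracy: {:.1f}%'.format(100. * correct / len(testloader.dataset)))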
The same formula gives the feature-map sizes for the deep model:
Output H = 1 + (Input H + 2 * Padding - Filter H) / Stride
Output W = 1 + (Input W + 2 * Padding - Filter W) / Stride
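Tracing the deep model's spatial sizes with this formula (a quick sketch; the numbers follow from the layer definitions below):

h = 28
h = 1 + (h - 5) // 1   # 5x5 conv, stride 1, no padding -> 24
h = 1 + (h - 2) // 2   # 2x2 max-pool, stride 2         -> 12
h = 1 + (h - 3) // 1   # 3x3 conv                       -> 10
h = 1 + (h - 2) // 2   # 2x2 max-pool                   -> 5
# the 1x1 conv keeps the spatial size at 5, so fc1 sees 5 * 5 * fil[2] = 750 features
print(h)  # 5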
# Filter counts for the three conv layers
fil = [30, 35, 30]

class Deep_model(nn.Module):
    def __init__(self):
        super(Deep_model, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=fil[0], kernel_size=5, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=fil[0], out_channels=fil[1], kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=fil[1], out_channels=fil[2], kernel_size=1, stride=1),
            nn.ReLU(inplace=True))
        # fully connected layers
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=5*5*fil[2], out_features=200),
            nn.ReLU(inplace=True))
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=200, out_features=80),
            nn.ReLU(inplace=True))
        self.fc3 = nn.Sequential(nn.Linear(in_features=80, out_features=10))

    def forward(self, x):
        out = self.layer3(self.layer2(self.layer1(x)))
        out = out.view(out.size(0), -1)  # flatten to (batch, 5*5*fil[2])
        out = self.fc3(self.fc2(self.fc1(out)))
        return F.log_softmax(out, dim=1)
# Setting the model & loss function & optimizer
DNet = Deep_model()
loss_func = nn.NLLLoss(size_average=False)  # again, the model outputs log-probabilities
optimizer = torch.optim.Adam(DNet.parameters(), lr=1e-3, betas=(0.9, 0.99))
print(torch_summarize(DNet))
Deep_model :
  (layer1): Sequential :
    (0): Conv2d(1, 30, kernel_size=(5, 5), stride=(1, 1)), weights=((30, 1, 5, 5), (30,)), parameters=780
    ------------------------------------------------------
    (1): ReLU(inplace), weights=(), parameters=0
    ------------------------------------------------------
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False), weights=(), parameters=0
    ------------------------------------------------------
  )
  ##Total Parameters = 780 , weights=((30, 1, 5, 5), (30,)), parameters=780
  ------------------------------------------------------
  (layer2): Sequential :
    (0): Conv2d(30, 35, kernel_size=(3, 3), stride=(1, 1)), weights=((35, 30, 3, 3), (35,)), parameters=9485
    ------------------------------------------------------
    (1): ReLU(inplace), weights=(), parameters=0
    ------------------------------------------------------
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1), ceil_mode=False), weights=(), parameters=0
    ------------------------------------------------------
  )
  ##Total Parameters = 9485 , weights=((35, 30, 3, 3), (35,)), parameters=9485
  ------------------------------------------------------
  (layer3): Sequential :
    (0): Conv2d(35, 30, kernel_size=(1, 1), stride=(1, 1)), weights=((30, 35, 1, 1), (30,)), parameters=1080
    ------------------------------------------------------
    (1): ReLU(inplace), weights=(), parameters=0
    ------------------------------------------------------
  )
  ##Total Parameters = 1080 , weights=((30, 35, 1, 1), (30,)), parameters=1080
  ------------------------------------------------------
  (fc1): Sequential :
    (0): Linear(in_features=750, out_features=200, bias=True), weights=((200, 750), (200,)), parameters=150200
    ------------------------------------------------------
    (1): ReLU(inplace), weights=(), parameters=0
    ------------------------------------------------------
  )
  ##Total Parameters = 150200 , weights=((200, 750), (200,)), parameters=150200
  ------------------------------------------------------
  (fc2): Sequential :
    (0): Linear(in_features=200, out_features=80, bias=True), weights=((80, 200), (80,)), parameters=16080
    ------------------------------------------------------
    (1): ReLU(inplace), weights=(), parameters=0
    ------------------------------------------------------
  )
  ##Total Parameters = 16080 , weights=((80, 200), (80,)), parameters=16080
  ------------------------------------------------------
  (fc3): Sequential :
    (0): Linear(in_features=80, out_features=10, bias=True), weights=((10, 80), (10,)), parameters=810
    ------------------------------------------------------
  )
  ##Total Parameters = 810 , weights=((10, 80), (10,)), parameters=810
  ------------------------------------------------------
)
##Total Parameters = 178435
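Note that the two architectures are nearly matched in capacity: the shallow model has 178,510 parameters and the deep model 178,435, so the comparison between them isolates the effect of depth rather than parameter count.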
iters = 0
for epoch in range(10):
    correct = 0
    for i, (images, labels) in enumerate(trainloader):
        # convert tensors to Variables
        images = Variable(images)
        labels = Variable(labels)
        # clear gradients w.r.t. parameters
        optimizer.zero_grad()
        # forward pass
        outputs = DNet(images)
        # calculate loss
        loss = loss_func(outputs, labels)
        # get gradients w.r.t. parameters
        loss.backward()
        # update parameters
        optimizer.step()
        iters += 1
        # running accuracy over the batches seen so far this epoch
        pred = outputs.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(labels.data).cpu().sum()
        Acc = correct / ((i + 1) * BATCH_SIZE) * 100
        if iters % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accumulation batch acc={:.1f}%'.format(
                epoch + 1, i * len(images), len(trainloader.dataset),
                100. * i / len(trainloader), loss.data[0], Acc))
        if Acc >= 97:
            break
    if Acc >= 97:  # the inner break only exits the batch loop, so stop the epochs too
        break
Train Epoch: 1 [3800/60000 (6%)]    Loss: 366.770233, Accumulation batch acc=28.1%
Train Epoch: 1 [7800/60000 (13%)]   Loss: 135.742355, Accumulation batch acc=49.6%
Train Epoch: 1 [11800/60000 (20%)]  Loss: 93.916733, Accumulation batch acc=61.4%
Train Epoch: 1 [15800/60000 (26%)]  Loss: 82.038795, Accumulation batch acc=67.9%
Train Epoch: 1 [19800/60000 (33%)]  Loss: 51.646832, Accumulation batch acc=72.2%
Train Epoch: 1 [23800/60000 (40%)]  Loss: 70.576019, Accumulation batch acc=75.4%
Train Epoch: 1 [27800/60000 (46%)]  Loss: 29.793171, Accumulation batch acc=77.9%
Train Epoch: 1 [31800/60000 (53%)]  Loss: 19.501791, Accumulation batch acc=80.0%
Train Epoch: 1 [35800/60000 (60%)]  Loss: 24.378628, Accumulation batch acc=81.7%
Train Epoch: 1 [39800/60000 (66%)]  Loss: 29.607759, Accumulation batch acc=83.1%
Train Epoch: 1 [43800/60000 (73%)]  Loss: 31.054874, Accumulation batch acc=84.3%
Train Epoch: 1 [47800/60000 (80%)]  Loss: 23.495838, Accumulation batch acc=85.3%
Train Epoch: 1 [51800/60000 (86%)]  Loss: 14.021351, Accumulation batch acc=86.1%
Train Epoch: 1 [55800/60000 (93%)]  Loss: 22.884119, Accumulation batch acc=86.8%
Train Epoch: 1 [59800/60000 (100%)] Loss: 17.105442, Accumulation batch acc=87.5%
Train Epoch: 2 [3800/60000 (6%)]    Loss: 14.681189, Accumulation batch acc=97.0%
Train Epoch: 2 [7800/60000 (13%)]   Loss: 10.660532, Accumulation batch acc=97.2%
Train Epoch: 2 [11800/60000 (20%)]  Loss: 24.991585, Accumulation batch acc=97.3%
Train Epoch: 2 [15800/60000 (26%)]  Loss: 16.203720, Accumulation batch acc=97.3%
Train Epoch: 2 [19800/60000 (33%)]  Loss: 19.821259, Accumulation batch acc=97.3%
Train Epoch: 2 [23800/60000 (40%)]  Loss: 10.361759, Accumulation batch acc=97.3%
Train Epoch: 2 [27800/60000 (46%)]  Loss: 23.170311, Accumulation batch acc=97.3%
Train Epoch: 2 [31800/60000 (53%)]  Loss: 23.077343, Accumulation batch acc=97.3%
Train Epoch: 2 [35800/60000 (60%)]  Loss: 6.488492, Accumulation batch acc=97.3%
Train Epoch: 2 [39800/60000 (66%)]  Loss: 20.268070, Accumulation batch acc=97.4%
Train Epoch: 2 [43800/60000 (73%)]  Loss: 8.840987, Accumulation batch acc=97.4%
Train Epoch: 2 [47800/60000 (80%)]  Loss: 27.512354, Accumulation batch acc=97.4%
Train Epoch: 2 [51800/60000 (86%)]  Loss: 18.465469, Accumulation batch acc=97.5%
Train Epoch: 2 [55800/60000 (93%)]  Loss: 5.769819, Accumulation batch acc=97.5%
Train Epoch: 2 [59800/60000 (100%)] Loss: 5.859987, Accumulation batch acc=97.6%