#!/usr/bin/env python
# coding: utf-8
# In[1]:
get_ipython().run_line_magic('matplotlib', 'inline')
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
# In[2]:
#use_gpu = torch.cuda.is_available()
# # Function area
# The parameter-summary function comes from Stack Overflow:
# https://stackoverflow.com/questions/42480111/model-summary-in-pytorch
#
# '''
# Note1.
# def _addindent(s_, numSpaces):
# s = s_.split('\n')
# # dont do anything for single-line stuff
# if len(s) == 1:
# return s_
# first = s.pop(0)
# s = [(numSpaces * ' ') + line for line in s]
# s = '\n'.join(s)
# s = first + '\n' + s
# return s
#
# Note2. np.prod: Return the product of array elements over a given axis.
# '''
# In[3]:
# Function 1
from torch.nn.modules.module import _addindent
def torch_summarize(model, show_weights=True, show_parameters=True):
    """Summarize a torch model by listing its direct submodules.

    For every entry of ``model._modules`` the summary shows the submodule's
    representation, optionally the shapes of its parameters and optionally
    its parameter count. Container submodules are summarized recursively.

    Args:
        model: Module whose direct children are listed.
        show_weights: If true, append the parameter shapes of each child.
        show_parameters: If true, append the parameter count of each child.

    Returns:
        A multi-line string ending with the total parameter count of
        ``model``'s children.
    """
    container_module = torch.nn.modules.container
    # ``Container`` is a deprecated class; tolerate builds that lack it.
    container_types = tuple(
        cls
        for cls in (
            getattr(container_module, 'Container', None),
            container_module.Sequential,
        )
        if cls is not None
    )

    total_params = 0
    tmpstr = model.__class__.__name__ + ' :\n'  # Print the object name
    for key, module in model._modules.items():
        # If the child contains layers, recurse to get its params and
        # weights, forwarding the caller's display flags so nested levels
        # are formatted consistently with the top level.
        if isinstance(module, container_types):
            modstr = torch_summarize(
                module,
                show_weights=show_weights,
                show_parameters=show_parameters,
            )
        else:
            modstr = module.__repr__()
        # Indent every line after the first so nested output stays readable.
        modstr = _addindent(modstr, 2)

        child_parameters = list(module.parameters())
        params = sum(p.numel() for p in child_parameters)
        weights = tuple(tuple(p.size()) for p in child_parameters)
        total_params = total_params + params

        tmpstr += ' (' + key + '): ' + modstr
        if show_weights:
            tmpstr += ', weights={}'.format(weights)
        if show_parameters:
            tmpstr += ', parameters={}'.format(params)
        tmpstr += '\n------------------------------------------------------\n'

    tmpstr = tmpstr + ')'
    tmpstr = tmpstr + ' \n##Total Parameters = {} '.format(total_params)
    return tmpstr
# Function 2
def show_batch(batch):
    """Display all images of one batch as a single tiled picture."""
    grid = torchvision.utils.make_grid(batch)
    # Move axis 0 to the end before handing the array to imshow
    # (presumably channels-first -> channels-last; confirm against make_grid).
    grid_array = grid.numpy()
    plt.imshow(np.transpose(grid_array, (1, 2, 0)))
# # Load MNIST Data
# In[4]:
BATCH_SIZE = 200  # setting batch size
transform = transforms.ToTensor()  # Transform them to tensors
# Load and transform data
trainset = torchvision.datasets.MNIST('./mnist', train=True, download=True, transform=transform)
#testset = torchvision.datasets.MNIST('./mnist', train=False, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
#testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
# In[5]:
# Fetch a single batch for a visual sanity check. The built-in next() is used
# because the Python-2-style `.next()` alias is not available on recent
# PyTorch DataLoader iterators, while next() works on every version.
dataiter = iter(trainloader)
images, labels = next(dataiter)
#print('Labels: ', list(labels))
print('Batch shape: ', images.size())
show_batch(images)
# In[ ]:
# # Build Shallow Model
# How to get the size of the feature map:
# Output H = 1 + (Input H + 2*Padding - Filter H) / Stride
# Output W = 1 + (Input W + 2*Padding - Filter W) / Stride
# In[10]:
class Shallow_model(nn.Module):
    """One convolution block followed by a single linear classifier.

    The linear layer hard-codes 13*13*105 input features, i.e. it assumes
    1-channel 28x28 inputs: the unpadded 3x3 convolution yields 26x26 maps
    and the 2x2 max-pooling halves them to 13x13.
    """

    def __init__(self):
        super(Shallow_model, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=105, kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # fully connected layer
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=13*13*105, out_features=10))

    def forward(self, x):
        """Return log-probabilities over the 10 classes, one row per sample."""
        out = self.layer1(x)
        # Flatten everything except the batch dimension for the linear layer.
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        # The last layer is a log-softmax over the class dimension; `dim` is
        # given explicitly because the implicit choice is deprecated.
        return F.log_softmax(out, dim=1)
# In[11]:
# Setting the model & loss function & optimizer
SNet = Shallow_model()
# Sum (rather than average) the per-sample losses of a batch.
# `reduction='sum'` is the current spelling of the deprecated
# `size_average=False`.
loss_func = nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.Adam(SNet.parameters(), lr=1e-3, betas=(0.9, 0.99))
# In[12]:
print(torch_summarize(SNet))
# In[13]:
# Running Model
loss_old = 0.01
check_time = 0
num_of_epoch = 10
iters = 0
for epoch in range(num_of_epoch):
    correct = 0  # correctly classified samples so far in this epoch
    seen = 0     # samples processed so far in this epoch
    for i, (images, labels) in enumerate(trainloader):
        # Tensors track gradients themselves on PyTorch >= 0.4, so the
        # batches are fed to the model without a Variable wrapper.
        # clear gradients w.r.t parameters
        optimizer.zero_grad()
        # forward pass
        outputs = SNet(images)
        # calculate loss
        loss = loss_func(outputs, labels)
        # get gradient w.r.t parameters
        loss.backward()
        # update parameters
        optimizer.step()
        iters += 1
        # Accuracy (accumulated over the batches of this epoch)
        pred = outputs.detach().max(1)[1]  # get the index of the max log-probability
        # .item() turns the 0-dim count into a Python int so the running
        # accuracy is computed with ordinary float arithmetic.
        correct += pred.eq(labels).sum().item()
        # Count real samples so a short final batch does not skew accuracy.
        seen += labels.size(0)
        Acc = 100.0 * correct / seen
        if iters % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accumulation batch acc={:.1f}%'.format(
                epoch+1, i * len(images), len(trainloader.dataset),
                100. * i / len(trainloader), loss.item(), Acc))
    # NOTE(review): the original indentation was lost; the early stop is
    # assumed to be checked once per epoch -- confirm.
    if Acc >= 97:
        break
# # Deep Model
# How to get the size of feature map:
# Output H = 1 + (Input H + 2*Padding - Filter H) / Stride
# Output W = 1 + (Input W + 2*Padding - Filter W) / Stride
# In[14]:
# Filter number of Conv layers
fil = [30, 35, 30]


class Deep_model(nn.Module):
    """Three convolution blocks followed by three linear layers.

    The first linear layer hard-codes 5*5*fil[2] input features, i.e. it
    assumes 1-channel 28x28 inputs: 28 -> 24 (5x5 conv) -> 12 (pool)
    -> 10 (3x3 conv) -> 5 (pool) -> 5 (1x1 conv).
    """

    def __init__(self):
        super(Deep_model, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=fil[0], kernel_size=5, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=fil[0], out_channels=fil[1], kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=fil[1], out_channels=fil[2], kernel_size=1, stride=1),
            nn.ReLU(inplace=True))
        #nn.MaxPool2d(kernel_size=2, stride=2))
        # fully connected layer
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=5*5*fil[2], out_features=200),
            nn.ReLU(inplace=True))
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=200, out_features=80),
            nn.ReLU(inplace=True))
        self.fc3 = nn.Sequential(nn.Linear(in_features=80, out_features=10))

    def forward(self, x):
        """Return log-probabilities over the 10 classes, one row per sample."""
        out = self.layer3(self.layer2(self.layer1(x)))
        # Flatten everything except the batch dimension for the linear layers.
        out = out.view(out.size(0), -1)
        out = self.fc3(self.fc2(self.fc1(out)))
        # `dim` is given explicitly because the implicit choice is deprecated.
        return F.log_softmax(out, dim=1)
# In[15]:
# Setting the model & loss function & optimizer
DNet = Deep_model()
# Sum (rather than average) the per-sample losses of a batch.
# `reduction='sum'` is the current spelling of the deprecated
# `size_average=False`.
loss_func = nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.Adam(DNet.parameters(), lr=1e-3, betas=(0.9, 0.99))
# In[16]:
print(torch_summarize(DNet))
# In[17]:
iters = 0
for epoch in range(10):
    correct = 0  # correctly classified samples so far in this epoch
    seen = 0     # samples processed so far in this epoch
    for i, (images, labels) in enumerate(trainloader):
        # Tensors track gradients themselves on PyTorch >= 0.4, so the
        # batches are fed to the model without a Variable wrapper.
        # clear gradients w.r.t parameters
        optimizer.zero_grad()
        # forward pass
        outputs = DNet(images)
        # calculate loss
        loss = loss_func(outputs, labels)
        # get gradient w.r.t parameters
        loss.backward()
        # update parameters
        optimizer.step()
        iters += 1
        # Accuracy (accumulated over the batches of this epoch)
        pred = outputs.detach().max(1)[1]  # get the index of the max log-probability
        # .item() turns the 0-dim count into a Python int so the running
        # accuracy is computed with ordinary float arithmetic.
        correct += pred.eq(labels).sum().item()
        # Count real samples so a short final batch does not skew accuracy.
        seen += labels.size(0)
        Acc = 100.0 * correct / seen
        if iters % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accumulation batch acc={:.1f}%'.format(
                epoch+1, i * len(images), len(trainloader.dataset),
                100. * i / len(trainloader), loss.item(), Acc))
    # NOTE(review): the original indentation was lost; the early stop is
    # assumed to be checked once per epoch -- confirm.
    if Acc >= 97:
        break
# In[ ]:
# In[ ]: