#!/usr/bin/env python
# coding: utf-8

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable

# In[2]:

#use_gpu = torch.cuda.is_available()

# # Function area
# Parameters summary function come from stackoverflow：
# https://stackoverflow.com/questions/42480111/model-summary-in-pytorch
#
# '''
# Note1.
# def _addindent(s_, numSpaces):
#     s = s_.split('\n')
#     # dont do anything for single-line stuff
#     if len(s) == 1:
#         return s_
#     first = s.pop(0)
#     s = [(numSpaces * ' ') + line for line in s]
#     s = '\n'.join(s)
#     s = first + '\n' + s
#     return s
#
# Note2. np.prod: Return the product of array elements over a given axis.
# '''

# In[3]:

# Function 1
from torch.nn.modules.module import _addindent

# Module types whose children are summarized recursively.  ``Container`` is a
# deprecated class, so it is looked up defensively in case it is unavailable.
_CONTAINER_TYPES = tuple(
    cls
    for cls in (
        getattr(torch.nn.modules.container, 'Container', None),
        torch.nn.modules.container.Sequential,
    )
    if cls is not None
)


def torch_summarize(model, show_weights=True, show_parameters=True):
    """Summarize a torch model by listing its direct sub-modules.

    Args:
        model: Module whose ``_modules`` entries are listed.
        show_weights: Append the parameter shapes of every sub-module.
        show_parameters: Append the parameter count of every sub-module.

    Returns:
        A multi-line string: one entry per direct child (container children
        are expanded recursively with the same flags and carry their own
        total line), followed by the total parameter count of ``model``.
    """
    total_params = 0
    tmpstr = model.__class__.__name__ + ' :\n'  # Print the object name
    for key, module in model._modules.items():
        # If it contains layers, call it recursively to get params and weights.
        if isinstance(module, _CONTAINER_TYPES):
            # Forward the flags so nested entries honour the caller's choice.
            modstr = torch_summarize(module, show_weights, show_parameters)
        else:
            modstr = repr(module)
        modstr = _addindent(modstr, 2)  # Note1

        params = sum(int(np.prod(p.size())) for p in module.parameters())  # Note2
        weights = tuple(tuple(p.size()) for p in module.parameters())
        total_params += params

        tmpstr += ' (' + key + '): ' + modstr
        if show_weights:
            tmpstr += ', weights={}'.format(weights)
        if show_parameters:
            tmpstr += ', parameters={}'.format(params)
        tmpstr += '\n------------------------------------------------------\n'

    tmpstr = tmpstr + ')'
    tmpstr = tmpstr + ' \n##Total Parameters = {} '.format(total_params)
    return tmpstr


# Function 2
def show_batch(batch):
    '''Show image of one batch'''
    im = torchvision.utils.make_grid(batch)
    # The grid's axes are reordered with (1, 2, 0) before being handed to
    # imshow (presumably channel-first -> channel-last; confirm if changed).
    plt.imshow(np.transpose(im.numpy(), (1, 2, 0)))


# # Load MNIST Data

# In[4]:

BATCH_SIZE = 200  # setting batch size

# The loader uses worker processes; on platforms that start workers by
# re-importing the main module, unguarded top-level code would run again in
# every worker.  The guard also keeps imports of this file free of downloads.
if __name__ == "__main__":
    transform = transforms.ToTensor()  # Transform them to tensors

    # Load and transform data
    trainset = torchvision.datasets.MNIST('./mnist', train=True, download=True, transform=transform)
    #testset = torchvision.datasets.MNIST('./mnist', train=False, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
    #testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

    # In[5]:

    dataiter = iter(trainloader)
    # Iterators have no ``.next()`` method on Python 3; use the builtin.
    images, labels = next(dataiter)
    #print('Labels: ', list(labels))
    print('Batch shape: ', images.size())
    show_batch(images)

# In[ ]:

# # Build Shallow Model
# How to get the size of feature map:
# Output H = 1 + (input H + 2*Padding - Filter H)/Stride
# Output W = 1 + (input W + 2*Padding - Filter W)/Stride
# In[10]:

class Shallow_model(nn.Module):
    """One conv/ReLU/max-pool stage followed by a single linear classifier.

    The linear layer expects 13*13*105 features, i.e. the conv stage output
    for a 1x28x28 input (28 -> 26 after the 3x3 conv -> 13 after pooling).
    """

    def __init__(self):
        super(Shallow_model, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=105, kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # fully connected layer
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=13*13*105, out_features=10))

    def forward(self, x):
        """Return per-class log-probabilities, one row per input sample."""
        out = self.layer1(x)
        out = out.view(out.size(0), -1)  # flatten everything but the batch axis
        out = self.fc1(out)
        # Explicit class axis: the implicit-dim form is deprecated.
        return F.log_softmax(out, dim=1)  # last layer is softmax layer


def train_model(model, loader, loss_func, optimizer, num_epochs=10,
                target_acc=97.0, log_every=20):
    """Train ``model`` and stop early once an epoch is accurate enough.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        loader: Iterable of ``(inputs, labels)`` batches.  ``len(loader)`` and
            ``loader.dataset`` are only used for the progress message.
        loss_func: Callable ``loss_func(outputs, labels)`` returning a scalar
            tensor.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Maximum number of passes over ``loader``.
        target_acc: Running training accuracy (percent) that ends training
            after the epoch in which it is reached.
        log_every: Print a progress line every this many optimizer steps.

    Returns:
        The running training accuracy in percent (a float) after the last
        processed batch, or 0.0 when no batch was processed.
    """
    iters = 0
    acc = 0.0
    for epoch in range(num_epochs):
        correct = 0
        seen = 0
        for i, (images, labels) in enumerate(loader):
            # clear gradients w.r.t parameters
            optimizer.zero_grad()
            # forward pass
            outputs = model(images)
            # calculate loss
            loss = loss_func(outputs, labels)
            # get gradient w.r.t parameters
            loss.backward()
            # update parameters
            optimizer.step()
            iters += 1

            # Accuracy (accumulated over the batches of this epoch)
            pred = outputs.detach().argmax(dim=1)  # index of the max log-probability
            # .item() keeps the counter a Python int so the division below is
            # ordinary float division rather than tensor arithmetic.
            correct += pred.eq(labels).sum().item()
            # Count real samples so a short final batch does not skew the rate.
            seen += labels.size(0)
            acc = 100.0 * correct / seen

            if iters % log_every == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}, Accumulation batch acc={:.1f}%'.format(
                    epoch+1, i * len(images), len(loader.dataset),
                    100. * i / len(loader), loss.item(), acc))
        if acc >= target_acc:
            break
    return acc


# The script part only runs when the file is executed, not when it is imported.
if __name__ == "__main__":
    # In[11]:

    # Setting the model & loss function & optimizer
    SNet = Shallow_model()
    # The model already returns log-probabilities, so the summed negative
    # log-likelihood is the matching loss (same value as cross-entropy here).
    loss_func = nn.NLLLoss(reduction='sum')
    optimizer = torch.optim.Adam(SNet.parameters(), lr=1e-3, betas=(0.9, 0.99))

    # In[12]:

    print(torch_summarize(SNet))

    # In[13]:

    # Running Model
    num_of_epoch = 10
    train_model(SNet, trainloader, loss_func, optimizer, num_epochs=num_of_epoch)

# # Deep Model
# How to get the size of feature map:
# Output H = 1 + (input H + 2*Padding - Filter H)/Stride
# Output W = 1 + (input W + 2*Padding - Filter W)/Stride

# In[14]:

# Filter number of Conv layers
fil = [30, 35, 30]


class Deep_model(nn.Module):
    """Three conv stages followed by a three-layer fully connected head.

    Channel widths come from the module-level ``fil`` list.  The first linear
    layer expects 5*5*fil[2] features, i.e. the conv output for a 1x28x28
    input (28 -> 24 -> 12 -> 10 -> 5; the 1x1 conv keeps 5x5).
    """

    def __init__(self):
        super(Deep_model, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=fil[0], kernel_size=5, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=fil[0], out_channels=fil[1], kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=fil[1], out_channels=fil[2], kernel_size=1, stride=1),
            nn.ReLU(inplace=True))
            #nn.MaxPool2d(kernel_size=2, stride=2))
        # fully connected layer
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=5*5*fil[2], out_features=200),
            nn.ReLU(inplace=True))
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=200, out_features=80),
            nn.ReLU(inplace=True))
        self.fc3 = nn.Sequential(nn.Linear(in_features=80, out_features=10))

    def forward(self, x):
        """Return per-class log-probabilities, one row per input sample."""
        out = self.layer3(self.layer2(self.layer1(x)))
        out = out.view(out.size(0), -1)  # flatten everything but the batch axis
        out = self.fc3(self.fc2(self.fc1(out)))
        return F.log_softmax(out, dim=1)


if __name__ == "__main__":
    # In[15]:

    # Setting the model & loss function & optimizer
    DNet = Deep_model()
    loss_func = nn.NLLLoss(reduction='sum')
    optimizer = torch.optim.Adam(DNet.parameters(), lr=1e-3, betas=(0.9, 0.99))

    # In[16]:

    print(torch_summarize(DNet))

    # In[17]:

    train_model(DNet, trainloader, loss_func, optimizer, num_epochs=10)

# In[ ]:

# In[ ]: