%matplotlib inline
import torch
import torch.nn as nn
# Warmup:
# We explore how nn.Linear layers work by passing a random tensor
# (tensor with random values) through one.
# Predicting the size of the output is a useful exercise to check
# we really understand what the layer is doing.
layer = nn.Linear(in_features=100, out_features=30)
x = torch.randn(1, 100)  # random input, shape [1, 100]
x.shape                  # torch.Size([1, 100])
y = layer(x)             # FIX: y was inspected below but never computed
y.shape                  # torch.Size([1, 30]) -- only the last dim changes
# Same layer, now fed a batch of 5 vectors instead of 1.
# nn.Linear maps only the last dimension: [5, 100] -> [5, 30].
x = torch.randn(5, 100)
y = layer(x)
y.shape  # torch.Size([5, 30])
# Deliberately violate the shape contract: the layer was built with
# in_features=100, so a feature dimension of 90 cannot be multiplied
# against the [100 x 30] weight matrix.
x = torch.randn(5, 90) # changed 100 => 90
y = layer(x)  # raises RuntimeError: size mismatch (see traceback below)
y.shape
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) <ipython-input-54-213a4d91b660> in <module> 1 x = torch.randn(5, 90) # changed 100 => 90 ----> 2 y = layer(x) 3 y.shape ~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs) 475 result = self._slow_forward(*input, **kwargs) 476 else: --> 477 result = self.forward(*input, **kwargs) 478 for hook in self._forward_hooks.values(): 479 hook_result = hook(self, input, result) ~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/linear.py in forward(self, input) 53 54 def forward(self, input): ---> 55 return F.linear(input, self.weight, self.bias) 56 57 def extra_repr(self): ~/miniconda3/lib/python3.6/site-packages/torch/nn/functional.py in linear(input, weight, bias) 1022 if input.dim() == 2 and bias is not None: 1023 # fused op is marginally faster -> 1024 return torch.addmm(bias, input, weight.t()) 1025 1026 output = input.matmul(weight.t()) RuntimeError: size mismatch, m1: [5 x 90], m2: [100 x 30] at /Users/soumith/miniconda2/conda-bld/pytorch_1532623076075/work/aten/src/TH/generic/THTensorMath.cpp:2070
# A 2-D convolution: 3 input channels -> 7 output channels, 5x5 kernel.
conv = nn.Conv2d(in_channels=3,
                 out_channels=7,
                 kernel_size=5)
# FIX: variable name was misspelled "conv_paramters" throughout
conv_parameters = list(conv.parameters())
len(conv_parameters)      # 2: the kernel weights and the bias
conv_parameters[0].shape  # kernel: torch.Size([7, 3, 5, 5]) = [out_ch, in_ch, kH, kW]
conv_parameters[1].shape  # bias: torch.Size([7]), one scalar per output channel
# One 128 x 128 pixel colour image, in NCHW format:
# N = number (batch size), C = channel, H = height, W = width.
x = torch.randn(1, 3, 128, 128)
conv = nn.Conv2d(in_channels=3,
                 out_channels=7,
                 kernel_size=5)
y = conv(x)
y.shape  # torch.Size([1, 7, 124, 124]): no padding, so H/W shrink by k-1 = 4

# Batch size changed 1 => 16; only the N dimension of the output changes.
x = torch.randn(16, 3, 128, 128)
y = conv(x)
y.shape  # torch.Size([16, 7, 124, 124])
# padding=1 surrounds the image with one ring of zeros:
# output H/W = 128 + 2*1 - (5-1) = 126
conv = nn.Conv2d(in_channels=3,
                 out_channels=7,
                 kernel_size=5,
                 padding=1)
x = torch.randn(16, 3, 128, 128)
y = conv(x)
y.shape  # torch.Size([16, 7, 126, 126])
# An extreme padding=25 makes the output LARGER than the input:
# output H/W = 128 + 2*25 - (5-1) = 174
conv = nn.Conv2d(in_channels=3,
                 out_channels=7,
                 kernel_size=5,
                 padding=25)
x = torch.randn(16, 3, 128, 128)
y = conv(x)
y.shape  # torch.Size([16, 7, 174, 174])
# padding=3 over-pads a 5x5 kernel, growing the output slightly:
# output H/W = 128 + 2*3 - (5-1) = 130
conv = nn.Conv2d(in_channels=3,
                 out_channels=7,
                 kernel_size=5,
                 padding=3)
x = torch.randn(16, 3, 128, 128)
y = conv(x)
y.shape  # torch.Size([16, 7, 130, 130])
# padding=2 with a 5x5 kernel preserves H and W exactly ("same" padding):
# output H/W = 128 + 2*2 - (5-1) = 128
conv = nn.Conv2d(in_channels=3,
                 out_channels=7,
                 kernel_size=5,
                 padding=2)
x = torch.randn(16, 3, 128, 128)
y = conv(x)
y.shape  # torch.Size([16, 7, 128, 128])
print("input:", x.shape)
print("output:", y.shape)
# input: torch.Size([16, 3, 128, 128])
# output: torch.Size([16, 7, 128, 128])
# A bigger 7x7 kernel still preserves H/W -- but only because padding
# grew to 3 along with it: 128 + 2*3 - (7-1) = 128.
# In general, padding = (k-1)//2 keeps the size for odd kernel size k.
conv = nn.Conv2d(in_channels=3,
                 out_channels=7,
                 kernel_size=7,
                 padding=3)
x = torch.randn(16, 3, 128, 128)
y = conv(x)
print("input:", x.shape)
print("output:", y.shape)
# input: torch.Size([16, 3, 128, 128])
# output: torch.Size([16, 7, 128, 128])
# ReLU followed by 2x2 max-pooling with stride 2:
# batch size and channel count are unchanged, H and W are halved.
pool_layer = nn.MaxPool2d(2, 2)
y_pooled = pool_layer(torch.relu(y))
y_pooled.shape  # torch.Size([16, 7, 64, 64])
# stride=2 slides the kernel two pixels at a time, halving H and W:
# output H/W = floor((128 + 2*3 - 7) / 2) + 1 = 64
conv = nn.Conv2d(in_channels=3,
                 out_channels=7,
                 kernel_size=7,
                 padding=3,
                 stride=2) # added stride
x = torch.randn(16, 3, 128, 128)
y = conv(x)
print("input:", x.shape)
print("output:", y.shape)
# input: torch.Size([16, 3, 128, 128])
# output: torch.Size([16, 7, 64, 64])
# alexnet: load torchvision's AlexNet with pretrained ImageNet weights
# to inspect a real architecture built from the layers explored above.
# NOTE(review): pretrained=True downloads the weights on first use; newer
# torchvision releases deprecate it in favour of weights= -- confirm version.
import torchvision.models
alexNet = torchvision.models.alexnet(pretrained=True)
alexNet
AlexNet( (features): Sequential( (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)) (1): ReLU(inplace) (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)) (4): ReLU(inplace) (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (7): ReLU(inplace) (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (9): ReLU(inplace) (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (11): ReLU(inplace) (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False) ) (classifier): Sequential( (0): Dropout(p=0.5) (1): Linear(in_features=9216, out_features=4096, bias=True) (2): ReLU(inplace) (3): Dropout(p=0.5) (4): Linear(in_features=4096, out_features=4096, bias=True) (5): ReLU(inplace) (6): Linear(in_features=4096, out_features=1000, bias=True) ) )
# Run only the convolutional feature extractor on the current x
# (batch of 16, 3 x 128 x 128).
# NOTE(review): a 128x128 input yields 3x3 feature maps here; the stock
# classifier's first Linear expects 9216 = 256*6*6 features, so calling
# alexNet(x) directly would presumably fail at this input size -- confirm.
alexNet.features(x).shape
torch.Size([16, 256, 3, 3])
# LeNet verification: LeNet's first convolution maps a 32x32 single-channel
# input to 28x28 feature maps with a 5x5 kernel and no padding (32 - 4 = 28).
conv = nn.Conv2d(in_channels=1,
                 out_channels=1,
                 kernel_size=5,
                 padding=0)
x = torch.randn(1, 1, 32, 32)
y = conv(x)
y.shape  # torch.Size([1, 1, 28, 28])