%matplotlib inline
- ensures matplotlib figures are rendered inline, in the notebook's output cells
%reload_ext autoreload, %autoreload 2
- automatically reloads imported modules when their source files are edited
%matplotlib inline
%reload_ext autoreload
%autoreload 2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import sklearn
import matplotlib.pyplot as plt
torch.__version__
'1.0.1'
import fastai
from fastai.vision import *
from fastai.vision import learner
fastai.__version__
'1.0.50.post1'
# Fetch and extract the CIFAR archive via fastai's untar_data; `path` is the extracted dataset folder.
path = untar_data(URLs.CIFAR, fname = '../../data/cifar');
path # machine-dependent, e.g. PosixPath('/home/jupyter/.fastai/data/cifar10') or a WindowsPath under ~/.fastai/data/cifar10
# dataset folder structure
path.ls() # e.g. ['test', 'labels.txt', 'train'] (a 'models' folder also appears once weights have been saved)
C:\Users\HYJ\Anaconda3\envs\py37fastai10\lib\site-packages\fastai\datasets.py:164: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details. with open(fpath, 'r') as yaml_file: return yaml.load(yaml_file)
[WindowsPath('C:/Users/HYJ/.fastai/data/cifar10/labels.txt'), WindowsPath('C:/Users/HYJ/.fastai/data/cifar10/models'), WindowsPath('C:/Users/HYJ/.fastai/data/cifar10/test'), WindowsPath('C:/Users/HYJ/.fastai/data/cifar10/train')]
# Folder holding the training images (the 'train' subfolder of the dataset root).
train_data = path/'train'
train_data
WindowsPath('C:/Users/HYJ/.fastai/data/cifar10/train')
# Folder holding the test images (the 'test' subfolder of the dataset root).
test_data = path/'test'
test_data
WindowsPath('C:/Users/HYJ/.fastai/data/cifar10/test')
bs = 64 # batch size
img_size = 24 # image size
# Training-time augmentation: random rotation within +/-20 degrees plus a random symmetric warp.
train_tfms = [rotate(degrees=(-20,20)), symmetric_warp(magnitude=(-0.3,0.3))]
# ds_tfms is interpreted as a (train_tfms, valid_tfms) pair. Passing the flat two-element list
# would apply only `rotate` to the training set and the random `symmetric_warp` to the
# validation set, so the augmentations are wrapped explicitly and validation gets none.
tfms = (train_tfms, [])
# Build the data bunch from the dataset root, using the 'test' folder as the validation set.
data = ImageDataBunch.from_folder(path, ds_tfms=tfms, valid='test', size=img_size, bs = bs,num_workers=0)
# normalising the dataset using the same normalisation applied to the imagenet dataset
data.normalize(imagenet_stats)
ImageDataBunch; Train: LabelList (50000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: CategoryList airplane,airplane,airplane,airplane,airplane Path: C:\Users\HYJ\.fastai\data\cifar10; Valid: LabelList (10000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: CategoryList airplane,airplane,airplane,airplane,airplane Path: C:\Users\HYJ\.fastai\data\cifar10; Test: None
使用 `ImageDataBunch.from_folder` 划分验证集:其中 `valid_pct=0.1` 表示随机抽取 10% 的数据作为验证集(valid)。
# Second data bunch: built from the training folder only, holding out valid_pct=0.1 of it for
# validation and attaching the test folder as an unlabelled test set.
# NOTE(review): with valid_pct set the split appears to be random (45000/5000 items in the
# recorded output), so train=/valid= look unused here — confirm against the fastai docs.
new_data = ImageDataBunch.from_folder(train_data, ds_tfms=tfms, train=train_data, valid='valid', test=test_data,valid_pct=0.1,
size=img_size, bs = bs,num_workers=0)
# normalising the dataset using the same normalisation applied to the imagenet dataset
new_data.normalize(imagenet_stats)
ImageDataBunch; Train: LabelList (45000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: CategoryList airplane,airplane,airplane,airplane,airplane Path: C:\Users\HYJ\.fastai\data\cifar10\train; Valid: LabelList (5000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: CategoryList automobile,truck,ship,truck,airplane Path: C:\Users\HYJ\.fastai\data\cifar10\train; Test: LabelList (10000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: EmptyLabelList ,,,, Path: C:\Users\HYJ\.fastai\data\cifar10\train
# Inspect the class labels; both the length of data.classes and data.c are 10 in the recorded output.
print(data.classes)
len(data.classes),data.c
['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
(10, 10)
# Visual sanity check: display 3 rows of sample images from the data bunch.
data.show_batch(rows=3, figsize=(5,5))
cnn_learner?
Signature: cnn_learner( data: fastai.basic_data.DataBunch, base_arch: Callable, cut: Union[int, Callable] = None, pretrained: bool = True, lin_ftrs: Union[Collection[int], NoneType] = None, ps: Union[float, Collection[float]] = 0.5, custom_head: Union[torch.nn.modules.module.Module, NoneType] = None, split_on: Union[Callable, Collection[Collection[torch.nn.modules.module.Module]], NoneType] = None, bn_final: bool = False, init=<function kaiming_normal_ at 0x00000197EB530620>, concat_pool: bool = True, **kwargs: Any, ) -> fastai.basic_train.Learner Docstring: Build convnet style learner. File: c:\users\hyj\anaconda3\envs\py37fastai10\lib\site-packages\fastai\vision\learner.py Type: function
# Build a convnet learner on `data` with a ResNet-34 base architecture, reporting error_rate.
# `pretrained` is left at its default (True, per the cnn_learner signature).
learn1 = cnn_learner(data=data, base_arch=models.resnet34, metrics=error_rate)
# Run the learning-rate finder; results are stored on learn1.recorder.
learn1.lr_find()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
# Plot the learning-rate finder curve and show the suggested learning rate
learn1.recorder.plot(suggestion=True)
Min numerical gradient: 1.10E-02
`learn1.fit_one_cycle` 的默认最大学习率(`max_lr`)为 0.003。
# Take max_lr from the learning-rate finder curve (1.10E-02 was the suggestion) and start training
# for 15 epochs with the one-cycle schedule.
learn1.fit_one_cycle(cyc_len=15, max_lr=1.10E-02)
epoch | train_loss | valid_loss | error_rate | time |
---|---|---|---|---|
0 | 1.390306 | 1.336829 | 0.473900 | 03:08 |
1 | 1.129087 | 1.167297 | 0.406900 | 02:57 |
2 | 1.107678 | 1.197931 | 0.419900 | 02:58 |
3 | 1.095819 | 1.120940 | 0.386100 | 02:57 |
4 | 1.060097 | 1.069762 | 0.371900 | 02:57 |
5 | 0.988918 | 1.139213 | 0.381800 | 02:51 |
6 | 1.017678 | 1.035485 | 0.364300 | 02:54 |
7 | 0.928527 | 0.997322 | 0.347800 | 02:52 |
8 | 0.942945 | 0.951707 | 0.335600 | 03:49 |
9 | 0.902082 | 0.961064 | 0.327800 | 03:52 |
10 | 0.860575 | 0.934616 | 0.323000 | 03:45 |
11 | 0.832780 | 0.935875 | 0.309200 | 04:03 |
12 | 0.797015 | 0.882177 | 0.308100 | 03:59 |
13 | 0.763474 | 0.864752 | 0.303900 | 03:56 |
14 | 0.767370 | 0.882639 | 0.306900 | 03:56 |
# Checkpoint the stage-1 weights and reload them; the Learner repr shows model_dir='models'
# under the learner's path, which is where the checkpoint is expected to live.
learn1.save('res34-stage-1') # saving the model weights
learn1.load('res34-stage-1') # loading the model weights
Learner(data=ImageDataBunch; Train: LabelList (50000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: CategoryList airplane,airplane,airplane,airplane,airplane Path: C:\Users\HYJ\.fastai\data\cifar10; Valid: LabelList (10000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: CategoryList airplane,airplane,airplane,airplane,airplane Path: C:\Users\HYJ\.fastai\data\cifar10; Test: None, model=Sequential( (0): Sequential( (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace) (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) (4): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) 
(5): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): BasicBlock( (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (3): BasicBlock( (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (6): Sequential( (0): BasicBlock( (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): 
ReLU(inplace) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (3): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (4): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (5): BasicBlock( (conv1): Conv2d(256, 256, 
kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (7): Sequential( (0): BasicBlock( (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (2): BasicBlock( (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) ) (1): Sequential( (0): AdaptiveConcatPool2d( (ap): AdaptiveAvgPool2d(output_size=1) (mp): AdaptiveMaxPool2d(output_size=1) ) (1): Flatten() (2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (3): Dropout(p=0.25) (4): 
Linear(in_features=1024, out_features=512, bias=True) (5): ReLU(inplace) (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (7): Dropout(p=0.5) (8): Linear(in_features=512, out_features=10, bias=True) ) ), opt_func=functools.partial(<class 'torch.optim.adam.Adam'>, betas=(0.9, 0.99)), loss_func=FlattenedLoss of CrossEntropyLoss(), metrics=[<function error_rate at 0x000002A93F2CE6A8>], true_wd=True, bn_wd=True, wd=0.01, train_bn=True, path=WindowsPath('C:/Users/HYJ/.fastai/data/cifar10'), model_dir='models', callback_fns=[functools.partial(<class 'fastai.basic_train.Recorder'>, add_time=True)], callbacks=[], layer_groups=[Sequential( (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace) (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (6): ReLU(inplace) (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (8): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (9): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (11): ReLU(inplace) (12): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (13): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (14): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (15): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (16): ReLU(inplace) (17): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (18): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (19): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (20): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (21): ReLU(inplace) (22): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (23): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (24): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (25): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (26): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (27): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (28): ReLU(inplace) (29): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (30): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (31): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (32): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (33): ReLU(inplace) (34): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (35): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (36): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (37): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (38): ReLU(inplace) (39): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (40): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ), Sequential( (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace) (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (5): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (7): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (8): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (9): ReLU(inplace) (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (11): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (14): ReLU(inplace) (15): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (16): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (19): ReLU(inplace) (20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (22): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (23): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (24): ReLU(inplace) (25): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (26): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (27): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (28): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (29): ReLU(inplace) (30): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (31): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (32): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (33): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (34): ReLU(inplace) (35): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (36): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (37): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (38): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (39): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (40): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (41): ReLU(inplace) (42): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (43): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (44): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (45): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (46): ReLU(inplace) (47): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (48): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ), Sequential( (0): AdaptiveAvgPool2d(output_size=1) (1): AdaptiveMaxPool2d(output_size=1) (2): Flatten() (3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (4): Dropout(p=0.25) (5): Linear(in_features=1024, out_features=512, bias=True) (6): ReLU(inplace) (7): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (8): Dropout(p=0.5) (9): Linear(in_features=512, out_features=10, bias=True) )], add_time=True)
# Unfreeze the learner (presumably making the pretrained body trainable as well — see fastai docs)
learn1.unfreeze()
# Re-run the learning-rate finder, sweeping up to a learning rate of 1.
learn1.lr_find(end_lr=1)
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
# Plot the new learning-rate finder curve.
learn1.recorder.plot()
# Fine-tune for 10 more epochs with learning rates in the range 2e-6 .. 1e-5
# (presumably spread across the layer groups by the slice — confirm against fastai docs).
learn1.fit_one_cycle(10, slice(2e-6, 1e-5))
epoch | train_loss | valid_loss | error_rate | time |
---|---|---|---|---|
0 | 0.782025 | 0.877774 | 0.305300 | 04:26 |
1 | 0.774211 | 0.903600 | 0.306200 | 04:08 |
2 | 0.756491 | 0.861392 | 0.299600 | 04:14 |
3 | 0.763104 | 0.858521 | 0.300700 | 04:10 |
4 | 0.742118 | 0.850643 | 0.295500 | 03:41 |
5 | 0.697710 | 0.842790 | 0.294400 | 03:47 |
6 | 0.704682 | 0.853747 | 0.300600 | 03:44 |
7 | 0.714265 | 0.835156 | 0.292200 | 03:42 |
8 | 0.717225 | 0.854354 | 0.297600 | 03:50 |
9 | 0.709184 | 0.836953 | 0.292000 | 03:44 |
learn1.save('res34-stage-2') # saving the model weights
# Build an interpretation object from the trained learner to inspect its mistakes.
interp = ClassificationInterpretation.from_learner(learn1)
# Show the 9 samples with the highest loss.
interp.plot_top_losses(9, figsize=(9,6))
#interp.plot_confusion_matrix?
interp.plot_confusion_matrix(figsize=(6,6), dpi=150, cmap='cool')
# List class pairs confused at least 40 times as (class, class, count) tuples;
# per the fastai docs the order should be (actual, predicted, occurrences) — confirm.
interp.most_confused(min_val=40)
[('dog', 'cat', 226), ('cat', 'dog', 144), ('truck', 'automobile', 109), ('bird', 'deer', 105), ('horse', 'deer', 102), ('automobile', 'truck', 100), ('airplane', 'ship', 75), ('cat', 'bird', 70), ('frog', 'cat', 68), ('deer', 'bird', 66), ('bird', 'airplane', 65), ('cat', 'deer', 65), ('ship', 'airplane', 64), ('dog', 'bird', 61), ('deer', 'horse', 59), ('frog', 'bird', 59), ('dog', 'deer', 58), ('bird', 'cat', 57), ('cat', 'frog', 55), ('deer', 'cat', 54), ('frog', 'deer', 52), ('bird', 'dog', 50), ('horse', 'dog', 49), ('truck', 'airplane', 49), ('horse', 'cat', 46), ('dog', 'horse', 42)]
# Second experiment: a deeper ResNet-50 base architecture trained on `new_data`
# (the 45000/5000 train/validation split of the training folder).
learn2 = cnn_learner(data=new_data, base_arch=models.resnet50, metrics=error_rate)
# Run the learning-rate finder for the new learner.
learn2.lr_find()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
# Plot the learning-rate finder curve and show the suggested learning rate
learn2.recorder.plot(suggestion=True)
Min numerical gradient: 9.12E-03
# Train for 15 epochs with the one-cycle schedule, using the suggested learning rate (9.12E-03) as max_lr.
learn2.fit_one_cycle(cyc_len=15, max_lr=9.12E-03)
epoch | train_loss | valid_loss | error_rate | time |
---|---|---|---|---|
0 | 1.365069 | 1.357458 | 0.474200 | 03:27 |
1 | 1.102144 | 1.166417 | 0.406000 | 03:28 |
2 | 1.035258 | 1.033650 | 0.358600 | 03:30 |
3 | 1.011387 | 1.047086 | 0.366200 | 03:34 |
4 | 0.957564 | 0.993538 | 0.339400 | 03:31 |
5 | 0.951647 | 1.000391 | 0.347600 | 03:12 |
6 | 0.910214 | 1.002255 | 0.346000 | 03:15 |
7 | 0.866741 | 0.919583 | 0.308200 | 03:10 |
8 | 0.813545 | 0.858688 | 0.296600 | 03:11 |
9 | 0.759292 | 0.856918 | 0.304600 | 03:14 |
10 | 0.739882 | 0.845537 | 0.290200 | 03:23 |
11 | 0.692281 | 0.833295 | 0.282000 | 03:27 |
12 | 0.665312 | 0.946859 | 0.280800 | 03:28 |
13 | 0.618228 | 0.798325 | 0.270400 | 03:31 |
14 | 0.617501 | 0.790436 | 0.276400 | 03:26 |
更换网络,变换学习率,数据增广,使用更深的网络,变换新的学习率
# Checkpoint the ResNet-50 stage-1 weights and reload them.
learn2.save('resnet50-stage-1') # saving the model weights
learn2.load('resnet50-stage-1') # loading the model weights
Learner(data=ImageDataBunch; Train: LabelList (45000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: CategoryList airplane,airplane,airplane,airplane,airplane Path: C:\Users\HYJ\.fastai\data\cifar10\train; Valid: LabelList (5000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: CategoryList automobile,truck,ship,truck,airplane Path: C:\Users\HYJ\.fastai\data\cifar10\train; Test: LabelList (10000 items) x: ImageList Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24),Image (3, 24, 24) y: EmptyLabelList ,,,, Path: C:\Users\HYJ\.fastai\data\cifar10\train, model=Sequential( (0): Sequential( (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace) (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) (4): Sequential( (0): Bottleneck( (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (downsample): Sequential( (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 
bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (2): Bottleneck( (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) ) (5): Sequential( (0): Bottleneck( (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): 
BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (2): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (3): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) ) (6): Sequential( (0): Bottleneck( (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (downsample): Sequential( (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), 
stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (2): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (3): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (4): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) 
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (5): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) ) (7): Sequential( (0): Bottleneck( (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) (downsample): Sequential( (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) (2): Bottleneck( (conv1): Conv2d(2048, 512, 
kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace) ) ) ) (1): Sequential( (0): AdaptiveConcatPool2d( (ap): AdaptiveAvgPool2d(output_size=1) (mp): AdaptiveMaxPool2d(output_size=1) ) (1): Flatten() (2): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (3): Dropout(p=0.25) (4): Linear(in_features=4096, out_features=512, bias=True) (5): ReLU(inplace) (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (7): Dropout(p=0.5) (8): Linear(in_features=512, out_features=10, bias=True) ) ), opt_func=functools.partial(<class 'torch.optim.adam.Adam'>, betas=(0.9, 0.99)), loss_func=FlattenedLoss of CrossEntropyLoss(), metrics=[<function error_rate at 0x000002A93F2CE6A8>], true_wd=True, bn_wd=True, wd=0.01, train_bn=True, path=WindowsPath('C:/Users/HYJ/.fastai/data/cifar10/train'), model_dir='models', callback_fns=[functools.partial(<class 'fastai.basic_train.Recorder'>, add_time=True)], callbacks=[], layer_groups=[Sequential( (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): ReLU(inplace) (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) (4): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (7): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (8): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (10): ReLU(inplace) (11): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (13): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (15): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (16): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (17): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (19): ReLU(inplace) (20): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (21): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (22): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (23): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (24): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (25): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (26): ReLU(inplace) (27): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (28): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (29): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (30): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (31): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (32): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (33): ReLU(inplace) (34): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (35): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) (36): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (37): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (38): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (39): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (40): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (42): ReLU(inplace) (43): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (44): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (45): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (46): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (47): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (48): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (49): ReLU(inplace) (50): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (51): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (52): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (53): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (54): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (55): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (56): ReLU(inplace) ), Sequential( (0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (4): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (5): BatchNorm2d(1024, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) (6): ReLU(inplace) (7): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False) (8): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (9): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (10): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (13): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (14): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (15): ReLU(inplace) (16): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (17): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (18): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (19): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (20): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (21): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (22): ReLU(inplace) (23): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (24): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (25): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (26): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (27): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (28): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (29): ReLU(inplace) (30): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (31): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (32): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 
bias=False) (33): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (34): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (35): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (36): ReLU(inplace) (37): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (38): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (39): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (40): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (41): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (42): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (43): ReLU(inplace) (44): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (45): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (46): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (47): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (48): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (49): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (50): ReLU(inplace) (51): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False) (52): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (53): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (54): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (55): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (56): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (57): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (58): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (59): ReLU(inplace) (60): Conv2d(2048, 512, 
kernel_size=(1, 1), stride=(1, 1), bias=False) (61): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (62): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (63): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (64): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (65): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (66): ReLU(inplace) ), Sequential( (0): AdaptiveAvgPool2d(output_size=1) (1): AdaptiveMaxPool2d(output_size=1) (2): Flatten() (3): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (4): Dropout(p=0.25) (5): Linear(in_features=4096, out_features=512, bias=True) (6): ReLU(inplace) (7): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (8): Dropout(p=0.5) (9): Linear(in_features=512, out_features=10, bias=True) )], add_time=True)
# Stage 2 preparation: unfreeze learn2 (in fastai this makes every layer group
# trainable rather than only the head -- confirm against the fastai version in use),
# then run the learning-rate finder with its sweep capped at end_lr=1e-3.
learn2.unfreeze()
learn2.lr_find(end_lr=1e-3)
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
# Plot the result of the LR-finder run; with suggestion=True the recorder also
# reports the learning rate at the minimum numerical gradient (printed below).
learn2.recorder.plot(suggestion=True)
Min numerical gradient: 1.74E-06
# Fine-tune for 15 epochs with the one-cycle schedule. The slice passes a range
# of learning rates (1e-6 .. 5e-6) that brackets the LR finder's suggested
# 1.74E-06; presumably fastai spreads this range across the layer groups.
# NOTE(review): in the log below error_rate stays around 0.26-0.27 and
# valid_loss fluctuates over all 15 epochs, so these rates may be too small to
# make real progress -- worth revisiting the LR range.
learn2.fit_one_cycle(15, slice(1e-6, 5e-6))
epoch | train_loss | valid_loss | error_rate | time |
---|---|---|---|---|
0 | 0.618872 | 0.797404 | 0.273800 | 03:53 |
1 | 0.623353 | 0.803663 | 0.274400 | 03:55 |
2 | 0.602284 | 0.818999 | 0.273000 | 03:54 |
3 | 0.614965 | 0.904788 | 0.273600 | 03:59 |
4 | 0.610566 | 0.771489 | 0.270600 | 03:52 |
5 | 0.599383 | 0.782821 | 0.269200 | 03:51 |
6 | 0.589775 | 0.789079 | 0.273400 | 03:51 |
7 | 0.592902 | 0.765868 | 0.271400 | 03:50 |
8 | 0.578316 | 0.796248 | 0.266200 | 03:51 |
9 | 0.569336 | 0.768856 | 0.262200 | 03:53 |
10 | 0.577416 | 0.872496 | 0.264600 | 03:56 |
11 | 0.571700 | 0.756352 | 0.260800 | 04:03 |
12 | 0.566952 | 0.784240 | 0.265600 | 04:05 |
13 | 0.560834 | 0.768550 | 0.265400 | 04:11 |
14 | 0.561714 | 0.833160 | 0.271200 | 04:09 |
# Persist the fine-tuned stage-2 weights under the name 'resnet50-stage-2'.
learn2.save('resnet50-stage-2') # saving the model weights
# Build the interpretation object from learn2 so the analysis below describes
# the fine-tuned model.
interp2 = ClassificationInterpretation.from_learner(learn2)
# Show the 9 samples with the highest loss.
interp2.plot_top_losses(9, figsize=(15,11))
# List the (actual, predicted, count) class pairs confused at least 40 times.
# This must query interp2: the original called `interp`, which is not the
# object created above and therefore reports on a different learner.
# NOTE: re-run this cell -- any recorded output predates this fix.
interp2.most_confused(min_val=40)
[('dog', 'cat', 226), ('cat', 'dog', 144), ('truck', 'automobile', 109), ('bird', 'deer', 105), ('horse', 'deer', 102), ('automobile', 'truck', 100), ('airplane', 'ship', 75), ('cat', 'bird', 70), ('frog', 'cat', 68), ('deer', 'bird', 66), ('bird', 'airplane', 65), ('cat', 'deer', 65), ('ship', 'airplane', 64), ('dog', 'bird', 61), ('deer', 'horse', 59), ('frog', 'bird', 59), ('dog', 'deer', 58), ('bird', 'cat', 57), ('cat', 'frog', 55), ('deer', 'cat', 54), ('frog', 'deer', 52), ('bird', 'dog', 50), ('horse', 'dog', 49), ('truck', 'airplane', 49), ('horse', 'cat', 46), ('dog', 'horse', 42)]