今天我们要解决的问题是通过迁移学习训练一个模型来实现蚂蚁和蜜蜂的分类。如果从头开始训练的话,这是一个非常小的数据集,就算做了数据增强也难以达到很好的效果。因此我们引入迁移学习的方法,采用在 ImageNet 上训练过的 resnet18 作为我们的预训练模型。
imagenet数据集三通道的均值和标准差分别是:$[0.485, 0.456, 0.406],[0.229, 0.224, 0.225]$。
该数据集是imagenet非常小的一个子集。只包含蚂蚁和蜜蜂两类。
所以数据标准化 Normalize 的时候我们也沿用 ImageNet 的均值和标准差。
种类 | 训练集 | 验证集 |
---|---|---|
蚂蚁 | 123 | 70 |
蜜蜂 | 121 | 83 |
总计 | 244 | 153 |
import copy
import math
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
# ImageNet channel statistics — reused because the backbone was pretrained on ImageNet.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

data_transforms = {
    # Training pipeline: random crop + horizontal flip for augmentation.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD),
    ]),
    # Validation pipeline: deterministic resize + center crop, no augmentation.
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD),
    ]),
}
# Root directory holding `train/` and `val/` subfolders, one class per subdirectory.
datasets_path = './dataset'

# One ImageFolder dataset per phase.
# Fix: use `datasets_path` instead of repeating the hard-coded './dataset' literal.
image_datasets = {
    x: datasets.ImageFolder(
        root=os.path.join(datasets_path, x),
        transform=data_transforms[x]
    ) for x in ['train', 'val']
}

# Definitions used by the plotting and training code below but missing from this
# notebook export (`class_names` at the preview, `dataset_sizes`/`device` inside
# train_model). NOTE(review): if they are defined elsewhere in the full file,
# these re-definitions are harmless duplicates.
class_names = image_datasets['train'].classes
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) <ipython-input-3-b87ab53d7868> in <module>() 3 root=os.path.join('./dataset', x), 4 transform=data_transforms[x] ----> 5 ) for x in ['train', 'val'] 6 } <ipython-input-3-b87ab53d7868> in <dictcomp>(.0) 3 root=os.path.join('./dataset', x), 4 transform=data_transforms[x] ----> 5 ) for x in ['train', 'val'] 6 } /usr/local/lib/python3.7/dist-packages/torchvision/datasets/folder.py in __init__(self, root, transform, target_transform, loader, is_valid_file) 254 transform=transform, 255 target_transform=target_transform, --> 256 is_valid_file=is_valid_file) 257 self.imgs = self.samples /usr/local/lib/python3.7/dist-packages/torchvision/datasets/folder.py in __init__(self, root, loader, extensions, transform, target_transform, is_valid_file) 124 super(DatasetFolder, self).__init__(root, transform=transform, 125 target_transform=target_transform) --> 126 classes, class_to_idx = self._find_classes(self.root) 127 samples = self.make_dataset(self.root, class_to_idx, extensions, is_valid_file) 128 if len(samples) == 0: /usr/local/lib/python3.7/dist-packages/torchvision/datasets/folder.py in _find_classes(self, dir) 162 No class is a subdirectory of another. 163 """ --> 164 classes = [d.name for d in os.scandir(dir) if d.is_dir()] 165 classes.sort() 166 class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} FileNotFoundError: [Errno 2] No such file or directory: './dataset/train'
# One shuffled DataLoader per phase; batch_size=4 so a 2x2 grid shows one full batch.
dataloaders = {
    phase: DataLoader(
        image_datasets[phase],
        batch_size=4,
        shuffle=True,
        num_workers=0,
    )
    for phase in ('train', 'val')
}
# Grab one training batch and tile it into a single preview image.
batch = next(iter(dataloaders['train']))
inputs, labels = batch
grid_images = torchvision.utils.make_grid(inputs)
def no_normalize(im):
    """Undo ImageNet normalization for display.

    Converts a CHW tensor to HWC, then reverses the Normalize transform
    (x * std + mean) so pixel values are back in display range.
    """
    std = torch.Tensor([0.229, 0.224, 0.225])
    mean = torch.Tensor([0.485, 0.456, 0.406])
    hwc = im.permute(1, 2, 0)
    return hwc * std + mean
# De-normalize for display, then show the batch with its class labels as the title.
grid_images = no_normalize(grid_images)
plt.title([class_names[i] for i in labels])
plt.imshow(grid_images)
plt.show()
之前提到过,迁移学习有两种常见的方法,我们就简单的称之为参数微调和特征提取吧。下面,我们将分别使用这两种方法来训练我们的模型,最后再进行对比分析。两种方法用同一个函数训练,只不过传的参数不同。公用的训练函数如下:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train `model` and return it loaded with the best validation weights.

    Relies on module-level globals: `dataloaders`, `dataset_sizes`, `device`.

    Args:
        model: network to train (caller is expected to have moved it to `device`).
        criterion: loss function, e.g. nn.CrossEntropyLoss().
        optimizer: optimizer over the parameters that should be updated.
        scheduler: LR scheduler, stepped once per epoch after the train phase.
        num_epochs: number of epochs to run.

    Returns:
        The model with the weights that achieved the best validation accuracy.
    """
    t_start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        lr = optimizer.param_groups[0]['lr']
        print(
            f'EPOCH: {epoch+1:0>{len(str(num_epochs))}}/{num_epochs}',
            f'LR: {lr:.4f}',
            end=' '
        )
        # Each epoch has a training phase followed by a validation phase.
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # enable dropout / batch-norm updates
            else:
                model.eval()   # use running stats, deterministic forward
            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # Reset gradients from the previous step.
                optimizer.zero_grad()
                # Build the graph only during training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    preds = outputs.argmax(1)
                    loss = criterion(outputs, labels)
                    # Backprop + parameter update, training phase only.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # Accumulate plain Python numbers via .item(); this replaces the
                # deprecated `labels.data` access and the tensor accumulator.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()
            if phase == 'train':
                # Decay the learning rate once per epoch.
                scheduler.step()
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            # Progress report: train metrics stay on the same line, val metrics end it.
            if phase == 'train':
                print(
                    f'LOSS: {epoch_loss:.4f}',
                    f'ACC: {epoch_acc:.4f} ',
                    end=' '
                )
            else:
                print(
                    f'VAL-LOSS: {epoch_loss:.4f}',
                    f'VAL-ACC: {epoch_acc:.4f} ',
                    end='\n'
                )
            # Keep a deep copy of the best weights seen on validation.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
    total_time = time.time() - t_start
    print('-'*10)
    print(
        f'TOTAL-TIME: {total_time//60:.0f}m{total_time%60:.0f}s',
        f'BEST-VAL-ACC: {best_acc:.4f}'
    )
    # Restore the best weights before returning.
    model.load_state_dict(best_model_wts)
    return model
该方法使用预训练的参数来初始化我们的网络模型,修改全连接层后再训练所有层。
# ---- Method 1: fine-tuning ----
# Initialize from ImageNet-pretrained weights, replace the head, train ALL layers.
model_ft = models.resnet18(pretrained=True)
# Swap the 1000-class ImageNet head for one sized to our classes.
model_ft.fc = nn.Linear(model_ft.fc.in_features, len(class_names))
model_ft = model_ft.to(device)

# Loss function.
criterion = nn.CrossEntropyLoss()
# Every parameter is optimized in this method.
optimizer_ft = optim.SGD(model_ft.parameters(), lr=1e-3, momentum=0.9)
# Multiply the learning rate by 0.1 every 5 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)

# Train with the shared training loop.
model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=10
)
该方法冻结除全连接层外的所有层的权重,修改全连接层后仅训练全连接层。
# ---- Method 2: feature extraction ----
# Freeze the pretrained backbone and train only the new classification head.
model_conv = models.resnet18(pretrained=True)
# Freeze all pretrained layers so no gradients are computed for them.
for param in model_conv.parameters():
    param.requires_grad = False
# Input feature count of resnet18's fully-connected layer.
num_ftrs = model_conv.fc.in_features
# Consistency fix: size the head from class_names (== 2 here) like the
# fine-tuning section, instead of a hard-coded 2. The new layer's parameters
# have requires_grad=True by default.
model_conv.fc = nn.Linear(num_ftrs, len(class_names))
# Move the model to GPU/CPU.
model_conv = model_conv.to(device)
# Loss function.
criterion = nn.CrossEntropyLoss()
# Only the head's parameters are handed to the optimizer.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=1e-3, momentum=0.9)
# Multiply the learning rate by 0.1 every 5 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=5, gamma=0.1)
# Train with the shared training loop.
model_conv = train_model(
    model_conv,
    criterion,
    optimizer_conv,
    exp_lr_scheduler,
    num_epochs=10
)
def visualize_model(model):
    """Show one validation batch with predicted vs. true labels.

    Uses module-level globals `dataloaders`, `device`, `class_names`, and
    `no_normalize`. Saves the finished figure to 'train.jpg'.
    """
    model.eval()
    with torch.no_grad():
        inputs, labels = next(iter(dataloaders['val']))
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        preds = outputs.argmax(1)
        n = inputs.size(0)
        # Generalization: size the grid to the batch (2x2 for batch_size=4)
        # instead of a hard-coded 2x2 that breaks for larger batches.
        ncols = max(1, math.ceil(math.sqrt(n)))
        nrows = math.ceil(n / ncols)
        plt.figure(figsize=(9, 9))
        for i in range(n):
            plt.subplot(nrows, ncols, i + 1)
            plt.axis('off')
            plt.title(f'pred: {class_names[preds[i]]}|true: {class_names[labels[i]]}')
            im = no_normalize(inputs[i].cpu())
            plt.imshow(im)
        # Fix: save once after the grid is complete; the original called
        # savefig inside the loop, rewriting a partial figure every iteration.
        plt.savefig('train.jpg')
        plt.show()
# Persist the feature-extractor's weights, then rebuild the network on CPU
# and restore them for inference.
torch.save(model_conv.state_dict(), 'model.pt')

device = torch.device('cpu')
model = models.resnet18(pretrained=False)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(class_names))
state = torch.load('model.pt', map_location=device)
model.load_state_dict(state)
百度或必应图片中随便找几张张蚂蚁和蜜蜂的图片,或者用手机拍几张照片也行。用上一步加载的模型测试一下分类的效果。