For this task we will use the PASCAL VOC dataset. The dataset contains a total of 2913 images with segmentation annotations. The code in the cell below will download and extract the dataset.
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
!tar -xvf VOCtrainval_11-May-2012.tar
!easy_install pip
!pip install pillow>=4.3.0
!pip install scipy==1.1.0
WARNING: The easy_install command is deprecated and will be removed in a future version. Searching for pip Best match: pip 20.1.1 Adding pip 20.1.1 to easy-install.pth file Installing pip script to /opt/conda/bin Installing pip3 script to /opt/conda/bin Installing pip3.6 script to /opt/conda/bin Using /opt/conda/lib/python3.6/site-packages Processing dependencies for pip Finished processing dependencies for pip Collecting scipy==1.1.0 Downloading scipy-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (31.2 MB) |████████████████████████████████| 31.2 MB 8.9 MB/s eta 0:00:01 |███▊ | 3.7 MB 8.9 MB/s eta 0:00:04 Requirement already satisfied: numpy>=1.8.2 in /opt/conda/lib/python3.6/site-packages (from scipy==1.1.0) (1.18.5) Installing collected packages: scipy Attempting uninstall: scipy Found existing installation: scipy 1.5.0 Uninstalling scipy-1.5.0: Successfully uninstalled scipy-1.5.0 Successfully installed scipy-1.1.0
import os
import copy
import random
import torch
import imageio
import numpy as np
import torch.optim as optim
from os.path import join as pjoin
# import pascalVOCDataset
from datasets.pascalvoc import pascalVOCDataset
In this section you have the freedom to decide your own model. Keep in mind though, to perform image segmentation, you need to implement an architecture that does pixel level classification i.e. for each pixel in the image you need to predict the probability of it belonging to one of the 21 categories.
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
class SimpleSegNet(nn.Module):
    """Minimal segmentation network.

    The model chains three stages:

    * ``encoder``: the ``features`` part of a torchvision VGG16,
    * ``conv1``: a 1x1 convolution mapping the 512 encoder channels to one
      score map per class,
    * ``convtrans``: a single transposed convolution (kernel 62, stride 30)
      that upsamples the score maps.

    Args:
        num_classes: number of output channels, i.e. segmentation classes.
            Defaults to 21, the value that was previously hard-coded.
        pretrained: forwarded to ``models.vgg16``; ``True`` (the default)
            loads pretrained encoder weights.
    """

    def __init__(self, num_classes=21, pretrained=True):
        super().__init__()
        self.encoder = models.vgg16(pretrained=pretrained, progress=False).features
        self.conv1 = nn.Conv2d(512, num_classes, 1)
        # With padding 0 the output side length is (in - 1) * 30 + 62.
        # NOTE(review): presumably tuned so that 512x512 inputs come back at
        # 512x512 -- confirm against the dataset's image size.
        self.convtrans = nn.ConvTranspose2d(num_classes, num_classes, 62, stride=30)

    def forward(self, x):
        """Return per-pixel class scores for the image batch ``x``."""
        features = self.encoder(x)
        scores = self.conv1(features)
        return self.convtrans(scores)
# Expose only the GPU with index "2" to CUDA for this process.
# NOTE(review): machine-specific value -- adjust to the GPU available locally.
os.environ["CUDA_VISIBLE_DEVICES"]="2"
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
device(type='cuda')
# Build the network with its default arguments and move its parameters to `device`.
model = SimpleSegNet().to(device)
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/checkpoints/vgg16-397923af.pth
Define all the hyperparameters (not restricted to the three given below) that you find useful here.
# Reproducibility: seed the torch, Python `random` and NumPy generators with the same value.
torch.manual_seed(253452757)
random.seed(253452757)
np.random.seed(253452757)
local_path = 'VOCdevkit/VOC2012/' # modify it according to your device
bs = 10  # batch size
epochs = 30  # number of training epochs
learning_rate = 0.01  # learning rate for the SGD optimizer
sgd_momentum = 0.9  # momentum for the SGD optimizer
with_weights = True  # if True, the loss is built with per-class weights
Create the dataset using pascalVOCDataset class defined above. Use local_path defined in the cell above as root.
# Datasets: transformed train/val splits plus an untransformed copy of the
# training split (is_transform=False).
train_data = pascalVOCDataset(local_path, split="train")
train_data_raw = pascalVOCDataset(local_path, split="train", is_transform=False)
val_data = pascalVOCDataset(local_path, split="val")

# Number of samples per split and overall.
dataset_sizes = {'train': len(train_data), 'val': len(val_data)}
dataset_sizes['total'] = dataset_sizes['train'] + dataset_sizes['val']

# One loader per split, both with identical settings.
# NOTE(review): the validation loader is shuffled as well.
dataloaders = {
    split: torch.utils.data.DataLoader(data, batch_size=bs, shuffle=True, num_workers=0)
    for split, data in (('train', train_data), ('val', val_data))
}
dataset_sizes
{'train': 1464, 'val': 1449, 'total': 2913}
Define below the loss function you think is most suitable for the segmentation task. You are free to choose any optimizer to train the network.
from utils import median_frequency_balancing

# Per-class weights to account for the class imbalance; None disables weighting.
weights = None
if with_weights:
    weights = torch.from_numpy(median_frequency_balancing(train_data, 21)).float()

# Cross-entropy loss; the weight tensor (if any) is moved to the model's device.
loss_f = nn.CrossEntropyLoss(weight=None if weights is None else weights.to(device))

# SGD with momentum over all model parameters.
opt = optim.SGD(model.parameters(), lr=learning_rate, momentum=sgd_momentum)
Your task here is to complete the code below to perform a training loop and save the model weights after each epoch of training.
from utils import plot_metric
from training import train

# Directory that receives one checkpoint file per epoch.
model_path = "models/simple-segnet"
os.makedirs(model_path, exist_ok=True)

model = train(model, dataloaders, dataset_sizes, model_path, loss_f, opt, epochs)

# Read the loss stored in each epoch's checkpoint and plot the curve.
losses = []
for epoch in range(epochs):
    checkpoint = torch.load(pjoin(model_path, "epoch-{}.pt".format(epoch)))
    losses.append(checkpoint['loss'])
plot_metric(losses, "Loss")
In this section you have to implement the evaluation metrics for your model. Calculate the values of F1-score, dice coefficient and AUC-ROC score on the data you used for training. You can use external packages like scikit-learn to compute above metrics.
from evaluation import EvaluationReport
Since the F1-score and the Dice's coefficient are equivalent, we have included Jaccard's similarity, also denoted as intersection-over-union (IoU) metric instead of Dice's coefficient.
# For every saved epoch: restore the weights, evaluate on the training loader,
# and write the metrics back into the same checkpoint file.
for epoch in range(epochs):
    print('Evaluating Epoch {}/{}'.format(epoch, epochs - 1))
    checkpoint_file = pjoin(model_path, "epoch-{}.pt".format(epoch))

    # Restore the weights saved for this epoch.
    checkpoint = torch.load(checkpoint_file)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Build the evaluation report over the 21 class labels.
    eval_report = EvaluationReport.from_model(dataloaders['train'], model, list(range(21)))

    # Macro-averaged metrics are stored next to the weights.
    checkpoint['f1-score'] = eval_report.f1_score(average="macro")
    checkpoint['jaccard-sim'] = eval_report.jaccard_similarity(average="macro")

    # Overwrite the checkpoint so it now also carries the metrics.
    torch.save(checkpoint, checkpoint_file)
# Per-epoch metric histories, read back from the checkpoint files.
f1_scores, jaccard_similarities, roc_auc_scores = [], [], []
for epoch in range(epochs):
    checkpoint = torch.load(pjoin(model_path, "epoch-{}.pt".format(epoch)))
    f1_scores.append(checkpoint['f1-score'])
    jaccard_similarities.append(checkpoint['jaccard-sim'])
In section 1.6 we saved the weights of the model after each epoch. In this section, you have to calculate the evaluation metrics after each epoch of training by loading the weights for each epoch. Once you have calculated the evaluation metrics for each epoch, plot them against the epochs.
from utils import plot_metric
# Plot the F1-score per epoch, then report the best value and the
# zero-based epoch index at which it occurs.
plot_metric(f1_scores, "F1-score", "g")
print("Maximal F1-score = {} (epoch {})".format(max(f1_scores), np.argmax(f1_scores)))
Maximal F1-score = 0.4655 (epoch 24)
# Plot the Jaccard similarity per epoch, then report the best value and the
# zero-based epoch index at which it occurs.
plot_metric(jaccard_similarities, "Jaccard's similarity")
print("Maximal Jaccard's similarity = {} (epoch {})".format(max(jaccard_similarities), np.argmax(jaccard_similarities)))
Maximal Jaccard's similarity = 0.3205 (epoch 24)
def get_batched_predictions(model, dataloader):
    """Run ``model`` over ``dataloader`` and collect outputs and targets per batch.

    Args:
        model: network to evaluate. It is moved to the module-level ``device``
            and switched to eval mode.
        dataloader: iterable yielding ``(inputs, ground_truths)`` batches.

    Returns:
        A ``(pred_list, true_list)`` tuple: two lists holding one CPU tensor
        per batch, the raw model outputs and the matching ground truths.
    """
    pred_list = []
    true_list = []
    model = model.to(device)
    model.eval()
    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        # Iterate the loader passed by the caller rather than a global one,
        # so the function works for any split.
        for inputs, ground_truths in dataloader:
            outputs = model(inputs.to(device))
            pred_list.append(outputs.cpu())
            true_list.append(ground_truths.cpu())
    return pred_list, true_list
def roc_aoc_transform(preds, trues, num_classes=21):
    """Turn raw outputs into probabilities and labels into one-hot tensors.

    Args:
        preds: list of score tensors; softmax is applied over dim 1 and the
            first two dimensions are swapped, so a ``(N, C, H, W)`` batch
            becomes ``(C, N, H, W)``.
        trues: list of integer label tensors, one per entry of ``preds``.
            Each is one-hot encoded and the new class axis is moved to the
            front, so a ``(N, H, W)`` batch becomes ``(C, N, H, W)``.
        num_classes: number of classes used for the one-hot encoding.
            Defaults to 21.

    Returns:
        A ``(probabilities, one_hot_targets)`` tuple of two lists with one
        tensor per input batch.
    """
    probabilities = []
    one_hot_targets = []
    for idx, scores in enumerate(preds):
        probabilities.append(F.softmax(scores, dim=1).permute(1, 0, 2, 3))
        one_hot_targets.append(
            F.one_hot(trues[idx], num_classes=num_classes).permute(3, 0, 1, 2)
        )
    return probabilities, one_hot_targets
def roc_auc(preds, trues):
    """Compute the ROC AUC of every batch and return the mean over batches.

    Args:
        preds: list of raw model outputs, one tensor per batch.
        trues: list of ground-truth label tensors aligned with ``preds``.

    Returns:
        The average of the per-batch AUROC values.

    Raises:
        ValueError: if ``preds`` is empty (there is nothing to average).
    """
    # Imported locally: the notebook's visible import cells do not import
    # torchmetrics, so the module-level name would otherwise be undefined.
    import torchmetrics

    if not preds:
        raise ValueError("roc_auc needs at least one batch of predictions")

    metric = torchmetrics.AUROC(num_classes=21)
    preds, trues = roc_aoc_transform(preds, trues)
    auc_roc = 0
    for i, (batch_preds, batch_trues) in enumerate(zip(preds, trues)):
        # Keep the leading (class) axis and flatten everything else.
        # NOTE(review): this passes class-first tensors to the metric --
        # confirm the layout expected by the installed torchmetrics version.
        pred = torch.flatten(batch_preds, start_dim=1)
        true = torch.flatten(batch_trues, start_dim=1)
        auc = metric(pred, true).cpu()
        # Calling the metric also accumulates its inputs internally; only the
        # per-batch value is used here, so drop that state to bound memory.
        metric.reset()
        auc_roc = auc_roc + auc
        print(f"AUROC on batch {i}: {auc}")
    return auc_roc / len(preds)
# Collect per-batch outputs and targets for the validation loader, then
# compute the ROC AUC averaged over batches and display it.
pred_list, true_list = get_batched_predictions(model, dataloaders["val"])
score = roc_auc(pred_list, true_list)
score
For any 10 images in the dataset, show the images along with their segmentation masks.
from utils import plot_seg_results
from torchvision import transforms
Select the best model with respect to its Jaccard's similarity / mIoU
# Zero-based index of the epoch with the highest Jaccard similarity
# (np.argmax returns the first occurrence on ties).
best_epoch = np.argmax(jaccard_similarities)
best_epoch
24
Load the best model's weight:
# Read the checkpoint file of the selected epoch and restore its weights into the model.
checkpoint = torch.load(pjoin(model_path, "epoch-{}.pt".format(best_epoch)))
model.load_state_dict(checkpoint['model_state_dict'])
<All keys matched successfully>
Select 10 images and predict their segmentation mask using the best weights:
# Index of the first picture to show and how many consecutive pictures to show.
init_pic = 49
n_pics = 10

# Fetch every sample exactly once so each input tensor and its mask come
# from the same dataset access.
samples = [train_data[i] for i in range(init_pic, init_pic + n_pics)]
inputs = torch.stack([sample[0] for sample in samples])
ground_truths = torch.stack([sample[1] for sample in samples])
# Untransformed images, used for display only.
images = [train_data_raw[i][0] for i in range(init_pic, init_pic + n_pics)]

# Inference only: eval mode and no autograd graph.
model.eval()
with torch.no_grad():
    outputs = model(inputs.to(device))

# Arg-max over the class/channel dimension (dim 1). The batch dimension is
# deliberately not squeezed, so this also works when n_pics == 1.
predictions = torch.argmax(outputs.cpu(), dim=1)

# Convert label maps to colour images for plotting.
predictions = [train_data.decode_segmap(pred.numpy()) for pred in predictions]
ground_truths = [train_data.decode_segmap(gt.numpy()) for gt in ground_truths]
plot_seg_results(images, ground_truths, predictions)