Still trying to figure out various ways of separating buildings from each other, but in the meantime, can we improve the raw segmentation results with better loss functions?
Some options to explore:
- better image augmentations
- weighted edges
- test-time augmentation
from fastai.imports import *
from fastai.vision import *
from fastai.metrics import dice
from fastai.callbacks import *
import tifffile as tiff
from skimage.external import tifffile as sktif
from joblib import Parallel, delayed
import torch.nn.functional as F
import torch
import functools, traceback
def gpu_mem_restore(func):
    """Decorator: reclaim GPU RAM if CUDA out-of-memory happened, or execution
    was interrupted.

    Clearing the traceback frames drops the references to CUDA tensors that the
    failing stack is holding, which lets PyTorch free the memory; the exception
    is then re-raised with the (cleared) traceback attached.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        # BaseException (not bare except / Exception) so KeyboardInterrupt is
        # also handled, per the "execution was interrupted" use case.
        except BaseException:
            exc_type, exc_val, tb = sys.exc_info()
            traceback.clear_frames(tb)
            raise exc_type(exc_val).with_traceback(tb) from None
    return wrapper
# Tell fastai to clear traceback frames too, so OOM errors release GPU memory.
os.environ['FASTAI_TB_CLEAR_FRAMES']="1"
from importlib import reload
from lovasz_losses import *
# Dataset layout: full tiles under training/, pre-cropped patches under cropped_*/.
data_dir = Path("/media/wwymak/Storage/urban-3D-satellite")
train_dir = data_dir / "training"
cropped_dir = data_dir / "cropped_training"
cropped_val_dir = data_dir / "cropped_validation"
def get_img_id(fname):
    """Derive the tile id from a crop filename: strip the '_RGB.tif' suffix,
    then drop the last underscore-separated token (the crop index)."""
    stem = fname.replace('_RGB.tif', '')
    pieces = stem.split('_')
    return '_'.join(pieces[:-1])
# Collect the crop ids / filenames for the training and validation splits.
train_img_ids = [get_img_id(f.name) for f in cropped_dir.ls() if f.name.endswith('RGB.tif')]
val_img_ids = [get_img_id(f.name) for f in cropped_val_dir.ls() if f.name.endswith('RGB.tif')]
train_cropped_imgs = [f.name for f in (data_dir / "cropped_training").ls() if f.name.endswith('RGB.tif')]
valid_cropped_imgs = [f.name for f in (data_dir / "cropped_validation").ls() if f.name.endswith('RGB.tif')]
# One row per image: relative image path, absolute mask path (RGB -> pytorch_GTL),
# and an is_valid flag used to split train/validation.
dataset_df = pd.DataFrame({"name":[f"cropped_training/{f}" for f in train_cropped_imgs] \
+ [f"cropped_validation/{f}" for f in valid_cropped_imgs],
"label": [f"{str(data_dir)}/cropped_training/{f.replace('RGB', 'pytorch_GTL')}" for f in train_cropped_imgs] \
+ [f"{str(data_dir)}/cropped_validation/{f.replace('RGB', 'pytorch_GTL')}" for f in valid_cropped_imgs],\
"is_valid": [False for i in train_cropped_imgs] + [True for i in valid_cropped_imgs ] })
# codes = ["building"]
codes = ["background", "building"]
src = (SegmentationItemList.from_df(dataset_df, path=data_dir )
. split_from_df(col="is_valid")
.label_from_df(cols="label", classes=codes))
# Start at 128px crops (progressive resizing bumps this to 256 later).
size = 128
bs=64
# Heavy augmentation; tfm_y=True applies the same spatial transforms to the masks.
data = (src.transform(get_transforms(do_flip=True,
flip_vert=True,
max_rotate=180,
max_zoom=1.2,
max_lighting=0.5,
max_warp=0.2,
p_affine=0.75,
p_lighting=0.75), size=size, tfm_y=True)
.databunch(bs=bs)
.normalize(imagenet_stats))
def accuracy_pixel(input, target):
    """Pixel accuracy computed over foreground pixels only (label 0 is ignored)."""
    target = target.squeeze(1)
    # keep only pixels whose ground-truth label is non-background
    valid = target.ne(0)
    preds = input.argmax(dim=1)
    return (preds[valid] == target[valid]).float().mean()
# Visual check that images and masks stay aligned under the augmentations.
data.show_batch(8, figsize=(20,20))
def flatten_binary_scores2(scores, labels, ignore=None):
    """Flatten batch scores and labels to 1-D tensors (binary case),
    dropping every pixel whose label equals `ignore`."""
    flat_scores = scores.contiguous().view(-1)
    flat_labels = labels.contiguous().view(-1)
    if ignore is None:
        return flat_scores, flat_labels
    keep = flat_labels != ignore
    return flat_scores[keep], flat_labels[keep]
def lovasz_hinge(logits, labels, per_image=False, ignore=None):
    """
    Binary Lovasz hinge loss.
    logits: [B, C, H, W] raw scores; channel 1 (building) is used as the binary logit
    labels: [B, 1, H, W] binary ground truth masks (0 or 1)
    per_image: compute the loss per image instead of over the whole batch
    ignore: void class id to drop from the loss
    """
    # (debug print of logits min/max removed — it ran on every batch)
    # keep the foreground-class logits and drop the channel dim from the targets
    logits = logits[:, 1, :, :]
    labels = labels.squeeze(1)
    if per_image:
        loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore))
                    for log, lab in zip(logits, labels))
    else:
        loss = lovasz_hinge_flat(*flatten_binary_scores2(logits, labels, ignore))
    return loss
def lovasz_hinge_flat(logits, labels):
    """
    Binary Lovasz hinge loss on flattened predictions.
    logits: [P] logits at each prediction (any real value)
    labels: [P] binary ground truth labels (0 or 1)
    """
    if len(labels) == 0:
        # only void pixels: return a zero that still carries the graph,
        # so gradients are well-defined (and zero)
        return logits.sum() * 0.
    # map {0, 1} labels to {-1, +1} signs
    signs = labels.float().mul(2.).sub(1.)
    hinge_errors = 1. - logits * Variable(signs)
    errors_sorted, perm = torch.sort(hinge_errors, dim=0, descending=True)
    gt_sorted = labels[perm.data]
    grad = lovasz_grad(gt_sorted)
    return torch.dot(F.relu(errors_sorted), Variable(grad))
def combined_loss(logits, labels):
    """0.8 * BCE + Lovasz hinge on the building channel; background pixels
    (label 0) are excluded from the Lovasz term but kept in the BCE term."""
    fg_logits = logits[:, 1, :, :].float()
    flat_labels = labels.squeeze(1).float()
    lovasz_term = lovasz_hinge_flat(*flatten_binary_scores2(fg_logits, flat_labels, ignore=0))
    bce_term = F.binary_cross_entropy_with_logits(fg_logits, flat_labels)
    return bce_term * 0.8 + lovasz_term
# resnet34-backed U-Net trained in fp16 with the combined BCE + Lovasz loss.
wd =1e-2
metrics = [accuracy_pixel,dice]
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd, loss_func=combined_loss)
learn.to_fp16();
# LR range test to pick the learning rate for fit_one_cycle below.
lr_find(learn)
learn.recorder.plot(skip_end=1)
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
lr=1e-3
# pct_start=0.8: spend most of the cycle ramping the LR up before annealing.
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)
epoch | train_loss | valid_loss | accuracy_pixel | dice | time |
---|---|---|---|---|---|
0 | 0.941657 | 0.723196 | 0.981836 | 0.259024 | 00:15 |
1 | 0.781955 | 0.605283 | 0.973925 | 0.317851 | 00:09 |
2 | 0.720370 | 0.587200 | 0.986201 | 0.318457 | 00:09 |
3 | 0.633609 | 0.454838 | 0.973237 | 0.386033 | 00:09 |
4 | 0.591445 | 0.451638 | 0.945282 | 0.366780 | 00:09 |
5 | 0.613031 | 0.556444 | 0.983696 | 0.322247 | 00:09 |
6 | 0.647956 | 0.538623 | 0.970270 | 0.360594 | 00:09 |
7 | 0.599904 | 0.536134 | 0.990787 | 0.337628 | 00:09 |
8 | 0.556940 | 0.413740 | 0.986343 | 0.421020 | 00:09 |
9 | 0.502675 | 0.389141 | 0.981200 | 0.452987 | 00:09 |
# checkpoint after the first (frozen-encoder) training stage
learn.save('lovaz-loss-stage1')
The combined BCE + Lovasz hinge loss seems to have helped!
# visually compare predictions against the ground-truth masks
learn.show_results(rows=10, figsize=(8,20))
However, as we can see, there are issues with the buildings merging into each other, probably as we're not handling the class 0 loss very well.
What if, we update the combined loss to not ignore the zeros?
def combined_loss2(logits, labels):
    """0.8 * BCE + Lovasz hinge over ALL pixels — unlike combined_loss, the
    background class is not ignored in the Lovasz term."""
    fg_logits = logits[:, 1, :, :].float()
    flat_labels = labels.squeeze(1).float()
    lovasz_term = lovasz_hinge_flat(*flatten_binary_scores2(fg_logits, flat_labels))
    bce_term = F.binary_cross_entropy_with_logits(fg_logits, flat_labels)
    return bce_term * 0.8 + lovasz_term
wd =1e-2
metrics = [accuracy_pixel,dice]
# same setup as before, but with the loss that keeps background pixels
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd, loss_func=combined_loss2)
learn.to_fp16();
lr=1e-3
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)
epoch | train_loss | valid_loss | accuracy_pixel | dice | time |
---|---|---|---|---|---|
0 | 2.096585 | 1.625379 | 0.291321 | 0.294147 | 00:14 |
1 | 1.729736 | 1.373122 | 0.039301 | 0.072654 | 00:09 |
2 | 1.572195 | 1.342182 | 0.088764 | 0.157758 | 00:09 |
3 | 1.467142 | 1.363441 | 0.041958 | 0.079739 | 00:09 |
4 | 1.460650 | 1.297386 | 0.266683 | 0.389975 | 00:09 |
5 | 1.410257 | 1.249807 | 0.374496 | 0.496982 | 00:09 |
6 | 1.342018 | 1.139163 | 0.480276 | 0.602772 | 00:09 |
7 | 1.312162 | 1.133835 | 0.499022 | 0.622152 | 00:09 |
8 | 1.282838 | 1.083839 | 0.581498 | 0.668909 | 00:09 |
9 | 1.229516 | 1.086541 | 0.604295 | 0.678851 | 00:09 |
learn.save('lovaz-loss-stage1-ignore-none')
learn.show_results(rows=10, figsize=(8,20))
# unfreeze the pretrained encoder and fine-tune the whole network
learn.unfreeze()
lr_find(learn)
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
learn.recorder.plot(skip_start=None)
# discriminative LRs: much smaller for the pretrained encoder, larger for the head
lrs = slice(lr/400,lr/4)
learn.fit_one_cycle(12, lrs, pct_start=0.8)
epoch | train_loss | valid_loss | accuracy_pixel | dice | time |
---|---|---|---|---|---|
0 | 1.159289 | 1.072842 | 0.594632 | 0.677623 | 00:10 |
1 | 1.148537 | 1.065870 | 0.603912 | 0.683072 | 00:10 |
2 | 1.145095 | 1.068238 | 0.619173 | 0.688815 | 00:09 |
3 | 1.135726 | 1.054417 | 0.627061 | 0.696300 | 00:09 |
4 | 1.123820 | 1.043263 | 0.622098 | 0.693442 | 00:10 |
5 | 1.111574 | 1.032858 | 0.667092 | 0.710604 | 00:10 |
6 | 1.109143 | 1.035043 | 0.630003 | 0.705366 | 00:09 |
7 | 1.099070 | 1.031720 | 0.701172 | 0.723736 | 00:09 |
8 | 1.088027 | 0.998545 | 0.656065 | 0.717367 | 00:10 |
9 | 1.072874 | 1.011678 | 0.663764 | 0.718923 | 00:09 |
10 | 1.059594 | 0.966032 | 0.722534 | 0.744314 | 00:09 |
11 | 1.042305 | 0.969649 | 0.722419 | 0.741129 | 00:09 |
learn.save('lovaz-loss-stage2-ignore-none')
learn.show_results(rows=8)
# increase image sizes and continue training
wd =1e-2
metrics = [accuracy_pixel,dice]
# progressive resizing: 128 -> 256 px; halve the batch size to fit in GPU memory
size = 256
bs=32
data = (src.transform(get_transforms(do_flip=True,
flip_vert=True,
max_rotate=180,
max_zoom=1.2,
max_lighting=0.5,
max_warp=0.2,
p_affine=0.75,
p_lighting=0.75), size=size, tfm_y=True)
.databunch(bs=bs)
.normalize(imagenet_stats))
# Reuse the existing learner at the new resolution if this notebook session
# already built one; otherwise (cells run from scratch) create it fresh.
# Catch only NameError — the one failure we expect when `learn` doesn't exist;
# the previous bare `except:` would have hidden any other error.
try:
    learn.data = data
except NameError:
    learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd, loss_func=combined_loss2)
learn.to_fp16();
# restore the best weights from the 128px fine-tuning stage
learn.load('lovaz-loss-stage2-ignore-none')
learn.freeze()
learn.to_fp16();  # NOTE(review): to_fp16 was already called above — likely redundant
lr_find(learn)
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
learn.recorder.plot(skip_end=1)
# lower LR for continued training at the higher resolution
lr=1e-4
# checkpoint whenever the dice metric improves
learn.fit_one_cycle(10, slice(lr), pct_start=0.8,
callbacks=[SaveModelCallback(learn, every='improvement', monitor='dice', name='lovaz-loss-stage3-ignore-none')])
epoch | train_loss | valid_loss | accuracy_pixel | dice | time |
---|---|---|---|---|---|
0 | 1.171193 | 1.139302 | 0.744372 | 0.723440 | 00:33 |
1 | 1.121718 | 1.190995 | 0.745211 | 0.741132 | 00:33 |
2 | 1.109659 | 1.046413 | 0.778693 | 0.748786 | 00:32 |
3 | 1.071222 | 0.994989 | 0.770700 | 0.758337 | 00:33 |
4 | 1.043691 | 0.924012 | 0.742449 | 0.764929 | 00:33 |
5 | 1.031354 | 0.917510 | 0.756440 | 0.768916 | 00:33 |
6 | 1.012524 | 0.916214 | 0.773177 | 0.772853 | 00:33 |
7 | 1.010209 | 0.911846 | 0.785267 | 0.774773 | 00:32 |
8 | 0.988066 | 0.908342 | 0.808190 | 0.774416 | 00:33 |
9 | 0.959390 | 0.884320 | 0.791626 | 0.780417 | 00:35 |
Better model found at epoch 0 with dice value: 0.7234397530555725. Better model found at epoch 1 with dice value: 0.7411317229270935. Better model found at epoch 2 with dice value: 0.7487855553627014. Better model found at epoch 3 with dice value: 0.758337140083313. Better model found at epoch 4 with dice value: 0.7649291753768921. Better model found at epoch 5 with dice value: 0.768915593624115. Better model found at epoch 6 with dice value: 0.772853434085846. Better model found at epoch 7 with dice value: 0.7747730016708374. Better model found at epoch 9 with dice value: 0.7804170250892639.
learn.show_results(rows=8)
# final stage: unfreeze and fine-tune the whole network at 256 px
learn.unfreeze()
lr_find(learn)
learn.recorder.plot()
LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.
lrs = slice(lr/400,lr/4)
# keep only checkpoints that improve the dice metric
learn.fit_one_cycle(12, lrs, pct_start=0.8,
callbacks=[SaveModelCallback(learn, every='improvement', monitor='dice', name='lovaz-loss-stage4-ignore-none')])
epoch | train_loss | valid_loss | accuracy_pixel | dice | time |
---|---|---|---|---|---|
0 | 0.949066 | 0.893189 | 0.796160 | 0.779445 | 00:35 |
1 | 0.950403 | 0.883891 | 0.791997 | 0.781794 | 00:37 |
2 | 0.942032 | 0.883334 | 0.798013 | 0.781766 | 00:37 |
3 | 0.942719 | 0.872575 | 0.795616 | 0.784558 | 00:36 |
4 | 0.942663 | 0.884769 | 0.786577 | 0.781226 | 00:36 |
5 | 0.936178 | 0.894581 | 0.789426 | 0.779993 | 00:36 |
6 | 0.937734 | 0.880742 | 0.802573 | 0.781381 | 00:37 |
7 | 0.929634 | 0.872485 | 0.782286 | 0.783649 | 00:37 |
8 | 0.929276 | 0.867279 | 0.810102 | 0.787159 | 00:37 |
9 | 0.935305 | 0.857986 | 0.800135 | 0.788165 | 00:37 |
10 | 0.919585 | 0.860885 | 0.798395 | 0.786934 | 00:36 |
11 | 0.912029 | 0.856350 | 0.804760 | 0.788493 | 00:37 |
Better model found at epoch 0 with dice value: 0.7794451117515564. Better model found at epoch 1 with dice value: 0.7817935347557068. Better model found at epoch 3 with dice value: 0.7845578193664551. Better model found at epoch 8 with dice value: 0.787158727645874. Better model found at epoch 9 with dice value: 0.7881650924682617. Better model found at epoch 11 with dice value: 0.7884933948516846.
Would smaller batch sizes help further?
# larger grid of final predictions for inspection
learn.show_results(rows=16)
Of course, the ultimate goal is to do instance segmentation of the buildings, so let's see how to adapt this model to