Image segmentation with CamVid

In [ ]:

%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [ ]:

from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *

In [ ]:

# Fetch the CamVid dataset referenced by URLs.CAMVID and keep its local root
# directory (presumably untar_data only downloads/extracts when the data is
# not already present — confirm against the fastai docs).
path = untar_data(URLs.CAMVID)
# List the dataset root: images/, labels/, codes.txt and valid.txt.
path.ls()

Out[ ]:

[PosixPath('/home/ubuntu/.fastai/data/camvid/images'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/codes.txt'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/valid.txt')]

In [ ]:

# Sub-directories of the dataset root: label masks and input frames.
path_lbl = path/'labels'
path_img = path/'images'

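Subset classes

Optional alternative setup, left commented out: a smaller CamVid variant under ./data/camvid-small that uses 12 classes instead of the full label set loaded from codes.txt further down.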

In [ ]:

# path = Path('./data/camvid-small')

# def get_y_fn(x): return Path(str(x.parent)+'annot')/x.name

# codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
#     'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])

# src = (SegmentationItemList.from_folder(path)
#        .split_by_folder(valid='val')
#        .label_from_func(get_y_fn, classes=codes))

# bs=8
# data = (src.transform(get_transforms(), tfm_y=True)
#         .databunch(bs=bs)
#         .normalize(imagenet_stats))

Data

In [ ]:

# Collect the image files under images/ and peek at the first three paths.
fnames = get_image_files(path_img)
fnames[:3]

Out[ ]:

[PosixPath('/home/ubuntu/.fastai/data/camvid/images/0001TP_008160.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0016E5_08041.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0006R0_f02340.png')]

In [ ]:

# Collect the mask files under labels/; their names carry a `_P` suffix
# before the extension (see the output below).
lbl_names = get_image_files(path_lbl)
lbl_names[:3]

Out[ ]:

[PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05400_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0006R0_f02640_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05340_P.png')]

In [ ]:

# Open the first input frame and display it as a quick visual check.
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))

In [ ]:

def get_y_fn(x):
    """Return the label-mask path for image path `x`.

    The mask is looked up in `path_lbl` under the image's file name with `_P`
    inserted before the extension, e.g. `0001TP_008160.png` ->
    `0001TP_008160_P.png`.
    """
    # A named `def` (rather than a lambda bound to a name, PEP 8 E731) gives
    # the function a real __name__ for tracebacks and lets it be pickled by name.
    return path_lbl/f'{x.stem}_P{x.suffix}'

In [ ]:

# Load the label mask that get_y_fn maps img_f to and draw it fully opaque.
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)

In [ ]:

# Drop the leading dimension of the mask shape to get the source size
# (the output below shows 720 x 960), then show it next to the raw label ids.
src_size = np.array(mask.shape[1:])
src_size,mask.data

Out[ ]:

(array([720, 960]), tensor([[[ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          ...,
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30]]]))

In [ ]:

# Read the class names from codes.txt as a string array; their positions are
# used as the integer label ids further down (see name2id).
codes = np.loadtxt(path/'codes.txt', dtype=str); codes

Out[ ]:

array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
       'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
       'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
       'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')

Datasets

In [ ]:

# Train first at half the source resolution.
size = src_size // 2

# The largest usable batch size depends on the GPU RAM currently free:
# use 8 when more than 8200MB is reported, otherwise fall back to 4.
free = gpu_mem_get_free_no_cache()
bs = 8 if free > 8200 else 4
print(f"using bs={bs}, have {free}MB of GPU RAM free")

In [ ]:

# Build the labelled item list: take inputs from images/, hold out the files
# named in valid.txt (one directory above images/) as the validation set, and
# pair every image with its mask via get_y_fn, using `codes` as the classes.
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt')
       .label_from_func(get_y_fn, classes=codes))

In [ ]:

# Turn the item list into a DataBunch: get_transforms() augmentations at
# `size`, with tfm_y=True so the target masks are transformed as well, batches
# of `bs`, and normalization with imagenet_stats.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:

data.show_batch(2, figsize=(10,7))

In [ ]:

data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)

Model

In [ ]:

# Map each class name to its position in `codes`, which is the integer id
# used in the masks, then look up the id of the 'Void' class.
name2id = {code: idx for idx, code in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    """Pixel accuracy that ignores pixels labelled `void_code`.

    `input` holds per-class scores along dim 1 (the prediction is the argmax
    over that dim); `target` holds the label ids with a singleton dim 1 that
    is squeezed away. Returns the mean, as a float tensor, of the matches
    over all non-void pixels.
    """
    labels = target.squeeze(1)
    predicted = input.argmax(dim=1)
    # Only pixels whose ground truth is not 'Void' count towards the score.
    keep = labels != void_code
    hits = predicted[keep] == labels[keep]
    return hits.float().mean()

In [ ]:

metrics=acc_camvid
# metrics=accuracy

In [ ]:

wd=1e-2

In [ ]:

# U-Net learner built on a resnet34 encoder, reporting `metrics` (acc_camvid
# above) and training with weight decay `wd`.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)

In [ ]:

lr_find(learn)
learn.recorder.plot()

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:

lr=3e-3

In [ ]:

# Stage 1: 10 epochs of one-cycle training before unfreezing. slice(lr) gives
# only an upper bound for the learning rate; pct_start=0.9 is, per the fastai
# fit_one_cycle docs, the fraction of the cycle spent raising the rate.
learn.fit_one_cycle(10, slice(lr), pct_start=0.9)

Total time: 05:30

epoch	train_loss	valid_loss	acc_camvid
1	1.172738	0.771233	0.820753
2	0.799105	0.637969	0.825231
3	0.731867	0.628517	0.824456
4	0.630086	0.486045	0.860029
5	0.616145	0.535374	0.860695
6	0.588540	0.439988	0.879501
7	0.551265	0.412592	0.884478
8	0.568172	0.679087	0.832221
9	0.538392	0.405911	0.879605
10	0.451235	0.353972	0.892891

In [ ]:

learn.save('stage-1')

In [ ]:

learn.load('stage-1');

In [ ]:

learn.show_results(rows=3, figsize=(8,9))

In [ ]:

learn.unfreeze()

In [ ]:

# Learning-rate range for training the unfrozen model: lr/400 up to lr/4.
lrs = slice(lr/400,lr/4)

In [ ]:

# Stage 2: 12 epochs over the `lrs` range with the whole model unfrozen.
learn.fit_one_cycle(12, lrs, pct_start=0.8)

Total time: 06:39

epoch	train_loss	valid_loss	acc_camvid
1	0.389135	0.334715	0.896700
2	0.377873	0.324080	0.900284
3	0.369020	0.325073	0.904146
4	0.355022	0.308820	0.912556
5	0.351138	0.313001	0.909351
6	0.347777	0.285509	0.920183
7	0.338683	0.306076	0.909899
8	0.318913	0.303712	0.915792
9	0.312038	0.276126	0.920137
10	0.311217	0.276649	0.925244
11	0.285135	0.268458	0.922453
12	0.256778	0.262011	0.926964

In [ ]:

learn.save('stage-2');

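Go big

If you run out of GPU memory at this stage, you may have to restart your kernel and come back here; you may also need to decrease the batch size (bs). After a restart, re-run the setup cells above (imports, data paths, `src`, metrics) before continuing; the model weights are restored from the `stage-2` checkpoint below.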

In [ ]:

# Dispose of the half-resolution learner before checking free GPU memory.
learn.destroy()

# From here on, train at the full source resolution.
size = src_size

# The largest usable batch size depends on the GPU RAM currently free:
# use 3 when more than 8200MB is reported, otherwise fall back to 1.
free = gpu_mem_get_free_no_cache()
bs = 3 if free > 8200 else 1
print(f"using bs={bs}, have {free}MB of GPU RAM free")

In [ ]:

# Rebuild the DataBunch from the same `src` with the current `size` and `bs`;
# tfm_y=True keeps the target masks transformed together with the images.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:

learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)

In [ ]:

# Continue from the 'stage-2' checkpoint saved after half-resolution training.
learn.load('stage-2');

In [ ]:

lr_find(learn)
learn.recorder.plot()

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:

lr=1e-3

In [ ]:

# Full-size stage 1: 10 epochs of one-cycle training with slice(lr) as the
# upper learning-rate bound and pct_start=0.8.
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)

Total time: 20:31

epoch	train_loss	valid_loss	acc_camvid
1	0.366140	0.339497	0.908682
2	0.333278	0.304011	0.916702
3	0.326025	0.305404	0.919941
4	0.336885	0.321342	0.910933
5	0.326935	0.305589	0.919401
6	0.347779	0.333608	0.908895
7	0.328334	0.352358	0.905482
8	0.327277	0.387525	0.912187
9	0.291777	0.293065	0.918046
10	0.228348	0.257859	0.929750

In [ ]:

learn.save('stage-1-big')

In [ ]:

learn.load('stage-1-big');

In [ ]:

learn.unfreeze()

In [ ]:

# Learning-rate range for the unfrozen full-size model: 1e-6 up to lr/10.
lrs = slice(1e-6,lr/10)

In [ ]:

# Full-size stage 2: 10 epochs over the `lrs` range, leaving pct_start at
# fit_one_cycle's default.
learn.fit_one_cycle(10, lrs)

Total time: 21:05

epoch	train_loss	valid_loss	acc_camvid
1	0.225999	0.254650	0.930782
2	0.216341	0.256007	0.930751
3	0.209800	0.255649	0.930709
4	0.203509	0.252857	0.931682
5	0.202308	0.258498	0.931308
6	0.200796	0.257311	0.931915
7	0.204560	0.251486	0.933218
8	0.192893	0.254977	0.932786
9	0.189505	0.258091	0.932555
10	0.190156	0.256064	0.933315

In [ ]:

learn.save('stage-2-big')

In [ ]:

learn.load('stage-2-big');

In [ ]:

learn.show_results(rows=3, figsize=(10,10))

In [ ]:

fin

In [ ]: