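# Image segmentation with CamVid

This notebook trains a U-Net built on resnet34 to assign each pixel of a CamVid image to one of the 32 classes listed in `codes.txt`, reporting accuracy over all pixels that are not labelled `Void`.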

In [ ]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
In [ ]:
from fastai import *
from fastai.vision import *
In [ ]:
# Get the local CamVid folder from untar_data (presumably downloading/extracting
# it on first use — confirm) and list its contents.
path = untar_data(URLs.CAMVID)
path.ls()
Out[ ]:
[PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/valid.txt'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/labels'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/codes.txt'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/images')]
In [ ]:
# Sub-folders of the dataset: the label masks and the input images.
path_lbl = path/'labels'
path_img = path/'images'

Data

In [ ]:
# Image files found in the images folder; display the first three.
fnames = get_image_files(path_img)
fnames[:3]
Out[ ]:
[PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/images/0016E5_08065.png'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/images/0006R0_f01500.png'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/images/0016E5_06510.png')]
In [ ]:
# Mask files found in the labels folder; display the first three to see the
# naming scheme (image stem followed by `_P`).
lbl_names = get_image_files(path_lbl)
lbl_names[:3]
Out[ ]:
[PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/labels/0006R0_f01770_P.png'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/labels/0006R0_f01290_P.png'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/labels/Seq05VD_f02940_P.png')]
In [ ]:
# Open the first image file and display it as a quick sanity check.
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))
In [ ]:
def get_y_fn(x):
    "Return the mask file in `path_lbl` that belongs to image file `x` (`<stem>_P<suffix>`)."
    mask_name = f'{x.stem}_P{x.suffix}'
    return path_lbl/mask_name
In [ ]:
# Open the mask that get_y_fn maps the sample image to, and draw it with alpha=1.
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)
In [ ]:
# Inspect the mask tensor: a single channel holding one integer value per pixel.
mask.shape,mask.data
Out[ ]:
(torch.Size([1, 720, 960]), tensor([[[ 4,  4,  4,  ..., 26, 26, 26],
          [ 4,  4,  4,  ..., 26, 26, 26],
          [ 4,  4,  4,  ..., 26, 26, 26],
          ...,
          [19, 19, 19,  ..., 17, 17, 17],
          [19, 19, 19,  ..., 17, 17, 17],
          [19, 19, 19,  ..., 17, 17, 17]]]))
In [ ]:
# Load the class names from codes.txt as a string array and display them.
# NOTE(review): a name's position is presumably the integer id stored in the masks — confirm.
codes = np.loadtxt(path/'codes.txt', dtype=str); codes
Out[ ]:
array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
       'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
       'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
       'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')

Datasets

In [ ]:
# `size` is handed to .transform(...) and `bs` to .databunch(...) when the
# DataBunch is built.
size=640
bs=4
In [ ]:
# Build the segmentation DataBunch as one chained pipeline.
# Items come from the images folder.
data = (ImageFileList.from_folder(path_img)
        # Each image is paired with the mask path that get_y_fn derives from it.
        .label_from_func(get_y_fn)
        # Validation split comes from a file list; the '../' suggests the path is
        # resolved relative to the images folder (valid.txt sits one level up) — confirm.
        .split_by_fname_file('../valid.txt')
        # Wrap the splits as SegmentationDataset objects carrying the class names.
        .datasets(SegmentationDataset, classes=codes)
        # tfm_y=True: presumably applies the transforms to the masks as well — confirm.
        .transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))
In [ ]:
# Display a sample batch to check the data visually
# (the argument 2 presumably sets the number of rows — confirm).
data.show_batch(2)

Model

In [ ]:
# Reverse lookup for the class list: class name -> its position in `codes`.
name2id = {cls_name: cls_id for cls_id, cls_name in enumerate(codes)}
# Id of the 'Void' class, which acc_camvid leaves out of its accuracy.
void_code = name2id['Void']

def acc_camvid(input, target):
    """Pixel accuracy over all pixels whose ground-truth label is not 'Void'.

    Args:
        input: per-class scores with the class axis at dim 1 (the argmax is
            taken over it), e.g. (batch, n_classes, H, W).
        target: integer class ids with a singleton channel axis at dim 1,
            e.g. (batch, 1, H, W).

    Returns:
        0-dim float tensor: the fraction of non-void pixels predicted
        correctly (NaN when every pixel is void, because the mean is then
        taken over an empty selection).
    """
    # Drop only the channel axis. A bare squeeze() would also remove the batch
    # axis whenever a batch holds a single image, and the mask would then no
    # longer line up with the (batch, H, W) predictions.
    target = target.squeeze(1)
    mask = target != void_code
    return (input.argmax(dim=1)[mask]==target[mask]).float().mean()
In [ ]:
# Metric reported during training: acc_camvid, which ignores pixels labelled 'Void'.
metrics=acc_camvid
# Alternative left for comparison; NOTE(review): `accuracy` presumably does not mask Void — confirm.
# metrics=accuracy
In [ ]:
# Create a U-Net learner for `data` from models.resnet34, tracking `metrics`.
learn = Learner.create_unet(data, models.resnet34, metrics=metrics)
In [ ]:
# Run the learning-rate finder, then plot what the recorder captured.
lr_find(learn)
learn.recorder.plot()
LR Finder complete, type {learner_name}.recorder.plot() to see the graph.
In [ ]:
# Base learning rate used by every fit_one_cycle call in this notebook
# (directly as slice(lr), later as the upper bound lr/5).
lr = 1e-2
In [ ]:
# First training run: 5 epochs at slice(lr), before learn.unfreeze() is called.
learn.fit_one_cycle(5, slice(lr))
Total time: 02:28
epoch  train_loss  valid_loss  acc_camvid
1      1.119056    0.977713    0.750681    (00:35)
2      0.904997    0.849519    0.815774    (00:28)
3      0.682727    0.692044    0.821199    (00:28)
4      0.619111    0.559300    0.844576    (00:28)
5      0.544850    0.521154    0.851283    (00:28)

In [ ]:
# Five more epochs with the same settings as the previous run.
learn.fit_one_cycle(5, slice(lr))
Total time: 02:23
epoch  train_loss  valid_loss  acc_camvid
1      0.587322    0.519706    0.859591    (00:28)
2      0.649438    0.692592    0.845502    (00:28)
3      0.567764    0.572977    0.835417    (00:28)
4      0.501820    0.474771    0.864284    (00:28)
5      0.471103    0.443377    0.873719    (00:28)

In [ ]:
# Checkpoint the learner under the name 'stage-1' before unfreezing.
learn.save('stage-1')
In [ ]:
# Restore the 'stage-1' checkpoint (presumably so the notebook can be resumed from here — confirm).
learn.load('stage-1')
In [ ]:
# Unfreeze the learner (presumably makes all layer groups trainable — confirm).
learn.unfreeze()
In [ ]:
# Re-run the learning-rate finder now that the learner is unfrozen, and plot it.
lr_find(learn)
learn.recorder.plot()
LR Finder complete, type {learner_name}.recorder.plot() to see the graph.
In [ ]:
# 12 epochs on the unfrozen learner with learning rates given as the range 1e-5 .. lr/5
# (presumably spread across layer groups — confirm).
learn.fit_one_cycle(12, slice(1e-5,lr/5))
Total time: 06:39
epoch  train_loss  valid_loss  acc_camvid
1      0.476839    0.452640    0.872494    (00:33)
2      0.476087    0.433352    0.875471    (00:33)
3      0.467058    0.437772    0.869612    (00:33)
4      0.475351    0.468794    0.868904    (00:33)
5      0.461685    0.443922    0.869515    (00:33)
6      0.434607    0.433422    0.866817    (00:33)
7      0.426970    0.401881    0.881677    (00:33)
8      0.400956    0.379572    0.884080    (00:32)
9      0.375717    0.388340    0.879745    (00:33)
10     0.369958    0.387626    0.876695    (00:33)
11     0.363495    0.374728    0.882055    (00:33)
12     0.349536    0.382812    0.880067    (00:33)

In [ ]:
# Checkpoint the learner after the first unfrozen run as 'stage-2'.
learn.save('stage-2')
In [ ]:
# Restore the 'stage-2' checkpoint before continuing training.
learn.load('stage-2')
In [ ]:
# Another 12 epochs with the same learning-rate range as the previous unfrozen run.
learn.fit_one_cycle(12, slice(1e-5,lr/5))
Total time: 06:40
epoch  train_loss  valid_loss  acc_camvid
1      0.363479    0.372779    0.885822    (00:33)
2      0.372744    0.370301    0.889697    (00:33)
3      0.377817    0.398087    0.887722    (00:33)
4      0.377816    0.400822    0.877530    (00:33)
5      0.379588    0.381926    0.881407    (00:33)
6      0.370663    0.354900    0.898501    (00:33)
7      0.331817    0.341902    0.894362    (00:33)
8      0.329269    0.327356    0.901787    (00:33)
9      0.315665    0.332719    0.899110    (00:33)
10     0.297474    0.337342    0.899688    (00:33)
11     0.299392    0.329972    0.898885    (00:33)
12     0.298601    0.329742    0.900147    (00:33)

In [ ]:
# Checkpoint the final learner as 'stage-3'.
learn.save('stage-3')
In [ ]: