Image segmentation with CamVid¶

In [ ]:

%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [ ]:

from fastai import *
from fastai.vision import *

In [ ]:

path = untar_data(URLs.CAMVID)
path.ls()

Out[ ]:

[PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/valid.txt'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/labels'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/codes.txt'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/images')]

In [ ]:

path_lbl = path/'labels'
path_img = path/'images'

Data¶

In [ ]:

fnames = get_image_files(path_img)
fnames[:3]

Out[ ]:

[PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/images/0016E5_08065.png'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/images/0006R0_f01500.png'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/images/0016E5_06510.png')]

In [ ]:

lbl_names = get_image_files(path_lbl)
lbl_names[:3]

Out[ ]:

[PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/labels/0006R0_f01770_P.png'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/labels/0006R0_f01290_P.png'),
 PosixPath('/data1/jhoward/git/course-v3/nbs/dl1/data/camvid/labels/Seq05VD_f02940_P.png')]

In [ ]:

img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))

In [ ]:

get_y_fn = lambda x: path_lbl/f'{x.stem}_P{x.suffix}'

In [ ]:

mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)

In [ ]:

mask.shape,mask.data

Out[ ]:

(torch.Size([1, 720, 960]), tensor([[[ 4,  4,  4,  ..., 26, 26, 26],
          [ 4,  4,  4,  ..., 26, 26, 26],
          [ 4,  4,  4,  ..., 26, 26, 26],
          ...,
          [19, 19, 19,  ..., 17, 17, 17],
          [19, 19, 19,  ..., 17, 17, 17],
          [19, 19, 19,  ..., 17, 17, 17]]]))

In [ ]:

codes = np.loadtxt(path/'codes.txt', dtype=str); codes

Out[ ]:

array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
       'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
       'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
       'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')

Datasets¶

In [ ]:

size=640
bs=4

In [ ]:

data = (ImageFileList.from_folder(path_img)
        .label_from_func(get_y_fn)
        .split_by_fname_file('../valid.txt')
        .datasets(SegmentationDataset, classes=codes)
        .transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:

data.show_batch(2)

Model¶

In [ ]:

name2id = {v:k for k,v in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    target = target.squeeze()
    mask = target != void_code
    return (input.argmax(dim=1)[mask]==target[mask]).float().mean()

In [ ]:

metrics=acc_camvid
# metrics=accuracy

In [ ]:

learn = Learner.create_unet(data, models.resnet34, metrics=metrics)

In [ ]:

lr_find(learn)
learn.recorder.plot()

LR Finder complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:

lr = 1e-2

In [ ]:

learn.fit_one_cycle(5, slice(lr))

Total time: 02:28
epoch  train_loss  valid_loss  acc_camvid
1      1.119056    0.977713    0.750681    (00:35)
2      0.904997    0.849519    0.815774    (00:28)
3      0.682727    0.692044    0.821199    (00:28)
4      0.619111    0.559300    0.844576    (00:28)
5      0.544850    0.521154    0.851283    (00:28)

In [ ]:

learn.fit_one_cycle(5, slice(lr))

Total time: 02:23
epoch  train_loss  valid_loss  acc_camvid
1      0.587322    0.519706    0.859591    (00:28)
2      0.649438    0.692592    0.845502    (00:28)
3      0.567764    0.572977    0.835417    (00:28)
4      0.501820    0.474771    0.864284    (00:28)
5      0.471103    0.443377    0.873719    (00:28)

In [ ]:

learn.save('stage-1')

In [ ]:

learn.load('stage-1')

In [ ]:

learn.unfreeze()

In [ ]:

lr_find(learn)
learn.recorder.plot()

LR Finder complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:

learn.fit_one_cycle(12, slice(1e-5,lr/5))

Total time: 06:39
epoch  train_loss  valid_loss  acc_camvid
1      0.476839    0.452640    0.872494    (00:33)
2      0.476087    0.433352    0.875471    (00:33)
3      0.467058    0.437772    0.869612    (00:33)
4      0.475351    0.468794    0.868904    (00:33)
5      0.461685    0.443922    0.869515    (00:33)
6      0.434607    0.433422    0.866817    (00:33)
7      0.426970    0.401881    0.881677    (00:33)
8      0.400956    0.379572    0.884080    (00:32)
9      0.375717    0.388340    0.879745    (00:33)
10     0.369958    0.387626    0.876695    (00:33)
11     0.363495    0.374728    0.882055    (00:33)
12     0.349536    0.382812    0.880067    (00:33)

In [ ]:

learn.save('stage-2')

In [ ]:

learn.load('stage-2')

In [ ]:

learn.fit_one_cycle(12, slice(1e-5,lr/5))

Total time: 06:40
epoch  train_loss  valid_loss  acc_camvid
1      0.363479    0.372779    0.885822    (00:33)
2      0.372744    0.370301    0.889697    (00:33)
3      0.377817    0.398087    0.887722    (00:33)
4      0.377816    0.400822    0.877530    (00:33)
5      0.379588    0.381926    0.881407    (00:33)
6      0.370663    0.354900    0.898501    (00:33)
7      0.331817    0.341902    0.894362    (00:33)
8      0.329269    0.327356    0.901787    (00:33)
9      0.315665    0.332719    0.899110    (00:33)
10     0.297474    0.337342    0.899688    (00:33)
11     0.299392    0.329972    0.898885    (00:33)
12     0.298601    0.329742    0.900147    (00:33)

In [ ]:

learn.save('stage-3')

In [ ]: