%reload_ext autoreload
%autoreload 2
#export
from nb_006a import *
# Paths into the PASCAL VOC 2007 dataset (expects data/pascal laid out in the
# standard VOCdevkit structure alongside the COCO-format json annotations).
PATH = Path('data/pascal')
JPEG_PATH = PATH/'VOCdevkit'/'VOC2007'/'JPEGImages'
import json
trn_j = json.load((PATH / 'pascal_train2007.json').open())
# Lookup tables keyed by the json integer ids: category id -> class name,
# image id -> path of the jpeg on disk.
classes = {o['id']:o['name'] for o in trn_j['categories']}
filenames = {o['id']:JPEG_PATH/o['file_name'] for o in trn_j['images']}
# Flatten the annotation records to (img_id, class name, bbox); entries
# marked 'ignore' in the json are dropped.
annotations = [{'img_id': o['image_id'],
'class': classes[o['category_id']],
'bbox':o['bbox']} for o in trn_j['annotations'] if not o['ignore']]
len(annotations)
# Group annotations per image so each image id maps to its list of objects.
annot_by_img = collections.defaultdict(list)
for annot in annotations:
    annot_by_img[annot['img_id']].append({'class': annot['class'], 'bbox': annot['bbox']})
len(annot_by_img)
First, let's build a model that finds the biggest bounding box in each image.
# For each image, keep only its largest annotation. The json boxes are
# [left, top, width, height], so area is bbox[2] * bbox[3].
# Fixes vs. the original: `id` no longer shadows the builtin, the manual
# argmax loop is replaced by `max(key=...)`, and an image with no
# annotations is skipped instead of raising IndexError.
biggest_bb = {}
for img_id in filenames.keys():
    annots = annot_by_img[img_id]
    if annots:
        biggest_bb[img_id] = max(annots, key=lambda o: o['bbox'][2] * o['bbox'][3])
# Shuffle the image ids and hold out 20% of them for validation.
ids = np.random.permutation(np.array(list(filenames.keys())))
split = int(len(filenames) * 0.2)
valid_ids, train_ids = ids[:split], ids[split:]
train_fns = [filenames[i] for i in train_ids]
valid_fns = [filenames[i] for i in valid_ids]
# Re-express each biggest box as corner coordinates [top, left, bottom, right]
# (row/col order), wrapped in a one-element list per image.
bboxes = {}
for img_id in filenames.keys():
    left, top, w, h = biggest_bb[img_id]['bbox']
    bboxes[img_id] = [[top, left, h + top, w + left]]
train_bbs = [bboxes[i] for i in ids[split:]]
valid_bbs = [bboxes[i] for i in ids[:split]]
# Same corner-format conversion as above, but collecting *every* box per image.
all_bboxes = collections.defaultdict(list)
for img_id in filenames.keys():
    for annot in annot_by_img[img_id]:
        left, top, w, h = annot['bbox']
        all_bboxes[img_id].append([top, left, h + top, w + left])
train_all_bbs = [all_bboxes[i] for i in ids[split:]]
valid_all_bbs = [all_bboxes[i] for i in ids[:split]]
#export
class ImageBBox(ImageMask):
    "Image class for bbox-style annotations"
    # Boxes are stored as one binary mask per box (channel dim), so any spatial
    # transform applied to an ImageMask also transforms the boxes; `data`
    # recovers corner coordinates from the transformed masks.
    def clone(self):
        "Returns a deep copy of this object (clones the underlying pixel tensor)."
        return self.__class__(self.px.clone())
    @classmethod
    def create(cls, bboxes:Collection[Collection[int]], h:int, w:int) -> 'ImageBBox':
        "Creates an ImageBBox object from bboxes"
        # One h-by-w mask channel per box; box coords are [top, left, bottom,
        # right] in row/col order, with inclusive bottom/right (hence the +1).
        pxls = torch.zeros(len(bboxes),h, w).long()
        for i,bbox in enumerate(bboxes):
            pxls[i,bbox[0]:bbox[2]+1,bbox[1]:bbox[3]+1] = 1
        return cls(pxls.float())
    @property
    def data(self) -> LongTensor:
        "Extracts [top, left, bottom, right] for each box from its mask channel."
        bboxes = []
        for i in range(self.px.size(0)):
            idxs = torch.nonzero(self.px[i])
            # A box can be transformed entirely out of the image, leaving an
            # empty mask; such boxes are dropped.
            if len(idxs) != 0:
                bboxes.append(torch.tensor([idxs[:,0].min(), idxs[:,1].min(), idxs[:,0].max(), idxs[:,1].max()])[None])
        # NOTE(review): squeeze() collapses a single box to a 1-D tensor (the
        # consumer `_show` relies on this); if *all* boxes vanish, torch.cat
        # on an empty list raises — presumably never hit in practice, verify.
        return torch.cat(bboxes, 0).squeeze()
#export
from matplotlib import patches, patheffects
from matplotlib.patches import Patch
def bb2hw(a:Collection[int]) -> np.ndarray:
    "Converts a box from corner format `[top,left,bottom,right]` to matplotlib rect format `[x,y,width,height]`"
    # The original docstring claimed "(width,height,center) to (height,width,top,left)",
    # which does not match the arithmetic: x=left, y=top, w=right-left, h=bottom-top.
    return np.array([a[1],a[0],a[3]-a[1],a[2]-a[0]])
def draw_outline(o:Patch, lw:int):
    "Adds a black stroke of width `lw` behind `o` so it stays visible on any background"
    effects = [patheffects.Stroke(linewidth=lw, foreground='black'),
               patheffects.Normal()]
    o.set_path_effects(effects)
def draw_rect(ax:plt.Axes, b:Collection[int], color:str='white'):
    "Draws bounding box `b` (`[x,y,w,h]`) on `ax`, outlined for visibility"
    xy, wh = b[:2], b[-2:]
    rect = patches.Rectangle(xy, *wh, fill=False, edgecolor=color, lw=2)
    draw_outline(ax.add_patch(rect), 4)
def _show_image(img:Image, ax:plt.Axes=None, figsize:tuple=(3,3), hide_axis:bool=True, cmap:str='binary',
                alpha:float=None) -> plt.Axes:
    "Renders `img` on `ax` (creating a new figure of `figsize` if `ax` is None) and returns the axes"
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(image2np(img), cmap=cmap, alpha=alpha)
    if hide_axis:
        ax.axis('off')
    return ax
def show_image(x:Image, y:Image=None, ax:plt.Axes=None, figsize:tuple=(3,3), alpha:float=0.5,
               hide_axis:bool=True, cmap:str='viridis'):
    "Plots `x`, blending the optional overlay `y` on top with transparency `alpha`"
    axes = _show_image(x, ax=ax, hide_axis=hide_axis, cmap=cmap)
    if y is not None:
        _show_image(y, ax=axes, alpha=alpha, hide_axis=hide_axis, cmap=cmap)
    if hide_axis:
        axes.axis('off')
def _show(self:Image, ax:plt.Axes=None, y:Image=None, **kwargs):
    "Shows the image; overlays `y` as a mask, or draws it as rectangles if it is an `ImageBBox`."
    # BUG FIX: the original read `is_bb` even when `y` was None (it was only
    # assigned inside `if y is not None:`), raising UnboundLocalError on a
    # plain `img.show()` call. Guard the None / non-bbox cases explicitly.
    if y is None or not isinstance(y, ImageBBox):
        data = y.data if y is not None else None
        return show_image(self.data, ax=ax, y=data, **kwargs)
    y = y.data
    ax = _show_image(self.data, ax=ax)
    # ImageBBox.data squeezes a single box down to a 1-D tensor; multiple
    # boxes come back stacked as 2-D.
    if len(y.size()) == 1: draw_rect(ax, bb2hw(y))
    else:
        for i in range(y.size(0)): draw_rect(ax, bb2hw(y[i]))
Image.show = _show
#export
@dataclass
class CoordTargetDataset(Dataset):
    "A dataset with annotated images"
    # x_fns: paths of the image files; bbs: matching per-image bbox lists.
    x_fns:Collection[Path]
    bbs:Collection[Collection[int]]
    def __post_init__(self):
        # Every image must have a matching annotation entry.
        assert len(self.x_fns)==len(self.bbs)
    def __repr__(self) -> str:
        return f'{type(self).__name__} of len {len(self.x_fns)}'
    def __len__(self) -> int:
        return len(self.x_fns)
    def __getitem__(self, i:int) -> Tuple[Image,ImageBBox]:
        "Returns the i-th image paired with its boxes rendered as an `ImageBBox`."
        img = open_image(self.x_fns[i])
        return img, ImageBBox.create(self.bbs[i], *img.size)
# Sanity-check the dataset: build train/valid sets over *all* boxes and eyeball
# one known image (000012.jpg) with its annotations drawn on top.
train_ds = CoordTargetDataset(train_fns, train_all_bbs)
valid_ds = CoordTargetDataset(valid_fns, valid_all_bbs)
train_fns.index(Path(JPEG_PATH/'000012.jpg'))
# NOTE(review): index 1477 appears to be the position found by the line above
# for this particular shuffle — it is not stable across runs; verify.
x,y = train_ds[1477]
y.data
x.show(y=y)
# Show the first box's raw mask channel to confirm the mask representation.
x.show(y=ImageMask(y.px[0].unsqueeze(0)))
y.data, valid_all_bbs[1]
# Apply the augmentation pipeline with tfm_y=True so the bbox masks are
# transformed together with the image, then draw a 4x4 grid of random augments
# of the same sample to eyeball that boxes track the image.
tfms = get_transforms(do_flip=True, max_rotate=4, max_lighting=0.2)
train_tds = DatasetTfm(train_ds, tfms=tfms[0], tfm_y=True, size=128, padding_mode='border')
x,y = train_tds[0]
fig,axs = plt.subplots(4,4, figsize=(10,10))
for ax in axs.flatten():
    # Re-indexing draws fresh random transforms each time.
    x,y = train_tds[0]
    x.show(ax=ax,y=y)
# Build the final DataBunch for training: single biggest box per image,
# 224px crops, imagenet normalization. num_workers=0 keeps transforms on the
# main process (simplifies debugging).
bs,sz=4,224
tfms = get_transforms(do_flip=True, max_rotate=4, max_lighting=0.2)
train_ds = CoordTargetDataset(train_fns, train_bbs)
valid_ds = CoordTargetDataset(valid_fns, valid_bbs)
data = DataBunch.create(train_ds, valid_ds, path=PATH, bs=bs, num_workers=0, ds_tfms=tfms, size=sz, tfms=imagenet_norm,
                        padding_mode='border')
We take a pretrained resnet34 with a custom head.
# Inspect the backbone: cut the last 2 layers (pool + fc) off resnet34 and
# check how many output features the resulting body produces.
arch = tvm.resnet34
model = create_body(arch(), -2)
num_features(model)
def custom_loss(output, target):
    "L1 loss between predicted and true bbox coords, with targets scaled by image size `sz` to match the sigmoid output range."
    # BUG FIX: the original used `target.float().div_(sz)`. When `target` is
    # already a float tensor, `.float()` returns the *same* tensor (no copy),
    # so the in-place `div_` silently mutated the caller's target. Use a
    # non-inplace division instead.
    return F.l1_loss(output, target.float() / sz)
arch = tvm.resnet34
# Custom regression head: flatten the 512x7x7 body output to 4 sigmoid-bounded
# coords in [0,1] (matching the /sz scaling in custom_loss).
head_reg4 = nn.Sequential(Flatten(), nn.Linear(512 * 7*7,4), nn.Sigmoid())
# NOTE(review): `accuracy` is a classification metric on a regression target
# here — its reported value is not meaningful; consider an L1 metric instead.
learn = ConvLearner(data, arch, metrics=accuracy, custom_head=head_reg4)
learn.loss_fn = custom_loss
learn.lr_find()
learn.recorder.plot()