%reload_ext autoreload
%autoreload 2
#export
from nb_006c import *
import pandas as pd
See https://www.kaggle.com/c/imagenet-object-localization-challenge.
We are using a reduced dataset (only 28 of the 1000 classes in the challenge above).
You can download it here:
https://www.kaggle.com/fm313v/imgnet-obj-loc-small (4.34 GB)
# Dataset root and the CSV / mapping files shipped with the Kaggle download.
PATH = Path('data/imgnetloc-small')
TRAIN_CSV = PATH/'LOC_train_solution.csv'    # ImageId -> PredictionString (train)
VALID_CSV = PATH/'LOC_val_solution.csv'      # ImageId -> PredictionString (validation)
CLASSES_TXT = PATH/'LOC_synset_mapping.txt'  # synset id -> human-readable names
ILSVRC = PATH/'ILSVRC'
# Images and bounding-box XML annotations, each split into train/val folders.
IMG_PATH = ILSVRC/'Data/CLS-LOC'
TRAIN_IMG = IMG_PATH/'train'
VALID_IMG = IMG_PATH/'val'
ANNO_PATH = ILSVRC/'Annotations/CLS-LOC'
TRAIN_ANNO = ANNO_PATH/'train'
VALID_ANNO = ANNO_PATH/'val'
# Load the solution CSVs; columns used below are ImageId and PredictionString.
train_df = pd.read_csv(TRAIN_CSV)
valid_df = pd.read_csv(VALID_CSV)
train_df.head()  # notebook-style peek at the loaded frames
valid_df.head()
def read_classes(path=None):
    """Parse the synset mapping file into {synset_id: primary_class_name}.

    Each line looks like ``n01440764 tench, Tinca tinca``: a synset id, a
    space, then comma-separated names — only the first name is kept.

    path: mapping file to read; defaults to the notebook-level CLASSES_TXT
    (the original hard-coded it; the parameter is backward compatible).
    """
    classes = {}
    with open(path if path is not None else CLASSES_TXT, 'r') as class_file:
        # Stream line-by-line instead of materializing readlines().
        for line in class_file:
            synset, _, names = line.strip().partition(' ')
            classes[synset] = names.split(',')[0]  # keep only the first name
    return classes
from matplotlib import patches, patheffects
def bb_hw(a):
    """Convert a [top, left, bottom, right] box (inclusive coordinates) to
    [x, y, width, height] form, as matplotlib's Rectangle expects."""
    top, left, bottom, right = a[0], a[1], a[2], a[3]
    return np.array([left, top, right - left + 1, bottom - top + 1])
def draw_outline(o, lw):
    """Give matplotlib artist `o` a black outline of width `lw` so it stays
    visible against any image background."""
    effects = [patheffects.Stroke(linewidth=lw, foreground='black'),
               patheffects.Normal()]
    o.set_path_effects(effects)
def draw_rect(ax, b, color='white'):
    """Draw an unfilled, outlined rectangle b = [x, y, w, h] on `ax`."""
    xy, wh = b[:2], b[2:]
    rect = patches.Rectangle(xy, *wh, fill=False, edgecolor=color, lw=2)
    draw_outline(ax.add_patch(rect), 4)
def draw_text(ax, xy, txt, sz=14):
    """Draw bold white text anchored at `xy`, with a thin black outline."""
    artist = ax.text(*xy, txt, verticalalignment='top',
                     color='white', fontsize=sz, weight='bold')
    draw_outline(artist, 1)
def show_img_annos(img, annos, lbl_to_txt=None, ax=None):
    """Display a CHW tensor image and overlay every annotation in `annos`.

    Creates a fresh figure when no `ax` is supplied.
    """
    if not ax:
        fig, ax = plt.subplots()
    # CHW -> HWC, as matplotlib's imshow expects.
    ax.imshow(img.numpy().transpose(1, 2, 0))
    for a in annos:
        draw_anno(ax, a, lbl_to_txt=lbl_to_txt)
def show_img_anno(img, anno, lbl_to_txt=None, ax=None):
    """Display a CHW tensor image with a single annotation drawn on top.

    Creates a fresh figure when no `ax` is supplied.
    """
    if not ax:
        fig, ax = plt.subplots()
    # CHW -> HWC, as matplotlib's imshow expects.
    ax.imshow(img.numpy().transpose(1, 2, 0))
    draw_anno(ax, anno, lbl_to_txt=lbl_to_txt)
def draw_anno(ax, anno, lbl_to_txt=None):
    """Draw one (class_lbl, bbox) annotation on `ax`; when a label->text
    mapping is given, also write the class name at the box corner."""
    lbl, bbox = anno
    hw_box = bb_hw(bbox)  # [top,left,bottom,right] -> [x,y,w,h]
    draw_rect(ax, hw_box)
    if lbl_to_txt:
        draw_text(ax, hw_box[:2], lbl_to_txt[lbl], sz=16)
# Build the label lookup tables:
#   class_to_text: synset id -> first human-readable name
#   lbl_to_class:  integer label -> synset id (in mapping-file order)
#   class_to_lbl:  synset id -> integer label (inverse of the above)
#   lbl_to_text:   integer label -> human-readable name
class_to_text = read_classes()
lbl_to_class = dict(enumerate(class_to_text.keys()))
class_to_lbl = {v:k for k,v in lbl_to_class.items()}
lbl_to_text = { i:class_to_text[c] for i,c in lbl_to_class.items()}
def pull_class_id(x):
    """Return the synset class id: the first space-separated token of a
    PredictionString like 'n01440764 85 1 499 272'."""
    class_id, _, _ = x.partition(' ')
    return class_id
def train_to_image_path(x):
    """Map a train ImageId like 'n01440764_10026' to its JPEG path; train
    images live under a folder named for the synset prefix before '_'."""
    synset = x.partition('_')[0]
    return TRAIN_IMG/synset/f'{x}.JPEG'
def train_to_anno_path(x):
    """Map a train ImageId like 'n01440764_10026' to its annotation XML.

    Bug fix: the original used pull_class_id, which splits on spaces — but
    an ImageId contains no space, so the whole id was used as the class
    folder. Train annotations are grouped by the synset prefix before the
    '_', exactly as train_to_image_path does for images.
    """
    class_id = x.split('_')[0]
    return TRAIN_ANNO/class_id/f'{x}.xml'
def valid_to_image_path(x):
    """Validation images sit directly inside VALID_IMG (no class folders)."""
    return VALID_IMG/(x + '.JPEG')
def valid_to_anno_path(x):
    """Map a validation ImageId to its annotation XML path.

    Bug fix: the original built the path under VALID_IMG (the images
    folder); the annotation XMLs live under VALID_ANNO, mirroring the
    TRAIN_ANNO layout (VALID_ANNO was defined above but never used).
    """
    return VALID_ANNO/f'{x}.xml'
# Derive per-row image/annotation file paths and the (first) class id
# for both dataframes from the raw ImageId / PredictionString columns.
train_df['image_fn'] = train_df.ImageId.apply(train_to_image_path)
train_df['anno_fn'] = train_df.ImageId.apply(train_to_anno_path)
train_df['class_id'] = train_df.PredictionString.apply(pull_class_id)
valid_df['image_fn'] = valid_df.ImageId.apply(valid_to_image_path)
valid_df['anno_fn'] = valid_df.ImageId.apply(valid_to_anno_path)
valid_df['class_id'] = valid_df.PredictionString.apply(pull_class_id)
def to_preds(x):
    """Parse a PredictionString into a list of (label, bbox) annotations.

    The string is groups of five tokens: class_id left top right bottom.
    Returns [(int_label, [top, left, bottom, right]), ...] — note the
    reordering of coordinates into top-left/bottom-right form.
    """
    tokens = x.strip().split(' ')
    groups = (tokens[i:i + 5] for i in range(0, len(tokens), 5))
    return [(class_to_lbl[cls],
             [float(top), float(left), float(bottom), float(right)])
            for cls, left, top, right, bottom in groups]
# Materialize filename lists and parse every PredictionString into a
# per-image list of (label, [top, left, bottom, right]) annotations.
train_fns = list(train_df.image_fn)
train_annos = list(train_df.PredictionString.apply(to_preds))
valid_fns = list(valid_df.image_fn)
valid_annos = list(valid_df.PredictionString.apply(to_preds))
def get_biggest_annos(img_annos):
    """For each image's annotation list, keep only the largest-area box.

    img_annos: list of per-image annotation lists; each annotation is
    (class_lbl, [top, left, bottom, right]) with inclusive coordinates,
    so area = (bottom - top + 1) * (right - left + 1).

    Fixes: drops the unused counter `j` from the original, and computes
    the area directly instead of round-tripping through bb_hw/numpy.
    Ties keep the first annotation, matching the original strict-< scan.
    """
    def _area(anno):
        _, (top, left, bottom, right) = anno
        return (bottom - top + 1) * (right - left + 1)
    return [max(annos, key=_area) for annos in img_annos]
# Keep only the largest box per image: the single-object localization target.
train_annos_lrg = get_biggest_annos(train_annos)
valid_annos_lrg= get_biggest_annos(valid_annos)
# Sanity-check one example: all boxes vs only the largest box.
idx = 15460
img = open_image(train_df.image_fn[idx])
annos = train_annos[idx]
show_img_annos(img, annos, lbl_to_text)
show_img_anno(img, train_annos_lrg[idx], lbl_to_text)
@dataclass
class AnnoTargetDataset(Dataset):
    """Dataset pairing image file paths with (class, bbox) annotation targets."""
    x_fns:List[Path]; bbs:Tuple[int, List[float]]

    def __post_init__(self):
        # Every image must come with exactly one annotation target.
        assert len(self.x_fns) == len(self.bbs)

    def __len__(self):
        return len(self.x_fns)

    def __repr__(self):
        return f'{type(self).__name__} of len {len(self.x_fns)}'

    def __getitem__(self, i):
        # Decode the image lazily on access; the target is returned as-is.
        image = open_image(self.x_fns[i])
        return image, self.bbs[i]
# Wrap the filename/largest-annotation lists in datasets and eyeball a sample.
train_ds = AnnoTargetDataset(train_fns, train_annos_lrg)
valid_ds = AnnoTargetDataset(valid_fns, valid_annos_lrg)
x, y = next(iter(train_ds))
show_img_anno(x, y, lbl_to_text)
from torchvision.models import resnet18, resnet34
arch = resnet34  # backbone architecture for the model
# imagenet mean / std
data_mean, data_std = map(tensor, ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
data_norm,data_denorm = normalize_funcs(data_mean,data_std)
# Batch size, image size, and DataLoader workers.
bs = 128
size=128
workers=0  # NOTE(review): 0 disables worker processes — confirm this is intended
def get_data(bs, size):
    """Build a DataBunch of augmented, normalized train/valid data.

    bs: batch size; size: target image size for the transforms.
    Uses the notebook-level train_ds/valid_ds, workers and data_norm.
    """
    aug_tfms = get_transforms(do_flip=True, max_rotate=10, max_zoom=1.2,
                              max_lighting=0.3, max_warp=0.15)
    tds = transform_datasets(train_ds, valid_ds, aug_tfms, size=size)
    bunch = DataBunch.create(*tds, bs=bs, num_workers=workers, tfms=data_norm)
    return bunch
data = get_data(bs, size)
x,y = next(iter(data.train_dl))  # pull one batch to sanity-check the pipeline
# NOTE(review): the two lines below look like leftover scratch cells — the
# ragged tensor raises in modern PyTorch, and `b` is never defined in this
# file. Consider deleting them.
tensor([[5,6],[1,2,3]])
type(b)