#!/usr/bin/env python
# coding: utf-8

# Notebook export: explores the Pascal VOC 2007 training annotations, then
# trains three resnet34-based models with fastai:
#   1. a classifier for the category of the largest object in each image,
#   2. a regressor for the number of annotated objects per image,
#   3. a classifier over the object count treated as a class label.
# The `# In[n]:` markers are the original notebook cell boundaries.

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# In[2]:


# NOTE: the star imports are kept on purpose; names such as plt, pd, np, nn,
# F, optim, open_image, to_np, VV, ... used below reach this script via them.
from fastai.conv_learner import *
from fastai.dataset import *
from pathlib import Path
import json
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects


# # Pascal VOC dataset

# In[3]:


PATH = Path('data/pascal')
list(PATH.iterdir())


# In[4]:


# Use a context manager so the annotation file handle is closed after parsing.
with (PATH / 'pascal_train2007.json').open() as annotation_file:
    train_json = json.load(annotation_file)


# ### Explore dataset

# In[5]:


train_json.keys()


# In[6]:


len(train_json['images'])


# In[7]:


train_json['images'][:3]


# In[8]:


len(train_json['annotations'])


# In[126]:


train_json['annotations'][:3]


# In[10]:


train_json['categories']


# In[11]:


list((PATH/'VOC2007').iterdir())


# In[12]:


list((PATH/'VOC2007'/'JPEGImages').iterdir())[:10]


# ### Create data structures

# In[13]:


# category id -> category name
categories = {category['id']: category['name']
              for category in train_json['categories']}


# In[14]:


categories[1], categories[7]


# In[15]:


# image id -> {'filename': ..., 'objects': [(category_id, bbox), ...]}
image_props = {
    image['id']: {'filename': image['file_name'], 'objects': []}
    for image in train_json['images']
}

# Attach every annotation that is not flagged 'ignore' to its image.
for annotation in train_json['annotations']:
    if not annotation['ignore']:
        image_props[annotation['image_id']]['objects'].append(
            (annotation['category_id'], annotation['bbox']))


# In[16]:


image_props[17]


# In[17]:


categories[13]


# ### Display image

# In[18]:


get_ipython().run_line_magic('matplotlib', 'inline')


# In[19]:


def sort_by_size(obj):
    """Return the (category_id, bbox) pairs sorted by bbox area, largest first.

    The area is computed as bbox[2] * bbox[3], i.e. the two values that
    draw_bbox passes to the rectangle as width and height.
    """
    return sorted(obj, key=lambda x: x[1][2] * x[1][3], reverse=True)


# In[20]:


def draw_outline(obj, width):
    """Give a matplotlib artist a black stroke of the given line width."""
    obj.set_path_effects([patheffects.Stroke(linewidth=width, foreground='black'),
                          patheffects.Normal()])


def draw_bbox(ax, obj):
    """Draw one (category_id, bbox) pair on `ax` as a labelled yellow box.

    bbox[0], bbox[1] are used as the rectangle's xy anchor and bbox[2],
    bbox[3] as its width and height. The label is the category name looked up
    in the module-level `categories` dict.
    """
    category, bbox = obj
    patch = ax.add_patch(patches.Rectangle(xy=(bbox[0], bbox[1]),
                                           width=bbox[2], height=bbox[3],
                                           fill=False, edgecolor='yellow', lw=2))
    draw_outline(patch, 4)
    ax.text(bbox[0], bbox[1], s=categories[category], color='black',
            fontsize=12, verticalalignment='bottom',
            bbox=dict(facecolor='yellow', pad=2))


def draw_image(id, largest=False):
    """Show the image with the given id and its annotated bounding boxes.

    Args:
        id: key into `image_props`. The name shadows the builtin but is kept
            so existing keyword callers keep working.
        largest: when True, draw only the largest bounding box.
    """
    fig, ax = plt.subplots(figsize=(16, 8))
    img = open_image(PATH/'VOC2007'/'JPEGImages'/image_props[id]['filename'])

    # sort bboxes by size, used later
    sorted_bboxes = sort_by_size(image_props[id]['objects'])

    for obj in sorted_bboxes:
        draw_bbox(ax, obj)
        if largest:
            # The list is sorted largest-first, so the first box is enough.
            break

    ax.imshow(img)


# In[21]:


draw_image(17)


# # Largest item classifier

# In[22]:


draw_image(17, largest=True)


# In[23]:


draw_image(23, largest=True)


# In[24]:


# filename -> category name of the largest object in that image
largest_objects = {
    img_prop['filename']: categories[sort_by_size(img_prop['objects'])[0][0]]
    for img_prop in image_props.values()
}


# In[25]:


largest_df = pd.DataFrame.from_dict(largest_objects, orient='index')
largest_df.head()


# In[26]:


LARGEST_CSV = PATH/'largest.csv'
largest_df.to_csv(LARGEST_CSV)


# In[27]:


model = resnet34
sz = 224
bs = 64
JPEGS = 'VOC2007/JPEGImages'


# In[28]:


tfms = tfms_from_model(model, sz, transforms_side_on, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, LARGEST_CSV, bs, tfms)


# In[29]:


x, y = next(iter(md.val_dl))  # first minibatch


# In[30]:


plt.imshow(md.trn_ds.denorm(to_np(x))[0]);


# In[31]:


learner = ConvLearner.pretrained(model, md, metrics=[accuracy])


# In[32]:


learner.lr_find(1e-5, 100)


# In[33]:


learner.sched.plot(3, 1)


# ### Train model

# In[34]:


lr = 2e-2
learner.fit(lr, 1, cycle_len=1)


# In[35]:


learner.fit(lr, 1, cycle_len=1)


# In[36]:


learner.fit(lr, 1, cycle_len=1)


# In[37]:


learner.fit(lr, 1, cycle_len=1)


# In[38]:


learner.fit(lr, 1, cycle_len=1)


# In[39]:


learner.fit(lr, 1, cycle_len=1)


# In[41]:


lrs = np.array([lr/1000, lr/100, lr])
learner.freeze_to(-2)
lrf = learner.lr_find(lrs/1000)
learner.sched.plot(1)


# In[42]:


learner.fit(lrs/5, 1, cycle_len=1)


# In[43]:


learner.unfreeze()


# In[44]:


learner.fit(lrs/5, 1, cycle_len=2)


# In[45]:


x, y = next(iter(md.val_dl))
probs = F.softmax(predict_batch(learner.model, x), -1)
x, preds = to_np(x), to_np(probs)


# In[46]:


preds


# In[47]:


# Collapse the per-class probabilities to the index of the most likely class.
preds = np.argmax(preds, -1)


# In[48]:


preds


# In[49]:


def show_img(im, figsize=None, ax=None):
    """Show `im` on `ax` with both axes hidden and return the axes.

    A new figure of size `figsize` is created when `ax` is None.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    return ax


def draw_text(ax, xy, txt, sz=14):
    """Write bold white `txt` with a thin black outline at position `xy`."""
    text = ax.text(*xy, txt, verticalalignment='top', color='white',
                   fontsize=sz, weight='bold')
    # draw_outline is the helper defined with the bounding-box drawing code.
    draw_outline(text, 1)


# In[50]:


fig, axes = plt.subplots(3, 4, figsize=(12, 8))
# Denormalise the batch once instead of once per subplot.
images = md.val_ds.denorm(x)
for i, ax in enumerate(axes.flat):
    ima = images[i]
    txt = md.classes[preds[i]]
    ax = show_img(ima, ax=ax)
    draw_text(ax, (0, 0), txt)
plt.tight_layout()


# ### Known object counter

# In[96]:


# filename -> number of (non-ignored) annotated objects
obj_count = {img_prop['filename']: len(img_prop['objects'])
             for img_prop in image_props.values()}


# In[97]:


df = pd.DataFrame.from_dict(obj_count, orient='index').reset_index()
df.head()


# In[98]:


COUNT_CSV = PATH/'count.csv'
df.to_csv(COUNT_CSV, index=False)


# In[99]:


model = resnet34
sz = 224
bs = 64
JPEGS = 'VOC2007/JPEGImages'


# In[100]:


tfms = tfms_from_model(model, sz, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, COUNT_CSV, tfms=tfms, continuous=True)


# In[101]:


x, y = next(iter(md.val_dl))
show_img(md.val_ds.denorm(to_np(x))[0]);


# In[102]:


to_np(y[0])


# In[104]:


# resnet last lyr = 7 * 7 * 512 = 25088, 1 outputs for number of objects
head_reg = nn.Sequential(Flatten(), nn.Linear(25088, 1))
learn = ConvLearner.pretrained(model, md, custom_head=head_reg)
learn.opt_fn = optim.Adam
learn.crit = nn.L1Loss()


# In[105]:


learn.lr_find(1e-5, 100)
learn.sched.plot(5)


# In[108]:


learn.sched.plot(0, 1)


# In[109]:


lr = 5e-5


# In[110]:


learn.fit(lr, 2, cycle_len=1, cycle_mult=2)


# In[111]:


lrs = np.array([lr/100, lr/10, lr])


# In[112]:


learn.freeze_to(-2)


# In[113]:


lrf = learn.lr_find(lrs/10000)
learn.sched.plot(1)


# In[114]:


learn.sched.plot(0)


# In[115]:


learn.fit(lrs, 2, cycle_len=1, cycle_mult=2)


# In[116]:


learn.freeze_to(-3)


# In[117]:


learn.fit(lrs, 1, cycle_len=2)


# In[118]:


learn.save('counter')


# In[119]:


x, y = next(iter(md.val_dl))
learn.model.eval()
preds = to_np(learn.model(VV(x)))


# In[125]:


fig, axes = plt.subplots(3, 4, figsize=(12, 8))
# Denormalise the batch once instead of once per subplot.
images = md.val_ds.denorm(x)
for i, ax in enumerate(axes.flat):
    ima = images[i]
    # The regression head emits one value per image; round it to a count.
    txt = str(int(round(preds[i][0])))
    ax = show_img(ima, ax=ax)
    draw_text(ax, (0, 0), txt)
plt.tight_layout()


# In[198]:


# Print the annotated object count for the first 12 validation images.
for fn in md.val_ds.fnames[:12]:
    # Strip the folder prefix to recover the key used in obj_count.
    name = fn[len('VOC2007/JPEGImages/'):]
    print(name + ' - ' + str(obj_count[name]))


# ### Known object counter using classification

# In[62]:


# Same counts as before, but as strings so they are treated as class labels.
obj_count = {img_prop['filename']: str(len(img_prop['objects']))
             for img_prop in image_props.values()}


# In[63]:


df = pd.DataFrame.from_dict(obj_count, orient='index').reset_index()
df.head()


# In[64]:


sorted(df[0].unique())


# In[66]:


COUNT_CSV = PATH/'count_classes.csv'
df.to_csv(COUNT_CSV, index=False)


# In[67]:


model = resnet34
sz = 224
bs = 64
JPEGS = 'VOC2007/JPEGImages'


# In[68]:


tfms = tfms_from_model(model, sz, transforms_side_on, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, COUNT_CSV, bs, tfms)


# In[69]:


x, y = next(iter(md.val_dl))
show_img(md.val_ds.denorm(to_np(x))[0]);


# In[72]:


learn = ConvLearner.pretrained(model, md, metrics=[accuracy])


# In[73]:


learn.lr_find(1e-5, 100)
learn.sched.plot(3, 1)


# In[74]:


learn.sched.plot(0, 1)


# In[75]:


lr = 5e-2


# In[76]:


learn.fit(lr, 2, cycle_len=1, cycle_mult=2)


# In[77]:


learn.fit(lr, 2, cycle_len=1, cycle_mult=3)


# In[78]:


learn.fit(lr, 3, cycle_len=1, cycle_mult=2)


# In[79]:


lrs = np.array([lr/100, lr/10, lr])


# In[80]:


learn.freeze_to(-2)


# In[81]:


lrf = learn.lr_find(lrs/10000)
learn.sched.plot(1)


# In[82]:


learn.sched.plot(0)


# In[83]:


learn.fit(lrs, 2, cycle_len=1, cycle_mult=2)


# In[84]:


learn.freeze_to(-3)


# In[85]:


learn.fit(lrs, 1, cycle_len=2)


# In[174]:


# Saved under its own name: the regression counter above was already saved as
# 'counter', and reusing that name here would overwrite its weights.
learn.save('counter_classes')


# In[86]:


x, y = next(iter(md.val_dl))
learn.model.eval()
preds = to_np(learn.model(VV(x)))


# In[90]:


x, y = next(iter(md.val_dl))
probs = F.softmax(predict_batch(learn.model, x), -1)
x, preds = to_np(x), to_np(probs)


# In[91]:


preds


# In[92]:


# Collapse the per-class probabilities to the index of the most likely class.
preds = np.argmax(preds, -1)


# In[93]:


preds


# In[94]:


fig, axes = plt.subplots(3, 4, figsize=(12, 8))
# Denormalise the batch once instead of once per subplot.
images = md.val_ds.denorm(x)
for i, ax in enumerate(axes.flat):
    ima = images[i]
    txt = md.classes[preds[i]]
    ax = show_img(ima, ax=ax)
    draw_text(ax, (0, 0), txt)
plt.tight_layout()


# In[95]:


# Print the annotated object count for the first 12 validation images.
for fn in md.val_ds.fnames[:12]:
    # Strip the folder prefix to recover the key used in obj_count.
    name = fn[len('VOC2007/JPEGImages/'):]
    print(name + ' - ' + str(obj_count[name]))