#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Notebook-export artefacts: `get_ipython` is not imported anywhere in this
# file, so these calls presumably rely on an IPython session providing it.
# Requests the 'inline' matplotlib backend.
get_ipython().run_line_magic('matplotlib', 'inline')
# (Re)loads the autoreload extension and sets its mode to '2'.
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# In[2]:


from fastai.conv_learner import *
from fastai.dataset import *

from pathlib import Path
import json

from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects


# # Pascal VOC dataset

# In[3]:


# Root directory of the Pascal VOC data; later cells build their paths from it.
PATH = Path('data/pascal')
list(PATH.iterdir())


# In[4]:


# Parse the training annotations. The context manager closes the file handle
# as soon as parsing is done instead of leaving it open until it happens to
# be garbage collected.
with (PATH / 'pascal_train2007.json').open() as annotations_file:
    train_json = json.load(annotations_file)


# ### Explore dataset

# In[5]:


# Top-level keys of the annotation file.
train_json.keys()


# In[6]:


# Number of image records.
len(train_json['images'])


# In[7]:


# First three image records.
train_json['images'][:3]


# In[8]:


# Number of annotation records.
len(train_json['annotations'])


# In[126]:


# First three annotation records.
train_json['annotations'][:3]


# In[10]:


# Full category list.
train_json['categories']


# In[11]:


# Contents of the VOC2007 directory.
list((PATH/'VOC2007').iterdir())


# In[12]:


# First ten entries of the JPEG image directory.
list((PATH/'VOC2007'/'JPEGImages').iterdir())[:10]


# ### Create data structures

# In[13]:


# Lookup table: category id -> category name.
categories = dict((entry['id'], entry['name']) for entry in train_json['categories'])


# In[14]:


categories[1], categories[7]


# In[15]:


# One record per image, keyed by image id: the file name plus a list that
# collects the image's objects.
image_props = {
    record['id']: {'filename': record['file_name'], 'objects': []}
    for record in train_json['images']
}

# Attach every annotation that is not flagged `ignore` to its image as a
# (category_id, bbox) pair.
for ann in train_json['annotations']:
    if ann['ignore']:
        continue
    image_props[ann['image_id']]['objects'].append((ann['category_id'], ann['bbox']))


# In[16]:


image_props[17]


# In[17]:


categories[13]


# ### Display image

# In[18]:


# Repeats the 'matplotlib inline' magic already issued at the top of the file.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[19]:


def sort_by_size(obj):
    """Return the entries of ``obj`` ordered from largest box to smallest.

    Each entry is indexed as ``entry[1][2]`` and ``entry[1][3]``; the product
    of those two values (used as width and height when boxes are drawn in
    this file) is the sort key. A new list is returned and ``obj`` itself is
    not modified.
    """
    def _area(entry):
        box = entry[1]
        return box[2] * box[3]

    return sorted(obj, key=_area, reverse=True)


# In[20]:


def draw_outline(obj, width):
    """Set path effects on ``obj``: a black ``Stroke`` of linewidth ``width``
    followed by ``Normal``."""
    outline = patheffects.Stroke(linewidth=width, foreground='black')
    obj.set_path_effects([outline, patheffects.Normal()])

def draw_bbox(ax, obj):
    """Draw one annotated object on ``ax``.

    ``obj`` is a ``(category, bbox)`` pair. The first four values of ``bbox``
    are used as x, y, width and height of a yellow, unfilled rectangle, which
    is outlined via ``draw_outline``. The category name, looked up in the
    module-level ``categories`` dict, is written at the rectangle's (x, y)
    position on a yellow background.
    """
    category, bbox = obj
    x, y, w, h = bbox[:4]
    rect = ax.add_patch(patches.Rectangle(xy=(x, y), width=w, height=h,
                                          fill=False, edgecolor='yellow', lw=2))
    draw_outline(rect, 4)
    # The Text object returned by ax.text is not needed afterwards, so it is
    # not bound to a name.
    ax.text(x, y, s=categories[category], color='black', fontsize=12,
            verticalalignment='bottom',
            bbox=dict(facecolor='yellow', pad=2))
    
def draw_image(id, largest=False):
    """Show image ``id`` from ``image_props`` with its bounding boxes drawn.

    Boxes are taken in ``sort_by_size`` order. When ``largest`` is true only
    the first (biggest) one is drawn.
    """
    _, ax = plt.subplots(figsize=(16,8))
    record = image_props[id]
    img = open_image(PATH/'VOC2007'/'JPEGImages'/record['filename'])

    boxes = sort_by_size(record['objects'])
    if largest:
        # Keep only the biggest box (or nothing when there are no boxes).
        boxes = boxes[:1]

    for box in boxes:
        draw_bbox(ax, box)

    ax.imshow(img)


# In[21]:


# Image 17 with all of its boxes.
draw_image(17)


# # Largest item classifier

# In[22]:


# Same image, largest box only.
draw_image(17, largest=True)


# In[23]:


# Image 23, largest box only.
draw_image(23, largest=True)


# In[24]:


# Map each image file name to the category name of its largest object.
# Images with an empty object list (for example when every annotation was
# flagged `ignore`) are skipped: indexing [0] on an empty list would raise
# IndexError, and such an image has no label to learn from anyway.
largest_objects = {img_prop['filename']: categories[sort_by_size(img_prop['objects'])[0][0]]
                   for img_prop in image_props.values()
                   if img_prop['objects']}


# In[25]:


# One row per file name (the index) with the category name in column 0.
largest_df = pd.DataFrame.from_dict(largest_objects, orient='index')
largest_df.head()


# In[26]:


# Written with the index, so the CSV has the file name first and the label second.
LARGEST_CSV = PATH/'largest.csv'
largest_df.to_csv(LARGEST_CSV)


# In[27]:


# Settings for the largest-object classifier.
model = resnet34  # architecture handed to tfms_from_model and ConvLearner.pretrained
sz = 224  # size argument of tfms_from_model
bs = 64  # batch-size argument of from_csv
JPEGS = 'VOC2007/JPEGImages'  # image folder, passed to from_csv together with PATH


# In[28]:


# Transforms and dataset built from the CSV written to LARGEST_CSV.
tfms = tfms_from_model(model, sz, transforms_side_on, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, LARGEST_CSV, bs, tfms)


# In[29]:


x, y = next(iter(md.val_dl)) # first minibatch


# In[30]:


# Show the first image of that batch after denorm().
plt.imshow(md.trn_ds.denorm(to_np(x))[0]);


# In[31]:


# Learner for the largest-object classifier, reporting accuracy.
learner = ConvLearner.pretrained(model, md, metrics=[accuracy])


# In[32]:


# Learning-rate finder; the arguments are presumably the start and end
# learning rates -- confirm against the fastai version in use.
learner.lr_find(1e-5, 100)


# In[33]:


# NOTE(review): the two arguments presumably skip points at the start and end
# of the curve -- confirm.
learner.sched.plot(3, 1)


# ### Train model

# In[34]:


# Learning rate used for the fit calls below.
lr = 2e-2
# Six identical fit calls follow, one per notebook cell.
learner.fit(lr, 1, cycle_len=1)


# In[35]:


learner.fit(lr, 1, cycle_len=1)


# In[36]:


learner.fit(lr, 1, cycle_len=1)


# In[37]:


learner.fit(lr, 1, cycle_len=1)


# In[38]:


learner.fit(lr, 1, cycle_len=1)


# In[39]:


learner.fit(lr, 1, cycle_len=1)


# In[41]:


# Array of three learning rates (lr/1000, lr/100, lr); presumably one per
# layer group -- confirm.
lrs = np.array([lr/1000,lr/100,lr])

learner.freeze_to(-2)

# `lrf` is assigned here but never read in this file.
lrf=learner.lr_find(lrs/1000)
learner.sched.plot(1)


# In[42]:


# Train with the rates scaled down by 5.
learner.fit(lrs/5, 1, cycle_len=1)


# In[43]:


learner.unfreeze()


# In[44]:


learner.fit(lrs/5, 1, cycle_len=2)


# In[45]:


# Take a batch from a fresh validation iterator, run the model on it and
# apply softmax over the last axis.
x, y = next(iter(md.val_dl))
probs = F.softmax(predict_batch(learner.model, x), -1)
# Convert both the inputs and the probabilities with to_np.
x, preds = to_np(x), to_np(probs)


# In[46]:


preds


# In[47]:


# Replace the probabilities with the index of the largest one per row.
preds = np.argmax(preds, -1)


# In[48]:


preds


# In[49]:


def show_img(im, figsize=None, ax=None):
    """Show ``im`` on ``ax`` with both axes hidden and return the axes.

    When ``ax`` is ``None`` a new figure of size ``figsize`` is created via
    ``plt.subplots``. The check is an explicit ``is None`` so that a supplied
    axes object is always used, whatever its truth value.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    return ax

def draw_text(ax, xy, txt, sz=14):
    """Write ``txt`` on ``ax`` at position ``xy`` in bold white text of font
    size ``sz``, then outline it via ``draw_outline``."""
    style = dict(verticalalignment='top', color='white', fontsize=sz, weight='bold')
    label = ax.text(*xy, txt, **style)
    draw_outline(label, 1)
    
def draw_outline(o, lw):
    """Set path effects on ``o``: a black ``Stroke`` of linewidth ``lw``
    followed by ``Normal``."""
    effects = [
        patheffects.Stroke(linewidth=lw, foreground='black'),
        patheffects.Normal(),
    ]
    o.set_path_effects(effects)


# In[50]:


# 3 x 4 grid: the first 12 images of the batch, each labelled with its
# predicted class name.
fig, axes = plt.subplots(3, 4, figsize=(12, 8))
# denorm() is applied to the whole batch, so it is called once here rather
# than once per axis (assumes denorm has no side effects -- confirm).
denormed = md.val_ds.denorm(x)
for i, ax in enumerate(axes.flat):
    txt = md.classes[preds[i]]
    ax = show_img(denormed[i], ax=ax)
    draw_text(ax, (0,0), txt)
plt.tight_layout()


# ### Known object counter

# In[96]:


# File name -> number of objects recorded for that image.
obj_count = {
    props['filename']: len(props['objects'])
    for props in image_props.values()
}


# In[97]:


# Turn the mapping into a two-column frame: file name ('index') and count (0).
df = pd.DataFrame.from_dict(obj_count, orient='index')
df = df.reset_index()
df.head()


# In[98]:


# Write the counts without the frame's own integer index.
COUNT_CSV = PATH/'count.csv'
df.to_csv(COUNT_CSV, index=False)


# In[99]:


# Settings for the object-count regressor.
model = resnet34
sz=224
bs = 64
JPEGS = 'VOC2007/JPEGImages'


# In[100]:


tfms = tfms_from_model(model, sz, crop_type=CropType.NO)
# `bs` is passed explicitly, as in the other from_csv calls in this file, so
# that the batch size set above is the one actually used instead of the
# library default. `continuous=True` requests numeric rather than categorical
# labels.
md = ImageClassifierData.from_csv(PATH, JPEGS, COUNT_CSV, bs=bs, tfms=tfms, continuous=True)


# In[101]:


# Show the first image of a validation batch.
x,y=next(iter(md.val_dl))
show_img(md.val_ds.denorm(to_np(x))[0]);


# In[102]:


# Target value of that first image.
to_np(y[0])


# In[104]:


# Custom head: flatten the incoming features and map them to a single value,
# the predicted number of objects. 25088 = 7 * 7 * 512, which the original
# note gives as the size of the last resnet layer.
head_reg = nn.Sequential(Flatten(), nn.Linear(25088, 1))
learn = ConvLearner.pretrained(model, md, custom_head=head_reg)
# Use Adam as the optimiser and L1 loss as the criterion.
learn.opt_fn = optim.Adam
learn.crit = nn.L1Loss()


# In[105]:


# Learning-rate finder followed by plots of its schedule. The meaning of the
# plot arguments is not visible here -- confirm against the fastai version in use.
learn.lr_find(1e-5,100)
learn.sched.plot(5)


# In[108]:


learn.sched.plot(0, 1)


# In[109]:


# Learning rate used for the fit call below.
lr = 5e-5


# In[110]:


learn.fit(lr, 2, cycle_len=1, cycle_mult=2)


# In[111]:


# Array of three learning rates (lr/100, lr/10, lr); presumably one per layer
# group -- confirm.
lrs = np.array([lr/100,lr/10,lr])


# In[112]:


learn.freeze_to(-2)


# In[113]:


# `lrf` is assigned here but never read in this file.
lrf=learn.lr_find(lrs/10000)
learn.sched.plot(1)


# In[114]:


learn.sched.plot(0)


# In[115]:


learn.fit(lrs, 2, cycle_len=1, cycle_mult=2)


# In[116]:


learn.freeze_to(-3)


# In[117]:


learn.fit(lrs, 1, cycle_len=2)


# In[118]:


# Save the learner under the name 'counter'.
# NOTE(review): the classification counter later in this file saves under the
# same name.
learn.save('counter')


# In[119]:


# Run the model, in eval mode, on a batch from a fresh validation iterator.
x,y = next(iter(md.val_dl))
learn.model.eval()
preds = to_np(learn.model(VV(x)))


# In[125]:


# 3 x 4 grid: the first 12 images of the batch, each labelled with its
# predicted count rounded to the nearest integer.
fig, axes = plt.subplots(3, 4, figsize=(12, 8))
# denorm() is applied to the whole batch, so it is called once here rather
# than once per axis (assumes denorm has no side effects -- confirm).
denormed = md.val_ds.denorm(x)
for i, ax in enumerate(axes.flat):
    txt = str(int(round(preds[i][0])))
    ax = show_img(denormed[i], ax=ax)
    draw_text(ax, (0,0), txt)
plt.tight_layout()


# In[198]:


# Print "<file name> - <object count>" for the first 12 validation files,
# with the folder prefix cut off each name.
prefix_len = len('VOC2007/JPEGImages/')
for fn in md.val_ds.fnames[:12]:
    name = fn[prefix_len:]
    print(name + ' - ' + str(obj_count[name]))


# ### Known object counter using classification

# In[62]:


# File name -> number of objects, stored as a string so that each distinct
# count can serve as a class label.
obj_count = {
    props['filename']: str(len(props['objects']))
    for props in image_props.values()
}


# In[63]:


# Two-column frame: file name ('index') and count label (0).
df = pd.DataFrame.from_dict(obj_count, orient='index')
df = df.reset_index()
df.head()


# In[64]:


# Distinct count labels in sorted (string) order.
sorted(df[0].unique())


# In[66]:


# Write the labels without the frame's own integer index.
COUNT_CSV = PATH/'count_classes.csv'
df.to_csv(COUNT_CSV, index=False)


# In[67]:


# Settings for the classification-based counter (same values as the earlier
# sections of this file).
model = resnet34
sz=224
bs = 64
JPEGS = 'VOC2007/JPEGImages'


# In[68]:


# Transforms and dataset built from the CSV written to COUNT_CSV.
tfms = tfms_from_model(model, sz, transforms_side_on, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, COUNT_CSV, bs, tfms)


# In[69]:


# Show the first image of a validation batch.
x,y=next(iter(md.val_dl))
show_img(md.val_ds.denorm(to_np(x))[0]);


# In[72]:


# Learner for the classification-based counter, reporting accuracy.
learn = ConvLearner.pretrained(model, md, metrics=[accuracy])


# In[73]:


# Learning-rate finder followed by plots of its schedule. The meaning of the
# plot arguments is not visible here -- confirm against the fastai version in use.
learn.lr_find(1e-5,100)
learn.sched.plot(3, 1)


# In[74]:


learn.sched.plot(0,1)


# In[75]:


# Learning rate used for the fit calls below.
lr = 5e-2


# In[76]:


# Three fit calls with differing cycle counts and cycle_mult values.
learn.fit(lr, 2, cycle_len=1, cycle_mult=2)


# In[77]:


learn.fit(lr, 2, cycle_len=1, cycle_mult=3)


# In[78]:


learn.fit(lr, 3, cycle_len=1, cycle_mult=2)


# In[79]:


# Array of three learning rates (lr/100, lr/10, lr); presumably one per layer
# group -- confirm.
lrs = np.array([lr/100,lr/10,lr])


# In[80]:


learn.freeze_to(-2)


# In[81]:


# `lrf` is assigned here but never read in this file.
lrf=learn.lr_find(lrs/10000)
learn.sched.plot(1)


# In[82]:


learn.sched.plot(0)


# In[83]:


learn.fit(lrs, 2, cycle_len=1, cycle_mult=2)


# In[84]:


learn.freeze_to(-3)


# In[85]:


learn.fit(lrs, 1, cycle_len=2)


# In[174]:


# Saved under its own name: the regression counter earlier in this file is
# already saved as 'counter', and reusing that name would overwrite it.
learn.save('counter_classes')


# In[86]:


# Run the model, in eval mode, on a batch from a fresh validation iterator.
# NOTE(review): this `preds` is overwritten by the next cell before it is read.
x,y = next(iter(md.val_dl))
learn.model.eval()
preds = to_np(learn.model(VV(x)))


# In[90]:


# Take a batch again, run the model on it and apply softmax over the last axis.
x, y = next(iter(md.val_dl))
probs = F.softmax(predict_batch(learn.model, x), -1)
# Convert both the inputs and the probabilities with to_np.
x, preds = to_np(x), to_np(probs)


# In[91]:


preds


# In[92]:


# Replace the probabilities with the index of the largest one per row.
preds = np.argmax(preds, -1)


# In[93]:


preds


# In[94]:


# 3 x 4 grid: the first 12 images of the batch, each labelled with its
# predicted class (the count label).
fig, axes = plt.subplots(3, 4, figsize=(12, 8))
# denorm() is applied to the whole batch, so it is called once here rather
# than once per axis (assumes denorm has no side effects -- confirm).
denormed = md.val_ds.denorm(x)
for i, ax in enumerate(axes.flat):
    txt = md.classes[preds[i]]
    ax = show_img(denormed[i], ax=ax)
    draw_text(ax, (0,0), txt)
plt.tight_layout()


# In[95]:


# Print "<file name> - <object count>" for the first 12 validation files,
# with the folder prefix cut off each name.
prefix_len = len('VOC2007/JPEGImages/')
for fn in md.val_ds.fnames[:12]:
    name = fn[prefix_len:]
    print(name + ' - ' + str(obj_count[name]))