Notebook

In [1]:

import numpy as np

import caffe

from lib import run_net
from lib import score_util

from datasets.youtube import youtube
from datasets.pascal_voc import pascal

Configure Caffe and load net

In [2]:

# Select GPU device 0 and switch Caffe to GPU mode before the net is built.
caffe.set_device(0)
caffe.set_mode_gpu()

# Network definition and trained weights, relative to this notebook.
model_def = '../nets/stage-voc-fcn8s.prototxt'
model_weights = '../nets/voc-fcn8s-heavy.caffemodel'

# Instantiate the net in TEST phase; every evaluation cell reuses this object.
net = caffe.Net(model_def, model_weights, caffe.TEST)

Dataset details

In [3]:

# Dataset roots on the local machine.
youtube_root = '/x/youtube/'
pascal_root = '/x/PASCAL/VOC2011'

# Dataset helpers: YT supplies the frames and labels that are scored, PV is
# used only for its class list when remapping net predictions.
YT = youtube(youtube_root)
PV = pascal(pascal_root)

# Number of YouTube classes (side length of the score histograms) and the
# (class, video, shot) triples iterated by the evaluation cells.
n_cl = len(YT.classes)
inputs = YT.load_dataset()

Set base clock/subsampling rate

In [4]:

# Base clock / subsampling rate, in frames. The pipeline cells look back CR
# frames (and 2*CR frames for the 3-stage pipeline) from the frame being
# scored, and all evaluation cells skip frames with index below 2*CR+1.
CR = 10 # subsample amount -- only every 10th frame was used for the paper

Oracle per frame

In [5]:

# Oracle baseline: run the full net on every evaluated frame and accumulate
# an n_cl x n_cl score histogram over the YouTube classes.
hist_perframe = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for every YouTube class. The mapping does not
# depend on the frame, so it is built once instead of once per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip labelled frames with index below 2*CR+1. The 3-stage pipeline
        # cell looks back 2*CR frames, so the same cut-off is applied here to
        # score exactly the same set of frames as the pipelines.
        if f < 2*CR+1:
            continue
        im = YT.load_frame(class_, vid, shot, f)

        out = run_net.segrun(net, YT.preprocess(im))

        # Remap the prediction from PASCAL class indices to YouTube class
        # indices; pixels not matched by any mapped class keep the value 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist_perframe += score_util.fast_hist(label.flatten(), out_yt.flatten(), n_cl)

# Single-argument parenthesised print behaves identically on Python 2 and 3.
acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist_perframe)
print('Oracle: Per frame')
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(100*acc, 100*cl_acc, 100*mean_iu, 100*fw_iu))

Oracle: Per frame
acc		 cl acc		 mIU		 fwIU
95.255650	 82.298180	 69.999789	 91.424557

Pipeline 2-stage

In [6]:

# 2-stage pipeline: a full run on the frame CR steps back, followed by the
# 2-stage forward pass on the current frame, scored against its label.
hist = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for every YouTube class. The mapping does not
# depend on the frame, so it is built once instead of once per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip labelled frames with index below 2*CR+1, the same cut-off used
        # by the oracle and 3-stage cells, so all three score the same frames.
        if f < 2*CR+1:
            continue

        # Assume the frame CR steps back was the last full run. The output is
        # discarded; the call is made only for its effect on the net
        # (presumably it leaves the intermediate state the pipeline reuses).
        # The result is not bound to `_`, which IPython uses for the last output.
        im = YT.load_frame(class_, vid, shot, (f-CR))
        run_net.segrun(net, YT.preprocess(im))

        # Run current frame through 2stage pipeline
        im = YT.load_frame(class_, vid, shot, f)
        out = run_net.pipeline_2stage_forward(net, YT.preprocess(im))

        # Remap the prediction from PASCAL class indices to YouTube class
        # indices; pixels not matched by any mapped class keep the value 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist += score_util.fast_hist(label.flatten(), out_yt.flatten(), n_cl)

# Single-argument parenthesised print behaves identically on Python 2 and 3.
acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist)
print('Pipeline 2-stage on subsample {}:'.format(CR))
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(100*acc, 100*cl_acc,100*mean_iu, 100*fw_iu))

Pipeline 2-stage on subsample 10:
acc		 cl acc		 mIU		 fwIU
93.925326	 76.476584	 64.002186	 89.178644

Pipeline 3-stage

In [7]:

# 3-stage pipeline: a full run on the frame 2*CR steps back, a 3-stage forward
# pass on the frame CR steps back, then a 3-stage forward pass on the current
# frame, whose output is scored against its label.
hist = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for every YouTube class. The mapping does not
# depend on the frame, so it is built once instead of once per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip labelled frames with index below 2*CR+1: the look-back below
        # loads frame f-2*CR, so earlier frames have no such predecessor
        # (presumably frame indices start at 1 -- confirm against the dataset).
        if f < 2*CR+1:
            continue

        # Push frames 2*CR and CR old through the pipeline. Both outputs are
        # discarded; the calls are made only for their effect on the net.
        # The results are not bound to `_`, which IPython uses for the last output.
        im = YT.load_frame(class_, vid, shot, (f-2*CR))
        run_net.segrun(net, YT.preprocess(im))
        im = YT.load_frame(class_, vid, shot, f-CR)
        run_net.pipeline_3stage_forward(net, YT.preprocess(im))

        # Run current frame through pipeline
        im = YT.load_frame(class_, vid, shot, f)
        out = run_net.pipeline_3stage_forward(net, YT.preprocess(im))

        # Remap the prediction from PASCAL class indices to YouTube class
        # indices; pixels not matched by any mapped class keep the value 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist += score_util.fast_hist(label.flatten(), out_yt.flatten(), n_cl)

# Single-argument parenthesised print behaves identically on Python 2 and 3.
acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist)
print('Pipeline 3-stage on subsample {}:'.format(CR))
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(100*acc, 100*cl_acc,100*mean_iu, 100*fw_iu))

Pipeline 3-stage on subsample 10:
acc		 cl acc		 mIU		 fwIU
92.591616	 70.971741	 58.125009	 87.030027