import numpy as np
import caffe
from lib import run_net
from lib import score_util
from datasets.youtube import youtube
from datasets.pascal_voc import pascal
# --- Configure Caffe and load the net ---
# Select GPU 0 and switch Caffe to GPU mode before the network is built.
caffe.set_device(0)
caffe.set_mode_gpu()

# Build the network in TEST phase from the staged FCN-8s definition and the
# trained weights; both paths are relative to the working directory.
net = caffe.Net(
    '../nets/stage-voc-fcn8s.prototxt',
    '../nets/voc-fcn8s-heavy.caffemodel',
    caffe.TEST,
)
# --- Dataset details ---
# Dataset handles.  YT is the dataset being evaluated; PV is only consulted
# for its `classes` list when network output indices are translated into
# YT class indices further down.
YT = youtube('/x/youtube/')
PV = pascal('/x/PASCAL/VOC2011')
# Number of YT classes; used as the side length of the confusion histograms.
n_cl = len(YT.classes)
# Iterated below as (class_, vid, shot) triples.
inputs = YT.load_dataset()
# --- Set the base clock / subsampling rate ---
# Subsampling rate in frames.  The pipeline evaluations below feed the frames
# at f - CR (and f - 2*CR) before the current frame f, and every evaluation
# skips frames with f < 2*CR + 1.
CR = 10  # subsample amount -- we used only every 10 frames for the paper
# Oracle baseline: run run_net.segrun on every labelled frame on its own and
# accumulate a single confusion histogram over the whole dataset.
hist_perframe = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for every YouTube class.  The class lists are
# not modified inside the loop, so the lookups are done once here instead of
# once per class per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip frames with index below 2*CR + 1 so that the scored frames
        # align with the pipeline evaluations, which also need the frames
        # at f - CR and f - 2*CR.
        if f < 2*CR+1:
            continue

        im = YT.load_frame(class_, vid, shot, f)
        out = run_net.segrun(net, YT.preprocess(im))

        # Translate PASCAL class indices in the prediction into YouTube class
        # indices.  Pixels predicted as a class with no YouTube counterpart
        # keep the initial value 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist_perframe += score_util.fast_hist(
            label.flatten(), out_yt.flatten(), n_cl)

# Scores are computed once, from the histogram summed over all frames.
acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist_perframe)
# Single-argument print(...) emits the same text under Python 2 and 3.
print('Oracle: Per frame')
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(
    100*acc, 100*cl_acc, 100*mean_iu, 100*fw_iu))
# Recorded output:
#   Oracle: Per frame
#   acc          cl acc       mIU          fwIU
#   95.255650    82.298180    69.999789    91.424557
# 2-stage pipeline evaluation: for every labelled frame f, first run
# run_net.segrun on frame f - CR, then score the output of
# run_net.pipeline_2stage_forward on frame f.
hist = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for every YouTube class.  The class lists are
# not modified inside the loop, so the lookups are done once here instead of
# once per class per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip frames with index below 2*CR + 1.  Only f - CR is needed
        # here; the larger threshold keeps the scored frames aligned with
        # the other evaluations in this script, which use the same test.
        if f < 2*CR+1:
            continue

        # Assume the frame CR steps back was the last full run.  Its output
        # is discarded; presumably the call leaves state in `net` that the
        # 2-stage forward reuses -- confirm in lib/run_net.
        im = YT.load_frame(class_, vid, shot, (f-CR))
        _ = run_net.segrun(net, YT.preprocess(im))

        # Run the current frame through the 2-stage pipeline.
        im = YT.load_frame(class_, vid, shot, f)
        out = run_net.pipeline_2stage_forward(net, YT.preprocess(im))

        # Translate PASCAL class indices in the prediction into YouTube class
        # indices.  Pixels predicted as a class with no YouTube counterpart
        # keep the initial value 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist += score_util.fast_hist(
            label.flatten(), out_yt.flatten(), n_cl)

# Scores are computed once, from the histogram summed over all frames.
acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist)
# Single-argument print(...) emits the same text under Python 2 and 3.
print('Pipeline 2-stage on subsample {}:'.format(CR))
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(
    100*acc, 100*cl_acc, 100*mean_iu, 100*fw_iu))
# Recorded output:
#   Pipeline 2-stage on subsample 10:
#   acc          cl acc       mIU          fwIU
#   93.925326    76.476584    64.002186    89.178644
# 3-stage pipeline evaluation: for every labelled frame f, run
# run_net.segrun on frame f - 2*CR, run_net.pipeline_3stage_forward on frame
# f - CR, and then score the output of pipeline_3stage_forward on frame f.
hist = np.zeros((n_cl, n_cl))

# (PASCAL index, YouTube index) for every YouTube class.  The class lists are
# not modified inside the loop, so the lookups are done once here instead of
# once per class per frame.
pv_to_yt = [(PV.classes.index(c), YT.classes.index(c)) for c in YT.classes]

for (class_, vid, shot) in inputs:
    for f in YT.list_label_frames(class_, vid, shot):
        # Skip frames with index below 2*CR + 1, so that the earliest frame
        # requested below (f - 2*CR) has index at least 1.
        if f < 2*CR+1:
            continue

        # Push the frames 2*CR and CR steps back through the pipeline.
        # Both outputs are discarded; presumably the calls leave state in
        # `net` that the final forward reuses -- confirm in lib/run_net.
        im = YT.load_frame(class_, vid, shot, (f-2*CR))
        _ = run_net.segrun(net, YT.preprocess(im))
        im = YT.load_frame(class_, vid, shot, f-CR)
        _ = run_net.pipeline_3stage_forward(net, YT.preprocess(im))

        # Run the current frame through the pipeline.
        im = YT.load_frame(class_, vid, shot, f)
        out = run_net.pipeline_3stage_forward(net, YT.preprocess(im))

        # Translate PASCAL class indices in the prediction into YouTube class
        # indices.  Pixels predicted as a class with no YouTube counterpart
        # keep the initial value 0.
        out_yt = np.zeros(out.shape, dtype=np.uint8)
        for pv_idx, yt_idx in pv_to_yt:
            out_yt[out == pv_idx] = yt_idx

        label = YT.load_label(class_, vid, shot, f)
        label = YT.make_label(label, class_)
        hist += score_util.fast_hist(
            label.flatten(), out_yt.flatten(), n_cl)

# Scores are computed once, from the histogram summed over all frames.
acc, cl_acc, mean_iu, fw_iu = score_util.get_scores(hist)
# Single-argument print(...) emits the same text under Python 2 and 3.
print('Pipeline 3-stage on subsample {}:'.format(CR))
print('acc\t\t cl acc\t\t mIU\t\t fwIU')
print('{:f}\t {:f}\t {:f}\t {:f}\t'.format(
    100*acc, 100*cl_acc, 100*mean_iu, 100*fw_iu))
# Recorded output:
#   Pipeline 3-stage on subsample 10:
#   acc          cl acc       mIU          fwIU
#   92.591616    70.971741    58.125009    87.030027