import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# %matplotlib inline  (IPython notebook magic — commented out; not valid in plain Python)
# Create images with random rectangles and bounding boxes.
num_imgs = 50000

img_size = 8        # images are img_size x img_size pixels
min_rect_size = 1
max_rect_size = 4
num_objects = 2     # rectangles per image

# Targets: one [x, y, w, h] row per object per image.
bboxes = np.zeros((num_imgs, num_objects, 4))
imgs = np.zeros((num_imgs, img_size, img_size))

for i_img in range(num_imgs):
    for i_object in range(num_objects):
        # NOTE(review): np.random.randint has an EXCLUSIVE upper bound, so the
        # drawn sizes are in [min_rect_size, max_rect_size - 1]; max_rect_size
        # itself never occurs. Kept as-is to match the recorded outputs below.
        w, h = np.random.randint(min_rect_size, max_rect_size, size=2)
        # Position chosen so the rectangle fits inside the image.
        x = np.random.randint(0, img_size - w)
        y = np.random.randint(0, img_size - h)
        # Axis 0 is x and axis 1 is y here; plots below transpose accordingly.
        imgs[i_img, x:x+w, y:y+h] = 1.
        bboxes[i_img, i_object] = [x, y, w, h]

imgs.shape, bboxes.shape
# Output: ((50000, 8, 8), (50000, 2, 4))
# Show one randomly chosen training image together with its ground-truth boxes.
import random

i = random.randint(1, 1000)
plt.imshow(imgs[i].T, cmap='Greys', interpolation='none', origin='lower',
           extent=[0, img_size, 0, img_size])
ax = plt.gca()
for bbox in bboxes[i]:
    rect = matplotlib.patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3],
                                        ec='r', fc='none')
    ax.add_patch(rect)
# Flatten each image to a 64-vector and standardize to mean 0 / std 1.
pixel_mean = np.mean(imgs)
pixel_std = np.std(imgs)
X = (imgs.reshape(num_imgs, -1) - pixel_mean) / pixel_std
X.shape, np.mean(X), np.std(X)

# Normalize x, y, w, h by img_size so all target values lie in [0, 1].
# Important: do not shift to negative values (e.g. by mean-centering),
# because the IOU calculation needs positive w and h.
y = bboxes.reshape(num_imgs, -1) / img_size
y.shape, np.mean(y), np.std(y)
# Output: ((50000, 8), 0.2814834375, 0.17521282872747532)
# Hold out the last 20% of the samples for testing.
i = int(0.8 * num_imgs)  # split index
train_X, test_X = X[:i], X[i:]
train_y, test_y = y[:i], y[i:]
# Keep the raw test images/boxes around for visualization later.
test_imgs = imgs[i:]
test_bboxes = bboxes[i:]
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Convolution2D, MaxPooling2D, Flatten
from keras.optimizers import SGD
# NOTE(review): filter_size and pool_size are leftovers from a convolutional
# variant of this model (Convolution2D / MaxPooling2D are imported but never
# used); kept so any external references still resolve.
filter_size = 3
pool_size = 2

# Simple fully connected regressor: 64 pixels in -> 8 values out
# (2 objects x [x, y, w, h], all normalized to [0, 1]).
model = Sequential([
    Dense(512, input_dim=X.shape[-1]),
    Activation('relu'),
    # FIX: the original also passed input_dim here; Keras ignores input_dim
    # on non-first layers of a Sequential model, so it was misleading dead code.
    Dense(128),
    Activation('relu'),
    Dropout(0.2),
    Dense(y.shape[-1])
])
model.compile('adadelta', 'mse')
def IOU(bbox1, bbox2):
    '''Calculate the overlap between two bounding boxes [x, y, w, h] as the
    area of intersection over the area of union. Returns 0 for disjoint boxes.
    Assumes positive widths and heights.'''
    ax, ay, aw, ah = bbox1[0], bbox1[1], bbox1[2], bbox1[3]
    bx, by, bw, bh = bbox2[0], bbox2[1], bbox2[2], bbox2[3]
    # Width/height of the intersection rectangle (<= 0 means no overlap).
    inter_w = min(ax + aw, bx + bw) - max(ax, bx)
    inter_h = min(ay + ah, by + bh) - max(ay, by)
    if inter_w <= 0 or inter_h <= 0:
        return 0
    inter = inter_w * inter_h
    # Union via inclusion-exclusion.
    union = aw * ah + bw * bh - inter
    return inter / union
def distance(bbox1, bbox2):
    '''Euclidean distance between the (x, y) corners of two [x, y, w, h] boxes.'''
    delta = bbox1[:2] - bbox2[:2]
    return np.sqrt(np.dot(delta, delta))
# Train with self-correcting target assignment ("flipping" trick): the network
# predicts two boxes, but the order of the two ground-truth boxes per sample is
# arbitrary. After each epoch, compare the prediction against the targets in
# both orders and keep, per sample, whichever order gives the smaller MSE.
num_epochs = 50

flipped_train_y = np.array(train_y)  # working copy of the targets; rows get reordered in place
flipped = np.zeros((len(flipped_train_y), num_epochs))      # 1 where a sample was flipped in that epoch
ious_epoch = np.zeros((len(flipped_train_y), num_epochs))   # per-sample IOU (averaged over the 2 objects)
dists_epoch = np.zeros((len(flipped_train_y), num_epochs))  # per-sample corner distance (averaged)
mses_epoch = np.zeros((len(flipped_train_y), num_epochs))   # per-sample MSE (halved, see note below)

for epoch in range(num_epochs):
    print('Epoch', epoch)
    model.fit(train_X, flipped_train_y, epochs=1, validation_data=(test_X, test_y), verbose=2)
    pred_y = model.predict(train_X)

    for i, (pred_bboxes, exp_bboxes) in enumerate(zip(pred_y, flipped_train_y)):
        # Target with the two [x, y, w, h] groups swapped.
        flipped_exp_bboxes = np.concatenate([exp_bboxes[4:], exp_bboxes[:4]])
        mse = np.mean(np.square(pred_bboxes - exp_bboxes))
        mse_flipped = np.mean(np.square(pred_bboxes - flipped_exp_bboxes))
        iou = IOU(pred_bboxes[:4], exp_bboxes[:4]) + IOU(pred_bboxes[4:], exp_bboxes[4:])
        iou_flipped = IOU(pred_bboxes[:4], flipped_exp_bboxes[:4]) + IOU(pred_bboxes[4:], flipped_exp_bboxes[4:])
        dist = distance(pred_bboxes[:4], exp_bboxes[:4]) + distance(pred_bboxes[4:], exp_bboxes[4:])
        dist_flipped = distance(pred_bboxes[:4], flipped_exp_bboxes[:4]) + distance(pred_bboxes[4:], flipped_exp_bboxes[4:])

        if mse_flipped < mse:  # you can also use iou or dist here
            # Swapped order fits better: adopt it as the target from now on.
            flipped_train_y[i] = flipped_exp_bboxes
            flipped[i, epoch] = 1
            # /2. averages the two-object sums (iou, dist); for mse — already a
            # mean over all 8 values — it halves it again, kept as in the original.
            mses_epoch[i, epoch] = mse_flipped / 2.
            ious_epoch[i, epoch] = iou_flipped / 2.
            dists_epoch[i, epoch] = dist_flipped / 2.
        else:
            mses_epoch[i, epoch] = mse / 2.
            ious_epoch[i, epoch] = iou / 2.
            dists_epoch[i, epoch] = dist / 2.

    print('Flipped {} training samples ({} %)'.format(np.sum(flipped[:, epoch]), np.mean(flipped[:, epoch]) * 100.))
    print('Mean IOU: {}'.format(np.mean(ious_epoch[:, epoch])))
    print('Mean dist: {}'.format(np.mean(dists_epoch[:, epoch])))
    print('Mean mse: {}'.format(np.mean(mses_epoch[:, epoch])))
    print('\n')
# Output (training log):
# Epoch 0 1250/1250 - 3s - loss: 0.1383 - val_loss: 0.0813 Flipped 19891.0 training samples (49.7275 %) Mean IOU: 0.037232690657124395 Mean dist: 0.35727365194077376 Mean mse: 0.033198528514804856 Epoch 1 1250/1250 - 2s - loss: 0.0888 - val_loss: 0.0656 Flipped 1721.0 training samples (4.3025 %) Mean IOU: 0.05318151422847442 Mean dist: 0.3144863546827677 Mean mse: 0.025979474518208867 Epoch 2 1250/1250 - 3s - loss: 0.0741 - val_loss: 0.0569 Flipped 1114.0 training samples (2.785 %) Mean IOU: 0.06598561365328626 Mean dist: 0.2901569431546851 Mean mse: 0.022127709409292335 Epoch 3 1250/1250 - 2s - loss: 0.0649 - val_loss: 0.0512 Flipped 948.0 training samples (2.37 %) Mean IOU: 0.07684254374744041 Mean dist: 0.2736034593906058 Mean mse: 0.019633069058081792 Epoch 4 1250/1250 - 2s - loss: 0.0579 - val_loss: 0.0470 Flipped 905.0 training samples (2.2624999999999997 %) Mean IOU: 0.08640724107200685 Mean dist: 0.2613253071013454 Mean mse: 0.01786833271362056 Epoch 5 1250/1250 - 2s - loss: 0.0527 - val_loss: 0.0439 Flipped 902.0 training samples (2.255 %) Mean IOU: 0.09415122599488478 Mean dist: 0.25194095852176157 Mean mse: 0.01655298475231711 Epoch 6 1250/1250 - 3s - loss: 0.0484 - val_loss: 0.0414 Flipped 862.0 training samples (2.155 %) Mean IOU: 0.10142116262063688 Mean dist: 0.2445910453691942 Mean mse: 0.015528906734117075 Epoch 7 1250/1250 - 3s - loss: 0.0451 - val_loss: 0.0395 Flipped 708.0 training samples (1.77 %) Mean IOU: 0.10756239828833132 Mean dist: 0.23869207003038606 Mean mse: 0.014717786949577637 Epoch 8 1250/1250 - 3s - loss: 0.0423 - val_loss: 0.0379 Flipped 684.0 training samples (1.71 %) Mean IOU: 0.11269503097520481 Mean dist: 0.23381905929811422 Mean mse: 0.014059459684692141 Epoch 9 1250/1250 - 3s - loss: 0.0400 - val_loss: 0.0365 Flipped 657.0 training samples (1.6424999999999998 %) Mean IOU: 0.11754654677130373 Mean dist: 0.22965602688907125 Mean mse: 0.013505636898159694 Epoch 10 1250/1250 - 3s - loss: 0.0382 - val_loss: 0.0353 Flipped 619.0
# training samples (1.5474999999999999 %) Mean IOU: 0.1219872760987479 Mean dist: 0.22601357287748575 Mean mse: 0.013026879527312556 Epoch 11 1250/1250 - 2s - loss: 0.0365 - val_loss: 0.0344 Flipped 609.0 training samples (1.5225 %) Mean IOU: 0.1256473229702858 Mean dist: 0.2229416991320054 Mean mse: 0.01262558993937125 Epoch 12 1250/1250 - 2s - loss: 0.0352 - val_loss: 0.0335 Flipped 519.0 training samples (1.2975 %) Mean IOU: 0.12965207870365922 Mean dist: 0.22016640171222762 Mean mse: 0.01226186523610329 Epoch 13 1250/1250 - 3s - loss: 0.0339 - val_loss: 0.0328 Flipped 495.0 training samples (1.2375 %) Mean IOU: 0.13345745626533198 Mean dist: 0.21770910598838264 Mean mse: 0.011944320308333223 Epoch 14 1250/1250 - 2s - loss: 0.0329 - val_loss: 0.0322 Flipped 481.0 training samples (1.2025 %) Mean IOU: 0.13652392449009745 Mean dist: 0.21539978047413516 Mean mse: 0.011665974222373175 Epoch 15 1250/1250 - 3s - loss: 0.0319 - val_loss: 0.0316 Flipped 488.0 training samples (1.22 %) Mean IOU: 0.1393332919014705 Mean dist: 0.21334436669366072 Mean mse: 0.011418366132973578 Epoch 16 1250/1250 - 3s - loss: 0.0312 - val_loss: 0.0311 Flipped 448.0 training samples (1.1199999999999999 %) Mean IOU: 0.14204056057933206 Mean dist: 0.2114437389066506 Mean mse: 0.011188648821134583 Epoch 17 1250/1250 - 3s - loss: 0.0303 - val_loss: 0.0307 Flipped 418.0 training samples (1.045 %) Mean IOU: 0.14458371890941213 Mean dist: 0.20967240988183916 Mean mse: 0.010981257015141745 Epoch 18 1250/1250 - 2s - loss: 0.0296 - val_loss: 0.0303 Flipped 319.0 training samples (0.7975 %) Mean IOU: 0.1468144933414354 Mean dist: 0.2080494725385991 Mean mse: 0.010792488328989958 Epoch 19 1250/1250 - 3s - loss: 0.0290 - val_loss: 0.0299 Flipped 356.0 training samples (0.89 %) Mean IOU: 0.14995869851660198 Mean dist: 0.2064036374854088 Mean mse: 0.010608434098732234 Epoch 20 1250/1250 - 3s - loss: 0.0285 - val_loss: 0.0296 Flipped 308.0 training samples (0.77 %) Mean IOU: 0.15203454068016112 Mean dist:
# 0.20495681090139092 Mean mse: 0.010447016123295134 Epoch 21 1250/1250 - 3s - loss: 0.0279 - val_loss: 0.0293 Flipped 294.0 training samples (0.735 %) Mean IOU: 0.15428042323304164 Mean dist: 0.20356949656557038 Mean mse: 0.01029422922077815 Epoch 22 1250/1250 - 2s - loss: 0.0275 - val_loss: 0.0290 Flipped 293.0 training samples (0.7325 %) Mean IOU: 0.15624734448707436 Mean dist: 0.20225715700244062 Mean mse: 0.010154689438867345 Epoch 23 1250/1250 - 3s - loss: 0.0270 - val_loss: 0.0287 Flipped 316.0 training samples (0.79 %) Mean IOU: 0.15863816766232575 Mean dist: 0.20092190840371113 Mean mse: 0.01001509179614267 Epoch 24 1250/1250 - 3s - loss: 0.0267 - val_loss: 0.0285 Flipped 317.0 training samples (0.7925 %) Mean IOU: 0.1603624874676951 Mean dist: 0.19970516430091176 Mean mse: 0.009889360513190452 Epoch 25 1250/1250 - 2s - loss: 0.0263 - val_loss: 0.0283 Flipped 310.0 training samples (0.775 %) Mean IOU: 0.16236255552418544 Mean dist: 0.1985246433317795 Mean mse: 0.009768766455358692 Epoch 26 1250/1250 - 3s - loss: 0.0259 - val_loss: 0.0281 Flipped 243.0 training samples (0.6074999999999999 %) Mean IOU: 0.1639310728770347 Mean dist: 0.19749649045465012 Mean mse: 0.009659793646618061 Epoch 27 1250/1250 - 3s - loss: 0.0256 - val_loss: 0.0279 Flipped 224.0 training samples (0.5599999999999999 %) Mean IOU: 0.16592526733371524 Mean dist: 0.19641030311005458 Mean mse: 0.009547083750477282 Epoch 28 1250/1250 - 3s - loss: 0.0252 - val_loss: 0.0278 Flipped 258.0 training samples (0.645 %) Mean IOU: 0.16776933185145626 Mean dist: 0.19536236573254215 Mean mse: 0.009444298010414938 Epoch 29 1250/1250 - 3s - loss: 0.0250 - val_loss: 0.0276 Flipped 295.0 training samples (0.7374999999999999 %) Mean IOU: 0.1691739271044937 Mean dist: 0.19428270249879284 Mean mse: 0.009347408139333078 Epoch 30 1250/1250 - 2s - loss: 0.0246 - val_loss: 0.0275 Flipped 191.0 training samples (0.4775 %) Mean IOU: 0.17066198583004422 Mean dist: 0.19329219935458095 Mean mse: 0.009254583448935863
# Epoch 31 1250/1250 - 2s - loss: 0.0243 - val_loss: 0.0273 Flipped 224.0 training samples (0.5599999999999999 %) Mean IOU: 0.17239853906267139 Mean dist: 0.1923443650923715 Mean mse: 0.009162586181179426 Epoch 32 1250/1250 - 2s - loss: 0.0240 - val_loss: 0.0272 Flipped 217.0 training samples (0.5425 %) Mean IOU: 0.17365462918761174 Mean dist: 0.19144313608610497 Mean mse: 0.009082014350604177 Epoch 33 1250/1250 - 3s - loss: 0.0238 - val_loss: 0.0271 Flipped 198.0 training samples (0.49500000000000005 %) Mean IOU: 0.1754342446492649 Mean dist: 0.19051836798560184 Mean mse: 0.008995486118540955 Epoch 34 1250/1250 - 2s - loss: 0.0235 - val_loss: 0.0270 Flipped 170.0 training samples (0.42500000000000004 %) Mean IOU: 0.17713109104470814 Mean dist: 0.1896244767835146 Mean mse: 0.008915593126860844 Epoch 35 1250/1250 - 3s - loss: 0.0233 - val_loss: 0.0269 Flipped 167.0 training samples (0.4175 %) Mean IOU: 0.1783873919924296 Mean dist: 0.18881081434670838 Mean mse: 0.00884079262867683 Epoch 36 1250/1250 - 3s - loss: 0.0231 - val_loss: 0.0268 Flipped 138.0 training samples (0.345 %) Mean IOU: 0.1792274133004544 Mean dist: 0.188094066597689 Mean mse: 0.00877569088354422 Epoch 37 1250/1250 - 3s - loss: 0.0230 - val_loss: 0.0267 Flipped 193.0 training samples (0.48250000000000004 %) Mean IOU: 0.18130463171101832 Mean dist: 0.18726396432839515 Mean mse: 0.00869565285947182 Epoch 38 1250/1250 - 3s - loss: 0.0227 - val_loss: 0.0266 Flipped 204.0 training samples (0.51 %) Mean IOU: 0.18280200066359556 Mean dist: 0.1864627286646748 Mean mse: 0.008624417345906802 Epoch 39 1250/1250 - 3s - loss: 0.0225 - val_loss: 0.0266 Flipped 190.0 training samples (0.475 %) Mean IOU: 0.1838462055031591 Mean dist: 0.18572079626665308 Mean mse: 0.008562547158313465 Epoch 40 1250/1250 - 3s - loss: 0.0223 - val_loss: 0.0265 Flipped 159.0 training samples (0.3975 %) Mean IOU: 0.18475535696344542 Mean dist: 0.1850447536846153 Mean mse: 0.008503390265712587 Epoch 41 1250/1250 - 2s - loss: 0.0221 -
# val_loss: 0.0264 Flipped 122.0 training samples (0.305 %) Mean IOU: 0.18586247268819275 Mean dist: 0.184363489838462 Mean mse: 0.008445507373116461 Epoch 42 1250/1250 - 2s - loss: 0.0219 - val_loss: 0.0263 Flipped 166.0 training samples (0.415 %) Mean IOU: 0.18760472663447547 Mean dist: 0.18365588710412672 Mean mse: 0.008381407659689295 Epoch 43 1250/1250 - 2s - loss: 0.0218 - val_loss: 0.0263 Flipped 173.0 training samples (0.4325 %) Mean IOU: 0.18870382838316913 Mean dist: 0.18298375121132615 Mean mse: 0.008326145256875323 Epoch 44 1250/1250 - 3s - loss: 0.0216 - val_loss: 0.0262 Flipped 169.0 training samples (0.4225 %) Mean IOU: 0.18930894377383584 Mean dist: 0.18237986450754146 Mean mse: 0.008275573133820045 Epoch 45 1250/1250 - 3s - loss: 0.0214 - val_loss: 0.0262 Flipped 155.0 training samples (0.3875 %) Mean IOU: 0.19045625291588011 Mean dist: 0.1816923421883227 Mean mse: 0.008221076761049982 Epoch 46 1250/1250 - 2s - loss: 0.0213 - val_loss: 0.0261 Flipped 135.0 training samples (0.3375 %) Mean IOU: 0.19210190480679085 Mean dist: 0.18100392317543298 Mean mse: 0.008164115579863454 Epoch 47 1250/1250 - 2s - loss: 0.0212 - val_loss: 0.0261 Flipped 139.0 training samples (0.3475 %) Mean IOU: 0.1929415455332721 Mean dist: 0.18040060820084736 Mean mse: 0.008116974390970401 Epoch 48 1250/1250 - 2s - loss: 0.0210 - val_loss: 0.0260 Flipped 150.0 training samples (0.375 %) Mean IOU: 0.19437142804856034 Mean dist: 0.17980052557386686 Mean mse: 0.008063567421949221 Epoch 49 1250/1250 - 3s - loss: 0.0209 - val_loss: 0.0260 Flipped 138.0 training samples (0.345 %) Mean IOU: 0.19525224187547882 Mean dist: 0.17925508563562084 Mean mse: 0.008018252786645448
# Visualize which of the first 1000 training samples were flipped in which epoch.
plt.pcolor(flipped[:1000], cmap='Greys')
plt.xlabel('Epoch')
plt.ylabel('Training sample')
# Output: Text(0, 0.5, 'Training sample')
# Training curves over epochs.
plt.plot(np.mean(ious_epoch, axis=0), label='Mean IOU') # between predicted and assigned true bboxes
plt.plot(np.mean(dists_epoch, axis=0), label='Mean distance') # relative to image size
plt.legend()
plt.ylim(0, 1)
# Output: (0.0, 1.0)
# Predict boxes on the test set and rescale them back to pixel coordinates.
pred_y = model.predict(test_X)
pred_y = pred_y.reshape(len(pred_y), num_objects, -1)  # (samples, objects, 4)
pred_bboxes = pred_y[..., :4] * img_size
# NOTE(review): each object has exactly 4 predicted values, so this slice is
# EMPTY (shape (..., num_objects, 0)) — apparently a leftover from a variant
# that also predicted a shape class. Kept because the plotting loop zips over it.
pred_shapes = pred_y[..., 4:5]
pred_bboxes.shape, pred_shapes.shape
# Output: ((10000, 2, 4), (10000, 2, 0))
# Show predicted boxes (red) on four random test images.
plt.figure(figsize=(16, 8))
for i_subplot in range(1, 5):
    plt.subplot(1, 4, i_subplot)
    i = np.random.randint(len(test_X))
    plt.imshow(test_imgs[i].T, cmap='Greys', interpolation='none', origin='lower', extent=[0, img_size, 0, img_size])
    # NOTE(review): exp_bbox and pred_shape are unpacked but never used here
    # (pred_shape is an empty length-0 slice); only pred_bbox is drawn.
    for pred_bbox, exp_bbox, pred_shape in zip(pred_bboxes[i], test_bboxes[i], pred_shapes[i]):
        print(pred_bbox)
        plt.gca().add_patch(matplotlib.patches.Rectangle((pred_bbox[0], pred_bbox[1]), pred_bbox[2], pred_bbox[3],ec='r', fc='none'))
# Output: [3.0901163 0.08360825 2.8350563 1.9934778 ] [2.8934953 0.05177559 1.8182871 1.4411243 ] [1.1641378 3.105485 1.7916882 1.2118427] [1.9490995 4.0168724 0.78566885 0.8842524 ] [ 5.314244 -0.78453827 2.0568438 1.9726627 ] [2.9276226 3.003235 1.8148657 1.4338228] [1.2816632 3.6937425 2.8126748 1.6420349] [1.6368439 2.8688457 1.711059 2.524076 ]