import numpy as np
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
# Create images with random rectangles and bounding boxes.
num_imgs = 50000
img_size = 8
min_object_size = 1
max_object_size = 4
num_objects = 1

# Each bounding box is stored as [x, y, w, h] in pixel units.
bboxes = np.zeros((num_imgs, num_objects, 4))
imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0

# np.random.randint excludes its upper bound, so "+ 1" is required for
# max_object_size to actually occur and for rectangles to be able to touch
# the far edge of the image (x + w == img_size).
# Drawing every box in one call also avoids 3 RNG calls per image.
sizes = np.random.randint(min_object_size, max_object_size + 1,
                          size=(num_imgs, num_objects, 2))  # (w, h)
origins = np.random.randint(0, img_size - sizes + 1)  # (x, y), same shape
bboxes[..., :2] = origins
bboxes[..., 2:] = sizes

for i_img in range(num_imgs):
    for i_object in range(num_objects):
        x, y = origins[i_img, i_object]
        w, h = sizes[i_img, i_object]
        imgs[i_img, x:x+w, y:y+h] = 1.  # set rectangle to 1
imgs.shape, bboxes.shape
((50000, 8, 8), (50000, 1, 4))
# Show one generated image with its ground-truth bounding box(es) in red.
i = 5
# The image is transposed for display so that the first array axis is drawn
# along the horizontal axis, matching the (x, y) order used in the bboxes.
plt.imshow(
    imgs[i].T,
    cmap='Greys',
    interpolation='none',
    origin='lower',
    extent=[0, img_size, 0, img_size],
)
for bbox in bboxes[i]:
    outline = matplotlib.patches.Rectangle(
        (bbox[0], bbox[1]), bbox[2], bbox[3], ec='r', fc='none'
    )
    plt.gca().add_patch(outline)
# Flatten every image to a vector and standardize it with the dataset-wide
# pixel mean and standard deviation (result has mean 0 and std 1).
pixel_mean = np.mean(imgs)
pixel_std = np.std(imgs)
X = (imgs.reshape(num_imgs, -1) - pixel_mean) / pixel_std
X.shape, np.mean(X), np.std(X)
((50000, 64), -3.5376146456655986e-17, 1.0)
# Scale x, y, w, h by img_size so that every target lies between 0 and 1.
# Important: do not shift the targets to negative values (e.g. by centering
# them on mean 0), because the IOU calculation needs positive w and h.
y = np.reshape(bboxes, (num_imgs, -1)) / img_size
y.shape, np.mean(y), np.std(y)
((50000, 4), 0.281348125, 0.17571291766396793)
# Split into training and test sets: the first 80% of the samples are used
# for training, the remainder for testing.
i = int(0.8 * num_imgs)
train_X, test_X = X[:i], X[i:]
train_y, test_y = y[:i], y[i:]
# Keep the un-normalized test images and boxes for plotting and IOU scoring.
test_imgs, test_bboxes = imgs[i:], bboxes[i:]
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Convolution2D, MaxPooling2D, Flatten
from keras.optimizers import SGD
# NOTE(review): filter_size and pool_size are not used by the dense model
# below; kept because other cells may still reference them.
filter_size = 3
pool_size = 2

# Fully connected regressor: flattened image -> 512 -> 128 -> box targets.
# Only the first layer of a Sequential model declares the input size; the
# second Dense layer infers its input from the layer before it.
model = Sequential([
    Dense(512, input_dim=X.shape[-1]),
    Activation('relu'),
    Dense(128),
    Activation('relu'),
    Dropout(0.2),
    Dense(y.shape[-1]),  # one linear output per bounding-box coordinate
])

# NOTE(review): the 'adadelta' string selects the optimizer with its default
# learning rate; in tf.keras that default is small (0.001), which may explain
# slow convergence -- confirm and consider passing an explicit optimizer.
model.compile(optimizer='adadelta', loss='mse')
model.fit(train_X, train_y, epochs=30, validation_data=(test_X, test_y), verbose=2)
Epoch 1/30 1250/1250 - 2s - loss: 0.5446 - val_loss: 0.1817 Epoch 2/30 1250/1250 - 2s - loss: 0.2812 - val_loss: 0.0870 Epoch 3/30 1250/1250 - 2s - loss: 0.2028 - val_loss: 0.0664 Epoch 4/30 1250/1250 - 2s - loss: 0.1763 - val_loss: 0.0561 Epoch 5/30 1250/1250 - 2s - loss: 0.1573 - val_loss: 0.0480 Epoch 6/30 1250/1250 - 2s - loss: 0.1429 - val_loss: 0.0421 Epoch 7/30 1250/1250 - 2s - loss: 0.1316 - val_loss: 0.0375 Epoch 8/30 1250/1250 - 2s - loss: 0.1212 - val_loss: 0.0340 Epoch 9/30 1250/1250 - 2s - loss: 0.1119 - val_loss: 0.0313 Epoch 10/30 1250/1250 - 2s - loss: 0.1056 - val_loss: 0.0290 Epoch 11/30 1250/1250 - 2s - loss: 0.0988 - val_loss: 0.0272 Epoch 12/30 1250/1250 - 2s - loss: 0.0922 - val_loss: 0.0258 Epoch 13/30 1250/1250 - 2s - loss: 0.0870 - val_loss: 0.0246 Epoch 14/30 1250/1250 - 2s - loss: 0.0824 - val_loss: 0.0236 Epoch 15/30 1250/1250 - 2s - loss: 0.0772 - val_loss: 0.0230 Epoch 16/30 1250/1250 - 2s - loss: 0.0731 - val_loss: 0.0224 Epoch 17/30 1250/1250 - 2s - loss: 0.0692 - val_loss: 0.0219 Epoch 18/30 1250/1250 - 2s - loss: 0.0659 - val_loss: 0.0215 Epoch 19/30 1250/1250 - 2s - loss: 0.0629 - val_loss: 0.0212 Epoch 20/30 1250/1250 - 2s - loss: 0.0595 - val_loss: 0.0210 Epoch 21/30 1250/1250 - 2s - loss: 0.0569 - val_loss: 0.0207 Epoch 22/30 1250/1250 - 2s - loss: 0.0548 - val_loss: 0.0207 Epoch 23/30 1250/1250 - 2s - loss: 0.0526 - val_loss: 0.0206 Epoch 24/30 1250/1250 - 2s - loss: 0.0506 - val_loss: 0.0206 Epoch 25/30 1250/1250 - 2s - loss: 0.0485 - val_loss: 0.0206 Epoch 26/30 1250/1250 - 2s - loss: 0.0463 - val_loss: 0.0205 Epoch 27/30 1250/1250 - 2s - loss: 0.0450 - val_loss: 0.0206 Epoch 28/30 1250/1250 - 2s - loss: 0.0435 - val_loss: 0.0207 Epoch 29/30 1250/1250 - 2s - loss: 0.0422 - val_loss: 0.0206 Epoch 30/30 1250/1250 - 2s - loss: 0.0405 - val_loss: 0.0206
<tensorflow.python.keras.callbacks.History at 0x7f754803d290>
# Predict bounding boxes on the test images.
pred_y = model.predict(test_X)
# Undo the division by img_size applied to the targets, then restore the
# (image, object, coordinate) layout.
pred_bboxes = (pred_y * img_size).reshape(len(pred_y), num_objects, -1)
pred_bboxes.shape
(10000, 1, 4)
def IOU(bbox1, bbox2):
    """Return the intersection over union of two bounding boxes.

    Each box is a sequence ``[x, y, w, h]``: the position of one corner
    followed by the extent along each axis. The result is the area of the
    intersection divided by the area of the union.

    Returns ``0.`` when the boxes do not overlap (boxes that merely touch
    along an edge count as non-overlapping). A box with a non-positive
    width or height can never produce a positive intersection, so it also
    yields ``0.``.
    """
    x1, y1, w1, h1 = bbox1[0], bbox1[1], bbox1[2], bbox1[3]
    x2, y2, w2, h2 = bbox2[0], bbox2[1], bbox2[2], bbox2[3]

    # Extent of the overlap along each axis; non-positive means disjoint.
    w_I = min(x1 + w1, x2 + w2) - max(x1, x2)
    h_I = min(y1 + h1, y2 + h2) - max(y1, y2)
    if w_I <= 0 or h_I <= 0:  # no overlap
        return 0.

    I = w_I * h_I
    # Inclusion-exclusion: the overlap is counted once, not twice.
    U = w1 * h1 + w2 * h2 - I
    return I / U
# Show four randomly chosen test images with the predicted box in red and
# the IOU against the expected box written just above it.
plt.figure(figsize=(12, 3))
for i_subplot in range(1, 5):
    plt.subplot(1, 4, i_subplot)
    i = np.random.randint(len(test_imgs))
    plt.imshow(
        test_imgs[i].T,
        cmap='Greys',
        interpolation='none',
        origin='lower',
        extent=[0, img_size, 0, img_size],
    )
    for pred_bbox, exp_bbox in zip(pred_bboxes[i], test_bboxes[i]):
        outline = matplotlib.patches.Rectangle(
            (pred_bbox[0], pred_bbox[1]), pred_bbox[2], pred_bbox[3],
            ec='r', fc='none',
        )
        plt.gca().add_patch(outline)
        # Place the label slightly above the far y-edge of the predicted box.
        label_pos = (pred_bbox[0], pred_bbox[1]+pred_bbox[3]+0.2)
        plt.annotate('IOU: {:.2f}'.format(IOU(pred_bbox, exp_bbox)), label_pos, color='r')
# Calculate the mean IOU (overlap) between the predicted and expected bounding boxes on the test dataset.
# Boxes are compared pairwise by position after flattening to one row per box.
flat_pred_bboxes = pred_bboxes.reshape(-1, 4)
flat_test_bboxes = test_bboxes.reshape(-1, 4)
summed_IOU = 0.
for pred_bbox, test_bbox in zip(flat_pred_bboxes, flat_test_bboxes):
    summed_IOU += IOU(pred_bbox, test_bbox)
# Divide by the number of boxes, not the number of images: the two differ
# whenever num_objects > 1, which would otherwise inflate the mean.
mean_IOU = summed_IOU / len(flat_pred_bboxes)
mean_IOU
0.07784513254188125