This section just reproduces lesson 1 logic using my own code and with 30 tennis and 30 basketball player images. I chose all male players for simplicity.
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline
# This file contains all the main external libs we'll use
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
from typing import List, Union
from pathlib import Path
# Download the tennis/basketball image dataset and unpack it into ./data.
!wget 'https://onedrive.live.com/download?cid=C273BC3946897048&resid=C273BC3946897048%216605&authkey=AIVFQLj7IoJYiz4' -O foo.zip
!unzip -d data foo.zip
!rm foo.zip
# Input size expected by the pretrained network (ImageNet models use 224x224 crops).
sz=224
# Dataset root: train/valid/test subdirectories, one folder per class.
path = Path('data/tennisbball')
path.absolute(), list(path.glob('*'))
# Peek at one validation image from each class to sanity-check the download.
sample = plt.imread(next(iter((path / 'valid' / 'tennis').iterdir())))
plt.imshow(sample)
plt.figure()
sample = plt.imread(next(iter((path / 'valid' / 'bball').iterdir())))
plt.imshow(sample)
# Shape and a small corner of the raw pixel array.
sample.shape, sample[:4,:4]
# Confirm the GPU and cuDNN are usable before training.
torch.cuda.is_available(),torch.backends.cudnn.enabled
Define the model architecture
#tfms_from_model -- model based image transforms (preprocessing stats)
arch=resnet50
# Build datasets/dataloaders from the folder layout; the test set here has labels
# too, so it can be evaluated like the validation set.
data = ImageClassifierData.from_paths(path, test_name='test', test_with_labels=True, tfms=tfms_from_model(arch, sz))
#precompute=True to save conv layer activations! pass False if you want to run the data viz below
learner = ConvLearner.pretrained(f=arch, data=data, precompute=False)
This section trains a model using transfer learning.
# Train the new classifier head for 15 epochs at learning rate 0.01
# (the pretrained backbone stays frozen by default).
learner.fit(0.01, 15)
#uncomment line below to save the model
#learner.save('tennis_v_bball.lrnr')
Or if you've already trained a model, skip the above section and start from here.
# Restore previously trained weights instead of re-training.
learner.load('tennis_v_bball.lrnr')
# predict() returns log-probabilities over the validation set; exponentiate
# to recover probabilities.
probs = np.exp(learner.predict())
probs
#TODO: improve
def display_images(images:List[Union[Path, np.ndarray]], columns:int, titles:List[str]=None, figsize=None) -> None:
    """Render a grid of images with per-image titles.

    images: paths (opened via PIL) or already-loaded arrays.
    columns: number of grid columns; rows are derived from len(images).
    titles: optional captions; defaults to 'Image 1', 'Image 2', ...
    figsize: matplotlib figure size; defaults to (60, 60).
    """
    if not titles:
        titles = [f'Image {n}' for n in range(1, len(images) + 1)]
    # Enough rows to hold every image, including a partial last row.
    full_rows, remainder = divmod(len(images), columns)
    rows = full_rows + (1 if remainder else 0)
    plt.figure(figsize=figsize if figsize is not None else (60, 60))
    for cell, (img, caption) in enumerate(zip(images, titles), start=1):
        data = np.array(PIL.Image.open(img)) if isinstance(img, Path) else img
        plt.subplot(rows, columns, cell)
        plt.imshow(data)
        # Scale the font with the grid density so captions stay legible.
        plt.title(caption, fontsize=10 * columns)
        plt.axis('off')
#val images
# Show every validation image annotated with its predicted class and confidence.
predictions = probs.argmax(axis=1)
images, titles = [], []
for prob, pclass, fname in zip(probs, predictions, data.val_ds.fnames):
    images.append(path / fname)
    titles.append(f'{fname} -- {prob[pclass]:.{3}f} ({data.classes[pclass]})')
display_images(images, 4, titles)
# Same visualization for the held-out test set.
test_probs = np.exp(learner.predict(is_test=True))
test_predictions = test_probs.argmax(axis=1)
#test images
images, titles = [],[]
for prob, pclass, fname in zip(test_probs, test_predictions, data.test_ds.fnames):
    images.append(path / fname)
    titles.append(f'{fname} -- {prob[pclass]:.{3}f} ({data.classes[pclass]})')
display_images(images, 4, titles)
#check out the model structure
model = learner.model
model
#
# utilize torch hooks to capture the activations for any conv layer. for simplicity we use a
# batch size of 1.
#
class ActivationHook:
    """Forward-hook callable that keeps the most recent output of the hooked module."""

    def __init__(self):
        # Empty list until the first forward pass; then holds the output tensor.
        self.output = []

    def __call__(self, module, input, output):
        # Keep the raw tensor (.data), detached from autograd history.
        self.output = output.data
def find_layers(module, ltype):
    """Recursively collect every sub-module of `module` that is an instance of `ltype`.

    A matching module is returned as-is; its own children are not searched further.
    """
    if isinstance(module, ltype):
        return [module]
    found = []
    for child in module.children():
        found += find_layers(child, ltype)
    return found
def capture_activations(model, x):
    """Run `x` through `model` and return each Conv2d layer's output tensor.

    Registers a temporary forward hook on every conv layer, performs a single
    forward pass, then removes the hooks. Outputs are returned in the order the
    conv layers appear in the model.
    """
    layers = find_layers(model, nn.Conv2d)
    hooks = [ActivationHook() for _ in layers]
    handles = [conv.register_forward_hook(hook) for conv, hook in zip(layers, hooks)]
    # BUGFIX: remove the hooks even if the forward pass raises; otherwise they
    # leak and keep firing on every later forward pass of the model.
    try:
        model(x)
    finally:
        for h in handles:
            h.remove()
    return [h.output for h in hooks]
# Temporarily switch to batch size 1 so each captured activation corresponds to a
# single image, then restore the original batch size.
bs = data.bs
data.bs = 1
dl = data.get_dl(data.test_ds, False)
i = iter(dl)
# First two test images; presumably one basketball and one tennis image ("noball"
# meaning no basketball) — verify against data.test_ds.fnames ordering.
ball_x = next(i)[0]
noball_x = next(i)[0]
data.bs = bs
ball_activations = capture_activations(model, Variable(ball_x))
noball_activations = capture_activations(model, Variable(noball_x))
# Print the shape of every conv layer's output.
# NOTE(review): `i` is reused here, clobbering the dataloader iterator above.
for i, layer_output in enumerate(ball_activations):
    print(f'Layer {i}: {layer_output.squeeze().shape}')
#layer 5, filter 18, 36 seems to like circular type things
# Which conv layer to visualize below.
layer_idx = 0
images = []
titles = []
# Number of filters and the spatial size of this layer's activation map
# (assumes a square activation map — dims 2 and 3 equal).
num_filters = ball_activations[layer_idx].shape[1]
asize = ball_activations[layer_idx].shape[2]
def filter_activations_to_image(activations, lidx, fidx):
    """Turn one filter's activation map into a displayable grayscale array in [0, 1].

    activations: list of per-layer output tensors (batch size 1).
    lidx: index of the conv layer; fidx: index of the filter within that layer.
    """
    # Drop the batch dimension, then pick the requested filter's 2-D map.
    fmap = activations[lidx].squeeze()[fidx]
    # Center on 0.5, compress to roughly +/-3 standard deviations, ...
    scaled = (fmap - fmap.mean()) / (fmap.std() * 3) + 0.5
    # ... and clamp into the displayable [0, 1] range.
    return scaled.clamp(0, 1).numpy()
# Width of the white separator strip between the two activation images.
buff_size = 10
# For every filter in the chosen layer: basketball-image activation on the left,
# tennis-image activation on the right, separated by a white strip.
for filter_idx in range(num_filters):
    a0 = filter_activations_to_image(ball_activations, layer_idx, filter_idx)
    a1 = filter_activations_to_image(noball_activations, layer_idx, filter_idx)
    # NOTE(review): uses the literal 10 rather than buff_size — keep in sync.
    z = np.hstack([a0, np.ones((asize, 10)), a1])
    plt.imshow(z, cmap='gray')
    plt.axis('off')
    plt.title(f'Filter {filter_idx}')
    plt.show()
We can also look at the filters themselves. This is easiest at the first layer, where each filter has a depth of 3 (one slice per RGB input channel) and can therefore be rendered directly as a color image.
# Lay the first conv layer's filters out in a near-square grid and render each
# as a small RGB tile (depth 3 maps directly onto color channels).
import matplotlib.colors as mc
import math
conv = find_layers(learner.model, nn.Conv2d)[0]
weight = conv.weight.data.numpy()
# PyTorch conv weights are (out_channels, in_channels, kernel_h, kernel_w).
num_filters, depth, kh, kw = weight.shape
rows = int(num_filters**0.5)
cols = int(math.ceil(num_filters/rows))
border = 1
# Black canvas with a 1-pixel border around every tile.
img = np.zeros((depth, rows*kh + (1+rows)*border, cols*kw + (1+cols)*border))
for f in range(num_filters):
    # BUGFIX: the row index must be f // cols (was f // rows, which only worked
    # because resnet50's 64-filter grid happens to be square). Offsets now also
    # use kh for the vertical axis and kw for the horizontal one.
    r, c = divmod(f, cols)
    x = border + r * (kh + border)
    y = border + c * (kw + border)
    # Rescale each filter's weights to [0, 1] independently for display.
    img[:, x:x+kh, y:y+kw] = mc.Normalize()(weight[f, :, :, :])
plt.figure(figsize=(12,12))
plt.imshow(img.transpose(1,2,0))
_ = plt.axis('off')
We can also visualize subsequent layers, though it's not so pretty: with more than 3 input channels there is no natural color mapping, so we render each channel of each filter as a separate grayscale tile.
# for i, conv in enumerate(find_layers(learner.model, nn.Conv2d)):
#     print(conv, conv.weight.shape)
# Third conv layer's weights: one grid row per filter, one column per input channel.
weight = find_layers(learner.model, nn.Conv2d)[2].weight.data.numpy()
num_filters, depth, w, h = weight.shape
rows = num_filters
cols = depth
border = 1
img = np.zeros((rows*h + (1+rows)*border, cols*w + (1+cols)*border))
for f in range(num_filters):
    norm = mc.Normalize()
    normed = norm(weight[f, :, :, :]) #normalize over all the weights in a filter
    for d in range(depth):
        r = f
        c = d
        # NOTE(review): both offsets step by (w+border) and the slice mixes w and h;
        # this is only correct because conv kernels here are square (w == h) —
        # confirm before reusing on non-square kernels.
        x = border + r * (w+border)
        y = border + c * (w+border)
        img[x:x+w, y:y+h] = normed[d]
plt.figure(figsize=(18,18))
plt.imshow(img, cmap='gray')
_ = plt.axis('off')
We can also mask out portions of the image by repeatedly sliding a gray block over it and recording how the predictions change.
# Demo of the occlusion idea: gray out a region of the image and display it.
block_size = 50
image_path = path / data.test_ds.fnames[0]
image = open_image(image_path)
# Gray out a 200x200 region (open_image yields floats in [0, 1]; 0.75 is light gray).
image[50:250, 50:250] = np.full((200,200,3), 0.75)
# BUGFIX: this line previously transformed `orig_image`, which is not defined
# until a later cell, so a fresh top-to-bottom run raised NameError.
scaled_image = Scale(sz=224).do_transform(image, False)
# image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)
plt.imshow(image)
_ = plt.axis('off')
# Occlusion sensitivity: slide a gray square over the image and record how the
# model's prediction changes when each region is hidden.
block_size = 50
image_path = path / data.test_ds.fnames[0]
orig_image = open_image(image_path)
# image[0:200, 0:200] = np.full((200,200,3), 0.75)
scaled_image = Scale(sz=224).do_transform(orig_image, False)
# image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)
# plt.imshow(image)
plt.axis('off')
#the prediction for the smaller image should be essentially unchanged
print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())
w,h,_ = scaled_image.shape
# eval mode: freeze batchnorm/dropout so per-crop predictions are deterministic.
learner.model.eval()
t0 = time.time()
# prob_map[0] accumulates the class-0 probability (presumably 'bball' — verify
# data.classes) for every occluded crop touching a pixel; prob_map[1] counts how
# many crops touched it, for averaging later.
prob_map = np.zeros((2, w, h))
z = 0
#TODO: add stride for efficiency.
# Every block position, including ones partially off-image, hence the 1-block_size start.
for x in tqdm(range(1 - block_size, w)):
    for y in range(1 - block_size, h):
        image = np.array(scaled_image)
        # Clip the occluder to the image bounds.
        x0, x1 = max(0, x), min(w, x + block_size)
        y0, y1 = max(0, y), min(h, y + block_size)
        image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)
        image = tfms_from_model(arch, sz)[1](image)
        predictions = learner.model(VV(image).unsqueeze(0))
        prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]
        prob_map[1,x0:x1,y0:y1] += 1
# Persist the (slow) sweep so the heatmap can be re-plotted without recomputing.
np.save('probs-heatmap.npy', prob_map)
# Average class-0 probability per pixel when that pixel is occluded.
heatmap = prob_map[0]/prob_map[1]
plt.subplot(1,2,1)
# 1 - heatmap: bright pixels are ones whose occlusion lowers the class-0 probability.
plt.imshow(1 - heatmap, cmap='jet')
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(orig_image)
_ = plt.axis('off')
# Same occlusion-sensitivity sweep for a specific validation basketball image.
block_size = 50
image_path = path / 'valid/bball/29.jpg'
orig_image = open_image(image_path)
# image[0:200, 0:200] = np.full((200,200,3), 0.75)
scaled_image = Scale(sz=224).do_transform(orig_image, False)
# orig_image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)
# plt.imshow(orig_image)
# plt.axis('off')
#the prediction for the smaller image should be essentially unchanged
print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())
w,h,_ = scaled_image.shape
learner.model.eval()
t0 = time.time()
prob_map = np.zeros((2, w, h))
z = 0
#TODO: add stride for efficiency.
for x in tqdm(range(1 - block_size, w)):
    # BUGFIX: removed a stray `b` that followed the colon on this line and made
    # the whole cell a SyntaxError.
    for y in range(1 - block_size, h):
        image = np.array(scaled_image)
        # Clip the occluder to the image bounds.
        x0, x1 = max(0, x), min(w, x + block_size)
        y0, y1 = max(0, y), min(h, y + block_size)
        image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)
        image = tfms_from_model(arch, sz)[1](image)
        predictions = learner.model(VV(image).unsqueeze(0))
        prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]
        prob_map[1,x0:x1,y0:y1] += 1
np.save('probs-giannis-heatmap.npy', prob_map)
# Average class-0 probability per pixel when that pixel is occluded.
heatmap = prob_map[0]/prob_map[1]
plt.subplot(1,2,1)
plt.imshow(1 - heatmap, cmap='jet')
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(orig_image)
_ = plt.axis('off')
# Occlusion-sensitivity sweep for a validation tennis image.
block_size = 50
image_path = path / 'valid/tennis/23.jpg'
orig_image = open_image(image_path)
# image[0:200, 0:200] = np.full((200,200,3), 0.75)
scaled_image = Scale(sz=224).do_transform(orig_image, False)
# orig_image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)
plt.imshow(scaled_image)
# plt.axis('off')
#the prediction for the smaller image should be essentially unchanged
print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())
w,h,_ = scaled_image.shape
learner.model.eval()
t0 = time.time()
# prob_map[0]: accumulated class-0 probability per occluded pixel;
# prob_map[1]: number of occluding blocks that touched the pixel.
prob_map = np.zeros((2, w, h))
z = 0
#TODO: add stride for efficiency.
for x in tqdm(range(1 - block_size, w)):
    for y in range(1 - block_size, h):
        image = np.array(scaled_image)
        # Clip the occluder to the image bounds.
        x0, x1 = max(0, x), min(w, x + block_size)
        y0, y1 = max(0, y), min(h, y + block_size)
        image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)
        image = tfms_from_model(arch, sz)[1](image)
        predictions = learner.model(VV(image).unsqueeze(0))
        prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]
        prob_map[1,x0:x1,y0:y1] += 1
np.save('probs-tennis-heatmap.npy', prob_map)
heatmap = prob_map[0]/prob_map[1]
plt.subplot(1,2,1)
# NOTE(review): earlier cells plot 1 - heatmap; plotting heatmap directly flips
# the color orientation — confirm which is intended for the tennis class.
plt.imshow(heatmap, cmap='jet')
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(orig_image)
_ = plt.axis('off')