This section just reproduces lesson 1 logic using my own code and with 30 tennis and 30 basketball player images. I chose all male players for simplicity.
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline
# This file contains all the main external libs we'll use
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
from typing import List, Union
from pathlib import Path
# Download the tennis/basketball image dataset and unpack it into ./data.
!wget 'https://onedrive.live.com/download?cid=C273BC3946897048&resid=C273BC3946897048%216605&authkey=AIVFQLj7IoJYiz4' -O foo.zip
!unzip -d data foo.zip
!rm foo.zip
# Input size expected by the pretrained network (ImageNet models use 224x224 crops).
sz=224
# Dataset root: train/valid/test subdirectories, one folder per class.
path = Path('data/tennisbball')
path.absolute(), list(path.glob('*'))
# Peek at one validation image from each class to sanity-check the download.
sample = plt.imread(next(iter((path / 'valid' / 'tennis').iterdir())))
plt.imshow(sample)
plt.figure()
sample = plt.imread(next(iter((path / 'valid' / 'bball').iterdir())))
plt.imshow(sample)
# Shape and a small corner of the raw pixel array.
sample.shape, sample[:4,:4]
# Confirm the GPU and cuDNN are usable before training.
torch.cuda.is_available(),torch.backends.cudnn.enabled
Define the model architecture
#tfms_from_model -- model based image transforms (preprocessing stats)
arch=resnet50
# Build datasets/dataloaders from the folder layout; the test set here has labels
# too, so it can be evaluated like the validation set.
data = ImageClassifierData.from_paths(path, test_name='test', test_with_labels=True, tfms=tfms_from_model(arch, sz))
#precompute=True to save conv layer activations! pass False if you want to run the data viz below
learner = ConvLearner.pretrained(f=arch, data=data, precompute=False)
This section trains a model using transfer learning.
# Train the new classifier head for 15 epochs at learning rate 0.01
# (the pretrained backbone stays frozen by default).
learner.fit(0.01, 15)
#uncomment line below to save the model
#learner.save('tennis_v_bball.lrnr')
Or if you've already trained a model, skip the above section and start from here.
# Restore previously trained weights instead of re-training.
learner.load('tennis_v_bball.lrnr')
# predict() returns log-probabilities over the validation set; exponentiate
# to recover probabilities.
probs = np.exp(learner.predict())
probs
#TODO: improve
def display_images(images:List[Union[Path, np.ndarray]], columns:int, titles:List[str]=None, figsize=None) -> None:
    """Render a grid of images with per-image titles.

    images: paths (opened via PIL) or already-loaded arrays.
    columns: number of grid columns; rows are derived from len(images).
    titles: optional captions; defaults to 'Image 1', 'Image 2', ...
    figsize: matplotlib figure size; defaults to (60, 60).
    """
    if not titles:
        titles = [f'Image {n}' for n in range(1, len(images) + 1)]
    # Enough rows to hold every image, including a partial last row.
    full_rows, remainder = divmod(len(images), columns)
    rows = full_rows + (1 if remainder else 0)
    plt.figure(figsize=figsize if figsize is not None else (60, 60))
    for cell, (img, caption) in enumerate(zip(images, titles), start=1):
        data = np.array(PIL.Image.open(img)) if isinstance(img, Path) else img
        plt.subplot(rows, columns, cell)
        plt.imshow(data)
        # Scale the font with the grid density so captions stay legible.
        plt.title(caption, fontsize=10 * columns)
        plt.axis('off')
#val images
# Show every validation image annotated with its predicted class and confidence.
predictions = probs.argmax(axis=1)
images, titles = [], []
for prob, pclass, fname in zip(probs, predictions, data.val_ds.fnames):
    images.append(path / fname)
    titles.append(f'{fname} -- {prob[pclass]:.{3}f} ({data.classes[pclass]})')
display_images(images, 4, titles)
# Same visualization for the held-out test set.
test_probs = np.exp(learner.predict(is_test=True))
test_predictions = test_probs.argmax(axis=1)
#test images
images, titles = [],[]
for prob, pclass, fname in zip(test_probs, test_predictions, data.test_ds.fnames):
    images.append(path / fname)
    titles.append(f'{fname} -- {prob[pclass]:.{3}f} ({data.classes[pclass]})')
display_images(images, 4, titles)
#check out the model structure
model = learner.model
model
#
# utilize torch hooks to capture the activations for any conv layer. for simplicity we use a
# batch size of 1.
#
class ActivationHook:
    """Forward-hook callable that keeps the most recent output of the hooked module."""

    def __init__(self):
        # Empty list until the first forward pass; then holds the output tensor.
        self.output = []

    def __call__(self, module, input, output):
        # Keep the raw tensor (.data), detached from autograd history.
        self.output = output.data
def find_layers(module, ltype):
    """Recursively collect every sub-module of `module` that is an instance of `ltype`.

    A matching module is returned as-is; its own children are not searched further.
    """
    if isinstance(module, ltype):
        return [module]
    found = []
    for child in module.children():
        found += find_layers(child, ltype)
    return found
def capture_activations(model, x):
    """Run `x` through `model` and return each Conv2d layer's output tensor.

    Registers a temporary forward hook on every conv layer, performs a single
    forward pass, then removes the hooks. Outputs are returned in the order the
    conv layers appear in the model.
    """
    layers = find_layers(model, nn.Conv2d)
    hooks = [ActivationHook() for _ in layers]
    handles = [conv.register_forward_hook(hook) for conv, hook in zip(layers, hooks)]
    # BUGFIX: remove the hooks even if the forward pass raises; otherwise they
    # leak and keep firing on every later forward pass of the model.
    try:
        model(x)
    finally:
        for h in handles:
            h.remove()
    return [h.output for h in hooks]
# Temporarily switch to batch size 1 so each captured activation corresponds to a
# single image, then restore the original batch size.
bs = data.bs
data.bs = 1
dl = data.get_dl(data.test_ds, False)
i = iter(dl)
# First two test images; presumably one basketball and one tennis image ("noball"
# meaning no basketball) — verify against data.test_ds.fnames ordering.
ball_x = next(i)[0]
noball_x = next(i)[0]
data.bs = bs
ball_activations = capture_activations(model, Variable(ball_x))
noball_activations = capture_activations(model, Variable(noball_x))
# Print the shape of every conv layer's output.
# NOTE(review): `i` is reused here, clobbering the dataloader iterator above.
for i, layer_output in enumerate(ball_activations):
    print(f'Layer {i}: {layer_output.squeeze().shape}')
#layer 5, filter 18, 36 seems to like circular type things
# Which conv layer to visualize below.
layer_idx = 0
images = []
titles = []
# Number of filters and the spatial size of this layer's activation map
# (assumes a square activation map — dims 2 and 3 equal).
num_filters = ball_activations[layer_idx].shape[1]
asize = ball_activations[layer_idx].shape[2]
def filter_activations_to_image(activations, lidx, fidx):
    """Turn one filter's activation map into a displayable grayscale array in [0, 1].

    activations: list of per-layer output tensors (batch size 1).
    lidx: index of the conv layer; fidx: index of the filter within that layer.
    """
    # Drop the batch dimension, then pick the requested filter's 2-D map.
    fmap = activations[lidx].squeeze()[fidx]
    # Center on 0.5, compress to roughly +/-3 standard deviations, ...
    scaled = (fmap - fmap.mean()) / (fmap.std() * 3) + 0.5
    # ... and clamp into the displayable [0, 1] range.
    return scaled.clamp(0, 1).numpy()
# Width of the white separator strip between the two activation images.
buff_size = 10
# For every filter in the chosen layer: basketball-image activation on the left,
# tennis-image activation on the right, separated by a white strip.
for filter_idx in range(num_filters):
    a0 = filter_activations_to_image(ball_activations, layer_idx, filter_idx)
    a1 = filter_activations_to_image(noball_activations, layer_idx, filter_idx)
    # NOTE(review): uses the literal 10 rather than buff_size — keep in sync.
    z = np.hstack([a0, np.ones((asize, 10)), a1])
    plt.imshow(z, cmap='gray')
    plt.axis('off')
    plt.title(f'Filter {filter_idx}')
    plt.show()
We can also look at the filters themselves. This is easiest at the first layer, where each filter has a depth of 3 (one slice per RGB input channel) and can therefore be rendered directly as a color image.
# Lay the first conv layer's filters out in a near-square grid and render each
# as a small RGB tile (depth 3 maps directly onto color channels).
import matplotlib.colors as mc
import math
conv = find_layers(learner.model, nn.Conv2d)[0]
weight = conv.weight.data.numpy()
# PyTorch conv weights are (out_channels, in_channels, kernel_h, kernel_w).
num_filters, depth, kh, kw = weight.shape
rows = int(num_filters**0.5)
cols = int(math.ceil(num_filters/rows))
border = 1
# Black canvas with a 1-pixel border around every tile.
img = np.zeros((depth, rows*kh + (1+rows)*border, cols*kw + (1+cols)*border))
for f in range(num_filters):
    # BUGFIX: the row index must be f // cols (was f // rows, which only worked
    # because resnet50's 64-filter grid happens to be square). Offsets now also
    # use kh for the vertical axis and kw for the horizontal one.
    r, c = divmod(f, cols)
    x = border + r * (kh + border)
    y = border + c * (kw + border)
    # Rescale each filter's weights to [0, 1] independently for display.
    img[:, x:x+kh, y:y+kw] = mc.Normalize()(weight[f, :, :, :])
plt.figure(figsize=(12,12))
plt.imshow(img.transpose(1,2,0))
_ = plt.axis('off')
We can also visualize subsequent layers, though it's not so pretty: with more than 3 input channels there is no natural color mapping, so we render each channel of each filter as a separate grayscale tile.
# for i, conv in enumerate(find_layers(learner.model, nn.Conv2d)):
#     print(conv, conv.weight.shape)
# Third conv layer's weights: one grid row per filter, one column per input channel.
weight = find_layers(learner.model, nn.Conv2d)[2].weight.data.numpy()
num_filters, depth, w, h = weight.shape
rows = num_filters
cols = depth
border = 1
img = np.zeros((rows*h + (1+rows)*border, cols*w + (1+cols)*border))
for f in range(num_filters):
    norm = mc.Normalize()
    normed = norm(weight[f, :, :, :]) #normalize over all the weights in a filter
    for d in range(depth):
        r = f
        c = d
        # NOTE(review): both offsets step by (w+border) and the slice mixes w and h;
        # this is only correct because conv kernels here are square (w == h) —
        # confirm before reusing on non-square kernels.
        x = border + r * (w+border)
        y = border + c * (w+border)
        img[x:x+w, y:y+h] = normed[d]
plt.figure(figsize=(18,18))
plt.imshow(img, cmap='gray')
_ = plt.axis('off')
We can also mask out portions of the image by repeatedly sliding a gray block over it and recording how the predictions change.
# Demo of the occlusion idea: gray out a region of the image and display it.
block_size = 50
image_path = path / data.test_ds.fnames[0]
image = open_image(image_path)
# Gray out a 200x200 region (open_image yields floats in [0, 1]; 0.75 is light gray).
image[50:250, 50:250] = np.full((200,200,3), 0.75)
# BUGFIX: this line previously transformed `orig_image`, which is not defined
# until a later cell, so a fresh top-to-bottom run raised NameError.
scaled_image = Scale(sz=224).do_transform(image, False)
# image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)
plt.imshow(image)
_ = plt.axis('off')
# Occlusion sensitivity: slide a gray square over the image and record how the
# model's prediction changes when each region is hidden.
block_size = 50
image_path = path / data.test_ds.fnames[0]
orig_image = open_image(image_path)
# image[0:200, 0:200] = np.full((200,200,3), 0.75)
scaled_image = Scale(sz=224).do_transform(orig_image, False)
# image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)
# plt.imshow(image)
plt.axis('off')
#the prediction for the smaller image should be essentially unchanged
print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())
w,h,_ = scaled_image.shape
# eval mode: freeze batchnorm/dropout so per-crop predictions are deterministic.
learner.model.eval()
t0 = time.time()
# prob_map[0] accumulates the class-0 probability (presumably 'bball' — verify
# data.classes) for every occluded crop touching a pixel; prob_map[1] counts how
# many crops touched it, for averaging later.
prob_map = np.zeros((2, w, h))
z = 0
#TODO: add stride for efficiency.
# Every block position, including ones partially off-image, hence the 1-block_size start.
for x in tqdm(range(1 - block_size, w)):
    for y in range(1 - block_size, h):
        image = np.array(scaled_image)
        # Clip the occluder to the image bounds.
        x0, x1 = max(0, x), min(w, x + block_size)
        y0, y1 = max(0, y), min(h, y + block_size)
        image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)
        image = tfms_from_model(arch, sz)[1](image)
        predictions = learner.model(VV(image).unsqueeze(0))
        prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]
        prob_map[1,x0:x1,y0:y1] += 1
# Persist the (slow) sweep so the heatmap can be re-plotted without recomputing.
np.save('probs-heatmap.npy', prob_map)
# Average class-0 probability per pixel when that pixel is occluded.
heatmap = prob_map[0]/prob_map[1]
plt.subplot(1,2,1)
# 1 - heatmap: bright pixels are ones whose occlusion lowers the class-0 probability.
plt.imshow(1 - heatmap, cmap='jet')
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(orig_image)
_ = plt.axis('off')
# Same occlusion-sensitivity sweep for a specific validation basketball image.
block_size = 50
image_path = path / 'valid/bball/29.jpg'
orig_image = open_image(image_path)
# image[0:200, 0:200] = np.full((200,200,3), 0.75)
scaled_image = Scale(sz=224).do_transform(orig_image, False)
# orig_image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)
# plt.imshow(orig_image)
# plt.axis('off')
#the prediction for the smaller image should be essentially unchanged
print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())
w,h,_ = scaled_image.shape
learner.model.eval()
t0 = time.time()
prob_map = np.zeros((2, w, h))
z = 0
#TODO: add stride for efficiency.
for x in tqdm(range(1 - block_size, w)):
    # BUGFIX: removed a stray `b` that followed the colon on this line and made
    # the whole cell a SyntaxError.
    for y in range(1 - block_size, h):
        image = np.array(scaled_image)
        # Clip the occluder to the image bounds.
        x0, x1 = max(0, x), min(w, x + block_size)
        y0, y1 = max(0, y), min(h, y + block_size)
        image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)
        image = tfms_from_model(arch, sz)[1](image)
        predictions = learner.model(VV(image).unsqueeze(0))
        prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]
        prob_map[1,x0:x1,y0:y1] += 1
np.save('probs-giannis-heatmap.npy', prob_map)
# Average class-0 probability per pixel when that pixel is occluded.
heatmap = prob_map[0]/prob_map[1]
plt.subplot(1,2,1)
plt.imshow(1 - heatmap, cmap='jet')
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(orig_image)
_ = plt.axis('off')
# Occlusion-sensitivity sweep for a validation tennis image.
block_size = 50
image_path = path / 'valid/tennis/23.jpg'
orig_image = open_image(image_path)
# image[0:200, 0:200] = np.full((200,200,3), 0.75)
scaled_image = Scale(sz=224).do_transform(orig_image, False)
# orig_image[0:block_size, 0:block_size] = np.full((block_size,block_size,3), 0.75)
plt.imshow(scaled_image)
# plt.axis('off')
#the prediction for the smaller image should be essentially unchanged
print(learner.model(VV(tfms_from_model(arch, sz)[1](scaled_image)).unsqueeze(0)).exp())
w,h,_ = scaled_image.shape
learner.model.eval()
t0 = time.time()
# prob_map[0]: accumulated class-0 probability per occluded pixel;
# prob_map[1]: number of occluding blocks that touched the pixel.
prob_map = np.zeros((2, w, h))
z = 0
#TODO: add stride for efficiency.
for x in tqdm(range(1 - block_size, w)):
    for y in range(1 - block_size, h):
        image = np.array(scaled_image)
        # Clip the occluder to the image bounds.
        x0, x1 = max(0, x), min(w, x + block_size)
        y0, y1 = max(0, y), min(h, y + block_size)
        image[x0:x1,y0:y1] = np.full((x1-x0, y1-y0, 3), 0.75)
        image = tfms_from_model(arch, sz)[1](image)
        predictions = learner.model(VV(image).unsqueeze(0))
        prob_map[0,x0:x1,y0:y1] += predictions.exp().data[0][0]
        prob_map[1,x0:x1,y0:y1] += 1
np.save('probs-tennis-heatmap.npy', prob_map)
heatmap = prob_map[0]/prob_map[1]
plt.subplot(1,2,1)
# NOTE(review): earlier cells plot 1 - heatmap; plotting heatmap directly flips
# the color orientation — confirm which is intended for the tennis class.
plt.imshow(heatmap, cmap='jet')
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(orig_image)
_ = plt.axis('off')