import os
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.ops import nn_ops, gen_nn_ops
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from models.models_4_3 import MNIST_CNN
from utils import find_roi
%matplotlib inline
# Load the cluttered-MNIST dataset (40x40 images flattened to 1600 features,
# with 5 distortions per image) into train/valid/test splits.
mnist_cluttered = np.load('./MNIST_cluttered/mnist_sequence1_sample_5distortions5x5.npz')

X_train = mnist_cluttered['X_train']
y_train = mnist_cluttered['y_train']
X_valid = mnist_cluttered['X_valid']
y_valid = mnist_cluttered['y_valid']
X_test = mnist_cluttered['X_test']
y_test = mnist_cluttered['y_test']

# TensorBoard log directory and the checkpoint path prefix used by the Saver.
logdir = './tf_logs/4_3_GCAMPP/'
ckptdir = logdir + 'model'

# os.makedirs creates missing parents too ('./tf_logs' may not exist yet,
# where os.mkdir would raise), and exist_ok=True removes the need for the
# race-prone "check then create" pattern.
os.makedirs(logdir, exist_ok=True)
with tf.name_scope('Classifier'):
    # Initialize neural network
    DNN = MNIST_CNN('CNN')

    # Setup training process
    X = tf.placeholder(tf.float32, [None, 1600], name='X')  # flattened 40x40 inputs
    Y = tf.placeholder(tf.int64, [None], name='Y')          # integer class labels 0-9
    Y_hot = tf.one_hot(Y, 10)

    activations, logits = DNN(X)

    # Stash the tensors Grad-CAM++ needs into the 'GCAM' collection so they can
    # be recovered after the graph is re-imported from the checkpoint's
    # MetaGraph. Order matters: [X, logits, *activations] is relied upon when
    # reading the collection back later.
    tf.add_to_collection('GCAM', X)
    tf.add_to_collection('GCAM', logits)
    for activation in activations:
        tf.add_to_collection('GCAM', activation)

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_hot))
    # Only optimize the classifier's own variables, as exposed by MNIST_CNN.
    optimizer = tf.train.AdamOptimizer().minimize(cost, var_list=DNN.vars)

    correct_prediction = tf.equal(tf.argmax(logits, 1), Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Scalar summaries for TensorBoard; merge_all bundles them into one fetch.
cost_summary = tf.summary.scalar('Cost', cost)
accuray_summary = tf.summary.scalar('Accuracy', accuracy)
summary = tf.summary.merge_all()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

saver = tf.train.Saver()
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
# Hyper parameters
training_epochs = 20
batch_size = 100

# Mini-batch training: iterate the training set sequentially (no shuffling)
# in fixed-size slices, logging every step to TensorBoard.
for epoch in range(training_epochs):
    total_batch = int(np.shape(X_train)[0] / batch_size)
    avg_cost = 0
    avg_acc = 0

    for i in range(total_batch):
        # Labels arrive column-shaped; reshape([-1]) flattens them to the
        # rank-1 vector the Y placeholder expects.
        batch_xs, batch_ys = X_train[i * batch_size:(i+1) * batch_size], y_train[i * batch_size:(i+1) * batch_size].reshape([-1])
        _, c, a, summary_str = sess.run([optimizer, cost, accuracy, summary], feed_dict={X: batch_xs, Y: batch_ys})
        # Running per-epoch averages of the batch cost/accuracy.
        avg_cost += c / total_batch
        avg_acc += a / total_batch
        file_writer.add_summary(summary_str, epoch * total_batch + i)

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost), 'accuracy =', '{:.9f}'.format(avg_acc))
    # Checkpoint after every epoch; the final save is what gets restored later.
    saver.save(sess, ckptdir)

# Final held-out evaluation, then release the session's resources.
print('Accuracy:', sess.run(accuracy, feed_dict={X: X_test, Y: y_test.reshape([-1])}))
sess.close()
Epoch: 0001 cost = 1.020469854 accuracy = 0.658100001 Epoch: 0002 cost = 0.308226582 accuracy = 0.904399999 Epoch: 0003 cost = 0.164288832 accuracy = 0.947400003 Epoch: 0004 cost = 0.102076813 accuracy = 0.967900006 Epoch: 0005 cost = 0.060549132 accuracy = 0.982200012 Epoch: 0006 cost = 0.039169166 accuracy = 0.986300008 Epoch: 0007 cost = 0.025426290 accuracy = 0.992000005 Epoch: 0008 cost = 0.020262406 accuracy = 0.994500005 Epoch: 0009 cost = 0.009563436 accuracy = 0.997700002 Epoch: 0010 cost = 0.002488350 accuracy = 0.999500000 Epoch: 0011 cost = 0.001013720 accuracy = 1.000000000 Epoch: 0012 cost = 0.000315451 accuracy = 1.000000000 Epoch: 0013 cost = 0.000174855 accuracy = 1.000000000 Epoch: 0014 cost = 0.000118276 accuracy = 1.000000000 Epoch: 0015 cost = 0.000080929 accuracy = 1.000000000 Epoch: 0016 cost = 0.000066408 accuracy = 1.000000000 Epoch: 0017 cost = 0.000056945 accuracy = 1.000000000 Epoch: 0018 cost = 0.000049607 accuracy = 1.000000000 Epoch: 0019 cost = 0.000043721 accuracy = 1.000000000 Epoch: 0020 cost = 0.000038873 accuracy = 1.000000000 Accuracy: 0.947
# Start from a clean graph, then rebuild the trained network from the
# checkpoint's MetaGraph and restore its weights.
tf.reset_default_graph()

sess = tf.InteractiveSession()

new_saver = tf.train.import_meta_graph(ckptdir + '.meta')
new_saver.restore(sess, tf.train.latest_checkpoint(logdir))

# Tensors saved under 'GCAM' during graph construction, in insertion order:
# [X placeholder, logits, *layer activations].
activations = tf.get_collection('GCAM')
# Conv/dense kernel variables. NOTE(review): not used anywhere below in this
# file — kept presumably for inspection; confirm before removing.
weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='.*kernel.*')

X = activations[0]
logits = activations[1]
activations = activations[2:]

# One sample image per digit class (the 6th occurrence of each digit 0-9).
sample_imgs = [X_train[y_train.reshape([-1]) == i][5] for i in range(10)]
INFO:tensorflow:Restoring parameters from ./tf_logs/4_3_GCAMPP/model
In order to generate class activation maps, we must first calculate $\alpha^{kc}_{ij}$ from Equation (17) of the paper. Since the squared gradient term appears in both the numerator and the denominator, it cancels out and the calculation simplifies to:

\begin{equation} \alpha^{kc}_{ij} = \frac{\left(\frac{\partial S^c}{\partial A^k_{ij}}\right)^2}{2 \left(\frac{\partial S^c}{\partial A^k_{ij}}\right)^2 + \sum_a \sum_b A^k_{ab} \left(\frac{\partial S^c}{\partial A^k_{ij}}\right)^3} = \frac{1}{2 + \left(\frac{\partial S^c}{\partial A^k_{ij}}\right) \sum_a \sum_b A^k_{ab}} \end{equation}

Note that the `last_conv` variable corresponds to $A^k$, `preact_grads` to $\partial S^c / \partial A^k$, `act_grads` to $\partial Y^c / \partial A^k$, `conv_sum` to $\sum_a \sum_b A^k_{ab}$, `alpha_kc` to $\alpha^{kc}$, and `w_kc` to $w^c_k$ in the original paper.
# A^k: the last convolutional feature map. NOTE(review): index 5 assumes the
# MNIST_CNN activation list puts the final conv output there — confirm against
# models_4_3. The 10x10 pooling windows and the [-1, 128] reshape below imply
# a [batch, 10, 10, 128] shape.
last_conv = activations[5]

# dS^c/dA^k: gradient of each raw class score (logit) w.r.t. the feature map.
preact_grads = [tf.gradients(logits[:,i,None], last_conv)[0] for i in range(10)]

# dY^c/dA^k: gradient of exp(logit), the positive class score Grad-CAM++ uses.
act_grads = [tf.gradients(tf.exp(logits[:,i,None]), last_conv)[0] for i in range(10)]

# sum_ab A^k_ab per channel: averaging over the full 10x10 window and
# multiplying by 100 (= 10*10) recovers the spatial sum; reshaped to [batch, 128].
conv_sum = tf.reshape(tf.nn.avg_pool(last_conv, [1, 10, 10, 1], [1, 1, 1, 1], 'VALID') * 100, [-1,128])

# alpha^{kc}_{ij} = 1 / (2 + (dS/dA) * sum_ab A_ab), the simplified Eq. (17).
# NOTE(review): [batch, 128] conv_sum broadcast against a [batch, 10, 10, 128]
# gradient only lines up when batch == 1, as in the single-image feeds below —
# verify before running batched.
alpha_kc = [1 / (2 + conv_sum * preact_grad) for preact_grad in preact_grads]

# w^c_k: spatial sum (avg_pool * 100 again) of alpha * relu(dY/dA).
w_kc = [tf.nn.avg_pool(alpha_kc[i] * tf.nn.relu(act_grads[i]), [1, 10, 10, 1], [1, 1, 1, 1], 'VALID') * 100 for i in range(10)]

# Class activation map: relu of the channel-weighted sum over the feature map.
cams = [tf.nn.relu(tf.reduce_sum(last_conv * w_kc[i], axis=3, keep_dims=True)) for i in range(10)]

# Upsample each 10x10 map to the 40x40 input resolution for overlaying.
resized_cams = [tf.image.resize_bilinear(cams[i], [40,40], align_corners=True) for i in range(10)]

# One heatmap per digit class, each computed on that class's sample image
# ([None] adds the batch dimension of 1 the broadcast above requires).
hmaps = np.reshape([sess.run(resized_cams[i], feed_dict={X: sample_imgs[i][None]}) for i in range(10)], [10, 40, 40])
# Overlay each Grad-CAM++ heatmap on its source digit: five figures,
# two digits per figure, ticks suppressed, colorbar per panel.
for row in range(5):
    plt.figure(figsize=(15, 15))
    for col in range(2):
        digit = row * 2 + col
        plt.subplot(5, 2, digit + 1)
        plt.imshow(sample_imgs[digit].reshape(40, 40), cmap='gray')
        plt.imshow(hmaps[digit], alpha=0.8)
        plt.title('Digit {} Grad-CAM++'.format(digit))
        plt.colorbar()
        plt.xticks([])
        plt.yticks([])
    plt.tight_layout()