import os
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.ops import nn_ops, gen_nn_ops
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from models.models_4_3 import MNIST_CNN
from utils import find_roi
%matplotlib inline
# Load the cluttered-MNIST dataset (40x40 images flattened to 1600 features,
# with 5 distortions per image) into train/valid/test splits.
mnist_cluttered = np.load('./MNIST_cluttered/mnist_sequence1_sample_5distortions5x5.npz')

X_train = mnist_cluttered['X_train']
y_train = mnist_cluttered['y_train']
X_valid = mnist_cluttered['X_valid']
y_valid = mnist_cluttered['y_valid']
X_test = mnist_cluttered['X_test']
y_test = mnist_cluttered['y_test']

# TensorBoard log directory and the checkpoint path prefix used by the Saver.
logdir = './tf_logs/4_3_GCAMPP/'
ckptdir = logdir + 'model'

# os.makedirs creates missing parents too ('./tf_logs' may not exist yet,
# where os.mkdir would raise), and exist_ok=True removes the need for the
# race-prone "check then create" pattern.
os.makedirs(logdir, exist_ok=True)
with tf.name_scope('Classifier'):
    # Initialize neural network
    DNN = MNIST_CNN('CNN')

    # Setup training process
    X = tf.placeholder(tf.float32, [None, 1600], name='X')  # flattened 40x40 inputs
    Y = tf.placeholder(tf.int64, [None], name='Y')          # integer class labels 0-9
    Y_hot = tf.one_hot(Y, 10)

    activations, logits = DNN(X)

    # Stash the tensors Grad-CAM++ needs into the 'GCAM' collection so they can
    # be recovered after the graph is re-imported from the checkpoint's
    # MetaGraph. Order matters: [X, logits, *activations] is relied upon when
    # reading the collection back later.
    tf.add_to_collection('GCAM', X)
    tf.add_to_collection('GCAM', logits)
    for activation in activations:
        tf.add_to_collection('GCAM', activation)

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_hot))
    # Only optimize the classifier's own variables, as exposed by MNIST_CNN.
    optimizer = tf.train.AdamOptimizer().minimize(cost, var_list=DNN.vars)

    correct_prediction = tf.equal(tf.argmax(logits, 1), Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Scalar summaries for TensorBoard; merge_all bundles them into one fetch.
cost_summary = tf.summary.scalar('Cost', cost)
accuray_summary = tf.summary.scalar('Accuracy', accuracy)
summary = tf.summary.merge_all()

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

saver = tf.train.Saver()
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
# Hyper parameters
training_epochs = 20
batch_size = 100

# Mini-batch training: iterate the training set sequentially (no shuffling)
# in fixed-size slices, logging every step to TensorBoard.
for epoch in range(training_epochs):
    total_batch = int(np.shape(X_train)[0] / batch_size)
    avg_cost = 0
    avg_acc = 0

    for i in range(total_batch):
        # Labels arrive column-shaped; reshape([-1]) flattens them to the
        # rank-1 vector the Y placeholder expects.
        batch_xs, batch_ys = X_train[i * batch_size:(i+1) * batch_size], y_train[i * batch_size:(i+1) * batch_size].reshape([-1])
        _, c, a, summary_str = sess.run([optimizer, cost, accuracy, summary], feed_dict={X: batch_xs, Y: batch_ys})
        # Running per-epoch averages of the batch cost/accuracy.
        avg_cost += c / total_batch
        avg_acc += a / total_batch
        file_writer.add_summary(summary_str, epoch * total_batch + i)

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost), 'accuracy =', '{:.9f}'.format(avg_acc))
    # Checkpoint after every epoch; the final save is what gets restored later.
    saver.save(sess, ckptdir)

# Final held-out evaluation, then release the session's resources.
print('Accuracy:', sess.run(accuracy, feed_dict={X: X_test, Y: y_test.reshape([-1])}))
sess.close()
Epoch: 0001 cost = 1.020469854 accuracy = 0.658100001 Epoch: 0002 cost = 0.308226582 accuracy = 0.904399999 Epoch: 0003 cost = 0.164288832 accuracy = 0.947400003 Epoch: 0004 cost = 0.102076813 accuracy = 0.967900006 Epoch: 0005 cost = 0.060549132 accuracy = 0.982200012 Epoch: 0006 cost = 0.039169166 accuracy = 0.986300008 Epoch: 0007 cost = 0.025426290 accuracy = 0.992000005 Epoch: 0008 cost = 0.020262406 accuracy = 0.994500005 Epoch: 0009 cost = 0.009563436 accuracy = 0.997700002 Epoch: 0010 cost = 0.002488350 accuracy = 0.999500000 Epoch: 0011 cost = 0.001013720 accuracy = 1.000000000 Epoch: 0012 cost = 0.000315451 accuracy = 1.000000000 Epoch: 0013 cost = 0.000174855 accuracy = 1.000000000 Epoch: 0014 cost = 0.000118276 accuracy = 1.000000000 Epoch: 0015 cost = 0.000080929 accuracy = 1.000000000 Epoch: 0016 cost = 0.000066408 accuracy = 1.000000000 Epoch: 0017 cost = 0.000056945 accuracy = 1.000000000 Epoch: 0018 cost = 0.000049607 accuracy = 1.000000000 Epoch: 0019 cost = 0.000043721 accuracy = 1.000000000 Epoch: 0020 cost = 0.000038873 accuracy = 1.000000000 Accuracy: 0.947
# Start from a clean graph, then rebuild the trained network from the
# checkpoint's MetaGraph and restore its weights.
tf.reset_default_graph()

sess = tf.InteractiveSession()

new_saver = tf.train.import_meta_graph(ckptdir + '.meta')
new_saver.restore(sess, tf.train.latest_checkpoint(logdir))

# Tensors saved under 'GCAM' during graph construction, in insertion order:
# [X placeholder, logits, *layer activations].
activations = tf.get_collection('GCAM')
# Conv/dense kernel variables. NOTE(review): not used anywhere below in this
# file — kept presumably for inspection; confirm before removing.
weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='.*kernel.*')

X = activations[0]
logits = activations[1]
activations = activations[2:]

# One sample image per digit class (the 6th occurrence of each digit 0-9).
sample_imgs = [X_train[y_train.reshape([-1]) == i][5] for i in range(10)]
INFO:tensorflow:Restoring parameters from ./tf_logs/4_3_GCAMPP/model
In order to generate class activation maps, we must first calculate $\alpha^{kc}_{ij}$ from Equation (17) of the paper. Since the squared gradient term appears in both the numerator and the denominator, it cancels out and the calculation simplifies to:

\begin{equation} \alpha^{kc}_{ij} = \frac{\left(\frac{\partial S^c}{\partial A^k_{ij}}\right)^2}{2 \left(\frac{\partial S^c}{\partial A^k_{ij}}\right)^2 + \sum_a \sum_b A^k_{ab} \left(\frac{\partial S^c}{\partial A^k_{ij}}\right)^3} = \frac{1}{2 + \left(\frac{\partial S^c}{\partial A^k_{ij}}\right) \sum_a \sum_b A^k_{ab}} \end{equation}

Note that the `last_conv` variable corresponds to $A^k$, `preact_grads` to $\partial S^c / \partial A^k$, `act_grads` to $\partial Y^c / \partial A^k$, `conv_sum` to $\sum_a \sum_b A^k_{ab}$, `alpha_kc` to $\alpha^{kc}$, and `w_kc` to $w^c_k$ in the original paper.
# A^k: the last convolutional feature map. NOTE(review): index 5 assumes the
# MNIST_CNN activation list puts the final conv output there — confirm against
# models_4_3. The 10x10 pooling windows and the [-1, 128] reshape below imply
# a [batch, 10, 10, 128] shape.
last_conv = activations[5]

# dS^c/dA^k: gradient of each raw class score (logit) w.r.t. the feature map.
preact_grads = [tf.gradients(logits[:,i,None], last_conv)[0] for i in range(10)]

# dY^c/dA^k: gradient of exp(logit), the positive class score Grad-CAM++ uses.
act_grads = [tf.gradients(tf.exp(logits[:,i,None]), last_conv)[0] for i in range(10)]

# sum_ab A^k_ab per channel: averaging over the full 10x10 window and
# multiplying by 100 (= 10*10) recovers the spatial sum; reshaped to [batch, 128].
conv_sum = tf.reshape(tf.nn.avg_pool(last_conv, [1, 10, 10, 1], [1, 1, 1, 1], 'VALID') * 100, [-1,128])

# alpha^{kc}_{ij} = 1 / (2 + (dS/dA) * sum_ab A_ab), the simplified Eq. (17).
# NOTE(review): [batch, 128] conv_sum broadcast against a [batch, 10, 10, 128]
# gradient only lines up when batch == 1, as in the single-image feeds below —
# verify before running batched.
alpha_kc = [1 / (2 + conv_sum * preact_grad) for preact_grad in preact_grads]

# w^c_k: spatial sum (avg_pool * 100 again) of alpha * relu(dY/dA).
w_kc = [tf.nn.avg_pool(alpha_kc[i] * tf.nn.relu(act_grads[i]), [1, 10, 10, 1], [1, 1, 1, 1], 'VALID') * 100 for i in range(10)]

# Class activation map: relu of the channel-weighted sum over the feature map.
cams = [tf.nn.relu(tf.reduce_sum(last_conv * w_kc[i], axis=3, keep_dims=True)) for i in range(10)]

# Upsample each 10x10 map to the 40x40 input resolution for overlaying.
resized_cams = [tf.image.resize_bilinear(cams[i], [40,40], align_corners=True) for i in range(10)]

# One heatmap per digit class, each computed on that class's sample image
# ([None] adds the batch dimension of 1 the broadcast above requires).
hmaps = np.reshape([sess.run(resized_cams[i], feed_dict={X: sample_imgs[i][None]}) for i in range(10)], [10, 40, 40])
# Overlay each Grad-CAM++ heatmap on its source digit: five figures,
# two digits per figure, ticks suppressed, colorbar per panel.
for row in range(5):
    plt.figure(figsize=(15, 15))
    for col in range(2):
        digit = row * 2 + col
        plt.subplot(5, 2, digit + 1)
        plt.imshow(sample_imgs[digit].reshape(40, 40), cmap='gray')
        plt.imshow(hmaps[digit], alpha=0.8)
        plt.title('Digit {} Grad-CAM++'.format(digit))
        plt.colorbar()
        plt.xticks([])
        plt.yticks([])
    plt.tight_layout()