# MNIST-Neural Network-Single Hidden Layer with Tensorflow¶

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
import math
import tensorflow as tf
print(tf.__version__)

%matplotlib inline

1.0.1


## 1. MNIST handwritten digits image set¶

In [2]:
mnist = input_data.read_data_sets("/Users/yhhan/git/deeplink/0.Common/data/MNIST_data/", one_hot=True)

Extracting /Users/yhhan/git/deeplink/0.Common/data/MNIST_data/train-images-idx3-ubyte.gz

• Each image is 28 pixels by 28 pixels. We can interpret this as a big array of numbers:

• flatten 1-D tensor of size 28x28 = 784.

• Each entry in the tensor is a pixel intensity between 0 and 1, for a particular pixel in a particular image. $$[0, 0, 0, ..., 0.6, 0.7, 0.7, 0.5, ... 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.3, ..., 0.4, 0.4, 0.4, ... 0, 0, 0]$$

### 1) Training Data¶

In [3]:
print(type(mnist.train.images), mnist.train.images.shape)
print(type(mnist.train.labels), mnist.train.labels.shape)

<class 'numpy.ndarray'> (55000, 784)
<class 'numpy.ndarray'> (55000, 10)

• Number of train images is 55000.
• mnist.train.images is a tensor with a shape of [55000, 784].
• A one-hot vector is a vector which is 0 in most entries, and 1 in a single entry.
• In this case, the $n$th digit will be represented as a vector which is 1 in the nth entry.
• For example, 3 would be $[0,0,0,1,0,0,0,0,0,0]$.
• mnist.train.labels is a tensor with a shape of [55000, 10].
In [4]:
fig = plt.figure(figsize=(20, 5))
for i in range(5):
img = np.array(mnist.train.images[i])
img.shape = (28, 28)
plt.subplot(150 + (i+1))
plt.imshow(img, cmap='gray')


### 2) Validation Data¶

In [5]:
print(type(mnist.validation.images), mnist.validation.images.shape)
print(type(mnist.validation.labels), mnist.validation.labels.shape)

<class 'numpy.ndarray'> (5000, 784)
<class 'numpy.ndarray'> (5000, 10)


### 3) Test Data¶

In [6]:
print(type(mnist.test.images), mnist.test.images.shape)
print(type(mnist.test.labels), mnist.test.labels.shape)

<class 'numpy.ndarray'> (10000, 784)
<class 'numpy.ndarray'> (10000, 10)


## 2. Simple Neural Network Model (No Hidden Layer)¶

### 1) Tensor Operation and Shape¶

• Input Layer to Output Layer
• $i=1...784$
• $j=1...10$ $$u_j = \sum_i W_{ji} x_i + b_j$$
• Presentation of Matrix and Vector
• Shape of ${\bf W}: (10, 784)$
• Shape of ${\bf x}: (784, 1)$
• Shape of ${\bf b}: (10,)$
• Shape of ${\bf u}: (10,)$ $${\bf u} = {\bf Wx + b}$$
• Transposed Matrix Operation in Tensorflow
• Shape of ${\bf W}: (784, 10)$
• Shape of ${\bf x}: (1, 784)$
• Shape of ${\bf b}: (10,)$
• Shape of ${\bf u}: (10,)$ $${\bf u} = {\bf xW + b}$$
• Small Sized Example
In [7]:
W_ = np.array([[1, 2, 3], [4, 5, 6]])  #shape of W: (2, 3)
x_ = np.array([[1, 2]])                #shape of x: (1, 2)
xW_ = np.dot(x_, W_)                     #shape of xW: (1, 3)
print(W_.shape, x_.shape, xW_.shape)
print(xW_)

print()

b_ = np.array([10, 20, 30])            #shape of b: (3,)
u_ = xW_ + b_                            #shape of u: (1, 3)
print(b_.shape, u_.shape)
print(u_)

(2, 3) (1, 2) (1, 3)
[[ 9 12 15]]

(3,) (1, 3)
[[19 32 45]]


### 2) Mini Batch¶

In [8]:
batch_images, batch_labels = mnist.train.next_batch(100)
print(batch_images.shape)
#print batch_images
print

print(batch_labels.shape)
#print batch_labels

(100, 784)
(100, 10)

• Mini Batch (ex. batch size = 100)
• Shape of ${\bf W}: (784, 10)$
• Shape of ${\bf x}: (100, 784)$
• Shape of ${\bf b}: (10,)$
• Shape of ${\bf u}: (100, 10)$ $${\bf U} = {\bf XW + B}$$
• Small Sized Example
In [9]:
W_ = np.array([[1, 2, 3], [4, 5, 6]])                    #shape of W: (2, 3)
x_ = np.array([[1, 2], [1, 2], [1, 2], [1, 2], [1, 2]])  #shape of x: (5, 2)
xW_ = np.dot(x_, W_)                                       #shape of xW: (5, 3)
print(W_.shape, x_.shape, xW_.shape)
print(xW_)

print()

b_ = np.array([10, 20, 30])            #shape of b: (3,)
u_ = xW_ + b_                            #shape of u: (1, 3)
print(b_.shape, u_.shape)
print(u_)

(2, 3) (5, 2) (5, 3)
[[ 9 12 15]
[ 9 12 15]
[ 9 12 15]
[ 9 12 15]
[ 9 12 15]]

(3,) (5, 3)
[[19 32 45]
[19 32 45]
[19 32 45]
[19 32 45]
[19 32 45]]


### 3) Model Construction¶

• The placeholder to store the training data:
In [10]:
x = tf.placeholder(tf.float32, [None, 784])
print("x -", x.get_shape())

x - (?, 784)

• The placeholder to store the correct answers (ground truth):
In [11]:
y_target = tf.placeholder(tf.float32, [None, 10])

• A single (output) layer neural network model
In [18]:
weight_init_std = 0.01
W = tf.Variable(weight_init_std * tf.random_normal([784, 10]))
b = tf.Variable(tf.zeros([10]))
print("W -", W.get_shape())
print("b -", b.get_shape())

W - (784, 10)
b - (10,)

In [19]:
u = tf.matmul(x, W) + b
print("u -", u.get_shape())

u - (?, 10)


### 4) Target Setup¶

• softmax
$${\bf z} = softmax({\bf u})$$
• Error functions: Cross entropy
• Suppose you have two tensors, where $u$ contains computed scores for each class (for example, from $u = W*x + b$) and $y_target$ contains one-hot encoded true labels.
u  = ... # Predicted label, e.g. $u = tf.matmul(X, W) + b y_target = ... # True label, one-hot encoded  • We call$u\$ logits (if you interpret the scores in u as unnormalized log probabilities).

• Additionally, the total cross-entropy loss computed in this manner:

z = tf.nn.softmax(u)
total_loss = tf.reduce_mean(-tf.reduce_sum(y_target * tf.log(z), [1]))

• is essentially equivalent to the total cross-entropy loss computed with the function softmax_cross_entropy_with_logits():
total_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=u, labels=y_target))

In [20]:
learning_rate = 0.1
error = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=u, labels=y_target))


## 4. Learning (Training) & Evaluation¶

In [21]:
prediction_and_ground_truth = tf.equal(tf.argmax(u, 1), tf.argmax(y_target, 1))
accuracy = tf.reduce_mean(tf.cast(prediction_and_ground_truth, tf.float32))
with tf.Session() as sess:
init = tf.global_variables_initializer()
sess.run(init)

batch_size = 100
total_batch = int(math.ceil(mnist.train.num_examples/float(batch_size)))
print("Total batch: %d" % total_batch)

training_epochs = 50
epoch_list = []
train_error_list = []
validation_error_list = []
test_accuracy_list = []
for epoch in range(training_epochs):
epoch_list.append(epoch)
# Train Error Value
train_error_value = sess.run(error, feed_dict={x: mnist.train.images, y_target: mnist.train.labels})
train_error_list.append(train_error_value)

validation_error_value = sess.run(error, feed_dict={x: mnist.validation.images, y_target: mnist.validation.labels})
validation_error_list.append(validation_error_value)

test_accuracy_value = sess.run(accuracy, feed_dict={x: mnist.test.images, y_target: mnist.test.labels})
test_accuracy_list.append(test_accuracy_value)
print("Epoch: {0:2d}, Train Error: {1:0.5f}, Validation Error: {2:0.5f}, Test Accuracy: {3:0.5f}".format(epoch, train_error_value, validation_error_value, test_accuracy_value))

for i in range(total_batch):
batch_images, batch_labels = mnist.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={x: batch_images, y_target: batch_labels})

Total batch: 550
Epoch:  0, Train Error: 2.30572, Validation Error: 2.30698, Test Accuracy: 0.12840
Epoch:  1, Train Error: 0.39198, Validation Error: 0.37061, Test Accuracy: 0.90180
Epoch:  2, Train Error: 0.34676, Validation Error: 0.32587, Test Accuracy: 0.90890
Epoch:  3, Train Error: 0.32534, Validation Error: 0.30698, Test Accuracy: 0.91660
Epoch:  4, Train Error: 0.31445, Validation Error: 0.29831, Test Accuracy: 0.91660
Epoch:  5, Train Error: 0.30418, Validation Error: 0.28912, Test Accuracy: 0.91790
Epoch:  6, Train Error: 0.29866, Validation Error: 0.28601, Test Accuracy: 0.91850
Epoch:  7, Train Error: 0.29390, Validation Error: 0.28150, Test Accuracy: 0.91900
Epoch:  8, Train Error: 0.29029, Validation Error: 0.27967, Test Accuracy: 0.91980
Epoch:  9, Train Error: 0.28661, Validation Error: 0.27622, Test Accuracy: 0.92200
Epoch: 10, Train Error: 0.28457, Validation Error: 0.27407, Test Accuracy: 0.92190
Epoch: 11, Train Error: 0.28074, Validation Error: 0.27155, Test Accuracy: 0.92230
Epoch: 12, Train Error: 0.27949, Validation Error: 0.27210, Test Accuracy: 0.92440
Epoch: 13, Train Error: 0.27634, Validation Error: 0.26903, Test Accuracy: 0.92230
Epoch: 14, Train Error: 0.27572, Validation Error: 0.26896, Test Accuracy: 0.92290
Epoch: 15, Train Error: 0.27364, Validation Error: 0.26740, Test Accuracy: 0.92340
Epoch: 16, Train Error: 0.27223, Validation Error: 0.26725, Test Accuracy: 0.92270
Epoch: 17, Train Error: 0.27102, Validation Error: 0.26595, Test Accuracy: 0.92290
Epoch: 18, Train Error: 0.27071, Validation Error: 0.26719, Test Accuracy: 0.92330
Epoch: 19, Train Error: 0.26938, Validation Error: 0.26507, Test Accuracy: 0.92190
Epoch: 20, Train Error: 0.26838, Validation Error: 0.26431, Test Accuracy: 0.92210
Epoch: 21, Train Error: 0.26776, Validation Error: 0.26610, Test Accuracy: 0.92200
Epoch: 22, Train Error: 0.26633, Validation Error: 0.26555, Test Accuracy: 0.92400
Epoch: 23, Train Error: 0.26622, Validation Error: 0.26398, Test Accuracy: 0.92470
Epoch: 24, Train Error: 0.26345, Validation Error: 0.26242, Test Accuracy: 0.92270
Epoch: 25, Train Error: 0.26297, Validation Error: 0.26230, Test Accuracy: 0.92380
Epoch: 26, Train Error: 0.26315, Validation Error: 0.26254, Test Accuracy: 0.92380
Epoch: 27, Train Error: 0.26165, Validation Error: 0.26105, Test Accuracy: 0.92450
Epoch: 28, Train Error: 0.26111, Validation Error: 0.26239, Test Accuracy: 0.92470
Epoch: 29, Train Error: 0.25995, Validation Error: 0.26142, Test Accuracy: 0.92350
Epoch: 30, Train Error: 0.25970, Validation Error: 0.26080, Test Accuracy: 0.92450
Epoch: 31, Train Error: 0.25900, Validation Error: 0.26151, Test Accuracy: 0.92420
Epoch: 32, Train Error: 0.26040, Validation Error: 0.26189, Test Accuracy: 0.92400
Epoch: 33, Train Error: 0.25777, Validation Error: 0.25990, Test Accuracy: 0.92350
Epoch: 34, Train Error: 0.25738, Validation Error: 0.26107, Test Accuracy: 0.92430
Epoch: 35, Train Error: 0.25743, Validation Error: 0.26133, Test Accuracy: 0.92400
Epoch: 36, Train Error: 0.25644, Validation Error: 0.26034, Test Accuracy: 0.92460
Epoch: 37, Train Error: 0.25596, Validation Error: 0.26001, Test Accuracy: 0.92440
Epoch: 38, Train Error: 0.25591, Validation Error: 0.26088, Test Accuracy: 0.92520
Epoch: 39, Train Error: 0.25558, Validation Error: 0.26054, Test Accuracy: 0.92540
Epoch: 40, Train Error: 0.25539, Validation Error: 0.26121, Test Accuracy: 0.92550
Epoch: 41, Train Error: 0.25400, Validation Error: 0.25911, Test Accuracy: 0.92520
Epoch: 42, Train Error: 0.25396, Validation Error: 0.25937, Test Accuracy: 0.92360
Epoch: 43, Train Error: 0.25311, Validation Error: 0.25945, Test Accuracy: 0.92460
Epoch: 44, Train Error: 0.25310, Validation Error: 0.25960, Test Accuracy: 0.92520
Epoch: 45, Train Error: 0.25337, Validation Error: 0.26013, Test Accuracy: 0.92450
Epoch: 46, Train Error: 0.25380, Validation Error: 0.26053, Test Accuracy: 0.92570
Epoch: 47, Train Error: 0.25286, Validation Error: 0.25917, Test Accuracy: 0.92560
Epoch: 48, Train Error: 0.25228, Validation Error: 0.26081, Test Accuracy: 0.92410
Epoch: 49, Train Error: 0.25221, Validation Error: 0.26051, Test Accuracy: 0.92410


## 5. Analysis with Graph¶

In [22]:
# Draw Graph about Error Values & Accuracy Values
def draw_error_values_and_accuracy(epoch_list, train_error_list, validation_error_list, test_accuracy_list):
# Draw Error Values and Accuracy
fig = plt.figure(figsize=(20, 5))
plt.subplot(121)
plt.plot(epoch_list[1:], train_error_list[1:], 'r', label='Train')
plt.plot(epoch_list[1:], validation_error_list[1:], 'g', label='Validation')
plt.ylabel('Total Error')
plt.xlabel('Epochs')
plt.grid(True)
plt.legend(loc='upper right')

plt.subplot(122)
plt.plot(epoch_list[1:], test_accuracy_list[1:], 'b', label='Test')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.yticks(np.arange(0.0, 1.0, 0.05))
plt.grid(True)
plt.legend(loc='lower right')
plt.show()

draw_error_values_and_accuracy(epoch_list, train_error_list, validation_error_list, test_accuracy_list)

def draw_false_prediction(diff_index_list):
fig = plt.figure(figsize=(20, 5))
for i in range(5):
j = diff_index_list[i]
print("False Prediction Index: %s, Prediction: %s, Ground Truth: %s" % (j, prediction[j], ground_truth[j]))
img = np.array(mnist.test.images[j])
img.shape = (28, 28)
plt.subplot(150 + (i+1))
plt.imshow(img, cmap='gray')

with tf.Session() as sess:
init = tf.global_variables_initializer()
sess.run(init)
# False Prediction Profile
prediction = sess.run(tf.argmax(u, 1), feed_dict={x:mnist.test.images})
ground_truth = sess.run(tf.argmax(y_target, 1), feed_dict={y_target:mnist.test.labels})

print(prediction)
print(ground_truth)

diff_index_list = []
for i in range(mnist.test.num_examples):
if (prediction[i] != ground_truth[i]):
diff_index_list.append(i)

print("Number of False Prediction:", len(diff_index_list))
draw_false_prediction(diff_index_list)

[3 8 5 ..., 8 8 8]
[7 2 1 ..., 4 5 6]
Number of False Prediction: 8971
False Prediction Index: 0, Prediction: 3, Ground Truth: 7
False Prediction Index: 1, Prediction: 8, Ground Truth: 2
False Prediction Index: 2, Prediction: 5, Ground Truth: 1
False Prediction Index: 3, Prediction: 8, Ground Truth: 0
False Prediction Index: 4, Prediction: 0, Ground Truth: 4