#!/usr/bin/env python
# coding: utf-8

# # MNIST-Neural Network-Single Hidden Layer with Tensorflow

# In[1]:

import math

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

print(tf.__version__)

# This file is a notebook export: `get_ipython` is only defined when the code
# is executed by IPython/Jupyter. Guard the magic so the script can also be
# run with a plain Python interpreter, where inline plotting does not apply.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# ## 1. MNIST handwritten digits image set
# - Note1: http://yann.lecun.com/exdb/mnist/
# - Note2: https://www.tensorflow.org/versions/r0.11/tutorials/mnist/beginners/index.html

# In[2]:

# NOTE: the data directory is a machine-specific absolute path.
mnist = input_data.read_data_sets("/Users/yhhan/git/deeplink/0.Common/data/MNIST_data/", one_hot=True)

# - Each image is 28 pixels by 28 pixels. We can interpret this as a big array of numbers:
#
#
# - flatten 1-D tensor of size 28x28 = 784.
#   - Each entry in the tensor is a pixel intensity between 0 and 1, for a particular pixel in a particular image.
# $$[0, 0, 0, ..., 0.6, 0.7, 0.7, 0.5, ... 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.3, ..., 0.4, 0.4, 0.4, ... 0, 0, 0]$$

# ### 1) Training Data

# In[3]:

# Print the container type and the array shape of the training images/labels.
print(type(mnist.train.images), mnist.train.images.shape)
print(type(mnist.train.labels), mnist.train.labels.shape)

# - Number of train images is 55000.
# - **mnist.train.images** is a tensor with a shape of [55000, 784].
#
# - A one-hot vector is a vector which is 0 in most entries, and 1 in a single entry.
# - In this case, the $n$th digit will be represented as a vector which is 1 in the nth entry.
#   - For example, 3 would be $[0,0,0,1,0,0,0,0,0,0]$.
# - **mnist.train.labels** is a tensor with a shape of [55000, 10].
#

# In[4]:

# Preview the first five training images side by side in a 1x5 grid.
fig = plt.figure(figsize=(20, 5))
for i in range(5):
    # Each image is stored as a flat 784-vector; view it as 28x28 for display.
    img = np.array(mnist.train.images[i]).reshape(28, 28)
    plt.subplot(1, 5, i + 1)
    plt.imshow(img, cmap='gray')

# ### 2) Validation Data

# In[5]:

print(type(mnist.validation.images), mnist.validation.images.shape)
print(type(mnist.validation.labels), mnist.validation.labels.shape)

# ### 3) Test Data

# In[6]:

print(type(mnist.test.images), mnist.test.images.shape)
print(type(mnist.test.labels), mnist.test.labels.shape)

# ## 2. Simple Neural Network Model (No Hidden Layer)
# ### 1) Tensor Operation and Shape
# - Input Layer to Output Layer
#   - $i=1...784$
#   - $j=1...10$
# $$ u_j = \sum_i W_{ji} x_i + b_j $$
# - Presentation of Matrix and Vector
#   - Shape of ${\bf W}: (10, 784)$
#   - Shape of ${\bf x}: (784, 1)$
#   - Shape of ${\bf b}: (10,)$
#   - Shape of ${\bf u}: (10,)$
# $$ {\bf u} = {\bf Wx + b} $$
# - **Transposed Matrix** Operation in Tensorflow
#   - Shape of ${\bf W}: (784, 10)$
#   - Shape of ${\bf x}: (1, 784)$
#   - Shape of ${\bf b}: (10,)$
#   - Shape of ${\bf u}: (10,)$
# $$ {\bf u} = {\bf xW + b} $$
# - Small Sized Example

# In[7]:

# Single-sample version of u = xW + b with tiny integer matrices.
W_ = np.array([[1, 2, 3], [4, 5, 6]])  # shape of W: (2, 3)
x_ = np.array([[1, 2]])                # shape of x: (1, 2)
xW_ = np.dot(x_, W_)                   # shape of xW: (1, 3)
print(W_.shape, x_.shape, xW_.shape)
print(xW_)
print()
b_ = np.array([10, 20, 30])            # shape of b: (3,)
u_ = xW_ + b_                          # shape of u: (1, 3)
print(b_.shape, u_.shape)
print(u_)

# ### 2) Mini Batch

# In[8]:

batch_images, batch_labels = mnist.train.next_batch(100)
print(batch_images.shape)
# print(batch_images)
# A bare `print` is a no-op expression in Python 3; call it so the blank
# separator line is actually emitted (as the other cells do with `print()`).
print()
print(batch_labels.shape)
# print(batch_labels)

# - Mini Batch (ex.
#   batch size = 100)
#   - Shape of ${\bf W}: (784, 10)$
#   - Shape of ${\bf x}: (100, 784)$
#   - Shape of ${\bf b}: (10,)$
#   - Shape of ${\bf u}: (100, 10)$
# $$ {\bf U} = {\bf XW + B} $$
# - Small Sized Example

# In[9]:

# Batched version of the toy example: five identical samples stacked row-wise.
W_ = np.array([[1, 2, 3],
               [4, 5, 6]])       # W: (2, 3)
x_ = np.array([[1, 2]] * 5)      # x: (5, 2)
xW_ = x_.dot(W_)                 # xW: (5, 3)
print(W_.shape, x_.shape, xW_.shape)
print(xW_)
print()
b_ = np.array([10, 20, 30])      # b: (3,)
u_ = xW_ + b_                    # u: (5, 3) -- b is broadcast over the 5 rows
print(b_.shape, u_.shape)
print(u_)

# ### 3) Model Construction
# - The placeholder to store the training data:

# In[10]:

# First dimension is None, so the batch size is left unspecified.
x = tf.placeholder(tf.float32, shape=[None, 784])
print("x -", x.get_shape())

# - The placeholder to store the correct answers (ground truth):

# In[11]:

y_target = tf.placeholder(tf.float32, shape=[None, 10])

# - A single (output) layer neural network model

# In[18]:

# Weights start as normal samples scaled by weight_init_std; biases as zeros.
weight_init_std = 0.01
W = tf.Variable(weight_init_std * tf.random_normal([784, 10]))
b = tf.Variable(tf.zeros([10]))
print("W -", W.get_shape())
print("b -", b.get_shape())

# In[19]:

# Pre-softmax scores for every sample in the batch: u = xW + b.
u = tf.matmul(x, W) + b
print("u -", u.get_shape())

# ### 4) Target Setup
# - softmax
#
# $$ {\bf z} = softmax({\bf u}) $$
# - Error functions: Cross entropy
#   - Suppose you have two tensors, where $u$ contains computed scores for each class (for example, from $u = W*x + b$) and `y_target` contains one-hot encoded true labels.
#
#
#     u = ...         # Predicted scores, e.g. u = tf.matmul(X, W) + b
#     y_target = ...  # True label, one-hot encoded
#
# - We call $u$ **logits** (if you interpret the scores in u as unnormalized log probabilities).
#
# - Additionally, the total cross-entropy loss computed in this manner:
#
#     z = tf.nn.softmax(u)
#     total_loss = tf.reduce_mean(-tf.reduce_sum(y_target * tf.log(z), [1]))
#
# - is essentially equivalent to the total cross-entropy loss computed with the function softmax_cross_entropy_with_logits():
#
#     total_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=u, labels=y_target))
#

# In[20]:

# Mean softmax cross-entropy over the batch, minimized with plain gradient descent.
learning_rate = 0.1
error = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=u, labels=y_target))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)

# ## 4. Learning (Training) & Evaluation

# In[21]:

# A sample counts as correct when the arg-max of the logits matches the
# arg-max of the one-hot target; accuracy is the mean of those matches.
prediction_and_ground_truth = tf.equal(tf.argmax(u, 1), tf.argmax(y_target, 1))
accuracy = tf.reduce_mean(tf.cast(prediction_and_ground_truth, tf.float32))

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    batch_size = 100
    total_batch = int(math.ceil(mnist.train.num_examples/float(batch_size)))
    print("Total batch: %d" % total_batch)

    training_epochs = 50
    epoch_list = []
    train_error_list = []
    validation_error_list = []
    test_accuracy_list = []

    for epoch in range(training_epochs):
        epoch_list.append(epoch)

        # Metrics are measured BEFORE this epoch's updates, so the entry for
        # epoch k reflects the model after k completed epochs (epoch 0 is the
        # freshly initialized model).
        train_error_value = sess.run(
            error, feed_dict={x: mnist.train.images, y_target: mnist.train.labels})
        train_error_list.append(train_error_value)

        validation_error_value = sess.run(
            error, feed_dict={x: mnist.validation.images, y_target: mnist.validation.labels})
        validation_error_list.append(validation_error_value)

        test_accuracy_value = sess.run(
            accuracy, feed_dict={x: mnist.test.images, y_target: mnist.test.labels})
        test_accuracy_list.append(test_accuracy_value)
        print("Epoch: {0:2d}, Train Error: {1:0.5f}, Validation Error: {2:0.5f}, Test Accuracy: {3:0.5f}".format(epoch, train_error_value, validation_error_value, test_accuracy_value))

        # One pass over the training set in mini-batches.
        for _ in range(total_batch):
            batch_images, batch_labels = mnist.train.next_batch(batch_size)
            sess.run(optimizer, feed_dict={x: batch_images, y_target: batch_labels})

    # Variable values live in the session. Take the test-set predictions here,
    # while the trained W and b still exist; a new session followed by the
    # initializer would evaluate freshly randomized weights instead.
    prediction = sess.run(tf.argmax(u, 1), feed_dict={x: mnist.test.images})

# The ground-truth class is the position of the 1 in each one-hot label row;
# this needs no TensorFlow session.
ground_truth = np.argmax(mnist.test.labels, axis=1)

# ## 5. Analysis with Graph

# In[22]:


def draw_error_values_and_accuracy(epoch_list, train_error_list, validation_error_list, test_accuracy_list):
    """Plot train/validation error and test accuracy against the epoch index.

    The first entry of every list is skipped (`[1:]`); in this script that
    entry is the measurement taken before any training step -- presumably
    dropped so the untrained error does not dominate the y-axis.

    Args:
        epoch_list: epoch indices, one per recorded measurement.
        train_error_list: training error per epoch.
        validation_error_list: validation error per epoch.
        test_accuracy_list: test accuracy per epoch.
    """
    plt.figure(figsize=(20, 5))

    # Left panel: error curves.
    plt.subplot(121)
    plt.plot(epoch_list[1:], train_error_list[1:], 'r', label='Train')
    plt.plot(epoch_list[1:], validation_error_list[1:], 'g', label='Validation')
    plt.ylabel('Total Error')
    plt.xlabel('Epochs')
    plt.grid(True)
    plt.legend(loc='upper right')

    # Right panel: accuracy curve.
    plt.subplot(122)
    plt.plot(epoch_list[1:], test_accuracy_list[1:], 'b', label='Test')
    plt.ylabel('Accuracy')
    plt.xlabel('Epochs')
    plt.yticks(np.arange(0.0, 1.0, 0.05))
    plt.grid(True)
    plt.legend(loc='lower right')
    plt.show()


draw_error_values_and_accuracy(epoch_list, train_error_list, validation_error_list, test_accuracy_list)


def draw_false_prediction(diff_index_list):
    """Print and display up to the first five misclassified test images.

    Reads the module-level `prediction`, `ground_truth` and `mnist` objects.

    Args:
        diff_index_list: test-set indices where prediction and ground truth
            differ. Fewer than five entries (including none) is allowed.
    """
    plt.figure(figsize=(20, 5))
    # Slice instead of indexing 0..4 so a short list cannot raise IndexError.
    for i, j in enumerate(diff_index_list[:5]):
        print("False Prediction Index: %s, Prediction: %s, Ground Truth: %s" % (j, prediction[j], ground_truth[j]))
        img = np.array(mnist.test.images[j]).reshape(28, 28)
        plt.subplot(1, 5, i + 1)
        plt.imshow(img, cmap='gray')


# False Prediction Profile
print(prediction)
print(ground_truth)

# Indices of every test sample whose predicted class differs from its label.
diff_index_list = np.flatnonzero(prediction != ground_truth).tolist()

print("Number of False Prediction:", len(diff_index_list))
draw_false_prediction(diff_index_list)