# coding: utf-8

# # Neural Network

# ## 1. Basic Usage of TensorFlow
# - References
#   - https://www.tensorflow.org/versions/r0.11/get_started/basic_usage.html
#   - https://www.tensorflow.org/versions/r0.11/resources/dims_types.html
# - **TensorFlow** is a programming system in which you represent computations as graphs.
# - Nodes in the graph are called **operations**.
# - An operation takes zero or more Tensors, performs some computation, and produces zero or more Tensors.
# - A **Tensor** is a typed multi-dimensional array.

# In[1]:

import tensorflow as tf

# NOTE: every print below uses the single-argument print(...) form, which
# produces the same output under the Python 2 print statement and the
# Python 3 print function.

# Building ops only describes the graph: printing them shows Tensor handles,
# not values.
matrix1 = tf.constant([[3., 3.]])
matrix2 = tf.constant([[2.], [2.]])
print("matrix1 - %s" % (matrix1,))
print("matrix2 - %s" % (matrix2,))
print("")

# Matrix products in both orders.
matrix3 = tf.matmul(matrix1, matrix2)
print("matrix3 - %s" % (matrix3,))
matrix4 = tf.matmul(matrix2, matrix1)
print("matrix4 - %s" % (matrix4,))
print("")

matrix5 = tf.constant([[1., 1.], [2., 2.]])
print("matrix5 - %s" % (matrix5,))
matrix6 = tf.constant([10., 100.])
print("matrix6 - %s" % (matrix6,))
print("")

# Element-wise sum and product.
matrix7 = matrix5 + matrix6
print("matrix7 - %s" % (matrix7,))
matrix8 = matrix5 * matrix6
print("matrix8 - %s" % (matrix8,))

# matrix8 is deliberately rebound here to a fresh constant for the
# broadcasting example; the element-wise product above is not used again.
matrix8 = tf.constant([[1., 1.], [2., 2.]])
print("matrix8 - %s" % (matrix8,))
matrix9 = tf.ones([2])
print("matrix9 - %s" % (matrix9,))
matrix10 = matrix8 + matrix9  # broadcast
print("matrix10 - %s" % (matrix10,))
print("")

# Values only exist once the graph is run in a session.  The context manager
# closes the session when this demonstration is done.
with tf.Session() as sess:
    matrix3_result = sess.run(matrix3)
    print("matrix3_result -\n%s" % (matrix3_result,))
    matrix4_result = sess.run(matrix4)
    print("matrix4_result -\n%s" % (matrix4_result,))
    matrix7_result = sess.run(matrix7)
    print("matrix7_result -\n%s" % (matrix7_result,))
    matrix10_result = sess.run(matrix10)
    print("matrix10_result -\n%s" % (matrix10_result,))

# ## 2. MNIST handwritten digits image set
# - Note1: http://yann.lecun.com/exdb/mnist/
# - Note2: https://www.tensorflow.org/versions/r0.11/tutorials/mnist/beginners/index.html

# In[2]:

from tensorflow.examples.tutorials.mnist import input_data

# one_hot=True requests labels as one-hot vectors.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# - Each image is 28 pixels by 28 pixels.
# We can interpret this as a big array of numbers:
#
#
# - flatten 1-D tensor of size 28x28 = 784.
# - Each entry in the tensor is a pixel intensity between 0 and 1, for a particular pixel in a particular image.
# $$[0, 0, 0, ..., 0.6, 0.7, 0.7, 0.5, ... 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9, 0.3, ..., 0.4, 0.4, 0.4, ... 0, 0, 0]$$

# In[3]:

# Single-argument print(...) gives identical output on Python 2 and 3.
print("%s %s" % (type(mnist.train.images), mnist.train.images.shape))
print("%s %s" % (type(mnist.train.labels), mnist.train.labels.shape))

# - Number of train images is 55000.
# - **mnist.train.images** is a tensor with a shape of [55000, 784].
#
# - A one-hot vector is a vector which is 0 in most entries, and 1 in a single entry.
# - In this case, the $n$th digit will be represented as a vector which is 1 in the nth entry.
#   - For example, 3 would be $[0,0,0,1,0,0,0,0,0,0]$.
# - **mnist.train.labels** is a tensor with a shape of [55000, 10].
#

# In[4]:

import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# Show the first five training digits side by side.
fig = plt.figure(figsize=(20, 5))
for i in range(5):
    # Copy the flat 784-vector and view it as a 28x28 image.
    img = np.array(mnist.train.images[i])
    img.shape = (28, 28)
    plt.subplot(150 + (i + 1))  # 1 row, 5 columns, position i + 1
    plt.imshow(img)

# ## 3.
# Neural Network Model
# - Input Layer to Output Layer
#   - $i=1...784$
#   - $j=1...10$
# $$u_j = \sum_i W_{ji} x_i + b_j$$
# - Presentation of Matrix and Vector
#   - Shape of ${\bf W} = 10 \times 784$
#   - Shape of ${\bf x} = 784 \times 1$
#   - Shape of ${\bf b} = 10 \times 1$
#   - Shape of ${\bf u} = 10 \times 1$
# $${\bf u} = {\bf Wx + b}$$

# In[5]:

# Single-argument print(...) gives identical output on Python 2 and 3.
batch_images, batch_labels = mnist.train.next_batch(1)
print(batch_images.shape)
print(batch_images)
print("")
print(batch_labels.shape)
print(batch_labels)

# - Transposed Matrix Operation in Tensorflow
#   - Shape of ${\bf x} = 1 \times 784$
#   - Shape of ${\bf W} = 784 \times 10$
#   - Shape of ${\bf b} = 1 \times 10$
#   - Shape of ${\bf u} = 1 \times 10$
# $${\bf u} = {\bf xW + b}$$

# In[6]:

batch_images, batch_labels = mnist.train.next_batch(100)
print(batch_images.shape)
print(batch_images)
print("")
print(batch_labels.shape)
print(batch_labels)

# - Mini Batch (ex. batch size = 100)
#   - Shape of ${\bf x} = 100 \times 784$
#   - Shape of ${\bf W} = 784 \times 10$
#   - Shape of ${\bf b} = 100 \times 10$ (the code below stores b with shape [10]
#     and relies on broadcasting when it is added to xW)
#   - Shape of ${\bf u} = 100 \times 10$
# $${\bf U} = {\bf XW + B}$$

# In[7]:

import tensorflow as tf

# None leaves the batch dimension open so any number of images can be fed.
x = tf.placeholder(tf.float32, [None, 784])
print("x - %s" % (x.get_shape(),))

# - we also need to add a new placeholder to input the correct answers (ground truth):

# In[8]:

y = tf.placeholder(tf.float32, [None, 10])

# - construct a single layer neural network

# In[9]:

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
print("W - %s" % (W.get_shape(),))
print("b - %s" % (b.get_shape(),))

# In[10]:

u = tf.matmul(x, W) + b
print("u - %s" % (u.get_shape(),))

# - softmax
#
# $${\bf z} = softmax({\bf u})$$

# - Error functions
#   - Squared error
#   - Using maximum likelihood estimation
#   - Cross entropy

# In[11]:

# Keyword arguments make the logits/labels roles explicit; TensorFlow 1.0+
# rejects the positional form of this call.
error = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=u, labels=y))
# NOTE: despite its name, total_loss holds the training op returned by
# minimize(), not a loss value.  The name is kept because the training loop
# later in this file runs it via sess.run(total_loss, ...).
total_loss = tf.train.GradientDescentOptimizer(0.5).minimize(error)

# Suppose you have two tensors, where u contains computed scores for each class (for example, from u = W*x +b) and y
# contains one-hot encoded true labels.
#
# u  = ... # Predicted label, e.g. u = tf.matmul(X, W) + b
# y  = ... # True label, one-hot encoded
# 
# If you interpret the scores in u as unnormalized log probabilities, then they are logits.
#
# Additionally, the total cross-entropy loss computed in this manner:
#
# z = tf.nn.softmax(u)
# total_loss = tf.reduce_mean(-tf.reduce_sum(y * tf.log(z), [1]))
# 
# is essentially equivalent to the total cross-entropy loss computed with the function softmax_cross_entropy_with_logits():
#
# total_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(u, y))
# 
# ## 4. Training

# In[12]:

init = tf.initialize_all_variables()

# In[13]:

sess = tf.Session()
sess.run(init)

# In[14]:

# One pass over the training set in mini-batches of batch_size images.
batch_size = 100
total_batch = mnist.train.num_examples // batch_size
for i in range(total_batch):
    batch_images, batch_labels = mnist.train.next_batch(batch_size)
    # total_loss is the optimizer's training op; running it applies one
    # gradient-descent step on this batch.
    sess.run(total_loss, feed_dict={x: batch_images, y: batch_labels})

# ## 5. Evaluation

# In[15]:

# Single-argument print(...) gives identical output on Python 2 and 3.
print("%s %s" % (type(mnist.test.images), mnist.test.images.shape))
print("%s %s" % (type(mnist.test.labels), mnist.test.labels.shape))

# In[16]:

batch_x, batch_y = mnist.test.next_batch(10000)
prediction = sess.run(tf.argmax(u, 1), feed_dict={x: batch_x})
ground_truth = sess.run(tf.argmax(y, 1), feed_dict={y: batch_y})
print(prediction)
print(ground_truth)

# Indices (into the evaluated batch) where the predicted digit is wrong.
diff_index_list = [
    index
    for index, (predicted, actual) in enumerate(zip(prediction, ground_truth))
    if predicted != actual
]
# Derive the counts from the batch itself rather than hard-coding 10000, and
# avoid shadowing the builtin sum().
num_examples = len(prediction)
num_correct = num_examples - len(diff_index_list)
print(num_correct / float(num_examples))
print(len(diff_index_list))

import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# Show up to five misclassified digits; slicing keeps this safe when there
# are fewer than five errors.
fig = plt.figure(figsize=(20, 5))
for i, j in enumerate(diff_index_list[:5]):
    print("Error Index: %s, Prediction: %s, Ground Truth: %s" % (j, prediction[j], ground_truth[j]))
    # Index the batch that was actually scored so the image always matches
    # the prediction at position j.
    img = np.array(batch_x[j])
    img.shape = (28, 28)
    plt.subplot(150 + (i + 1))  # 1 row, 5 columns, position i + 1
    plt.imshow(img)

# In[17]:

# Same accuracy computed inside the graph: fraction of rows whose arg-max
# prediction equals the arg-max of the one-hot label.
prediction_and_ground_truth = tf.equal(tf.argmax(u, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(prediction_and_ground_truth, tf.float32))

# In[18]:

print(sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels}))

# The remaining sections open their own sessions, so release this one.
sess.close()

# ## 6.
# Single Layer Neural Network - All in one

# In[24]:

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the dataset here so this "all in one" section does not depend on a
# `mnist` object created by an earlier cell.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Parameters
training_epochs = 40
# The optimizer previously hard-coded 0.5 and ignored this setting (which was
# 0.001); the effective value is kept so training behaves as before.
learning_rate = 0.5
batch_size = 100

# Network Parameters
n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# Construct model
W = tf.Variable(tf.zeros([n_input, n_classes]))
b = tf.Variable(tf.zeros([n_classes]))
u = tf.matmul(x, W) + b

# Define loss and target loss function
# Equivalent hand-written form, kept for reference:
#   z = tf.nn.softmax(u)
#   error = tf.reduce_mean(-tf.reduce_sum(y * tf.log(z), reduction_indices=[1]))
# Keyword arguments make the logits/labels roles explicit; TensorFlow 1.0+
# rejects the positional form of this call.
error = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=u, labels=y))
# NOTE: total_loss is the training op returned by minimize(), not a loss value.
total_loss = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)

# Initializing the variables
init = tf.initialize_all_variables()

# Calculate accuracy with a Test model
prediction_ground_truth = tf.equal(tf.argmax(u, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(prediction_ground_truth, tf.float32))

# Launch the tensorflow graph
with tf.Session() as sess:
    sess.run(init)
    total_batch = mnist.train.num_examples // batch_size
    print("Total batch: %d" % total_batch)

    # Training cycle
    for epoch in range(training_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_images, batch_labels = mnist.train.next_batch(batch_size)
            sess.run(total_loss, feed_dict={x: batch_images, y: batch_labels})

        # Test-set accuracy after each epoch; eval() uses the session made
        # default by the enclosing `with`.
        print("Epoch %d Finished - Accuracy: %f" % (epoch, accuracy.eval({x: mnist.test.images, y: mnist.test.labels})))

    print("Optimization Finished!")

# ## 7.
# Multi Layer Neural Network - All in one

# In[25]:

# NOTE: this section uses `tf` and `mnist` as set up earlier in the file.

# Parameters
training_epochs = 40
learning_rate = 0.001
batch_size = 100
display_step = 1  # report test accuracy every `display_step` epochs

# Network Parameters
n_input = 784  # MNIST data input (img shape: 28*28)
n_hidden_1 = 256  # 1st layer number of features
n_hidden_2 = 256  # 2nd layer number of features
n_classes = 10  # MNIST total classes (0-9 digits)

# tf Graph input (tf.float32 spelled out to match the rest of the file)
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# Store layers weight & bias
weights = {
    'W1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'W2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes])),
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes])),
}


# Create model
def multilayer_perceptron(x, weights, biases):
    """Build a two-hidden-layer perceptron and return its output scores.

    Args:
        x: input tensor fed to the first layer.
        weights: mapping with keys 'W1', 'W2' and 'out' holding the weight
            matrices of the two hidden layers and the output layer.
        biases: mapping with keys 'b1', 'b2' and 'out' holding the matching
            bias vectors.

    Returns:
        The output-layer tensor ``z_3 * weights['out'] + biases['out']``.
        No softmax is applied here; the caller passes the result to
        softmax_cross_entropy_with_logits as logits.
    """
    # Hidden layer with RELU activation
    u_2 = tf.add(tf.matmul(x, weights['W1']), biases['b1'])
    z_2 = tf.nn.relu(u_2)
    # Hidden layer with RELU activation
    u_3 = tf.add(tf.matmul(z_2, weights['W2']), biases['b2'])
    z_3 = tf.nn.relu(u_3)
    # Output layer with linear activation
    u_4 = tf.add(tf.matmul(z_3, weights['out']), biases['out'])
    return u_4


# Construct model
pred = multilayer_perceptron(x, weights, biases)

# Define loss and target loss function
# Equivalent hand-written form, kept for reference:
#   pred = tf.nn.softmax(pred)
#   error = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=[1]))
# Keyword arguments make the logits/labels roles explicit; TensorFlow 1.0+
# rejects the positional form of this call.
error = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# NOTE: total_loss is the training op returned by minimize(), not a loss value.
total_loss = tf.train.GradientDescentOptimizer(learning_rate).minimize(error)

# Initializing the variables
init = tf.initialize_all_variables()

# Calculate accuracy with a Test model
prediction_ground_truth = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(prediction_ground_truth, tf.float32))

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    total_batch = mnist.train.num_examples // batch_size
    print("Total batch: %d" % total_batch)

    # Training cycle
    for epoch in range(training_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_images, batch_labels = mnist.train.next_batch(batch_size)
            sess.run(total_loss, feed_dict={x: batch_images, y: batch_labels})

        # With display_step = 1 this reports every epoch, as before.
        if epoch % display_step == 0:
            print("Epoch %d Finished - Accuracy: %f" % (epoch, accuracy.eval({x: mnist.test.images, y: mnist.test.labels})))

    print("Optimization Finished!")