import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
Load necessary libraries
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
Extracting /tmp/data/train-images-idx3-ubyte.gz Extracting /tmp/data/train-labels-idx1-ubyte.gz Extracting /tmp/data/t10k-images-idx3-ubyte.gz Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Load MNIST Dataset
init_param = lambda shape: tf.random_normal(shape, dtype=tf.float32)
with tf.name_scope("IO"):
inputs = tf.placeholder(tf.float32, [None, 784], name="X")
targets = tf.placeholder(tf.float32, [None, 10], name="Yhat")
with tf.name_scope("LogReg"):
W = tf.Variable(init_param([784, 10]), name="W")
B = tf.Variable(init_param([10]))
logits = tf.matmul(inputs, W) + B
y = tf.nn.softmax(logits)
with tf.name_scope("train"):
learning_rate = tf.Variable(0.5, trainable=False)
cost_op = tf.nn.softmax_cross_entropy_with_logits(logits, targets)
cost_op = tf.reduce_mean(cost_op)
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_op)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(targets,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))*100
Create TensorFlow graph
tolerance = 1e-4
# Perform Stochastic Gradient Descent
epochs = 1
last_cost = 0
alpha = 0.7
max_epochs = 100
batch_size = 50
costs = []
sess = tf.Session()
print "Beginning Training"
with sess.as_default():
init = tf.initialize_all_variables()
sess.run(init)
sess.run(tf.assign(learning_rate, alpha))
writer = tf.train.SummaryWriter("/tmp/tboard", sess.graph) # Create TensorBoard files
while True:
num_batches = int(mnist.train.num_examples/batch_size)
cost=0
for _ in range(num_batches):
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
tcost, _ = sess.run([cost_op, train_op], feed_dict={inputs: batch_xs, targets: batch_ys})
cost += tcost
cost /= num_batches
tcost = sess.run(cost_op, feed_dict={inputs: mnist.test.images, targets: mnist.test.labels})
costs.append([cost, tcost])
# Keep track of our performance
if epochs%5==0:
acc = sess.run(accuracy, feed_dict={inputs: mnist.train.images, targets: mnist.train.labels})
print "Epoch: %d - Error: %.4f - Accuracy - %.2f%%" %(epochs, cost, acc)
# Stopping Condition
if abs(last_cost - cost) < tolerance or epochs > max_epochs:
print "Converged."
break
last_cost = cost
epochs += 1
tcost, taccuracy = sess.run([cost_op, accuracy], feed_dict={inputs: mnist.test.images, targets: mnist.test.labels})
print "Test Cost: %.4f - Accuracy: %.2f%% " %(tcost, taccuracy)
Beginning Training Epoch: 5 - Error: 0.3686 - Accuracy - 90.58% Epoch: 10 - Error: 0.3192 - Accuracy - 92.03% Epoch: 15 - Error: 0.3007 - Accuracy - 92.36% Epoch: 20 - Error: 0.2882 - Accuracy - 92.47% Epoch: 25 - Error: 0.2827 - Accuracy - 92.99% Epoch: 30 - Error: 0.2782 - Accuracy - 92.12% Epoch: 35 - Error: 0.2783 - Accuracy - 92.86% Epoch: 40 - Error: 0.2733 - Accuracy - 92.85% Epoch: 45 - Error: 0.2704 - Accuracy - 93.25% Epoch: 50 - Error: 0.2693 - Accuracy - 93.23% Epoch: 55 - Error: 0.2680 - Accuracy - 93.15% Epoch: 60 - Error: 0.2663 - Accuracy - 92.03% Epoch: 65 - Error: 0.2657 - Accuracy - 92.50% Epoch: 70 - Error: 0.2648 - Accuracy - 92.82% Epoch: 75 - Error: 0.2631 - Accuracy - 93.29% Epoch: 80 - Error: 0.2639 - Accuracy - 93.51% Epoch: 85 - Error: 0.2624 - Accuracy - 93.30% Epoch: 90 - Error: 0.2614 - Accuracy - 93.16% Epoch: 95 - Error: 0.2600 - Accuracy - 93.43% Epoch: 100 - Error: 0.2595 - Accuracy - 93.21% Epoch: 105 - Error: 0.2589 - Accuracy - 92.32% Converged. Test Cost: 0.3490 - Accuracy: 90.49%
Perform gradient descent to learn model
epochs = len(costs)
costs = np.array(costs)
plt.plot(range(epochs), costs[:,0], label="Training")
plt.plot(range(epochs), costs[:,1], label="Test")
plt.grid()
plt.xlabel("Epochs")
plt.ylabel("Cross Entropy")
plt.title("Training Curve")
plt.legend(loc='best')
plt.show()
Plot train curves