Vahid Moosavi

25 April 2017

# Introduction to Recurrent Neural Nets¶

### To be discussed¶

• Key Concepts
• Simple Experiments
• Interesting applications
In [5]:
import warnings
warnings.filterwarnings("ignore")
import datetime
import pandas as pd
# import pandas.io.data
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import sys
import sompylib.sompy as SOM# from pandas import Series, DataFrame

from ipywidgets import interact, HTML, FloatSlider

%matplotlib inline


# Feed Forward Networks¶

• Assumption of independent data

# Recurrent Neural Networks¶

• When our decision depends on our current state
• Similar to (Hidden) Markov Chains, but not exactly the same approach: here we don't build a global state model

# Unrolled RNN¶

• To linearize in time: Multiple copies of the same cell

# $$h_t = \tanh ( W_{hh} h_{t-1} + W_{xh} x_t )$$¶

In [6]:
# http://karpathy.github.io/2015/05/21/rnn-effectiveness/
class RNN:
    """Minimal vanilla RNN cell.

    Implements the recurrence shown in the markdown above:
        h_t = tanh(W_hh h_{t-1} + W_xh x_t)
        y_t = W_hy h_t
    State lives in `self.h`, so repeated `step` calls carry memory forward.
    """

    def setup(self, hidden_size, input_size):
        """Initialize small random weights and a zero hidden state.

        hidden_size: dimension of the hidden state h.
        input_size:  dimension of each input x (and of the output y).
        """
        self.W_xh = np.random.randn(hidden_size, input_size) * 0.01   # input to hidden
        self.W_hh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden to hidden
        self.W_hy = np.random.randn(input_size, hidden_size) * 0.01   # hidden to output
        self.h = np.zeros((hidden_size))

    def step(self, x):
        """Advance one time step: update the hidden state from x, return the output vector.

        x: 1-D array of length input_size.
        Returns a 1-D array of length input_size.
        """
        # Fixed: the original computed np.dot(self.h, self.W_hh) with a spurious
        # .T (a no-op on a 1-D array), which disagrees with the W_hh @ h_{t-1}
        # form stated in the equation above and in the cited Karpathy post.
        self.h = np.tanh(np.dot(self.W_hh, self.h) + np.dot(self.W_xh, x))
        # Project the hidden state to the output space.
        y = np.dot(self.W_hy, self.h)
        return y

In [7]:
# Build two RNN cells with identical dimensions; in the next cell rnn2
# consumes rnn1's output at every step (a tiny two-layer stack).
rnn1= RNN()
rnn2 = RNN()
hidden_size = 10
input_size = 4
rnn1.setup(hidden_size,input_size)
rnn2.setup(hidden_size,input_size)

In [8]:
# In every step hidden states get updated

#two time steps

x = np.asarray([1,0,0,1])

# Step 1: feed x through both cells; each .step() call mutates that cell's hidden state.
y1 = rnn1.step(x)
y2 = rnn2.step(y1)
print x, y2

# Step 2: feed the previous output back in. The weights are unchanged, but the
# hidden states advanced, so the same computation yields a different output.
x = y2
y1 = rnn1.step(x)
y2 = rnn2.step(y1)
print x, y2

[1 0 0 1] [ -3.83812807e-07   1.26533933e-07  -3.69391726e-08   5.78367948e-07]
[ -3.83812807e-07   1.26533933e-07  -3.69391726e-08   5.78367948e-07] [ -2.89510828e-08   9.30318564e-09   1.33466938e-08   2.30315094e-09]


# However, for a long time almost nobody (only a few) believed in RNNs¶

• Memory problem

# Long Short-Term Memory (LSTM)¶

## Deep RNN¶

• Each cell unfolds horizontally
• We stack cells vertically

# Some examples using TensorFlow¶

## number of 1s in a binary number¶

• Architecture: We are just interested in the last output (Many to one)
In [9]:
#Source code with the blog post at http://monik.in/a-noobs-guide-to-implementing-rnn-lstm-using-tensorflow/
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import random
from random import shuffle
import tensorflow as tf
tf.reset_default_graph()
pow2 = 16

# Every 16-bit binary string, in random order.
train_input = ['{0:016b}'.format(i) for i in range(2**pow2)]
shuffle(train_input)
train_input = [[int(ch) for ch in bits] for bits in train_input]

# Reshape each example into a (pow2, 1) array: one bit per timestep,
# a single feature per step — the shape dynamic_rnn expects below.
sequences = []
for bits in train_input:
    sequences.append(np.array([[bit] for bit in bits]))
train_input = sequences

# Label for each sequence: one-hot vector over the possible popcounts 0..pow2.
train_output = []
for seq in train_input:
    ones = 0
    for step in seq:
        if step[0] == 1:
            ones += 1
    label = [0] * (pow2 + 1)
    label[ones] = 1
    train_output.append(label)

In [10]:
# Sanity-check one example: a (16, 1) column of bits and its one-hot count label.
print train_input[0]
print train_output[0]

[[1]
[1]
[1]
[0]
[1]
[1]
[0]
[0]
[1]
[0]
[1]
[0]
[1]
[0]
[1]
[1]]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]

In [11]:
NUM_EXAMPLES = 10000
test_input = train_input[NUM_EXAMPLES:]
test_output = train_output[NUM_EXAMPLES:]
train_input = train_input[:NUM_EXAMPLES]
train_output = train_output[:NUM_EXAMPLES]

tf.reset_default_graph()
print "test and training data loaded"

data = tf.placeholder(tf.float32, [None, pow2,1]) #Number of examples, number of input, dimension of each input
target = tf.placeholder(tf.float32, [None, (pow2+1)])
num_hidden = 24
num_layers=2
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True)
cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)

weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))

prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)

cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
minimize = optimizer.minimize(cross_entropy)

mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))

# init_op = tf.initialize_all_variables()
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

batch_size = 200
no_of_batches = int(len(train_input)) / batch_size
epoch = 200
for i in range(epoch):
ptr = 0
for j in range(no_of_batches):
inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
ptr+=batch_size
sess.run(minimize,{data: inp, target: out})
if i%100 ==0:
incorrect = sess.run(error,{data: inp, target: out})
print "Epoch {} error: {}".format(i,incorrect*100)
incorrect = sess.run(error,{data: test_input, target: test_output})

print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))

test and training data loaded
Epoch 0 error: 77.4999976158
Epoch 100 error: 0.0
Epoch 200 error 0.1%

In [12]:
# Classify one fresh random bit string: true popcount vs. predicted class.
# The [np.newaxis, :, np.newaxis] turns shape (pow2,) into the (1, pow2, 1)
# batch shape the `data` placeholder expects.
tt = np.random.randint(0,2,size=pow2)[np.newaxis,:,np.newaxis]
p = sess.run(tf.argmax(prediction, 1),{data:tt})
print np.sum(tt),p[0]

10 10

In [13]:
# Release this graph's session resources before building the next model.
sess.close()


## Summing Two numbers¶

• Architecture: We are interested in all the outputs in time (many to many)
In [14]:
# https://gist.github.com/nivwusquorum/b18ce332bde37e156034e5d3f60f8a23

import numpy as np
import random
import tensorflow as tf
import tensorflow.contrib.layers as layers
tf.reset_default_graph()
# map_fn = tf.python.functional_ops.map_fn
map_fn = tf.map_fn

################################################################################
##                           DATASET GENERATION                               ##
##                                                                            ##
##  The problem we are trying to solve is adding two binary numbers. The      ##
##  numbers are reversed, so that the state of RNN can add the numbers        ##
##  perfectly provided it can learn to store carry in the state. Timestep t   ##
##  corresponds to bit len(number) - t.                                       ##
################################################################################

def as_bytes(num, final_size):
    """Encode `num` as a list of `final_size` bits, least-significant bit first.

    Timestep t of the RNN therefore sees bit t of the number, which lets the
    network carry the addition's carry forward in its state.
    """
    bits = []
    for _ in range(final_size):
        bits.append(num & 1)
        num >>= 1
    return bits

def generate_example(num_bits):
    """Draw two random (num_bits-1)-bit integers and return the bit encodings
    of (first summand, second summand, their sum), each `num_bits` long.
    """
    limit = 2 ** (num_bits - 1) - 1
    a = random.randint(0, limit)
    b = random.randint(0, limit)
    return (as_bytes(a, num_bits),
            as_bytes(b, num_bits),
            as_bytes(a + b, num_bits))

def generate_batch(num_bits, batch_size):
    """Generate one batch of the binary-addition problem.

    Returns
    -------
    x : np.array of shape (num_bits, batch_size, 2)
        Time-major inputs; the last axis holds one bit from each summand.
    y : np.array of shape (num_bits, batch_size, 1)
        Time-major targets; one bit of the sum per timestep.
    Bit index runs from the least-significant end (see generate_example).
    """
    x = np.empty((num_bits, batch_size, 2))
    y = np.empty((num_bits, batch_size, 1))

    for idx in range(batch_size):
        first, second, total = generate_example(num_bits)
        x[:, idx, 0] = first
        x[:, idx, 1] = second
        y[:, idx, 0] = total
    return x, y

################################################################################
##                           GRAPH DEFINITION                                 ##
################################################################################

INPUT_SIZE    = 2       # 2 bits per timestep (one from each summand)
RNN_HIDDEN    = 20
OUTPUT_SIZE   = 1       # 1 bit per timestep
TINY          = 1e-6    # to avoid NaNs in logs
LEARNING_RATE = 0.01

USE_LSTM = True

inputs  = tf.placeholder(tf.float32, (None, None, INPUT_SIZE))  # (time, batch, in)
outputs = tf.placeholder(tf.float32, (None, None, OUTPUT_SIZE)) # (time, batch, out)

if USE_LSTM:
    # Two stacked LSTM layers.
    num_layers = 2
    cell = tf.nn.rnn_cell.BasicLSTMCell(RNN_HIDDEN, state_is_tuple=True)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
else:
    cell = tf.nn.rnn_cell.BasicRNNCell(RNN_HIDDEN)

# Create initial state. Here it is just a constant tensor filled with zeros,
# but in principle it could be a learnable parameter. This is a bit tricky
# to do for LSTM's tuple state, but can be achieved by creating two vector
# Variables, which are then tiled along batch dimension and grouped into tuple.
batch_size    = tf.shape(inputs)[1]
initial_state = cell.zero_state(batch_size, tf.float32)

# Given inputs (time, batch, input_size) outputs a tuple
#  - outputs: (time, batch, output_size)  [do not mistake with OUTPUT_SIZE]
#  - states:  (time, batch, hidden_size)
rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state, time_major=True)

# project output from rnn output size to OUTPUT_SIZE. Sometimes it is worth adding
# an extra layer here.
final_projection = lambda x: layers.linear(x, num_outputs=OUTPUT_SIZE, activation_fn=tf.nn.sigmoid)

# apply projection to every timestep.
predicted_outputs = map_fn(final_projection, rnn_outputs)

# compute elementwise cross entropy.
error = -(outputs * tf.log(predicted_outputs + TINY) + (1.0 - outputs) * tf.log(1.0 - predicted_outputs + TINY))
error = tf.reduce_mean(error)

# optimize
# Fixed: the training loop below runs `train_fn`, but this definition had been
# lost from the cell (the source gist defines it exactly here).
train_fn = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(error)

# assuming that absolute difference between output and correct answer is 0.5
# or less we can round it to the correct output.
accuracy = tf.reduce_mean(tf.cast(tf.abs(outputs - predicted_outputs) < 0.5, tf.float32))

################################################################################
##                           TRAINING LOOP                                    ##
################################################################################

NUM_BITS = 10
ITERATIONS_PER_EPOCH = 100
BATCH_SIZE = 16

# Fixed validation batch so accuracy is comparable across epochs.
valid_x, valid_y = generate_batch(num_bits=NUM_BITS, batch_size=100)

session = tf.Session()
# For some reason it is our job to do this:
session.run(tf.global_variables_initializer())

for epoch in range(200):
    epoch_error = 0
    for _ in range(ITERATIONS_PER_EPOCH):
        # here train_fn is what triggers backprop. error and accuracy on their
        # own do not trigger the backprop.
        # Fresh random batch every iteration — data is generated, not stored.
        x, y = generate_batch(num_bits=NUM_BITS, batch_size=BATCH_SIZE)
        epoch_error += session.run([error, train_fn], {
            inputs: x,
            outputs: y,
        })[0]
    epoch_error /= ITERATIONS_PER_EPOCH
    valid_accuracy = session.run(accuracy, {
        inputs:  valid_x,
        outputs: valid_y,
    })
    if epoch%10==0:
        print ("Epoch %d, train error: %.2f, valid accuracy: %.1f %%" % (epoch, epoch_error, valid_accuracy * 100.0))
#

Epoch 0, train error: 0.67, valid accuracy: 61.1 %
Epoch 10, train error: 0.00, valid accuracy: 100.0 %
Epoch 20, train error: 0.00, valid accuracy: 100.0 %
Epoch 30, train error: 0.00, valid accuracy: 100.0 %
Epoch 40, train error: 0.00, valid accuracy: 100.0 %
Epoch 50, train error: 0.00, valid accuracy: 100.0 %
Epoch 60, train error: 0.00, valid accuracy: 100.0 %
Epoch 70, train error: 0.00, valid accuracy: 100.0 %
Epoch 80, train error: 0.00, valid accuracy: 100.0 %
Epoch 90, train error: -0.00, valid accuracy: 100.0 %
Epoch 100, train error: -0.00, valid accuracy: 100.0 %
Epoch 110, train error: -0.00, valid accuracy: 100.0 %
Epoch 120, train error: -0.00, valid accuracy: 100.0 %
Epoch 130, train error: -0.00, valid accuracy: 100.0 %
Epoch 140, train error: -0.00, valid accuracy: 100.0 %
Epoch 150, train error: -0.00, valid accuracy: 100.0 %
Epoch 160, train error: -0.00, valid accuracy: 100.0 %
Epoch 170, train error: -0.00, valid accuracy: 100.0 %
Epoch 180, train error: -0.00, valid accuracy: 100.0 %
Epoch 190, train error: -0.00, valid accuracy: 100.0 %

In [15]:
# Run the trained network over the validation batch (forward pass only).
preds_valid = session.run(predicted_outputs, {
    inputs:  valid_x,
    outputs: valid_y,
})

In [16]:
# Compare rounded predicted bits against the ground-truth sum for one example.
i = 20
print (np.around(preds_valid)[:,i,0])
print (valid_y[:,i,0])

[ 1.  1.  0.  0.  1.  1.  1.  1.  1.  0.]
[ 1.  1.  0.  0.  1.  1.  1.  1.  1.  0.]

In [17]:
# Release this graph's session resources before the MNIST example below.
session.close()


## MNIST RNN (Image Classification)¶

• Architecture:
• We have sequences of rows (columns) of images. (many to one)
• We are interested to predict a class only at the end
In [18]:
# From: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/recurrent_network.ipynb

'''
A Recurrent Neural Network (LSTM) implementation example using TensorFlow library.
This example is using the MNIST database of handwritten digits (http://yann.lecun.com/exdb/mnist/)
Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''

import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Fixed: `mnist` is used by the training cell below but was never assigned here;
# the "Extracting /tmp/data/..." output shows this call ran in the original notebook.
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


''' To classify images using a recurrent neural network, we consider every image row as a sequence of pixels. Because MNIST image shape is 28*28px, we will then handle 28 sequences of 28 steps for every sample. '''

In [19]:
# Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10

tf.reset_default_graph()

# Network Parameters
n_input = 28 # MNIST data input (img shape: 28*28)
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 10 # MNIST total classes (0-9 digits)

# tf Graph input: each image is fed as a sequence of 28 rows of 28 pixels.
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# Define weights for the final (hidden state -> class logits) projection
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Two stacked LSTM layers; keep only the last timestep's output (many-to-one).
num_layers = 2
lstm_cell = tf.nn.rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
lstm_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers, state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)

pred = tf.matmul(last, weights['out']) + biases['out']

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
# Fixed: `optimizer` is run by the training loop below but was never defined
# in this cell (the source example defines it exactly here).
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Launch the graph
sess1 = tf.Session()
sess1.run(tf.global_variables_initializer())

step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    # Reshape data to get 28 seq of 28 elements
    batch_x = batch_x.reshape((batch_size, n_steps, n_input))
    # Run optimization op (backprop)
    sess1.run(optimizer, feed_dict={x: batch_x, y: batch_y})
    if step % display_step == 0:
        # Calculate batch accuracy
        acc = sess1.run(accuracy, feed_dict={x: batch_x, y: batch_y})
        # Calculate batch loss
        loss = sess1.run(cost, feed_dict={x: batch_x, y: batch_y})
        print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
              "{:.6f}".format(loss) + ", Training Accuracy= " + \
              "{:.5f}".format(acc))
    step += 1
print("Optimization Finished!")

# Calculate accuracy for 128 mnist test images
test_len = 128
test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
test_label = mnist.test.labels[:test_len]
print("Testing Accuracy:", \
      sess1.run(accuracy, feed_dict={x: test_data, y: test_label}))

Iter 1280, Minibatch Loss= 1.355216, Training Accuracy= 0.54688
Iter 2560, Minibatch Loss= 1.186185, Training Accuracy= 0.64062
Iter 3840, Minibatch Loss= 0.959371, Training Accuracy= 0.69531
Iter 5120, Minibatch Loss= 0.756388, Training Accuracy= 0.76562
Iter 6400, Minibatch Loss= 0.597719, Training Accuracy= 0.78125
Iter 7680, Minibatch Loss= 0.966340, Training Accuracy= 0.64062
Iter 8960, Minibatch Loss= 0.563785, Training Accuracy= 0.83594
Iter 10240, Minibatch Loss= 0.472388, Training Accuracy= 0.82812
Iter 11520, Minibatch Loss= 0.273660, Training Accuracy= 0.94531
Iter 12800, Minibatch Loss= 0.521885, Training Accuracy= 0.82031
Iter 14080, Minibatch Loss= 0.402212, Training Accuracy= 0.87500
Iter 15360, Minibatch Loss= 0.248117, Training Accuracy= 0.94531
Iter 16640, Minibatch Loss= 0.305744, Training Accuracy= 0.93750
Iter 17920, Minibatch Loss= 0.227243, Training Accuracy= 0.92188
Iter 19200, Minibatch Loss= 0.252387, Training Accuracy= 0.93750
Iter 20480, Minibatch Loss= 0.116889, Training Accuracy= 0.98438
Iter 21760, Minibatch Loss= 0.330990, Training Accuracy= 0.89062
Iter 23040, Minibatch Loss= 0.106201, Training Accuracy= 0.96094
Iter 24320, Minibatch Loss= 0.324091, Training Accuracy= 0.89844
Iter 25600, Minibatch Loss= 0.340873, Training Accuracy= 0.89062
Iter 26880, Minibatch Loss= 0.170801, Training Accuracy= 0.95312
Iter 28160, Minibatch Loss= 0.202922, Training Accuracy= 0.95312
Iter 29440, Minibatch Loss= 0.261573, Training Accuracy= 0.92188
Iter 30720, Minibatch Loss= 0.219512, Training Accuracy= 0.92969
Iter 32000, Minibatch Loss= 0.152274, Training Accuracy= 0.93750
Iter 33280, Minibatch Loss= 0.172803, Training Accuracy= 0.93750
Iter 34560, Minibatch Loss= 0.178191, Training Accuracy= 0.96094
Iter 35840, Minibatch Loss= 0.190618, Training Accuracy= 0.96094
Iter 37120, Minibatch Loss= 0.230897, Training Accuracy= 0.91406
Iter 38400, Minibatch Loss= 0.111684, Training Accuracy= 0.96875
Iter 39680, Minibatch Loss= 0.116336, Training Accuracy= 0.96875
Iter 40960, Minibatch Loss= 0.208338, Training Accuracy= 0.92188
Iter 42240, Minibatch Loss= 0.099843, Training Accuracy= 0.96094
Iter 43520, Minibatch Loss= 0.175103, Training Accuracy= 0.93750
Iter 44800, Minibatch Loss= 0.188875, Training Accuracy= 0.93750
Iter 46080, Minibatch Loss= 0.076799, Training Accuracy= 0.96094
Iter 47360, Minibatch Loss= 0.203019, Training Accuracy= 0.94531
Iter 48640, Minibatch Loss= 0.190256, Training Accuracy= 0.92188
Iter 49920, Minibatch Loss= 0.138491, Training Accuracy= 0.95312
Iter 51200, Minibatch Loss= 0.084339, Training Accuracy= 0.96875
Iter 52480, Minibatch Loss= 0.144099, Training Accuracy= 0.96094
Iter 53760, Minibatch Loss= 0.046987, Training Accuracy= 0.97656
Iter 55040, Minibatch Loss= 0.242961, Training Accuracy= 0.91406
Iter 56320, Minibatch Loss= 0.127308, Training Accuracy= 0.95312
Iter 57600, Minibatch Loss= 0.075414, Training Accuracy= 0.96875
Iter 58880, Minibatch Loss= 0.185896, Training Accuracy= 0.94531
Iter 60160, Minibatch Loss= 0.029974, Training Accuracy= 1.00000
Iter 61440, Minibatch Loss= 0.117170, Training Accuracy= 0.95312
Iter 62720, Minibatch Loss= 0.118068, Training Accuracy= 0.95312
Iter 64000, Minibatch Loss= 0.145590, Training Accuracy= 0.94531
Iter 65280, Minibatch Loss= 0.131703, Training Accuracy= 0.96094
Iter 66560, Minibatch Loss= 0.101745, Training Accuracy= 0.98438
Iter 67840, Minibatch Loss= 0.137928, Training Accuracy= 0.95312
Iter 69120, Minibatch Loss= 0.102363, Training Accuracy= 0.96875
Iter 70400, Minibatch Loss= 0.035047, Training Accuracy= 1.00000
Iter 71680, Minibatch Loss= 0.211946, Training Accuracy= 0.95312
Iter 72960, Minibatch Loss= 0.115560, Training Accuracy= 0.95312
Iter 74240, Minibatch Loss= 0.072575, Training Accuracy= 0.96875
Iter 75520, Minibatch Loss= 0.145280, Training Accuracy= 0.96094
Iter 76800, Minibatch Loss= 0.154121, Training Accuracy= 0.96094
Iter 78080, Minibatch Loss= 0.167381, Training Accuracy= 0.96094
Iter 79360, Minibatch Loss= 0.094849, Training Accuracy= 0.95312
Iter 80640, Minibatch Loss= 0.021832, Training Accuracy= 1.00000
Iter 81920, Minibatch Loss= 0.072856, Training Accuracy= 0.97656
Iter 83200, Minibatch Loss= 0.039065, Training Accuracy= 0.99219
Iter 84480, Minibatch Loss= 0.072395, Training Accuracy= 0.96875
Iter 85760, Minibatch Loss= 0.090926, Training Accuracy= 0.96094
Iter 87040, Minibatch Loss= 0.026965, Training Accuracy= 1.00000
Iter 88320, Minibatch Loss= 0.051769, Training Accuracy= 0.96875
Iter 89600, Minibatch Loss= 0.086891, Training Accuracy= 0.96875
Iter 90880, Minibatch Loss= 0.081950, Training Accuracy= 0.96094
Iter 92160, Minibatch Loss= 0.084406, Training Accuracy= 0.96094
Iter 93440, Minibatch Loss= 0.017629, Training Accuracy= 1.00000
Iter 94720, Minibatch Loss= 0.102386, Training Accuracy= 0.96875
Iter 96000, Minibatch Loss= 0.037047, Training Accuracy= 0.99219
Iter 97280, Minibatch Loss= 0.046284, Training Accuracy= 0.99219
Iter 98560, Minibatch Loss= 0.167275, Training Accuracy= 0.95312
Iter 99840, Minibatch Loss= 0.086690, Training Accuracy= 0.97656
Optimization Finished!
('Testing Accuracy:', 0.984375)


## Next steps hopefully by the next week¶

• Try this! Polygon example: Learn to draw convex polygons as a sequence of two dimensional points
• Sentiment analysis
• Text and time series
• Multi-dimensional time series prediction

# Other applications!¶

• handwriting generation
• Image captioning
• Seq2Seq models for natural language translation

• Learning and Translation in general

• Extensions to LSTM: Neural Turing Machine and differentiable-neural-computers