import warnings
warnings.filterwarnings("ignore")
import datetime
import pandas as pd
# import pandas.io.data
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import sys
import sompylib.sompy as SOM# from pandas import Series, DataFrame
from ipywidgets import interact, HTML, FloatSlider
# %matplotlib inline  (Jupyter magic — renders figures inline; not valid plain-Python syntax)
# source: http://colah.github.io/posts/2015-08-Understanding-LSTMs/
# http://karpathy.github.io/2015/05/21/rnn-effectiveness/
class RNN:
    """Minimal vanilla RNN cell: h' = tanh(W_hh.h + W_xh.x), y = W_hy.h'."""

    def setup(self, hidden_size, input_size):
        """Initialize small random weights and a zero hidden state.

        hidden_size: dimension of the hidden state vector.
        input_size:  dimension of both the input and the output vectors.
        """
        Wxh = np.random.randn(hidden_size, input_size) * 0.01   # input to hidden
        Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden to hidden
        Why = np.random.randn(input_size, hidden_size) * 0.01   # hidden to output
        self.W_hh = Whh
        self.h = np.zeros((hidden_size))
        self.W_xh = Wxh
        self.W_hy = Why

    def step(self, x):
        """Advance one timestep: update self.h from input x, return output y."""
        # BUG FIX: the original computed np.dot(self.h, self.W_hh), i.e.
        # W_hh^T . h, and applied a no-op .T to the resulting 1-D array.
        # Use the conventional recurrence W_hh . h instead.
        self.h = np.tanh(np.dot(self.W_hh, self.h) + np.dot(self.W_xh, x))
        # Project the hidden state down to the output vector.
        y = np.dot(self.W_hy, self.h)
        return y
# Stack two vanilla RNNs and run them for two time steps; the output of the
# stack is fed back in as the next input.
rnn1 = RNN()
rnn2 = RNN()
hidden_size = 10
input_size = 4
rnn1.setup(hidden_size, input_size)
rnn2.setup(hidden_size, input_size)

# In every step the hidden states get updated.
# Two time steps:
x = np.asarray([1, 0, 0, 1])
y1 = rnn1.step(x)
y2 = rnn2.step(y1)
print(x, y2)  # BUG FIX: py2 `print x, y2` -> py3 function call

x = y2
y1 = rnn1.step(x)
y2 = rnn2.step(y1)
print(x, y2)
# output: [1 0 0 1] [ -3.83812807e-07 1.26533933e-07 -3.69391726e-08 5.78367948e-07] [ -3.83812807e-07 1.26533933e-07 -3.69391726e-08 5.78367948e-07] [ -2.89510828e-08 9.30318564e-09 1.33466938e-08 2.30315094e-09]
# image from: http://deeplearning.net/tutorial/lstm.html
#Source code with the blog post at http://monik.in/a-noobs-guide-to-implementing-rnn-lstm-using-tensorflow/
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import random
from random import shuffle
import tensorflow as tf
tf.reset_default_graph()
# Build the bit-counting dataset: every 16-bit string paired with a one-hot
# vector marking how many 1-bits it contains.
pow2 = 16
# BUG FIX: the field width was hard-coded as 016b; derive it from pow2 so the
# dataset generalizes to other string lengths.
train_input = ['{0:0{1}b}'.format(i, pow2) for i in range(2 ** pow2)]
shuffle(train_input)
# Each example becomes a (pow2, 1) array: one bit per timestep.
train_input = [np.array([[int(b)] for b in s]) for s in train_input]
# Target: one-hot over the pow2+1 possible counts of 1-bits (0..pow2).
train_output = []
for bits in train_input:
    count = int(bits.sum())
    one_hot = [0] * (pow2 + 1)
    one_hot[count] = 1
    train_output.append(one_hot)
print(train_input[0])   # BUG FIX: py2 print statements -> py3 calls
print(train_output[0])
# output: [[1] [1] [1] [0] [1] [1] [0] [0] [1] [0] [1] [0] [1] [0] [1] [1]] [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
NUM_EXAMPLES = 10000
# Everything after the first NUM_EXAMPLES shuffled examples is held out as test.
test_input = train_input[NUM_EXAMPLES:]
test_output = train_output[NUM_EXAMPLES:]
train_input = train_input[:NUM_EXAMPLES]
train_output = train_output[:NUM_EXAMPLES]
tf.reset_default_graph()
print("test and training data loaded")  # BUG FIX: py2 print -> py3 call
# Graph: 2-layer LSTM reads the bit sequence; a softmax over the final hidden
# state predicts the count of 1-bits.
data = tf.placeholder(tf.float32, [None, pow2, 1])  # (examples, timesteps, input dim)
target = tf.placeholder(tf.float32, [None, (pow2 + 1)])
num_hidden = 24
num_layers = 2
# BUG FIX: `[cell] * num_layers` put the SAME cell object in every layer, which
# shares weights across layers (and raises a ValueError in newer TF 1.x).
# Build a fresh LSTMCell per layer instead.
cell = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(num_hidden, state_is_tuple=True)
     for _ in range(num_layers)],
    state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
# Reorder to (time, batch, hidden) so the last timestep can be gathered.
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
# Softmax classifier on the final hidden state.
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
# Clip to keep log() finite for near-zero probabilities.
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
# init_op = tf.initialize_all_variables()  # pre-TF-1.0 name
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
batch_size = 200
# BUG FIX: py2 "/" returned an int here; under py3 it yields a float and
# range() below would raise TypeError. Use floor division.
no_of_batches = len(train_input) // batch_size
epoch = 200
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr + batch_size], train_output[ptr:ptr + batch_size]
        ptr += batch_size
        sess.run(minimize, {data: inp, target: out})
    if i % 100 == 0:
        # Error on the last training batch only — a rough progress indicator.
        incorrect = sess.run(error, {data: inp, target: out})
        print("Epoch {} error: {}".format(i, incorrect * 100))  # BUG FIX: py2 print
# Final evaluation on the held-out test split.
incorrect = sess.run(error, {data: test_input, target: test_output})
print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))
# output: test and training data loaded Epoch 0 error: 77.4999976158 Epoch 100 error: 0.0 Epoch 200 error 0.1%
# Sanity check: predict the number of ones in one fresh random bit string.
tt = np.random.randint(0, 2, size=pow2)[np.newaxis, :, np.newaxis]
p = sess.run(tf.argmax(prediction, 1), {data: tt})
print(np.sum(tt), p[0])  # BUG FIX: py2 print -> py3 call; true count vs predicted
sess.close()
# https://gist.github.com/nivwusquorum/b18ce332bde37e156034e5d3f60f8a23
import numpy as np
import random
import tensorflow as tf
import tensorflow.contrib.layers as layers
tf.reset_default_graph()  # start from a clean default graph (safe to re-run this cell)
# map_fn = tf.python.functional_ops.map_fn  # pre-1.0 location of map_fn
map_fn = tf.map_fn  # applies a fn over the leading dimension (here: each timestep)
################################################################################
## DATASET GENERATION ##
## ##
## The problem we are trying to solve is adding two binary numbers. The ##
## numbers are reversed, so that the state of RNN can add the numbers ##
## perfectly provided it can learn to store carry in the state. Timestep t ##
## corresponds to bit len(number) - t. ##
################################################################################
def as_bytes(num, final_size):
    """Return `num` as a list of `final_size` bits, least-significant bit first."""
    bits = []
    while len(bits) < final_size:
        bits.append(num & 1)
        num >>= 1
    return bits
def generate_example(num_bits):
    """Sample one addition problem.

    Draws two addends that each fit in num_bits-1 bits (so the sum fits in
    num_bits bits) and returns (a, b, a+b), each as a little-endian bit list
    of length num_bits.
    """
    limit = 2 ** (num_bits - 1) - 1
    a = random.randint(0, limit)
    b = random.randint(0, limit)
    return (as_bytes(a, num_bits),
            as_bytes(b, num_bits),
            as_bytes(a + b, num_bits))
def generate_batch(num_bits, batch_size):
    """Generate one batch of the binary-addition problem.

    Returns
    -------
    x : np.array, shape (num_bits, batch_size, 2)
        Axis 0 is bit index from the end, axis 1 the example index in the
        batch, axis 2 selects the first or second summand.
    y : np.array, shape (num_bits, batch_size, 1)
        Same layout; the single channel holds the sum's bits.
    """
    x = np.empty((num_bits, batch_size, 2))
    y = np.empty((num_bits, batch_size, 1))
    for col in range(batch_size):
        first, second, total = generate_example(num_bits)
        x[:, col, 0] = first
        x[:, col, 1] = second
        y[:, col, 0] = total
    return x, y
################################################################################
## GRAPH DEFINITION ##
################################################################################
INPUT_SIZE = 2       # 2 bits per timestep (one from each summand)
RNN_HIDDEN = 20
OUTPUT_SIZE = 1      # 1 bit per timestep (the sum bit)
TINY = 1e-6          # to avoid NaNs in logs
LEARNING_RATE = 0.01
USE_LSTM = True

inputs = tf.placeholder(tf.float32, (None, None, INPUT_SIZE))    # (time, batch, in)
outputs = tf.placeholder(tf.float32, (None, None, OUTPUT_SIZE))  # (time, batch, out)

if USE_LSTM:
    num_layers = 2
    # BUG FIX: `[cell] * num_layers` put the SAME cell object in every layer,
    # which shares weights across layers (and raises in newer TF 1.x).
    # Build a fresh cell per layer instead.
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [tf.nn.rnn_cell.BasicLSTMCell(RNN_HIDDEN, state_is_tuple=True)
         for _ in range(num_layers)],
        state_is_tuple=True)
else:
    cell = tf.nn.rnn_cell.BasicRNNCell(RNN_HIDDEN)

# Create initial state. Here it is just a constant tensor filled with zeros,
# but in principle it could be a learnable parameter. This is a bit tricky
# to do for LSTM's tuple state, but can be achieved by creating two vector
# Variables, which are then tiled along batch dimension and grouped into tuple.
batch_size = tf.shape(inputs)[1]
initial_state = cell.zero_state(batch_size, tf.float32)

# Given inputs (time, batch, input_size) outputs a tuple
#  - outputs: (time, batch, output_size)  [do not mistake with OUTPUT_SIZE]
#  - states:  (time, batch, hidden_size)
rnn_outputs, rnn_states = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state, time_major=True)

# Project output from rnn output size to OUTPUT_SIZE. Sometimes it is worth adding
# an extra layer here.
final_projection = lambda x: layers.linear(x, num_outputs=OUTPUT_SIZE, activation_fn=tf.nn.sigmoid)

# Apply projection to every timestep.
predicted_outputs = map_fn(final_projection, rnn_outputs)

# Elementwise binary cross entropy; TINY keeps log() finite at 0 and 1.
error = -(outputs * tf.log(predicted_outputs + TINY) + (1.0 - outputs) * tf.log(1.0 - predicted_outputs + TINY))
error = tf.reduce_mean(error)

# Optimize
train_fn = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(error)

# Assuming that absolute difference between output and correct answer is 0.5
# or less we can round it to the correct output.
accuracy = tf.reduce_mean(tf.cast(tf.abs(outputs - predicted_outputs) < 0.5, tf.float32))
################################################################################
## TRAINING LOOP ##
################################################################################
NUM_BITS = 10
ITERATIONS_PER_EPOCH = 100
BATCH_SIZE = 16

# A fixed validation batch, reused every epoch for comparable accuracy numbers.
valid_x, valid_y = generate_batch(num_bits=NUM_BITS, batch_size=100)

session = tf.Session()
# Variables must be initialized explicitly before the first run.
session.run(tf.global_variables_initializer())

for epoch in range(200):
    epoch_error = 0
    for _ in range(ITERATIONS_PER_EPOCH):
        # Running train_fn is what triggers backprop; fetching error/accuracy
        # alone would not update any weights.
        x, y = generate_batch(num_bits=NUM_BITS, batch_size=BATCH_SIZE)
        batch_error, _ = session.run([error, train_fn], {inputs: x, outputs: y})
        epoch_error += batch_error
    epoch_error /= ITERATIONS_PER_EPOCH
    valid_accuracy = session.run(accuracy, {inputs: valid_x, outputs: valid_y})
    if epoch % 10 == 0:
        print("Epoch %d, train error: %.2f, valid accuracy: %.1f %%" % (epoch, epoch_error, valid_accuracy * 100.0))
#
Epoch 0, train error: 0.67, valid accuracy: 61.1 % Epoch 10, train error: 0.00, valid accuracy: 100.0 % Epoch 20, train error: 0.00, valid accuracy: 100.0 % Epoch 30, train error: 0.00, valid accuracy: 100.0 % Epoch 40, train error: 0.00, valid accuracy: 100.0 % Epoch 50, train error: 0.00, valid accuracy: 100.0 % Epoch 60, train error: 0.00, valid accuracy: 100.0 % Epoch 70, train error: 0.00, valid accuracy: 100.0 % Epoch 80, train error: 0.00, valid accuracy: 100.0 % Epoch 90, train error: -0.00, valid accuracy: 100.0 % Epoch 100, train error: -0.00, valid accuracy: 100.0 % Epoch 110, train error: -0.00, valid accuracy: 100.0 % Epoch 120, train error: -0.00, valid accuracy: 100.0 % Epoch 130, train error: -0.00, valid accuracy: 100.0 % Epoch 140, train error: -0.00, valid accuracy: 100.0 % Epoch 150, train error: -0.00, valid accuracy: 100.0 % Epoch 160, train error: -0.00, valid accuracy: 100.0 % Epoch 170, train error: -0.00, valid accuracy: 100.0 % Epoch 180, train error: -0.00, valid accuracy: 100.0 % Epoch 190, train error: -0.00, valid accuracy: 100.0 %
# Inspect the rounded predictions against the target for one validation example.
feed = {inputs: valid_x, outputs: valid_y}
preds_valid = session.run(predicted_outputs, feed)
i = 20
print(np.around(preds_valid)[:, i, 0])
print(valid_y[:, i, 0])
# output: [ 1. 1. 0. 0. 1. 1. 1. 1. 1. 0.] [ 1. 1. 0. 0. 1. 1. 1. 1. 1. 0.]
session.close()
# From: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/recurrent_network.ipynb
'''
A Recurrent Neural Network (LSTM) implementation example using TensorFlow library.
This example is using the MNIST database of handwritten digits (http://yann.lecun.com/exdb/mnist/)
Long Short Term Memory paper: http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# output: Extracting /tmp/data/train-images-idx3-ubyte.gz Extracting /tmp/data/train-labels-idx1-ubyte.gz Extracting /tmp/data/t10k-images-idx3-ubyte.gz Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
''' To classify images using a reccurent neural network, we consider every image row as a sequence of pixels. Because MNIST image shape is 28*28px, we will then handle 28 sequences of 28 steps for every sample. '''
# Parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
tf.reset_default_graph()

# Network Parameters
n_input = 28    # MNIST data input (img shape: 28*28): one image row per step
n_steps = 28    # timesteps: one per image row
n_hidden = 128  # hidden layer num of features
n_classes = 10  # MNIST total classes (0-9 digits)

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# Weights/bias for the final hidden -> class-logits projection
weights = {
    'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
    'out': tf.Variable(tf.random_normal([n_classes]))
}

num_layers = 2
# BUG FIX: `[lstm_cell] * num_layers` reused a single cell object for every
# layer, sharing weights across layers (and erroring in newer TF 1.x).
# Build one LSTMCell per layer.
lstm_cell = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
     for _ in range(num_layers)],
    state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])                   # -> (steps, batch, hidden)
last = tf.gather(val, int(val.get_shape()[0]) - 1)   # output at the final timestep
pred = tf.matmul(last, weights['out']) + biases['out']

# Define loss and optimizer.
# BUG FIX: since TF 1.0, softmax_cross_entropy_with_logits requires named
# arguments; the positional call (pred, y) matched the wrong parameters.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Launch the graph and train until training_iters examples have been consumed.
sess1 = tf.Session()
# Variables must be initialized explicitly before the first run.
sess1.run(tf.global_variables_initializer())
# with tf.Session() as sess:
#     sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    # Reshape data to get 28 seq of 28 elements
    batch_x = batch_x.reshape((batch_size, n_steps, n_input))
    # Run optimization op (backprop)
    sess1.run(optimizer, feed_dict={x: batch_x, y: batch_y})
    if step % display_step == 0:
        # NOTE(review): loss/accuracy below are measured on the training batch
        # just optimized on, so they are optimistic progress indicators only.
        # Calculate batch accuracy
        acc = sess1.run(accuracy, feed_dict={x: batch_x, y: batch_y})
        # Calculate batch loss
        loss = sess1.run(cost, feed_dict={x: batch_x, y: batch_y})
        print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
            "{:.6f}".format(loss) + ", Training Accuracy= " + \
            "{:.5f}".format(acc))
    step += 1
print("Optimization Finished!")
# Calculate accuracy for 128 mnist test images
test_len = 128
test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
test_label = mnist.test.labels[:test_len]
print("Testing Accuracy:", \
    sess1.run(accuracy, feed_dict={x: test_data, y: test_label}))
Iter 1280, Minibatch Loss= 1.355216, Training Accuracy= 0.54688 Iter 2560, Minibatch Loss= 1.186185, Training Accuracy= 0.64062 Iter 3840, Minibatch Loss= 0.959371, Training Accuracy= 0.69531 Iter 5120, Minibatch Loss= 0.756388, Training Accuracy= 0.76562 Iter 6400, Minibatch Loss= 0.597719, Training Accuracy= 0.78125 Iter 7680, Minibatch Loss= 0.966340, Training Accuracy= 0.64062 Iter 8960, Minibatch Loss= 0.563785, Training Accuracy= 0.83594 Iter 10240, Minibatch Loss= 0.472388, Training Accuracy= 0.82812 Iter 11520, Minibatch Loss= 0.273660, Training Accuracy= 0.94531 Iter 12800, Minibatch Loss= 0.521885, Training Accuracy= 0.82031 Iter 14080, Minibatch Loss= 0.402212, Training Accuracy= 0.87500 Iter 15360, Minibatch Loss= 0.248117, Training Accuracy= 0.94531 Iter 16640, Minibatch Loss= 0.305744, Training Accuracy= 0.93750 Iter 17920, Minibatch Loss= 0.227243, Training Accuracy= 0.92188 Iter 19200, Minibatch Loss= 0.252387, Training Accuracy= 0.93750 Iter 20480, Minibatch Loss= 0.116889, Training Accuracy= 0.98438 Iter 21760, Minibatch Loss= 0.330990, Training Accuracy= 0.89062 Iter 23040, Minibatch Loss= 0.106201, Training Accuracy= 0.96094 Iter 24320, Minibatch Loss= 0.324091, Training Accuracy= 0.89844 Iter 25600, Minibatch Loss= 0.340873, Training Accuracy= 0.89062 Iter 26880, Minibatch Loss= 0.170801, Training Accuracy= 0.95312 Iter 28160, Minibatch Loss= 0.202922, Training Accuracy= 0.95312 Iter 29440, Minibatch Loss= 0.261573, Training Accuracy= 0.92188 Iter 30720, Minibatch Loss= 0.219512, Training Accuracy= 0.92969 Iter 32000, Minibatch Loss= 0.152274, Training Accuracy= 0.93750 Iter 33280, Minibatch Loss= 0.172803, Training Accuracy= 0.93750 Iter 34560, Minibatch Loss= 0.178191, Training Accuracy= 0.96094 Iter 35840, Minibatch Loss= 0.190618, Training Accuracy= 0.96094 Iter 37120, Minibatch Loss= 0.230897, Training Accuracy= 0.91406 Iter 38400, Minibatch Loss= 0.111684, Training Accuracy= 0.96875 Iter 39680, Minibatch Loss= 0.116336, Training Accuracy= 
0.96875 Iter 40960, Minibatch Loss= 0.208338, Training Accuracy= 0.92188 Iter 42240, Minibatch Loss= 0.099843, Training Accuracy= 0.96094 Iter 43520, Minibatch Loss= 0.175103, Training Accuracy= 0.93750 Iter 44800, Minibatch Loss= 0.188875, Training Accuracy= 0.93750 Iter 46080, Minibatch Loss= 0.076799, Training Accuracy= 0.96094 Iter 47360, Minibatch Loss= 0.203019, Training Accuracy= 0.94531 Iter 48640, Minibatch Loss= 0.190256, Training Accuracy= 0.92188 Iter 49920, Minibatch Loss= 0.138491, Training Accuracy= 0.95312 Iter 51200, Minibatch Loss= 0.084339, Training Accuracy= 0.96875 Iter 52480, Minibatch Loss= 0.144099, Training Accuracy= 0.96094 Iter 53760, Minibatch Loss= 0.046987, Training Accuracy= 0.97656 Iter 55040, Minibatch Loss= 0.242961, Training Accuracy= 0.91406 Iter 56320, Minibatch Loss= 0.127308, Training Accuracy= 0.95312 Iter 57600, Minibatch Loss= 0.075414, Training Accuracy= 0.96875 Iter 58880, Minibatch Loss= 0.185896, Training Accuracy= 0.94531 Iter 60160, Minibatch Loss= 0.029974, Training Accuracy= 1.00000 Iter 61440, Minibatch Loss= 0.117170, Training Accuracy= 0.95312 Iter 62720, Minibatch Loss= 0.118068, Training Accuracy= 0.95312 Iter 64000, Minibatch Loss= 0.145590, Training Accuracy= 0.94531 Iter 65280, Minibatch Loss= 0.131703, Training Accuracy= 0.96094 Iter 66560, Minibatch Loss= 0.101745, Training Accuracy= 0.98438 Iter 67840, Minibatch Loss= 0.137928, Training Accuracy= 0.95312 Iter 69120, Minibatch Loss= 0.102363, Training Accuracy= 0.96875 Iter 70400, Minibatch Loss= 0.035047, Training Accuracy= 1.00000 Iter 71680, Minibatch Loss= 0.211946, Training Accuracy= 0.95312 Iter 72960, Minibatch Loss= 0.115560, Training Accuracy= 0.95312 Iter 74240, Minibatch Loss= 0.072575, Training Accuracy= 0.96875 Iter 75520, Minibatch Loss= 0.145280, Training Accuracy= 0.96094 Iter 76800, Minibatch Loss= 0.154121, Training Accuracy= 0.96094 Iter 78080, Minibatch Loss= 0.167381, Training Accuracy= 0.96094 Iter 79360, Minibatch Loss= 0.094849, 
Training Accuracy= 0.95312 Iter 80640, Minibatch Loss= 0.021832, Training Accuracy= 1.00000 Iter 81920, Minibatch Loss= 0.072856, Training Accuracy= 0.97656 Iter 83200, Minibatch Loss= 0.039065, Training Accuracy= 0.99219 Iter 84480, Minibatch Loss= 0.072395, Training Accuracy= 0.96875 Iter 85760, Minibatch Loss= 0.090926, Training Accuracy= 0.96094 Iter 87040, Minibatch Loss= 0.026965, Training Accuracy= 1.00000 Iter 88320, Minibatch Loss= 0.051769, Training Accuracy= 0.96875 Iter 89600, Minibatch Loss= 0.086891, Training Accuracy= 0.96875 Iter 90880, Minibatch Loss= 0.081950, Training Accuracy= 0.96094 Iter 92160, Minibatch Loss= 0.084406, Training Accuracy= 0.96094 Iter 93440, Minibatch Loss= 0.017629, Training Accuracy= 1.00000 Iter 94720, Minibatch Loss= 0.102386, Training Accuracy= 0.96875 Iter 96000, Minibatch Loss= 0.037047, Training Accuracy= 0.99219 Iter 97280, Minibatch Loss= 0.046284, Training Accuracy= 0.99219 Iter 98560, Minibatch Loss= 0.167275, Training Accuracy= 0.95312 Iter 99840, Minibatch Loss= 0.086690, Training Accuracy= 0.97656 Optimization Finished! ('Testing Accuracy:', 0.984375)