Simple example of many-to-one classification (word sentiment classification) by stacked RNN with dropout.

- tf.data
- padding technique by user-defined function (pad_seq)
- tf.nn.embedding_lookup for getting vectors of tokens (e.g. word, character)
- tf.contrib.rnn.DropoutWrapper
- tf.contrib.rnn.MultiRNNCell
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import string
%matplotlib inline
slim = tf.contrib.slim
print(tf.__version__)
1.8.0
words = ['good', 'bad', 'amazing', 'so good', 'bull shit', 'awesome']
y = [[1.,0.], [0.,1.], [1.,0.], [1.,0.], [0.,1.], [1.,0.]] # one-hot labels: [1, 0] = positive, [0, 1] = negative
# Character quantization
char_space = string.ascii_lowercase
char_space = char_space + ' ' + '*'
char_space
'abcdefghijklmnopqrstuvwxyz *'
char_dic = {char : idx for idx, char in enumerate(char_space)}
print(char_dic)
{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25, ' ': 26, '*': 27}
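As a quick check (an illustrative line, not in the original), a word maps to character indices through this dictionary:

print([char_dic[c] for c in 'bad']) # -> [1, 0, 3]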
def pad_seq(sequences, max_len, dic):
    seq_len, seq_indices = [], []
    for seq in sequences:
        seq_len.append(len(seq))
        seq_idx = [dic.get(char) for char in seq]
        seq_idx += (max_len - len(seq_idx)) * [dic.get('*')] # 27 is idx of meaningless token "*"
        seq_indices.append(seq_idx)
    return seq_len, seq_indices
max_length = 10
X_length, X_indices = pad_seq(sequences = words, max_len = max_length, dic = char_dic)
print(X_length)
print(np.shape(X_indices))
[4, 3, 7, 7, 9, 7]
(6, 10)
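For instance (an illustrative check against the dictionary above), 'good' is encoded and padded to length 10 as:

print(X_indices[0]) # [6, 14, 14, 3, 27, 27, 27, 27, 27, 27]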
class CharStackedRNN:
    def __init__(self, X_length, X_indices, y, n_of_classes, dic, hidden_dims = [32, 16]):
        # data pipeline
        with tf.variable_scope('input_layer'):
            self._X_length = X_length
            self._X_indices = X_indices
            self._y = y
            self._keep_prob = tf.placeholder(dtype = tf.float32)

            one_hot = tf.eye(len(dic), dtype = tf.float32)
            self._one_hot = tf.get_variable(name = 'one_hot_embedding', initializer = one_hot,
                                            trainable = False) # not trainable: the one-hot embedding is fixed
            self._X_batch = tf.nn.embedding_lookup(params = self._one_hot, ids = self._X_indices)

        # Stacked-RNN: each layer's output is wrapped with dropout
        with tf.variable_scope('stacked_rnn'):
            cells = []
            for hidden_dim in hidden_dims:
                cell = tf.contrib.rnn.BasicRNNCell(num_units = hidden_dim, activation = tf.nn.tanh)
                cell = tf.contrib.rnn.DropoutWrapper(cell = cell, output_keep_prob = self._keep_prob)
                cells.append(cell)
            cells = tf.contrib.rnn.MultiRNNCell(cells = cells)
            _, state = tf.nn.dynamic_rnn(cell = cells, inputs = self._X_batch,
                                         sequence_length = self._X_length, dtype = tf.float32)

        with tf.variable_scope('output_layer'):
            # many-to-one: classify from the final state of the top layer
            self._score = slim.fully_connected(inputs = state[-1], num_outputs = n_of_classes,
                                               activation_fn = None)

        with tf.variable_scope('loss'):
            self.ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = self._y, logits = self._score)

        with tf.variable_scope('prediction'):
            self._prediction = tf.argmax(input = self._score, axis = -1, output_type = tf.int32)

    def predict(self, sess, X_length, X_indices, keep_prob = 1.):
        feed_prediction = {self._X_length : X_length, self._X_indices : X_indices, self._keep_prob : keep_prob}
        return sess.run(self._prediction, feed_dict = feed_prediction)
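The one-hot embedding trick above is just a row lookup in an identity matrix. A minimal standalone sketch (separate from the class; the vocabulary size 4 is arbitrary):

with tf.Graph().as_default():
    one_hot = tf.eye(4, dtype = tf.float32) # identity matrix: row i is the one-hot vector of token i
    ids = tf.constant([[0, 2]])
    vecs = tf.nn.embedding_lookup(params = one_hot, ids = ids)
    with tf.Session() as tmp_sess:
        print(tmp_sess.run(vecs)) # [[[1. 0. 0. 0.] [0. 0. 1. 0.]]]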
# hyper-parameters
lr = .003
epochs = 10
batch_size = 2
total_step = int(np.shape(X_indices)[0] / batch_size)
print(total_step)
3
## create data pipeline with tf.data
tr_dataset = tf.data.Dataset.from_tensor_slices((X_length, X_indices, y))
tr_dataset = tr_dataset.shuffle(buffer_size = 20)
tr_dataset = tr_dataset.batch(batch_size = batch_size)
tr_iterator = tr_dataset.make_initializable_iterator()
print(tr_dataset)
<BatchDataset shapes: ((?,), (?, 10), (?, 2)), types: (tf.int32, tf.int32, tf.float32)>
X_length_mb, X_indices_mb, y_mb = tr_iterator.get_next()
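To peek at one minibatch coming out of the pipeline (a hedged sanity check, not in the original; values vary with shuffling):

with tf.Session() as tmp_sess:
    tmp_sess.run(tr_iterator.initializer)
    mb_len, mb_idx, mb_y = tmp_sess.run([X_length_mb, X_indices_mb, y_mb])
    print(mb_len, mb_idx.shape, mb_y.shape) # e.g. [4 9] (2, 10) (2, 2)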
char_stacked_rnn = CharStackedRNN(X_length = X_length_mb, X_indices = X_indices_mb, y = y_mb,
n_of_classes = 2, dic = char_dic, hidden_dims = [32,16])
## create training op
opt = tf.train.AdamOptimizer(learning_rate = lr)
training_op = opt.minimize(loss = char_stacked_rnn.ce_loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
tr_loss_hist = []
for epoch in range(epochs):
    avg_tr_loss = 0
    tr_step = 0

    sess.run(tr_iterator.initializer)
    try:
        while True:
            _, tr_loss = sess.run(fetches = [training_op, char_stacked_rnn.ce_loss],
                                  feed_dict = {char_stacked_rnn._keep_prob : .5})
            avg_tr_loss += tr_loss
            tr_step += 1
    except tf.errors.OutOfRangeError:
        pass

    avg_tr_loss /= tr_step
    tr_loss_hist.append(avg_tr_loss)
    print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))
epoch :   1, tr_loss : 0.611
epoch :   2, tr_loss : 0.373
epoch :   3, tr_loss : 0.330
epoch :   4, tr_loss : 0.157
epoch :   5, tr_loss : 0.093
epoch :   6, tr_loss : 0.094
epoch :   7, tr_loss : 0.036
epoch :   8, tr_loss : 0.060
epoch :   9, tr_loss : 0.013
epoch :  10, tr_loss : 0.025
plt.plot(tr_loss_hist, label = 'train')
[<matplotlib.lines.Line2D at 0x115c6e438>]
yhat = char_stacked_rnn.predict(sess = sess, X_length = X_length, X_indices = X_indices)
print('training acc: {:.2%}'.format(np.mean(yhat == np.argmax(y, axis = -1))))
training acc: 100.00%
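The same pad_seq + predict path works for unseen words (a usage sketch, not in the original; 'great' and 'awful' are hypothetical inputs, and with only six training examples the outputs shouldn't be taken seriously):

test_words = ['great', 'awful']
test_length, test_indices = pad_seq(sequences = test_words, max_len = max_length, dic = char_dic)
print(char_stacked_rnn.predict(sess = sess, X_length = test_length, X_indices = test_indices)) # 0 = positive, 1 = negative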