Simple example of many-to-one classification (word sentiment classification) by stacked RNN with dropout.

- tf.data
- padding technique by user-defined function (pad_seq)
- tf.nn.embedding_lookup for getting vectors of tokens (e.g. word, character)
- tf.contrib.rnn.DropoutWrapper
- tf.contrib.rnn.MultiRNNCell
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import string
%matplotlib inline
slim = tf.contrib.slim
print(tf.__version__)
1.8.0
words = ['good', 'bad', 'amazing', 'so good', 'bull shit', 'awesome']
y = [[1.,0.], [0.,1.], [1.,0.], [1.,0.], [0.,1.], [1.,0.]] # one-hot labels: [1, 0] = positive, [0, 1] = negative
# Character quantization
char_space = string.ascii_lowercase
char_space = char_space + ' ' + '*'
char_space
'abcdefghijklmnopqrstuvwxyz *'
char_dic = {char : idx for idx, char in enumerate(char_space)}
print(char_dic)
{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25, ' ': 26, '*': 27}
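As a quick check (an illustrative line, not in the original), a word maps to character indices through this dictionary:

print([char_dic[c] for c in 'bad']) # -> [1, 0, 3]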
def pad_seq(sequences, max_len, dic):
    seq_len, seq_indices = [], []
    for seq in sequences:
        seq_len.append(len(seq))
        seq_idx = [dic.get(char) for char in seq]
        seq_idx += (max_len - len(seq_idx)) * [dic.get('*')] # 27 is idx of meaningless token "*"
        seq_indices.append(seq_idx)
    return seq_len, seq_indices
max_length = 10
X_length, X_indices = pad_seq(sequences = words, max_len = max_length, dic = char_dic)
print(X_length)
print(np.shape(X_indices))
[4, 3, 7, 7, 9, 7]
(6, 10)
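For instance (an illustrative check against the dictionary above), 'good' is encoded and padded to length 10 as:

print(X_indices[0]) # [6, 14, 14, 3, 27, 27, 27, 27, 27, 27]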
class CharStackedRNN:
    def __init__(self, X_length, X_indices, y, n_of_classes, dic, hidden_dims = [32, 16]):
        # data pipeline
        with tf.variable_scope('input_layer'):
            self._X_length = X_length
            self._X_indices = X_indices
            self._y = y
            self._keep_prob = tf.placeholder(dtype = tf.float32)

            one_hot = tf.eye(len(dic), dtype = tf.float32)
            self._one_hot = tf.get_variable(name = 'one_hot_embedding', initializer = one_hot,
                                            trainable = False) # not trainable: the one-hot embedding is fixed
            self._X_batch = tf.nn.embedding_lookup(params = self._one_hot, ids = self._X_indices)

        # Stacked-RNN: each layer's output is wrapped with dropout
        with tf.variable_scope('stacked_rnn'):
            cells = []
            for hidden_dim in hidden_dims:
                cell = tf.contrib.rnn.BasicRNNCell(num_units = hidden_dim, activation = tf.nn.tanh)
                cell = tf.contrib.rnn.DropoutWrapper(cell = cell, output_keep_prob = self._keep_prob)
                cells.append(cell)
            cells = tf.contrib.rnn.MultiRNNCell(cells = cells)
            _, state = tf.nn.dynamic_rnn(cell = cells, inputs = self._X_batch,
                                         sequence_length = self._X_length, dtype = tf.float32)

        with tf.variable_scope('output_layer'):
            # many-to-one: classify from the final state of the top layer
            self._score = slim.fully_connected(inputs = state[-1], num_outputs = n_of_classes,
                                               activation_fn = None)

        with tf.variable_scope('loss'):
            self.ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = self._y, logits = self._score)

        with tf.variable_scope('prediction'):
            self._prediction = tf.argmax(input = self._score, axis = -1, output_type = tf.int32)

    def predict(self, sess, X_length, X_indices, keep_prob = 1.):
        feed_prediction = {self._X_length : X_length, self._X_indices : X_indices, self._keep_prob : keep_prob}
        return sess.run(self._prediction, feed_dict = feed_prediction)
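The one-hot embedding trick above is just a row lookup in an identity matrix. A minimal standalone sketch (separate from the class; the vocabulary size 4 is arbitrary):

with tf.Graph().as_default():
    one_hot = tf.eye(4, dtype = tf.float32) # identity matrix: row i is the one-hot vector of token i
    ids = tf.constant([[0, 2]])
    vecs = tf.nn.embedding_lookup(params = one_hot, ids = ids)
    with tf.Session() as tmp_sess:
        print(tmp_sess.run(vecs)) # [[[1. 0. 0. 0.] [0. 0. 1. 0.]]]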
# hyper-parameters
lr = .003
epochs = 10
batch_size = 2
total_step = int(np.shape(X_indices)[0] / batch_size)
print(total_step)
3
## create data pipeline with tf.data
tr_dataset = tf.data.Dataset.from_tensor_slices((X_length, X_indices, y))
tr_dataset = tr_dataset.shuffle(buffer_size = 20)
tr_dataset = tr_dataset.batch(batch_size = batch_size)
tr_iterator = tr_dataset.make_initializable_iterator()
print(tr_dataset)
<BatchDataset shapes: ((?,), (?, 10), (?, 2)), types: (tf.int32, tf.int32, tf.float32)>
X_length_mb, X_indices_mb, y_mb = tr_iterator.get_next()
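To peek at one minibatch coming out of the pipeline (a hedged sanity check, not in the original; values vary with shuffling):

with tf.Session() as tmp_sess:
    tmp_sess.run(tr_iterator.initializer)
    mb_len, mb_idx, mb_y = tmp_sess.run([X_length_mb, X_indices_mb, y_mb])
    print(mb_len, mb_idx.shape, mb_y.shape) # e.g. [4 9] (2, 10) (2, 2)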
char_stacked_rnn = CharStackedRNN(X_length = X_length_mb, X_indices = X_indices_mb, y = y_mb,
n_of_classes = 2, dic = char_dic, hidden_dims = [32,16])
## create training op
opt = tf.train.AdamOptimizer(learning_rate = lr)
training_op = opt.minimize(loss = char_stacked_rnn.ce_loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
tr_loss_hist = []
for epoch in range(epochs):
    avg_tr_loss = 0
    tr_step = 0

    sess.run(tr_iterator.initializer)
    try:
        while True:
            _, tr_loss = sess.run(fetches = [training_op, char_stacked_rnn.ce_loss],
                                  feed_dict = {char_stacked_rnn._keep_prob : .5})
            avg_tr_loss += tr_loss
            tr_step += 1
    except tf.errors.OutOfRangeError:
        pass

    avg_tr_loss /= tr_step
    tr_loss_hist.append(avg_tr_loss)
    print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))
epoch :   1, tr_loss : 0.611
epoch :   2, tr_loss : 0.373
epoch :   3, tr_loss : 0.330
epoch :   4, tr_loss : 0.157
epoch :   5, tr_loss : 0.093
epoch :   6, tr_loss : 0.094
epoch :   7, tr_loss : 0.036
epoch :   8, tr_loss : 0.060
epoch :   9, tr_loss : 0.013
epoch :  10, tr_loss : 0.025
plt.plot(tr_loss_hist, label = 'train')
[<matplotlib.lines.Line2D at 0x115c6e438>]
yhat = char_stacked_rnn.predict(sess = sess, X_length = X_length, X_indices = X_indices)
print('training acc: {:.2%}'.format(np.mean(yhat == np.argmax(y, axis = -1))))
training acc: 100.00%
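The same pad_seq + predict path works for unseen words (a usage sketch, not in the original; 'great' and 'awful' are hypothetical inputs, and with only six training examples the outputs shouldn't be taken seriously):

test_words = ['great', 'awful']
test_length, test_indices = pad_seq(sequences = test_words, max_len = max_length, dic = char_dic)
print(char_stacked_rnn.predict(sess = sess, X_length = test_length, X_indices = test_indices)) # 0 = positive, 1 = negative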