Simple example of many-to-one classification (word sentiment classification) using a stacked LSTM with dropout.

- tf.data
- padding technique by a user-defined function (pad_seq)
- tf.nn.embedding_lookup for getting the vectors of tokens (e.g. word, character); a small sketch follows the character dictionary below
- tf.contrib.rnn.DropoutWrapper
- tf.contrib.rnn.MultiRNNCell
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import string
%matplotlib inline
slim = tf.contrib.slim
print(tf.__version__)
1.8.0
words = ['good', 'bad', 'amazing', 'so good', 'bull shit', 'awesome']
y = [[1.,0.], [0.,1.], [1.,0.], [1., 0.],[0.,1.], [1.,0.]]
# Character quantization
char_space = string.ascii_lowercase
char_space = char_space + ' ' + '*'
char_space
'abcdefghijklmnopqrstuvwxyz *'
char_dic = {char : idx for idx, char in enumerate(char_space)}
print(char_dic)
{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25, ' ': 26, '*': 27}
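The class below maps each character index to a fixed one-hot vector by calling tf.nn.embedding_lookup on an identity matrix built with tf.eye. A minimal standalone sketch of that trick (not part of the original notebook; the word 'good' is just an example):

toy_ids = [[char_dic[c] for c in 'good']]           # [[6, 14, 14, 3]]
one_hot = tf.eye(len(char_dic), dtype = tf.float32) # row i is the one-hot vector of the i-th character
toy_vectors = tf.nn.embedding_lookup(params = one_hot, ids = toy_ids)
with tf.Session() as sess:
    print(sess.run(toy_vectors).shape)              # (1, 4, 28): batch, time, one-hot dimension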
def pad_seq(sequences, max_len, dic):
    seq_len, seq_indices = [], []
    for seq in sequences:
        seq_len.append(len(seq))
        seq_idx = [dic.get(char) for char in seq]
        seq_idx += (max_len - len(seq_idx)) * [dic.get('*')] # pad with the index of the meaningless token "*" (27)
        seq_indices.append(seq_idx)
    return seq_len, seq_indices
max_length = 10
X_length, X_indices = pad_seq(sequences = words, max_len = max_length, dic = char_dic)
print(X_length)
print(np.shape(X_indices))
[4, 3, 7, 7, 9, 7]
(6, 10)
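To see what the padding produces, inspecting a single padded sequence is enough; for instance, 'good' maps to [6, 14, 14, 3] and is then filled with the index of '*' (27) up to max_length:

print(X_indices[0]) # [6, 14, 14, 3, 27, 27, 27, 27, 27, 27]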
class CharStackedLSTM:
    def __init__(self, X_length, X_indices, y, n_of_classes, dic, hidden_dims = [32, 16]):

        # data pipeline
        with tf.variable_scope('input_layer'):
            self._X_length = X_length
            self._X_indices = X_indices
            self._y = y
            self._keep_prob = tf.placeholder(dtype = tf.float32)

            one_hot = tf.eye(len(dic), dtype = tf.float32)
            self._one_hot = tf.get_variable(name='one_hot_embedding', initializer = one_hot,
                                            trainable = False) # because the embedding vectors will not be trained
            self._X_batch = tf.nn.embedding_lookup(params = self._one_hot, ids = self._X_indices)

        # Stacked-LSTM
        with tf.variable_scope('stacked_lstm'):
            cells = []
            for hidden_dim in hidden_dims:
                cell = tf.contrib.rnn.BasicLSTMCell(num_units = hidden_dim, activation = tf.nn.tanh)
                cell = tf.contrib.rnn.DropoutWrapper(cell = cell, output_keep_prob = self._keep_prob)
                cells.append(cell)
            else: # for-else: runs once after the loop finishes
                cells = tf.contrib.rnn.MultiRNNCell(cells = cells)

            _, states = tf.nn.dynamic_rnn(cell = cells, inputs = self._X_batch,
                                          sequence_length = self._X_length, dtype = tf.float32)

        with tf.variable_scope('output_layer'):
            # many-to-one: use the hidden state of the last (top) LSTM layer at the final valid time step
            self._score = slim.fully_connected(inputs = states[-1].h, num_outputs = n_of_classes,
                                               activation_fn = None)

        with tf.variable_scope('loss'):
            self.ce_loss = tf.losses.softmax_cross_entropy(onehot_labels = self._y, logits = self._score)

        with tf.variable_scope('prediction'):
            self._prediction = tf.argmax(input = self._score, axis = -1, output_type = tf.int32)

    def predict(self, sess, X_length, X_indices, keep_prob = 1.):
        feed_prediction = {self._X_length : X_length, self._X_indices : X_indices, self._keep_prob : keep_prob}
        return sess.run(self._prediction, feed_dict = feed_prediction)
# hyper-parameters
lr = .003
epochs = 10
batch_size = 2
total_step = int(np.shape(X_indices)[0] / batch_size)
print(total_step)
3
## create data pipeline with tf.data
tr_dataset = tf.data.Dataset.from_tensor_slices((X_length, X_indices, y))
tr_dataset = tr_dataset.shuffle(buffer_size = 20)
tr_dataset = tr_dataset.batch(batch_size = batch_size)
tr_iterator = tr_dataset.make_initializable_iterator()
print(tr_dataset)
<BatchDataset shapes: ((?,), (?, 10), (?, 2)), types: (tf.int32, tf.int32, tf.float32)>
X_length_mb, X_indices_mb, y_mb = tr_iterator.get_next()
char_stacked_lstm = CharStackedLSTM(X_length = X_length_mb, X_indices = X_indices_mb, y = y_mb,
                                    n_of_classes = 2, dic = char_dic, hidden_dims = [32, 16])
## create training op
opt = tf.train.AdamOptimizer(learning_rate = lr)
training_op = opt.minimize(loss = char_stacked_lstm.ce_loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
tr_loss_hist = []
for epoch in range(epochs):
    avg_tr_loss = 0
    tr_step = 0

    sess.run(tr_iterator.initializer)
    try:
        while True:
            _, tr_loss = sess.run(fetches = [training_op, char_stacked_lstm.ce_loss],
                                  feed_dict = {char_stacked_lstm._keep_prob : .5})
            avg_tr_loss += tr_loss
            tr_step += 1
    except tf.errors.OutOfRangeError:
        pass

    avg_tr_loss /= tr_step
    tr_loss_hist.append(avg_tr_loss)
    print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))
epoch :   1, tr_loss : 0.692
epoch :   2, tr_loss : 0.665
epoch :   3, tr_loss : 0.638
epoch :   4, tr_loss : 0.610
epoch :   5, tr_loss : 0.572
epoch :   6, tr_loss : 0.503
epoch :   7, tr_loss : 0.439
epoch :   8, tr_loss : 0.338
epoch :   9, tr_loss : 0.282
epoch :  10, tr_loss : 0.229
plt.plot(tr_loss_hist, label = 'train')
[<matplotlib.lines.Line2D at 0x117ea0048>]
yhat = char_stacked_lstm.predict(sess = sess, X_length = X_length, X_indices = X_indices)
print('training acc: {:.2%}'.format(np.mean(yhat == np.argmax(y, axis = -1))))
training acc: 83.33%
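The same pad_seq helper can be reused to score a word the model has not seen. A minimal sketch (the word 'great' is a hypothetical example; its predicted class depends on the trained weights):

new_length, new_indices = pad_seq(sequences = ['great'], max_len = max_length, dic = char_dic)
print(char_stacked_lstm.predict(sess = sess, X_length = new_length, X_indices = new_indices))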