Simple example of many-to-one classification (word sentiment classification) with a Recurrent Neural Network.
- creating the data pipeline with tf.data
- padding variable-length sequences with tf.keras.preprocessing.sequence.pad_sequences
- looking up the vector of tokens (e.g. word, character) with a fixed one-hot keras.layers.Embedding
from __future__ import absolute_import, division, print_function
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow import keras
import string
%matplotlib inline
print(tf.__version__)
tf.enable_eager_execution()
1.12.0
words = ['good', 'bad', 'amazing', 'so good', 'bull shit', 'awesome']
y = [[1.,0.], [0.,1.], [1.,0.], [1., 0.],[0.,1.], [1.,0.]]
# Character quantization
char_space = string.ascii_lowercase
char_space = char_space + ' ' + '*'
char_space = list(char_space)
char_space.insert(0, '<pad>')
print(char_space)
['<pad>', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', '*']
char2idx = {char : idx for idx, char in enumerate(char_space)}
print(char2idx)
{'<pad>': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26, ' ': 27, '*': 28}
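Not part of the original notebook, but the inverse mapping is handy if index sequences ever need to be decoded back to characters:
idx2char = {idx : char for char, idx in char2idx.items()}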
words = list(map(lambda word : [char2idx.get(char) for char in word],words))
max_length = 10
X_length = list(map(lambda word : len(word), words))
X_indices = pad_sequences(sequences=words, maxlen=max_length, padding='post', truncating='post')
print(X_length)
print(np.shape(X_indices))
[4, 3, 7, 7, 9, 7]
(6, 10)
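As a quick sanity check on the padding step (a sketch, not in the original notebook), the first row of X_indices is 'good' encoded as character indices and zero-padded with the '<pad>' index up to max_length:
print(X_indices[0])    # expected: [ 7 15 15  4  0  0  0  0  0  0]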
class CharRNN(keras.Model):
    def __init__(self, num_classes, hidden_dim, max_length, dic):
        super(CharRNN, self).__init__()
        # fixed one-hot embedding: the identity matrix maps each character index to its
        # one-hot vector; mask_zero=True masks the '<pad>' index (0) for the RNN
        self.look_up = keras.layers.Embedding(input_dim=len(dic), output_dim=len(dic),
                                              trainable=False, mask_zero=True, input_length=max_length,
                                              embeddings_initializer=keras.initializers.Constant(np.eye(len(dic))))
        self.rnn_cell = keras.layers.SimpleRNN(units=hidden_dim, return_sequences=True,
                                               return_state=True)
        self.dense = keras.layers.Dense(units=num_classes)

    def call(self, inputs):
        token_representation = self.look_up(inputs)
        # many-to-one: only the final hidden state feeds the classifier
        _, final_h = self.rnn_cell(token_representation)
        score = self.dense(final_h)
        return score
# hyper-parameters
lr = .003
epochs = 10
batch_size = 2
total_step = int(np.shape(X_indices)[0] / batch_size)
print(total_step)
3
## create data pipeline with tf.data
tr_dataset = tf.data.Dataset.from_tensor_slices((X_indices, y))
tr_dataset = tr_dataset.shuffle(buffer_size = 20)
tr_dataset = tr_dataset.batch(batch_size = batch_size)
print(tr_dataset)
<BatchDataset shapes: ((?, 10), (?, 2)), types: (tf.int32, tf.float32)>
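To see what the pipeline yields (a sketch, assuming eager execution as enabled above), pull a single mini-batch:
x_peek, y_peek = next(iter(tr_dataset))
print(x_peek.shape, y_peek.shape)    # (batch_size, max_length) and (batch_size, 2)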
char_rnn = CharRNN(num_classes=2, hidden_dim=16, dic=char2idx, max_length=10)
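A quick forward-pass check (a sketch, not in the original): the untrained model should map the whole padded batch to one logit vector of length num_classes per word.
print(char_rnn(inputs=tf.convert_to_tensor(X_indices)).shape)    # expected: (6, 2)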
def loss_fn(model, x, y):
    return tf.losses.softmax_cross_entropy(onehot_labels=y, logits=model(x))
opt = tf.train.AdamOptimizer(learning_rate=lr)
tr_loss_hist = []
for epoch in range(epochs):
    avg_tr_loss = 0
    tr_step = 0

    for x_mb, y_mb in tr_dataset:
        # record the forward pass so the tape can compute gradients
        with tf.GradientTape() as tape:
            tr_loss = loss_fn(char_rnn, x=x_mb, y=y_mb)
        grads = tape.gradient(target=tr_loss, sources=char_rnn.variables)
        opt.apply_gradients(grads_and_vars=zip(grads, char_rnn.variables))
        avg_tr_loss += tr_loss
        tr_step += 1
    else:
        avg_tr_loss /= tr_step
        tr_loss_hist.append(avg_tr_loss)

    print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))
epoch :   1, tr_loss : 0.739
epoch :   2, tr_loss : 0.649
epoch :   3, tr_loss : 0.546
epoch :   4, tr_loss : 0.444
epoch :   5, tr_loss : 0.367
epoch :   6, tr_loss : 0.272
epoch :   7, tr_loss : 0.215
epoch :   8, tr_loss : 0.156
epoch :   9, tr_loss : 0.113
epoch :  10, tr_loss : 0.088
plt.plot(tr_loss_hist, label = 'train')
[<matplotlib.lines.Line2D at 0x7f3de0704b38>]
yhat = np.argmax(char_rnn(inputs=tf.convert_to_tensor(X_indices)), axis=-1)
print('acc : {:.2%}'.format(np.mean(yhat == np.argmax(y, axis=-1))))
acc : 100.00%
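The same preprocessing and model can score an unseen word (a sketch; 'great' and the label names are illustrative and not part of the original notebook; class 0 corresponds to the positive [1., 0.] label):
new_word = [[char2idx.get(char) for char in 'great']]
new_indices = pad_sequences(sequences=new_word, maxlen=max_length, padding='post', truncating='post')
new_pred = np.argmax(char_rnn(inputs=tf.convert_to_tensor(new_indices)), axis=-1)
print('positive' if new_pred[0] == 0 else 'negative')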