import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Dense, Dropout, SimpleRNN, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, LambdaCallback
from termcolor import colored
# Vocabulary: the ten digits plus the two supported operators (12 symbols total).
all_chars = "1234567890+-"
num_features = len(all_chars)
num_features
12
# Tokenizing the characters
# Forward (char -> one-hot index) and reverse (index -> char) lookup tables.
char_to_idx = {c:i for i,c in enumerate(all_chars)}
idx_to_char = {i:c for i,c in enumerate(all_chars)}
print(char_to_idx)
print(idx_to_char)
{'1': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6, '8': 7, '9': 8, '0': 9, '+': 10, '-': 11} {0: '1', 1: '2', 2: '3', 3: '4', 4: '5', 5: '6', 6: '7', 7: '8', 8: '9', 9: '0', 10: '+', 11: '-'}
def random_operation(num1, num2):
    """Draw a random operator from {+, -}, apply it to the operands,
    and return the pair (result, operator)."""
    operator = np.random.choice(['+', '-'])
    result = num1 + num2 if operator == '+' else num1 - num2
    return result, operator
# Smoke test — the operator (and therefore the result) is random each run.
random_operation(5,5)
(10, '+')
def generate_data():
    """Build one training pair: an expression string (e.g. '90-27')
    and its result as a string (e.g. '63'). Operands are in [0, 100)."""
    a = np.random.randint(0, 100)
    b = np.random.randint(0, 100)
    value, symbol = random_operation(a, b)
    expression = '{}{}{}'.format(a, symbol, b)
    return (expression, str(value))
# Test — example output only; values change each run.
generate_data()
('90-27', '63')
hidden_units = 128
# Every sample and label is padded to this many characters before training.
max_timesteps = 5
# Encoder-decoder: the first RNN compresses the whole expression into one
# state vector; RepeatVector copies it once per output step; the second RNN
# decodes it and a per-timestep softmax picks one of the 12 characters.
model = Sequential([
# Encoder
SimpleRNN(hidden_units, input_shape=(None, num_features)),
RepeatVector(max_timesteps),
# Decoder
SimpleRNN(hidden_units, return_sequences=True),
TimeDistributed(Dense(num_features, activation="softmax"))
])
# One-hot targets, hence categorical cross-entropy.
model.compile('adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= simple_rnn (SimpleRNN) (None, 128) 18048 _________________________________________________________________ repeat_vector (RepeatVector) (None, 5, 128) 0 _________________________________________________________________ simple_rnn_1 (SimpleRNN) (None, 5, 128) 32896 _________________________________________________________________ time_distributed (TimeDistri (None, 5, 12) 1548 ================================================================= Total params: 52,492 Trainable params: 52,492 Non-trainable params: 0 _________________________________________________________________
def vectorize_sample(sample, label):
    """One-hot encode a (sample, label) string pair into a pair of
    (max_timesteps, num_features) arrays.

    Both strings are left-padded with the character '0' so every sequence
    occupies exactly max_timesteps positions.
    """
    x = np.zeros((max_timesteps, num_features))
    y = np.zeros((max_timesteps, num_features))
    pad_x = max_timesteps - len(sample)
    pad_y = max_timesteps - len(label)
    zero_idx = char_to_idx['0']
    # Leading pad positions all encode '0'.
    x[:pad_x, zero_idx] = 1
    y[:pad_y, zero_idx] = 1
    for pos, ch in enumerate(sample):
        x[pad_x + pos, char_to_idx[ch]] = 1
    for pos, ch in enumerate(label):
        y[pad_y + pos, char_to_idx[ch]] = 1
    return (x, y)
# Vectorize one generated pair and echo it (values change each run).
s1,l1 = generate_data()
print(s1 +' = ' + l1)
s1v, l1v = vectorize_sample(s1, l1)
# print(s1v)
# print(l1v)
78-26 = 52
def devectorize_sample(sample):
    """Decode a (max_timesteps, num_features) one-hot/probability array
    back into its string, e.g. '52' or '-5'.

    Takes the argmax character at every timestep and strips the leading
    '0' padding. When the decoded string is all zeros (true result is 0,
    e.g. from `n - n`), return '0' — a bare lstrip would erase the whole
    string and yield '' for a perfectly valid prediction.
    """
    chars = [idx_to_char[i] for i in np.argmax(sample, axis=1)]
    decoded = ''.join(chars).lstrip('0')
    return decoded or '0'
# Round-trip check: decoding the vectorized label recovers the original string.
devectorize_sample(l1v)
'52'
def create_dataset(num_sample=1000):
    """Generate `num_sample` vectorized training pairs.

    Returns two arrays of shape (num_sample, max_timesteps, num_features):
    the one-hot inputs and the one-hot targets.
    """
    # Preallocate so the shape is correct even for num_sample == 0.
    x = np.zeros((num_sample, max_timesteps, num_features))
    y = np.zeros((num_sample, max_timesteps, num_features))
    for idx in range(num_sample):
        sample, label = generate_data()
        x[idx], y[idx] = vectorize_sample(sample, label)
    return x, y
# Build the training set and sanity-check shapes and decoding.
x_train, y_train = create_dataset(10000)
print(x_train.shape, y_train.shape)
(10000, 5, 12) (10000, 5, 12)
# Decode the first pair back to strings (values change each run).
devectorize_sample(x_train[0]), devectorize_sample(y_train[0])
('4-9', '-5')
# Print validation accuracy after every epoch, all on one line.
lb_cb = LambdaCallback(
on_epoch_end=lambda e, l: print('->{:.2f}'.format(l['val_accuracy']), end='')
)
# Stop once validation loss has not improved for 5 consecutive epochs.
es_cb = EarlyStopping(monitor='val_loss', patience=5)
model.fit(x_train, y_train, epochs=100, batch_size=256, validation_split=0.1,
callbacks=[lb_cb, es_cb], verbose=0)
->0.60->0.63->0.63->0.65->0.67->0.68->0.70->0.71->0.72->0.73->0.74->0.76->0.76->0.77->0.79->0.80->0.82->0.84->0.85->0.87->0.88->0.89->0.90->0.91->0.92->0.93->0.94->0.94->0.94->0.95->0.96->0.95->0.96->0.96->0.97->0.97->0.97->0.97->0.97->0.97->0.97->0.97->0.97->0.98->0.98->0.96->0.97->0.98->0.98->0.98->0.98->0.98->0.98->0.98->0.98->0.98->0.98->0.98->0.99->0.99->0.98->0.99->0.98->0.98->0.99->0.99->0.99->0.98->0.98->0.97->0.96
<tensorflow.python.keras.callbacks.History at 0x7f57da6a4390>
# Predict on 10 fresh samples; print each in green when the decoded
# prediction exactly matches the decoded label, red otherwise.
x_test, y_test = create_dataset(10)
y_pred = model.predict(x_test)
for i, pred in enumerate(y_pred):
y = devectorize_sample(y_test[i])
y_hat = devectorize_sample(pred)
col = 'green'
if y!=y_hat:
col = 'red'
out = 'Sample: ' + devectorize_sample(x_test[i]) + ' Actual: ' + y + ' Predicted: ' + y_hat
print(colored(out, col))
Sample: 89-19 Actual: 70 Predicted: 70 Sample: 3+32 Actual: 35 Predicted: 35 Sample: 54+74 Actual: 128 Predicted: 127 Sample: 55+72 Actual: 127 Predicted: 127 Sample: 9+93 Actual: 102 Predicted: 102 Sample: 76-39 Actual: 37 Predicted: 37 Sample: 56-59 Actual: -3 Predicted: -3 Sample: 37-29 Actual: 8 Predicted: 8 Sample: 78+85 Actual: 163 Predicted: 163 Sample: 52+86 Actual: 138 Predicted: 138
# Final evaluation: per-timestep character accuracy on 1000 unseen samples.
x_test, y_test = create_dataset(1000)
_, acc = model.evaluate(x_test, y_test, verbose=0)
print('Test Accuracy ::', acc)
Test Accuracy :: 0.9648000001907349