How can we use an RNN to classify last names by nationality?
Let's consider the name 'Dolores': our program reads it letter by letter and, after each prefix, outputs its top guesses (log-probabilities) for the nationality:
D
(-2.13) Korean (-2.47) Vietnamese (-2.52) English
Do
(-0.44) Vietnamese (-2.51) Korean (-2.87) Portuguese
Dol
(-1.21) Vietnamese (-1.69) Korean (-1.73) Chinese
Dolo
(-1.22) Spanish (-1.27) Italian (-2.17) Portuguese
Dolor
(-1.45) French (-1.69) German (-1.76) English
Dolore
(-0.80) French (-2.28) Italian (-2.45) Irish
Dolores
(-0.33) Portuguese (-1.76) Spanish (-3.56) Greek
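The prefix-by-prefix output above can be reproduced, once the model has been trained, with the predict() helper defined at the end of this notebook, for example:
# Reproduce the prefix-by-prefix classification above (uses the predict()
# helper defined at the end of this notebook, after the model is trained).
name = 'Dolores'
for end in range(1, len(name) + 1):
    predict(name[:end])   # 'D', 'Do', 'Dol', ..., 'Dolores'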
Our program has the following architecture.
from IPython.display import Image
Image("nn.png")
import torch
import torch.nn as nn  # How does the diagram translate into code?

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)  # combined input -> next hidden state
        self.i2o = nn.Linear(input_size + hidden_size, output_size)  # combined input -> output scores
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)  # concatenate the letter and the previous hidden state
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

# n_letters and n_categories are defined further down in the notebook
n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)
The code is at the end of the notebook.
The gate controllers can also look at the long-term state, through so-called peephole connections (Felix Gers and Jürgen Schmidhuber, 2000).
from IPython.display import Image
Image("peepholes.png")
Coupled forget and input gates.
If the gate controller outputs a 1, the forget gate is open and the input gate is closed.
If the gate controller outputs a 0, the forget gate is closed and the input gate is open.
Whenever a memory must be stored, the location where it will be stored is erased first.
from IPython.display import Image
Image("tied.png")
Gated Recurrent Unit (Kyunghyun Cho et al., 2014). Both state vectors are merged into a single vector $h_t$.
from IPython.display import Image
Image("GRU.png")
from __future__ import unicode_literals, print_function, division
from io import open
import glob
def findFiles(path): return glob.glob(path)
print(findFiles('data/names/*.txt'))
import unicodedata
import string
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
# Turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )
#print(unicodeToAscii('Ślusàrski'))
# Build the category_lines dictionary, a list of names per language
category_lines = {}
all_categories = []
# Read a file and split into lines
def readLines(filename):
    lines = open(filename, encoding='utf-8').read().strip().split('\n')
    return [unicodeToAscii(line) for line in lines]
import os

for filename in findFiles('data/names/*.txt'):
    # Use os.path so the category name is extracted correctly on any OS
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines
n_categories = len(all_categories)
['data/names\\Arabic.txt', 'data/names\\Chinese.txt', 'data/names\\Czech.txt', 'data/names\\Dutch.txt', 'data/names\\English.txt', 'data/names\\French.txt', 'data/names\\German.txt', 'data/names\\Greek.txt', 'data/names\\Irish.txt', 'data/names\\Italian.txt', 'data/names\\Japanese.txt', 'data/names\\Korean.txt', 'data/names\\Polish.txt', 'data/names\\Portuguese.txt', 'data/names\\Russian.txt', 'data/names\\Scottish.txt', 'data/names\\Spanish.txt', 'data/names\\Vietnamese.txt']
import torch
# Find letter index from all_letters, e.g. "a" = 0
def letterToIndex(letter):
    return all_letters.find(letter)

# Just for demonstration, turn a letter into a <1 x n_letters> Tensor
def letterToTensor(letter):
    tensor = torch.zeros(1, n_letters)
    tensor[0][letterToIndex(letter)] = 1
    return tensor

# Turn a line into a <line_length x 1 x n_letters>,
# or an array of one-hot letter vectors
def lineToTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li, letter in enumerate(line):
        tensor[li][0][letterToIndex(letter)] = 1
    return tensor
print(letterToTensor('e'))
print(lineToTensor('Eric').size())
tensor([[ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
torch.Size([4, 1, 57])
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

n_hidden = 128
rnn = RNN(n_letters, n_hidden, n_categories)
input = letterToTensor('A')
hidden = torch.zeros(1, n_hidden)
output, next_hidden = rnn(input, hidden)
input = lineToTensor('Albert')
hidden = torch.zeros(1, n_hidden)
output, next_hidden = rnn(input[0], hidden)
print(output)
tensor([[-2.8621, -2.8841, -2.8587, -2.8835, -2.9150, -2.8640, -2.9189, -2.9160, -2.7778, -2.8854, -2.8547, -2.9620, -2.9186, -2.8763, -2.8906, -2.9999, -2.9841, -2.8015]])
def categoryFromOutput(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
    return all_categories[category_i], category_i
print(categoryFromOutput(output))
('Irish', 8)
import random

def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def randomTrainingExample():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    category_tensor = torch.tensor([all_categories.index(category)], dtype=torch.long)
    line_tensor = lineToTensor(line)
    return category, line, category_tensor, line_tensor

for i in range(10):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    print('category =', category, '/ line =', line)
category = Italian / line = Muraro
category = Scottish / line = Martin
category = German / line = Schafer
category = Portuguese / line = Souza
category = Portuguese / line = Ventura
category = Italian / line = Caito
category = French / line = Lesauvage
category = Russian / line = Aboyantsev
category = Italian / line = Borghi
category = Greek / line = Grammatakakis
criterion = nn.NLLLoss()  # We use the negative log-likelihood loss for classification problems
learning_rate = 0.005 # If you set this too high, it might explode. If too low, it might not learn
def train(category_tensor, line_tensor):
    hidden = rnn.initHidden()
    rnn.zero_grad()

    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)

    loss = criterion(output, category_tensor)
    loss.backward()

    # Add parameters' gradients to their values, multiplied by learning rate
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)

    return output, loss.item()
import time
import math
n_iters = 100000
print_every = 5000
plot_every = 1000
# Keep track of losses for plotting
current_loss = 0
all_losses = []
def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)
start = time.time()
for iter in range(1, n_iters + 1):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    # Print iter number, loss, name and guess
    if iter % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = '✓' if guess == category else '✗ (%s)' % category
        print('%d %d%% (%s) %.4f %s / %s %s' % (iter, iter / n_iters * 100, timeSince(start), loss, line, guess, correct))

    # Add current loss avg to list of losses
    if iter % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0
5000 5% (0m 16s) 2.0365 Ziemniak / Polish ✓
10000 10% (0m 30s) 2.0735 Aiello / Spanish ✗ (Italian)
15000 15% (0m 42s) 0.8022 Shon / Korean ✓
20000 20% (0m 55s) 0.6860 Xing / Chinese ✓
25000 25% (1m 8s) 2.4584 Brant / Scottish ✗ (German)
30000 30% (1m 22s) 0.2492 Shimada / Japanese ✓
35000 35% (1m 35s) 2.0576 Zogby / Irish ✗ (Arabic)
40000 40% (1m 46s) 0.7605 Suh / Korean ✓
45000 45% (2m 0s) 0.4057 Almeida / Portuguese ✓
50000 50% (2m 12s) 1.3397 Santos / Portuguese ✓
55000 55% (2m 25s) 1.9105 Waldfogel / English ✗ (German)
60000 60% (2m 37s) 2.3271 Solberg / French ✗ (German)
65000 65% (2m 51s) 2.6778 Santana / Italian ✗ (Portuguese)
70000 70% (3m 4s) 2.7850 Sokolofsky / Russian ✗ (Polish)
75000 75% (3m 18s) 0.0339 Napoleoni / Italian ✓
80000 80% (3m 31s) 0.8089 Ibarra / Spanish ✓
85000 85% (3m 45s) 0.0427 O'Keefe / Irish ✓
90000 90% (3m 57s) 2.4093 Major / Arabic ✗ (English)
95000 95% (4m 10s) 1.0790 Hill / Scottish ✓
100000 100% (4m 23s) 2.0619 Lovey / French ✗ (English)
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
plt.figure()
plt.plot(all_losses)
[<matplotlib.lines.Line2D at 0x1dc7a4c8f98>]
# Keep track of correct guesses in a confusion matrix
confusion = torch.zeros(n_categories, n_categories)
n_confusion = 10000
# Just return an output given a line
def evaluate(line_tensor):
    hidden = rnn.initHidden()
    for i in range(line_tensor.size()[0]):
        output, hidden = rnn(line_tensor[i], hidden)
    return output

# Go through a bunch of examples and record which are correctly guessed
for i in range(n_confusion):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output = evaluate(line_tensor)
    guess, guess_i = categoryFromOutput(output)
    category_i = all_categories.index(category)
    confusion[category_i][guess_i] += 1

# Normalize by dividing every row by its sum
for i in range(n_categories):
    confusion[i] = confusion[i] / confusion[i].sum()
# Set up plot
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(confusion.numpy())
fig.colorbar(cax)
# Set up axes
ax.set_xticklabels([''] + all_categories, rotation=90)
ax.set_yticklabels([''] + all_categories)
# Force label at every tick
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
# sphinx_gallery_thumbnail_number = 2
plt.show()
def predict(input_line, n_predictions=3):
    print('\n> %s' % input_line)
    with torch.no_grad():
        output = evaluate(lineToTensor(input_line))

        # Get top N categories
        topv, topi = output.topk(n_predictions, 1, True)
        predictions = []

        for i in range(n_predictions):
            value = topv[0][i].item()
            category_index = topi[0][i].item()
            print('(%.2f) %s' % (value, all_categories[category_index]))
            predictions.append([value, all_categories[category_index]])
predict('Dovesky')
predict('Jackson')
predict('Satoshi')
> Dovesky
(-0.65) Russian (-1.07) Czech (-2.37) English
> Jackson
(-0.61) Scottish (-1.36) English (-2.52) Russian
> Satoshi
(-1.19) Arabic (-1.58) Japanese (-1.94) Polish
predict('Dolores')
> Dolores
(-0.24) Portuguese (-1.84) Spanish (-4.11) Dutch
Can we increase the depth of the hidden layer by adding two more inner layers, each followed by an activation function?
If so, modify the code accordingly (one possible sketch follows).
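One possible answer (a sketch only; the layer names and the choice of ReLU are ours, not from the tutorial): insert two extra Linear layers, each followed by an activation, between the combined input and the two output heads.
import torch
import torch.nn as nn

class DeepRNN(nn.Module):
    """Sketch of one possible answer: two extra inner layers, each followed by an activation."""
    def __init__(self, input_size, hidden_size, output_size):
        super(DeepRNN, self).__init__()
        self.hidden_size = hidden_size
        self.inner1 = nn.Linear(input_size + hidden_size, hidden_size)
        self.inner2 = nn.Linear(hidden_size, hidden_size)
        self.i2h = nn.Linear(hidden_size, hidden_size)
        self.i2o = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        combined = torch.cat((input, hidden), 1)
        x = torch.relu(self.inner1(combined))   # first extra layer + activation
        x = torch.relu(self.inner2(x))          # second extra layer + activation
        hidden = self.i2h(x)
        output = self.softmax(self.i2o(x))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)

# rnn = DeepRNN(n_letters, n_hidden, n_categories)   # drop-in replacement for the RNN above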
PyTorch example: Classifying Names with a Character-Level RNN, https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html