PyTorch / TorchText
A Sentiment Analysis example using an RNN network

In this tutorial, we will use:
- packed padded sequences
- pre-trained word embeddings (GloVe)
- an LSTM instead of a vanilla RNN
- a bidirectional, multi-layer RNN
- dropout regularization
- the Adam optimizer

Using these techniques, we can improve accuracy to ~84%.
2021/03/13 Happy-jihye
Reference : pytorch-sentiment-analysis/2 - Updated Sentiment Analysis
!apt install python3.7
!pip install -U torchtext==0.6.0
%%capture
!python -m spacy download en
import torch
from torchtext import data
TEXT = data.Field(tokenize = 'spacy',
                  tokenizer_language = 'en',
                  include_lengths = True)

LABEL = data.LabelField(dtype = torch.float) # pos -> 1 / neg -> 0
from torchtext import datasets
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
import random
SEED = 1234
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
train_data, valid_data = train_data.split(random_state = random.seed(SEED))
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')
Number of training examples: 17500
Number of validation examples: 7500
Number of testing examples: 25000
MAX_VOCAB_SIZE = 25_000
TEXT.build_vocab(train_data,
                 max_size = MAX_VOCAB_SIZE,
                 vectors = "glove.6B.100d",
                 unk_init = torch.Tensor.normal_)
LABEL.build_vocab(train_data)
print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")
Unique tokens in TEXT vocabulary: 25002
Unique tokens in LABEL vocabulary: 2
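As a quick sanity check (a minimal sketch, not in the original notebook), we can peek at the vocabulary: the two extra tokens beyond MAX_VOCAB_SIZE are <unk> and <pad>.

# Peek at the vocabulary: 25,002 = 25,000 most frequent tokens + <unk> + <pad>
print(TEXT.vocab.freqs.most_common(5))  # most frequent raw tokens
print(TEXT.vocab.itos[:10])             # index-to-string; itos[0] = '<unk>', itos[1] = '<pad>'
print(LABEL.vocab.stoi)                 # e.g. {'neg': 0, 'pos': 1}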
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 64
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size = BATCH_SIZE,
    sort_within_batch = True,
    device = device
)
# print a batch from the iterator
for batch in train_iterator:
    print(batch.text[0].shape)
    # sentences
    print(batch.text[0])
    # sentence lengths
    print(batch.text[1])
    print(batch.text[1].shape)

    # print only the first batch
    break
torch.Size([133, 64])
tensor([[  806,    66,    52,  ...,    66,   149,  3190],
        [    7,    22,    15,  ...,     9,  1716, 10449],
        [   42,    18,     5,  ...,     5,  2898,     9],
        ...,
        [ 6985,   293,     7,  ...,     4,     4,    39],
        [    0,  4423,   727,  ...,     1,     1,     1],
        [   30,     4,     4,  ...,     1,     1,     1]], device='cuda:0')
tensor([133, 133, 133, 133, 133, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131,
        131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131],
       device='cuda:0')
torch.Size([64])
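This is also why sort_within_batch = True is set above: pack_padded_sequence, used inside the model below, expects the lengths in descending order. A minimal standalone sketch (toy tensors, hypothetical sizes) of the pack/unpack round trip:

import torch
import torch.nn as nn

# same [sentence length, batch size, feature] layout as the model uses
seq = torch.randn(5, 3, 4)          # max length 5, batch of 3, feature dim 4
lengths = torch.tensor([5, 4, 2])   # descending order, as BucketIterator guarantees

packed = nn.utils.rnn.pack_padded_sequence(seq, lengths)
padded, out_lengths = nn.utils.rnn.pad_packed_sequence(packed)
print(padded.shape, out_lengths)    # torch.Size([5, 3, 4]) tensor([5, 4, 2])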
import torch.nn as nn
class RNN(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim,
                 n_layers, bidirectional, dropout, pad_idx):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx = pad_idx)

        self.rnn = nn.LSTM(embedding_dim,
                           hidden_dim,
                           num_layers = n_layers,
                           bidirectional = bidirectional,
                           dropout = dropout)

        self.bidirectional = bidirectional
        if bidirectional:
            self.fc = nn.Linear(hidden_dim * 2, output_dim)
        else:
            self.fc = nn.Linear(hidden_dim, output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        # text = [sentence length, batch size]
        embedded = self.dropout(self.embedding(text))
        # embedded = [sentence length, batch size, embedding dim]

        # pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'))

        # the LSTM module returns the packed output plus the hidden and cell states
        packed_output, (hidden, cell) = self.rnn(packed_embedded)

        # unpack sequence
        output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)
        # output = [sentence length, batch size, hidden dim * num directions]
        # hidden = [num layers * num directions, batch size, hidden dim]
        # cell = [num layers * num directions, batch size, hidden dim]

        # keep only the final (top) layer's hidden state
        if self.bidirectional:
            hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
        else:
            hidden = self.dropout(hidden[-1,:,:])
        # hidden = [batch size, hidden dim * num directions]

        return self.fc(hidden)
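Before training, we can shape-check the forward pass with a tiny dummy batch (a sketch with hypothetical sizes, not in the original notebook):

# dummy model: vocab 100, embedding 8, hidden 16, 2 bidirectional layers, pad_idx 1
dummy_model = RNN(100, 8, 16, 1, 2, True, 0.5, 1)
dummy_text = torch.randint(0, 100, (7, 3))           # [sentence length, batch size]
dummy_lengths = torch.tensor([7, 5, 3])              # descending lengths
print(dummy_model(dummy_text, dummy_lengths).shape)  # torch.Size([3, 1])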
INPUT_DIM = len(TEXT.vocab) #25,002
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] # = 1 (index of the <pad> token)
model = RNN(INPUT_DIM,
            EMBEDDING_DIM,
            HIDDEN_DIM,
            OUTPUT_DIM,
            N_LAYERS,
            BIDIRECTIONAL,
            DROPOUT,
            PAD_IDX)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'The model has {count_parameters(model):,} trainable parameters')
The model has 4,810,857 trainable parameters
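As a sanity check, this count can be reproduced by hand from the hyperparameters above (embedding table + two bidirectional LSTM layers + the linear head):

# each LSTM direction/layer has 4 * (input*hidden + hidden*hidden + 2*hidden) parameters
emb   = 25_002 * 100                               # embedding: 2,500,200
lstm1 = 2 * 4 * (100 * 256 + 256 * 256 + 2 * 256)  # layer 1 (input 100), both directions: 733,184
lstm2 = 2 * 4 * (512 * 256 + 256 * 256 + 2 * 256)  # layer 2 (input 2*256 = 512): 1,576,960
fc    = 512 * 1 + 1                                # linear head on the concatenated hidden state: 513
print(emb + lstm1 + lstm2 + fc)                    # 4,810,857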
pretrained_embeddings = TEXT.vocab.vectors
print(pretrained_embeddings.shape)
torch.Size([25002, 100])
model.embedding.weight.data.copy_(pretrained_embeddings)
tensor([[-0.1117, -0.4966,  0.1631,  ...,  1.2647, -0.2753, -0.1325],
        [-0.8555, -0.7208,  1.3755,  ...,  0.0825, -1.1314,  0.3997],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [ 0.6018, -0.6654,  0.4103,  ...,  0.1696,  0.6404, -0.2471],
        [ 0.0694,  0.5585,  0.2600,  ...,  0.5310, -0.4120,  0.5282],
        [-0.6065,  0.1395,  0.1336,  ...,  0.9524, -0.8642,  0.2109]])
# PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] (= 1)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token] # = 0

# zero out the <unk> and <pad> embeddings; padding_idx in nn.Embedding
# keeps the <pad> row at zero during training
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
print(model.embedding.weight.data)
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [ 0.6018, -0.6654,  0.4103,  ...,  0.1696,  0.6404, -0.2471],
        [ 0.0694,  0.5585,  0.2600,  ...,  0.5310, -0.4120,  0.5282],
        [-0.6065,  0.1395,  0.1336,  ...,  0.9524, -0.8642,  0.2109]])
import torch.optim as optim
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
# GPU
model = model.to(device)
criterion = criterion.to(device)
Accuracy function
def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    # rounded_preds : [batch size]
    # y : batch.label
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc
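For example, with some made-up logits (hypothetical values), three of the four rounded predictions match the labels:

preds = torch.tensor([ 2.0, -1.0, 0.5, -3.0])  # raw logits; sigmoid -> 0.88, 0.27, 0.62, 0.05
y     = torch.tensor([ 1.0,  0.0, 0.0,  0.0])  # gold labels
print(binary_accuracy(preds, y))               # tensor(0.7500)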
def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:
        # reset the gradients to zero for every batch
        optimizer.zero_grad()

        # since we use packed padded sequences (include_lengths = True),
        # batch.text contains both the text and the text lengths
        text, text_lengths = batch.text

        # feed batch.text (a batch of sentences) into the model (forward is called automatically);
        # predictions has size [batch size, 1], so squeeze it to [batch size]
        predictions = model(text, text_lengths).squeeze(1)

        # compute the loss by comparing the predictions with batch.label
        loss = criterion(predictions, batch.label)

        # compute the accuracy
        acc = binary_accuracy(predictions, batch.label)

        # perform backpropagation with backward()
        loss.backward()

        # update the parameters with the optimizer
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, iterator, criterion):
    epoch_loss = 0
    epoch_acc = 0

    # "evaluation mode" : turns off dropout and batch normalization
    model.eval()

    # disable gradient computation to use less memory and speed up computation
    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text
            predictions = model(text, text_lengths).squeeze(1)
            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs
N_EPOCHS = 5
best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model1.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
Epoch: 01 | Epoch Time: 0m 39s
	Train Loss: 0.653 | Train Acc: 61.07%
	 Val. Loss: 0.509 | Val. Acc: 76.11%
Epoch: 02 | Epoch Time: 0m 39s
	Train Loss: 0.557 | Train Acc: 71.73%
	 Val. Loss: 0.438 | Val. Acc: 82.97%
Epoch: 03 | Epoch Time: 0m 39s
	Train Loss: 0.422 | Train Acc: 81.44%
	 Val. Loss: 0.372 | Val. Acc: 84.18%
Epoch: 04 | Epoch Time: 0m 39s
	Train Loss: 0.350 | Train Acc: 85.13%
	 Val. Loss: 0.338 | Val. Acc: 84.69%
Epoch: 05 | Epoch Time: 0m 39s
	Train Loss: 0.296 | Train Acc: 88.10%
	 Val. Loss: 0.332 | Val. Acc: 87.54%
model.load_state_dict(torch.load('tut2-model1.pt'))
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
Test Loss: 0.343 | Test Acc: 87.03%
import torch
model.load_state_dict(torch.load('tut2-model1.pt'))
<All keys matched successfully>
import spacy
nlp = spacy.load('en')
def predict_sentiment(model, sentence):
    model.eval()
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    length = [len(indexed)]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)  # add a batch dimension: [sentence length, 1]
    length_tensor = torch.LongTensor(length)
    prediction = torch.sigmoid(model(tensor, length_tensor))
    return prediction.item()
# same as predict_sentiment, but for text that is already tokenized
def predict_test_sentiment(model, tokenized):
    model.eval()
    indexed = [TEXT.vocab.stoi[t] for t in tokenized]
    length = [len(indexed)]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)
    length_tensor = torch.LongTensor(length)
    prediction = torch.sigmoid(model(tensor, length_tensor))
    return prediction.item()
print(vars(test_data.examples[0])['text'])
predict_test_sentiment(model, vars(test_data.examples[0])['text'])
['This', 'is', 'not', 'a', 'film', 'is', 'a', 'clever', ',', 'witty', 'and', 'often', 'heart', 'touching', 'movie', '.', 'It', "'s", 'a', 'retrospective', 'of', 'a', 'failed', 'relationship', 'between', 'Michael', 'Connor', '(', 'Michael', 'Leydon', 'Campbell', ')', 'and', 'his', 'estranged', 'Irish', 'girlfriend', 'Grace', 'Mckenna', '.', 'Michael', 'down', 'on', 'his', 'luck', 'decides', 'to', 'make', 'a', 'documentary', 'replaying', 'his', 'whole', 'relationship', 'and', 'what', 'went', 'wrong', '.', 'He', 'exploits', 'his', 'friendship', 'with', 'an', 'actor', 'he', 'met', 'at', 'the', 'gym', 'Nadia', '(', 'Nadia', 'Dajani', ')', 'who', 'he', 'gets', 'to', 'play', 'Grace', '.', 'The', 'concept', 'of', 'this', 'film', 'is', 'very', 'original', '.', 'Michaels', 'relationship', 'is', 'shown', 'from', 'every', 'point', 'whether', 'it', "'s", 'a', 'high', 'or', 'low', '.', 'Michael', 'Leydon', 'Campbell', 'pulls', 'off', 'a', 'fantastic', 'performance', 'that', 'makes', 'you', 'want', 'to', 'help', 'him', 'find', 'Grace', '.', 'If', 'fact', 'most', 'of', 'the', 'characters', 'pull', 'off', 'great', 'performances', 'except', 'the', 'puzzler', '.', 'The', 'puzzler', 'is', 'needed', 'to', 'move', 'the', 'plot', 'along', 'yet', 'seems', 'too', 'surreal', 'to', 'exist', 'in', 'a', 'coffee', 'shop', '.', 'His', 'monologues', 'are', 'often', 'overdrawn', 'and', 'pointless', '.', 'This', 'is', 'proved', 'when', 'he', 'says', '"', 'Out', 'of', 'this', 'chaos', ',', 'we', "'re", 'all', 'trying', 'to', 'create', 'order', '.', 'And', 'out', 'of', 'the', 'order', ',', 'meaning', '.', 'But', 'in', 'reality', 'there', 'is', 'no', 'such', 'thing', 'as', 'meaning', '.', 'Something', 'only', 'has', 'meaning', 'if', 'we', 'make', 'it', 'have', 'meaning', '.', '"<br', '/><br', '/>The', 'commentary', 'saves', 'this', 'movie', '.', 'The', 'commentary', 'is', 'done', 'in', 'the', 'vain', 'of', 'This', 'is', 'Spinal', 'Tap', 'and', 'has', 'Michael', 'and', 'his', 'brother', 'explain', 'the', 'problems', 'they', 'had', 'while', 'making', 'the', 'film', '.', 'Michael', 'offers', 'a', 'very', 'funny', 'self', 'conscious', 'commentary', 'that', 'makes', 'for', 'some', 'very', 'good', 'belly', 'laughs.<br', '/><br', '/>Overall', 'I', "'d", 'give', 'this', 'movie', 'a', '7/10', '.']
0.9990482926368713
predict_sentiment(model, "This film is terrible")
0.3941175043582916
predict_sentiment(model, "This film is great")
0.9906774759292603
predict_sentiment(model, "This movie is fantastic")
0.991801917552948
We can also run experiments while varying the RNN architecture slightly, as shown below (see the helper sketch after this list).

Experiment results

[Test1] single layer, Dropout = 0, Bidirectional X
[Test2] single layer, Dropout = 0.5, Bidirectional X
[Test3] single layer, Dropout = 0.2, Bidirectional RNN model
[Test4] 3 multi-layer, Dropout = 0.2, Bidirectional X

Results from running the experiments once more

[Test1] single layer, Dropout = 0, Bidirectional X
[Test2] single layer, Dropout = 0.5, Bidirectional X
[Test3] single layer, Dropout = 0.2, Bidirectional RNN model
[Test4] 3 multi-layer, Dropout = 0.2, Bidirectional X
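Each test below repeats the same setup/train/evaluate boilerplate, so a hypothetical helper like this (a sketch, not in the original notebook) could run any configuration in one call:

def run_experiment(n_layers, bidirectional, dropout, save_path, n_epochs=5):
    # build the model and load the pre-trained GloVe embeddings, as above
    model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM,
                n_layers, bidirectional, dropout, PAD_IDX)
    model.embedding.weight.data.copy_(TEXT.vocab.vectors)
    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss().to(device)

    best_valid_loss = float('inf')
    for epoch in range(n_epochs):
        train(model, train_iterator, optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), save_path)

    # restore the best checkpoint and report test metrics
    model.load_state_dict(torch.load(save_path))
    return evaluate(model, test_iterator, criterion)

# e.g. [Test1]: test_loss, test_acc = run_experiment(1, False, 0, 'tut2-test1.pt')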
# [Test1] single layer, Dropout = 0, Bidirectional X
INPUT_DIM = len(TEXT.vocab) #25,002
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 1
BIDIRECTIONAL = False
DROPOUT = 0
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] # = 1 (index of the <pad> token)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
model = RNN(INPUT_DIM,
            EMBEDDING_DIM,
            HIDDEN_DIM,
            OUTPUT_DIM,
            N_LAYERS,
            BIDIRECTIONAL,
            DROPOUT,
            PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
# GPU
model = model.to(device)
criterion = criterion.to(device)
N_EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model2.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
# test
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
Epoch: 01 | Epoch Time: 0m 9s
	Train Loss: 0.636 | Train Acc: 64.61%
	 Val. Loss: 0.549 | Val. Acc: 74.89%
Epoch: 02 | Epoch Time: 0m 9s
	Train Loss: 0.478 | Train Acc: 78.32%
	 Val. Loss: 0.488 | Val. Acc: 76.55%
Epoch: 03 | Epoch Time: 0m 9s
	Train Loss: 0.365 | Train Acc: 85.16%
	 Val. Loss: 0.414 | Val. Acc: 82.23%
Epoch: 04 | Epoch Time: 0m 9s
	Train Loss: 0.243 | Train Acc: 90.94%
	 Val. Loss: 0.331 | Val. Acc: 86.58%
Epoch: 05 | Epoch Time: 0m 9s
	Train Loss: 0.169 | Train Acc: 94.11%
	 Val. Loss: 0.368 | Val. Acc: 86.22%
Test Loss: 0.391 | Test Acc: 84.78%
# [Test2] single layer, Dropout = 0.5, Bidirectional X
INPUT_DIM = len(TEXT.vocab) #25,002
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 1
BIDIRECTIONAL = False
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] # = 1 (index of the <pad> token)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
model = RNN(INPUT_DIM,
            EMBEDDING_DIM,
            HIDDEN_DIM,
            OUTPUT_DIM,
            N_LAYERS,
            BIDIRECTIONAL,
            DROPOUT,
            PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
# GPU
model = model.to(device)
criterion = criterion.to(device)
N_EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model3.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
# test
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:63: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1 "num_layers={}".format(dropout, num_layers))
Epoch: 01 | Epoch Time: 0m 9s
	Train Loss: 0.670 | Train Acc: 58.37%
	 Val. Loss: 0.622 | Val. Acc: 66.00%
Epoch: 02 | Epoch Time: 0m 9s
	Train Loss: 0.620 | Train Acc: 65.59%
	 Val. Loss: 0.552 | Val. Acc: 73.34%
Epoch: 03 | Epoch Time: 0m 9s
	Train Loss: 0.579 | Train Acc: 69.66%
	 Val. Loss: 0.598 | Val. Acc: 68.92%
Epoch: 04 | Epoch Time: 0m 9s
	Train Loss: 0.464 | Train Acc: 78.51%
	 Val. Loss: 0.327 | Val. Acc: 85.50%
Epoch: 05 | Epoch Time: 0m 9s
	Train Loss: 0.306 | Train Acc: 87.48%
	 Val. Loss: 0.290 | Val. Acc: 88.06%
Test Loss: 0.300 | Test Acc: 87.59%
# [Test3] single layer, Dropout = 0.2, Bidirectional RNN model
INPUT_DIM = len(TEXT.vocab) #25,002
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 1
BIDIRECTIONAL = True
DROPOUT = 0.2
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] # = 1 (index of the <pad> token)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
model = RNN(INPUT_DIM,
            EMBEDDING_DIM,
            HIDDEN_DIM,
            OUTPUT_DIM,
            N_LAYERS,
            BIDIRECTIONAL,
            DROPOUT,
            PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
# GPU
model = model.to(device)
criterion = criterion.to(device)
N_EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model4.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py:63: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.2 and num_layers=1 "num_layers={}".format(dropout, num_layers))
Epoch: 01 | Epoch Time: 0m 14s
	Train Loss: 0.636 | Train Acc: 63.25%
	 Val. Loss: 0.503 | Val. Acc: 76.03%
Epoch: 02 | Epoch Time: 0m 14s
	Train Loss: 0.497 | Train Acc: 75.93%
	 Val. Loss: 0.401 | Val. Acc: 82.77%
Epoch: 03 | Epoch Time: 0m 14s
	Train Loss: 0.388 | Train Acc: 82.49%
	 Val. Loss: 0.374 | Val. Acc: 84.31%
Epoch: 04 | Epoch Time: 0m 14s
	Train Loss: 0.241 | Train Acc: 90.40%
	 Val. Loss: 0.292 | Val. Acc: 87.80%
Epoch: 05 | Epoch Time: 0m 14s
	Train Loss: 0.183 | Train Acc: 93.03%
	 Val. Loss: 0.293 | Val. Acc: 88.99%
Test Loss: 0.300 | Test Acc: 88.49%
# [Test4] 3 multi-layer, Dropout = 0.2, Bidirectional X
INPUT_DIM = len(TEXT.vocab) #25,002
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 3
BIDIRECTIONAL = False
DROPOUT = 0.2
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token] # = 1 (index of the <pad> token)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
model = RNN(INPUT_DIM,
            EMBEDDING_DIM,
            HIDDEN_DIM,
            OUTPUT_DIM,
            N_LAYERS,
            BIDIRECTIONAL,
            DROPOUT,
            PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
# GPU
model = model.to(device)
criterion = criterion.to(device)
N_EPOCHS = 5
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut2-model5.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
Epoch: 01 | Epoch Time: 0m 30s
	Train Loss: 0.670 | Train Acc: 57.03%
	 Val. Loss: 0.693 | Val. Acc: 50.62%
Epoch: 02 | Epoch Time: 0m 30s
	Train Loss: 0.640 | Train Acc: 62.59%
	 Val. Loss: 0.664 | Val. Acc: 55.83%
Epoch: 03 | Epoch Time: 0m 30s
	Train Loss: 0.498 | Train Acc: 76.21%
	 Val. Loss: 0.343 | Val. Acc: 85.77%
Epoch: 04 | Epoch Time: 0m 30s
	Train Loss: 0.311 | Train Acc: 87.23%
	 Val. Loss: 0.338 | Val. Acc: 86.21%
Epoch: 05 | Epoch Time: 0m 30s
	Train Loss: 0.281 | Train Acc: 88.79%
	 Val. Loss: 0.376 | Val. Acc: 84.32%
Test Loss: 0.390 | Test Acc: 83.83%