In [0]:
"""
We use following lines because we are running on Google Colab
If you are running notebook on a local computer, you don't need these
"""
from google.colab import drive
drive.mount('/content/gdrive')
import os
os.chdir('/content/gdrive/My Drive/finch/tensorflow1/question_answering/babi/main')
In [0]:
from collections import Counter
from pathlib import Path

Make Vocabulary

In [0]:
# Make sure the output directory for the vocabulary file is present.
Path('../vocab').mkdir(exist_ok=True)


def _tokens_of(raw):
  """Turn one raw line of the training file into a list of lowercase tokens.

  The first whitespace-separated token of the text is always dropped
  (presumably the sentence index of the bAbI format -- confirm against the
  data file).

  * Lines containing '?' are split on tabs: field 0 is the question (its '?'
    is stripped before tokenising) and field 1 is appended as one answer token.
  * All other lines have '.' removed and are split on whitespace.
  """
  text = raw.lower().rstrip()
  if '?' not in text:
    return text.replace('.', '').split()[1:]
  fields = text.split('\t')
  question_tokens = fields[0].replace('?', '').split()[1:]
  return question_tokens + [fields[1]]


# Token frequencies accumulated over the whole training file.
counter = Counter()
with open('../data/qa5_three-arg-relations_train.txt') as train_file:
  for raw in train_file:
    counter.update(_tokens_of(raw))

# '<pad>' is the first entry (index 0); the remaining tokens follow from most
# to least frequent.
words = ['<pad>']
words.extend(token for token, _count in counter.most_common())

# One vocabulary entry per line.
with open('../vocab/word.txt', 'w') as vocab_file:
  vocab_file.writelines(token + '\n' for token in words)