from fastai.gen_doc.nbdoc import *

from fastai.text import * 

# Fetch the IMDB sample dataset; `path` is whatever location untar_data returns.
path = untar_data(URLs.IMDB_SAMPLE)
path

# Read the reviews table that sits under `path` and preview its first rows.
texts_csv = path/'texts.csv'
df = pd.read_csv(texts_csv)
df.head()

# Delete the cached token arrays under `path/tmp`, if any are present.
# NOTE(review): presumably this forces the data below to be re-tokenized
# instead of being read from an older cache — confirm against the fastai
# version in use.
for file in ('train_tok.npy', 'valid_tok.npy'):
    # EAFP: attempt the removal and ignore only the "already absent" case,
    # rather than checking existence first (two path lookups, and racy if the
    # file disappears between the check and the delete).
    try:
        os.remove(path/'tmp'/file)
    except FileNotFoundError:
        pass  # nothing cached for this split; nothing to clean up

# Data bunch for the language model, built from the reviews CSV.
data_lm = TextLMDataBunch.from_csv(path, 'texts.csv')

# Data bunch for the classifier. It is handed the language model's training
# vocabulary so both data bunches are built against the same vocab object.
lm_vocab = data_lm.train_ds.vocab
data_clas = TextClasDataBunch.from_csv(
    path,
    'texts.csv',
    vocab=lm_vocab,
    bs=32,
)

# Export both data bunches to pickle files ...
data_lm.save('data_lm_export.pkl')
data_clas.save('data_clas_export.pkl')

# ... and load them back, this time asking for a batch size of 16 for the
# classifier data.
data_lm = load_data(path, 'data_lm_export.pkl')
data_clas = load_data(path, 'data_clas_export.pkl', bs=16)

# Language-model learner on top of the AWD_LSTM architecture.
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5)

# First pass: a single cycle at a learning rate of 1e-2.
learn.fit_one_cycle(1, max_lr=1e-2)

# Second pass: unfreeze the learner and run one more cycle at 1e-3.
learn.unfreeze()
learn.fit_one_cycle(1, max_lr=1e-3)

# Ask the model to continue a prompt by 10 words (result is not stored).
learn.predict("This is a review about", n_words=10)

# Save the encoder under the name 'ft_enc' so it can be loaded by name later.
learn.save_encoder('ft_enc')

# Classifier learner on the same architecture, initialised with the encoder
# previously saved as 'ft_enc'.
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)
learn.load_encoder('ft_enc')

# Display a batch of the classifier data.
data_clas.show_batch()

# Stage 1: one cycle at 1e-2.
learn.fit_one_cycle(1, max_lr=1e-2)

# Stage 2: freeze_to(-2), then one cycle with the learning rate given as a
# slice from half the base rate up to the base rate.
stage2_lr = 5e-3
learn.freeze_to(-2)
learn.fit_one_cycle(1, max_lr=slice(stage2_lr/2., stage2_lr))

# Stage 3: unfreeze, then one cycle with a slice from a hundredth of the base
# rate up to the base rate.
stage3_lr = 2e-3
learn.unfreeze()
learn.fit_one_cycle(1, max_lr=slice(stage3_lr/100, stage3_lr))

# Run the trained classifier on a sample sentence (result is not stored).
learn.predict("This was a great movie!")