# Notebook-style walkthrough on the IMDB sample: build the data bunches,
# fine-tune a language model, then reuse its encoder in a text classifier.
from fastai.gen_doc.nbdoc import *
from fastai.text import *

# --- Dataset -----------------------------------------------------------------
path = untar_data(URLs.IMDB_SAMPLE)
path  # bare expression: only shows a value when run as a notebook cell
df = pd.read_csv(path/'texts.csv')
df.head()  # bare expression, same remark as above

# Remove token arrays left under path/'tmp' (if any) before the data bunches
# are rebuilt from the CSV.
for file in ['train_tok.npy', 'valid_tok.npy']:
    cached_tokens = path/'tmp'/file
    if os.path.exists(cached_tokens):
        os.remove(cached_tokens)

# --- Data bunches ------------------------------------------------------------
# Data for the language model.
data_lm = TextLMDataBunch.from_csv(path, 'texts.csv')
# Data for the classifier; it is handed the language model's training vocab.
data_clas = TextClasDataBunch.from_csv(
    path,
    'texts.csv',
    vocab=data_lm.train_ds.vocab,
    bs=32,
)

# Write both bunches out, then read them back (the classifier data is
# reloaded with bs=16 rather than the bs=32 it was created with).
data_lm.save('data_lm_export.pkl')
data_clas.save('data_clas_export.pkl')
data_lm = load_data(path, 'data_lm_export.pkl')
data_clas = load_data(path, 'data_clas_export.pkl', bs=16)

# --- Language model ----------------------------------------------------------
learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.5)
learn.fit_one_cycle(1, 1e-2)

# Unfreeze and run one more cycle with a smaller rate (1e-3 instead of 1e-2).
learn.unfreeze()
learn.fit_one_cycle(1, 1e-3)

# Sample a 10-word continuation of the prompt, then keep the encoder so the
# classifier below can load it.
learn.predict("This is a review about", n_words=10)
learn.save_encoder('ft_enc')

# --- Classifier --------------------------------------------------------------
# NOTE: `learn` is rebound here; the language-model learner is not used again.
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5)
learn.load_encoder('ft_enc')
data_clas.show_batch()

# Three training passes: as created, after freeze_to(-2), and after a full
# unfreeze. The last two pass a slice(low, high) as the rate argument.
learn.fit_one_cycle(1, 1e-2)

learn.freeze_to(-2)
learn.fit_one_cycle(1, slice(5e-3/2., 5e-3))

learn.unfreeze()
learn.fit_one_cycle(1, slice(2e-3/100, 2e-3))

learn.predict("This was a great movie!")