%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai import *
from fastai.text import *
bs=128  # batch size (learning rates below are scaled relative to a baseline of 48)
data_path = Config.data_path()
lang = 'vi'  # language code: Vietnamese
name = f'{lang}wiki'
path = data_path/name
dest = path/'docs'  # folder of extracted Wikipedia documents
# Filenames for the saved backwards-LM weights and vocab; reused later as
# pretrained_fnames when fine-tuning on the classification corpus.
lm_fns = [f'{lang}_wt_bwd', f'{lang}_wt_vocab_bwd']
# Build a language-model databunch over the wiki docs; backwards=True reverses
# token order so the model predicts right-to-left.
data = (TextList.from_folder(dest)
.split_by_rand_pct(0.1, seed=42)
.label_for_lm()
.databunch(bs=bs, num_workers=1, backwards=True))
# Cache the processed databunch to disk, then reload it (cheap on later runs).
data.save(f'{lang}_databunch_bwd')
data = load_data(dest, f'{lang}_databunch_bwd', bs=bs, backwards=True)
/home/jhoward/anaconda3/lib/python3.7/site-packages/torch/serialization.py:493: SourceChangeWarning: source code of class 'torch.nn.modules.loss.CrossEntropyLoss' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes. warnings.warn(msg, SourceChangeWarning)
# Train the backwards AWD-LSTM language model from scratch (pretrained=False)
# in mixed precision for speed/memory.
learn = language_model_learner(data, AWD_LSTM, drop_mult=0.5, pretrained=False).to_fp16()
lr = 3e-3
lr *= bs/48 # Scale learning rate by batch size
learn.unfreeze()
learn.fit_one_cycle(10, lr, moms=(0.8,0.7))
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 3.445849 | 3.424579 | 0.401327 | 32:56 |
1 | 3.420865 | 3.383994 | 0.402841 | 33:31 |
2 | 3.374694 | 3.330634 | 0.407800 | 33:26 |
3 | 3.273197 | 3.257108 | 0.416047 | 32:54 |
4 | 3.223044 | 3.200649 | 0.422695 | 32:56 |
5 | 3.134357 | 3.132859 | 0.430725 | 31:35 |
6 | 3.135637 | 3.057030 | 0.439737 | 31:41 |
7 | 3.080461 | 2.992323 | 0.447939 | 31:45 |
8 | 3.075036 | 2.943683 | 0.454494 | 31:39 |
9 | 2.947997 | 2.929258 | 0.456500 | 31:46 |
mdl_path = path/'models'
mdl_path.mkdir(exist_ok=True)
# Save the weights in fp32 without optimizer state, plus the vocab pickle, so
# the pair can be reloaded later via pretrained_fnames=lm_fns.
learn.to_fp32().save(mdl_path/lm_fns[0], with_opt=False)
learn.data.vocab.save(mdl_path/(lm_fns[1] + '.pkl'))
# Load the classification corpus. pandas parses empty cells (and literal "NA")
# as NaN; restore them as the string 'NA' so the tokenizer always sees text.
# Using Series.fillna is the idiomatic equivalent of the boolean-mask .loc
# assignment and produces the same result.
train_df = pd.read_csv(path/'train.csv')
train_df['comment'] = train_df['comment'].fillna('NA')
test_df = pd.read_csv(path/'test.csv')
test_df['comment'] = test_df['comment'].fillna('NA')
test_df['label'] = 0  # placeholder label so both frames share the same columns
# Concatenate train + test: for LM fine-tuning only the text is used, so
# including unlabeled test comments is safe and gives the LM more data.
df = pd.concat([train_df,test_df])
# Fine-tuning databunch over the comment text (train + test), again backwards.
data_lm = (TextList.from_df(df, path, cols='comment')
.split_by_rand_pct(0.1, seed=42)
.label_for_lm()
.databunch(bs=bs, num_workers=1, backwards=True))
# n_hid=1152 must match the architecture the wiki LM was trained with so the
# saved weights in lm_fns load cleanly.
learn_lm = language_model_learner(data_lm, AWD_LSTM, config={**awd_lstm_lm_config, 'n_hid': 1152},
pretrained_fnames=lm_fns, drop_mult=1.0)
lr = 1e-3
lr *= bs/48  # scale learning rate by batch size
# Warm up the (frozen-body) model at 10x the base LR before full fine-tuning.
learn_lm.fit_one_cycle(2, lr*10, moms=(0.8,0.7))
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 4.797052 | 4.025901 | 0.323326 | 00:07 |
1 | 4.275975 | 3.914450 | 0.333719 | 00:06 |
# Unfreeze all layers and fine-tune the whole LM at the base learning rate.
learn_lm.unfreeze()
learn_lm.fit_one_cycle(8, lr, moms=(0.8,0.7))
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 3.996770 | 3.809489 | 0.346052 | 00:09 |
1 | 3.856959 | 3.664919 | 0.363239 | 00:09 |
2 | 3.726143 | 3.584303 | 0.369685 | 00:09 |
3 | 3.608569 | 3.531390 | 0.375307 | 00:09 |
4 | 3.514265 | 3.500826 | 0.379701 | 00:09 |
5 | 3.446292 | 3.486931 | 0.380859 | 00:09 |
6 | 3.392542 | 3.479732 | 0.382520 | 00:09 |
7 | 3.357502 | 3.478930 | 0.382520 | 00:09 |
# Save the fine-tuned LM and, separately, just its encoder — the encoder is
# what the classifier below loads.
learn_lm.save(f'{lang}fine_tuned_bwd')
learn_lm.save_encoder(f'{lang}fine_tuned_enc_bwd')
# Classification databunch over the labeled training data only, reusing the
# LM's vocab so token ids line up with the saved encoder.
data_clas = (TextList.from_df(train_df, path, vocab=data_lm.vocab, cols='comment')
.split_by_rand_pct(0.1, seed=42)
.label_from_df(cols='label')
.databunch(bs=bs, num_workers=1, backwards=True))
data_clas.save(f'{lang}_textlist_class_bwd')
data_clas = load_data(path, f'{lang}_textlist_class_bwd', bs=bs, num_workers=1, backwards=True)
from sklearn.metrics import f1_score
@np_func
def f1(inp, targ):
    """F1 metric: take the argmax class of the raw outputs and score it
    against the targets with sklearn's f1_score (binary average)."""
    preds = np.argmax(inp, axis=-1)
    return f1_score(targ, preds)
# Build the text classifier, load the fine-tuned LM encoder, and train only
# the classification head first (body frozen), in mixed precision.
learn_c = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.5, metrics=[accuracy,f1]).to_fp16()
learn_c.load_encoder(f'{lang}fine_tuned_enc_bwd')
learn_c.freeze()
lr=2e-2
lr *= bs/48  # scale learning rate by batch size
learn_c.fit_one_cycle(2, lr, moms=(0.8,0.7))
epoch | train_loss | valid_loss | accuracy | f1 | time |
---|---|---|---|---|---|
0 | 0.369300 | 0.363769 | 0.834577 | 0.826098 | 00:03 |
1 | 0.328192 | 0.278986 | 0.874378 | 0.851747 | 00:02 |
# Gradual unfreezing, step 1: also train the last RNN layer, with
# discriminative LRs (each earlier layer group gets LR divided by 2.6**4).
learn_c.freeze_to(-2)
learn_c.fit_one_cycle(2, slice(lr/(2.6**4),lr), moms=(0.8,0.7))
epoch | train_loss | valid_loss | accuracy | f1 | time |
---|---|---|---|---|---|
0 | 0.337875 | 0.306132 | 0.876866 | 0.860107 | 00:03 |
1 | 0.276982 | 0.237260 | 0.906095 | 0.886427 | 00:03 |
# Gradual unfreezing, step 2: unfreeze one more layer group and halve the LR.
learn_c.freeze_to(-3)
learn_c.fit_one_cycle(2, slice(lr/2/(2.6**4),lr/2), moms=(0.8,0.7))
epoch | train_loss | valid_loss | accuracy | f1 | time |
---|---|---|---|---|---|
0 | 0.292297 | 0.252393 | 0.896144 | 0.877916 | 00:04 |
1 | 0.255284 | 0.213655 | 0.912313 | 0.892551 | 00:04 |
# Final step: train the whole network at a tenth of the head LR.
learn_c.unfreeze()
learn_c.fit_one_cycle(1, slice(lr/10/(2.6**4),lr/10), moms=(0.8,0.7))
epoch | train_loss | valid_loss | accuracy | f1 | time |
---|---|---|---|---|---|
0 | 0.167376 | 0.266633 | 0.904851 | 0.885386 | 00:04 |
learn_c.save(f'{lang}clas_bwd')