from fastai.text import *
# Batch size shared by every databunch built in this script (and by the
# stateful models' stored hidden state).
bs=64
# Fetch the HUMAN_NUMBERS dataset through fastai's untar_data; `path` is the
# returned dataset folder.
path = untar_data(URLs.HUMAN_NUMBERS)
# List the folder contents: a train.txt and a valid.txt file.
path.ls()
[PosixPath('/home/ubuntu/.fastai/data/human_numbers/train.txt'), PosixPath('/home/ubuntu/.fastai/data/human_numbers/valid.txt')]
def readnums(d):
    """Read file `d` under `path` and return its lines as a single document.

    Every line is stripped of surrounding whitespace and the lines are joined
    with ', '. The joined string is returned wrapped in a one-element list.
    """
    # Use a context manager so the file handle is closed as soon as the
    # lines have been consumed instead of being left to the garbage collector.
    with open(path/d) as f:
        return [', '.join(line.strip() for line in f)]
# Load the training numbers as one comma-separated document and peek at its
# first 80 characters.
train_txt = readnums('train.txt'); train_txt[0][:80]
'one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve, thirt'
# Same for the validation numbers, peeking at the last 80 characters.
valid_txt = readnums('valid.txt'); valid_txt[0][-80:]
' nine thousand nine hundred ninety eight, nine thousand nine hundred ninety nine'
# Wrap each one-document list in a fastai TextList.
train = TextList(train_txt, path=path)
valid = TextList(valid_txt, path=path)
# Pair the train/valid lists and label them for language modelling.
src = ItemLists(path=path, train=train, valid=valid).label_for_lm()
# Build the databunch with the shared batch size (bptt left at its default).
data = src.databunch(bs=bs)
# The processed text is tokenised: it starts with 'xxbos' and commas are
# separate tokens.
train[0].text[:80]
'xxbos one , two , three , four , five , six , seven , eight , nine , ten , eleve'
# Number of token ids in the single validation document.
len(data.valid_ds[0][0].data)
13017
# Sequence length per batch row (bptt) and number of validation batches.
data.bptt, len(data.valid_dl)
(70, 3)
# tokens / bptt / bs is just under 3, consistent with the 3 batches above.
13017/70/bs
2.905580357142857
# Pull the three validation batches by hand so they can be inspected below.
it = iter(data.valid_dl)
x1,y1 = next(it)
x2,y2 = next(it)
x3,y3 = next(it)
it.close()
# Total number of input tokens across the three batches.
x1.numel()+x2.numel()+x3.numel()
13440
# Inputs and targets have the same (bs, bptt) shape in every batch.
x1.shape,y1.shape
(torch.Size([64, 70]), torch.Size([64, 70]))
x2.shape,y2.shape
(torch.Size([64, 70]), torch.Size([64, 70]))
# First input token id of each of the 64 rows.
x1[:,0]
tensor([ 2, 8, 10, 11, 12, 10, 9, 8, 9, 13, 18, 24, 18, 14, 15, 10, 18, 8, 9, 8, 18, 24, 18, 10, 18, 10, 9, 8, 18, 19, 10, 25, 19, 22, 19, 19, 23, 19, 10, 13, 10, 10, 8, 13, 8, 19, 9, 19, 34, 16, 10, 9, 8, 16, 8, 19, 9, 19, 10, 19, 10, 19, 19, 19], device='cuda:0')
# First target token id of each of the 64 rows.
y1[:,0]
tensor([18, 18, 26, 9, 8, 11, 31, 18, 25, 9, 10, 14, 10, 9, 8, 14, 10, 18, 25, 18, 10, 17, 10, 17, 8, 17, 20, 18, 9, 9, 19, 8, 10, 15, 10, 10, 12, 10, 12, 8, 12, 13, 19, 9, 19, 10, 23, 10, 8, 8, 15, 16, 19, 9, 19, 10, 23, 10, 18, 8, 18, 10, 10, 9], device='cuda:0')
# Vocabulary of the validation set, used to turn token ids back into text.
v = data.valid_ds.vocab
# Row 0 of the first batch: the start of the validation text.
v.textify(x1[0])
'xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight'
# Its target is the same text shifted one token ahead.
v.textify(y1[0])
'eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand'
# Row 0 of the second batch continues where row 0 of the first batch stopped...
v.textify(x2[0])
'thousand eighteen , eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three , eight thousand twenty four , eight thousand twenty five , eight thousand twenty six , eight thousand twenty seven , eight thousand twenty eight , eight thousand twenty nine , eight thousand thirty , eight thousand thirty one , eight thousand thirty two ,'
# ...and row 0 of the third batch continues from the second.
v.textify(x3[0])
'eight thousand thirty three , eight thousand thirty four , eight thousand thirty five , eight thousand thirty six , eight thousand thirty seven , eight thousand thirty eight , eight thousand thirty nine , eight thousand forty , eight thousand forty one , eight thousand forty two , eight thousand forty three , eight thousand forty four , eight thousand forty five , eight thousand forty six , eight'
# Row 1 of the first batch starts close to where row 0 of the third batch ended.
v.textify(x1[1])
', eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine ,'
# Row 1 likewise carries on across the second and third batches.
v.textify(x2[1])
'eight thousand sixty , eight thousand sixty one , eight thousand sixty two , eight thousand sixty three , eight thousand sixty four , eight thousand sixty five , eight thousand sixty six , eight thousand sixty seven , eight thousand sixty eight , eight thousand sixty nine , eight thousand seventy , eight thousand seventy one , eight thousand seventy two , eight thousand seventy three , eight thousand'
v.textify(x3[1])
'seventy four , eight thousand seventy five , eight thousand seventy six , eight thousand seventy seven , eight thousand seventy eight , eight thousand seventy nine , eight thousand eighty , eight thousand eighty one , eight thousand eighty two , eight thousand eighty three , eight thousand eighty four , eight thousand eighty five , eight thousand eighty six , eight thousand eighty seven , eight thousand eighty'
# Last row of the last batch: reaches the end of the text and then wraps
# around to the beginning ('xxbos eight thousand one ...').
v.textify(x3[-1])
'ninety , nine thousand nine hundred ninety one , nine thousand nine hundred ninety two , nine thousand nine hundred ninety three , nine thousand nine hundred ninety four , nine thousand nine hundred ninety five , nine thousand nine hundred ninety six , nine thousand nine hundred ninety seven , nine thousand nine hundred ninety eight , nine thousand nine hundred ninety nine xxbos eight thousand one , eight'
# Display a sample of the validation batches as a table.
data.show_batch(ds_type=DatasetType.Valid)
idx | text |
---|---|
0 | thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine , eight thousand sixty , eight thousand sixty |
1 | eight , eight thousand eighty nine , eight thousand ninety , eight thousand ninety one , eight thousand ninety two , eight thousand ninety three , eight thousand ninety four , eight thousand ninety five , eight thousand ninety six , eight thousand ninety seven , eight thousand ninety eight , eight thousand ninety nine , eight thousand one hundred , eight thousand one hundred one , eight thousand one |
2 | thousand one hundred twenty four , eight thousand one hundred twenty five , eight thousand one hundred twenty six , eight thousand one hundred twenty seven , eight thousand one hundred twenty eight , eight thousand one hundred twenty nine , eight thousand one hundred thirty , eight thousand one hundred thirty one , eight thousand one hundred thirty two , eight thousand one hundred thirty three , eight thousand |
3 | three , eight thousand one hundred fifty four , eight thousand one hundred fifty five , eight thousand one hundred fifty six , eight thousand one hundred fifty seven , eight thousand one hundred fifty eight , eight thousand one hundred fifty nine , eight thousand one hundred sixty , eight thousand one hundred sixty one , eight thousand one hundred sixty two , eight thousand one hundred sixty three |
4 | thousand one hundred eighty three , eight thousand one hundred eighty four , eight thousand one hundred eighty five , eight thousand one hundred eighty six , eight thousand one hundred eighty seven , eight thousand one hundred eighty eight , eight thousand one hundred eighty nine , eight thousand one hundred ninety , eight thousand one hundred ninety one , eight thousand one hundred ninety two , eight thousand |
# Rebuild the databunch with 3-token sequences for the first models.
data = src.databunch(bs=bs, bptt=3)
x,y = data.one_batch()
x.shape,y.shape
(torch.Size([64, 3]), torch.Size([64, 3]))
# Vocabulary size: input dimension of the embeddings and output dimension of
# the models defined below.
nv = len(v.itos); nv
38
# Hidden/embedding width used by every model below.
nh=64
def loss4(input, target):
    """Cross-entropy of `input` against only the last column of `target`."""
    final_token = target[:, -1]
    return F.cross_entropy(input, final_token)
def acc4(input, target):
    """Accuracy of `input` measured against only the last column of `target`."""
    final_token = target[:, -1]
    return accuracy(input, final_token)
class Model0(nn.Module):
    """Hand-unrolled network that reads up to three token columns of `x`.

    One embedding, one hidden-to-hidden linear layer and one batch-norm layer
    are reused at every position; the output layer maps the final hidden
    activation to `nv` scores.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)  # green arrow: token id -> hidden
        self.h_h = nn.Linear(nh, nh)     # brown arrow: hidden -> hidden
        self.h_o = nn.Linear(nh, nv)     # blue arrow: hidden -> output scores
        self.bn = nn.BatchNorm1d(nh)

    def _advance(self, h):
        """Apply the shared hidden layer, then ReLU, then batch norm to `h`."""
        return self.bn(F.relu(self.h_h(h)))

    def forward(self, x):
        """Return output scores after consuming the first columns of `x`."""
        n_cols = x.shape[1]
        h = self._advance(self.i_h(x[:, 0]))
        # Columns 1 and 2 are folded in only when they exist; any column
        # beyond the third is ignored.
        for col in range(1, min(n_cols, 3)):
            h = self._advance(h + self.i_h(x[:, col]))
        return self.h_o(h)
# Loss and metric both compare the model output with the last target token only.
learn = Learner(data, Model0(), loss_func=loss4, metrics=acc4)
# One-cycle training for 6 epochs (one row per epoch in the table below).
learn.fit_one_cycle(6, 1e-4)
epoch | train_loss | valid_loss | acc4 |
---|---|---|---|
1 | 3.596286 | 3.588869 | 0.046645 |
2 | 3.086100 | 3.205763 | 0.274816 |
3 | 2.494411 | 2.749365 | 0.392004 |
4 | 2.144753 | 2.463537 | 0.415671 |
5 | 2.010915 | 2.352887 | 0.409237 |
6 | 1.983992 | 2.336967 | 0.408778 |
class Model1(nn.Module):
    """Loop-based network that consumes every token column of `x`.

    The same embedding, hidden-to-hidden layer and batch-norm layer are
    applied at each position; only the final hidden activation is mapped to
    `nv` output scores.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)  # green arrow: token id -> hidden
        self.h_h = nn.Linear(nh, nh)     # brown arrow: hidden -> hidden
        self.h_o = nn.Linear(nh, nv)     # blue arrow: hidden -> output scores
        self.bn = nn.BatchNorm1d(nh)

    def forward(self, x):
        """Return output scores of shape (rows of `x`, nv)."""
        # Allocate the initial state directly on the input's device rather
        # than creating it on the CPU and copying it over on every call.
        h = torch.zeros(x.shape[0], nh, device=x.device)
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:, i])
            h = self.bn(F.relu(self.h_h(h)))
        return self.h_o(h)
# Same last-token loss and metric as before, now with the loop-based model.
learn = Learner(data, Model1(), loss_func=loss4, metrics=acc4)
# One-cycle training for 6 epochs (one row per epoch in the table below).
learn.fit_one_cycle(6, 1e-4)
epoch | train_loss | valid_loss | acc4 |
---|---|---|---|
1 | 3.493525 | 3.420231 | 0.156250 |
2 | 2.987600 | 2.937893 | 0.376149 |
3 | 2.440199 | 2.477995 | 0.388787 |
4 | 2.132837 | 2.256569 | 0.391774 |
5 | 2.011305 | 2.181337 | 0.392923 |
6 | 1.985913 | 2.170874 | 0.393153 |
# Rebuild the databunch with 20-token sequences for the models that predict
# a token at every position.
data = src.databunch(bs=bs, bptt=20)
x,y = data.one_batch()
x.shape,y.shape
(torch.Size([64, 20]), torch.Size([64, 20]))
class Model2(nn.Module):
    """Loop-based network that emits output scores at every position of `x`.

    The hidden activation starts at zero on every call; after each token the
    batch-normalised activation is mapped to `nv` scores and the per-position
    results are stacked along dimension 1.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)
        self.h_h = nn.Linear(nh, nh)
        self.h_o = nn.Linear(nh, nv)
        self.bn = nn.BatchNorm1d(nh)

    def forward(self, x):
        """Return scores of shape (rows of `x`, columns of `x`, nv)."""
        # Allocate the initial state directly on the input's device rather
        # than creating it on the CPU and copying it over on every call.
        h = torch.zeros(x.shape[0], nh, device=x.device)
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:, i])
            h = F.relu(self.h_h(h))
            # Batch norm is applied only to the copy fed to the output layer;
            # the un-normalised `h` is what carries on to the next position.
            res.append(self.h_o(self.bn(h)))
        return torch.stack(res, dim=1)
# No loss_func is passed here, so Learner uses its default
# (presumably taken from the databunch — confirm against fastai's Learner).
learn = Learner(data, Model2(), metrics=accuracy)
# One-cycle training for 10 epochs (one row per epoch in the table below).
learn.fit_one_cycle(10, 1e-4, pct_start=0.1)
epoch | train_loss | valid_loss | accuracy |
---|---|---|---|
1 | 3.639285 | 3.709278 | 0.058949 |
2 | 3.551151 | 3.565677 | 0.151776 |
3 | 3.439908 | 3.431850 | 0.207741 |
4 | 3.323083 | 3.314237 | 0.283949 |
5 | 3.213422 | 3.219906 | 0.321662 |
6 | 3.119673 | 3.151162 | 0.336790 |
7 | 3.046645 | 3.106630 | 0.341690 |
8 | 2.995379 | 3.082552 | 0.346662 |
9 | 2.963800 | 3.073327 | 0.349645 |
10 | 2.947312 | 3.071951 | 0.349787 |
class Model3(nn.Module):
    """Loop-based network whose hidden state persists across forward calls.

    The final hidden activation of each call is stored (detached) in `self.h`
    and used as the starting state of the next call. Output scores are
    produced for every position of `x`.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)
        self.h_h = nn.Linear(nh, nh)
        self.h_o = nn.Linear(nh, nv)
        self.bn = nn.BatchNorm1d(nh)
        # Plain tensor attribute (neither parameter nor buffer), so moving the
        # module does not move it; forward() places it on the input's device.
        # No hard-coded .cuda(), so the model also works on CPU-only machines.
        self.h = torch.zeros(bs, nh)

    def forward(self, x):
        """Return scores of shape (rows of `x`, columns of `x`, nv)."""
        h = self.h
        if h.shape[0] != x.shape[0]:
            # The stored state is per row; with a different number of rows it
            # is meaningless (and a single-row batch would silently broadcast
            # against it), so start again from zeros.
            h = torch.zeros(x.shape[0], h.shape[1], device=x.device)
        else:
            h = h.to(x.device)
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:, i])
            h = F.relu(self.h_h(h))
            res.append(self.bn(h))
        # Detach so the next call does not backpropagate into this one.
        self.h = h.detach()
        res = torch.stack(res, dim=1)
        return self.h_o(res)
# No loss_func is passed here, so Learner uses its default
# (presumably taken from the databunch — confirm against fastai's Learner).
learn = Learner(data, Model3(), metrics=accuracy)
# One-cycle training for 20 epochs (one row per epoch in the table below).
learn.fit_one_cycle(20, 3e-3)
epoch | train_loss | valid_loss | accuracy |
---|---|---|---|
1 | 3.598183 | 3.556362 | 0.050710 |
2 | 3.274616 | 2.975699 | 0.401634 |
3 | 2.624206 | 2.036894 | 0.467330 |
4 | 2.022702 | 1.956439 | 0.316193 |
5 | 1.681813 | 1.934952 | 0.336861 |
6 | 1.453007 | 1.948201 | 0.351349 |
7 | 1.276971 | 2.005776 | 0.368679 |
8 | 1.138499 | 2.081261 | 0.360156 |
9 | 1.029217 | 2.145853 | 0.360795 |
10 | 0.939949 | 2.215388 | 0.372230 |
11 | 0.865441 | 2.240438 | 0.401491 |
12 | 0.805310 | 2.195846 | 0.409375 |
13 | 0.755035 | 2.324373 | 0.422727 |
14 | 0.713073 | 2.305542 | 0.449716 |
15 | 0.677393 | 2.350155 | 0.446449 |
16 | 0.645841 | 2.418738 | 0.446591 |
17 | 0.621809 | 2.456903 | 0.446165 |
18 | 0.605300 | 2.541699 | 0.443040 |
19 | 0.594099 | 2.539824 | 0.443040 |
20 | 0.587563 | 2.551423 | 0.442827 |
class Model4(nn.Module):
    """Single-layer `nn.RNN` model whose hidden state persists across calls.

    The RNN's final hidden state is stored (detached) in `self.h` and passed
    back in as the initial state of the next call. Output scores are produced
    for every position of `x`.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)
        self.rnn = nn.RNN(nh, nh, batch_first=True)
        self.h_o = nn.Linear(nh, nv)
        # BatchNorm1dFlat comes from fastai; presumably batch norm applied
        # over the flattened (row, position) dimensions — confirm.
        self.bn = BatchNorm1dFlat(nh)
        # Shape (layers, rows, hidden). Plain tensor attribute, so moving the
        # module does not move it; forward() places it on the input's device.
        # No hard-coded .cuda(), so the model also works on CPU-only machines.
        self.h = torch.zeros(1, bs, nh)

    def forward(self, x):
        """Return scores for every position of `x`."""
        n_layers, n_rows, n_hidden = self.h.shape
        if n_rows != x.shape[0]:
            # The stored state is per row; a different number of rows cannot
            # reuse it, so start again from zeros.
            h0 = torch.zeros(n_layers, x.shape[0], n_hidden, device=x.device)
        else:
            h0 = self.h.to(x.device)
        res, h = self.rnn(self.i_h(x), h0)
        # Detach so the next call does not backpropagate into this one.
        self.h = h.detach()
        return self.h_o(self.bn(res))
# No loss_func is passed here, so Learner uses its default
# (presumably taken from the databunch — confirm against fastai's Learner).
learn = Learner(data, Model4(), metrics=accuracy)
# One-cycle training for 20 epochs (one row per epoch in the table below).
learn.fit_one_cycle(20, 3e-3)
epoch | train_loss | valid_loss | accuracy |
---|---|---|---|
1 | 3.451432 | 3.268344 | 0.224148 |
2 | 2.974938 | 2.456569 | 0.466051 |
3 | 2.316732 | 1.946969 | 0.465625 |
4 | 1.866151 | 1.991952 | 0.314702 |
5 | 1.618516 | 1.802403 | 0.437216 |
6 | 1.411517 | 1.731107 | 0.436293 |
7 | 1.171916 | 1.655979 | 0.504048 |
8 | 0.965887 | 1.579963 | 0.522088 |
9 | 0.797046 | 1.479819 | 0.565057 |
10 | 0.659378 | 1.487831 | 0.579048 |
11 | 0.553282 | 1.441922 | 0.597798 |
12 | 0.475167 | 1.498148 | 0.600781 |
13 | 0.416131 | 1.546984 | 0.606463 |
14 | 0.372395 | 1.594261 | 0.607386 |
15 | 0.337093 | 1.578321 | 0.613352 |
16 | 0.311385 | 1.580973 | 0.623366 |
17 | 0.292869 | 1.625745 | 0.618253 |
18 | 0.279486 | 1.623960 | 0.626065 |
19 | 0.270054 | 1.682090 | 0.611719 |
20 | 0.263857 | 1.675676 | 0.614702 |
class Model5(nn.Module):
    """Two-layer `nn.GRU` model whose hidden state persists across calls.

    The GRU's final hidden state is stored (detached) in `self.h` and passed
    back in as the initial state of the next call. Output scores are produced
    for every position of `x`.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)
        self.rnn = nn.GRU(nh, nh, 2, batch_first=True)
        self.h_o = nn.Linear(nh, nv)
        # BatchNorm1dFlat comes from fastai; presumably batch norm applied
        # over the flattened (row, position) dimensions — confirm.
        self.bn = BatchNorm1dFlat(nh)
        # Shape (layers, rows, hidden). Plain tensor attribute, so moving the
        # module does not move it; forward() places it on the input's device.
        # No hard-coded .cuda(), so the model also works on CPU-only machines.
        self.h = torch.zeros(2, bs, nh)

    def forward(self, x):
        """Return scores for every position of `x`."""
        n_layers, n_rows, n_hidden = self.h.shape
        if n_rows != x.shape[0]:
            # The stored state is per row; a different number of rows cannot
            # reuse it, so start again from zeros.
            h0 = torch.zeros(n_layers, x.shape[0], n_hidden, device=x.device)
        else:
            h0 = self.h.to(x.device)
        res, h = self.rnn(self.i_h(x), h0)
        # Detach so the next call does not backpropagate into this one.
        self.h = h.detach()
        return self.h_o(self.bn(res))
# No loss_func is passed here, so Learner uses its default
# (presumably taken from the databunch — confirm against fastai's Learner).
learn = Learner(data, Model5(), metrics=accuracy)
# One-cycle training for 10 epochs (one row per epoch in the table below).
learn.fit_one_cycle(10, 1e-2)
epoch | train_loss | valid_loss | accuracy |
---|---|---|---|
1 | 2.864854 | 2.314943 | 0.454545 |
2 | 1.798988 | 1.357116 | 0.629688 |
3 | 0.932729 | 1.307463 | 0.796733 |
4 | 0.451969 | 1.329699 | 0.788636 |
5 | 0.225787 | 1.293570 | 0.800142 |
6 | 0.118085 | 1.265926 | 0.803338 |
7 | 0.065306 | 1.207096 | 0.806960 |
8 | 0.038098 | 1.205361 | 0.813920 |
9 | 0.024069 | 1.239411 | 0.807813 |
10 | 0.017078 | 1.253409 | 0.807102 |