from fastai.text import *
bs = 64  # batch size used by every databunch/model below
path = untar_data(URLs.HUMAN_NUMBERS)  # download & unpack the "human numbers" dataset
path.ls()
[PosixPath('/home/ubuntu/.fastai/data/human_numbers/valid.txt'), PosixPath('/home/ubuntu/.fastai/data/human_numbers/train.txt')]
def readnums(d, base=None):
    """Read text file *d* and return a one-element list of its lines joined by ', '.

    Parameters
    ----------
    d : str
        File name relative to *base*.
    base : path-like, optional
        Directory containing *d*; defaults to the module-level `path`.
    """
    base = path if base is None else Path(base)
    # Context manager closes the handle promptly (the original open() leaked it).
    with open(base / d) as f:
        return [', '.join(line.strip() for line in f)]
train_txt = readnums('train.txt')
train_txt[0][:80]
'one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve, thirt'
valid_txt = readnums('valid.txt')
valid_txt[0][-80:]
' nine thousand nine hundred ninety eight, nine thousand nine hundred ninety nine'
# Wrap the raw strings as fastai TextLists (tokenization/numericalization happens here).
train = TextList(train_txt, path=path)
valid = TextList(valid_txt, path=path)
# label_for_lm(): for language modelling the targets are the texts themselves, shifted by one.
src = ItemLists(path=path, train=train, valid=valid).label_for_lm()
data = src.databunch(bs=bs)
train[0].text[:80]
'xxbos one , two , three , four , five , six , seven , eight , nine , ten , eleve'
len(data.valid_ds[0][0].data)
13017
data.bptt, len(data.valid_dl)
(70, 3)
13017/70/bs
2.905580357142857
it = iter(data.valid_dl)
x1, y1 = next(it)
x2, y2 = next(it)
x3, y3 = next(it)
it.close()
x1.numel() + x2.numel() + x3.numel()
12928
x1.shape, y1.shape
(torch.Size([64, 95]), torch.Size([64, 95]))
x2.shape, y2.shape
(torch.Size([64, 76]), torch.Size([64, 76]))
x1[:, 0]
tensor([ 2, 8, 10, 10, 23, 18, 10, 22, 18, 8, 21, 18, 9, 20, 18, 9, 18, 18, 9, 15, 18, 9, 8, 18, 9, 8, 8, 9, 8, 9, 9, 8, 19, 19, 26, 10, 9, 8, 8, 22, 19, 13, 21, 19, 9, 20, 19, 9, 31, 19, 9, 16, 19, 9, 8, 19, 9, 8, 9, 9, 8, 10, 9, 8], device='cuda:0')
y1[:, 0]
tensor([18, 18, 26, 11, 12, 10, 12, 13, 10, 18, 14, 10, 27, 15, 10, 26, 10, 10, 25, 8, 10, 24, 18, 10, 23, 18, 18, 22, 18, 18, 21, 18, 9, 10, 14, 11, 23, 19, 19, 11, 10, 9, 12, 10, 27, 13, 10, 26, 8, 10, 25, 9, 10, 24, 19, 10, 23, 19, 34, 22, 19, 19, 21, 19], device='cuda:0')
v = data.valid_ds.vocab
x1[0].shape
torch.Size([95])
v.textify(x1[0])
'xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand eighteen , eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three'
v.textify(y1[0])
'eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand eighteen , eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three ,'
v.textify(x2[0])
', eight thousand twenty four , eight thousand twenty five , eight thousand twenty six , eight thousand twenty seven , eight thousand twenty eight , eight thousand twenty nine , eight thousand thirty , eight thousand thirty one , eight thousand thirty two , eight thousand thirty three , eight thousand thirty four , eight thousand thirty five , eight thousand thirty six , eight thousand thirty seven , eight thousand thirty eight , eight'
v.textify(x3[0])
'thousand thirty nine , eight thousand forty , eight thousand forty one , eight thousand forty two , eight thousand forty three , eight thousand forty four , eight thousand forty'
v.textify(x1[1])
', eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine , eight thousand sixty , eight thousand sixty one , eight thousand sixty two , eight thousand sixty three , eight thousand sixty four , eight'
v.textify(x2[1])
'thousand sixty five , eight thousand sixty six , eight thousand sixty seven , eight thousand sixty eight , eight thousand sixty nine , eight thousand seventy , eight thousand seventy one , eight thousand seventy two , eight thousand seventy three , eight thousand seventy four , eight thousand seventy five , eight thousand seventy six , eight thousand seventy seven , eight thousand seventy eight , eight thousand seventy nine , eight thousand eighty'
v.textify(x3[1])
', eight thousand eighty one , eight thousand eighty two , eight thousand eighty three , eight thousand eighty four , eight thousand eighty five , eight thousand eighty six ,'
v.textify(x3[-1])
'one , nine thousand nine hundred ninety two , nine thousand nine hundred ninety three , nine thousand nine hundred ninety four , nine thousand nine hundred ninety five , nine'
data.show_batch(ds_type=DatasetType.Valid)
idx | text |
---|---|
0 | xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand |
1 | , eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand |
2 | thousand eighty seven , eight thousand eighty eight , eight thousand eighty nine , eight thousand ninety , eight thousand ninety one , eight thousand ninety two , eight thousand ninety three , eight thousand ninety four , eight thousand ninety five , eight thousand ninety six , eight thousand ninety seven , eight thousand ninety eight , eight thousand ninety nine , eight thousand one hundred |
3 | thousand one hundred twenty three , eight thousand one hundred twenty four , eight thousand one hundred twenty five , eight thousand one hundred twenty six , eight thousand one hundred twenty seven , eight thousand one hundred twenty eight , eight thousand one hundred twenty nine , eight thousand one hundred thirty , eight thousand one hundred thirty one , eight thousand one hundred thirty two |
4 | fifty two , eight thousand one hundred fifty three , eight thousand one hundred fifty four , eight thousand one hundred fifty five , eight thousand one hundred fifty six , eight thousand one hundred fifty seven , eight thousand one hundred fifty eight , eight thousand one hundred fifty nine , eight thousand one hundred sixty , eight thousand one hundred sixty one , eight thousand |
# Tiny fixed-length sequences: bptt=3 with p_bptt=1. and max_len=0 disables the
# random sequence-length jitter, so every x is exactly 3 tokens wide.
data = src.databunch(bs=bs, bptt=3, max_len=0, p_bptt=1.)
x, y = data.one_batch()
x.shape, y.shape
(torch.Size([64, 3]), torch.Size([64, 3]))
nv = len(v.itos)
nv
38
nh = 64
def loss4(pred, targ):
    """Cross-entropy against only the LAST token of each target sequence.

    Used with models that emit a single prediction per sequence; parameters
    renamed so the builtin `input` is no longer shadowed (fastai passes them
    positionally, so callers are unaffected).
    """
    return F.cross_entropy(pred, targ[:, -1])
def acc4(pred, targ):
    """Accuracy against only the LAST token of each target sequence.

    Companion metric to `loss4`; parameters renamed so the builtin `input`
    is no longer shadowed (fastai passes them positionally).
    """
    return accuracy(pred, targ[:, -1])
class Model0(nn.Module):
    """Fully unrolled model: predict the next token from up to 3 input tokens.

    Note the first token skips `h_h` — its embedding goes straight through
    relu+bn, matching the original diagrammed version.
    """
    def __init__(self, voc_sz=None, n_hid=None):
        # Sizes default to the notebook globals nv/nh for backward compatibility.
        super().__init__()
        voc_sz = nv if voc_sz is None else voc_sz
        n_hid = nh if n_hid is None else n_hid
        self.i_h = nn.Embedding(voc_sz, n_hid)  # green arrow
        self.h_h = nn.Linear(n_hid, n_hid)      # brown arrow
        self.h_o = nn.Linear(n_hid, voc_sz)     # blue arrow
        self.bn = nn.BatchNorm1d(n_hid)
    def forward(self, x):
        # x: (batch, seq) of token ids, seq <= 3 — assumed from usage; verify with caller.
        h = self.bn(F.relu(self.i_h(x[:, 0])))
        if x.shape[1] > 1:
            h = h + self.i_h(x[:, 1])
            h = self.bn(F.relu(self.h_h(h)))
        if x.shape[1] > 2:
            h = h + self.i_h(x[:, 2])
            h = self.bn(F.relu(self.h_h(h)))
        return self.h_o(h)  # (batch, voc_sz) logits for the next token
learn = Learner(data, Model0(), loss_func=loss4, metrics=acc4)
learn.fit_one_cycle(6, 1e-4)
epoch | train_loss | valid_loss | acc4 |
---|---|---|---|
1 | 3.609755 | 3.622831 | 0.046186 |
2 | 3.103226 | 3.217704 | 0.425781 |
3 | 2.523298 | 2.733683 | 0.449908 |
4 | 2.192890 | 2.447449 | 0.452665 |
5 | 2.065991 | 2.342171 | 0.454274 |
6 | 2.039980 | 2.326865 | 0.454274 |
class Model1(nn.Module):
    """Model0 rewritten as a loop: handles any sequence length.

    Unlike Model0, every step (including the first) goes through `h_h`,
    since the hidden state starts at zero.
    """
    def __init__(self, voc_sz=None, n_hid=None):
        # Sizes default to the notebook globals nv/nh for backward compatibility.
        super().__init__()
        voc_sz = nv if voc_sz is None else voc_sz
        n_hid = nh if n_hid is None else n_hid
        self.n_hid = n_hid
        self.i_h = nn.Embedding(voc_sz, n_hid)  # green arrow
        self.h_h = nn.Linear(n_hid, n_hid)      # brown arrow
        self.h_o = nn.Linear(n_hid, voc_sz)     # blue arrow
        self.bn = nn.BatchNorm1d(n_hid)
    def forward(self, x):
        # x: (batch, seq) of token ids.
        h = torch.zeros(x.shape[0], self.n_hid, device=x.device)
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:, i])
            h = self.bn(F.relu(self.h_h(h)))
        return self.h_o(h)  # (batch, voc_sz): prediction after the last token only
learn = Learner(data, Model1(), loss_func=loss4, metrics=acc4)
learn.fit_one_cycle(6, 1e-4)
epoch | train_loss | valid_loss | acc4 |
---|---|---|---|
1 | 3.550785 | 3.565774 | 0.039062 |
2 | 2.994696 | 3.056980 | 0.434283 |
3 | 2.444730 | 2.576163 | 0.462546 |
4 | 2.147489 | 2.336781 | 0.463925 |
5 | 2.030240 | 2.252541 | 0.465533 |
6 | 2.005649 | 2.240313 | 0.465763 |
# Longer sequences (bptt=20) so the recurrent models get a prediction per position.
data = src.databunch(bs=bs, bptt=20)
x, y = data.one_batch()
x.shape, y.shape
(torch.Size([64, 45]), torch.Size([64, 45]))
class Model2(nn.Module):
    """Model1 extended to emit a prediction after EVERY input token.

    Hidden state is reset to zero for each batch (no state carried across
    batches — that is Model3's job).
    """
    def __init__(self, voc_sz=None, n_hid=None):
        # Sizes default to the notebook globals nv/nh for backward compatibility.
        super().__init__()
        voc_sz = nv if voc_sz is None else voc_sz
        n_hid = nh if n_hid is None else n_hid
        self.n_hid = n_hid
        self.i_h = nn.Embedding(voc_sz, n_hid)  # green arrow
        self.h_h = nn.Linear(n_hid, n_hid)      # brown arrow
        self.h_o = nn.Linear(n_hid, voc_sz)     # blue arrow
        self.bn = nn.BatchNorm1d(n_hid)
    def forward(self, x):
        # x: (batch, seq) of token ids.
        h = torch.zeros(x.shape[0], self.n_hid, device=x.device)
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:, i])
            h = F.relu(self.h_h(h))
            res.append(self.h_o(self.bn(h)))
        return torch.stack(res, dim=1)  # (batch, seq, voc_sz)
learn = Learner(data, Model2(), metrics=accuracy)
learn.fit_one_cycle(10, 1e-4, pct_start=0.1)
epoch | train_loss | valid_loss | accuracy |
---|---|---|---|
1 | 3.613750 | 3.468091 | 0.094572 |
2 | 3.507750 | 3.366867 | 0.201829 |
3 | 3.378657 | 3.263256 | 0.307511 |
4 | 3.248644 | 3.172283 | 0.352421 |
5 | 3.129175 | 3.094844 | 0.377534 |
6 | 3.031686 | 3.033968 | 0.382643 |
7 | 2.954897 | 2.999621 | 0.390039 |
8 | 2.903233 | 2.998493 | 0.382097 |
9 | 2.870547 | 2.957848 | 0.398152 |
10 | 2.856550 | 2.963168 | 0.395220 |
class Model3(nn.Module):
    """RNN from scratch that carries hidden state across batches (truncated BPTT).

    `detach()` on the saved state stops gradients flowing into previous
    batches while keeping the activations.
    """
    def __init__(self, voc_sz=None, n_hid=None):
        # Sizes default to the notebook globals nv/nh for backward compatibility.
        super().__init__()
        voc_sz = nv if voc_sz is None else voc_sz
        n_hid = nh if n_hid is None else n_hid
        self.n_hid = n_hid
        self.i_h = nn.Embedding(voc_sz, n_hid)  # green arrow
        self.h_h = nn.Linear(n_hid, n_hid)      # brown arrow
        self.h_o = nn.Linear(n_hid, voc_sz)     # blue arrow
        self.bn = nn.BatchNorm1d(n_hid)
        # Created lazily on x's device in forward(); the original eager
        # `.cuda()` crashed on CPU-only machines and broke when the final
        # batch was smaller than bs.
        self.h = None
    def forward(self, x):
        # (Re)initialize the persistent state when batch size or device changes.
        if (self.h is None or self.h.shape[0] != x.shape[0]
                or self.h.device != x.device):
            self.h = torch.zeros(x.shape[0], self.n_hid, device=x.device)
        h = self.h
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:, i])
            h = F.relu(self.h_h(h))
            res.append(self.bn(h))
        self.h = h.detach()  # keep the state, drop the graph
        return self.h_o(torch.stack(res, dim=1))  # (batch, seq, voc_sz)
learn = Learner(data, Model3(), metrics=accuracy)
learn.fit_one_cycle(20, 3e-3)
epoch | train_loss | valid_loss | accuracy |
---|---|---|---|
1 | 3.498832 | 3.457415 | 0.145312 |
2 | 3.161868 | 2.874971 | 0.449906 |
3 | 2.515988 | 2.037722 | 0.467106 |
4 | 1.967317 | 2.110590 | 0.316073 |
5 | 1.672088 | 2.135250 | 0.337800 |
6 | 1.493022 | 2.155137 | 0.344380 |
7 | 1.335257 | 2.116041 | 0.394331 |
8 | 1.201654 | 2.299078 | 0.408730 |
9 | 1.090030 | 2.624311 | 0.427448 |
10 | 1.008497 | 2.462456 | 0.422197 |
11 | 0.971175 | 2.352604 | 0.437458 |
12 | 0.906372 | 2.458878 | 0.453475 |
13 | 0.843947 | 2.455768 | 0.461694 |
14 | 0.794068 | 2.469155 | 0.458606 |
15 | 0.754808 | 2.490652 | 0.453702 |
16 | 0.722530 | 2.597134 | 0.453636 |
17 | 0.689590 | 2.633052 | 0.452083 |
18 | 0.670493 | 2.525233 | 0.467502 |
19 | 0.656720 | 2.663035 | 0.460008 |
20 | 0.657596 | 2.545538 | 0.464172 |
nn.RNN
class Model4(nn.Module):
    """Model3 with the hand-rolled loop replaced by PyTorch's nn.RNN."""
    def __init__(self, voc_sz=None, n_hid=None):
        # Sizes default to the notebook globals nv/nh for backward compatibility.
        super().__init__()
        voc_sz = nv if voc_sz is None else voc_sz
        n_hid = nh if n_hid is None else n_hid
        self.n_hid = n_hid
        self.i_h = nn.Embedding(voc_sz, n_hid)
        self.rnn = nn.RNN(n_hid, n_hid, batch_first=True)
        self.h_o = nn.Linear(n_hid, voc_sz)
        # fastai helper — presumably BatchNorm1d that handles (bs, seq, nh)
        # input by flattening; confirm against the fastai docs.
        self.bn = BatchNorm1dFlat(n_hid)
        # Created lazily on x's device in forward(); the original eager
        # `.cuda()` crashed on CPU-only machines and broke when the final
        # batch was smaller than bs. Shape is (num_layers, batch, n_hid).
        self.h = None
    def forward(self, x):
        if (self.h is None or self.h.shape[1] != x.shape[0]
                or self.h.device != x.device):
            self.h = torch.zeros(1, x.shape[0], self.n_hid, device=x.device)
        res, h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()  # carry state to the next batch, without gradients
        return self.h_o(self.bn(res))  # (batch, seq, voc_sz)
learn = Learner(data, Model4(), metrics=accuracy)
learn.fit_one_cycle(20, 3e-3)
epoch | train_loss | valid_loss | accuracy |
---|---|---|---|
1 | 3.488441 | 3.392691 | 0.156386 |
2 | 3.016769 | 2.516589 | 0.461222 |
3 | 2.346680 | 1.942615 | 0.467149 |
4 | 1.879993 | 2.002077 | 0.312014 |
5 | 1.631300 | 1.904240 | 0.432754 |
6 | 1.462227 | 1.904621 | 0.482044 |
7 | 1.300646 | 1.851880 | 0.492365 |
8 | 1.153744 | 1.653138 | 0.492104 |
9 | 1.008373 | 1.549363 | 0.494715 |
10 | 0.878932 | 1.600824 | 0.500036 |
11 | 0.777283 | 1.508291 | 0.520914 |
12 | 0.712406 | 1.532992 | 0.570639 |
13 | 0.626053 | 1.426348 | 0.569779 |
14 | 0.555844 | 1.715479 | 0.545101 |
15 | 0.499043 | 1.626162 | 0.542316 |
16 | 0.458131 | 1.536722 | 0.548794 |
17 | 0.438000 | 1.548677 | 0.545291 |
18 | 0.409515 | 1.462034 | 0.552396 |
19 | 0.394507 | 1.477735 | 0.554738 |
20 | 0.390583 | 1.518102 | 0.549247 |
class Model5(nn.Module):
    """Model4 upgraded to a stacked (default 2-layer) GRU."""
    def __init__(self, voc_sz=None, n_hid=None, n_layers=2):
        # Sizes default to the notebook globals nv/nh for backward compatibility.
        super().__init__()
        voc_sz = nv if voc_sz is None else voc_sz
        n_hid = nh if n_hid is None else n_hid
        self.n_hid, self.n_layers = n_hid, n_layers
        self.i_h = nn.Embedding(voc_sz, n_hid)
        self.rnn = nn.GRU(n_hid, n_hid, n_layers, batch_first=True)
        self.h_o = nn.Linear(n_hid, voc_sz)
        # fastai helper — presumably BatchNorm1d that handles (bs, seq, nh)
        # input by flattening; confirm against the fastai docs.
        self.bn = BatchNorm1dFlat(n_hid)
        # Created lazily on x's device in forward(); the original eager
        # `.cuda()` crashed on CPU-only machines and broke when the final
        # batch was smaller than bs. Shape is (n_layers, batch, n_hid).
        self.h = None
    def forward(self, x):
        if (self.h is None or self.h.shape[1] != x.shape[0]
                or self.h.device != x.device):
            self.h = torch.zeros(self.n_layers, x.shape[0], self.n_hid,
                                 device=x.device)
        res, h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()  # carry state to the next batch, without gradients
        return self.h_o(self.bn(res))  # (batch, seq, voc_sz)
learn = Learner(data, Model5(), metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)
epoch | train_loss | valid_loss | accuracy |
---|---|---|---|
1 | 2.983626 | 2.321548 | 0.444593 |
2 | 1.880445 | 1.621104 | 0.546462 |
3 | 1.015526 | 1.040678 | 0.796203 |
4 | 0.525928 | 0.822173 | 0.829538 |
5 | 0.268591 | 1.000392 | 0.813538 |
6 | 0.140787 | 0.820461 | 0.842801 |
7 | 0.079602 | 0.882789 | 0.833222 |
8 | 0.047994 | 0.795663 | 0.843396 |
9 | 0.037621 | 0.872451 | 0.833012 |
10 | 0.030443 | 0.875417 | 0.833148 |