from fastai.text.all import *
bs=64
path = untar_data(URLs.HUMAN_NUMBERS)
path.ls()
(#2) [Path('/home/jhoward/.fastai/data/human_numbers/train.txt'),Path('/home/jhoward/.fastai/data/human_numbers/valid.txt')]
def readnums(d):
    "Read file `d` under the module-level `path` and join its stripped lines with ', '."
    # Use a context manager so the file handle is closed promptly
    # (the original `open(...)` relied on the garbage collector to close it).
    with open(path/d) as f:
        return ', '.join(o.strip() for o in f)
train_txt = readnums('train.txt'); train_txt[:80]
'one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve, thirt'
valid_txt = readnums('valid.txt'); valid_txt[-80:]
' nine thousand nine hundred ninety eight, nine thousand nine hundred ninety nine'
train_tok = tokenize1(train_txt)
valid_tok = tokenize1(valid_txt)
dsets = Datasets([train_tok, valid_tok], tfms=Numericalize, dl_type=LMDataLoader, splits=[[0], [1]])
dls = dsets.dataloaders(bs=bs, val_bs=bs)
dsets.show((dsets.train[0][0][:80],))
len(dsets.valid[0][0])
13017
len(dls.valid)
3
dls.seq_len, len(dls.valid)
(72, 3)
13017/72/bs
2.8248697916666665
it = iter(dls.valid)
x1,y1 = next(it)
x2,y2 = next(it)
x3,y3 = next(it)
it.close()
x1.numel()+x2.numel()+x3.numel()
12992
This is the closest multiple of 64 below 13017, so the last partial sequence is dropped.
x1.shape,y1.shape
(torch.Size([64, 72]), torch.Size([64, 72]))
x2.shape,y2.shape
(torch.Size([64, 72]), torch.Size([64, 72]))
x1[0]
tensor([ 2, 19, 11, 12, 9, 19, 11, 13, 9, 19, 11, 14, 9, 19, 11, 15, 9, 19, 11, 16, 9, 19, 11, 17, 9, 19, 11, 18, 9, 19, 11, 19, 9, 19, 11, 20, 9, 19, 11, 29, 9, 19, 11, 30, 9, 19, 11, 31, 9, 19, 11, 32, 9, 19, 11, 33, 9, 19, 11, 34, 9, 19, 11, 35, 9, 19, 11, 36, 9, 19, 11, 37], device='cuda:5')
y1[0]
tensor([19, 11, 12, 9, 19, 11, 13, 9, 19, 11, 14, 9, 19, 11, 15, 9, 19, 11, 16, 9, 19, 11, 17, 9, 19, 11, 18, 9, 19, 11, 19, 9, 19, 11, 20, 9, 19, 11, 29, 9, 19, 11, 30, 9, 19, 11, 31, 9, 19, 11, 32, 9, 19, 11, 33, 9, 19, 11, 34, 9, 19, 11, 35, 9, 19, 11, 36, 9, 19, 11, 37, 9], device='cuda:5')
v = dls.vocab
' '.join([v[x] for x in x1[0]])
'xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand eighteen'
' '.join([v[x] for x in y1[0]])
'eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand eighteen ,'
' '.join([v[x] for x in x2[0]])
', eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three , eight thousand twenty four , eight thousand twenty five , eight thousand twenty six , eight thousand twenty seven , eight thousand twenty eight , eight thousand twenty nine , eight thousand thirty , eight thousand thirty one , eight thousand thirty two , eight thousand thirty three'
' '.join([v[x] for x in x3[0]])
', eight thousand thirty four , eight thousand thirty five , eight thousand thirty six , eight thousand thirty seven , eight thousand thirty eight , eight thousand thirty nine , eight thousand forty , eight thousand forty one , eight thousand forty two , eight thousand forty three , eight thousand forty four , eight thousand forty five'
' '.join([v[x] for x in x1[1]])
', eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine , eight thousand'
' '.join([v[x] for x in x2[1]])
'sixty , eight thousand sixty one , eight thousand sixty two , eight thousand sixty three , eight thousand sixty four , eight thousand sixty five , eight thousand sixty six , eight thousand sixty seven , eight thousand sixty eight , eight thousand sixty nine , eight thousand seventy , eight thousand seventy one , eight thousand seventy two , eight thousand seventy three , eight thousand seventy four , eight'
' '.join([v[x] for x in x3[1]])
'thousand seventy five , eight thousand seventy six , eight thousand seventy seven , eight thousand seventy eight , eight thousand seventy nine , eight thousand eighty , eight thousand eighty one , eight thousand eighty two , eight thousand eighty three , eight thousand eighty four , eight thousand eighty five , eight thousand eighty six , eight'
' '.join([v[x] for x in x3[-1]])
'seven , nine thousand nine hundred eighty eight , nine thousand nine hundred eighty nine , nine thousand nine hundred ninety , nine thousand nine hundred ninety one , nine thousand nine hundred ninety two , nine thousand nine hundred ninety three , nine thousand nine hundred ninety four , nine thousand nine hundred ninety five , nine thousand'
dls = dsets.dataloaders(bs=bs, seq_len=3)
x,y = dls.one_batch()
x.shape,y.shape
(torch.Size([64, 3]), torch.Size([64, 3]))
nv = len(v); nv
40
nh=64
def loss4(input, target):
    "Cross-entropy of `input` against only the final token of each target sequence."
    last_tok = target[:, -1]
    return F.cross_entropy(input, last_tok)
def acc4(input, target):
    "Accuracy of `input` judged against only the final token of each target sequence."
    return accuracy(input, target[:, -1])
class Model0(Module):
    "Hand-unrolled 'RNN' over up to three tokens: embed, add to hidden, shared linear."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)  # green arrow: token id -> embedding
        self.h_h = nn.Linear(nh,nh)     # brown arrow: hidden -> hidden (shared weights)
        self.h_o = nn.Linear(nh,nv)     # blue arrow: hidden -> vocab logits
        self.bn = nn.BatchNorm1d(nh)
    def forward(self, x):
        # First token initialises the hidden state.
        hidden = self.bn(F.relu(self.h_h(self.i_h(x[:,0]))))
        # Remaining tokens (at most indices 1 and 2, matching the unrolled
        # original, which ignored anything past the third token).
        for tok in range(1, min(x.shape[1], 3)):
            hidden = hidden + self.i_h(x[:,tok])
            hidden = self.bn(F.relu(self.h_h(hidden)))
        return self.h_o(hidden)
learn = Learner(dls, Model0(), loss_func=loss4, metrics=acc4)
learn.fit_one_cycle(6, 1e-4)
epoch | train_loss | valid_loss | acc4 | time |
---|---|---|---|---|
0 | 3.459452 | 3.417839 | 0.144213 | 00:10 |
1 | 2.519120 | 2.569264 | 0.456250 | 00:10 |
2 | 2.031360 | 2.176257 | 0.459722 | 00:10 |
3 | 1.840601 | 2.040740 | 0.463657 | 00:10 |
4 | 1.772740 | 2.000901 | 0.463657 | 00:10 |
5 | 1.758649 | 1.995709 | 0.464120 | 00:10 |
class Model1(Module):
    "Same computation as Model0, but with the token loop written explicitly so any sequence length is handled."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)  # green arrow: token id -> embedding
        self.h_h = nn.Linear(nh,nh)     # brown arrow: hidden -> hidden (shared weights)
        self.h_o = nn.Linear(nh,nv)     # blue arrow: hidden -> vocab logits
        self.bn = nn.BatchNorm1d(nh)
    def forward(self, x):
        # Allocate the initial hidden state directly on the input's device;
        # the original built it on the CPU and then copied it with `.to(...)`.
        h = torch.zeros(x.shape[0], nh, device=x.device)
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = self.bn(F.relu(self.h_h(h)))
        # Predict only after consuming the whole sequence.
        return self.h_o(h)
learn = Learner(dls, Model1(), loss_func=loss4, metrics=acc4)
learn.fit_one_cycle(6, 1e-4)
epoch | train_loss | valid_loss | acc4 | time |
---|---|---|---|---|
0 | 3.445585 | 3.383623 | 0.194213 | 00:10 |
1 | 2.568218 | 2.707002 | 0.425694 | 00:10 |
2 | 2.063069 | 2.317326 | 0.460185 | 00:10 |
3 | 1.860497 | 2.152390 | 0.466667 | 00:10 |
4 | 1.787315 | 2.100394 | 0.467593 | 00:10 |
5 | 1.772113 | 2.092769 | 0.467593 | 00:10 |
dls = dsets.dataloaders(bs=bs, seq_len=20)
x,y = dls.one_batch()
x.shape,y.shape
(torch.Size([64, 20]), torch.Size([64, 20]))
class Model2(Module):
    "RNN that emits a prediction after every token (sequence-to-sequence), enabling a loss at each position."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)  # token id -> embedding
        self.h_h = nn.Linear(nh,nh)     # hidden -> hidden (shared weights)
        self.h_o = nn.Linear(nh,nv)     # hidden -> vocab logits
        self.bn = nn.BatchNorm1d(nh)
    def forward(self, x):
        # Allocate directly on the input's device; the original built the
        # zeros on CPU and then copied them with `.to(...)`.
        h = torch.zeros(x.shape[0], nh, device=x.device)
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
            res.append(self.h_o(self.bn(h)))  # a prediction for every position
        # (batch, seq_len, vocab)
        return torch.stack(res, dim=1)
learn = Learner(dls, Model2(), loss_func=CrossEntropyLossFlat(), metrics=accuracy)
learn.fit_one_cycle(10, 1e-4, pct_start=0.1)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 3.736573 | 3.754480 | 0.063566 | 00:02 |
1 | 3.540261 | 3.523310 | 0.124826 | 00:02 |
2 | 3.300708 | 3.304820 | 0.248735 | 00:02 |
3 | 3.063205 | 3.128578 | 0.299777 | 00:02 |
4 | 2.861345 | 3.009128 | 0.335367 | 00:02 |
5 | 2.705495 | 2.929025 | 0.353894 | 00:02 |
6 | 2.593792 | 2.878335 | 0.367832 | 00:02 |
7 | 2.519732 | 2.850741 | 0.373140 | 00:02 |
8 | 2.475534 | 2.840007 | 0.375546 | 00:02 |
9 | 2.452727 | 2.838413 | 0.375918 | 00:02 |
class Model3(Module):
    "Stateful RNN: the hidden state is carried (detached) across batches — truncated BPTT."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)  # token id -> embedding
        self.h_h = nn.Linear(nh,nh)     # hidden -> hidden (shared weights)
        self.h_o = nn.Linear(nh,nv)     # hidden -> vocab logits
        self.bn = nn.BatchNorm1d(nh)
        # Persistent hidden state; the original hard-coded `.cuda()`, which
        # crashes on CPU-only machines. It is (re)created on the input's
        # device inside `forward` instead.
        self.h = torch.zeros(bs, nh)
    def forward(self, x):
        # Rebuild the state whenever the batch size or device changes
        # (e.g. the last, smaller batch of an epoch).
        if self.h.shape[0]!=x.shape[0] or self.h.device!=x.device:
            self.h = torch.zeros(x.shape[0], nh, device=x.device)
        h = self.h
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
            res.append(self.bn(h))
        # detach: keep the value but drop the gradient history, so
        # backprop stops at the batch boundary (truncated BPTT).
        self.h = h.detach()
        res = torch.stack(res, dim=1)
        return self.h_o(res)
    def reset(self):
        "Zero the hidden state (fastai calls this between training phases)."
        self.h = torch.zeros(bs, nh, device=self.h.device)
learn = Learner(dls, Model3(), metrics=accuracy, loss_func=CrossEntropyLossFlat())
learn.fit_one_cycle(20, 3e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 3.482397 | 3.442618 | 0.139980 | 00:02 |
1 | 2.828804 | 2.455908 | 0.417783 | 00:02 |
2 | 2.134592 | 2.153767 | 0.315203 | 00:02 |
3 | 1.763576 | 2.096672 | 0.316940 | 00:02 |
4 | 1.589015 | 2.090171 | 0.317088 | 00:02 |
5 | 1.497501 | 2.057994 | 0.331374 | 00:02 |
6 | 1.414305 | 1.895721 | 0.441195 | 00:02 |
7 | 1.307273 | 2.044791 | 0.437872 | 00:02 |
8 | 1.165429 | 1.991641 | 0.461210 | 00:02 |
9 | 1.033335 | 1.776033 | 0.542783 | 00:02 |
10 | 0.923316 | 1.810016 | 0.564509 | 00:02 |
11 | 0.834117 | 1.762270 | 0.565005 | 00:02 |
12 | 0.758906 | 1.723969 | 0.591940 | 00:02 |
13 | 0.699892 | 1.808163 | 0.578944 | 00:02 |
14 | 0.653839 | 1.802881 | 0.592039 | 00:02 |
15 | 0.620560 | 1.769326 | 0.614509 | 00:02 |
16 | 0.595637 | 1.782574 | 0.616667 | 00:02 |
17 | 0.578359 | 1.772477 | 0.623785 | 00:02 |
18 | 0.567210 | 1.772950 | 0.623115 | 00:02 |
19 | 0.561052 | 1.781880 | 0.621751 | 00:02 |
class Model4(Module):
    "Same stateful model as Model3, but using PyTorch's built-in nn.RNN cell."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)
        self.rnn = nn.RNN(nh,nh, batch_first=True)
        self.h_o = nn.Linear(nh,nv)
        self.bn = BatchNorm1dFlat(nh)
        # (1, bs, nh): nn.RNN expects (num_layers, batch, hidden).
        # The original hard-coded `.cuda()`, which crashes on CPU-only
        # machines; the state is moved to the input's device in `forward`.
        self.h = torch.zeros(1, bs, nh)
    def forward(self, x):
        # Rebuild the state on a batch-size or device change.
        if self.h.shape[1]!=x.shape[0] or self.h.device!=x.device:
            self.h = torch.zeros(1, x.shape[0], nh, device=x.device)
        res,h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()  # truncated BPTT: stop gradients at the batch boundary
        return self.h_o(self.bn(res))
learn = Learner(dls, Model4(), loss_func=CrossEntropyLossFlat(), metrics=accuracy)
learn.fit_one_cycle(20, 3e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 3.462379 | 3.272240 | 0.265749 | 00:01 |
1 | 2.669984 | 2.254657 | 0.462872 | 00:01 |
2 | 2.026915 | 2.119816 | 0.315923 | 00:01 |
3 | 1.709504 | 2.164839 | 0.316964 | 00:01 |
4 | 1.538079 | 2.037120 | 0.388790 | 00:01 |
5 | 1.376378 | 2.241062 | 0.339459 | 00:01 |
6 | 1.182906 | 2.094107 | 0.371429 | 00:01 |
7 | 1.019852 | 1.614843 | 0.476141 | 00:01 |
8 | 0.871662 | 1.549297 | 0.486880 | 00:01 |
9 | 0.743875 | 1.525240 | 0.522867 | 00:01 |
10 | 0.636371 | 1.434942 | 0.558606 | 00:01 |
11 | 0.549575 | 1.398644 | 0.553646 | 00:01 |
12 | 0.480547 | 1.357781 | 0.564410 | 00:01 |
13 | 0.427223 | 1.290959 | 0.583606 | 00:01 |
14 | 0.388108 | 1.209717 | 0.606944 | 00:01 |
15 | 0.356891 | 1.256806 | 0.609722 | 00:01 |
16 | 0.332150 | 1.269009 | 0.610045 | 00:01 |
17 | 0.315104 | 1.244885 | 0.617956 | 00:01 |
18 | 0.304269 | 1.261909 | 0.615501 | 00:01 |
19 | 0.297769 | 1.279711 | 0.611533 | 00:01 |
class Model5(Module):
    "Stateful model using a 2-layer GRU instead of a plain RNN."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)
        self.rnn = nn.GRU(nh, nh, 2, batch_first=True)
        self.h_o = nn.Linear(nh,nv)
        self.bn = BatchNorm1dFlat(nh)
        # (2, bs, nh): one state per GRU layer. The original hard-coded
        # `.cuda()`, which crashes on CPU-only machines; the state is moved
        # to the input's device in `forward`.
        self.h = torch.zeros(2, bs, nh)
    def forward(self, x):
        # Rebuild the state on a batch-size or device change.
        if self.h.shape[1]!=x.shape[0] or self.h.device!=x.device:
            self.h = torch.zeros(2, x.shape[0], nh, device=x.device)
        res,h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()  # truncated BPTT: stop gradients at the batch boundary
        return self.h_o(self.bn(res))
learn = Learner(dls, Model5(), loss_func=CrossEntropyLossFlat(), metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.666392 | 2.114901 | 0.497594 | 00:01 |
1 | 1.436292 | 1.357266 | 0.624330 | 00:01 |
2 | 0.678816 | 1.007875 | 0.745387 | 00:01 |
3 | 0.329509 | 0.735918 | 0.813219 | 00:01 |
4 | 0.168463 | 0.633921 | 0.837922 | 00:01 |
5 | 0.089841 | 0.612871 | 0.851290 | 00:01 |
6 | 0.051091 | 0.690696 | 0.840972 | 00:01 |
7 | 0.031449 | 0.706523 | 0.834896 | 00:01 |
8 | 0.020642 | 0.633427 | 0.843948 | 00:01 |
9 | 0.014271 | 0.636002 | 0.844072 | 00:01 |