from fastai.text.all import *
bs=64
path = untar_data(URLs.HUMAN_NUMBERS)
path.ls()
(#2) [Path('/home/jhoward/.fastai/data/human_numbers/train.txt'),Path('/home/jhoward/.fastai/data/human_numbers/valid.txt')]
def readnums(d):
    "Read file `d` under the module-level `path` and join its stripped lines with ', '."
    # Use a context manager so the file handle is closed promptly
    # (the original `open(...)` relied on the garbage collector to close it).
    with open(path/d) as f:
        return ', '.join(o.strip() for o in f)
train_txt = readnums('train.txt'); train_txt[:80]
'one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve, thirt'
valid_txt = readnums('valid.txt'); valid_txt[-80:]
' nine thousand nine hundred ninety eight, nine thousand nine hundred ninety nine'
train_tok = tokenize1(train_txt)
valid_tok = tokenize1(valid_txt)
dsets = Datasets([train_tok, valid_tok], tfms=Numericalize, dl_type=LMDataLoader, splits=[[0], [1]])
dls = dsets.dataloaders(bs=bs, val_bs=bs)
dsets.show((dsets.train[0][0][:80],))
len(dsets.valid[0][0])
13017
len(dls.valid)
3
dls.seq_len, len(dls.valid)
(72, 3)
13017/72/bs
2.8248697916666665
it = iter(dls.valid)
x1,y1 = next(it)
x2,y2 = next(it)
x3,y3 = next(it)
it.close()
x1.numel()+x2.numel()+x3.numel()
12992
This is the closest multiple of 64 below 13017, so the last partial sequence is dropped.
x1.shape,y1.shape
(torch.Size([64, 72]), torch.Size([64, 72]))
x2.shape,y2.shape
(torch.Size([64, 72]), torch.Size([64, 72]))
x1[0]
tensor([ 2, 19, 11, 12, 9, 19, 11, 13, 9, 19, 11, 14, 9, 19, 11, 15, 9, 19, 11, 16, 9, 19, 11, 17, 9, 19, 11, 18, 9, 19, 11, 19, 9, 19, 11, 20, 9, 19, 11, 29, 9, 19, 11, 30, 9, 19, 11, 31, 9, 19, 11, 32, 9, 19, 11, 33, 9, 19, 11, 34, 9, 19, 11, 35, 9, 19, 11, 36, 9, 19, 11, 37], device='cuda:5')
y1[0]
tensor([19, 11, 12, 9, 19, 11, 13, 9, 19, 11, 14, 9, 19, 11, 15, 9, 19, 11, 16, 9, 19, 11, 17, 9, 19, 11, 18, 9, 19, 11, 19, 9, 19, 11, 20, 9, 19, 11, 29, 9, 19, 11, 30, 9, 19, 11, 31, 9, 19, 11, 32, 9, 19, 11, 33, 9, 19, 11, 34, 9, 19, 11, 35, 9, 19, 11, 36, 9, 19, 11, 37, 9], device='cuda:5')
v = dls.vocab
' '.join([v[x] for x in x1[0]])
'xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand eighteen'
' '.join([v[x] for x in y1[0]])
'eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand eighteen ,'
' '.join([v[x] for x in x2[0]])
', eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three , eight thousand twenty four , eight thousand twenty five , eight thousand twenty six , eight thousand twenty seven , eight thousand twenty eight , eight thousand twenty nine , eight thousand thirty , eight thousand thirty one , eight thousand thirty two , eight thousand thirty three'
' '.join([v[x] for x in x3[0]])
', eight thousand thirty four , eight thousand thirty five , eight thousand thirty six , eight thousand thirty seven , eight thousand thirty eight , eight thousand thirty nine , eight thousand forty , eight thousand forty one , eight thousand forty two , eight thousand forty three , eight thousand forty four , eight thousand forty five'
' '.join([v[x] for x in x1[1]])
', eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine , eight thousand'
' '.join([v[x] for x in x2[1]])
'sixty , eight thousand sixty one , eight thousand sixty two , eight thousand sixty three , eight thousand sixty four , eight thousand sixty five , eight thousand sixty six , eight thousand sixty seven , eight thousand sixty eight , eight thousand sixty nine , eight thousand seventy , eight thousand seventy one , eight thousand seventy two , eight thousand seventy three , eight thousand seventy four , eight'
' '.join([v[x] for x in x3[1]])
'thousand seventy five , eight thousand seventy six , eight thousand seventy seven , eight thousand seventy eight , eight thousand seventy nine , eight thousand eighty , eight thousand eighty one , eight thousand eighty two , eight thousand eighty three , eight thousand eighty four , eight thousand eighty five , eight thousand eighty six , eight'
' '.join([v[x] for x in x3[-1]])
'seven , nine thousand nine hundred eighty eight , nine thousand nine hundred eighty nine , nine thousand nine hundred ninety , nine thousand nine hundred ninety one , nine thousand nine hundred ninety two , nine thousand nine hundred ninety three , nine thousand nine hundred ninety four , nine thousand nine hundred ninety five , nine thousand'
dls = dsets.dataloaders(bs=bs, seq_len=3)
x,y = dls.one_batch()
x.shape,y.shape
(torch.Size([64, 3]), torch.Size([64, 3]))
nv = len(v); nv
40
nh=64
def loss4(input, target):
    "Cross-entropy of `input` against only the final token of each target sequence."
    last_tok = target[:, -1]
    return F.cross_entropy(input, last_tok)
def acc4(input, target):
    "Accuracy of `input` judged against only the final token of each target sequence."
    return accuracy(input, target[:, -1])
class Model0(Module):
    "Hand-unrolled 'RNN' over up to three tokens: embed, add to hidden, shared linear."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)  # green arrow: token id -> embedding
        self.h_h = nn.Linear(nh,nh)     # brown arrow: hidden -> hidden (shared weights)
        self.h_o = nn.Linear(nh,nv)     # blue arrow: hidden -> vocab logits
        self.bn = nn.BatchNorm1d(nh)
    def forward(self, x):
        # First token initialises the hidden state.
        hidden = self.bn(F.relu(self.h_h(self.i_h(x[:,0]))))
        # Remaining tokens (at most indices 1 and 2, matching the unrolled
        # original, which ignored anything past the third token).
        for tok in range(1, min(x.shape[1], 3)):
            hidden = hidden + self.i_h(x[:,tok])
            hidden = self.bn(F.relu(self.h_h(hidden)))
        return self.h_o(hidden)
learn = Learner(dls, Model0(), loss_func=loss4, metrics=acc4)
learn.fit_one_cycle(6, 1e-4)
epoch | train_loss | valid_loss | acc4 | time |
---|---|---|---|---|
0 | 3.459452 | 3.417839 | 0.144213 | 00:10 |
1 | 2.519120 | 2.569264 | 0.456250 | 00:10 |
2 | 2.031360 | 2.176257 | 0.459722 | 00:10 |
3 | 1.840601 | 2.040740 | 0.463657 | 00:10 |
4 | 1.772740 | 2.000901 | 0.463657 | 00:10 |
5 | 1.758649 | 1.995709 | 0.464120 | 00:10 |
class Model1(Module):
    "Same computation as Model0, but with the token loop written explicitly so any sequence length is handled."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)  # green arrow: token id -> embedding
        self.h_h = nn.Linear(nh,nh)     # brown arrow: hidden -> hidden (shared weights)
        self.h_o = nn.Linear(nh,nv)     # blue arrow: hidden -> vocab logits
        self.bn = nn.BatchNorm1d(nh)
    def forward(self, x):
        # Allocate the initial hidden state directly on the input's device;
        # the original built it on the CPU and then copied it with `.to(...)`.
        h = torch.zeros(x.shape[0], nh, device=x.device)
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = self.bn(F.relu(self.h_h(h)))
        # Predict only after consuming the whole sequence.
        return self.h_o(h)
learn = Learner(dls, Model1(), loss_func=loss4, metrics=acc4)
learn.fit_one_cycle(6, 1e-4)
epoch | train_loss | valid_loss | acc4 | time |
---|---|---|---|---|
0 | 3.445585 | 3.383623 | 0.194213 | 00:10 |
1 | 2.568218 | 2.707002 | 0.425694 | 00:10 |
2 | 2.063069 | 2.317326 | 0.460185 | 00:10 |
3 | 1.860497 | 2.152390 | 0.466667 | 00:10 |
4 | 1.787315 | 2.100394 | 0.467593 | 00:10 |
5 | 1.772113 | 2.092769 | 0.467593 | 00:10 |
dls = dsets.dataloaders(bs=bs, seq_len=20)
x,y = dls.one_batch()
x.shape,y.shape
(torch.Size([64, 20]), torch.Size([64, 20]))
class Model2(Module):
    "RNN that emits a prediction after every token (sequence-to-sequence), enabling a loss at each position."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)  # token id -> embedding
        self.h_h = nn.Linear(nh,nh)     # hidden -> hidden (shared weights)
        self.h_o = nn.Linear(nh,nv)     # hidden -> vocab logits
        self.bn = nn.BatchNorm1d(nh)
    def forward(self, x):
        # Allocate directly on the input's device; the original built the
        # zeros on CPU and then copied them with `.to(...)`.
        h = torch.zeros(x.shape[0], nh, device=x.device)
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
            res.append(self.h_o(self.bn(h)))  # a prediction for every position
        # (batch, seq_len, vocab)
        return torch.stack(res, dim=1)
learn = Learner(dls, Model2(), loss_func=CrossEntropyLossFlat(), metrics=accuracy)
learn.fit_one_cycle(10, 1e-4, pct_start=0.1)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 3.736573 | 3.754480 | 0.063566 | 00:02 |
1 | 3.540261 | 3.523310 | 0.124826 | 00:02 |
2 | 3.300708 | 3.304820 | 0.248735 | 00:02 |
3 | 3.063205 | 3.128578 | 0.299777 | 00:02 |
4 | 2.861345 | 3.009128 | 0.335367 | 00:02 |
5 | 2.705495 | 2.929025 | 0.353894 | 00:02 |
6 | 2.593792 | 2.878335 | 0.367832 | 00:02 |
7 | 2.519732 | 2.850741 | 0.373140 | 00:02 |
8 | 2.475534 | 2.840007 | 0.375546 | 00:02 |
9 | 2.452727 | 2.838413 | 0.375918 | 00:02 |
class Model3(Module):
    "Stateful RNN: the hidden state is carried (detached) across batches — truncated BPTT."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)  # token id -> embedding
        self.h_h = nn.Linear(nh,nh)     # hidden -> hidden (shared weights)
        self.h_o = nn.Linear(nh,nv)     # hidden -> vocab logits
        self.bn = nn.BatchNorm1d(nh)
        # Persistent hidden state; the original hard-coded `.cuda()`, which
        # crashes on CPU-only machines. It is (re)created on the input's
        # device inside `forward` instead.
        self.h = torch.zeros(bs, nh)
    def forward(self, x):
        # Rebuild the state whenever the batch size or device changes
        # (e.g. the last, smaller batch of an epoch).
        if self.h.shape[0]!=x.shape[0] or self.h.device!=x.device:
            self.h = torch.zeros(x.shape[0], nh, device=x.device)
        h = self.h
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
            res.append(self.bn(h))
        # detach: keep the value but drop the gradient history, so
        # backprop stops at the batch boundary (truncated BPTT).
        self.h = h.detach()
        res = torch.stack(res, dim=1)
        return self.h_o(res)
    def reset(self):
        "Zero the hidden state (fastai calls this between training phases)."
        self.h = torch.zeros(bs, nh, device=self.h.device)
learn = Learner(dls, Model3(), metrics=accuracy, loss_func=CrossEntropyLossFlat())
learn.fit_one_cycle(20, 3e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 3.482397 | 3.442618 | 0.139980 | 00:02 |
1 | 2.828804 | 2.455908 | 0.417783 | 00:02 |
2 | 2.134592 | 2.153767 | 0.315203 | 00:02 |
3 | 1.763576 | 2.096672 | 0.316940 | 00:02 |
4 | 1.589015 | 2.090171 | 0.317088 | 00:02 |
5 | 1.497501 | 2.057994 | 0.331374 | 00:02 |
6 | 1.414305 | 1.895721 | 0.441195 | 00:02 |
7 | 1.307273 | 2.044791 | 0.437872 | 00:02 |
8 | 1.165429 | 1.991641 | 0.461210 | 00:02 |
9 | 1.033335 | 1.776033 | 0.542783 | 00:02 |
10 | 0.923316 | 1.810016 | 0.564509 | 00:02 |
11 | 0.834117 | 1.762270 | 0.565005 | 00:02 |
12 | 0.758906 | 1.723969 | 0.591940 | 00:02 |
13 | 0.699892 | 1.808163 | 0.578944 | 00:02 |
14 | 0.653839 | 1.802881 | 0.592039 | 00:02 |
15 | 0.620560 | 1.769326 | 0.614509 | 00:02 |
16 | 0.595637 | 1.782574 | 0.616667 | 00:02 |
17 | 0.578359 | 1.772477 | 0.623785 | 00:02 |
18 | 0.567210 | 1.772950 | 0.623115 | 00:02 |
19 | 0.561052 | 1.781880 | 0.621751 | 00:02 |
class Model4(Module):
    "Same stateful model as Model3, but using PyTorch's built-in nn.RNN cell."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)
        self.rnn = nn.RNN(nh,nh, batch_first=True)
        self.h_o = nn.Linear(nh,nv)
        self.bn = BatchNorm1dFlat(nh)
        # (1, bs, nh): nn.RNN expects (num_layers, batch, hidden).
        # The original hard-coded `.cuda()`, which crashes on CPU-only
        # machines; the state is moved to the input's device in `forward`.
        self.h = torch.zeros(1, bs, nh)
    def forward(self, x):
        # Rebuild the state on a batch-size or device change.
        if self.h.shape[1]!=x.shape[0] or self.h.device!=x.device:
            self.h = torch.zeros(1, x.shape[0], nh, device=x.device)
        res,h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()  # truncated BPTT: stop gradients at the batch boundary
        return self.h_o(self.bn(res))
learn = Learner(dls, Model4(), loss_func=CrossEntropyLossFlat(), metrics=accuracy)
learn.fit_one_cycle(20, 3e-3)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 3.462379 | 3.272240 | 0.265749 | 00:01 |
1 | 2.669984 | 2.254657 | 0.462872 | 00:01 |
2 | 2.026915 | 2.119816 | 0.315923 | 00:01 |
3 | 1.709504 | 2.164839 | 0.316964 | 00:01 |
4 | 1.538079 | 2.037120 | 0.388790 | 00:01 |
5 | 1.376378 | 2.241062 | 0.339459 | 00:01 |
6 | 1.182906 | 2.094107 | 0.371429 | 00:01 |
7 | 1.019852 | 1.614843 | 0.476141 | 00:01 |
8 | 0.871662 | 1.549297 | 0.486880 | 00:01 |
9 | 0.743875 | 1.525240 | 0.522867 | 00:01 |
10 | 0.636371 | 1.434942 | 0.558606 | 00:01 |
11 | 0.549575 | 1.398644 | 0.553646 | 00:01 |
12 | 0.480547 | 1.357781 | 0.564410 | 00:01 |
13 | 0.427223 | 1.290959 | 0.583606 | 00:01 |
14 | 0.388108 | 1.209717 | 0.606944 | 00:01 |
15 | 0.356891 | 1.256806 | 0.609722 | 00:01 |
16 | 0.332150 | 1.269009 | 0.610045 | 00:01 |
17 | 0.315104 | 1.244885 | 0.617956 | 00:01 |
18 | 0.304269 | 1.261909 | 0.615501 | 00:01 |
19 | 0.297769 | 1.279711 | 0.611533 | 00:01 |
class Model5(Module):
    "Stateful model using a 2-layer GRU instead of a plain RNN."
    def __init__(self):
        self.i_h = nn.Embedding(nv,nh)
        self.rnn = nn.GRU(nh, nh, 2, batch_first=True)
        self.h_o = nn.Linear(nh,nv)
        self.bn = BatchNorm1dFlat(nh)
        # (2, bs, nh): one state per GRU layer. The original hard-coded
        # `.cuda()`, which crashes on CPU-only machines; the state is moved
        # to the input's device in `forward`.
        self.h = torch.zeros(2, bs, nh)
    def forward(self, x):
        # Rebuild the state on a batch-size or device change.
        if self.h.shape[1]!=x.shape[0] or self.h.device!=x.device:
            self.h = torch.zeros(2, x.shape[0], nh, device=x.device)
        res,h = self.rnn(self.i_h(x), self.h)
        self.h = h.detach()  # truncated BPTT: stop gradients at the batch boundary
        return self.h_o(self.bn(res))
learn = Learner(dls, Model5(), loss_func=CrossEntropyLossFlat(), metrics=accuracy)
learn.fit_one_cycle(10, 1e-2)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 2.666392 | 2.114901 | 0.497594 | 00:01 |
1 | 1.436292 | 1.357266 | 0.624330 | 00:01 |
2 | 0.678816 | 1.007875 | 0.745387 | 00:01 |
3 | 0.329509 | 0.735918 | 0.813219 | 00:01 |
4 | 0.168463 | 0.633921 | 0.837922 | 00:01 |
5 | 0.089841 | 0.612871 | 0.851290 | 00:01 |
6 | 0.051091 | 0.690696 | 0.840972 | 00:01 |
7 | 0.031449 | 0.706523 | 0.834896 | 00:01 |
8 | 0.020642 | 0.633427 | 0.843948 | 00:01 |
9 | 0.014271 | 0.636002 | 0.844072 | 00:01 |