Human numbers

In [ ]:

from fastai.text import *

In [ ]:

# Batch size handed to every `databunch(...)` call in this notebook.
bs = 64

Data

In [ ]:

# Fetch the HUMAN_NUMBERS dataset through fastai's `untar_data` and list the
# files in the returned directory (the output shows train.txt and valid.txt).
path = untar_data(URLs.HUMAN_NUMBERS)
path.ls()

Out[ ]:

[PosixPath('/home/ubuntu/.fastai/data/human_numbers/train.txt'),
 PosixPath('/home/ubuntu/.fastai/data/human_numbers/valid.txt')]

In [ ]:

def readnums(d):
    """Read file `d` under the module-level `path` and join its lines.

    Every line is stripped of surrounding whitespace and the lines are joined
    with ', ' into one string.

    Args:
        d: File name relative to the module-level `path`.

    Returns:
        A single-element list holding the joined text.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of lingering until garbage collection.
    with open(path/d) as f:
        return [', '.join(line.strip() for line in f)]

In [ ]:

# Load the training text and preview its first 80 characters.
train_txt = readnums('train.txt')
train_txt[0][:80]

Out[ ]:

'one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve, thirt'

In [ ]:

# Load the validation text and preview its last 80 characters.
valid_txt = readnums('valid.txt')
valid_txt[0][-80:]

Out[ ]:

' nine thousand nine hundred ninety eight, nine thousand nine hundred ninety nine'

In [ ]:

# Wrap each raw text in a TextList rooted at the dataset directory.
train, valid = [TextList(txt, path=path) for txt in (train_txt, valid_txt)]

# Pair the two lists, apply `label_for_lm()`, then batch with batch size `bs`.
src = (
    ItemLists(path=path, train=train, valid=valid)
    .label_for_lm()
)
data = src.databunch(bs=bs)

In [ ]:

train[0].text[:80]

Out[ ]:

'xxbos one , two , three , four , five , six , seven , eight , nine , ten , eleve'

In [ ]:

len(data.valid_ds[0][0].data)

Out[ ]:

In [ ]:

data.bptt, len(data.valid_dl)

Out[ ]:

(70, 3)

In [ ]:

13017/70/bs

Out[ ]:

2.905580357142857

In [ ]:

# Pull the first three (input, target) batches from the validation loader,
# then close the iterator explicitly.
it = iter(data.valid_dl)
(x1, y1), (x2, y2), (x3, y3) = [next(it) for _ in range(3)]
it.close()

In [ ]:

x1.numel()+x2.numel()+x3.numel()

Out[ ]:

In [ ]:

x1.shape,y1.shape

Out[ ]:

(torch.Size([64, 70]), torch.Size([64, 70]))

In [ ]:

x2.shape,y2.shape

Out[ ]:

(torch.Size([64, 70]), torch.Size([64, 70]))

In [ ]:

x1[:,0]

Out[ ]:

tensor([ 2,  8, 10, 11, 12, 10,  9,  8,  9, 13, 18, 24, 18, 14, 15, 10, 18,  8,
         9,  8, 18, 24, 18, 10, 18, 10,  9,  8, 18, 19, 10, 25, 19, 22, 19, 19,
        23, 19, 10, 13, 10, 10,  8, 13,  8, 19,  9, 19, 34, 16, 10,  9,  8, 16,
         8, 19,  9, 19, 10, 19, 10, 19, 19, 19], device='cuda:0')

In [ ]:

y1[:,0]

Out[ ]:

tensor([18, 18, 26,  9,  8, 11, 31, 18, 25,  9, 10, 14, 10,  9,  8, 14, 10, 18,
        25, 18, 10, 17, 10, 17,  8, 17, 20, 18,  9,  9, 19,  8, 10, 15, 10, 10,
        12, 10, 12,  8, 12, 13, 19,  9, 19, 10, 23, 10,  8,  8, 15, 16, 19,  9,
        19, 10, 23, 10, 18,  8, 18, 10, 10,  9], device='cuda:0')

In [ ]:

v = data.valid_ds.vocab

In [ ]:

v.textify(x1[0])

Out[ ]:

'xxbos eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight'

In [ ]:

v.textify(y1[0])

Out[ ]:

'eight thousand one , eight thousand two , eight thousand three , eight thousand four , eight thousand five , eight thousand six , eight thousand seven , eight thousand eight , eight thousand nine , eight thousand ten , eight thousand eleven , eight thousand twelve , eight thousand thirteen , eight thousand fourteen , eight thousand fifteen , eight thousand sixteen , eight thousand seventeen , eight thousand'

In [ ]:

v.textify(x2[0])

Out[ ]:

'thousand eighteen , eight thousand nineteen , eight thousand twenty , eight thousand twenty one , eight thousand twenty two , eight thousand twenty three , eight thousand twenty four , eight thousand twenty five , eight thousand twenty six , eight thousand twenty seven , eight thousand twenty eight , eight thousand twenty nine , eight thousand thirty , eight thousand thirty one , eight thousand thirty two ,'

In [ ]:

v.textify(x3[0])

Out[ ]:

'eight thousand thirty three , eight thousand thirty four , eight thousand thirty five , eight thousand thirty six , eight thousand thirty seven , eight thousand thirty eight , eight thousand thirty nine , eight thousand forty , eight thousand forty one , eight thousand forty two , eight thousand forty three , eight thousand forty four , eight thousand forty five , eight thousand forty six , eight'

In [ ]:

v.textify(x1[1])

Out[ ]:

', eight thousand forty six , eight thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine ,'

In [ ]:

v.textify(x2[1])

Out[ ]:

'eight thousand sixty , eight thousand sixty one , eight thousand sixty two , eight thousand sixty three , eight thousand sixty four , eight thousand sixty five , eight thousand sixty six , eight thousand sixty seven , eight thousand sixty eight , eight thousand sixty nine , eight thousand seventy , eight thousand seventy one , eight thousand seventy two , eight thousand seventy three , eight thousand'

In [ ]:

v.textify(x3[1])

Out[ ]:

'seventy four , eight thousand seventy five , eight thousand seventy six , eight thousand seventy seven , eight thousand seventy eight , eight thousand seventy nine , eight thousand eighty , eight thousand eighty one , eight thousand eighty two , eight thousand eighty three , eight thousand eighty four , eight thousand eighty five , eight thousand eighty six , eight thousand eighty seven , eight thousand eighty'

In [ ]:

v.textify(x3[-1])

Out[ ]:

'ninety , nine thousand nine hundred ninety one , nine thousand nine hundred ninety two , nine thousand nine hundred ninety three , nine thousand nine hundred ninety four , nine thousand nine hundred ninety five , nine thousand nine hundred ninety six , nine thousand nine hundred ninety seven , nine thousand nine hundred ninety eight , nine thousand nine hundred ninety nine xxbos eight thousand one , eight'

In [ ]:

data.show_batch(ds_type=DatasetType.Valid)

idx	text
0	thousand forty seven , eight thousand forty eight , eight thousand forty nine , eight thousand fifty , eight thousand fifty one , eight thousand fifty two , eight thousand fifty three , eight thousand fifty four , eight thousand fifty five , eight thousand fifty six , eight thousand fifty seven , eight thousand fifty eight , eight thousand fifty nine , eight thousand sixty , eight thousand sixty
1	eight , eight thousand eighty nine , eight thousand ninety , eight thousand ninety one , eight thousand ninety two , eight thousand ninety three , eight thousand ninety four , eight thousand ninety five , eight thousand ninety six , eight thousand ninety seven , eight thousand ninety eight , eight thousand ninety nine , eight thousand one hundred , eight thousand one hundred one , eight thousand one
2	thousand one hundred twenty four , eight thousand one hundred twenty five , eight thousand one hundred twenty six , eight thousand one hundred twenty seven , eight thousand one hundred twenty eight , eight thousand one hundred twenty nine , eight thousand one hundred thirty , eight thousand one hundred thirty one , eight thousand one hundred thirty two , eight thousand one hundred thirty three , eight thousand
3	three , eight thousand one hundred fifty four , eight thousand one hundred fifty five , eight thousand one hundred fifty six , eight thousand one hundred fifty seven , eight thousand one hundred fifty eight , eight thousand one hundred fifty nine , eight thousand one hundred sixty , eight thousand one hundred sixty one , eight thousand one hundred sixty two , eight thousand one hundred sixty three
4	thousand one hundred eighty three , eight thousand one hundred eighty four , eight thousand one hundred eighty five , eight thousand one hundred eighty six , eight thousand one hundred eighty seven , eight thousand one hundred eighty eight , eight thousand one hundred eighty nine , eight thousand one hundred ninety , eight thousand one hundred ninety one , eight thousand one hundred ninety two , eight thousand

Single fully connected model

In [ ]:

# Rebuild the DataBunch with bptt=3; the batch inspected in the next cell
# has shape (64, 3), i.e. three tokens per row.
data = src.databunch(bs=bs, bptt=3)

In [ ]:

x,y = data.one_batch()
x.shape,y.shape

Out[ ]:

(torch.Size([64, 3]), torch.Size([64, 3]))

In [ ]:

# Number of entries in the vocabulary's `itos` list; used below as the
# embedding input size and the output layer size.
nv = len(v.itos)
nv

Out[ ]:

In [ ]:

# Width of the embedding and hidden layers used by the models below.
nh = 64

In [ ]:

def loss4(input, target):
    """Cross-entropy of `input` against the last column of `target`.

    Args:
        input: Predictions, passed unchanged to `F.cross_entropy`.
        target: Targets indexed as `target[:, -1]`; other columns are ignored.

    Returns:
        The value returned by `F.cross_entropy`.
    """
    final_target = target[:, -1]
    return F.cross_entropy(input, final_target)
def acc4(input, target):
    """Accuracy of `input` against the last column of `target`.

    Args:
        input: Predictions, passed unchanged to `accuracy`.
        target: Targets indexed as `target[:, -1]`; other columns are ignored.

    Returns:
        The value returned by `accuracy`.
    """
    final_target = target[:, -1]
    return accuracy(input, final_target)

In [ ]:

class Model0(nn.Module):
    """Network that consumes up to three tokens and predicts from the last state.

    Layer sizes come from the module-level `nv` and `nh`, read at construction.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv, nh)  # token -> hidden (green arrow)
        self.h_h = nn.Linear(nh, nh)     # hidden -> hidden (brown arrow)
        self.h_o = nn.Linear(nh, nv)     # hidden -> output (blue arrow)
        self.bn = nn.BatchNorm1d(nh)

    def _advance(self, pre):
        """Apply the hidden layer, ReLU and batch norm to `pre`."""
        return self.bn(F.relu(self.h_h(pre)))

    def forward(self, x):
        """Return `h_o` activations after reading at most three columns of `x`.

        Column 0 is always read; columns 1 and 2 are read when present, and
        any column from index 3 onwards is ignored.
        """
        h = self._advance(self.i_h(x[:, 0]))
        for col in range(1, min(x.shape[1], 3)):
            # Add the next token's embedding to the running state, then step.
            h = self._advance(h + self.i_h(x[:, col]))
        return self.h_o(h)

In [ ]:

learn = Learner(data, Model0(), loss_func=loss4, metrics=acc4)

In [ ]:

learn.fit_one_cycle(6, 1e-4)

Total time: 00:07

epoch	train_loss	valid_loss	acc4
1	3.596286	3.588869	0.046645
2	3.086100	3.205763	0.274816
3	2.494411	2.749365	0.392004
4	2.144753	2.463537	0.415671
5	2.010915	2.352887	0.409237
6	1.983992	2.336967	0.408778

Same thing with a loop

In [ ]:

class Model1(nn.Module):
    """Loop-based network that predicts from the state after the last token.

    Layer sizes come from the module-level `nv` and `nh`, read at construction.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)  # green arrow
        self.h_h = nn.Linear(nh,nh)     # brown arrow
        self.h_o = nn.Linear(nh,nv)     # blue arrow
        self.bn = nn.BatchNorm1d(nh)

    def forward(self, x):
        """Return `h_o` activations after reading every column of `x`.

        Args:
            x: Token indices indexed as `x[:, i]`.

        Returns:
            Output of `h_o` applied to the final hidden state.
        """
        # Allocate the zero state directly on the input's device (no CPU
        # allocation followed by a copy), and size it from the embedding layer
        # so a later rebinding of the global `nh` cannot break this model.
        h = torch.zeros(x.shape[0], self.i_h.embedding_dim, device=x.device)
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = self.bn(F.relu(self.h_h(h)))
        return self.h_o(h)

In [ ]:

learn = Learner(data, Model1(), loss_func=loss4, metrics=acc4)

In [ ]:

learn.fit_one_cycle(6, 1e-4)

Total time: 00:07

epoch	train_loss	valid_loss	acc4
1	3.493525	3.420231	0.156250
2	2.987600	2.937893	0.376149
3	2.440199	2.477995	0.388787
4	2.132837	2.256569	0.391774
5	2.011305	2.181337	0.392923
6	1.985913	2.170874	0.393153

Multi fully connected model

In [ ]:

# Rebuild the DataBunch with bptt=20; the batch inspected in the next cell
# has shape (64, 20), i.e. twenty tokens per row.
data = src.databunch(bs=bs, bptt=20)

In [ ]:

x,y = data.one_batch()
x.shape,y.shape

Out[ ]:

(torch.Size([64, 20]), torch.Size([64, 20]))

In [ ]:

class Model2(nn.Module):
    """Loop-based network that emits one prediction per input position.

    Layer sizes come from the module-level `nv` and `nh`, read at construction.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.h_h = nn.Linear(nh,nh)
        self.h_o = nn.Linear(nh,nv)
        self.bn = nn.BatchNorm1d(nh)

    def forward(self, x):
        """Return per-position `h_o` activations stacked along dimension 1.

        Args:
            x: Token indices indexed as `x[:, i]`.

        Returns:
            The per-step outputs combined with `torch.stack(..., dim=1)`.
        """
        # Allocate the zero state directly on the input's device (no CPU
        # allocation followed by a copy), and size it from the embedding layer
        # so a later rebinding of the global `nh` cannot break this model.
        h = torch.zeros(x.shape[0], self.i_h.embedding_dim, device=x.device)
        res = []
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
            # Batch norm is applied only on the way to the output layer; the
            # un-normalised `h` is what carries over to the next step.
            res.append(self.h_o(self.bn(h)))
        return torch.stack(res, dim=1)

In [ ]:

learn = Learner(data, Model2(), metrics=accuracy)

In [ ]:

learn.fit_one_cycle(10, 1e-4, pct_start=0.1)

Total time: 00:06

epoch	train_loss	valid_loss	accuracy
1	3.639285	3.709278	0.058949
2	3.551151	3.565677	0.151776
3	3.439908	3.431850	0.207741
4	3.323083	3.314237	0.283949
5	3.213422	3.219906	0.321662
6	3.119673	3.151162	0.336790
7	3.046645	3.106630	0.341690
8	2.995379	3.082552	0.346662
9	2.963800	3.073327	0.349645
10	2.947312	3.071951	0.349787

Maintain state

In [ ]:

class Model3(nn.Module):
    """Per-position predictor that carries its hidden state across calls.

    Layer sizes come from the module-level `nv` and `nh`; the initial state is
    sized from the module-level `bs`. All three are read at construction.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.h_h = nn.Linear(nh,nh)
        self.h_o = nn.Linear(nh,nv)
        self.bn = nn.BatchNorm1d(nh)
        # Created without a hard-coded `.cuda()` so the model can be built on
        # machines without a GPU; `forward` moves it to the input's device.
        self.h = torch.zeros(bs, nh)

    def _initial_state(self, x):
        """Return the carried state matched to `x`'s batch size and device."""
        h = self.h
        if h.shape[0] != x.shape[0]:
            # A batch of a different size cannot reuse the stored rows, so
            # start again from zeros.
            return torch.zeros(x.shape[0], h.shape[1], device=x.device)
        return h.to(x.device)

    def forward(self, x):
        """Return per-position `h_o` activations and update the stored state.

        Args:
            x: Token indices indexed as `x[:, i]`.

        Returns:
            `h_o` applied to the batch-normalised states stacked on dim 1.
        """
        res = []
        h = self._initial_state(x)
        for i in range(x.shape[1]):
            h = h + self.i_h(x[:,i])
            h = F.relu(self.h_h(h))
            res.append(self.bn(h))
        # Detach so the next call does not backpropagate into this one.
        self.h = h.detach()
        res = torch.stack(res, dim=1)
        res = self.h_o(res)
        return res

In [ ]:

learn = Learner(data, Model3(), metrics=accuracy)

In [ ]:

learn.fit_one_cycle(20, 3e-3)

Total time: 00:11

epoch	train_loss	valid_loss	accuracy
1	3.598183	3.556362	0.050710
2	3.274616	2.975699	0.401634
3	2.624206	2.036894	0.467330
4	2.022702	1.956439	0.316193
5	1.681813	1.934952	0.336861
6	1.453007	1.948201	0.351349
7	1.276971	2.005776	0.368679
8	1.138499	2.081261	0.360156
9	1.029217	2.145853	0.360795
10	0.939949	2.215388	0.372230
11	0.865441	2.240438	0.401491
12	0.805310	2.195846	0.409375
13	0.755035	2.324373	0.422727
14	0.713073	2.305542	0.449716
15	0.677393	2.350155	0.446449
16	0.645841	2.418738	0.446591
17	0.621809	2.456903	0.446165
18	0.605300	2.541699	0.443040
19	0.594099	2.539824	0.443040
20	0.587563	2.551423	0.442827

nn.RNN

In [ ]:

class Model4(nn.Module):
    """Per-position predictor built on `nn.RNN` with state kept across calls.

    Layer sizes come from the module-level `nv` and `nh`; the initial state is
    sized from the module-level `bs`. All three are read at construction.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.rnn = nn.RNN(nh,nh, batch_first=True)
        self.h_o = nn.Linear(nh,nv)
        self.bn = BatchNorm1dFlat(nh)
        # Created without a hard-coded `.cuda()` so the model can be built on
        # machines without a GPU; `forward` moves it to the input's device.
        self.h = torch.zeros(1, bs, nh)

    def _initial_state(self, x):
        """Return the carried state matched to `x`'s batch size and device."""
        h = self.h
        if h.shape[1] != x.shape[0]:
            # Dimension 1 of the state is the batch; a batch of a different
            # size cannot reuse it, so start again from zeros.
            return torch.zeros(h.shape[0], x.shape[0], h.shape[2], device=x.device)
        return h.to(x.device)

    def forward(self, x):
        """Return `h_o` activations for `x` and update the stored state.

        Args:
            x: Token indices fed to the embedding layer.

        Returns:
            `h_o` applied to the batch-normalised RNN outputs.
        """
        res,h = self.rnn(self.i_h(x), self._initial_state(x))
        # Detach so the next call does not backpropagate into this one.
        self.h = h.detach()
        return self.h_o(self.bn(res))

In [ ]:

learn = Learner(data, Model4(), metrics=accuracy)

In [ ]:

learn.fit_one_cycle(20, 3e-3)

Total time: 00:04

epoch	train_loss	valid_loss	accuracy
1	3.451432	3.268344	0.224148
2	2.974938	2.456569	0.466051
3	2.316732	1.946969	0.465625
4	1.866151	1.991952	0.314702
5	1.618516	1.802403	0.437216
6	1.411517	1.731107	0.436293
7	1.171916	1.655979	0.504048
8	0.965887	1.579963	0.522088
9	0.797046	1.479819	0.565057
10	0.659378	1.487831	0.579048
11	0.553282	1.441922	0.597798
12	0.475167	1.498148	0.600781
13	0.416131	1.546984	0.606463
14	0.372395	1.594261	0.607386
15	0.337093	1.578321	0.613352
16	0.311385	1.580973	0.623366
17	0.292869	1.625745	0.618253
18	0.279486	1.623960	0.626065
19	0.270054	1.682090	0.611719
20	0.263857	1.675676	0.614702

2-layer GRU

In [ ]:

class Model5(nn.Module):
    """Per-position predictor built on a 2-layer `nn.GRU` with kept state.

    Layer sizes come from the module-level `nv` and `nh`; the initial state is
    sized from the module-level `bs`. All three are read at construction.
    """

    def __init__(self):
        super().__init__()
        self.i_h = nn.Embedding(nv,nh)
        self.rnn = nn.GRU(nh, nh, 2, batch_first=True)
        self.h_o = nn.Linear(nh,nv)
        self.bn = BatchNorm1dFlat(nh)
        # Created without a hard-coded `.cuda()` so the model can be built on
        # machines without a GPU; `forward` moves it to the input's device.
        self.h = torch.zeros(2, bs, nh)

    def _initial_state(self, x):
        """Return the carried state matched to `x`'s batch size and device."""
        h = self.h
        if h.shape[1] != x.shape[0]:
            # Dimension 1 of the state is the batch; a batch of a different
            # size cannot reuse it, so start again from zeros.
            return torch.zeros(h.shape[0], x.shape[0], h.shape[2], device=x.device)
        return h.to(x.device)

    def forward(self, x):
        """Return `h_o` activations for `x` and update the stored state.

        Args:
            x: Token indices fed to the embedding layer.

        Returns:
            `h_o` applied to the batch-normalised GRU outputs.
        """
        res,h = self.rnn(self.i_h(x), self._initial_state(x))
        # Detach so the next call does not backpropagate into this one.
        self.h = h.detach()
        return self.h_o(self.bn(res))

In [ ]:

learn = Learner(data, Model5(), metrics=accuracy)

In [ ]:

learn.fit_one_cycle(10, 1e-2)

Total time: 00:02

epoch	train_loss	valid_loss	accuracy
1	2.864854	2.314943	0.454545
2	1.798988	1.357116	0.629688
3	0.932729	1.307463	0.796733
4	0.451969	1.329699	0.788636
5	0.225787	1.293570	0.800142
6	0.118085	1.265926	0.803338
7	0.065306	1.207096	0.806960
8	0.038098	1.205361	0.813920
9	0.024069	1.239411	0.807813
10	0.017078	1.253409	0.807102

fin

In [ ]: