#hide
from fastai2.vision.all import *
from utils import *
matplotlib.rc('image', cmap='Greys')
path = untar_data(URLs.MNIST_SAMPLE)
#hide
Path.BASE_PATH = path
path.ls()
(#9) [Path('cleaned.csv'),Path('item_list.txt'),Path('trained_model.pkl'),Path('models'),Path('valid'),Path('labels.csv'),Path('export.pkl'),Path('history.csv'),Path('train')]
(path/'train').ls()
(#2) [Path('train/7'),Path('train/3')]
threes = (path/'train'/'3').ls().sorted()
sevens = (path/'train'/'7').ls().sorted()
threes
(#6131) [Path('train/3/10.png'),Path('train/3/10000.png'),Path('train/3/10011.png'),Path('train/3/10031.png'),Path('train/3/10034.png'),Path('train/3/10042.png'),Path('train/3/10052.png'),Path('train/3/1007.png'),Path('train/3/10074.png'),Path('train/3/10091.png')...]
im3_path = threes[1]
im3 = Image.open(im3_path)
im3
array(im3)[4:10,4:10]
array([[ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 29], [ 0, 0, 0, 48, 166, 224], [ 0, 93, 244, 249, 253, 187], [ 0, 107, 253, 253, 230, 48], [ 0, 3, 20, 20, 15, 0]], dtype=uint8)
tensor(im3)[4:10,4:10]
tensor([[ 0, 0, 0, 0, 0, 0], [ 0, 0, 0, 0, 0, 29], [ 0, 0, 0, 48, 166, 224], [ 0, 93, 244, 249, 253, 187], [ 0, 107, 253, 253, 230, 48], [ 0, 3, 20, 20, 15, 0]], dtype=torch.uint8)
im3_t = tensor(im3)
df = pd.DataFrame(im3_t[4:15,4:22])
df.style.set_properties(**{'font-size':'6pt'}).background_gradient('Greys')
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 29 | 150 | 195 | 254 | 255 | 254 | 176 | 193 | 150 | 96 | 0 | 0 | 0 |
2 | 0 | 0 | 0 | 48 | 166 | 224 | 253 | 253 | 234 | 196 | 253 | 253 | 253 | 253 | 233 | 0 | 0 | 0 |
3 | 0 | 93 | 244 | 249 | 253 | 187 | 46 | 10 | 8 | 4 | 10 | 194 | 253 | 253 | 233 | 0 | 0 | 0 |
4 | 0 | 107 | 253 | 253 | 230 | 48 | 0 | 0 | 0 | 0 | 0 | 192 | 253 | 253 | 156 | 0 | 0 | 0 |
5 | 0 | 3 | 20 | 20 | 15 | 0 | 0 | 0 | 0 | 0 | 43 | 224 | 253 | 245 | 74 | 0 | 0 | 0 |
6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 249 | 253 | 245 | 126 | 0 | 0 | 0 | 0 |
7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | 101 | 223 | 253 | 248 | 124 | 0 | 0 | 0 | 0 | 0 |
8 | 0 | 0 | 0 | 0 | 0 | 11 | 166 | 239 | 253 | 253 | 253 | 187 | 30 | 0 | 0 | 0 | 0 | 0 |
9 | 0 | 0 | 0 | 0 | 0 | 16 | 248 | 250 | 253 | 253 | 253 | 253 | 232 | 213 | 111 | 2 | 0 | 0 |
10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 43 | 98 | 98 | 208 | 253 | 253 | 253 | 253 | 187 | 22 | 0 |
seven_tensors = [tensor(Image.open(o)) for o in sevens]
three_tensors = [tensor(Image.open(o)) for o in threes]
len(three_tensors),len(seven_tensors)
(6131, 6265)
show_image(three_tensors[1]);
stacked_sevens = torch.stack(seven_tensors).float()/255
stacked_threes = torch.stack(three_tensors).float()/255
stacked_threes.shape
torch.Size([6131, 28, 28])
len(stacked_threes.shape)
3
stacked_threes.ndim
3
mean3 = stacked_threes.mean(0)
show_image(mean3);
mean7 = stacked_sevens.mean(0)
show_image(mean7);
a_3 = stacked_threes[1]
show_image(a_3);
dist_3_abs = (a_3 - mean3).abs().mean()
dist_3_sqr = ((a_3 - mean3)**2).mean().sqrt()
dist_3_abs,dist_3_sqr
(tensor(0.1114), tensor(0.2021))
dist_7_abs = (a_3 - mean7).abs().mean()
dist_7_sqr = ((a_3 - mean7)**2).mean().sqrt()
dist_7_abs,dist_7_sqr
(tensor(0.1586), tensor(0.3021))
F.l1_loss(a_3.float(),mean7), F.mse_loss(a_3,mean7).sqrt()
(tensor(0.1586), tensor(0.3021))
data = [[1,2,3],[4,5,6]]
arr = array (data)
tns = tensor(data)
arr # numpy
array([[1, 2, 3], [4, 5, 6]])
tns # pytorch
tensor([[1, 2, 3], [4, 5, 6]])
tns[1]
tensor([4, 5, 6])
tns[:,1]
tensor([2, 5])
tns[1,1:3]
tensor([5, 6])
tns+1
tensor([[2, 3, 4], [5, 6, 7]])
tns.type()
'torch.LongTensor'
tns*1.5
tensor([[1.5000, 3.0000, 4.5000], [6.0000, 7.5000, 9.0000]])
valid_3_tens = torch.stack([tensor(Image.open(o)) for o in (path/'valid'/'3').ls()])
valid_3_tens = valid_3_tens.float()/255
valid_7_tens = torch.stack([tensor(Image.open(o)) for o in (path/'valid'/'7').ls()])
valid_7_tens = valid_7_tens.float()/255
valid_3_tens.shape,valid_7_tens.shape
(torch.Size([1010, 28, 28]), torch.Size([1028, 28, 28]))
def mnist_distance(a, b):
    """Mean absolute (L1) difference between a and b over the last two axes.

    Broadcasts, so a stack of images vs. a single mean image yields one
    distance per image in the stack.
    """
    diff = (a - b).abs()
    return diff.mean((-1, -2))
mnist_distance(a_3, mean3)
tensor(0.1114)
valid_3_dist = mnist_distance(valid_3_tens, mean3)
valid_3_dist, valid_3_dist.shape
(tensor([0.1050, 0.1526, 0.1186, ..., 0.1122, 0.1170, 0.1086]), torch.Size([1010]))
tensor([1,2,3]) + tensor([1,1,1])
tensor([2, 3, 4])
(valid_3_tens-mean3).shape
torch.Size([1010, 28, 28])
def is_3(x):
    """True where x is closer (in L1 distance) to the ideal 3 than to the ideal 7."""
    dist_to_3 = mnist_distance(x, mean3)
    dist_to_7 = mnist_distance(x, mean7)
    return dist_to_3 < dist_to_7
is_3(a_3), is_3(a_3).float()
(tensor(True), tensor(1.))
is_3(valid_3_tens)
tensor([True, True, True, ..., True, True, True])
accuracy_3s = is_3(valid_3_tens).float() .mean()
accuracy_7s = (1 - is_3(valid_7_tens).float()).mean()
accuracy_3s,accuracy_7s,(accuracy_3s+accuracy_7s)/2
(tensor(0.9168), tensor(0.9854), tensor(0.9511))
gv('''
init->predict->loss->gradient->step->stop
step->predict[label=repeat]
''')
def f(x):
    """Square x — the toy quadratic used for the gradient-descent demo."""
    return x * x
plot_function(f, 'x', 'x**2')
plot_function(f, 'x', 'x**2')
plt.scatter(-1.5, f(-1.5), color='red');
xt = tensor(3.).requires_grad_()
yt = f(xt)
yt
tensor(9., grad_fn=<PowBackward0>)
yt.backward()
xt.grad
tensor(6.)
xt = tensor([3.,4.,10.]).requires_grad_()
xt
tensor([ 3., 4., 10.], requires_grad=True)
def f(x):
    """Sum of squares of x's elements — a scalar, so backward() needs no argument."""
    squared = x ** 2
    return squared.sum()
yt = f(xt)
yt
tensor(125., grad_fn=<SumBackward0>)
yt.backward()
xt.grad
tensor([ 6., 8., 20.])
def mnist_loss(inputs, targets):
    """Mean of (1 - prediction) where the target is 1, and of the prediction where it is 0.

    NOTE(review): assumes `inputs` are already in [0, 1] — the sigmoid-based
    version defined later removes that assumption.
    """
    per_item = torch.where(targets == 1, 1 - inputs, inputs)
    return per_item.mean()
tgt = tensor([1,0,1])
inp = tensor([0.9, 0.4, 0.2])
torch.where(tgt==1, 1-inp, inp)
tensor([0.1000, 0.4000, 0.8000])
mnist_loss(inp,tgt)
tensor(0.4333)
mnist_loss(tensor([0.9, 0.4, 0.8]),tgt)
tensor(0.2333)
def sigmoid(x):
    """Logistic function: squashes any real x smoothly into (0, 1)."""
    return (1 + torch.exp(-x)).reciprocal()
plot_function(torch.sigmoid, title='Sigmoid', min=-4, max=4)
def mnist_loss(inputs, targets):
    """Loss for raw activations: sigmoid first, then mean distance from the target label."""
    probs = inputs.sigmoid()
    per_item = torch.where(targets == 1, 1 - probs, probs)
    return per_item.mean()
coll = range(15)
dl = DataLoader(coll, batch_size=5, shuffle=True)
list(dl)
[tensor([9, 3, 6, 8, 0]), tensor([13, 1, 14, 4, 12]), tensor([ 7, 11, 2, 5, 10])]
ds = L(enumerate(string.ascii_lowercase))
ds
(#26) [(0, 'a'),(1, 'b'),(2, 'c'),(3, 'd'),(4, 'e'),(5, 'f'),(6, 'g'),(7, 'h'),(8, 'i'),(9, 'j')...]
dl = DataLoader(ds, batch_size=6, shuffle=True)
list(dl)
[(tensor([ 7, 19, 17, 13, 25, 15]), ('h', 't', 'r', 'n', 'z', 'p')), (tensor([11, 9, 23, 21, 3, 16]), ('l', 'j', 'x', 'v', 'd', 'q')), (tensor([12, 2, 18, 22, 14, 24]), ('m', 'c', 's', 'w', 'o', 'y')), (tensor([ 1, 0, 20, 4, 6, 10]), ('b', 'a', 'u', 'e', 'g', 'k')), (tensor([8, 5]), ('i', 'f'))]
train_x = torch.cat([stacked_threes, stacked_sevens]).view(-1, 28*28)
train_y = tensor([1]*len(threes) + [0]*len(sevens)).unsqueeze(1)
train_x.shape,train_y.shape
(torch.Size([12396, 784]), torch.Size([12396, 1]))
dset = list(zip(train_x,train_y))
x,y = dset[0]
x.shape,y
(torch.Size([784]), tensor([1]))
dl = DataLoader(dset, batch_size=256)
xb,yb = first(dl)
xb.shape,yb.shape
(torch.Size([256, 784]), torch.Size([256, 1]))
valid_x = torch.cat([valid_3_tens, valid_7_tens]).view(-1, 28*28)
valid_y = tensor([1]*len(valid_3_tens) + [0]*len(valid_7_tens)).unsqueeze(1)
valid_dset = list(zip(valid_x,valid_y))
valid_dl = DataLoader(valid_dset, batch_size=256)
def init_params(size, std=1.0):
    """Random parameters ~ N(0, std^2) of the given size, with gradient tracking on."""
    params = torch.randn(size) * std
    return params.requires_grad_()
weights = init_params((28*28,1))
bias = init_params(1)
(train_x[0]*weights.T).sum() + bias
tensor([4.5118], grad_fn=<AddBackward0>)
batch = train_x[:4]
batch.shape
torch.Size([4, 784])
def linear1(xb):
    """One linear layer: flattened image batch @ global weights, plus global bias."""
    activations = xb @ weights
    return activations + bias
preds = linear1(batch)
preds
tensor([[ 4.5118], [ 3.6536], [11.2975], [14.1164]], grad_fn=<AddBackward0>)
loss = mnist_loss(preds, train_y[:4])
loss
tensor(0.0090, grad_fn=<MeanBackward0>)
loss.backward()
weights.grad.shape,weights.grad.mean(),bias.grad
(torch.Size([784, 1]), tensor(-0.0013), tensor([-0.0088]))
def calc_grad(xb, yb, model):
    """Forward pass + loss + backward.

    Gradients ACCUMULATE into each parameter's .grad — the caller is
    responsible for zeroing them (demonstrated deliberately in the notebook).
    """
    loss = mnist_loss(model(xb), yb)
    loss.backward()
calc_grad(batch, train_y[:4], linear1)
weights.grad.mean(),bias.grad
(tensor(-0.0025), tensor([-0.0177]))
calc_grad(batch, train_y[:4], linear1)
weights.grad.mean(),bias.grad
(tensor(-0.0038), tensor([-0.0265]))
weights.grad.zero_()
bias.grad.zero_();
def train_epoch(model, lr, params):
    """One pass over the training DataLoader `dl`: per batch, accumulate
    gradients, then SGD-step every parameter and reset its gradient."""
    for xb, yb in dl:
        calc_grad(xb, yb, model)
        for param in params:
            param.data -= param.grad * lr
            param.grad.zero_()
(preds>0.0).float() == train_y[:4]
tensor([[True], [True], [True], [True]])
def batch_accuracy(xb, yb):
    """Fraction of raw activations `xb` that match labels `yb` after
    thresholding the sigmoid at 0.5."""
    probs = xb.sigmoid()
    hits = (probs > 0.5) == yb
    return hits.float().mean()
batch_accuracy(linear1(batch), train_y[:4])
tensor(1.)
def validate_epoch(model):
    """Mean batch_accuracy over the validation DataLoader, rounded to 4 places."""
    batch_accs = []
    for xb, yb in valid_dl:
        batch_accs.append(batch_accuracy(model(xb), yb))
    return round(torch.stack(batch_accs).mean().item(), 4)
validate_epoch(linear1)
0.4403
lr = 1.
params = weights,bias
train_epoch(linear1, lr, params)
validate_epoch(linear1)
0.4992
for i in range(20):
train_epoch(linear1, lr, params)
print(validate_epoch(linear1), end=' ')
0.6772 0.8081 0.914 0.9453 0.9565 0.9619 0.9624 0.9633 0.9658 0.9677 0.9702 0.9716 0.9721 0.9736 0.9741 0.9745 0.9765 0.977 0.977 0.9765
linear_model = nn.Linear(28*28,1)
w,b = linear_model.parameters()
w.shape,b.shape
(torch.Size([1, 784]), torch.Size([1]))
class BasicOptim:
    """Minimal SGD-style optimizer: step() subtracts lr * grad in place,
    zero_grad() drops gradients so the next backward() starts fresh."""

    def __init__(self, params, lr):
        self.params = list(params)
        self.lr = lr

    def step(self, *args, **kwargs):
        # In-place update; extra args accepted for Learner-API compatibility.
        for param in self.params:
            param.data -= param.grad.data * self.lr

    def zero_grad(self, *args, **kwargs):
        # Setting .grad to None (rather than zeroing) frees the grad tensor.
        for param in self.params:
            param.grad = None
opt = BasicOptim(linear_model.parameters(), lr)
def train_epoch(model):
    """One pass over the training DataLoader `dl`, delegating the parameter
    update and gradient reset to the global optimizer `opt`."""
    for xb, yb in dl:
        calc_grad(xb, yb, model)
        opt.step()
        opt.zero_grad()
validate_epoch(linear_model)
0.6714
def train_model(model, epochs):
    """Train for `epochs` epochs, printing validation accuracy after each."""
    for _ in range(epochs):
        train_epoch(model)
        print(validate_epoch(model), end=' ')
train_model(linear_model, 20)
0.4932 0.7935 0.8477 0.9165 0.9346 0.9482 0.956 0.9634 0.9658 0.9673 0.9702 0.9717 0.9731 0.9751 0.9756 0.9765 0.9775 0.978 0.9785 0.9785
linear_model = nn.Linear(28*28,1)
opt = SGD(linear_model.parameters(), lr)
train_model(linear_model, 20)
0.4932 0.771 0.8594 0.918 0.9355 0.9492 0.9575 0.9634 0.9658 0.9682 0.9692 0.9717 0.9731 0.9751 0.9756 0.977 0.977 0.9785 0.9785 0.9785
dls = DataLoaders(dl, valid_dl)
learn = Learner(dls, nn.Linear(28*28,1), opt_func=SGD,
loss_func=mnist_loss, metrics=batch_accuracy)
learn.fit(10, lr=lr)
epoch | train_loss | valid_loss | batch_accuracy | time |
---|---|---|---|---|
0 | 0.636918 | 0.503445 | 0.495584 | 00:00 |
1 | 0.500283 | 0.192597 | 0.839549 | 00:00 |
2 | 0.184349 | 0.182295 | 0.833660 | 00:00 |
3 | 0.081278 | 0.107260 | 0.912169 | 00:00 |
4 | 0.043316 | 0.078320 | 0.932777 | 00:00 |
5 | 0.028503 | 0.062712 | 0.946025 | 00:00 |
6 | 0.022414 | 0.052999 | 0.955348 | 00:00 |
7 | 0.019704 | 0.046531 | 0.962218 | 00:00 |
8 | 0.018323 | 0.041979 | 0.965653 | 00:00 |
9 | 0.017486 | 0.038622 | 0.966634 | 00:00 |
def simple_net(xb):
    """Two-layer net: linear -> ReLU (elementwise max with 0) -> linear.

    Uses the global parameters w1, b1, w2, b2 defined just below in the notebook.
    """
    hidden = xb @ w1 + b1
    hidden = hidden.max(tensor(0.0))  # ReLU: clamp negatives to zero
    return hidden @ w2 + b2
w1 = init_params((28*28,30))
b1 = init_params(30)
w2 = init_params((30,1))
b2 = init_params(1)
plot_function(F.relu)
simple_net = nn.Sequential(
nn.Linear(28*28,30),
nn.ReLU(),
nn.Linear(30,1)
)
learn = Learner(dls, simple_net, opt_func=SGD,
loss_func=mnist_loss, metrics=batch_accuracy)
learn.fit(40, 0.1)
epoch | train_loss | valid_loss | batch_accuracy | time |
---|---|---|---|---|
0 | 0.294820 | 0.416238 | 0.504907 | 00:00 |
1 | 0.141692 | 0.216893 | 0.816487 | 00:00 |
2 | 0.079073 | 0.110840 | 0.921001 | 00:00 |
3 | 0.052444 | 0.075782 | 0.941119 | 00:00 |
4 | 0.040078 | 0.059658 | 0.957802 | 00:00 |
5 | 0.033729 | 0.050542 | 0.962709 | 00:00 |
6 | 0.030057 | 0.044751 | 0.965653 | 00:00 |
7 | 0.027653 | 0.040775 | 0.967615 | 00:00 |
8 | 0.025914 | 0.037867 | 0.969087 | 00:00 |
9 | 0.024563 | 0.035642 | 0.970069 | 00:00 |
10 | 0.023465 | 0.033873 | 0.972031 | 00:00 |
11 | 0.022547 | 0.032421 | 0.972031 | 00:00 |
12 | 0.021761 | 0.031202 | 0.973013 | 00:00 |
13 | 0.021081 | 0.030153 | 0.974485 | 00:00 |
14 | 0.020482 | 0.029238 | 0.974485 | 00:00 |
15 | 0.019949 | 0.028429 | 0.975957 | 00:00 |
16 | 0.019472 | 0.027706 | 0.976938 | 00:00 |
17 | 0.019039 | 0.027055 | 0.977429 | 00:00 |
18 | 0.018645 | 0.026466 | 0.977920 | 00:00 |
19 | 0.018283 | 0.025931 | 0.977920 | 00:00 |
20 | 0.017950 | 0.025441 | 0.978901 | 00:00 |
21 | 0.017641 | 0.024991 | 0.979882 | 00:00 |
22 | 0.017353 | 0.024576 | 0.979882 | 00:00 |
23 | 0.017084 | 0.024192 | 0.980373 | 00:00 |
24 | 0.016832 | 0.023837 | 0.980864 | 00:00 |
25 | 0.016595 | 0.023506 | 0.981354 | 00:00 |
26 | 0.016371 | 0.023198 | 0.981354 | 00:00 |
27 | 0.016159 | 0.022910 | 0.981845 | 00:00 |
28 | 0.015959 | 0.022641 | 0.981845 | 00:00 |
29 | 0.015768 | 0.022389 | 0.981845 | 00:00 |
30 | 0.015587 | 0.022154 | 0.981845 | 00:00 |
31 | 0.015414 | 0.021932 | 0.981845 | 00:00 |
32 | 0.015249 | 0.021725 | 0.981845 | 00:00 |
33 | 0.015092 | 0.021529 | 0.982336 | 00:00 |
34 | 0.014941 | 0.021345 | 0.982336 | 00:00 |
35 | 0.014796 | 0.021171 | 0.982826 | 00:00 |
36 | 0.014658 | 0.021007 | 0.982826 | 00:00 |
37 | 0.014524 | 0.020852 | 0.982826 | 00:00 |
38 | 0.014396 | 0.020704 | 0.983317 | 00:00 |
39 | 0.014272 | 0.020564 | 0.983317 | 00:00 |
plt.plot(L(learn.recorder.values).itemgot(2));
learn.recorder.values[-1][2]
0.983316957950592
dls = ImageDataLoaders.from_folder(path)
learn = cnn_learner(dls, resnet18, pretrained=False,
loss_func=F.cross_entropy, metrics=accuracy)
learn.fit_one_cycle(1, 0.1)
epoch | train_loss | valid_loss | accuracy | time |
---|---|---|---|---|
0 | 0.125685 | 0.026256 | 0.992640 | 00:11 |