DS4420: Fun with autoencoders and self-supervision

Your name:

In [179]:
import numpy as np 
import matplotlib.pyplot as plt

import torch
from torch import nn
# conda install -c pytorch torchvision
import torchvision

# note: if you cannot get torchvision installed 
# using the above sequence, you can resort to 
# the colab version here: 
# -- just be sure to download and then upload
# the notebook to blackboard when complete.
# Load the training split of Fashion-MNIST; download=True lets torchvision
# fetch the files into `root` when they are not already there.
fMNIST = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST', train=True, download=True
)

Once again, we are playing with Fashion-MNIST here, following the last few lectures.

In [180]:
from IPython.display import Image 
from matplotlib.pyplot import imshow
%matplotlib inline
imshow(np.asarray(fMNIST.data[6]), cmap='gray')
Out[180]:
<matplotlib.image.AxesImage at 0x7f8f71d81b70>
In [181]:
# Flatten every image into a single row with one vectorized reshape
# (rather than looping over the images in Python one at a time).
X = fMNIST.data.numpy().reshape(len(fMNIST.data), -1)
# normalize: divide the raw pixel intensities by 255
# (presumably 8-bit values, which would map them into [0, 1])
X = X / 255
X.shape
Out[181]:
(60000, 784)

A brief detour / torch intro (or refresher)

We're going to implement a few autoencoder (AE) variants in torch.

Since this may serve as an introduction to (or at least a refresher on) torch for some of you, here is one way to define and train a simple model.

Note that you can also use the simple Sequential pipeline to build such straightforward models, but this style affords more flexibility (though it is overkill for something like this).

In [182]:
class SimpleMLP(nn.Module):
    '''
    A multi-layer perceptron with a single hidden layer, used as a classifier.

    Architecture: Linear (no bias) -> ReLU -> Linear. The forward pass returns
    raw, unnormalized scores (logits), one per label.
    '''
    def __init__(self, input_size=784, hidden_size=32, n_labels=10):
        '''
        In the initializer we setup model parameters/layers.

        Args:
            input_size: number of features in each input vector.
            hidden_size: width of the hidden layer.
            n_labels: number of output classes (size of the output layer).
        '''
        super(SimpleMLP, self).__init__() 

        self.input_size = input_size
        self.hidden_size = hidden_size
        # honor the constructor argument (previously hard-coded to 10)
        self.n_labels = n_labels
        
        # input layer; from x -> z
        self.i = nn.Linear(self.input_size, self.hidden_size, bias=False)
        # nonlinear activation
        self.a = nn.ReLU()
        # output layer; one score per label
        self.o = nn.Linear(self.hidden_size, self.n_labels)
        # Softmax over the label dimension. It is NOT applied in forward();
        # it is kept so callers can turn logits into probabilities via
        # model.sm(logits). An explicit dim avoids the deprecated
        # implicit-dimension behavior of nn.Softmax().
        self.sm = nn.Softmax(dim=-1)
      
    def forward(self, X):
        '''
        The forward pass defines how inputs flow forward through
        the model (linking layers together).

        Args:
            X: float tensor whose last dimension has size input_size.

        Returns:
            Tensor of unnormalized scores whose last dimension has size
            n_labels (no softmax is applied here).
        '''
        z = self.i(X)
        z = self.a(z)
        y_hat = self.o(z)
        return y_hat

Now to actually train the model, we need to define an optimizer and a loss function.

In [183]:
from torch import optim

# Objective: cross-entropy between the model's scores and the integer labels.
loss_function = nn.CrossEntropyLoss()

# Build the network with its default sizes and cast its parameters to float.
model = SimpleMLP()
model = model.float()

# Plain SGD with momentum over every trainable parameter of the model.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)
In [184]:
# target labels for the training images, taken straight from the dataset object
y = fMNIST.targets
y
Out[184]:
tensor([9, 0, 0,  ..., 3, 0, 5])
In [185]:
# convert X to a torch tensor. as_tensor makes this cell safe to re-run:
# if X is already a tensor it is returned as-is, whereas torch.tensor(X)
# would copy-construct it and emit a warning.
X = torch.as_tensor(X)

Let's take a look at making predictions and calculating a loss.

In [186]:
# Push the first 5 instances through the model together as one
# small batch (all rows go forward in a single call).
first_five = X[:5, :].float()
y_hat = model(first_five)
# compare the predictions against the matching 5 labels
loss = loss_function(y_hat, y[:5])
print(loss)
tensor(2.3589, grad_fn=<NllLossBackward>)

And now take some number of passes over our training data, incurring loss, and performing backprop.

In [187]:
# Training configuration (loop-invariant, so defined once up front).
EPOCHS = 100
batch_size = 16
# Only the first N_TRAIN instances of X / y are visited in each epoch.
N_TRAIN = 20000

for epoch in range(EPOCHS):

    # sum of the per-batch losses seen so far in this epoch
    running_loss = 0.0

    print("")
    # walk over the training subset in consecutive, fixed-order mini-batches
    for batch_num, idx in enumerate(range(0, N_TRAIN, batch_size)):
        # zero the parameter gradients
        optimizer.zero_grad()

        # never slice past the training subset, even when N_TRAIN is not
        # a multiple of batch_size
        stop = min(idx + batch_size, N_TRAIN)
        X_batch = X[idx: stop].float()
        y_batch = y[idx: stop]

        # now run our X's forward, get preds, incur
        # loss, backprop, and step the optimizer.
        y_hat_batch = model(X_batch)
        loss = loss_function(y_hat_batch, y_batch)
        loss.backward()
        optimizer.step()

        # print statistics (every 100th batch)
        running_loss += loss.item()
        if batch_num % 100 == 0:
            print("epoch: {}, batch: {} // loss: {:.3f}".format(epoch, batch_num, loss.item()))
epoch: 0, batch: 0 // loss: 2.305
epoch: 0, batch: 100 // loss: 1.929
epoch: 0, batch: 200 // loss: 1.576
epoch: 0, batch: 300 // loss: 1.377
epoch: 0, batch: 400 // loss: 1.068
epoch: 0, batch: 500 // loss: 1.100
epoch: 0, batch: 600 // loss: 0.984
epoch: 0, batch: 700 // loss: 0.792
epoch: 0, batch: 800 // loss: 0.861
epoch: 0, batch: 900 // loss: 0.682
epoch: 0, batch: 1000 // loss: 0.832
epoch: 0, batch: 1100 // loss: 0.547
epoch: 0, batch: 1200 // loss: 0.574

epoch: 1, batch: 0 // loss: 0.745
epoch: 1, batch: 100 // loss: 0.593
epoch: 1, batch: 200 // loss: 0.689
epoch: 1, batch: 300 // loss: 0.644
epoch: 1, batch: 400 // loss: 0.532
epoch: 1, batch: 500 // loss: 0.638
epoch: 1, batch: 600 // loss: 0.727
epoch: 1, batch: 700 // loss: 0.405
epoch: 1, batch: 800 // loss: 0.587
epoch: 1, batch: 900 // loss: 0.369
epoch: 1, batch: 1000 // loss: 0.702
epoch: 1, batch: 1100 // loss: 0.356
epoch: 1, batch: 1200 // loss: 0.446

epoch: 2, batch: 0 // loss: 0.595
epoch: 2, batch: 100 // loss: 0.479
epoch: 2, batch: 200 // loss: 0.678
epoch: 2, batch: 300 // loss: 0.514
epoch: 2, batch: 400 // loss: 0.383
epoch: 2, batch: 500 // loss: 0.534
epoch: 2, batch: 600 // loss: 0.659
epoch: 2, batch: 700 // loss: 0.322
epoch: 2, batch: 800 // loss: 0.469
epoch: 2, batch: 900 // loss: 0.271
epoch: 2, batch: 1000 // loss: 0.668
epoch: 2, batch: 1100 // loss: 0.271
epoch: 2, batch: 1200 // loss: 0.404

epoch: 3, batch: 0 // loss: 0.523
epoch: 3, batch: 100 // loss: 0.408
epoch: 3, batch: 200 // loss: 0.694
epoch: 3, batch: 300 // loss: 0.457
epoch: 3, batch: 400 // loss: 0.311
epoch: 3, batch: 500 // loss: 0.498
epoch: 3, batch: 600 // loss: 0.619
epoch: 3, batch: 700 // loss: 0.296
epoch: 3, batch: 800 // loss: 0.407
epoch: 3, batch: 900 // loss: 0.221
epoch: 3, batch: 1000 // loss: 0.658
epoch: 3, batch: 1100 // loss: 0.227
epoch: 3, batch: 1200 // loss: 0.379

epoch: 4, batch: 0 // loss: 0.470
epoch: 4, batch: 100 // loss: 0.372
epoch: 4, batch: 200 // loss: 0.691
epoch: 4, batch: 300 // loss: 0.421
epoch: 4, batch: 400 // loss: 0.277
epoch: 4, batch: 500 // loss: 0.480
epoch: 4, batch: 600 // loss: 0.593
epoch: 4, batch: 700 // loss: 0.285
epoch: 4, batch: 800 // loss: 0.368
epoch: 4, batch: 900 // loss: 0.191
epoch: 4, batch: 1000 // loss: 0.654
epoch: 4, batch: 1100 // loss: 0.201
epoch: 4, batch: 1200 // loss: 0.352

epoch: 5, batch: 0 // loss: 0.428
epoch: 5, batch: 100 // loss: 0.352
epoch: 5, batch: 200 // loss: 0.670
epoch: 5, batch: 300 // loss: 0.397
epoch: 5, batch: 400 // loss: 0.258
epoch: 5, batch: 500 // loss: 0.471
epoch: 5, batch: 600 // loss: 0.576
epoch: 5, batch: 700 // loss: 0.279
epoch: 5, batch: 800 // loss: 0.340
epoch: 5, batch: 900 // loss: 0.173
epoch: 5, batch: 1000 // loss: 0.653
epoch: 5, batch: 1100 // loss: 0.185
epoch: 5, batch: 1200 // loss: 0.330

epoch: 6, batch: 0 // loss: 0.399
epoch: 6, batch: 100 // loss: 0.343
epoch: 6, batch: 200 // loss: 0.645
epoch: 6, batch: 300 // loss: 0.381
epoch: 6, batch: 400 // loss: 0.244
epoch: 6, batch: 500 // loss: 0.459
epoch: 6, batch: 600 // loss: 0.566
epoch: 6, batch: 700 // loss: 0.273
epoch: 6, batch: 800 // loss: 0.322
epoch: 6, batch: 900 // loss: 0.161
epoch: 6, batch: 1000 // loss: 0.650
epoch: 6, batch: 1100 // loss: 0.174
epoch: 6, batch: 1200 // loss: 0.310

epoch: 7, batch: 0 // loss: 0.375
epoch: 7, batch: 100 // loss: 0.336
epoch: 7, batch: 200 // loss: 0.620
epoch: 7, batch: 300 // loss: 0.372
epoch: 7, batch: 400 // loss: 0.234
epoch: 7, batch: 500 // loss: 0.452
epoch: 7, batch: 600 // loss: 0.556
epoch: 7, batch: 700 // loss: 0.265
epoch: 7, batch: 800 // loss: 0.311
epoch: 7, batch: 900 // loss: 0.149
epoch: 7, batch: 1000 // loss: 0.646
epoch: 7, batch: 1100 // loss: 0.166
epoch: 7, batch: 1200 // loss: 0.293

epoch: 8, batch: 0 // loss: 0.358
epoch: 8, batch: 100 // loss: 0.331
epoch: 8, batch: 200 // loss: 0.600
epoch: 8, batch: 300 // loss: 0.357
epoch: 8, batch: 400 // loss: 0.226
epoch: 8, batch: 500 // loss: 0.443
epoch: 8, batch: 600 // loss: 0.546
epoch: 8, batch: 700 // loss: 0.259
epoch: 8, batch: 800 // loss: 0.302
epoch: 8, batch: 900 // loss: 0.139
epoch: 8, batch: 1000 // loss: 0.639
epoch: 8, batch: 1100 // loss: 0.160
epoch: 8, batch: 1200 // loss: 0.280

epoch: 9, batch: 0 // loss: 0.345
epoch: 9, batch: 100 // loss: 0.331
epoch: 9, batch: 200 // loss: 0.582
epoch: 9, batch: 300 // loss: 0.345
epoch: 9, batch: 400 // loss: 0.220
epoch: 9, batch: 500 // loss: 0.432
epoch: 9, batch: 600 // loss: 0.535
epoch: 9, batch: 700 // loss: 0.257
epoch: 9, batch: 800 // loss: 0.295
epoch: 9, batch: 900 // loss: 0.132
epoch: 9, batch: 1000 // loss: 0.633
epoch: 9, batch: 1100 // loss: 0.154
epoch: 9, batch: 1200 // loss: 0.266

epoch: 10, batch: 0 // loss: 0.332
epoch: 10, batch: 100 // loss: 0.330
epoch: 10, batch: 200 // loss: 0.571
epoch: 10, batch: 300 // loss: 0.337
epoch: 10, batch: 400 // loss: 0.213
epoch: 10, batch: 500 // loss: 0.429
epoch: 10, batch: 600 // loss: 0.527
epoch: 10, batch: 700 // loss: 0.256
epoch: 10, batch: 800 // loss: 0.291
epoch: 10, batch: 900 // loss: 0.126
epoch: 10, batch: 1000 // loss: 0.627
epoch: 10, batch: 1100 // loss: 0.151
epoch: 10, batch: 1200 // loss: 0.253

epoch: 11, batch: 0 // loss: 0.323
epoch: 11, batch: 100 // loss: 0.330
epoch: 11, batch: 200 // loss: 0.550
epoch: 11, batch: 300 // loss: 0.328
epoch: 11, batch: 400 // loss: 0.207
epoch: 11, batch: 500 // loss: 0.423
epoch: 11, batch: 600 // loss: 0.517
epoch: 11, batch: 700 // loss: 0.252
epoch: 11, batch: 800 // loss: 0.286
epoch: 11, batch: 900 // loss: 0.122
epoch: 11, batch: 1000 // loss: 0.618
epoch: 11, batch: 1100 // loss: 0.149
epoch: 11, batch: 1200 // loss: 0.245

epoch: 12, batch: 0 // loss: 0.319
epoch: 12, batch: 100 // loss: 0.330
epoch: 12, batch: 200 // loss: 0.537
epoch: 12, batch: 300 // loss: 0.324
epoch: 12, batch: 400 // loss: 0.200
epoch: 12, batch: 500 // loss: 0.414
epoch: 12, batch: 600 // loss: 0.513
epoch: 12, batch: 700 // loss: 0.251
epoch: 12, batch: 800 // loss: 0.284
epoch: 12, batch: 900 // loss: 0.120
epoch: 12, batch: 1000 // loss: 0.609
epoch: 12, batch: 1100 // loss: 0.147
epoch: 12, batch: 1200 // loss: 0.232

epoch: 13, batch: 0 // loss: 0.311
epoch: 13, batch: 100 // loss: 0.328
epoch: 13, batch: 200 // loss: 0.523
epoch: 13, batch: 300 // loss: 0.325
epoch: 13, batch: 400 // loss: 0.194
epoch: 13, batch: 500 // loss: 0.409
epoch: 13, batch: 600 // loss: 0.501
epoch: 13, batch: 700 // loss: 0.248
epoch: 13, batch: 800 // loss: 0.281
epoch: 13, batch: 900 // loss: 0.117
epoch: 13, batch: 1000 // loss: 0.603
epoch: 13, batch: 1100 // loss: 0.145
epoch: 13, batch: 1200 // loss: 0.222

epoch: 14, batch: 0 // loss: 0.307
epoch: 14, batch: 100 // loss: 0.328
epoch: 14, batch: 200 // loss: 0.508
epoch: 14, batch: 300 // loss: 0.322
epoch: 14, batch: 400 // loss: 0.188
epoch: 14, batch: 500 // loss: 0.407
epoch: 14, batch: 600 // loss: 0.490
epoch: 14, batch: 700 // loss: 0.249
epoch: 14, batch: 800 // loss: 0.286
epoch: 14, batch: 900 // loss: 0.112
epoch: 14, batch: 1000 // loss: 0.597
epoch: 14, batch: 1100 // loss: 0.143
epoch: 14, batch: 1200 // loss: 0.215

epoch: 15, batch: 0 // loss: 0.303
epoch: 15, batch: 100 // loss: 0.326
epoch: 15, batch: 200 // loss: 0.487
epoch: 15, batch: 300 // loss: 0.315
epoch: 15, batch: 400 // loss: 0.184
epoch: 15, batch: 500 // loss: 0.405
epoch: 15, batch: 600 // loss: 0.484
epoch: 15, batch: 700 // loss: 0.250
epoch: 15, batch: 800 // loss: 0.284
epoch: 15, batch: 900 // loss: 0.109
epoch: 15, batch: 1000 // loss: 0.593
epoch: 15, batch: 1100 // loss: 0.139
epoch: 15, batch: 1200 // loss: 0.209

epoch: 16, batch: 0 // loss: 0.297
epoch: 16, batch: 100 // loss: 0.325
epoch: 16, batch: 200 // loss: 0.476
epoch: 16, batch: 300 // loss: 0.316
epoch: 16, batch: 400 // loss: 0.180
epoch: 16, batch: 500 // loss: 0.406
epoch: 16, batch: 600 // loss: 0.477
epoch: 16, batch: 700 // loss: 0.247
epoch: 16, batch: 800 // loss: 0.287
epoch: 16, batch: 900 // loss: 0.109
epoch: 16, batch: 1000 // loss: 0.590
epoch: 16, batch: 1100 // loss: 0.135
epoch: 16, batch: 1200 // loss: 0.200

epoch: 17, batch: 0 // loss: 0.294
epoch: 17, batch: 100 // loss: 0.323
epoch: 17, batch: 200 // loss: 0.467
epoch: 17, batch: 300 // loss: 0.314
epoch: 17, batch: 400 // loss: 0.176
epoch: 17, batch: 500 // loss: 0.406
epoch: 17, batch: 600 // loss: 0.471
epoch: 17, batch: 700 // loss: 0.247
epoch: 17, batch: 800 // loss: 0.289
epoch: 17, batch: 900 // loss: 0.106
epoch: 17, batch: 1000 // loss: 0.585
epoch: 17, batch: 1100 // loss: 0.135
epoch: 17, batch: 1200 // loss: 0.198

epoch: 18, batch: 0 // loss: 0.291
epoch: 18, batch: 100 // loss: 0.325
epoch: 18, batch: 200 // loss: 0.458
epoch: 18, batch: 300 // loss: 0.313
epoch: 18, batch: 400 // loss: 0.173
epoch: 18, batch: 500 // loss: 0.412
epoch: 18, batch: 600 // loss: 0.465
epoch: 18, batch: 700 // loss: 0.245
epoch: 18, batch: 800 // loss: 0.287
epoch: 18, batch: 900 // loss: 0.102
epoch: 18, batch: 1000 // loss: 0.576
epoch: 18, batch: 1100 // loss: 0.133
epoch: 18, batch: 1200 // loss: 0.193

epoch: 19, batch: 0 // loss: 0.287
epoch: 19, batch: 100 // loss: 0.324
epoch: 19, batch: 200 // loss: 0.450
epoch: 19, batch: 300 // loss: 0.312
epoch: 19, batch: 400 // loss: 0.169
epoch: 19, batch: 500 // loss: 0.418
epoch: 19, batch: 600 // loss: 0.459
epoch: 19, batch: 700 // loss: 0.241
epoch: 19, batch: 800 // loss: 0.281
epoch: 19, batch: 900 // loss: 0.099
epoch: 19, batch: 1000 // loss: 0.569
epoch: 19, batch: 1100 // loss: 0.133
epoch: 19, batch: 1200 // loss: 0.187

epoch: 20, batch: 0 // loss: 0.287
epoch: 20, batch: 100 // loss: 0.325
epoch: 20, batch: 200 // loss: 0.440
epoch: 20, batch: 300 // loss: 0.309
epoch: 20, batch: 400 // loss: 0.165
epoch: 20, batch: 500 // loss: 0.416
epoch: 20, batch: 600 // loss: 0.453
epoch: 20, batch: 700 // loss: 0.238
epoch: 20, batch: 800 // loss: 0.286
epoch: 20, batch: 900 // loss: 0.096
epoch: 20, batch: 1000 // loss: 0.568
epoch: 20, batch: 1100 // loss: 0.131
epoch: 20, batch: 1200 // loss: 0.185

epoch: 21, batch: 0 // loss: 0.285
epoch: 21, batch: 100 // loss: 0.324
epoch: 21, batch: 200 // loss: 0.434
epoch: 21, batch: 300 // loss: 0.306
epoch: 21, batch: 400 // loss: 0.162
epoch: 21, batch: 500 // loss: 0.418
epoch: 21, batch: 600 // loss: 0.445
epoch: 21, batch: 700 // loss: 0.237
epoch: 21, batch: 800 // loss: 0.286
epoch: 21, batch: 900 // loss: 0.096
epoch: 21, batch: 1000 // loss: 0.566
epoch: 21, batch: 1100 // loss: 0.130
epoch: 21, batch: 1200 // loss: 0.181

epoch: 22, batch: 0 // loss: 0.281
epoch: 22, batch: 100 // loss: 0.320
epoch: 22, batch: 200 // loss: 0.422
epoch: 22, batch: 300 // loss: 0.302
epoch: 22, batch: 400 // loss: 0.160
epoch: 22, batch: 500 // loss: 0.420
epoch: 22, batch: 600 // loss: 0.439
epoch: 22, batch: 700 // loss: 0.235
epoch: 22, batch: 800 // loss: 0.294
epoch: 22, batch: 900 // loss: 0.092
epoch: 22, batch: 1000 // loss: 0.565
epoch: 22, batch: 1100 // loss: 0.128
epoch: 22, batch: 1200 // loss: 0.176

epoch: 23, batch: 0 // loss: 0.285
epoch: 23, batch: 100 // loss: 0.318
epoch: 23, batch: 200 // loss: 0.424
epoch: 23, batch: 300 // loss: 0.298
epoch: 23, batch: 400 // loss: 0.157
epoch: 23, batch: 500 // loss: 0.423
epoch: 23, batch: 600 // loss: 0.436
epoch: 23, batch: 700 // loss: 0.239
epoch: 23, batch: 800 // loss: 0.292
epoch: 23, batch: 900 // loss: 0.091
epoch: 23, batch: 1000 // loss: 0.562
epoch: 23, batch: 1100 // loss: 0.122
epoch: 23, batch: 1200 // loss: 0.172

epoch: 24, batch: 0 // loss: 0.285
epoch: 24, batch: 100 // loss: 0.310
epoch: 24, batch: 200 // loss: 0.418
epoch: 24, batch: 300 // loss: 0.299
epoch: 24, batch: 400 // loss: 0.154
epoch: 24, batch: 500 // loss: 0.425
epoch: 24, batch: 600 // loss: 0.431
epoch: 24, batch: 700 // loss: 0.228
epoch: 24, batch: 800 // loss: 0.309
epoch: 24, batch: 900 // loss: 0.088
epoch: 24, batch: 1000 // loss: 0.561
epoch: 24, batch: 1100 // loss: 0.121
epoch: 24, batch: 1200 // loss: 0.170

epoch: 25, batch: 0 // loss: 0.283
epoch: 25, batch: 100 // loss: 0.309
epoch: 25, batch: 200 // loss: 0.409
epoch: 25, batch: 300 // loss: 0.296
epoch: 25, batch: 400 // loss: 0.151
epoch: 25, batch: 500 // loss: 0.425
epoch: 25, batch: 600 // loss: 0.427
epoch: 25, batch: 700 // loss: 0.220
epoch: 25, batch: 800 // loss: 0.311
epoch: 25, batch: 900 // loss: 0.085
epoch: 25, batch: 1000 // loss: 0.559
epoch: 25, batch: 1100 // loss: 0.121
epoch: 25, batch: 1200 // loss: 0.169

epoch: 26, batch: 0 // loss: 0.286
epoch: 26, batch: 100 // loss: 0.309
epoch: 26, batch: 200 // loss: 0.401
epoch: 26, batch: 300 // loss: 0.291
epoch: 26, batch: 400 // loss: 0.149
epoch: 26, batch: 500 // loss: 0.428
epoch: 26, batch: 600 // loss: 0.423
epoch: 26, batch: 700 // loss: 0.236
epoch: 26, batch: 800 // loss: 0.310
epoch: 26, batch: 900 // loss: 0.084
epoch: 26, batch: 1000 // loss: 0.558
epoch: 26, batch: 1100 // loss: 0.119
epoch: 26, batch: 1200 // loss: 0.166

epoch: 27, batch: 0 // loss: 0.282
epoch: 27, batch: 100 // loss: 0.310
epoch: 27, batch: 200 // loss: 0.394
epoch: 27, batch: 300 // loss: 0.293
epoch: 27, batch: 400 // loss: 0.146
epoch: 27, batch: 500 // loss: 0.428
epoch: 27, batch: 600 // loss: 0.420
epoch: 27, batch: 700 // loss: 0.242
epoch: 27, batch: 800 // loss: 0.312
epoch: 27, batch: 900 // loss: 0.083
epoch: 27, batch: 1000 // loss: 0.558
epoch: 27, batch: 1100 // loss: 0.118
epoch: 27, batch: 1200 // loss: 0.163

epoch: 28, batch: 0 // loss: 0.285
epoch: 28, batch: 100 // loss: 0.310
epoch: 28, batch: 200 // loss: 0.389
epoch: 28, batch: 300 // loss: 0.287
epoch: 28, batch: 400 // loss: 0.144
epoch: 28, batch: 500 // loss: 0.429
epoch: 28, batch: 600 // loss: 0.419
epoch: 28, batch: 700 // loss: 0.242
epoch: 28, batch: 800 // loss: 0.310
epoch: 28, batch: 900 // loss: 0.081
epoch: 28, batch: 1000 // loss: 0.551
epoch: 28, batch: 1100 // loss: 0.115
epoch: 28, batch: 1200 // loss: 0.160

epoch: 29, batch: 0 // loss: 0.282
epoch: 29, batch: 100 // loss: 0.298
epoch: 29, batch: 200 // loss: 0.400
epoch: 29, batch: 300 // loss: 0.283
epoch: 29, batch: 400 // loss: 0.143
epoch: 29, batch: 500 // loss: 0.427
epoch: 29, batch: 600 // loss: 0.420
epoch: 29, batch: 700 // loss: 0.248
epoch: 29, batch: 800 // loss: 0.311
epoch: 29, batch: 900 // loss: 0.080
epoch: 29, batch: 1000 // loss: 0.549
epoch: 29, batch: 1100 // loss: 0.113
epoch: 29, batch: 1200 // loss: 0.157

epoch: 30, batch: 0 // loss: 0.283
epoch: 30, batch: 100 // loss: 0.294
epoch: 30, batch: 200 // loss: 0.406
epoch: 30, batch: 300 // loss: 0.281
epoch: 30, batch: 400 // loss: 0.141
epoch: 30, batch: 500 // loss: 0.421
epoch: 30, batch: 600 // loss: 0.419
epoch: 30, batch: 700 // loss: 0.250
epoch: 30, batch: 800 // loss: 0.312
epoch: 30, batch: 900 // loss: 0.080
epoch: 30, batch: 1000 // loss: 0.548
epoch: 30, batch: 1100 // loss: 0.114
epoch: 30, batch: 1200 // loss: 0.154

epoch: 31, batch: 0 // loss: 0.279
epoch: 31, batch: 100 // loss: 0.290
epoch: 31, batch: 200 // loss: 0.401
epoch: 31, batch: 300 // loss: 0.278
epoch: 31, batch: 400 // loss: 0.140
epoch: 31, batch: 500 // loss: 0.418
epoch: 31, batch: 600 // loss: 0.419
epoch: 31, batch: 700 // loss: 0.251
epoch: 31, batch: 800 // loss: 0.313
epoch: 31, batch: 900 // loss: 0.079
epoch: 31, batch: 1000 // loss: 0.544
epoch: 31, batch: 1100 // loss: 0.114
epoch: 31, batch: 1200 // loss: 0.154

epoch: 32, batch: 0 // loss: 0.281
epoch: 32, batch: 100 // loss: 0.284
epoch: 32, batch: 200 // loss: 0.404
epoch: 32, batch: 300 // loss: 0.277
epoch: 32, batch: 400 // loss: 0.138
epoch: 32, batch: 500 // loss: 0.424
epoch: 32, batch: 600 // loss: 0.419
epoch: 32, batch: 700 // loss: 0.254
epoch: 32, batch: 800 // loss: 0.309
epoch: 32, batch: 900 // loss: 0.077
epoch: 32, batch: 1000 // loss: 0.547
epoch: 32, batch: 1100 // loss: 0.114
epoch: 32, batch: 1200 // loss: 0.151

epoch: 33, batch: 0 // loss: 0.281
epoch: 33, batch: 100 // loss: 0.287
epoch: 33, batch: 200 // loss: 0.402
epoch: 33, batch: 300 // loss: 0.279
epoch: 33, batch: 400 // loss: 0.137
epoch: 33, batch: 500 // loss: 0.424
epoch: 33, batch: 600 // loss: 0.416
epoch: 33, batch: 700 // loss: 0.254
epoch: 33, batch: 800 // loss: 0.313
epoch: 33, batch: 900 // loss: 0.077
epoch: 33, batch: 1000 // loss: 0.543
epoch: 33, batch: 1100 // loss: 0.116
epoch: 33, batch: 1200 // loss: 0.149

epoch: 34, batch: 0 // loss: 0.286
epoch: 34, batch: 100 // loss: 0.278
epoch: 34, batch: 200 // loss: 0.394
epoch: 34, batch: 300 // loss: 0.277
epoch: 34, batch: 400 // loss: 0.137
epoch: 34, batch: 500 // loss: 0.429
epoch: 34, batch: 600 // loss: 0.413
epoch: 34, batch: 700 // loss: 0.257
epoch: 34, batch: 800 // loss: 0.309
epoch: 34, batch: 900 // loss: 0.075
epoch: 34, batch: 1000 // loss: 0.539
epoch: 34, batch: 1100 // loss: 0.116
epoch: 34, batch: 1200 // loss: 0.147

epoch: 35, batch: 0 // loss: 0.279
epoch: 35, batch: 100 // loss: 0.279
epoch: 35, batch: 200 // loss: 0.397
epoch: 35, batch: 300 // loss: 0.277
epoch: 35, batch: 400 // loss: 0.137
epoch: 35, batch: 500 // loss: 0.426
epoch: 35, batch: 600 // loss: 0.409
epoch: 35, batch: 700 // loss: 0.258
epoch: 35, batch: 800 // loss: 0.316
epoch: 35, batch: 900 // loss: 0.075
epoch: 35, batch: 1000 // loss: 0.541
epoch: 35, batch: 1100 // loss: 0.115
epoch: 35, batch: 1200 // loss: 0.145

epoch: 36, batch: 0 // loss: 0.276
epoch: 36, batch: 100 // loss: 0.274
epoch: 36, batch: 200 // loss: 0.400
epoch: 36, batch: 300 // loss: 0.277
epoch: 36, batch: 400 // loss: 0.136
epoch: 36, batch: 500 // loss: 0.434
epoch: 36, batch: 600 // loss: 0.406
epoch: 36, batch: 700 // loss: 0.259
epoch: 36, batch: 800 // loss: 0.315
epoch: 36, batch: 900 // loss: 0.073
epoch: 36, batch: 1000 // loss: 0.540
epoch: 36, batch: 1100 // loss: 0.115
epoch: 36, batch: 1200 // loss: 0.143

epoch: 37, batch: 0 // loss: 0.272
epoch: 37, batch: 100 // loss: 0.269
epoch: 37, batch: 200 // loss: 0.386
epoch: 37, batch: 300 // loss: 0.271
epoch: 37, batch: 400 // loss: 0.138
epoch: 37, batch: 500 // loss: 0.432
epoch: 37, batch: 600 // loss: 0.404
epoch: 37, batch: 700 // loss: 0.265
epoch: 37, batch: 800 // loss: 0.309
epoch: 37, batch: 900 // loss: 0.074
epoch: 37, batch: 1000 // loss: 0.534
epoch: 37, batch: 1100 // loss: 0.116
epoch: 37, batch: 1200 // loss: 0.143

epoch: 38, batch: 0 // loss: 0.272
epoch: 38, batch: 100 // loss: 0.264
epoch: 38, batch: 200 // loss: 0.391
epoch: 38, batch: 300 // loss: 0.269
epoch: 38, batch: 400 // loss: 0.136
epoch: 38, batch: 500 // loss: 0.439
epoch: 38, batch: 600 // loss: 0.402
epoch: 38, batch: 700 // loss: 0.266
epoch: 38, batch: 800 // loss: 0.304
epoch: 38, batch: 900 // loss: 0.072
epoch: 38, batch: 1000 // loss: 0.531
epoch: 38, batch: 1100 // loss: 0.117
epoch: 38, batch: 1200 // loss: 0.140

epoch: 39, batch: 0 // loss: 0.273
epoch: 39, batch: 100 // loss: 0.260
epoch: 39, batch: 200 // loss: 0.385
epoch: 39, batch: 300 // loss: 0.267
epoch: 39, batch: 400 // loss: 0.137
epoch: 39, batch: 500 // loss: 0.435
epoch: 39, batch: 600 // loss: 0.404
epoch: 39, batch: 700 // loss: 0.269
epoch: 39, batch: 800 // loss: 0.303
epoch: 39, batch: 900 // loss: 0.072
epoch: 39, batch: 1000 // loss: 0.525
epoch: 39, batch: 1100 // loss: 0.118
epoch: 39, batch: 1200 // loss: 0.139

epoch: 40, batch: 0 // loss: 0.268
epoch: 40, batch: 100 // loss: 0.256
epoch: 40, batch: 200 // loss: 0.383
epoch: 40, batch: 300 // loss: 0.267
epoch: 40, batch: 400 // loss: 0.136
epoch: 40, batch: 500 // loss: 0.433
epoch: 40, batch: 600 // loss: 0.400
epoch: 40, batch: 700 // loss: 0.277
epoch: 40, batch: 800 // loss: 0.299
epoch: 40, batch: 900 // loss: 0.073
epoch: 40, batch: 1000 // loss: 0.524
epoch: 40, batch: 1100 // loss: 0.118
epoch: 40, batch: 1200 // loss: 0.139

epoch: 41, batch: 0 // loss: 0.268
epoch: 41, batch: 100 // loss: 0.255
epoch: 41, batch: 200 // loss: 0.390
epoch: 41, batch: 300 // loss: 0.267
epoch: 41, batch: 400 // loss: 0.138
epoch: 41, batch: 500 // loss: 0.434
epoch: 41, batch: 600 // loss: 0.396
epoch: 41, batch: 700 // loss: 0.277
epoch: 41, batch: 800 // loss: 0.297
epoch: 41, batch: 900 // loss: 0.070
epoch: 41, batch: 1000 // loss: 0.524
epoch: 41, batch: 1100 // loss: 0.120
epoch: 41, batch: 1200 // loss: 0.137

epoch: 42, batch: 0 // loss: 0.264
epoch: 42, batch: 100 // loss: 0.252
epoch: 42, batch: 200 // loss: 0.388
epoch: 42, batch: 300 // loss: 0.263
epoch: 42, batch: 400 // loss: 0.139
epoch: 42, batch: 500 // loss: 0.431
epoch: 42, batch: 600 // loss: 0.400
epoch: 42, batch: 700 // loss: 0.277
epoch: 42, batch: 800 // loss: 0.294
epoch: 42, batch: 900 // loss: 0.070
epoch: 42, batch: 1000 // loss: 0.530
epoch: 42, batch: 1100 // loss: 0.125
epoch: 42, batch: 1200 // loss: 0.137

epoch: 43, batch: 0 // loss: 0.259
epoch: 43, batch: 100 // loss: 0.247
epoch: 43, batch: 200 // loss: 0.382
epoch: 43, batch: 300 // loss: 0.264
epoch: 43, batch: 400 // loss: 0.138
epoch: 43, batch: 500 // loss: 0.437
epoch: 43, batch: 600 // loss: 0.403
epoch: 43, batch: 700 // loss: 0.267
epoch: 43, batch: 800 // loss: 0.301
epoch: 43, batch: 900 // loss: 0.068
epoch: 43, batch: 1000 // loss: 0.525
epoch: 43, batch: 1100 // loss: 0.124
epoch: 43, batch: 1200 // loss: 0.139

epoch: 44, batch: 0 // loss: 0.258
epoch: 44, batch: 100 // loss: 0.245
epoch: 44, batch: 200 // loss: 0.384
epoch: 44, batch: 300 // loss: 0.261
epoch: 44, batch: 400 // loss: 0.138
epoch: 44, batch: 500 // loss: 0.434
epoch: 44, batch: 600 // loss: 0.404
epoch: 44, batch: 700 // loss: 0.267
epoch: 44, batch: 800 // loss: 0.292
epoch: 44, batch: 900 // loss: 0.072
epoch: 44, batch: 1000 // loss: 0.528
epoch: 44, batch: 1100 // loss: 0.126
epoch: 44, batch: 1200 // loss: 0.137

epoch: 45, batch: 0 // loss: 0.254
epoch: 45, batch: 100 // loss: 0.243
epoch: 45, batch: 200 // loss: 0.371
epoch: 45, batch: 300 // loss: 0.257
epoch: 45, batch: 400 // loss: 0.137
epoch: 45, batch: 500 // loss: 0.436
epoch: 45, batch: 600 // loss: 0.404
epoch: 45, batch: 700 // loss: 0.269
epoch: 45, batch: 800 // loss: 0.286
epoch: 45, batch: 900 // loss: 0.069
epoch: 45, batch: 1000 // loss: 0.531
epoch: 45, batch: 1100 // loss: 0.124
epoch: 45, batch: 1200 // loss: 0.138

epoch: 46, batch: 0 // loss: 0.248
epoch: 46, batch: 100 // loss: 0.243
epoch: 46, batch: 200 // loss: 0.372
epoch: 46, batch: 300 // loss: 0.257
epoch: 46, batch: 400 // loss: 0.137
epoch: 46, batch: 500 // loss: 0.440
epoch: 46, batch: 600 // loss: 0.406
epoch: 46, batch: 700 // loss: 0.262
epoch: 46, batch: 800 // loss: 0.285
epoch: 46, batch: 900 // loss: 0.070
epoch: 46, batch: 1000 // loss: 0.531
epoch: 46, batch: 1100 // loss: 0.127
epoch: 46, batch: 1200 // loss: 0.140

epoch: 47, batch: 0 // loss: 0.247
epoch: 47, batch: 100 // loss: 0.243
epoch: 47, batch: 200 // loss: 0.366
epoch: 47, batch: 300 // loss: 0.255
epoch: 47, batch: 400 // loss: 0.137
epoch: 47, batch: 500 // loss: 0.438
epoch: 47, batch: 600 // loss: 0.407
epoch: 47, batch: 700 // loss: 0.263
epoch: 47, batch: 800 // loss: 0.284
epoch: 47, batch: 900 // loss: 0.068
epoch: 47, batch: 1000 // loss: 0.531
epoch: 47, batch: 1100 // loss: 0.127
epoch: 47, batch: 1200 // loss: 0.140

epoch: 48, batch: 0 // loss: 0.243
epoch: 48, batch: 100 // loss: 0.243
epoch: 48, batch: 200 // loss: 0.361
epoch: 48, batch: 300 // loss: 0.250
epoch: 48, batch: 400 // loss: 0.136
epoch: 48, batch: 500 // loss: 0.440
epoch: 48, batch: 600 // loss: 0.410
epoch: 48, batch: 700 // loss: 0.268
epoch: 48, batch: 800 // loss: 0.279
epoch: 48, batch: 900 // loss: 0.065
epoch: 48, batch: 1000 // loss: 0.528
epoch: 48, batch: 1100 // loss: 0.129
epoch: 48, batch: 1200 // loss: 0.132

epoch: 49, batch: 0 // loss: 0.239
epoch: 49, batch: 100 // loss: 0.240
epoch: 49, batch: 200 // loss: 0.361
epoch: 49, batch: 300 // loss: 0.253
epoch: 49, batch: 400 // loss: 0.135
epoch: 49, batch: 500 // loss: 0.439
epoch: 49, batch: 600 // loss: 0.411
epoch: 49, batch: 700 // loss: 0.271
epoch: 49, batch: 800 // loss: 0.271
epoch: 49, batch: 900 // loss: 0.066
epoch: 49, batch: 1000 // loss: 0.527
epoch: 49, batch: 1100 // loss: 0.130
epoch: 49, batch: 1200 // loss: 0.132

epoch: 50, batch: 0 // loss: 0.231
epoch: 50, batch: 100 // loss: 0.237
epoch: 50, batch: 200 // loss: 0.363
epoch: 50, batch: 300 // loss: 0.245
epoch: 50, batch: 400 // loss: 0.135
epoch: 50, batch: 500 // loss: 0.444
epoch: 50, batch: 600 // loss: 0.409
epoch: 50, batch: 700 // loss: 0.269
epoch: 50, batch: 800 // loss: 0.269
epoch: 50, batch: 900 // loss: 0.065
epoch: 50, batch: 1000 // loss: 0.525
epoch: 50, batch: 1100 // loss: 0.130
epoch: 50, batch: 1200 // loss: 0.136

epoch: 51, batch: 0 // loss: 0.231
epoch: 51, batch: 100 // loss: 0.233
epoch: 51, batch: 200 // loss: 0.350
epoch: 51, batch: 300 // loss: 0.243
epoch: 51, batch: 400 // loss: 0.135
epoch: 51, batch: 500 // loss: 0.444
epoch: 51, batch: 600 // loss: 0.408
epoch: 51, batch: 700 // loss: 0.266
epoch: 51, batch: 800 // loss: 0.269
epoch: 51, batch: 900 // loss: 0.064
epoch: 51, batch: 1000 // loss: 0.524
epoch: 51, batch: 1100 // loss: 0.131
epoch: 51, batch: 1200 // loss: 0.130

epoch: 52, batch: 0 // loss: 0.225
epoch: 52, batch: 100 // loss: 0.232
epoch: 52, batch: 200 // loss: 0.349
epoch: 52, batch: 300 // loss: 0.243
epoch: 52, batch: 400 // loss: 0.135
epoch: 52, batch: 500 // loss: 0.443
epoch: 52, batch: 600 // loss: 0.404
epoch: 52, batch: 700 // loss: 0.268
epoch: 52, batch: 800 // loss: 0.264
epoch: 52, batch: 900 // loss: 0.064
epoch: 52, batch: 1000 // loss: 0.522
epoch: 52, batch: 1100 // loss: 0.133
epoch: 52, batch: 1200 // loss: 0.137

epoch: 53, batch: 0 // loss: 0.224
epoch: 53, batch: 100 // loss: 0.225
epoch: 53, batch: 200 // loss: 0.341
epoch: 53, batch: 300 // loss: 0.236
epoch: 53, batch: 400 // loss: 0.135
epoch: 53, batch: 500 // loss: 0.448
epoch: 53, batch: 600 // loss: 0.403
epoch: 53, batch: 700 // loss: 0.266
epoch: 53, batch: 800 // loss: 0.259
epoch: 53, batch: 900 // loss: 0.064
epoch: 53, batch: 1000 // loss: 0.526
epoch: 53, batch: 1100 // loss: 0.133
epoch: 53, batch: 1200 // loss: 0.134

epoch: 54, batch: 0 // loss: 0.220
epoch: 54, batch: 100 // loss: 0.222
epoch: 54, batch: 200 // loss: 0.352
epoch: 54, batch: 300 // loss: 0.241
epoch: 54, batch: 400 // loss: 0.131
epoch: 54, batch: 500 // loss: 0.440
epoch: 54, batch: 600 // loss: 0.401
epoch: 54, batch: 700 // loss: 0.273
epoch: 54, batch: 800 // loss: 0.257
epoch: 54, batch: 900 // loss: 0.063
epoch: 54, batch: 1000 // loss: 0.527
epoch: 54, batch: 1100 // loss: 0.134
epoch: 54, batch: 1200 // loss: 0.134

epoch: 55, batch: 0 // loss: 0.220
epoch: 55, batch: 100 // loss: 0.226
epoch: 55, batch: 200 // loss: 0.347
epoch: 55, batch: 300 // loss: 0.240
epoch: 55, batch: 400 // loss: 0.131
epoch: 55, batch: 500 // loss: 0.451
epoch: 55, batch: 600 // loss: 0.402
epoch: 55, batch: 700 // loss: 0.268
epoch: 55, batch: 800 // loss: 0.256
epoch: 55, batch: 900 // loss: 0.064
epoch: 55, batch: 1000 // loss: 0.526
epoch: 55, batch: 1100 // loss: 0.131
epoch: 55, batch: 1200 // loss: 0.134

epoch: 56, batch: 0 // loss: 0.218
epoch: 56, batch: 100 // loss: 0.220
epoch: 56, batch: 200 // loss: 0.343
epoch: 56, batch: 300 // loss: 0.234
epoch: 56, batch: 400 // loss: 0.132
epoch: 56, batch: 500 // loss: 0.451
epoch: 56, batch: 600 // loss: 0.400
epoch: 56, batch: 700 // loss: 0.260
epoch: 56, batch: 800 // loss: 0.250
epoch: 56, batch: 900 // loss: 0.063
epoch: 56, batch: 1000 // loss: 0.528
epoch: 56, batch: 1100 // loss: 0.135
epoch: 56, batch: 1200 // loss: 0.133

epoch: 57, batch: 0 // loss: 0.217
epoch: 57, batch: 100 // loss: 0.219
epoch: 57, batch: 200 // loss: 0.343
epoch: 57, batch: 300 // loss: 0.235
epoch: 57, batch: 400 // loss: 0.131
epoch: 57, batch: 500 // loss: 0.443
epoch: 57, batch: 600 // loss: 0.396
epoch: 57, batch: 700 // loss: 0.261
epoch: 57, batch: 800 // loss: 0.242
epoch: 57, batch: 900 // loss: 0.063
epoch: 57, batch: 1000 // loss: 0.524
epoch: 57, batch: 1100 // loss: 0.134
epoch: 57, batch: 1200 // loss: 0.130

epoch: 58, batch: 0 // loss: 0.217
epoch: 58, batch: 100 // loss: 0.220
epoch: 58, batch: 200 // loss: 0.348
epoch: 58, batch: 300 // loss: 0.233
epoch: 58, batch: 400 // loss: 0.131
epoch: 58, batch: 500 // loss: 0.446
epoch: 58, batch: 600 // loss: 0.398
epoch: 58, batch: 700 // loss: 0.258
epoch: 58, batch: 800 // loss: 0.245
epoch: 58, batch: 900 // loss: 0.063
epoch: 58, batch: 1000 // loss: 0.525
epoch: 58, batch: 1100 // loss: 0.133
epoch: 58, batch: 1200 // loss: 0.130

epoch: 59, batch: 0 // loss: 0.218
epoch: 59, batch: 100 // loss: 0.215
epoch: 59, batch: 200 // loss: 0.345
epoch: 59, batch: 300 // loss: 0.233
epoch: 59, batch: 400 // loss: 0.131
epoch: 59, batch: 500 // loss: 0.445
epoch: 59, batch: 600 // loss: 0.399
epoch: 59, batch: 700 // loss: 0.256
epoch: 59, batch: 800 // loss: 0.237
epoch: 59, batch: 900 // loss: 0.064
epoch: 59, batch: 1000 // loss: 0.529
epoch: 59, batch: 1100 // loss: 0.142
epoch: 59, batch: 1200 // loss: 0.127

epoch: 60, batch: 0 // loss: 0.211
epoch: 60, batch: 100 // loss: 0.217
epoch: 60, batch: 200 // loss: 0.335
epoch: 60, batch: 300 // loss: 0.233
epoch: 60, batch: 400 // loss: 0.130
epoch: 60, batch: 500 // loss: 0.450
epoch: 60, batch: 600 // loss: 0.397
epoch: 60, batch: 700 // loss: 0.255
epoch: 60, batch: 800 // loss: 0.235
epoch: 60, batch: 900 // loss: 0.063
epoch: 60, batch: 1000 // loss: 0.522
epoch: 60, batch: 1100 // loss: 0.139
epoch: 60, batch: 1200 // loss: 0.130

epoch: 61, batch: 0 // loss: 0.208
epoch: 61, batch: 100 // loss: 0.220
epoch: 61, batch: 200 // loss: 0.329
epoch: 61, batch: 300 // loss: 0.229
epoch: 61, batch: 400 // loss: 0.132
epoch: 61, batch: 500 // loss: 0.449
epoch: 61, batch: 600 // loss: 0.394
epoch: 61, batch: 700 // loss: 0.253
epoch: 61, batch: 800 // loss: 0.229
epoch: 61, batch: 900 // loss: 0.062
epoch: 61, batch: 1000 // loss: 0.523
epoch: 61, batch: 1100 // loss: 0.139
epoch: 61, batch: 1200 // loss: 0.127

epoch: 62, batch: 0 // loss: 0.204
epoch: 62, batch: 100 // loss: 0.215
epoch: 62, batch: 200 // loss: 0.331
epoch: 62, batch: 300 // loss: 0.229
epoch: 62, batch: 400 // loss: 0.131
epoch: 62, batch: 500 // loss: 0.449
epoch: 62, batch: 600 // loss: 0.391
epoch: 62, batch: 700 // loss: 0.251
epoch: 62, batch: 800 // loss: 0.226
epoch: 62, batch: 900 // loss: 0.062
epoch: 62, batch: 1000 // loss: 0.522
epoch: 62, batch: 1100 // loss: 0.141
epoch: 62, batch: 1200 // loss: 0.129

epoch: 63, batch: 0 // loss: 0.207
epoch: 63, batch: 100 // loss: 0.215
epoch: 63, batch: 200 // loss: 0.323
epoch: 63, batch: 300 // loss: 0.230
epoch: 63, batch: 400 // loss: 0.130
epoch: 63, batch: 500 // loss: 0.455
epoch: 63, batch: 600 // loss: 0.391
epoch: 63, batch: 700 // loss: 0.245
epoch: 63, batch: 800 // loss: 0.224
epoch: 63, batch: 900 // loss: 0.060
epoch: 63, batch: 1000 // loss: 0.520
epoch: 63, batch: 1100 // loss: 0.140
epoch: 63, batch: 1200 // loss: 0.127

epoch: 64, batch: 0 // loss: 0.206
epoch: 64, batch: 100 // loss: 0.211
epoch: 64, batch: 200 // loss: 0.316
epoch: 64, batch: 300 // loss: 0.224
epoch: 64, batch: 400 // loss: 0.132
epoch: 64, batch: 500 // loss: 0.453
epoch: 64, batch: 600 // loss: 0.388
epoch: 64, batch: 700 // loss: 0.244
epoch: 64, batch: 800 // loss: 0.219
epoch: 64, batch: 900 // loss: 0.059
epoch: 64, batch: 1000 // loss: 0.506
epoch: 64, batch: 1100 // loss: 0.140
epoch: 64, batch: 1200 // loss: 0.127

epoch: 65, batch: 0 // loss: 0.202
epoch: 65, batch: 100 // loss: 0.211
epoch: 65, batch: 200 // loss: 0.308
epoch: 65, batch: 300 // loss: 0.226
epoch: 65, batch: 400 // loss: 0.131
epoch: 65, batch: 500 // loss: 0.453
epoch: 65, batch: 600 // loss: 0.385
epoch: 65, batch: 700 // loss: 0.245
epoch: 65, batch: 800 // loss: 0.219
epoch: 65, batch: 900 // loss: 0.059
epoch: 65, batch: 1000 // loss: 0.518
epoch: 65, batch: 1100 // loss: 0.142
epoch: 65, batch: 1200 // loss: 0.128

epoch: 66, batch: 0 // loss: 0.203
epoch: 66, batch: 100 // loss: 0.205
epoch: 66, batch: 200 // loss: 0.305
epoch: 66, batch: 300 // loss: 0.226
epoch: 66, batch: 400 // loss: 0.130
epoch: 66, batch: 500 // loss: 0.443
epoch: 66, batch: 600 // loss: 0.383
epoch: 66, batch: 700 // loss: 0.242
epoch: 66, batch: 800 // loss: 0.215
epoch: 66, batch: 900 // loss: 0.058
epoch: 66, batch: 1000 // loss: 0.509
epoch: 66, batch: 1100 // loss: 0.140
epoch: 66, batch: 1200 // loss: 0.130

epoch: 67, batch: 0 // loss: 0.201
epoch: 67, batch: 100 // loss: 0.210
epoch: 67, batch: 200 // loss: 0.303
epoch: 67, batch: 300 // loss: 0.223
epoch: 67, batch: 400 // loss: 0.129
epoch: 67, batch: 500 // loss: 0.439
epoch: 67, batch: 600 // loss: 0.379
epoch: 67, batch: 700 // loss: 0.237
epoch: 67, batch: 800 // loss: 0.207
epoch: 67, batch: 900 // loss: 0.057
epoch: 67, batch: 1000 // loss: 0.505
epoch: 67, batch: 1100 // loss: 0.140
epoch: 67, batch: 1200 // loss: 0.125

epoch: 68, batch: 0 // loss: 0.202
epoch: 68, batch: 100 // loss: 0.210
epoch: 68, batch: 200 // loss: 0.297
epoch: 68, batch: 300 // loss: 0.218
epoch: 68, batch: 400 // loss: 0.130
epoch: 68, batch: 500 // loss: 0.439
epoch: 68, batch: 600 // loss: 0.378
epoch: 68, batch: 700 // loss: 0.245
epoch: 68, batch: 800 // loss: 0.208
epoch: 68, batch: 900 // loss: 0.058
epoch: 68, batch: 1000 // loss: 0.504
epoch: 68, batch: 1100 // loss: 0.140
epoch: 68, batch: 1200 // loss: 0.126

epoch: 69, batch: 0 // loss: 0.197
epoch: 69, batch: 100 // loss: 0.205
epoch: 69, batch: 200 // loss: 0.295
epoch: 69, batch: 300 // loss: 0.221
epoch: 69, batch: 400 // loss: 0.133
epoch: 69, batch: 500 // loss: 0.441
epoch: 69, batch: 600 // loss: 0.377
epoch: 69, batch: 700 // loss: 0.235
epoch: 69, batch: 800 // loss: 0.202
epoch: 69, batch: 900 // loss: 0.057
epoch: 69, batch: 1000 // loss: 0.499
epoch: 69, batch: 1100 // loss: 0.140
epoch: 69, batch: 1200 // loss: 0.127

epoch: 70, batch: 0 // loss: 0.199
epoch: 70, batch: 100 // loss: 0.204
epoch: 70, batch: 200 // loss: 0.289
epoch: 70, batch: 300 // loss: 0.228
epoch: 70, batch: 400 // loss: 0.133
epoch: 70, batch: 500 // loss: 0.440
epoch: 70, batch: 600 // loss: 0.372
epoch: 70, batch: 700 // loss: 0.236
epoch: 70, batch: 800 // loss: 0.205
epoch: 70, batch: 900 // loss: 0.057
epoch: 70, batch: 1000 // loss: 0.507
epoch: 70, batch: 1100 // loss: 0.137
epoch: 70, batch: 1200 // loss: 0.127

epoch: 71, batch: 0 // loss: 0.198
epoch: 71, batch: 100 // loss: 0.199
epoch: 71, batch: 200 // loss: 0.278
epoch: 71, batch: 300 // loss: 0.218
epoch: 71, batch: 400 // loss: 0.133
epoch: 71, batch: 500 // loss: 0.441
epoch: 71, batch: 600 // loss: 0.370
epoch: 71, batch: 700 // loss: 0.238
epoch: 71, batch: 800 // loss: 0.204
epoch: 71, batch: 900 // loss: 0.056
epoch: 71, batch: 1000 // loss: 0.511
epoch: 71, batch: 1100 // loss: 0.137
epoch: 71, batch: 1200 // loss: 0.123

epoch: 72, batch: 0 // loss: 0.197
epoch: 72, batch: 100 // loss: 0.197
epoch: 72, batch: 200 // loss: 0.274
epoch: 72, batch: 300 // loss: 0.215
epoch: 72, batch: 400 // loss: 0.133
epoch: 72, batch: 500 // loss: 0.426
epoch: 72, batch: 600 // loss: 0.366
epoch: 72, batch: 700 // loss: 0.238
epoch: 72, batch: 800 // loss: 0.196
epoch: 72, batch: 900 // loss: 0.057
epoch: 72, batch: 1000 // loss: 0.493
epoch: 72, batch: 1100 // loss: 0.139
epoch: 72, batch: 1200 // loss: 0.125

epoch: 73, batch: 0 // loss: 0.198
epoch: 73, batch: 100 // loss: 0.199
epoch: 73, batch: 200 // loss: 0.273
epoch: 73, batch: 300 // loss: 0.213
epoch: 73, batch: 400 // loss: 0.133
epoch: 73, batch: 500 // loss: 0.421
epoch: 73, batch: 600 // loss: 0.365
epoch: 73, batch: 700 // loss: 0.240
epoch: 73, batch: 800 // loss: 0.196
epoch: 73, batch: 900 // loss: 0.058
epoch: 73, batch: 1000 // loss: 0.507
epoch: 73, batch: 1100 // loss: 0.137
epoch: 73, batch: 1200 // loss: 0.123

epoch: 74, batch: 0 // loss: 0.197
epoch: 74, batch: 100 // loss: 0.196
epoch: 74, batch: 200 // loss: 0.262
epoch: 74, batch: 300 // loss: 0.214
epoch: 74, batch: 400 // loss: 0.131
epoch: 74, batch: 500 // loss: 0.422
epoch: 74, batch: 600 // loss: 0.359
epoch: 74, batch: 700 // loss: 0.235
epoch: 74, batch: 800 // loss: 0.197
epoch: 74, batch: 900 // loss: 0.057
epoch: 74, batch: 1000 // loss: 0.490
epoch: 74, batch: 1100 // loss: 0.139
epoch: 74, batch: 1200 // loss: 0.125

epoch: 75, batch: 0 // loss: 0.194
epoch: 75, batch: 100 // loss: 0.193
epoch: 75, batch: 200 // loss: 0.263
epoch: 75, batch: 300 // loss: 0.209
epoch: 75, batch: 400 // loss: 0.132
epoch: 75, batch: 500 // loss: 0.414
epoch: 75, batch: 600 // loss: 0.362
epoch: 75, batch: 700 // loss: 0.239
epoch: 75, batch: 800 // loss: 0.197
epoch: 75, batch: 900 // loss: 0.058
epoch: 75, batch: 1000 // loss: 0.506
epoch: 75, batch: 1100 // loss: 0.131
epoch: 75, batch: 1200 // loss: 0.124

epoch: 76, batch: 0 // loss: 0.189
epoch: 76, batch: 100 // loss: 0.193
epoch: 76, batch: 200 // loss: 0.262
epoch: 76, batch: 300 // loss: 0.213
epoch: 76, batch: 400 // loss: 0.132
epoch: 76, batch: 500 // loss: 0.418
epoch: 76, batch: 600 // loss: 0.359
epoch: 76, batch: 700 // loss: 0.232
epoch: 76, batch: 800 // loss: 0.193
epoch: 76, batch: 900 // loss: 0.057
epoch: 76, batch: 1000 // loss: 0.490
epoch: 76, batch: 1100 // loss: 0.137
epoch: 76, batch: 1200 // loss: 0.122

epoch: 77, batch: 0 // loss: 0.192
epoch: 77, batch: 100 // loss: 0.195
epoch: 77, batch: 200 // loss: 0.259
epoch: 77, batch: 300 // loss: 0.209
epoch: 77, batch: 400 // loss: 0.136
epoch: 77, batch: 500 // loss: 0.421
epoch: 77, batch: 600 // loss: 0.355
epoch: 77, batch: 700 // loss: 0.235
epoch: 77, batch: 800 // loss: 0.194
epoch: 77, batch: 900 // loss: 0.059
epoch: 77, batch: 1000 // loss: 0.507
epoch: 77, batch: 1100 // loss: 0.133
epoch: 77, batch: 1200 // loss: 0.121

epoch: 78, batch: 0 // loss: 0.186
epoch: 78, batch: 100 // loss: 0.191
epoch: 78, batch: 200 // loss: 0.258
epoch: 78, batch: 300 // loss: 0.210
epoch: 78, batch: 400 // loss: 0.135
epoch: 78, batch: 500 // loss: 0.414
epoch: 78, batch: 600 // loss: 0.354
epoch: 78, batch: 700 // loss: 0.233
epoch: 78, batch: 800 // loss: 0.190
epoch: 78, batch: 900 // loss: 0.060
epoch: 78, batch: 1000 // loss: 0.503
epoch: 78, batch: 1100 // loss: 0.139
epoch: 78, batch: 1200 // loss: 0.123

epoch: 79, batch: 0 // loss: 0.187
epoch: 79, batch: 100 // loss: 0.187
epoch: 79, batch: 200 // loss: 0.252
epoch: 79, batch: 300 // loss: 0.213
epoch: 79, batch: 400 // loss: 0.136
epoch: 79, batch: 500 // loss: 0.417
epoch: 79, batch: 600 // loss: 0.351
epoch: 79, batch: 700 // loss: 0.229
epoch: 79, batch: 800 // loss: 0.191
epoch: 79, batch: 900 // loss: 0.061
epoch: 79, batch: 1000 // loss: 0.501
epoch: 79, batch: 1100 // loss: 0.133
epoch: 79, batch: 1200 // loss: 0.124

epoch: 80, batch: 0 // loss: 0.191
epoch: 80, batch: 100 // loss: 0.189
epoch: 80, batch: 200 // loss: 0.245
epoch: 80, batch: 300 // loss: 0.213
epoch: 80, batch: 400 // loss: 0.137
epoch: 80, batch: 500 // loss: 0.412
epoch: 80, batch: 600 // loss: 0.348
epoch: 80, batch: 700 // loss: 0.226
epoch: 80, batch: 800 // loss: 0.189
epoch: 80, batch: 900 // loss: 0.061
epoch: 80, batch: 1000 // loss: 0.484
epoch: 80, batch: 1100 // loss: 0.137
epoch: 80, batch: 1200 // loss: 0.126

epoch: 81, batch: 0 // loss: 0.189
epoch: 81, batch: 100 // loss: 0.184
epoch: 81, batch: 200 // loss: 0.251
epoch: 81, batch: 300 // loss: 0.211
epoch: 81, batch: 400 // loss: 0.135
epoch: 81, batch: 500 // loss: 0.410
epoch: 81, batch: 600 // loss: 0.341
epoch: 81, batch: 700 // loss: 0.224
epoch: 81, batch: 800 // loss: 0.181
epoch: 81, batch: 900 // loss: 0.063
epoch: 81, batch: 1000 // loss: 0.477
epoch: 81, batch: 1100 // loss: 0.134
epoch: 81, batch: 1200 // loss: 0.127

epoch: 82, batch: 0 // loss: 0.185
epoch: 82, batch: 100 // loss: 0.182
epoch: 82, batch: 200 // loss: 0.249
epoch: 82, batch: 300 // loss: 0.206
epoch: 82, batch: 400 // loss: 0.137
epoch: 82, batch: 500 // loss: 0.402
epoch: 82, batch: 600 // loss: 0.342
epoch: 82, batch: 700 // loss: 0.220
epoch: 82, batch: 800 // loss: 0.182
epoch: 82, batch: 900 // loss: 0.063
epoch: 82, batch: 1000 // loss: 0.494
epoch: 82, batch: 1100 // loss: 0.137
epoch: 82, batch: 1200 // loss: 0.121

epoch: 83, batch: 0 // loss: 0.189
epoch: 83, batch: 100 // loss: 0.176
epoch: 83, batch: 200 // loss: 0.249
epoch: 83, batch: 300 // loss: 0.204
epoch: 83, batch: 400 // loss: 0.137
epoch: 83, batch: 500 // loss: 0.400
epoch: 83, batch: 600 // loss: 0.332
epoch: 83, batch: 700 // loss: 0.220
epoch: 83, batch: 800 // loss: 0.177
epoch: 83, batch: 900 // loss: 0.064
epoch: 83, batch: 1000 // loss: 0.487
epoch: 83, batch: 1100 // loss: 0.129
epoch: 83, batch: 1200 // loss: 0.121

epoch: 84, batch: 0 // loss: 0.188
epoch: 84, batch: 100 // loss: 0.177
epoch: 84, batch: 200 // loss: 0.239
epoch: 84, batch: 300 // loss: 0.202
epoch: 84, batch: 400 // loss: 0.137
epoch: 84, batch: 500 // loss: 0.405
epoch: 84, batch: 600 // loss: 0.330
epoch: 84, batch: 700 // loss: 0.216
epoch: 84, batch: 800 // loss: 0.176
epoch: 84, batch: 900 // loss: 0.064
epoch: 84, batch: 1000 // loss: 0.488
epoch: 84, batch: 1100 // loss: 0.136
epoch: 84, batch: 1200 // loss: 0.122

epoch: 85, batch: 0 // loss: 0.185
epoch: 85, batch: 100 // loss: 0.175
epoch: 85, batch: 200 // loss: 0.241
epoch: 85, batch: 300 // loss: 0.207
epoch: 85, batch: 400 // loss: 0.139
epoch: 85, batch: 500 // loss: 0.398
epoch: 85, batch: 600 // loss: 0.325
epoch: 85, batch: 700 // loss: 0.219
epoch: 85, batch: 800 // loss: 0.170
epoch: 85, batch: 900 // loss: 0.067
epoch: 85, batch: 1000 // loss: 0.471
epoch: 85, batch: 1100 // loss: 0.138
epoch: 85, batch: 1200 // loss: 0.123

epoch: 86, batch: 0 // loss: 0.181
epoch: 86, batch: 100 // loss: 0.177
epoch: 86, batch: 200 // loss: 0.237
epoch: 86, batch: 300 // loss: 0.210
epoch: 86, batch: 400 // loss: 0.141
epoch: 86, batch: 500 // loss: 0.394
epoch: 86, batch: 600 // loss: 0.321
epoch: 86, batch: 700 // loss: 0.206
epoch: 86, batch: 800 // loss: 0.165
epoch: 86, batch: 900 // loss: 0.064
epoch: 86, batch: 1000 // loss: 0.481
epoch: 86, batch: 1100 // loss: 0.130
epoch: 86, batch: 1200 // loss: 0.122

epoch: 87, batch: 0 // loss: 0.184
epoch: 87, batch: 100 // loss: 0.180
epoch: 87, batch: 200 // loss: 0.240
epoch: 87, batch: 300 // loss: 0.208
epoch: 87, batch: 400 // loss: 0.140
epoch: 87, batch: 500 // loss: 0.382
epoch: 87, batch: 600 // loss: 0.323
epoch: 87, batch: 700 // loss: 0.211
epoch: 87, batch: 800 // loss: 0.164
epoch: 87, batch: 900 // loss: 0.064
epoch: 87, batch: 1000 // loss: 0.480
epoch: 87, batch: 1100 // loss: 0.136
epoch: 87, batch: 1200 // loss: 0.126

epoch: 88, batch: 0 // loss: 0.182
epoch: 88, batch: 100 // loss: 0.179
epoch: 88, batch: 200 // loss: 0.231
epoch: 88, batch: 300 // loss: 0.205
epoch: 88, batch: 400 // loss: 0.140
epoch: 88, batch: 500 // loss: 0.388
epoch: 88, batch: 600 // loss: 0.317
epoch: 88, batch: 700 // loss: 0.214
epoch: 88, batch: 800 // loss: 0.166
epoch: 88, batch: 900 // loss: 0.067
epoch: 88, batch: 1000 // loss: 0.466
epoch: 88, batch: 1100 // loss: 0.134
epoch: 88, batch: 1200 // loss: 0.124

epoch: 89, batch: 0 // loss: 0.186
epoch: 89, batch: 100 // loss: 0.175
epoch: 89, batch: 200 // loss: 0.224
epoch: 89, batch: 300 // loss: 0.209
epoch: 89, batch: 400 // loss: 0.138
epoch: 89, batch: 500 // loss: 0.385
epoch: 89, batch: 600 // loss: 0.315
epoch: 89, batch: 700 // loss: 0.215
epoch: 89, batch: 800 // loss: 0.162
epoch: 89, batch: 900 // loss: 0.065
epoch: 89, batch: 1000 // loss: 0.461
epoch: 89, batch: 1100 // loss: 0.133
epoch: 89, batch: 1200 // loss: 0.124

epoch: 90, batch: 0 // loss: 0.179
epoch: 90, batch: 100 // loss: 0.175
epoch: 90, batch: 200 // loss: 0.225
epoch: 90, batch: 300 // loss: 0.207
epoch: 90, batch: 400 // loss: 0.140
epoch: 90, batch: 500 // loss: 0.379
epoch: 90, batch: 600 // loss: 0.308
epoch: 90, batch: 700 // loss: 0.208
epoch: 90, batch: 800 // loss: 0.156
epoch: 90, batch: 900 // loss: 0.065
epoch: 90, batch: 1000 // loss: 0.473
epoch: 90, batch: 1100 // loss: 0.131
epoch: 90, batch: 1200 // loss: 0.125

epoch: 91, batch: 0 // loss: 0.174
epoch: 91, batch: 100 // loss: 0.173
epoch: 91, batch: 200 // loss: 0.222
epoch: 91, batch: 300 // loss: 0.200
epoch: 91, batch: 400 // loss: 0.139
epoch: 91, batch: 500 // loss: 0.379
epoch: 91, batch: 600 // loss: 0.303
epoch: 91, batch: 700 // loss: 0.199
epoch: 91, batch: 800 // loss: 0.153
epoch: 91, batch: 900 // loss: 0.067
epoch: 91, batch: 1000 // loss: 0.455
epoch: 91, batch: 1100 // loss: 0.124
epoch: 91, batch: 1200 // loss: 0.123

epoch: 92, batch: 0 // loss: 0.176
epoch: 92, batch: 100 // loss: 0.175
epoch: 92, batch: 200 // loss: 0.230
epoch: 92, batch: 300 // loss: 0.205
epoch: 92, batch: 400 // loss: 0.139
epoch: 92, batch: 500 // loss: 0.376
epoch: 92, batch: 600 // loss: 0.295
epoch: 92, batch: 700 // loss: 0.199
epoch: 92, batch: 800 // loss: 0.155
epoch: 92, batch: 900 // loss: 0.068
epoch: 92, batch: 1000 // loss: 0.455
epoch: 92, batch: 1100 // loss: 0.129
epoch: 92, batch: 1200 // loss: 0.123

epoch: 93, batch: 0 // loss: 0.177
epoch: 93, batch: 100 // loss: 0.176
epoch: 93, batch: 200 // loss: 0.216
epoch: 93, batch: 300 // loss: 0.205
epoch: 93, batch: 400 // loss: 0.137
epoch: 93, batch: 500 // loss: 0.380
epoch: 93, batch: 600 // loss: 0.298
epoch: 93, batch: 700 // loss: 0.211
epoch: 93, batch: 800 // loss: 0.155
epoch: 93, batch: 900 // loss: 0.067
epoch: 93, batch: 1000 // loss: 0.464
epoch: 93, batch: 1100 // loss: 0.127
epoch: 93, batch: 1200 // loss: 0.124

epoch: 94, batch: 0 // loss: 0.171
epoch: 94, batch: 100 // loss: 0.175
epoch: 94, batch: 200 // loss: 0.228
epoch: 94, batch: 300 // loss: 0.206
epoch: 94, batch: 400 // loss: 0.139
epoch: 94, batch: 500 // loss: 0.372
epoch: 94, batch: 600 // loss: 0.292
epoch: 94, batch: 700 // loss: 0.207
epoch: 94, batch: 800 // loss: 0.156
epoch: 94, batch: 900 // loss: 0.067
epoch: 94, batch: 1000 // loss: 0.446
epoch: 94, batch: 1100 // loss: 0.125
epoch: 94, batch: 1200 // loss: 0.120

epoch: 95, batch: 0 // loss: 0.178
epoch: 95, batch: 100 // loss: 0.172
epoch: 95, batch: 200 // loss: 0.229
epoch: 95, batch: 300 // loss: 0.205
epoch: 95, batch: 400 // loss: 0.138
epoch: 95, batch: 500 // loss: 0.365
epoch: 95, batch: 600 // loss: 0.292
epoch: 95, batch: 700 // loss: 0.204
epoch: 95, batch: 800 // loss: 0.155
epoch: 95, batch: 900 // loss: 0.068
epoch: 95, batch: 1000 // loss: 0.463
epoch: 95, batch: 1100 // loss: 0.128
epoch: 95, batch: 1200 // loss: 0.123

epoch: 96, batch: 0 // loss: 0.172
epoch: 96, batch: 100 // loss: 0.178
epoch: 96, batch: 200 // loss: 0.225
epoch: 96, batch: 300 // loss: 0.207
epoch: 96, batch: 400 // loss: 0.140
epoch: 96, batch: 500 // loss: 0.372
epoch: 96, batch: 600 // loss: 0.289
epoch: 96, batch: 700 // loss: 0.199
epoch: 96, batch: 800 // loss: 0.154
epoch: 96, batch: 900 // loss: 0.067
epoch: 96, batch: 1000 // loss: 0.439
epoch: 96, batch: 1100 // loss: 0.124
epoch: 96, batch: 1200 // loss: 0.125

epoch: 97, batch: 0 // loss: 0.177
epoch: 97, batch: 100 // loss: 0.172
epoch: 97, batch: 200 // loss: 0.231
epoch: 97, batch: 300 // loss: 0.212
epoch: 97, batch: 400 // loss: 0.139
epoch: 97, batch: 500 // loss: 0.364
epoch: 97, batch: 600 // loss: 0.285
epoch: 97, batch: 700 // loss: 0.204
epoch: 97, batch: 800 // loss: 0.155
epoch: 97, batch: 900 // loss: 0.066
epoch: 97, batch: 1000 // loss: 0.446
epoch: 97, batch: 1100 // loss: 0.126
epoch: 97, batch: 1200 // loss: 0.124

epoch: 98, batch: 0 // loss: 0.185
epoch: 98, batch: 100 // loss: 0.177
epoch: 98, batch: 200 // loss: 0.226
epoch: 98, batch: 300 // loss: 0.211
epoch: 98, batch: 400 // loss: 0.140
epoch: 98, batch: 500 // loss: 0.361
epoch: 98, batch: 600 // loss: 0.286
epoch: 98, batch: 700 // loss: 0.198
epoch: 98, batch: 800 // loss: 0.153
epoch: 98, batch: 900 // loss: 0.066
epoch: 98, batch: 1000 // loss: 0.460
epoch: 98, batch: 1100 // loss: 0.120
epoch: 98, batch: 1200 // loss: 0.126

epoch: 99, batch: 0 // loss: 0.178
epoch: 99, batch: 100 // loss: 0.176
epoch: 99, batch: 200 // loss: 0.223
epoch: 99, batch: 300 // loss: 0.201
epoch: 99, batch: 400 // loss: 0.140
epoch: 99, batch: 500 // loss: 0.357
epoch: 99, batch: 600 // loss: 0.282
epoch: 99, batch: 700 // loss: 0.197
epoch: 99, batch: 800 // loss: 0.150
epoch: 99, batch: 900 // loss: 0.067
epoch: 99, batch: 1000 // loss: 0.450
epoch: 99, batch: 1100 // loss: 0.120
epoch: 99, batch: 1200 // loss: 0.123

OK! Now let's come back to auto-encoders

TODO 1

Implement a simple autoencoder in torch. In particular, let's start with a vanilla linear auto-encoder, mapping to two dimensions in the hidden space.

In [192]:
class AE(nn.Module):
    '''
    Linear autoencoder: one linear layer maps the input down to a hidden
    code z, a second linear layer maps z back up to the input width.
    No nonlinearity is applied between the two layers.
    '''

    def __init__(self, input_size=784, hidden_size=2):
        '''
        Build the encoder and decoder layers.

        input_size: number of features per example; also the width of
            the reconstruction produced by the decoder.
        hidden_size: width of the hidden code z.
        '''
        super().__init__()

        ### REMOVE BELOW
        self.input_size, self.hidden_size = input_size, hidden_size

        # encoder: x -> z
        self.i = nn.Linear(input_size, hidden_size)
        # decoder: z -> reconstruction of x
        self.o = nn.Linear(hidden_size, input_size)

    def forward(self, X, return_z=False):
        '''
        Encode X and, unless return_z is set, decode it again.

        Returns the hidden code z when return_z is True, otherwise the
        decoder output computed from z.
        '''
        ### REMOVE BELOW
        code = self.i(X)
        return code if return_z else self.o(code)
In [193]:
# Sanity check before training: push the first five rows of X through an
# untrained autoencoder with a 50-unit hidden code and display the shape of
# the reconstruction.
auto = AE(hidden_size=50)
X_tilde = auto(X[:5].float())
X_tilde.shape
Out[193]:
torch.Size([5, 784])

TODO 2

Define a training loop -- follow the above example.

Hint: check out the loss functions documented at https://pytorch.org/docs/stable/nn.html#loss-functions

In [194]:
def train_AE(X_in, X_target, model, optimizer, loss_function, EPOCHS=10, batch_size=16):
    '''
    Train `model` to map rows of X_in onto the matching rows of X_target
    using sequential (unshuffled) mini-batches.

    X_in: tensor of inputs, one example per row.
    X_target: tensor of reconstruction targets; expected to be aligned
        row-for-row with X_in (pass the same tensor for a plain autoencoder).
    model: module called as model(X_batch) to produce reconstructions.
    optimizer: optimizer over the model's parameters; zeroed and stepped
        once per batch.
    loss_function: callable taking (prediction, target) and returning a
        scalar loss tensor.
    EPOCHS: number of full passes over X_in.
    batch_size: number of rows per mini-batch (the last batch of an epoch
        may be smaller).

    Prints the loss of every 100th batch. Returns None; the model is
    updated in place.
    '''
    # Take the dataset size from the data itself rather than assuming the
    # 60000 Fashion-MNIST training rows, so subsets and other datasets work.
    n_samples = len(X_in)

    for epoch in range(EPOCHS):
        # blank line separates the log of consecutive epochs
        print("")
        for batch_num, start in enumerate(range(0, n_samples, batch_size)):
            # zero the parameter gradients
            optimizer.zero_grad()

            stop = start + batch_size
            X_batch = X_in[start:stop].float()
            X_target_batch = X_target[start:stop].float()

            # run the batch forward, incur the loss, backprop,
            # and step the optimizer.
            X_tilde_batch = model(X_batch)
            loss = loss_function(X_tilde_batch, X_target_batch)
            loss.backward()
            optimizer.step()

            # print out loss
            if batch_num % 100 == 0:
                print("epoch: {}, batch: {} // loss: {:.3f}".format(epoch, batch_num, loss.item()))
In [195]:
# Fit an autoencoder with the default 2-unit hidden code, using X as both
# the input and the reconstruction target, scored with mean squared error.
auto = AE()
loss_function = nn.MSELoss()
optimizer = optim.SGD(auto.parameters(), momentum=0.9, lr=0.01)

train_AE(X, X, model=auto, optimizer=optimizer, loss_function=loss_function, EPOCHS=50)
epoch: 0, batch: 0 // loss: 0.383
epoch: 0, batch: 100 // loss: 0.357
epoch: 0, batch: 200 // loss: 0.321
epoch: 0, batch: 300 // loss: 0.303
epoch: 0, batch: 400 // loss: 0.298
epoch: 0, batch: 500 // loss: 0.288
epoch: 0, batch: 600 // loss: 0.284
epoch: 0, batch: 700 // loss: 0.283
epoch: 0, batch: 800 // loss: 0.248
epoch: 0, batch: 900 // loss: 0.278
epoch: 0, batch: 1000 // loss: 0.245
epoch: 0, batch: 1100 // loss: 0.265
epoch: 0, batch: 1200 // loss: 0.218
epoch: 0, batch: 1300 // loss: 0.243
epoch: 0, batch: 1400 // loss: 0.203
epoch: 0, batch: 1500 // loss: 0.199
epoch: 0, batch: 1600 // loss: 0.214
epoch: 0, batch: 1700 // loss: 0.196
epoch: 0, batch: 1800 // loss: 0.219
epoch: 0, batch: 1900 // loss: 0.192
epoch: 0, batch: 2000 // loss: 0.168
epoch: 0, batch: 2100 // loss: 0.169
epoch: 0, batch: 2200 // loss: 0.195
epoch: 0, batch: 2300 // loss: 0.171
epoch: 0, batch: 2400 // loss: 0.138
epoch: 0, batch: 2500 // loss: 0.140
epoch: 0, batch: 2600 // loss: 0.172
epoch: 0, batch: 2700 // loss: 0.134
epoch: 0, batch: 2800 // loss: 0.167
epoch: 0, batch: 2900 // loss: 0.124
epoch: 0, batch: 3000 // loss: 0.134
epoch: 0, batch: 3100 // loss: 0.151
epoch: 0, batch: 3200 // loss: 0.116
epoch: 0, batch: 3300 // loss: 0.126
epoch: 0, batch: 3400 // loss: 0.123
epoch: 0, batch: 3500 // loss: 0.127
epoch: 0, batch: 3600 // loss: 0.131
epoch: 0, batch: 3700 // loss: 0.140

epoch: 1, batch: 0 // loss: 0.134
epoch: 1, batch: 100 // loss: 0.127
epoch: 1, batch: 200 // loss: 0.124
epoch: 1, batch: 300 // loss: 0.114
epoch: 1, batch: 400 // loss: 0.116
epoch: 1, batch: 500 // loss: 0.109
epoch: 1, batch: 600 // loss: 0.111
epoch: 1, batch: 700 // loss: 0.115
epoch: 1, batch: 800 // loss: 0.105
epoch: 1, batch: 900 // loss: 0.125
epoch: 1, batch: 1000 // loss: 0.103
epoch: 1, batch: 1100 // loss: 0.120
epoch: 1, batch: 1200 // loss: 0.099
epoch: 1, batch: 1300 // loss: 0.116
epoch: 1, batch: 1400 // loss: 0.097
epoch: 1, batch: 1500 // loss: 0.097
epoch: 1, batch: 1600 // loss: 0.109
epoch: 1, batch: 1700 // loss: 0.105
epoch: 1, batch: 1800 // loss: 0.114
epoch: 1, batch: 1900 // loss: 0.102
epoch: 1, batch: 2000 // loss: 0.090
epoch: 1, batch: 2100 // loss: 0.097
epoch: 1, batch: 2200 // loss: 0.114
epoch: 1, batch: 2300 // loss: 0.101
epoch: 1, batch: 2400 // loss: 0.084
epoch: 1, batch: 2500 // loss: 0.083
epoch: 1, batch: 2600 // loss: 0.106
epoch: 1, batch: 2700 // loss: 0.083
epoch: 1, batch: 2800 // loss: 0.108
epoch: 1, batch: 2900 // loss: 0.080
epoch: 1, batch: 3000 // loss: 0.088
epoch: 1, batch: 3100 // loss: 0.098
epoch: 1, batch: 3200 // loss: 0.081
epoch: 1, batch: 3300 // loss: 0.086
epoch: 1, batch: 3400 // loss: 0.082
epoch: 1, batch: 3500 // loss: 0.089
epoch: 1, batch: 3600 // loss: 0.095
epoch: 1, batch: 3700 // loss: 0.101

epoch: 2, batch: 0 // loss: 0.103
epoch: 2, batch: 100 // loss: 0.091
epoch: 2, batch: 200 // loss: 0.098
epoch: 2, batch: 300 // loss: 0.089
epoch: 2, batch: 400 // loss: 0.090
epoch: 2, batch: 500 // loss: 0.083
epoch: 2, batch: 600 // loss: 0.085
epoch: 2, batch: 700 // loss: 0.089
epoch: 2, batch: 800 // loss: 0.087
epoch: 2, batch: 900 // loss: 0.101
epoch: 2, batch: 1000 // loss: 0.082
epoch: 2, batch: 1100 // loss: 0.095
epoch: 2, batch: 1200 // loss: 0.084
epoch: 2, batch: 1300 // loss: 0.096
epoch: 2, batch: 1400 // loss: 0.083
epoch: 2, batch: 1500 // loss: 0.083
epoch: 2, batch: 1600 // loss: 0.093
epoch: 2, batch: 1700 // loss: 0.093
epoch: 2, batch: 1800 // loss: 0.095
epoch: 2, batch: 1900 // loss: 0.088
epoch: 2, batch: 2000 // loss: 0.079
epoch: 2, batch: 2100 // loss: 0.088
epoch: 2, batch: 2200 // loss: 0.100
epoch: 2, batch: 2300 // loss: 0.091
epoch: 2, batch: 2400 // loss: 0.080
epoch: 2, batch: 2500 // loss: 0.076
epoch: 2, batch: 2600 // loss: 0.095
epoch: 2, batch: 2700 // loss: 0.077
epoch: 2, batch: 2800 // loss: 0.098
epoch: 2, batch: 2900 // loss: 0.076
epoch: 2, batch: 3000 // loss: 0.083
epoch: 2, batch: 3100 // loss: 0.090
epoch: 2, batch: 3200 // loss: 0.079
epoch: 2, batch: 3300 // loss: 0.080
epoch: 2, batch: 3400 // loss: 0.076
epoch: 2, batch: 3500 // loss: 0.083
epoch: 2, batch: 3600 // loss: 0.091
epoch: 2, batch: 3700 // loss: 0.094

epoch: 3, batch: 0 // loss: 0.099
epoch: 3, batch: 100 // loss: 0.085
epoch: 3, batch: 200 // loss: 0.095
epoch: 3, batch: 300 // loss: 0.086
epoch: 3, batch: 400 // loss: 0.087
epoch: 3, batch: 500 // loss: 0.079
epoch: 3, batch: 600 // loss: 0.081
epoch: 3, batch: 700 // loss: 0.085
epoch: 3, batch: 800 // loss: 0.085
epoch: 3, batch: 900 // loss: 0.096
epoch: 3, batch: 1000 // loss: 0.078
epoch: 3, batch: 1100 // loss: 0.090
epoch: 3, batch: 1200 // loss: 0.082
epoch: 3, batch: 1300 // loss: 0.092
epoch: 3, batch: 1400 // loss: 0.082
epoch: 3, batch: 1500 // loss: 0.082
epoch: 3, batch: 1600 // loss: 0.090
epoch: 3, batch: 1700 // loss: 0.091
epoch: 3, batch: 1800 // loss: 0.092
epoch: 3, batch: 1900 // loss: 0.086
epoch: 3, batch: 2000 // loss: 0.078
epoch: 3, batch: 2100 // loss: 0.088
epoch: 3, batch: 2200 // loss: 0.098
epoch: 3, batch: 2300 // loss: 0.090
epoch: 3, batch: 2400 // loss: 0.081
epoch: 3, batch: 2500 // loss: 0.076
epoch: 3, batch: 2600 // loss: 0.093
epoch: 3, batch: 2700 // loss: 0.077
epoch: 3, batch: 2800 // loss: 0.096
epoch: 3, batch: 2900 // loss: 0.076
epoch: 3, batch: 3000 // loss: 0.082
epoch: 3, batch: 3100 // loss: 0.088
epoch: 3, batch: 3200 // loss: 0.079
epoch: 3, batch: 3300 // loss: 0.079
epoch: 3, batch: 3400 // loss: 0.076
epoch: 3, batch: 3500 // loss: 0.082
epoch: 3, batch: 3600 // loss: 0.090
epoch: 3, batch: 3700 // loss: 0.092

epoch: 4, batch: 0 // loss: 0.098
epoch: 4, batch: 100 // loss: 0.084
epoch: 4, batch: 200 // loss: 0.095
epoch: 4, batch: 300 // loss: 0.086
epoch: 4, batch: 400 // loss: 0.087
epoch: 4, batch: 500 // loss: 0.078
epoch: 4, batch: 600 // loss: 0.081
epoch: 4, batch: 700 // loss: 0.084
epoch: 4, batch: 800 // loss: 0.085
epoch: 4, batch: 900 // loss: 0.096
epoch: 4, batch: 1000 // loss: 0.077
epoch: 4, batch: 1100 // loss: 0.089
epoch: 4, batch: 1200 // loss: 0.082
epoch: 4, batch: 1300 // loss: 0.091
epoch: 4, batch: 1400 // loss: 0.082
epoch: 4, batch: 1500 // loss: 0.082
epoch: 4, batch: 1600 // loss: 0.090
epoch: 4, batch: 1700 // loss: 0.091
epoch: 4, batch: 1800 // loss: 0.090
epoch: 4, batch: 1900 // loss: 0.085
epoch: 4, batch: 2000 // loss: 0.078
epoch: 4, batch: 2100 // loss: 0.088
epoch: 4, batch: 2200 // loss: 0.097
epoch: 4, batch: 2300 // loss: 0.089
epoch: 4, batch: 2400 // loss: 0.081
epoch: 4, batch: 2500 // loss: 0.076
epoch: 4, batch: 2600 // loss: 0.092
epoch: 4, batch: 2700 // loss: 0.077
epoch: 4, batch: 2800 // loss: 0.096
epoch: 4, batch: 2900 // loss: 0.076
epoch: 4, batch: 3000 // loss: 0.082
epoch: 4, batch: 3100 // loss: 0.087
epoch: 4, batch: 3200 // loss: 0.080
epoch: 4, batch: 3300 // loss: 0.079
epoch: 4, batch: 3400 // loss: 0.075
epoch: 4, batch: 3500 // loss: 0.082
epoch: 4, batch: 3600 // loss: 0.090
epoch: 4, batch: 3700 // loss: 0.092

epoch: 5, batch: 0 // loss: 0.098
epoch: 5, batch: 100 // loss: 0.083
epoch: 5, batch: 200 // loss: 0.095
epoch: 5, batch: 300 // loss: 0.086
epoch: 5, batch: 400 // loss: 0.087
epoch: 5, batch: 500 // loss: 0.078
epoch: 5, batch: 600 // loss: 0.080
epoch: 5, batch: 700 // loss: 0.084
epoch: 5, batch: 800 // loss: 0.085
epoch: 5, batch: 900 // loss: 0.095
epoch: 5, batch: 1000 // loss: 0.077
epoch: 5, batch: 1100 // loss: 0.088
epoch: 5, batch: 1200 // loss: 0.082
epoch: 5, batch: 1300 // loss: 0.091
epoch: 5, batch: 1400 // loss: 0.082
epoch: 5, batch: 1500 // loss: 0.082
epoch: 5, batch: 1600 // loss: 0.090
epoch: 5, batch: 1700 // loss: 0.091
epoch: 5, batch: 1800 // loss: 0.090
epoch: 5, batch: 1900 // loss: 0.085
epoch: 5, batch: 2000 // loss: 0.078
epoch: 5, batch: 2100 // loss: 0.088
epoch: 5, batch: 2200 // loss: 0.097
epoch: 5, batch: 2300 // loss: 0.089
epoch: 5, batch: 2400 // loss: 0.081
epoch: 5, batch: 2500 // loss: 0.076
epoch: 5, batch: 2600 // loss: 0.092
epoch: 5, batch: 2700 // loss: 0.077
epoch: 5, batch: 2800 // loss: 0.096
epoch: 5, batch: 2900 // loss: 0.076
epoch: 5, batch: 3000 // loss: 0.082
epoch: 5, batch: 3100 // loss: 0.087
epoch: 5, batch: 3200 // loss: 0.080
epoch: 5, batch: 3300 // loss: 0.079
epoch: 5, batch: 3400 // loss: 0.075
epoch: 5, batch: 3500 // loss: 0.082
epoch: 5, batch: 3600 // loss: 0.090
epoch: 5, batch: 3700 // loss: 0.092

epoch: 6, batch: 0 // loss: 0.098
epoch: 6, batch: 100 // loss: 0.083
epoch: 6, batch: 200 // loss: 0.095
epoch: 6, batch: 300 // loss: 0.086
epoch: 6, batch: 400 // loss: 0.087
epoch: 6, batch: 500 // loss: 0.078
epoch: 6, batch: 600 // loss: 0.080
epoch: 6, batch: 700 // loss: 0.083
epoch: 6, batch: 800 // loss: 0.085
epoch: 6, batch: 900 // loss: 0.095
epoch: 6, batch: 1000 // loss: 0.077
epoch: 6, batch: 1100 // loss: 0.088
epoch: 6, batch: 1200 // loss: 0.082
epoch: 6, batch: 1300 // loss: 0.091
epoch: 6, batch: 1400 // loss: 0.082
epoch: 6, batch: 1500 // loss: 0.082
epoch: 6, batch: 1600 // loss: 0.089
epoch: 6, batch: 1700 // loss: 0.091
epoch: 6, batch: 1800 // loss: 0.090
epoch: 6, batch: 1900 // loss: 0.085
epoch: 6, batch: 2000 // loss: 0.078
epoch: 6, batch: 2100 // loss: 0.088
epoch: 6, batch: 2200 // loss: 0.096
epoch: 6, batch: 2300 // loss: 0.089
epoch: 6, batch: 2400 // loss: 0.081
epoch: 6, batch: 2500 // loss: 0.076
epoch: 6, batch: 2600 // loss: 0.092
epoch: 6, batch: 2700 // loss: 0.077
epoch: 6, batch: 2800 // loss: 0.095
epoch: 6, batch: 2900 // loss: 0.076
epoch: 6, batch: 3000 // loss: 0.082
epoch: 6, batch: 3100 // loss: 0.087
epoch: 6, batch: 3200 // loss: 0.080
epoch: 6, batch: 3300 // loss: 0.079
epoch: 6, batch: 3400 // loss: 0.075
epoch: 6, batch: 3500 // loss: 0.081
epoch: 6, batch: 3600 // loss: 0.090
epoch: 6, batch: 3700 // loss: 0.092

epoch: 7, batch: 0 // loss: 0.098
epoch: 7, batch: 100 // loss: 0.083
epoch: 7, batch: 200 // loss: 0.095
epoch: 7, batch: 300 // loss: 0.086
epoch: 7, batch: 400 // loss: 0.087
epoch: 7, batch: 500 // loss: 0.078
epoch: 7, batch: 600 // loss: 0.080
epoch: 7, batch: 700 // loss: 0.083
epoch: 7, batch: 800 // loss: 0.085
epoch: 7, batch: 900 // loss: 0.095
epoch: 7, batch: 1000 // loss: 0.077
epoch: 7, batch: 1100 // loss: 0.088
epoch: 7, batch: 1200 // loss: 0.082
epoch: 7, batch: 1300 // loss: 0.090
epoch: 7, batch: 1400 // loss: 0.082
epoch: 7, batch: 1500 // loss: 0.082
epoch: 7, batch: 1600 // loss: 0.089
epoch: 7, batch: 1700 // loss: 0.090
epoch: 7, batch: 1800 // loss: 0.090
epoch: 7, batch: 1900 // loss: 0.085
epoch: 7, batch: 2000 // loss: 0.078
epoch: 7, batch: 2100 // loss: 0.088
epoch: 7, batch: 2200 // loss: 0.096
epoch: 7, batch: 2300 // loss: 0.089
epoch: 7, batch: 2400 // loss: 0.081
epoch: 7, batch: 2500 // loss: 0.076
epoch: 7, batch: 2600 // loss: 0.091
epoch: 7, batch: 2700 // loss: 0.077
epoch: 7, batch: 2800 // loss: 0.095
epoch: 7, batch: 2900 // loss: 0.076
epoch: 7, batch: 3000 // loss: 0.082
epoch: 7, batch: 3100 // loss: 0.087
epoch: 7, batch: 3200 // loss: 0.080
epoch: 7, batch: 3300 // loss: 0.079
epoch: 7, batch: 3400 // loss: 0.075
epoch: 7, batch: 3500 // loss: 0.081
epoch: 7, batch: 3600 // loss: 0.089
epoch: 7, batch: 3700 // loss: 0.091

epoch: 8, batch: 0 // loss: 0.098
epoch: 8, batch: 100 // loss: 0.082
epoch: 8, batch: 200 // loss: 0.095
epoch: 8, batch: 300 // loss: 0.086
epoch: 8, batch: 400 // loss: 0.087
epoch: 8, batch: 500 // loss: 0.078
epoch: 8, batch: 600 // loss: 0.080
epoch: 8, batch: 700 // loss: 0.083
epoch: 8, batch: 800 // loss: 0.085
epoch: 8, batch: 900 // loss: 0.094
epoch: 8, batch: 1000 // loss: 0.077
epoch: 8, batch: 1100 // loss: 0.088
epoch: 8, batch: 1200 // loss: 0.082
epoch: 8, batch: 1300 // loss: 0.090
epoch: 8, batch: 1400 // loss: 0.082
epoch: 8, batch: 1500 // loss: 0.081
epoch: 8, batch: 1600 // loss: 0.089
epoch: 8, batch: 1700 // loss: 0.090
epoch: 8, batch: 1800 // loss: 0.089
epoch: 8, batch: 1900 // loss: 0.085
epoch: 8, batch: 2000 // loss: 0.078
epoch: 8, batch: 2100 // loss: 0.087
epoch: 8, batch: 2200 // loss: 0.096
epoch: 8, batch: 2300 // loss: 0.089
epoch: 8, batch: 2400 // loss: 0.081
epoch: 8, batch: 2500 // loss: 0.076
epoch: 8, batch: 2600 // loss: 0.091
epoch: 8, batch: 2700 // loss: 0.076
epoch: 8, batch: 2800 // loss: 0.095
epoch: 8, batch: 2900 // loss: 0.076
epoch: 8, batch: 3000 // loss: 0.081
epoch: 8, batch: 3100 // loss: 0.086
epoch: 8, batch: 3200 // loss: 0.079
epoch: 8, batch: 3300 // loss: 0.079
epoch: 8, batch: 3400 // loss: 0.075
epoch: 8, batch: 3500 // loss: 0.081
epoch: 8, batch: 3600 // loss: 0.089
epoch: 8, batch: 3700 // loss: 0.091

epoch: 9, batch: 0 // loss: 0.097
epoch: 9, batch: 100 // loss: 0.082
epoch: 9, batch: 200 // loss: 0.094
epoch: 9, batch: 300 // loss: 0.085
epoch: 9, batch: 400 // loss: 0.086
epoch: 9, batch: 500 // loss: 0.077
epoch: 9, batch: 600 // loss: 0.079
epoch: 9, batch: 700 // loss: 0.083
epoch: 9, batch: 800 // loss: 0.084
epoch: 9, batch: 900 // loss: 0.094
epoch: 9, batch: 1000 // loss: 0.076
epoch: 9, batch: 1100 // loss: 0.087
epoch: 9, batch: 1200 // loss: 0.082
epoch: 9, batch: 1300 // loss: 0.090
epoch: 9, batch: 1400 // loss: 0.082
epoch: 9, batch: 1500 // loss: 0.081
epoch: 9, batch: 1600 // loss: 0.089
epoch: 9, batch: 1700 // loss: 0.090
epoch: 9, batch: 1800 // loss: 0.089
epoch: 9, batch: 1900 // loss: 0.084
epoch: 9, batch: 2000 // loss: 0.078
epoch: 9, batch: 2100 // loss: 0.087
epoch: 9, batch: 2200 // loss: 0.095
epoch: 9, batch: 2300 // loss: 0.088
epoch: 9, batch: 2400 // loss: 0.081
epoch: 9, batch: 2500 // loss: 0.075
epoch: 9, batch: 2600 // loss: 0.091
epoch: 9, batch: 2700 // loss: 0.076
epoch: 9, batch: 2800 // loss: 0.094
epoch: 9, batch: 2900 // loss: 0.075
epoch: 9, batch: 3000 // loss: 0.081
epoch: 9, batch: 3100 // loss: 0.086
epoch: 9, batch: 3200 // loss: 0.079
epoch: 9, batch: 3300 // loss: 0.078
epoch: 9, batch: 3400 // loss: 0.075
epoch: 9, batch: 3500 // loss: 0.080
epoch: 9, batch: 3600 // loss: 0.089
epoch: 9, batch: 3700 // loss: 0.091

epoch: 10, batch: 0 // loss: 0.097
epoch: 10, batch: 100 // loss: 0.082
epoch: 10, batch: 200 // loss: 0.094
epoch: 10, batch: 300 // loss: 0.085
epoch: 10, batch: 400 // loss: 0.086
epoch: 10, batch: 500 // loss: 0.077
epoch: 10, batch: 600 // loss: 0.079
epoch: 10, batch: 700 // loss: 0.082
epoch: 10, batch: 800 // loss: 0.084
epoch: 10, batch: 900 // loss: 0.094
epoch: 10, batch: 1000 // loss: 0.076
epoch: 10, batch: 1100 // loss: 0.087
epoch: 10, batch: 1200 // loss: 0.081
epoch: 10, batch: 1300 // loss: 0.089
epoch: 10, batch: 1400 // loss: 0.081
epoch: 10, batch: 1500 // loss: 0.081
epoch: 10, batch: 1600 // loss: 0.088
epoch: 10, batch: 1700 // loss: 0.089
epoch: 10, batch: 1800 // loss: 0.089
epoch: 10, batch: 1900 // loss: 0.084
epoch: 10, batch: 2000 // loss: 0.077
epoch: 10, batch: 2100 // loss: 0.087
epoch: 10, batch: 2200 // loss: 0.095
epoch: 10, batch: 2300 // loss: 0.088
epoch: 10, batch: 2400 // loss: 0.081
epoch: 10, batch: 2500 // loss: 0.075
epoch: 10, batch: 2600 // loss: 0.090
epoch: 10, batch: 2700 // loss: 0.076
epoch: 10, batch: 2800 // loss: 0.094
epoch: 10, batch: 2900 // loss: 0.075
epoch: 10, batch: 3000 // loss: 0.081
epoch: 10, batch: 3100 // loss: 0.085
epoch: 10, batch: 3200 // loss: 0.079
epoch: 10, batch: 3300 // loss: 0.078
epoch: 10, batch: 3400 // loss: 0.074
epoch: 10, batch: 3500 // loss: 0.080
epoch: 10, batch: 3600 // loss: 0.088
epoch: 10, batch: 3700 // loss: 0.090

epoch: 11, batch: 0 // loss: 0.096
epoch: 11, batch: 100 // loss: 0.081
epoch: 11, batch: 200 // loss: 0.093
epoch: 11, batch: 300 // loss: 0.085
epoch: 11, batch: 400 // loss: 0.085
epoch: 11, batch: 500 // loss: 0.076
epoch: 11, batch: 600 // loss: 0.078
epoch: 11, batch: 700 // loss: 0.082
epoch: 11, batch: 800 // loss: 0.083
epoch: 11, batch: 900 // loss: 0.093
epoch: 11, batch: 1000 // loss: 0.076
epoch: 11, batch: 1100 // loss: 0.086
epoch: 11, batch: 1200 // loss: 0.081
epoch: 11, batch: 1300 // loss: 0.088
epoch: 11, batch: 1400 // loss: 0.081
epoch: 11, batch: 1500 // loss: 0.081
epoch: 11, batch: 1600 // loss: 0.088
epoch: 11, batch: 1700 // loss: 0.088
epoch: 11, batch: 1800 // loss: 0.088
epoch: 11, batch: 1900 // loss: 0.084
epoch: 11, batch: 2000 // loss: 0.077
epoch: 11, batch: 2100 // loss: 0.086
epoch: 11, batch: 2200 // loss: 0.094
epoch: 11, batch: 2300 // loss: 0.087
epoch: 11, batch: 2400 // loss: 0.080
epoch: 11, batch: 2500 // loss: 0.075
epoch: 11, batch: 2600 // loss: 0.089
epoch: 11, batch: 2700 // loss: 0.075
epoch: 11, batch: 2800 // loss: 0.093
epoch: 11, batch: 2900 // loss: 0.075
epoch: 11, batch: 3000 // loss: 0.080
epoch: 11, batch: 3100 // loss: 0.085
epoch: 11, batch: 3200 // loss: 0.078
epoch: 11, batch: 3300 // loss: 0.077
epoch: 11, batch: 3400 // loss: 0.074
epoch: 11, batch: 3500 // loss: 0.079
epoch: 11, batch: 3600 // loss: 0.087
epoch: 11, batch: 3700 // loss: 0.089

epoch: 12, batch: 0 // loss: 0.096
epoch: 12, batch: 100 // loss: 0.081
epoch: 12, batch: 200 // loss: 0.092
epoch: 12, batch: 300 // loss: 0.084
epoch: 12, batch: 400 // loss: 0.085
epoch: 12, batch: 500 // loss: 0.076
epoch: 12, batch: 600 // loss: 0.078
epoch: 12, batch: 700 // loss: 0.081
epoch: 12, batch: 800 // loss: 0.083
epoch: 12, batch: 900 // loss: 0.092
epoch: 12, batch: 1000 // loss: 0.075
epoch: 12, batch: 1100 // loss: 0.085
epoch: 12, batch: 1200 // loss: 0.081
epoch: 12, batch: 1300 // loss: 0.088
epoch: 12, batch: 1400 // loss: 0.080
epoch: 12, batch: 1500 // loss: 0.080
epoch: 12, batch: 1600 // loss: 0.087
epoch: 12, batch: 1700 // loss: 0.087
epoch: 12, batch: 1800 // loss: 0.088
epoch: 12, batch: 1900 // loss: 0.083
epoch: 12, batch: 2000 // loss: 0.077
epoch: 12, batch: 2100 // loss: 0.086
epoch: 12, batch: 2200 // loss: 0.093
epoch: 12, batch: 2300 // loss: 0.086
epoch: 12, batch: 2400 // loss: 0.080
epoch: 12, batch: 2500 // loss: 0.074
epoch: 12, batch: 2600 // loss: 0.088
epoch: 12, batch: 2700 // loss: 0.075
epoch: 12, batch: 2800 // loss: 0.092
epoch: 12, batch: 2900 // loss: 0.074
epoch: 12, batch: 3000 // loss: 0.079
epoch: 12, batch: 3100 // loss: 0.084
epoch: 12, batch: 3200 // loss: 0.078
epoch: 12, batch: 3300 // loss: 0.077
epoch: 12, batch: 3400 // loss: 0.073
epoch: 12, batch: 3500 // loss: 0.078
epoch: 12, batch: 3600 // loss: 0.086
epoch: 12, batch: 3700 // loss: 0.089

epoch: 13, batch: 0 // loss: 0.095
epoch: 13, batch: 100 // loss: 0.080
epoch: 13, batch: 200 // loss: 0.091
epoch: 13, batch: 300 // loss: 0.084
epoch: 13, batch: 400 // loss: 0.084
epoch: 13, batch: 500 // loss: 0.075
epoch: 13, batch: 600 // loss: 0.077
epoch: 13, batch: 700 // loss: 0.080
epoch: 13, batch: 800 // loss: 0.082
epoch: 13, batch: 900 // loss: 0.091
epoch: 13, batch: 1000 // loss: 0.075
epoch: 13, batch: 1100 // loss: 0.084
epoch: 13, batch: 1200 // loss: 0.080
epoch: 13, batch: 1300 // loss: 0.087
epoch: 13, batch: 1400 // loss: 0.079
epoch: 13, batch: 1500 // loss: 0.080
epoch: 13, batch: 1600 // loss: 0.087
epoch: 13, batch: 1700 // loss: 0.086
epoch: 13, batch: 1800 // loss: 0.087
epoch: 13, batch: 1900 // loss: 0.082
epoch: 13, batch: 2000 // loss: 0.076
epoch: 13, batch: 2100 // loss: 0.085
epoch: 13, batch: 2200 // loss: 0.092
epoch: 13, batch: 2300 // loss: 0.085
epoch: 13, batch: 2400 // loss: 0.079
epoch: 13, batch: 2500 // loss: 0.073
epoch: 13, batch: 2600 // loss: 0.087
epoch: 13, batch: 2700 // loss: 0.074
epoch: 13, batch: 2800 // loss: 0.091
epoch: 13, batch: 2900 // loss: 0.073
epoch: 13, batch: 3000 // loss: 0.078
epoch: 13, batch: 3100 // loss: 0.083
epoch: 13, batch: 3200 // loss: 0.077
epoch: 13, batch: 3300 // loss: 0.076
epoch: 13, batch: 3400 // loss: 0.073
epoch: 13, batch: 3500 // loss: 0.077
epoch: 13, batch: 3600 // loss: 0.085
epoch: 13, batch: 3700 // loss: 0.088

epoch: 14, batch: 0 // loss: 0.094
epoch: 14, batch: 100 // loss: 0.079
epoch: 14, batch: 200 // loss: 0.090
epoch: 14, batch: 300 // loss: 0.083
epoch: 14, batch: 400 // loss: 0.083
epoch: 14, batch: 500 // loss: 0.074
epoch: 14, batch: 600 // loss: 0.076
epoch: 14, batch: 700 // loss: 0.079
epoch: 14, batch: 800 // loss: 0.081
epoch: 14, batch: 900 // loss: 0.090
epoch: 14, batch: 1000 // loss: 0.074
epoch: 14, batch: 1100 // loss: 0.083
epoch: 14, batch: 1200 // loss: 0.079
epoch: 14, batch: 1300 // loss: 0.085
epoch: 14, batch: 1400 // loss: 0.079
epoch: 14, batch: 1500 // loss: 0.079
epoch: 14, batch: 1600 // loss: 0.086
epoch: 14, batch: 1700 // loss: 0.085
epoch: 14, batch: 1800 // loss: 0.086
epoch: 14, batch: 1900 // loss: 0.081
epoch: 14, batch: 2000 // loss: 0.076
epoch: 14, batch: 2100 // loss: 0.084
epoch: 14, batch: 2200 // loss: 0.091
epoch: 14, batch: 2300 // loss: 0.084
epoch: 14, batch: 2400 // loss: 0.078
epoch: 14, batch: 2500 // loss: 0.073
epoch: 14, batch: 2600 // loss: 0.086
epoch: 14, batch: 2700 // loss: 0.073
epoch: 14, batch: 2800 // loss: 0.090
epoch: 14, batch: 2900 // loss: 0.073
epoch: 14, batch: 3000 // loss: 0.077
epoch: 14, batch: 3100 // loss: 0.082
epoch: 14, batch: 3200 // loss: 0.076
epoch: 14, batch: 3300 // loss: 0.075
epoch: 14, batch: 3400 // loss: 0.072
epoch: 14, batch: 3500 // loss: 0.075
epoch: 14, batch: 3600 // loss: 0.084
epoch: 14, batch: 3700 // loss: 0.086

epoch: 15, batch: 0 // loss: 0.092
epoch: 15, batch: 100 // loss: 0.078
epoch: 15, batch: 200 // loss: 0.088
epoch: 15, batch: 300 // loss: 0.082
epoch: 15, batch: 400 // loss: 0.082
epoch: 15, batch: 500 // loss: 0.073
epoch: 15, batch: 600 // loss: 0.074
epoch: 15, batch: 700 // loss: 0.078
epoch: 15, batch: 800 // loss: 0.080
epoch: 15, batch: 900 // loss: 0.089
epoch: 15, batch: 1000 // loss: 0.073
epoch: 15, batch: 1100 // loss: 0.081
epoch: 15, batch: 1200 // loss: 0.079
epoch: 15, batch: 1300 // loss: 0.084
epoch: 15, batch: 1400 // loss: 0.077
epoch: 15, batch: 1500 // loss: 0.079
epoch: 15, batch: 1600 // loss: 0.085
epoch: 15, batch: 1700 // loss: 0.083
epoch: 15, batch: 1800 // loss: 0.085
epoch: 15, batch: 1900 // loss: 0.080
epoch: 15, batch: 2000 // loss: 0.075
epoch: 15, batch: 2100 // loss: 0.083
epoch: 15, batch: 2200 // loss: 0.089
epoch: 15, batch: 2300 // loss: 0.083
epoch: 15, batch: 2400 // loss: 0.077
epoch: 15, batch: 2500 // loss: 0.072
epoch: 15, batch: 2600 // loss: 0.084
epoch: 15, batch: 2700 // loss: 0.072
epoch: 15, batch: 2800 // loss: 0.089
epoch: 15, batch: 2900 // loss: 0.072
epoch: 15, batch: 3000 // loss: 0.076
epoch: 15, batch: 3100 // loss: 0.081
epoch: 15, batch: 3200 // loss: 0.075
epoch: 15, batch: 3300 // loss: 0.073
epoch: 15, batch: 3400 // loss: 0.071
epoch: 15, batch: 3500 // loss: 0.074
epoch: 15, batch: 3600 // loss: 0.082
epoch: 15, batch: 3700 // loss: 0.085

epoch: 16, batch: 0 // loss: 0.091
epoch: 16, batch: 100 // loss: 0.077
epoch: 16, batch: 200 // loss: 0.086
epoch: 16, batch: 300 // loss: 0.081
epoch: 16, batch: 400 // loss: 0.081
epoch: 16, batch: 500 // loss: 0.072
epoch: 16, batch: 600 // loss: 0.073
epoch: 16, batch: 700 // loss: 0.077
epoch: 16, batch: 800 // loss: 0.078
epoch: 16, batch: 900 // loss: 0.087
epoch: 16, batch: 1000 // loss: 0.072
epoch: 16, batch: 1100 // loss: 0.080
epoch: 16, batch: 1200 // loss: 0.078
epoch: 16, batch: 1300 // loss: 0.083
epoch: 16, batch: 1400 // loss: 0.076
epoch: 16, batch: 1500 // loss: 0.078
epoch: 16, batch: 1600 // loss: 0.083
epoch: 16, batch: 1700 // loss: 0.081
epoch: 16, batch: 1800 // loss: 0.084
epoch: 16, batch: 1900 // loss: 0.079
epoch: 16, batch: 2000 // loss: 0.074
epoch: 16, batch: 2100 // loss: 0.082
epoch: 16, batch: 2200 // loss: 0.087
epoch: 16, batch: 2300 // loss: 0.082
epoch: 16, batch: 2400 // loss: 0.076
epoch: 16, batch: 2500 // loss: 0.071
epoch: 16, batch: 2600 // loss: 0.082
epoch: 16, batch: 2700 // loss: 0.071
epoch: 16, batch: 2800 // loss: 0.087
epoch: 16, batch: 2900 // loss: 0.071
epoch: 16, batch: 3000 // loss: 0.074
epoch: 16, batch: 3100 // loss: 0.079
epoch: 16, batch: 3200 // loss: 0.074
epoch: 16, batch: 3300 // loss: 0.072
epoch: 16, batch: 3400 // loss: 0.070
epoch: 16, batch: 3500 // loss: 0.072
epoch: 16, batch: 3600 // loss: 0.080
epoch: 16, batch: 3700 // loss: 0.083

epoch: 17, batch: 0 // loss: 0.089
epoch: 17, batch: 100 // loss: 0.076
epoch: 17, batch: 200 // loss: 0.084
epoch: 17, batch: 300 // loss: 0.079
epoch: 17, batch: 400 // loss: 0.079
epoch: 17, batch: 500 // loss: 0.070
epoch: 17, batch: 600 // loss: 0.071
epoch: 17, batch: 700 // loss: 0.075
epoch: 17, batch: 800 // loss: 0.077
epoch: 17, batch: 900 // loss: 0.086
epoch: 17, batch: 1000 // loss: 0.071
epoch: 17, batch: 1100 // loss: 0.078
epoch: 17, batch: 1200 // loss: 0.076
epoch: 17, batch: 1300 // loss: 0.081
epoch: 17, batch: 1400 // loss: 0.075
epoch: 17, batch: 1500 // loss: 0.077
epoch: 17, batch: 1600 // loss: 0.082
epoch: 17, batch: 1700 // loss: 0.079
epoch: 17, batch: 1800 // loss: 0.082
epoch: 17, batch: 1900 // loss: 0.078
epoch: 17, batch: 2000 // loss: 0.073
epoch: 17, batch: 2100 // loss: 0.080
epoch: 17, batch: 2200 // loss: 0.085
epoch: 17, batch: 2300 // loss: 0.080
epoch: 17, batch: 2400 // loss: 0.074
epoch: 17, batch: 2500 // loss: 0.069
epoch: 17, batch: 2600 // loss: 0.080
epoch: 17, batch: 2700 // loss: 0.069
epoch: 17, batch: 2800 // loss: 0.085
epoch: 17, batch: 2900 // loss: 0.069
epoch: 17, batch: 3000 // loss: 0.072
epoch: 17, batch: 3100 // loss: 0.077
epoch: 17, batch: 3200 // loss: 0.072
epoch: 17, batch: 3300 // loss: 0.070
epoch: 17, batch: 3400 // loss: 0.069
epoch: 17, batch: 3500 // loss: 0.070
epoch: 17, batch: 3600 // loss: 0.078
epoch: 17, batch: 3700 // loss: 0.082

epoch: 18, batch: 0 // loss: 0.087
epoch: 18, batch: 100 // loss: 0.075
epoch: 18, batch: 200 // loss: 0.081
epoch: 18, batch: 300 // loss: 0.078
epoch: 18, batch: 400 // loss: 0.078
epoch: 18, batch: 500 // loss: 0.069
epoch: 18, batch: 600 // loss: 0.069
epoch: 18, batch: 700 // loss: 0.073
epoch: 18, batch: 800 // loss: 0.075
epoch: 18, batch: 900 // loss: 0.084
epoch: 18, batch: 1000 // loss: 0.070
epoch: 18, batch: 1100 // loss: 0.075
epoch: 18, batch: 1200 // loss: 0.075
epoch: 18, batch: 1300 // loss: 0.079
epoch: 18, batch: 1400 // loss: 0.073
epoch: 18, batch: 1500 // loss: 0.076
epoch: 18, batch: 1600 // loss: 0.080
epoch: 18, batch: 1700 // loss: 0.076
epoch: 18, batch: 1800 // loss: 0.081
epoch: 18, batch: 1900 // loss: 0.076
epoch: 18, batch: 2000 // loss: 0.072
epoch: 18, batch: 2100 // loss: 0.079
epoch: 18, batch: 2200 // loss: 0.083
epoch: 18, batch: 2300 // loss: 0.078
epoch: 18, batch: 2400 // loss: 0.073
epoch: 18, batch: 2500 // loss: 0.068
epoch: 18, batch: 2600 // loss: 0.078
epoch: 18, batch: 2700 // loss: 0.068
epoch: 18, batch: 2800 // loss: 0.083
epoch: 18, batch: 2900 // loss: 0.068
epoch: 18, batch: 3000 // loss: 0.070
epoch: 18, batch: 3100 // loss: 0.076
epoch: 18, batch: 3200 // loss: 0.071
epoch: 18, batch: 3300 // loss: 0.069
epoch: 18, batch: 3400 // loss: 0.068
epoch: 18, batch: 3500 // loss: 0.068
epoch: 18, batch: 3600 // loss: 0.076
epoch: 18, batch: 3700 // loss: 0.080

epoch: 19, batch: 0 // loss: 0.085
epoch: 19, batch: 100 // loss: 0.073
epoch: 19, batch: 200 // loss: 0.078
epoch: 19, batch: 300 // loss: 0.076
epoch: 19, batch: 400 // loss: 0.076
epoch: 19, batch: 500 // loss: 0.067
epoch: 19, batch: 600 // loss: 0.067
epoch: 19, batch: 700 // loss: 0.071
epoch: 19, batch: 800 // loss: 0.073
epoch: 19, batch: 900 // loss: 0.082
epoch: 19, batch: 1000 // loss: 0.068
epoch: 19, batch: 1100 // loss: 0.073
epoch: 19, batch: 1200 // loss: 0.074
epoch: 19, batch: 1300 // loss: 0.077
epoch: 19, batch: 1400 // loss: 0.071
epoch: 19, batch: 1500 // loss: 0.074
epoch: 19, batch: 1600 // loss: 0.078
epoch: 19, batch: 1700 // loss: 0.074
epoch: 19, batch: 1800 // loss: 0.079
epoch: 19, batch: 1900 // loss: 0.075
epoch: 19, batch: 2000 // loss: 0.071
epoch: 19, batch: 2100 // loss: 0.077
epoch: 19, batch: 2200 // loss: 0.081
epoch: 19, batch: 2300 // loss: 0.076
epoch: 19, batch: 2400 // loss: 0.071
epoch: 19, batch: 2500 // loss: 0.067
epoch: 19, batch: 2600 // loss: 0.075
epoch: 19, batch: 2700 // loss: 0.067
epoch: 19, batch: 2800 // loss: 0.081
epoch: 19, batch: 2900 // loss: 0.067
epoch: 19, batch: 3000 // loss: 0.068
epoch: 19, batch: 3100 // loss: 0.074
epoch: 19, batch: 3200 // loss: 0.069
epoch: 19, batch: 3300 // loss: 0.067
epoch: 19, batch: 3400 // loss: 0.067
epoch: 19, batch: 3500 // loss: 0.065
epoch: 19, batch: 3600 // loss: 0.073
epoch: 19, batch: 3700 // loss: 0.077

epoch: 20, batch: 0 // loss: 0.083
epoch: 20, batch: 100 // loss: 0.072
epoch: 20, batch: 200 // loss: 0.076
epoch: 20, batch: 300 // loss: 0.075
epoch: 20, batch: 400 // loss: 0.074
epoch: 20, batch: 500 // loss: 0.065
epoch: 20, batch: 600 // loss: 0.065
epoch: 20, batch: 700 // loss: 0.069
epoch: 20, batch: 800 // loss: 0.071
epoch: 20, batch: 900 // loss: 0.080
epoch: 20, batch: 1000 // loss: 0.067
epoch: 20, batch: 1100 // loss: 0.071
epoch: 20, batch: 1200 // loss: 0.072
epoch: 20, batch: 1300 // loss: 0.075
epoch: 20, batch: 1400 // loss: 0.069
epoch: 20, batch: 1500 // loss: 0.073
epoch: 20, batch: 1600 // loss: 0.077
epoch: 20, batch: 1700 // loss: 0.071
epoch: 20, batch: 1800 // loss: 0.077
epoch: 20, batch: 1900 // loss: 0.073
epoch: 20, batch: 2000 // loss: 0.070
epoch: 20, batch: 2100 // loss: 0.075
epoch: 20, batch: 2200 // loss: 0.079
epoch: 20, batch: 2300 // loss: 0.074
epoch: 20, batch: 2400 // loss: 0.069
epoch: 20, batch: 2500 // loss: 0.065
epoch: 20, batch: 2600 // loss: 0.073
epoch: 20, batch: 2700 // loss: 0.065
epoch: 20, batch: 2800 // loss: 0.079
epoch: 20, batch: 2900 // loss: 0.065
epoch: 20, batch: 3000 // loss: 0.066
epoch: 20, batch: 3100 // loss: 0.072
epoch: 20, batch: 3200 // loss: 0.067
epoch: 20, batch: 3300 // loss: 0.065
epoch: 20, batch: 3400 // loss: 0.066
epoch: 20, batch: 3500 // loss: 0.063
epoch: 20, batch: 3600 // loss: 0.071
epoch: 20, batch: 3700 // loss: 0.075

epoch: 21, batch: 0 // loss: 0.081
epoch: 21, batch: 100 // loss: 0.071
epoch: 21, batch: 200 // loss: 0.073
epoch: 21, batch: 300 // loss: 0.073
epoch: 21, batch: 400 // loss: 0.072
epoch: 21, batch: 500 // loss: 0.063
epoch: 21, batch: 600 // loss: 0.063
epoch: 21, batch: 700 // loss: 0.067
epoch: 21, batch: 800 // loss: 0.069
epoch: 21, batch: 900 // loss: 0.078
epoch: 21, batch: 1000 // loss: 0.065
epoch: 21, batch: 1100 // loss: 0.068
epoch: 21, batch: 1200 // loss: 0.071
epoch: 21, batch: 1300 // loss: 0.074
epoch: 21, batch: 1400 // loss: 0.068
epoch: 21, batch: 1500 // loss: 0.072
epoch: 21, batch: 1600 // loss: 0.075
epoch: 21, batch: 1700 // loss: 0.069
epoch: 21, batch: 1800 // loss: 0.076
epoch: 21, batch: 1900 // loss: 0.071
epoch: 21, batch: 2000 // loss: 0.069
epoch: 21, batch: 2100 // loss: 0.074
epoch: 21, batch: 2200 // loss: 0.077
epoch: 21, batch: 2300 // loss: 0.072
epoch: 21, batch: 2400 // loss: 0.067
epoch: 21, batch: 2500 // loss: 0.064
epoch: 21, batch: 2600 // loss: 0.070
epoch: 21, batch: 2700 // loss: 0.064
epoch: 21, batch: 2800 // loss: 0.077
epoch: 21, batch: 2900 // loss: 0.064
epoch: 21, batch: 3000 // loss: 0.065
epoch: 21, batch: 3100 // loss: 0.070
epoch: 21, batch: 3200 // loss: 0.065
epoch: 21, batch: 3300 // loss: 0.063
epoch: 21, batch: 3400 // loss: 0.064
epoch: 21, batch: 3500 // loss: 0.061
epoch: 21, batch: 3600 // loss: 0.068
epoch: 21, batch: 3700 // loss: 0.073

epoch: 22, batch: 0 // loss: 0.079
epoch: 22, batch: 100 // loss: 0.070
epoch: 22, batch: 200 // loss: 0.070
epoch: 22, batch: 300 // loss: 0.072
epoch: 22, batch: 400 // loss: 0.071
epoch: 22, batch: 500 // loss: 0.061
epoch: 22, batch: 600 // loss: 0.061
epoch: 22, batch: 700 // loss: 0.065
epoch: 22, batch: 800 // loss: 0.068
epoch: 22, batch: 900 // loss: 0.076
epoch: 22, batch: 1000 // loss: 0.064
epoch: 22, batch: 1100 // loss: 0.066
epoch: 22, batch: 1200 // loss: 0.069
epoch: 22, batch: 1300 // loss: 0.072
epoch: 22, batch: 1400 // loss: 0.066
epoch: 22, batch: 1500 // loss: 0.071
epoch: 22, batch: 1600 // loss: 0.073
epoch: 22, batch: 1700 // loss: 0.067
epoch: 22, batch: 1800 // loss: 0.074
epoch: 22, batch: 1900 // loss: 0.070
epoch: 22, batch: 2000 // loss: 0.069
epoch: 22, batch: 2100 // loss: 0.072
epoch: 22, batch: 2200 // loss: 0.075
epoch: 22, batch: 2300 // loss: 0.071
epoch: 22, batch: 2400 // loss: 0.066
epoch: 22, batch: 2500 // loss: 0.063
epoch: 22, batch: 2600 // loss: 0.068
epoch: 22, batch: 2700 // loss: 0.062
epoch: 22, batch: 2800 // loss: 0.076
epoch: 22, batch: 2900 // loss: 0.062
epoch: 22, batch: 3000 // loss: 0.063
epoch: 22, batch: 3100 // loss: 0.068
epoch: 22, batch: 3200 // loss: 0.064
epoch: 22, batch: 3300 // loss: 0.061
epoch: 22, batch: 3400 // loss: 0.063
epoch: 22, batch: 3500 // loss: 0.059
epoch: 22, batch: 3600 // loss: 0.066
epoch: 22, batch: 3700 // loss: 0.071

epoch: 23, batch: 0 // loss: 0.077
epoch: 23, batch: 100 // loss: 0.069
epoch: 23, batch: 200 // loss: 0.068
epoch: 23, batch: 300 // loss: 0.070
epoch: 23, batch: 400 // loss: 0.069
epoch: 23, batch: 500 // loss: 0.059
epoch: 23, batch: 600 // loss: 0.059
epoch: 23, batch: 700 // loss: 0.063
epoch: 23, batch: 800 // loss: 0.066
epoch: 23, batch: 900 // loss: 0.074
epoch: 23, batch: 1000 // loss: 0.063
epoch: 23, batch: 1100 // loss: 0.064
epoch: 23, batch: 1200 // loss: 0.068
epoch: 23, batch: 1300 // loss: 0.070
epoch: 23, batch: 1400 // loss: 0.064
epoch: 23, batch: 1500 // loss: 0.069
epoch: 23, batch: 1600 // loss: 0.072
epoch: 23, batch: 1700 // loss: 0.065
epoch: 23, batch: 1800 // loss: 0.073
epoch: 23, batch: 1900 // loss: 0.069
epoch: 23, batch: 2000 // loss: 0.068
epoch: 23, batch: 2100 // loss: 0.071
epoch: 23, batch: 2200 // loss: 0.073
epoch: 23, batch: 2300 // loss: 0.069
epoch: 23, batch: 2400 // loss: 0.064
epoch: 23, batch: 2500 // loss: 0.061
epoch: 23, batch: 2600 // loss: 0.066
epoch: 23, batch: 2700 // loss: 0.061
epoch: 23, batch: 2800 // loss: 0.074
epoch: 23, batch: 2900 // loss: 0.061
epoch: 23, batch: 3000 // loss: 0.061
epoch: 23, batch: 3100 // loss: 0.067
epoch: 23, batch: 3200 // loss: 0.062
epoch: 23, batch: 3300 // loss: 0.060
epoch: 23, batch: 3400 // loss: 0.062
epoch: 23, batch: 3500 // loss: 0.057
epoch: 23, batch: 3600 // loss: 0.064
epoch: 23, batch: 3700 // loss: 0.070

epoch: 24, batch: 0 // loss: 0.076
epoch: 24, batch: 100 // loss: 0.068
epoch: 24, batch: 200 // loss: 0.065
epoch: 24, batch: 300 // loss: 0.069
epoch: 24, batch: 400 // loss: 0.068
epoch: 24, batch: 500 // loss: 0.058
epoch: 24, batch: 600 // loss: 0.058
epoch: 24, batch: 700 // loss: 0.062
epoch: 24, batch: 800 // loss: 0.065
epoch: 24, batch: 900 // loss: 0.072
epoch: 24, batch: 1000 // loss: 0.061
epoch: 24, batch: 1100 // loss: 0.062
epoch: 24, batch: 1200 // loss: 0.067
epoch: 24, batch: 1300 // loss: 0.069
epoch: 24, batch: 1400 // loss: 0.063
epoch: 24, batch: 1500 // loss: 0.068
epoch: 24, batch: 1600 // loss: 0.070
epoch: 24, batch: 1700 // loss: 0.063
epoch: 24, batch: 1800 // loss: 0.071
epoch: 24, batch: 1900 // loss: 0.067
epoch: 24, batch: 2000 // loss: 0.067
epoch: 24, batch: 2100 // loss: 0.070
epoch: 24, batch: 2200 // loss: 0.072
epoch: 24, batch: 2300 // loss: 0.068
epoch: 24, batch: 2400 // loss: 0.062
epoch: 24, batch: 2500 // loss: 0.060
epoch: 24, batch: 2600 // loss: 0.065
epoch: 24, batch: 2700 // loss: 0.060
epoch: 24, batch: 2800 // loss: 0.072
epoch: 24, batch: 2900 // loss: 0.060
epoch: 24, batch: 3000 // loss: 0.060
epoch: 24, batch: 3100 // loss: 0.065
epoch: 24, batch: 3200 // loss: 0.061
epoch: 24, batch: 3300 // loss: 0.059
epoch: 24, batch: 3400 // loss: 0.061
epoch: 24, batch: 3500 // loss: 0.056
epoch: 24, batch: 3600 // loss: 0.062
epoch: 24, batch: 3700 // loss: 0.068

epoch: 25, batch: 0 // loss: 0.075
epoch: 25, batch: 100 // loss: 0.067
epoch: 25, batch: 200 // loss: 0.063
epoch: 25, batch: 300 // loss: 0.068
epoch: 25, batch: 400 // loss: 0.067
epoch: 25, batch: 500 // loss: 0.057
epoch: 25, batch: 600 // loss: 0.057
epoch: 25, batch: 700 // loss: 0.060
epoch: 25, batch: 800 // loss: 0.063
epoch: 25, batch: 900 // loss: 0.071
epoch: 25, batch: 1000 // loss: 0.060
epoch: 25, batch: 1100 // loss: 0.061
epoch: 25, batch: 1200 // loss: 0.066
epoch: 25, batch: 1300 // loss: 0.068
epoch: 25, batch: 1400 // loss: 0.061
epoch: 25, batch: 1500 // loss: 0.068
epoch: 25, batch: 1600 // loss: 0.069
epoch: 25, batch: 1700 // loss: 0.061
epoch: 25, batch: 1800 // loss: 0.070
epoch: 25, batch: 1900 // loss: 0.066
epoch: 25, batch: 2000 // loss: 0.066
epoch: 25, batch: 2100 // loss: 0.069
epoch: 25, batch: 2200 // loss: 0.071
epoch: 25, batch: 2300 // loss: 0.067
epoch: 25, batch: 2400 // loss: 0.061
epoch: 25, batch: 2500 // loss: 0.060
epoch: 25, batch: 2600 // loss: 0.063
epoch: 25, batch: 2700 // loss: 0.059
epoch: 25, batch: 2800 // loss: 0.071
epoch: 25, batch: 2900 // loss: 0.059
epoch: 25, batch: 3000 // loss: 0.059
epoch: 25, batch: 3100 // loss: 0.064
epoch: 25, batch: 3200 // loss: 0.059
epoch: 25, batch: 3300 // loss: 0.057
epoch: 25, batch: 3400 // loss: 0.061
epoch: 25, batch: 3500 // loss: 0.055
epoch: 25, batch: 3600 // loss: 0.060
epoch: 25, batch: 3700 // loss: 0.067

epoch: 26, batch: 0 // loss: 0.073
epoch: 26, batch: 100 // loss: 0.067
epoch: 26, batch: 200 // loss: 0.062
epoch: 26, batch: 300 // loss: 0.067
epoch: 26, batch: 400 // loss: 0.066
epoch: 26, batch: 500 // loss: 0.055
epoch: 26, batch: 600 // loss: 0.055
epoch: 26, batch: 700 // loss: 0.059
epoch: 26, batch: 800 // loss: 0.062
epoch: 26, batch: 900 // loss: 0.070
epoch: 26, batch: 1000 // loss: 0.060
epoch: 26, batch: 1100 // loss: 0.060
epoch: 26, batch: 1200 // loss: 0.065
epoch: 26, batch: 1300 // loss: 0.067
epoch: 26, batch: 1400 // loss: 0.060
epoch: 26, batch: 1500 // loss: 0.067
epoch: 26, batch: 1600 // loss: 0.068
epoch: 26, batch: 1700 // loss: 0.060
epoch: 26, batch: 1800 // loss: 0.069
epoch: 26, batch: 1900 // loss: 0.065
epoch: 26, batch: 2000 // loss: 0.066
epoch: 26, batch: 2100 // loss: 0.068
epoch: 26, batch: 2200 // loss: 0.070
epoch: 26, batch: 2300 // loss: 0.066
epoch: 26, batch: 2400 // loss: 0.060
epoch: 26, batch: 2500 // loss: 0.059
epoch: 26, batch: 2600 // loss: 0.062
epoch: 26, batch: 2700 // loss: 0.058
epoch: 26, batch: 2800 // loss: 0.070
epoch: 26, batch: 2900 // loss: 0.058
epoch: 26, batch: 3000 // loss: 0.058
epoch: 26, batch: 3100 // loss: 0.063
epoch: 26, batch: 3200 // loss: 0.058
epoch: 26, batch: 3300 // loss: 0.056
epoch: 26, batch: 3400 // loss: 0.060
epoch: 26, batch: 3500 // loss: 0.053
epoch: 26, batch: 3600 // loss: 0.059
epoch: 26, batch: 3700 // loss: 0.066

epoch: 27, batch: 0 // loss: 0.072
epoch: 27, batch: 100 // loss: 0.066
epoch: 27, batch: 200 // loss: 0.060
epoch: 27, batch: 300 // loss: 0.066
epoch: 27, batch: 400 // loss: 0.065
epoch: 27, batch: 500 // loss: 0.054
epoch: 27, batch: 600 // loss: 0.054
epoch: 27, batch: 700 // loss: 0.058
epoch: 27, batch: 800 // loss: 0.062
epoch: 27, batch: 900 // loss: 0.069
epoch: 27, batch: 1000 // loss: 0.059
epoch: 27, batch: 1100 // loss: 0.058
epoch: 27, batch: 1200 // loss: 0.064
epoch: 27, batch: 1300 // loss: 0.067
epoch: 27, batch: 1400 // loss: 0.059
epoch: 27, batch: 1500 // loss: 0.066
epoch: 27, batch: 1600 // loss: 0.067
epoch: 27, batch: 1700 // loss: 0.059
epoch: 27, batch: 1800 // loss: 0.069
epoch: 27, batch: 1900 // loss: 0.065
epoch: 27, batch: 2000 // loss: 0.065
epoch: 27, batch: 2100 // loss: 0.067
epoch: 27, batch: 2200 // loss: 0.069
epoch: 27, batch: 2300 // loss: 0.065
epoch: 27, batch: 2400 // loss: 0.059
epoch: 27, batch: 2500 // loss: 0.058
epoch: 27, batch: 2600 // loss: 0.061
epoch: 27, batch: 2700 // loss: 0.058
epoch: 27, batch: 2800 // loss: 0.069
epoch: 27, batch: 2900 // loss: 0.058
epoch: 27, batch: 3000 // loss: 0.057
epoch: 27, batch: 3100 // loss: 0.062
epoch: 27, batch: 3200 // loss: 0.057
epoch: 27, batch: 3300 // loss: 0.056
epoch: 27, batch: 3400 // loss: 0.059
epoch: 27, batch: 3500 // loss: 0.053
epoch: 27, batch: 3600 // loss: 0.058
epoch: 27, batch: 3700 // loss: 0.065

epoch: 28, batch: 0 // loss: 0.071
epoch: 28, batch: 100 // loss: 0.066
epoch: 28, batch: 200 // loss: 0.059
epoch: 28, batch: 300 // loss: 0.066
epoch: 28, batch: 400 // loss: 0.064
epoch: 28, batch: 500 // loss: 0.054
epoch: 28, batch: 600 // loss: 0.054
epoch: 28, batch: 700 // loss: 0.057
epoch: 28, batch: 800 // loss: 0.061
epoch: 28, batch: 900 // loss: 0.068
epoch: 28, batch: 1000 // loss: 0.058
epoch: 28, batch: 1100 // loss: 0.058
epoch: 28, batch: 1200 // loss: 0.063
epoch: 28, batch: 1300 // loss: 0.066
epoch: 28, batch: 1400 // loss: 0.059
epoch: 28, batch: 1500 // loss: 0.065
epoch: 28, batch: 1600 // loss: 0.066
epoch: 28, batch: 1700 // loss: 0.059
epoch: 28, batch: 1800 // loss: 0.068
epoch: 28, batch: 1900 // loss: 0.064
epoch: 28, batch: 2000 // loss: 0.065
epoch: 28, batch: 2100 // loss: 0.066
epoch: 28, batch: 2200 // loss: 0.068
epoch: 28, batch: 2300 // loss: 0.065
epoch: 28, batch: 2400 // loss: 0.058
epoch: 28, batch: 2500 // loss: 0.058
epoch: 28, batch: 2600 // loss: 0.060
epoch: 28, batch: 2700 // loss: 0.057
epoch: 28, batch: 2800 // loss: 0.068
epoch: 28, batch: 2900 // loss: 0.057
epoch: 28, batch: 3000 // loss: 0.056
epoch: 28, batch: 3100 // loss: 0.061
epoch: 28, batch: 3200 // loss: 0.056
epoch: 28, batch: 3300 // loss: 0.055
epoch: 28, batch: 3400 // loss: 0.059
epoch: 28, batch: 3500 // loss: 0.052
epoch: 28, batch: 3600 // loss: 0.057
epoch: 28, batch: 3700 // loss: 0.064

epoch: 29, batch: 0 // loss: 0.071
epoch: 29, batch: 100 // loss: 0.065
epoch: 29, batch: 200 // loss: 0.058
epoch: 29, batch: 300 // loss: 0.065
epoch: 29, batch: 400 // loss: 0.063
epoch: 29, batch: 500 // loss: 0.053
epoch: 29, batch: 600 // loss: 0.053
epoch: 29, batch: 700 // loss: 0.056
epoch: 29, batch: 800 // loss: 0.060
epoch: 29, batch: 900 // loss: 0.068
epoch: 29, batch: 1000 // loss: 0.058
epoch: 29, batch: 1100 // loss: 0.057
epoch: 29, batch: 1200 // loss: 0.063
epoch: 29, batch: 1300 // loss: 0.066
epoch: 29, batch: 1400 // loss: 0.058
epoch: 29, batch: 1500 // loss: 0.065
epoch: 29, batch: 1600 // loss: 0.066
epoch: 29, batch: 1700 // loss: 0.058
epoch: 29, batch: 1800 // loss: 0.067
epoch: 29, batch: 1900 // loss: 0.063
epoch: 29, batch: 2000 // loss: 0.064
epoch: 29, batch: 2100 // loss: 0.066
epoch: 29, batch: 2200 // loss: 0.068
epoch: 29, batch: 2300 // loss: 0.064
epoch: 29, batch: 2400 // loss: 0.057
epoch: 29, batch: 2500 // loss: 0.057
epoch: 29, batch: 2600 // loss: 0.059
epoch: 29, batch: 2700 // loss: 0.057
epoch: 29, batch: 2800 // loss: 0.068
epoch: 29, batch: 2900 // loss: 0.057
epoch: 29, batch: 3000 // loss: 0.056
epoch: 29, batch: 3100 // loss: 0.061
epoch: 29, batch: 3200 // loss: 0.056
epoch: 29, batch: 3300 // loss: 0.054
epoch: 29, batch: 3400 // loss: 0.059
epoch: 29, batch: 3500 // loss: 0.051
epoch: 29, batch: 3600 // loss: 0.056
epoch: 29, batch: 3700 // loss: 0.063

epoch: 30, batch: 0 // loss: 0.070
epoch: 30, batch: 100 // loss: 0.065
epoch: 30, batch: 200 // loss: 0.058
epoch: 30, batch: 300 // loss: 0.065
epoch: 30, batch: 400 // loss: 0.063
epoch: 30, batch: 500 // loss: 0.052
epoch: 30, batch: 600 // loss: 0.052
epoch: 30, batch: 700 // loss: 0.056
epoch: 30, batch: 800 // loss: 0.060
epoch: 30, batch: 900 // loss: 0.067
epoch: 30, batch: 1000 // loss: 0.057
epoch: 30, batch: 1100 // loss: 0.056
epoch: 30, batch: 1200 // loss: 0.062
epoch: 30, batch: 1300 // loss: 0.065
epoch: 30, batch: 1400 // loss: 0.057
epoch: 30, batch: 1500 // loss: 0.064
epoch: 30, batch: 1600 // loss: 0.065
epoch: 30, batch: 1700 // loss: 0.058
epoch: 30, batch: 1800 // loss: 0.067
epoch: 30, batch: 1900 // loss: 0.063
epoch: 30, batch: 2000 // loss: 0.064
epoch: 30, batch: 2100 // loss: 0.065
epoch: 30, batch: 2200 // loss: 0.067
epoch: 30, batch: 2300 // loss: 0.064
epoch: 30, batch: 2400 // loss: 0.056
epoch: 30, batch: 2500 // loss: 0.057
epoch: 30, batch: 2600 // loss: 0.058
epoch: 30, batch: 2700 // loss: 0.056
epoch: 30, batch: 2800 // loss: 0.067
epoch: 30, batch: 2900 // loss: 0.056
epoch: 30, batch: 3000 // loss: 0.055
epoch: 30, batch: 3100 // loss: 0.060
epoch: 30, batch: 3200 // loss: 0.055
epoch: 30, batch: 3300 // loss: 0.054
epoch: 30, batch: 3400 // loss: 0.058
epoch: 30, batch: 3500 // loss: 0.051
epoch: 30, batch: 3600 // loss: 0.055
epoch: 30, batch: 3700 // loss: 0.063

epoch: 31, batch: 0 // loss: 0.070
epoch: 31, batch: 100 // loss: 0.065
epoch: 31, batch: 200 // loss: 0.057
epoch: 31, batch: 300 // loss: 0.064
epoch: 31, batch: 400 // loss: 0.062
epoch: 31, batch: 500 // loss: 0.052
epoch: 31, batch: 600 // loss: 0.052
epoch: 31, batch: 700 // loss: 0.055
epoch: 31, batch: 800 // loss: 0.059
epoch: 31, batch: 900 // loss: 0.066
epoch: 31, batch: 1000 // loss: 0.057
epoch: 31, batch: 1100 // loss: 0.056
epoch: 31, batch: 1200 // loss: 0.062
epoch: 31, batch: 1300 // loss: 0.065
epoch: 31, batch: 1400 // loss: 0.057
epoch: 31, batch: 1500 // loss: 0.064
epoch: 31, batch: 1600 // loss: 0.064
epoch: 31, batch: 1700 // loss: 0.057
epoch: 31, batch: 1800 // loss: 0.067
epoch: 31, batch: 1900 // loss: 0.062
epoch: 31, batch: 2000 // loss: 0.064
epoch: 31, batch: 2100 // loss: 0.065
epoch: 31, batch: 2200 // loss: 0.067
epoch: 31, batch: 2300 // loss: 0.063
epoch: 31, batch: 2400 // loss: 0.056
epoch: 31, batch: 2500 // loss: 0.056
epoch: 31, batch: 2600 // loss: 0.058
epoch: 31, batch: 2700 // loss: 0.056
epoch: 31, batch: 2800 // loss: 0.067
epoch: 31, batch: 2900 // loss: 0.056
epoch: 31, batch: 3000 // loss: 0.055
epoch: 31, batch: 3100 // loss: 0.059
epoch: 31, batch: 3200 // loss: 0.054
epoch: 31, batch: 3300 // loss: 0.053
epoch: 31, batch: 3400 // loss: 0.058
epoch: 31, batch: 3500 // loss: 0.050
epoch: 31, batch: 3600 // loss: 0.055
epoch: 31, batch: 3700 // loss: 0.062

epoch: 32, batch: 0 // loss: 0.069
epoch: 32, batch: 100 // loss: 0.065
epoch: 32, batch: 200 // loss: 0.056
epoch: 32, batch: 300 // loss: 0.064
epoch: 32, batch: 400 // loss: 0.062
epoch: 32, batch: 500 // loss: 0.051
epoch: 32, batch: 600 // loss: 0.052
epoch: 32, batch: 700 // loss: 0.055
epoch: 32, batch: 800 // loss: 0.059
epoch: 32, batch: 900 // loss: 0.066
epoch: 32, batch: 1000 // loss: 0.056
epoch: 32, batch: 1100 // loss: 0.055
epoch: 32, batch: 1200 // loss: 0.061
epoch: 32, batch: 1300 // loss: 0.065
epoch: 32, batch: 1400 // loss: 0.056
epoch: 32, batch: 1500 // loss: 0.063
epoch: 32, batch: 1600 // loss: 0.064
epoch: 32, batch: 1700 // loss: 0.057
epoch: 32, batch: 1800 // loss: 0.066
epoch: 32, batch: 1900 // loss: 0.062
epoch: 32, batch: 2000 // loss: 0.063
epoch: 32, batch: 2100 // loss: 0.064
epoch: 32, batch: 2200 // loss: 0.067
epoch: 32, batch: 2300 // loss: 0.063
epoch: 32, batch: 2400 // loss: 0.055
epoch: 32, batch: 2500 // loss: 0.056
epoch: 32, batch: 2600 // loss: 0.057
epoch: 32, batch: 2700 // loss: 0.056
epoch: 32, batch: 2800 // loss: 0.066
epoch: 32, batch: 2900 // loss: 0.055
epoch: 32, batch: 3000 // loss: 0.055
epoch: 32, batch: 3100 // loss: 0.059
epoch: 32, batch: 3200 // loss: 0.054
epoch: 32, batch: 3300 // loss: 0.053
epoch: 32, batch: 3400 // loss: 0.057
epoch: 32, batch: 3500 // loss: 0.050
epoch: 32, batch: 3600 // loss: 0.054
epoch: 32, batch: 3700 // loss: 0.061

epoch: 33, batch: 0 // loss: 0.069
epoch: 33, batch: 100 // loss: 0.064
epoch: 33, batch: 200 // loss: 0.056
epoch: 33, batch: 300 // loss: 0.063
epoch: 33, batch: 400 // loss: 0.061
epoch: 33, batch: 500 // loss: 0.051
epoch: 33, batch: 600 // loss: 0.051
epoch: 33, batch: 700 // loss: 0.054
epoch: 33, batch: 800 // loss: 0.059
epoch: 33, batch: 900 // loss: 0.065
epoch: 33, batch: 1000 // loss: 0.056
epoch: 33, batch: 1100 // loss: 0.055
epoch: 33, batch: 1200 // loss: 0.061
epoch: 33, batch: 1300 // loss: 0.064
epoch: 33, batch: 1400 // loss: 0.056
epoch: 33, batch: 1500 // loss: 0.063
epoch: 33, batch: 1600 // loss: 0.064
epoch: 33, batch: 1700 // loss: 0.056
epoch: 33, batch: 1800 // loss: 0.066
epoch: 33, batch: 1900 // loss: 0.062
epoch: 33, batch: 2000 // loss: 0.063
epoch: 33, batch: 2100 // loss: 0.064
epoch: 33, batch: 2200 // loss: 0.066
epoch: 33, batch: 2300 // loss: 0.063
epoch: 33, batch: 2400 // loss: 0.055
epoch: 33, batch: 2500 // loss: 0.056
epoch: 33, batch: 2600 // loss: 0.057
epoch: 33, batch: 2700 // loss: 0.055
epoch: 33, batch: 2800 // loss: 0.065
epoch: 33, batch: 2900 // loss: 0.055
epoch: 33, batch: 3000 // loss: 0.054
epoch: 33, batch: 3100 // loss: 0.058
epoch: 33, batch: 3200 // loss: 0.053
epoch: 33, batch: 3300 // loss: 0.052
epoch: 33, batch: 3400 // loss: 0.057
epoch: 33, batch: 3500 // loss: 0.049
epoch: 33, batch: 3600 // loss: 0.054
epoch: 33, batch: 3700 // loss: 0.061

epoch: 34, batch: 0 // loss: 0.068
epoch: 34, batch: 100 // loss: 0.064
epoch: 34, batch: 200 // loss: 0.055
epoch: 34, batch: 300 // loss: 0.063
epoch: 34, batch: 400 // loss: 0.061
epoch: 34, batch: 500 // loss: 0.050
epoch: 34, batch: 600 // loss: 0.051
epoch: 34, batch: 700 // loss: 0.054
epoch: 34, batch: 800 // loss: 0.058
epoch: 34, batch: 900 // loss: 0.065
epoch: 34, batch: 1000 // loss: 0.055
epoch: 34, batch: 1100 // loss: 0.054
epoch: 34, batch: 1200 // loss: 0.060
epoch: 34, batch: 1300 // loss: 0.064
epoch: 34, batch: 1400 // loss: 0.055
epoch: 34, batch: 1500 // loss: 0.062
epoch: 34, batch: 1600 // loss: 0.063
epoch: 34, batch: 1700 // loss: 0.056
epoch: 34, batch: 1800 // loss: 0.065
epoch: 34, batch: 1900 // loss: 0.061
epoch: 34, batch: 2000 // loss: 0.062
epoch: 34, batch: 2100 // loss: 0.063
epoch: 34, batch: 2200 // loss: 0.066
epoch: 34, batch: 2300 // loss: 0.062
epoch: 34, batch: 2400 // loss: 0.054
epoch: 34, batch: 2500 // loss: 0.055
epoch: 34, batch: 2600 // loss: 0.057
epoch: 34, batch: 2700 // loss: 0.055
epoch: 34, batch: 2800 // loss: 0.065
epoch: 34, batch: 2900 // loss: 0.054
epoch: 34, batch: 3000 // loss: 0.054
epoch: 34, batch: 3100 // loss: 0.058
epoch: 34, batch: 3200 // loss: 0.053
epoch: 34, batch: 3300 // loss: 0.052
epoch: 34, batch: 3400 // loss: 0.057
epoch: 34, batch: 3500 // loss: 0.049
epoch: 34, batch: 3600 // loss: 0.053
epoch: 34, batch: 3700 // loss: 0.060

epoch: 35, batch: 0 // loss: 0.068
epoch: 35, batch: 100 // loss: 0.064
epoch: 35, batch: 200 // loss: 0.055
epoch: 35, batch: 300 // loss: 0.062
epoch: 35, batch: 400 // loss: 0.061
epoch: 35, batch: 500 // loss: 0.050
epoch: 35, batch: 600 // loss: 0.050
epoch: 35, batch: 700 // loss: 0.054
epoch: 35, batch: 800 // loss: 0.058
epoch: 35, batch: 900 // loss: 0.064
epoch: 35, batch: 1000 // loss: 0.055
epoch: 35, batch: 1100 // loss: 0.054
epoch: 35, batch: 1200 // loss: 0.060
epoch: 35, batch: 1300 // loss: 0.063
epoch: 35, batch: 1400 // loss: 0.055
epoch: 35, batch: 1500 // loss: 0.062
epoch: 35, batch: 1600 // loss: 0.063
epoch: 35, batch: 1700 // loss: 0.056
epoch: 35, batch: 1800 // loss: 0.065
epoch: 35, batch: 1900 // loss: 0.061
epoch: 35, batch: 2000 // loss: 0.062
epoch: 35, batch: 2100 // loss: 0.063
epoch: 35, batch: 2200 // loss: 0.065
epoch: 35, batch: 2300 // loss: 0.062
epoch: 35, batch: 2400 // loss: 0.054
epoch: 35, batch: 2500 // loss: 0.055
epoch: 35, batch: 2600 // loss: 0.056
epoch: 35, batch: 2700 // loss: 0.054
epoch: 35, batch: 2800 // loss: 0.064
epoch: 35, batch: 2900 // loss: 0.054
epoch: 35, batch: 3000 // loss: 0.054
epoch: 35, batch: 3100 // loss: 0.057
epoch: 35, batch: 3200 // loss: 0.052
epoch: 35, batch: 3300 // loss: 0.051
epoch: 35, batch: 3400 // loss: 0.056
epoch: 35, batch: 3500 // loss: 0.048
epoch: 35, batch: 3600 // loss: 0.053
epoch: 35, batch: 3700 // loss: 0.060

epoch: 36, batch: 0 // loss: 0.067
epoch: 36, batch: 100 // loss: 0.063
epoch: 36, batch: 200 // loss: 0.054
epoch: 36, batch: 300 // loss: 0.062
epoch: 36, batch: 400 // loss: 0.060
epoch: 36, batch: 500 // loss: 0.050
epoch: 36, batch: 600 // loss: 0.050
epoch: 36, batch: 700 // loss: 0.053
epoch: 36, batch: 800 // loss: 0.057
epoch: 36, batch: 900 // loss: 0.064
epoch: 36, batch: 1000 // loss: 0.055
epoch: 36, batch: 1100 // loss: 0.054
epoch: 36, batch: 1200 // loss: 0.059
epoch: 36, batch: 1300 // loss: 0.063
epoch: 36, batch: 1400 // loss: 0.054
epoch: 36, batch: 1500 // loss: 0.061
epoch: 36, batch: 1600 // loss: 0.062
epoch: 36, batch: 1700 // loss: 0.056
epoch: 36, batch: 1800 // loss: 0.064
epoch: 36, batch: 1900 // loss: 0.060
epoch: 36, batch: 2000 // loss: 0.061
epoch: 36, batch: 2100 // loss: 0.062
epoch: 36, batch: 2200 // loss: 0.065
epoch: 36, batch: 2300 // loss: 0.062
epoch: 36, batch: 2400 // loss: 0.053
epoch: 36, batch: 2500 // loss: 0.055
epoch: 36, batch: 2600 // loss: 0.056
epoch: 36, batch: 2700 // loss: 0.054
epoch: 36, batch: 2800 // loss: 0.064
epoch: 36, batch: 2900 // loss: 0.054
epoch: 36, batch: 3000 // loss: 0.053
epoch: 36, batch: 3100 // loss: 0.057
epoch: 36, batch: 3200 // loss: 0.051
epoch: 36, batch: 3300 // loss: 0.051
epoch: 36, batch: 3400 // loss: 0.056
epoch: 36, batch: 3500 // loss: 0.048
epoch: 36, batch: 3600 // loss: 0.053
epoch: 36, batch: 3700 // loss: 0.059

epoch: 37, batch: 0 // loss: 0.067
epoch: 37, batch: 100 // loss: 0.063
epoch: 37, batch: 200 // loss: 0.054
epoch: 37, batch: 300 // loss: 0.061
epoch: 37, batch: 400 // loss: 0.060
epoch: 37, batch: 500 // loss: 0.049
epoch: 37, batch: 600 // loss: 0.050
epoch: 37, batch: 700 // loss: 0.053
epoch: 37, batch: 800 // loss: 0.057
epoch: 37, batch: 900 // loss: 0.063
epoch: 37, batch: 1000 // loss: 0.054
epoch: 37, batch: 1100 // loss: 0.053
epoch: 37, batch: 1200 // loss: 0.058
epoch: 37, batch: 1300 // loss: 0.062
epoch: 37, batch: 1400 // loss: 0.054
epoch: 37, batch: 1500 // loss: 0.061
epoch: 37, batch: 1600 // loss: 0.062
epoch: 37, batch: 1700 // loss: 0.055
epoch: 37, batch: 1800 // loss: 0.064
epoch: 37, batch: 1900 // loss: 0.060
epoch: 37, batch: 2000 // loss: 0.061
epoch: 37, batch: 2100 // loss: 0.062
epoch: 37, batch: 2200 // loss: 0.065
epoch: 37, batch: 2300 // loss: 0.061
epoch: 37, batch: 2400 // loss: 0.052
epoch: 37, batch: 2500 // loss: 0.054
epoch: 37, batch: 2600 // loss: 0.055
epoch: 37, batch: 2700 // loss: 0.054
epoch: 37, batch: 2800 // loss: 0.063
epoch: 37, batch: 2900 // loss: 0.053
epoch: 37, batch: 3000 // loss: 0.053
epoch: 37, batch: 3100 // loss: 0.056
epoch: 37, batch: 3200 // loss: 0.051
epoch: 37, batch: 3300 // loss: 0.050
epoch: 37, batch: 3400 // loss: 0.055
epoch: 37, batch: 3500 // loss: 0.047
epoch: 37, batch: 3600 // loss: 0.052
epoch: 37, batch: 3700 // loss: 0.058

epoch: 38, batch: 0 // loss: 0.066
epoch: 38, batch: 100 // loss: 0.062
epoch: 38, batch: 200 // loss: 0.053
epoch: 38, batch: 300 // loss: 0.061
epoch: 38, batch: 400 // loss: 0.059
epoch: 38, batch: 500 // loss: 0.049
epoch: 38, batch: 600 // loss: 0.049
epoch: 38, batch: 700 // loss: 0.052
epoch: 38, batch: 800 // loss: 0.056
epoch: 38, batch: 900 // loss: 0.063
epoch: 38, batch: 1000 // loss: 0.054
epoch: 38, batch: 1100 // loss: 0.053
epoch: 38, batch: 1200 // loss: 0.058
epoch: 38, batch: 1300 // loss: 0.062
epoch: 38, batch: 1400 // loss: 0.053
epoch: 38, batch: 1500 // loss: 0.060
epoch: 38, batch: 1600 // loss: 0.061
epoch: 38, batch: 1700 // loss: 0.055
epoch: 38, batch: 1800 // loss: 0.063
epoch: 38, batch: 1900 // loss: 0.059
epoch: 38, batch: 2000 // loss: 0.060
epoch: 38, batch: 2100 // loss: 0.061
epoch: 38, batch: 2200 // loss: 0.064
epoch: 38, batch: 2300 // loss: 0.061
epoch: 38, batch: 2400 // loss: 0.052
epoch: 38, batch: 2500 // loss: 0.054
epoch: 38, batch: 2600 // loss: 0.055
epoch: 38, batch: 2700 // loss: 0.053
epoch: 38, batch: 2800 // loss: 0.062
epoch: 38, batch: 2900 // loss: 0.053
epoch: 38, batch: 3000 // loss: 0.053
epoch: 38, batch: 3100 // loss: 0.056
epoch: 38, batch: 3200 // loss: 0.050
epoch: 38, batch: 3300 // loss: 0.050
epoch: 38, batch: 3400 // loss: 0.055
epoch: 38, batch: 3500 // loss: 0.047
epoch: 38, batch: 3600 // loss: 0.052
epoch: 38, batch: 3700 // loss: 0.058

epoch: 39, batch: 0 // loss: 0.066
epoch: 39, batch: 100 // loss: 0.062
epoch: 39, batch: 200 // loss: 0.053
epoch: 39, batch: 300 // loss: 0.060
epoch: 39, batch: 400 // loss: 0.059
epoch: 39, batch: 500 // loss: 0.048
epoch: 39, batch: 600 // loss: 0.049
epoch: 39, batch: 700 // loss: 0.052
epoch: 39, batch: 800 // loss: 0.056
epoch: 39, batch: 900 // loss: 0.062
epoch: 39, batch: 1000 // loss: 0.053
epoch: 39, batch: 1100 // loss: 0.052
epoch: 39, batch: 1200 // loss: 0.057
epoch: 39, batch: 1300 // loss: 0.061
epoch: 39, batch: 1400 // loss: 0.053
epoch: 39, batch: 1500 // loss: 0.060
epoch: 39, batch: 1600 // loss: 0.061
epoch: 39, batch: 1700 // loss: 0.055
epoch: 39, batch: 1800 // loss: 0.063
epoch: 39, batch: 1900 // loss: 0.058
epoch: 39, batch: 2000 // loss: 0.060
epoch: 39, batch: 2100 // loss: 0.060
epoch: 39, batch: 2200 // loss: 0.063
epoch: 39, batch: 2300 // loss: 0.060
epoch: 39, batch: 2400 // loss: 0.051
epoch: 39, batch: 2500 // loss: 0.053
epoch: 39, batch: 2600 // loss: 0.054
epoch: 39, batch: 2700 // loss: 0.053
epoch: 39, batch: 2800 // loss: 0.061
epoch: 39, batch: 2900 // loss: 0.052
epoch: 39, batch: 3000 // loss: 0.052
epoch: 39, batch: 3100 // loss: 0.055
epoch: 39, batch: 3200 // loss: 0.050
epoch: 39, batch: 3300 // loss: 0.049
epoch: 39, batch: 3400 // loss: 0.054
epoch: 39, batch: 3500 // loss: 0.046
epoch: 39, batch: 3600 // loss: 0.051
epoch: 39, batch: 3700 // loss: 0.057

epoch: 40, batch: 0 // loss: 0.065
epoch: 40, batch: 100 // loss: 0.061
epoch: 40, batch: 200 // loss: 0.052
epoch: 40, batch: 300 // loss: 0.060
epoch: 40, batch: 400 // loss: 0.058
epoch: 40, batch: 500 // loss: 0.048
epoch: 40, batch: 600 // loss: 0.048
epoch: 40, batch: 700 // loss: 0.052
epoch: 40, batch: 800 // loss: 0.055
epoch: 40, batch: 900 // loss: 0.061
epoch: 40, batch: 1000 // loss: 0.053
epoch: 40, batch: 1100 // loss: 0.052
epoch: 40, batch: 1200 // loss: 0.056
epoch: 40, batch: 1300 // loss: 0.060
epoch: 40, batch: 1400 // loss: 0.052
epoch: 40, batch: 1500 // loss: 0.059
epoch: 40, batch: 1600 // loss: 0.060
epoch: 40, batch: 1700 // loss: 0.054
epoch: 40, batch: 1800 // loss: 0.062
epoch: 40, batch: 1900 // loss: 0.058
epoch: 40, batch: 2000 // loss: 0.059
epoch: 40, batch: 2100 // loss: 0.060
epoch: 40, batch: 2200 // loss: 0.063
epoch: 40, batch: 2300 // loss: 0.060
epoch: 40, batch: 2400 // loss: 0.051
epoch: 40, batch: 2500 // loss: 0.053
epoch: 40, batch: 2600 // loss: 0.054
epoch: 40, batch: 2700 // loss: 0.052
epoch: 40, batch: 2800 // loss: 0.060
epoch: 40, batch: 2900 // loss: 0.052
epoch: 40, batch: 3000 // loss: 0.052
epoch: 40, batch: 3100 // loss: 0.054
epoch: 40, batch: 3200 // loss: 0.049
epoch: 40, batch: 3300 // loss: 0.048
epoch: 40, batch: 3400 // loss: 0.054
epoch: 40, batch: 3500 // loss: 0.046
epoch: 40, batch: 3600 // loss: 0.051
epoch: 40, batch: 3700 // loss: 0.056

epoch: 41, batch: 0 // loss: 0.065
epoch: 41, batch: 100 // loss: 0.060
epoch: 41, batch: 200 // loss: 0.052
epoch: 41, batch: 300 // loss: 0.059
epoch: 41, batch: 400 // loss: 0.057
epoch: 41, batch: 500 // loss: 0.047
epoch: 41, batch: 600 // loss: 0.048
epoch: 41, batch: 700 // loss: 0.051
epoch: 41, batch: 800 // loss: 0.054
epoch: 41, batch: 900 // loss: 0.061
epoch: 41, batch: 1000 // loss: 0.052
epoch: 41, batch: 1100 // loss: 0.051
epoch: 41, batch: 1200 // loss: 0.056
epoch: 41, batch: 1300 // loss: 0.060
epoch: 41, batch: 1400 // loss: 0.051
epoch: 41, batch: 1500 // loss: 0.058
epoch: 41, batch: 1600 // loss: 0.060
epoch: 41, batch: 1700 // loss: 0.054
epoch: 41, batch: 1800 // loss: 0.062
epoch: 41, batch: 1900 // loss: 0.057
epoch: 41, batch: 2000 // loss: 0.058
epoch: 41, batch: 2100 // loss: 0.059
epoch: 41, batch: 2200 // loss: 0.062
epoch: 41, batch: 2300 // loss: 0.059
epoch: 41, batch: 2400 // loss: 0.050
epoch: 41, batch: 2500 // loss: 0.052
epoch: 41, batch: 2600 // loss: 0.053
epoch: 41, batch: 2700 // loss: 0.051
epoch: 41, batch: 2800 // loss: 0.060
epoch: 41, batch: 2900 // loss: 0.051
epoch: 41, batch: 3000 // loss: 0.052
epoch: 41, batch: 3100 // loss: 0.054
epoch: 41, batch: 3200 // loss: 0.048
epoch: 41, batch: 3300 // loss: 0.048
epoch: 41, batch: 3400 // loss: 0.053
epoch: 41, batch: 3500 // loss: 0.045
epoch: 41, batch: 3600 // loss: 0.050
epoch: 41, batch: 3700 // loss: 0.055

epoch: 42, batch: 0 // loss: 0.064
epoch: 42, batch: 100 // loss: 0.060
epoch: 42, batch: 200 // loss: 0.051
epoch: 42, batch: 300 // loss: 0.058
epoch: 42, batch: 400 // loss: 0.057
epoch: 42, batch: 500 // loss: 0.047
epoch: 42, batch: 600 // loss: 0.048
epoch: 42, batch: 700 // loss: 0.051
epoch: 42, batch: 800 // loss: 0.053
epoch: 42, batch: 900 // loss: 0.060
epoch: 42, batch: 1000 // loss: 0.052
epoch: 42, batch: 1100 // loss: 0.051
epoch: 42, batch: 1200 // loss: 0.055
epoch: 42, batch: 1300 // loss: 0.059
epoch: 42, batch: 1400 // loss: 0.051
epoch: 42, batch: 1500 // loss: 0.058
epoch: 42, batch: 1600 // loss: 0.059
epoch: 42, batch: 1700 // loss: 0.053
epoch: 42, batch: 1800 // loss: 0.061
epoch: 42, batch: 1900 // loss: 0.056
epoch: 42, batch: 2000 // loss: 0.057
epoch: 42, batch: 2100 // loss: 0.058
epoch: 42, batch: 2200 // loss: 0.061
epoch: 42, batch: 2300 // loss: 0.059
epoch: 42, batch: 2400 // loss: 0.050
epoch: 42, batch: 2500 // loss: 0.052
epoch: 42, batch: 2600 // loss: 0.053
epoch: 42, batch: 2700 // loss: 0.051
epoch: 42, batch: 2800 // loss: 0.059
epoch: 42, batch: 2900 // loss: 0.050
epoch: 42, batch: 3000 // loss: 0.051
epoch: 42, batch: 3100 // loss: 0.053
epoch: 42, batch: 3200 // loss: 0.048
epoch: 42, batch: 3300 // loss: 0.047
epoch: 42, batch: 3400 // loss: 0.052
epoch: 42, batch: 3500 // loss: 0.044
epoch: 42, batch: 3600 // loss: 0.050
epoch: 42, batch: 3700 // loss: 0.055

epoch: 43, batch: 0 // loss: 0.063
epoch: 43, batch: 100 // loss: 0.059
epoch: 43, batch: 200 // loss: 0.050
epoch: 43, batch: 300 // loss: 0.058
epoch: 43, batch: 400 // loss: 0.056
epoch: 43, batch: 500 // loss: 0.046
epoch: 43, batch: 600 // loss: 0.047
epoch: 43, batch: 700 // loss: 0.050
epoch: 43, batch: 800 // loss: 0.053
epoch: 43, batch: 900 // loss: 0.059
epoch: 43, batch: 1000 // loss: 0.051
epoch: 43, batch: 1100 // loss: 0.050
epoch: 43, batch: 1200 // loss: 0.054
epoch: 43, batch: 1300 // loss: 0.058
epoch: 43, batch: 1400 // loss: 0.050
epoch: 43, batch: 1500 // loss: 0.057
epoch: 43, batch: 1600 // loss: 0.059
epoch: 43, batch: 1700 // loss: 0.053
epoch: 43, batch: 1800 // loss: 0.060
epoch: 43, batch: 1900 // loss: 0.055
epoch: 43, batch: 2000 // loss: 0.056
epoch: 43, batch: 2100 // loss: 0.057
epoch: 43, batch: 2200 // loss: 0.060
epoch: 43, batch: 2300 // loss: 0.058
epoch: 43, batch: 2400 // loss: 0.049
epoch: 43, batch: 2500 // loss: 0.051
epoch: 43, batch: 2600 // loss: 0.052
epoch: 43, batch: 2700 // loss: 0.050
epoch: 43, batch: 2800 // loss: 0.058
epoch: 43, batch: 2900 // loss: 0.050
epoch: 43, batch: 3000 // loss: 0.051
epoch: 43, batch: 3100 // loss: 0.052
epoch: 43, batch: 3200 // loss: 0.047
epoch: 43, batch: 3300 // loss: 0.046
epoch: 43, batch: 3400 // loss: 0.052
epoch: 43, batch: 3500 // loss: 0.044
epoch: 43, batch: 3600 // loss: 0.049
epoch: 43, batch: 3700 // loss: 0.054

epoch: 44, batch: 0 // loss: 0.063
epoch: 44, batch: 100 // loss: 0.058
epoch: 44, batch: 200 // loss: 0.050
epoch: 44, batch: 300 // loss: 0.057
epoch: 44, batch: 400 // loss: 0.055
epoch: 44, batch: 500 // loss: 0.046
epoch: 44, batch: 600 // loss: 0.047
epoch: 44, batch: 700 // loss: 0.050
epoch: 44, batch: 800 // loss: 0.052
epoch: 44, batch: 900 // loss: 0.058
epoch: 44, batch: 1000 // loss: 0.051
epoch: 44, batch: 1100 // loss: 0.050
epoch: 44, batch: 1200 // loss: 0.053
epoch: 44, batch: 1300 // loss: 0.057
epoch: 44, batch: 1400 // loss: 0.050
epoch: 44, batch: 1500 // loss: 0.056
epoch: 44, batch: 1600 // loss: 0.058
epoch: 44, batch: 1700 // loss: 0.052
epoch: 44, batch: 1800 // loss: 0.060
epoch: 44, batch: 1900 // loss: 0.055
epoch: 44, batch: 2000 // loss: 0.056
epoch: 44, batch: 2100 // loss: 0.056
epoch: 44, batch: 2200 // loss: 0.060
epoch: 44, batch: 2300 // loss: 0.057
epoch: 44, batch: 2400 // loss: 0.048
epoch: 44, batch: 2500 // loss: 0.050
epoch: 44, batch: 2600 // loss: 0.052
epoch: 44, batch: 2700 // loss: 0.050
epoch: 44, batch: 2800 // loss: 0.057
epoch: 44, batch: 2900 // loss: 0.049
epoch: 44, batch: 3000 // loss: 0.051
epoch: 44, batch: 3100 // loss: 0.051
epoch: 44, batch: 3200 // loss: 0.046
epoch: 44, batch: 3300 // loss: 0.046
epoch: 44, batch: 3400 // loss: 0.051
epoch: 44, batch: 3500 // loss: 0.043
epoch: 44, batch: 3600 // loss: 0.049
epoch: 44, batch: 3700 // loss: 0.053

epoch: 45, batch: 0 // loss: 0.062
epoch: 45, batch: 100 // loss: 0.057
epoch: 45, batch: 200 // loss: 0.049
epoch: 45, batch: 300 // loss: 0.056
epoch: 45, batch: 400 // loss: 0.055
epoch: 45, batch: 500 // loss: 0.045
epoch: 45, batch: 600 // loss: 0.046
epoch: 45, batch: 700 // loss: 0.049
epoch: 45, batch: 800 // loss: 0.051
epoch: 45, batch: 900 // loss: 0.057
epoch: 45, batch: 1000 // loss: 0.050
epoch: 45, batch: 1100 // loss: 0.049
epoch: 45, batch: 1200 // loss: 0.052
epoch: 45, batch: 1300 // loss: 0.056
epoch: 45, batch: 1400 // loss: 0.049
epoch: 45, batch: 1500 // loss: 0.056
epoch: 45, batch: 1600 // loss: 0.057
epoch: 45, batch: 1700 // loss: 0.052
epoch: 45, batch: 1800 // loss: 0.059
epoch: 45, batch: 1900 // loss: 0.054
epoch: 45, batch: 2000 // loss: 0.055
epoch: 45, batch: 2100 // loss: 0.056
epoch: 45, batch: 2200 // loss: 0.059
epoch: 45, batch: 2300 // loss: 0.057
epoch: 45, batch: 2400 // loss: 0.048
epoch: 45, batch: 2500 // loss: 0.050
epoch: 45, batch: 2600 // loss: 0.051
epoch: 45, batch: 2700 // loss: 0.049
epoch: 45, batch: 2800 // loss: 0.056
epoch: 45, batch: 2900 // loss: 0.049
epoch: 45, batch: 3000 // loss: 0.050
epoch: 45, batch: 3100 // loss: 0.051
epoch: 45, batch: 3200 // loss: 0.046
epoch: 45, batch: 3300 // loss: 0.045
epoch: 45, batch: 3400 // loss: 0.051
epoch: 45, batch: 3500 // loss: 0.042
epoch: 45, batch: 3600 // loss: 0.048
epoch: 45, batch: 3700 // loss: 0.052

epoch: 46, batch: 0 // loss: 0.061
epoch: 46, batch: 100 // loss: 0.056
epoch: 46, batch: 200 // loss: 0.049
epoch: 46, batch: 300 // loss: 0.055
epoch: 46, batch: 400 // loss: 0.054
epoch: 46, batch: 500 // loss: 0.045
epoch: 46, batch: 600 // loss: 0.046
epoch: 46, batch: 700 // loss: 0.049
epoch: 46, batch: 800 // loss: 0.050
epoch: 46, batch: 900 // loss: 0.057
epoch: 46, batch: 1000 // loss: 0.050
epoch: 46, batch: 1100 // loss: 0.049
epoch: 46, batch: 1200 // loss: 0.052
epoch: 46, batch: 1300 // loss: 0.055
epoch: 46, batch: 1400 // loss: 0.048
epoch: 46, batch: 1500 // loss: 0.055
epoch: 46, batch: 1600 // loss: 0.057
epoch: 46, batch: 1700 // loss: 0.051
epoch: 46, batch: 1800 // loss: 0.059
epoch: 46, batch: 1900 // loss: 0.053
epoch: 46, batch: 2000 // loss: 0.054
epoch: 46, batch: 2100 // loss: 0.055
epoch: 46, batch: 2200 // loss: 0.058
epoch: 46, batch: 2300 // loss: 0.056
epoch: 46, batch: 2400 // loss: 0.047
epoch: 46, batch: 2500 // loss: 0.049
epoch: 46, batch: 2600 // loss: 0.051
epoch: 46, batch: 2700 // loss: 0.048
epoch: 46, batch: 2800 // loss: 0.055
epoch: 46, batch: 2900 // loss: 0.048
epoch: 46, batch: 3000 // loss: 0.050
epoch: 46, batch: 3100 // loss: 0.050
epoch: 46, batch: 3200 // loss: 0.045
epoch: 46, batch: 3300 // loss: 0.044
epoch: 46, batch: 3400 // loss: 0.050
epoch: 46, batch: 3500 // loss: 0.042
epoch: 46, batch: 3600 // loss: 0.048
epoch: 46, batch: 3700 // loss: 0.051

epoch: 47, batch: 0 // loss: 0.060
epoch: 47, batch: 100 // loss: 0.055
epoch: 47, batch: 200 // loss: 0.048
epoch: 47, batch: 300 // loss: 0.055
epoch: 47, batch: 400 // loss: 0.053
epoch: 47, batch: 500 // loss: 0.044
epoch: 47, batch: 600 // loss: 0.045
epoch: 47, batch: 700 // loss: 0.048
epoch: 47, batch: 800 // loss: 0.050
epoch: 47, batch: 900 // loss: 0.056
epoch: 47, batch: 1000 // loss: 0.050
epoch: 47, batch: 1100 // loss: 0.048
epoch: 47, batch: 1200 // loss: 0.051
epoch: 47, batch: 1300 // loss: 0.054
epoch: 47, batch: 1400 // loss: 0.048
epoch: 47, batch: 1500 // loss: 0.054
epoch: 47, batch: 1600 // loss: 0.056
epoch: 47, batch: 1700 // loss: 0.050
epoch: 47, batch: 1800 // loss: 0.058
epoch: 47, batch: 1900 // loss: 0.052
epoch: 47, batch: 2000 // loss: 0.053
epoch: 47, batch: 2100 // loss: 0.054
epoch: 47, batch: 2200 // loss: 0.057
epoch: 47, batch: 2300 // loss: 0.055
epoch: 47, batch: 2400 // loss: 0.047
epoch: 47, batch: 2500 // loss: 0.049
epoch: 47, batch: 2600 // loss: 0.050
epoch: 47, batch: 2700 // loss: 0.048
epoch: 47, batch: 2800 // loss: 0.054
epoch: 47, batch: 2900 // loss: 0.047
epoch: 47, batch: 3000 // loss: 0.049
epoch: 47, batch: 3100 // loss: 0.049
epoch: 47, batch: 3200 // loss: 0.044
epoch: 47, batch: 3300 // loss: 0.044
epoch: 47, batch: 3400 // loss: 0.049
epoch: 47, batch: 3500 // loss: 0.041
epoch: 47, batch: 3600 // loss: 0.047
epoch: 47, batch: 3700 // loss: 0.050

epoch: 48, batch: 0 // loss: 0.060
epoch: 48, batch: 100 // loss: 0.055
epoch: 48, batch: 200 // loss: 0.047
epoch: 48, batch: 300 // loss: 0.054
epoch: 48, batch: 400 // loss: 0.052
epoch: 48, batch: 500 // loss: 0.044
epoch: 48, batch: 600 // loss: 0.045
epoch: 48, batch: 700 // loss: 0.048
epoch: 48, batch: 800 // loss: 0.049
epoch: 48, batch: 900 // loss: 0.055
epoch: 48, batch: 1000 // loss: 0.049
epoch: 48, batch: 1100 // loss: 0.048
epoch: 48, batch: 1200 // loss: 0.050
epoch: 48, batch: 1300 // loss: 0.053
epoch: 48, batch: 1400 // loss: 0.047
epoch: 48, batch: 1500 // loss: 0.054
epoch: 48, batch: 1600 // loss: 0.056
epoch: 48, batch: 1700 // loss: 0.050
epoch: 48, batch: 1800 // loss: 0.057
epoch: 48, batch: 1900 // loss: 0.052
epoch: 48, batch: 2000 // loss: 0.052
epoch: 48, batch: 2100 // loss: 0.053
epoch: 48, batch: 2200 // loss: 0.056
epoch: 48, batch: 2300 // loss: 0.055
epoch: 48, batch: 2400 // loss: 0.046
epoch: 48, batch: 2500 // loss: 0.048
epoch: 48, batch: 2600 // loss: 0.049
epoch: 48, batch: 2700 // loss: 0.047
epoch: 48, batch: 2800 // loss: 0.053
epoch: 48, batch: 2900 // loss: 0.047
epoch: 48, batch: 3000 // loss: 0.049
epoch: 48, batch: 3100 // loss: 0.049
epoch: 48, batch: 3200 // loss: 0.044
epoch: 48, batch: 3300 // loss: 0.043
epoch: 48, batch: 3400 // loss: 0.049
epoch: 48, batch: 3500 // loss: 0.040
epoch: 48, batch: 3600 // loss: 0.047
epoch: 48, batch: 3700 // loss: 0.050

epoch: 49, batch: 0 // loss: 0.059
epoch: 49, batch: 100 // loss: 0.054
epoch: 49, batch: 200 // loss: 0.047
epoch: 49, batch: 300 // loss: 0.054
epoch: 49, batch: 400 // loss: 0.052
epoch: 49, batch: 500 // loss: 0.043
epoch: 49, batch: 600 // loss: 0.044
epoch: 49, batch: 700 // loss: 0.048
epoch: 49, batch: 800 // loss: 0.048
epoch: 49, batch: 900 // loss: 0.054
epoch: 49, batch: 1000 // loss: 0.049
epoch: 49, batch: 1100 // loss: 0.047
epoch: 49, batch: 1200 // loss: 0.049
epoch: 49, batch: 1300 // loss: 0.052
epoch: 49, batch: 1400 // loss: 0.047
epoch: 49, batch: 1500 // loss: 0.053
epoch: 49, batch: 1600 // loss: 0.055
epoch: 49, batch: 1700 // loss: 0.049
epoch: 49, batch: 1800 // loss: 0.057
epoch: 49, batch: 1900 // loss: 0.051
epoch: 49, batch: 2000 // loss: 0.052
epoch: 49, batch: 2100 // loss: 0.053
epoch: 49, batch: 2200 // loss: 0.056
epoch: 49, batch: 2300 // loss: 0.054
epoch: 49, batch: 2400 // loss: 0.046
epoch: 49, batch: 2500 // loss: 0.047
epoch: 49, batch: 2600 // loss: 0.049
epoch: 49, batch: 2700 // loss: 0.047
epoch: 49, batch: 2800 // loss: 0.052
epoch: 49, batch: 2900 // loss: 0.046
epoch: 49, batch: 3000 // loss: 0.049
epoch: 49, batch: 3100 // loss: 0.048
epoch: 49, batch: 3200 // loss: 0.043
epoch: 49, batch: 3300 // loss: 0.043
epoch: 49, batch: 3400 // loss: 0.048
epoch: 49, batch: 3500 // loss: 0.040
epoch: 49, batch: 3600 // loss: 0.047
epoch: 49, batch: 3700 // loss: 0.049

epoch: 50, batch: 0 // loss: 0.058
epoch: 50, batch: 100 // loss: 0.053
epoch: 50, batch: 200 // loss: 0.046
epoch: 50, batch: 300 // loss: 0.053
epoch: 50, batch: 400 // loss: 0.051
epoch: 50, batch: 500 // loss: 0.043
epoch: 50, batch: 600 // loss: 0.044
epoch: 50, batch: 700 // loss: 0.047
epoch: 50, batch: 800 // loss: 0.047
epoch: 50, batch: 900 // loss: 0.054
epoch: 50, batch: 1000 // loss: 0.048
epoch: 50, batch: 1100 // loss: 0.047
epoch: 50, batch: 1200 // loss: 0.049
epoch: 50, batch: 1300 // loss: 0.051
epoch: 50, batch: 1400 // loss: 0.046
epoch: 50, batch: 1500 // loss: 0.052
epoch: 50, batch: 1600 // loss: 0.055
epoch: 50, batch: 1700 // loss: 0.049
epoch: 50, batch: 1800 // loss: 0.056
epoch: 50, batch: 1900 // loss: 0.050
epoch: 50, batch: 2000 // loss: 0.051
epoch: 50, batch: 2100 // loss: 0.052
epoch: 50, batch: 2200 // loss: 0.055
epoch: 50, batch: 2300 // loss: 0.053
epoch: 50, batch: 2400 // loss: 0.045
epoch: 50, batch: 2500 // loss: 0.047
epoch: 50, batch: 2600 // loss: 0.048
epoch: 50, batch: 2700 // loss: 0.046
epoch: 50, batch: 2800 // loss: 0.051
epoch: 50, batch: 2900 // loss: 0.046
epoch: 50, batch: 3000 // loss: 0.048
epoch: 50, batch: 3100 // loss: 0.047
epoch: 50, batch: 3200 // loss: 0.043
epoch: 50, batch: 3300 // loss: 0.042
epoch: 50, batch: 3400 // loss: 0.048
epoch: 50, batch: 3500 // loss: 0.039
epoch: 50, batch: 3600 // loss: 0.046
epoch: 50, batch: 3700 // loss: 0.048

epoch: 51, batch: 0 // loss: 0.058
epoch: 51, batch: 100 // loss: 0.052
epoch: 51, batch: 200 // loss: 0.046
epoch: 51, batch: 300 // loss: 0.052
epoch: 51, batch: 400 // loss: 0.051
epoch: 51, batch: 500 // loss: 0.042
epoch: 51, batch: 600 // loss: 0.043
epoch: 51, batch: 700 // loss: 0.047
epoch: 51, batch: 800 // loss: 0.047
epoch: 51, batch: 900 // loss: 0.053
epoch: 51, batch: 1000 // loss: 0.048
epoch: 51, batch: 1100 // loss: 0.046
epoch: 51, batch: 1200 // loss: 0.048
epoch: 51, batch: 1300 // loss: 0.051
epoch: 51, batch: 1400 // loss: 0.046
epoch: 51, batch: 1500 // loss: 0.052
epoch: 51, batch: 1600 // loss: 0.054
epoch: 51, batch: 1700 // loss: 0.049
epoch: 51, batch: 1800 // loss: 0.056
epoch: 51, batch: 1900 // loss: 0.050
epoch: 51, batch: 2000 // loss: 0.051
epoch: 51, batch: 2100 // loss: 0.051
epoch: 51, batch: 2200 // loss: 0.054
epoch: 51, batch: 2300 // loss: 0.053
epoch: 51, batch: 2400 // loss: 0.045
epoch: 51, batch: 2500 // loss: 0.046
epoch: 51, batch: 2600 // loss: 0.048
epoch: 51, batch: 2700 // loss: 0.046
epoch: 51, batch: 2800 // loss: 0.051
epoch: 51, batch: 2900 // loss: 0.045
epoch: 51, batch: 3000 // loss: 0.048
epoch: 51, batch: 3100 // loss: 0.047
epoch: 51, batch: 3200 // loss: 0.042
epoch: 51, batch: 3300 // loss: 0.042
epoch: 51, batch: 3400 // loss: 0.047
epoch: 51, batch: 3500 // loss: 0.039
epoch: 51, batch: 3600 // loss: 0.046
epoch: 51, batch: 3700 // loss: 0.048

epoch: 52, batch: 0 // loss: 0.057
epoch: 52, batch: 100 // loss: 0.051
epoch: 52, batch: 200 // loss: 0.046
epoch: 52, batch: 300 // loss: 0.052
epoch: 52, batch: 400 // loss: 0.050
epoch: 52, batch: 500 // loss: 0.042
epoch: 52, batch: 600 // loss: 0.043
epoch: 52, batch: 700 // loss: 0.047
epoch: 52, batch: 800 // loss: 0.046
epoch: 52, batch: 900 // loss: 0.052
epoch: 52, batch: 1000 // loss: 0.048
epoch: 52, batch: 1100 // loss: 0.046
epoch: 52, batch: 1200 // loss: 0.048
epoch: 52, batch: 1300 // loss: 0.050
epoch: 52, batch: 1400 // loss: 0.045
epoch: 52, batch: 1500 // loss: 0.051
epoch: 52, batch: 1600 // loss: 0.054
epoch: 52, batch: 1700 // loss: 0.048
epoch: 52, batch: 1800 // loss: 0.055
epoch: 52, batch: 1900 // loss: 0.049
epoch: 52, batch: 2000 // loss: 0.050
epoch: 52, batch: 2100 // loss: 0.051
epoch: 52, batch: 2200 // loss: 0.054
epoch: 52, batch: 2300 // loss: 0.052
epoch: 52, batch: 2400 // loss: 0.044
epoch: 52, batch: 2500 // loss: 0.046
epoch: 52, batch: 2600 // loss: 0.048
epoch: 52, batch: 2700 // loss: 0.045
epoch: 52, batch: 2800 // loss: 0.050
epoch: 52, batch: 2900 // loss: 0.045
epoch: 52, batch: 3000 // loss: 0.048
epoch: 52, batch: 3100 // loss: 0.046
epoch: 52, batch: 3200 // loss: 0.042
epoch: 52, batch: 3300 // loss: 0.041
epoch: 52, batch: 3400 // loss: 0.047
epoch: 52, batch: 3500 // loss: 0.038
epoch: 52, batch: 3600 // loss: 0.046
epoch: 52, batch: 3700 // loss: 0.047

epoch: 53, batch: 0 // loss: 0.057
epoch: 53, batch: 100 // loss: 0.051
epoch: 53, batch: 200 // loss: 0.045
epoch: 53, batch: 300 // loss: 0.051
epoch: 53, batch: 400 // loss: 0.050
epoch: 53, batch: 500 // loss: 0.042
epoch: 53, batch: 600 // loss: 0.043
epoch: 53, batch: 700 // loss: 0.046
epoch: 53, batch: 800 // loss: 0.046
epoch: 53, batch: 900 // loss: 0.052
epoch: 53, batch: 1000 // loss: 0.047
epoch: 53, batch: 1100 // loss: 0.045
epoch: 53, batch: 1200 // loss: 0.047
epoch: 53, batch: 1300 // loss: 0.049
epoch: 53, batch: 1400 // loss: 0.045
epoch: 53, batch: 1500 // loss: 0.051
epoch: 53, batch: 1600 // loss: 0.054
epoch: 53, batch: 1700 // loss: 0.048
epoch: 53, batch: 1800 // loss: 0.055
epoch: 53, batch: 1900 // loss: 0.049
epoch: 53, batch: 2000 // loss: 0.050
epoch: 53, batch: 2100 // loss: 0.050
epoch: 53, batch: 2200 // loss: 0.053
epoch: 53, batch: 2300 // loss: 0.052
epoch: 53, batch: 2400 // loss: 0.044
epoch: 53, batch: 2500 // loss: 0.045
epoch: 53, batch: 2600 // loss: 0.047
epoch: 53, batch: 2700 // loss: 0.045
epoch: 53, batch: 2800 // loss: 0.050
epoch: 53, batch: 2900 // loss: 0.044
epoch: 53, batch: 3000 // loss: 0.047
epoch: 53, batch: 3100 // loss: 0.046
epoch: 53, batch: 3200 // loss: 0.041
epoch: 53, batch: 3300 // loss: 0.041
epoch: 53, batch: 3400 // loss: 0.046
epoch: 53, batch: 3500 // loss: 0.038
epoch: 53, batch: 3600 // loss: 0.045
epoch: 53, batch: 3700 // loss: 0.047

epoch: 54, batch: 0 // loss: 0.056
epoch: 54, batch: 100 // loss: 0.050
epoch: 54, batch: 200 // loss: 0.045
epoch: 54, batch: 300 // loss: 0.051
epoch: 54, batch: 400 // loss: 0.049
epoch: 54, batch: 500 // loss: 0.041
epoch: 54, batch: 600 // loss: 0.042
epoch: 54, batch: 700 // loss: 0.046
epoch: 54, batch: 800 // loss: 0.045
epoch: 54, batch: 900 // loss: 0.052
epoch: 54, batch: 1000 // loss: 0.047
epoch: 54, batch: 1100 // loss: 0.045
epoch: 54, batch: 1200 // loss: 0.047
epoch: 54, batch: 1300 // loss: 0.049
epoch: 54, batch: 1400 // loss: 0.045
epoch: 54, batch: 1500 // loss: 0.051
epoch: 54, batch: 1600 // loss: 0.053
epoch: 54, batch: 1700 // loss: 0.047
epoch: 54, batch: 1800 // loss: 0.055
epoch: 54, batch: 1900 // loss: 0.048
epoch: 54, batch: 2000 // loss: 0.049
epoch: 54, batch: 2100 // loss: 0.050
epoch: 54, batch: 2200 // loss: 0.053
epoch: 54, batch: 2300 // loss: 0.051
epoch: 54, batch: 2400 // loss: 0.044
epoch: 54, batch: 2500 // loss: 0.045
epoch: 54, batch: 2600 // loss: 0.047
epoch: 54, batch: 2700 // loss: 0.044
epoch: 54, batch: 2800 // loss: 0.049
epoch: 54, batch: 2900 // loss: 0.044
epoch: 54, batch: 3000 // loss: 0.047
epoch: 54, batch: 3100 // loss: 0.045
epoch: 54, batch: 3200 // loss: 0.041
epoch: 54, batch: 3300 // loss: 0.040
epoch: 54, batch: 3400 // loss: 0.046
epoch: 54, batch: 3500 // loss: 0.037
epoch: 54, batch: 3600 // loss: 0.045
epoch: 54, batch: 3700 // loss: 0.046

epoch: 55, batch: 0 // loss: 0.056
epoch: 55, batch: 100 // loss: 0.050
epoch: 55, batch: 200 // loss: 0.045
epoch: 55, batch: 300 // loss: 0.051
epoch: 55, batch: 400 // loss: 0.049
epoch: 55, batch: 500 // loss: 0.041
epoch: 55, batch: 600 // loss: 0.042
epoch: 55, batch: 700 // loss: 0.046
epoch: 55, batch: 800 // loss: 0.045
epoch: 55, batch: 900 // loss: 0.051
epoch: 55, batch: 1000 // loss: 0.047
epoch: 55, batch: 1100 // loss: 0.045
epoch: 55, batch: 1200 // loss: 0.046
epoch: 55, batch: 1300 // loss: 0.048
epoch: 55, batch: 1400 // loss: 0.044
epoch: 55, batch: 1500 // loss: 0.050
epoch: 55, batch: 1600 // loss: 0.053
epoch: 55, batch: 1700 // loss: 0.047
epoch: 55, batch: 1800 // loss: 0.054
epoch: 55, batch: 1900 // loss: 0.048
epoch: 55, batch: 2000 // loss: 0.049
epoch: 55, batch: 2100 // loss: 0.049
epoch: 55, batch: 2200 // loss: 0.052
epoch: 55, batch: 2300 // loss: 0.051
epoch: 55, batch: 2400 // loss: 0.044
epoch: 55, batch: 2500 // loss: 0.045
epoch: 55, batch: 2600 // loss: 0.047
epoch: 55, batch: 2700 // loss: 0.044
epoch: 55, batch: 2800 // loss: 0.049
epoch: 55, batch: 2900 // loss: 0.044
epoch: 55, batch: 3000 // loss: 0.047
epoch: 55, batch: 3100 // loss: 0.045
epoch: 55, batch: 3200 // loss: 0.041
epoch: 55, batch: 3300 // loss: 0.040
epoch: 55, batch: 3400 // loss: 0.046
epoch: 55, batch: 3500 // loss: 0.037
epoch: 55, batch: 3600 // loss: 0.045
epoch: 55, batch: 3700 // loss: 0.046

epoch: 56, batch: 0 // loss: 0.056
epoch: 56, batch: 100 // loss: 0.049
epoch: 56, batch: 200 // loss: 0.044
epoch: 56, batch: 300 // loss: 0.050
epoch: 56, batch: 400 // loss: 0.048
epoch: 56, batch: 500 // loss: 0.041
epoch: 56, batch: 600 // loss: 0.042
epoch: 56, batch: 700 // loss: 0.046
epoch: 56, batch: 800 // loss: 0.044
epoch: 56, batch: 900 // loss: 0.051
epoch: 56, batch: 1000 // loss: 0.047
epoch: 56, batch: 1100 // loss: 0.044
epoch: 56, batch: 1200 // loss: 0.046
epoch: 56, batch: 1300 // loss: 0.048
epoch: 56, batch: 1400 // loss: 0.044
epoch: 56, batch: 1500 // loss: 0.050
epoch: 56, batch: 1600 // loss: 0.053
epoch: 56, batch: 1700 // loss: 0.047
epoch: 56, batch: 1800 // loss: 0.054
epoch: 56, batch: 1900 // loss: 0.047
epoch: 56, batch: 2000 // loss: 0.048
epoch: 56, batch: 2100 // loss: 0.049
epoch: 56, batch: 2200 // loss: 0.052
epoch: 56, batch: 2300 // loss: 0.051
epoch: 56, batch: 2400 // loss: 0.043
epoch: 56, batch: 2500 // loss: 0.044
epoch: 56, batch: 2600 // loss: 0.046
epoch: 56, batch: 2700 // loss: 0.044
epoch: 56, batch: 2800 // loss: 0.048
epoch: 56, batch: 2900 // loss: 0.044
epoch: 56, batch: 3000 // loss: 0.047
epoch: 56, batch: 3100 // loss: 0.045
epoch: 56, batch: 3200 // loss: 0.041
epoch: 56, batch: 3300 // loss: 0.040
epoch: 56, batch: 3400 // loss: 0.046
epoch: 56, batch: 3500 // loss: 0.037
epoch: 56, batch: 3600 // loss: 0.045
epoch: 56, batch: 3700 // loss: 0.045

epoch: 57, batch: 0 // loss: 0.055
epoch: 57, batch: 100 // loss: 0.049
epoch: 57, batch: 200 // loss: 0.044
epoch: 57, batch: 300 // loss: 0.050
epoch: 57, batch: 400 // loss: 0.048
epoch: 57, batch: 500 // loss: 0.041
epoch: 57, batch: 600 // loss: 0.042
epoch: 57, batch: 700 // loss: 0.045
epoch: 57, batch: 800 // loss: 0.044
epoch: 57, batch: 900 // loss: 0.051
epoch: 57, batch: 1000 // loss: 0.046
epoch: 57, batch: 1100 // loss: 0.044
epoch: 57, batch: 1200 // loss: 0.046
epoch: 57, batch: 1300 // loss: 0.047
epoch: 57, batch: 1400 // loss: 0.044
epoch: 57, batch: 1500 // loss: 0.050
epoch: 57, batch: 1600 // loss: 0.053
epoch: 57, batch: 1700 // loss: 0.047
epoch: 57, batch: 1800 // loss: 0.054
epoch: 57, batch: 1900 // loss: 0.047
epoch: 57, batch: 2000 // loss: 0.048
epoch: 57, batch: 2100 // loss: 0.049
epoch: 57, batch: 2200 // loss: 0.051
epoch: 57, batch: 2300 // loss: 0.051
epoch: 57, batch: 2400 // loss: 0.043
epoch: 57, batch: 2500 // loss: 0.044
epoch: 57, batch: 2600 // loss: 0.046
epoch: 57, batch: 2700 // loss: 0.044
epoch: 57, batch: 2800 // loss: 0.048
epoch: 57, batch: 2900 // loss: 0.043
epoch: 57, batch: 3000 // loss: 0.047
epoch: 57, batch: 3100 // loss: 0.045
epoch: 57, batch: 3200 // loss: 0.040
epoch: 57, batch: 3300 // loss: 0.040
epoch: 57, batch: 3400 // loss: 0.045
epoch: 57, batch: 3500 // loss: 0.036
epoch: 57, batch: 3600 // loss: 0.044
epoch: 57, batch: 3700 // loss: 0.045

epoch: 58, batch: 0 // loss: 0.055
epoch: 58, batch: 100 // loss: 0.048
epoch: 58, batch: 200 // loss: 0.044
epoch: 58, batch: 300 // loss: 0.050
epoch: 58, batch: 400 // loss: 0.048
epoch: 58, batch: 500 // loss: 0.041
epoch: 58, batch: 600 // loss: 0.041
epoch: 58, batch: 700 // loss: 0.045
epoch: 58, batch: 800 // loss: 0.044
epoch: 58, batch: 900 // loss: 0.050
epoch: 58, batch: 1000 // loss: 0.046
epoch: 58, batch: 1100 // loss: 0.044
epoch: 58, batch: 1200 // loss: 0.045
epoch: 58, batch: 1300 // loss: 0.047
epoch: 58, batch: 1400 // loss: 0.044
epoch: 58, batch: 1500 // loss: 0.049
epoch: 58, batch: 1600 // loss: 0.053
epoch: 58, batch: 1700 // loss: 0.046
epoch: 58, batch: 1800 // loss: 0.054
epoch: 58, batch: 1900 // loss: 0.047
epoch: 58, batch: 2000 // loss: 0.048
epoch: 58, batch: 2100 // loss: 0.048
epoch: 58, batch: 2200 // loss: 0.051
epoch: 58, batch: 2300 // loss: 0.050
epoch: 58, batch: 2400 // loss: 0.043
epoch: 58, batch: 2500 // loss: 0.044
epoch: 58, batch: 2600 // loss: 0.046
epoch: 58, batch: 2700 // loss: 0.044
epoch: 58, batch: 2800 // loss: 0.048
epoch: 58, batch: 2900 // loss: 0.043
epoch: 58, batch: 3000 // loss: 0.046
epoch: 58, batch: 3100 // loss: 0.044
epoch: 58, batch: 3200 // loss: 0.040
epoch: 58, batch: 3300 // loss: 0.039
epoch: 58, batch: 3400 // loss: 0.045
epoch: 58, batch: 3500 // loss: 0.036
epoch: 58, batch: 3600 // loss: 0.044
epoch: 58, batch: 3700 // loss: 0.045

epoch: 59, batch: 0 // loss: 0.055
epoch: 59, batch: 100 // loss: 0.048
epoch: 59, batch: 200 // loss: 0.044
epoch: 59, batch: 300 // loss: 0.050
epoch: 59, batch: 400 // loss: 0.048
epoch: 59, batch: 500 // loss: 0.040
epoch: 59, batch: 600 // loss: 0.041
epoch: 59, batch: 700 // loss: 0.045
epoch: 59, batch: 800 // loss: 0.044
epoch: 59, batch: 900 // loss: 0.050
epoch: 59, batch: 1000 // loss: 0.046
epoch: 59, batch: 1100 // loss: 0.044
epoch: 59, batch: 1200 // loss: 0.045
epoch: 59, batch: 1300 // loss: 0.047
epoch: 59, batch: 1400 // loss: 0.043
epoch: 59, batch: 1500 // loss: 0.049
epoch: 59, batch: 1600 // loss: 0.052
epoch: 59, batch: 1700 // loss: 0.046
epoch: 59, batch: 1800 // loss: 0.054
epoch: 59, batch: 1900 // loss: 0.047
epoch: 59, batch: 2000 // loss: 0.048
epoch: 59, batch: 2100 // loss: 0.048
epoch: 59, batch: 2200 // loss: 0.051
epoch: 59, batch: 2300 // loss: 0.050
epoch: 59, batch: 2400 // loss: 0.043
epoch: 59, batch: 2500 // loss: 0.044
epoch: 59, batch: 2600 // loss: 0.046
epoch: 59, batch: 2700 // loss: 0.043
epoch: 59, batch: 2800 // loss: 0.047
epoch: 59, batch: 2900 // loss: 0.043
epoch: 59, batch: 3000 // loss: 0.046
epoch: 59, batch: 3100 // loss: 0.044
epoch: 59, batch: 3200 // loss: 0.040
epoch: 59, batch: 3300 // loss: 0.039
epoch: 59, batch: 3400 // loss: 0.045
epoch: 59, batch: 3500 // loss: 0.036
epoch: 59, batch: 3600 // loss: 0.044
epoch: 59, batch: 3700 // loss: 0.045

epoch: 60, batch: 0 // loss: 0.054
epoch: 60, batch: 100 // loss: 0.048
epoch: 60, batch: 200 // loss: 0.044
epoch: 60, batch: 300 // loss: 0.050
epoch: 60, batch: 400 // loss: 0.047
epoch: 60, batch: 500 // loss: 0.040
epoch: 60, batch: 600 // loss: 0.041
epoch: 60, batch: 700 // loss: 0.045
epoch: 60, batch: 800 // loss: 0.043
epoch: 60, batch: 900 // loss: 0.050
epoch: 60, batch: 1000 // loss: 0.046
epoch: 60, batch: 1100 // loss: 0.044
epoch: 60, batch: 1200 // loss: 0.045
epoch: 60, batch: 1300 // loss: 0.047
epoch: 60, batch: 1400 // loss: 0.043
epoch: 60, batch: 1500 // loss: 0.049
epoch: 60, batch: 1600 // loss: 0.052
epoch: 60, batch: 1700 // loss: 0.046
epoch: 60, batch: 1800 // loss: 0.053
epoch: 60, batch: 1900 // loss: 0.046
epoch: 60, batch: 2000 // loss: 0.048
epoch: 60, batch: 2100 // loss: 0.048
epoch: 60, batch: 2200 // loss: 0.051
epoch: 60, batch: 2300 // loss: 0.050
epoch: 60, batch: 2400 // loss: 0.043
epoch: 60, batch: 2500 // loss: 0.044
epoch: 60, batch: 2600 // loss: 0.046
epoch: 60, batch: 2700 // loss: 0.043
epoch: 60, batch: 2800 // loss: 0.047
epoch: 60, batch: 2900 // loss: 0.043
epoch: 60, batch: 3000 // loss: 0.046
epoch: 60, batch: 3100 // loss: 0.044
epoch: 60, batch: 3200 // loss: 0.040
epoch: 60, batch: 3300 // loss: 0.039
epoch: 60, batch: 3400 // loss: 0.045
epoch: 60, batch: 3500 // loss: 0.036
epoch: 60, batch: 3600 // loss: 0.044
epoch: 60, batch: 3700 // loss: 0.044

epoch: 61, batch: 0 // loss: 0.054
epoch: 61, batch: 100 // loss: 0.048
epoch: 61, batch: 200 // loss: 0.044
epoch: 61, batch: 300 // loss: 0.050
epoch: 61, batch: 400 // loss: 0.047
epoch: 61, batch: 500 // loss: 0.040
epoch: 61, batch: 600 // loss: 0.041
epoch: 61, batch: 700 // loss: 0.045
epoch: 61, batch: 800 // loss: 0.043
epoch: 61, batch: 900 // loss: 0.050
epoch: 61, batch: 1000 // loss: 0.046
epoch: 61, batch: 1100 // loss: 0.043
epoch: 61, batch: 1200 // loss: 0.045
epoch: 61, batch: 1300 // loss: 0.046
epoch: 61, batch: 1400 // loss: 0.043
epoch: 61, batch: 1500 // loss: 0.049
epoch: 61, batch: 1600 // loss: 0.052
epoch: 61, batch: 1700 // loss: 0.046
epoch: 61, batch: 1800 // loss: 0.053
epoch: 61, batch: 1900 // loss: 0.046
epoch: 61, batch: 2000 // loss: 0.047
epoch: 61, batch: 2100 // loss: 0.048
epoch: 61, batch: 2200 // loss: 0.051
epoch: 61, batch: 2300 // loss: 0.050
epoch: 61, batch: 2400 // loss: 0.043
epoch: 61, batch: 2500 // loss: 0.043
epoch: 61, batch: 2600 // loss: 0.046
epoch: 61, batch: 2700 // loss: 0.043
epoch: 61, batch: 2800 // loss: 0.047
epoch: 61, batch: 2900 // loss: 0.043
epoch: 61, batch: 3000 // loss: 0.046
epoch: 61, batch: 3100 // loss: 0.044
epoch: 61, batch: 3200 // loss: 0.040
epoch: 61, batch: 3300 // loss: 0.039
epoch: 61, batch: 3400 // loss: 0.045
epoch: 61, batch: 3500 // loss: 0.036
epoch: 61, batch: 3600 // loss: 0.044
epoch: 61, batch: 3700 // loss: 0.044

epoch: 62, batch: 0 // loss: 0.054
epoch: 62, batch: 100 // loss: 0.047
epoch: 62, batch: 200 // loss: 0.043
epoch: 62, batch: 300 // loss: 0.049
epoch: 62, batch: 400 // loss: 0.047
epoch: 62, batch: 500 // loss: 0.040
epoch: 62, batch: 600 // loss: 0.041
epoch: 62, batch: 700 // loss: 0.045
epoch: 62, batch: 800 // loss: 0.043
epoch: 62, batch: 900 // loss: 0.050
epoch: 62, batch: 1000 // loss: 0.046
epoch: 62, batch: 1100 // loss: 0.043
epoch: 62, batch: 1200 // loss: 0.045
epoch: 62, batch: 1300 // loss: 0.046
epoch: 62, batch: 1400 // loss: 0.043
epoch: 62, batch: 1500 // loss: 0.049
epoch: 62, batch: 1600 // loss: 0.052
epoch: 62, batch: 1700 // loss: 0.046
epoch: 62, batch: 1800 // loss: 0.053
epoch: 62, batch: 1900 // loss: 0.046
epoch: 62, batch: 2000 // loss: 0.047
epoch: 62, batch: 2100 // loss: 0.047
epoch: 62, batch: 2200 // loss: 0.050
epoch: 62, batch: 2300 // loss: 0.050
epoch: 62, batch: 2400 // loss: 0.043
epoch: 62, batch: 2500 // loss: 0.043
epoch: 62, batch: 2600 // loss: 0.045
epoch: 62, batch: 2700 // loss: 0.043
epoch: 62, batch: 2800 // loss: 0.047
epoch: 62, batch: 2900 // loss: 0.043
epoch: 62, batch: 3000 // loss: 0.046
epoch: 62, batch: 3100 // loss: 0.044
epoch: 62, batch: 3200 // loss: 0.040
epoch: 62, batch: 3300 // loss: 0.039
epoch: 62, batch: 3400 // loss: 0.045
epoch: 62, batch: 3500 // loss: 0.035
epoch: 62, batch: 3600 // loss: 0.044
epoch: 62, batch: 3700 // loss: 0.044

epoch: 63, batch: 0 // loss: 0.054
epoch: 63, batch: 100 // loss: 0.047
epoch: 63, batch: 200 // loss: 0.043
epoch: 63, batch: 300 // loss: 0.049
epoch: 63, batch: 400 // loss: 0.047
epoch: 63, batch: 500 // loss: 0.040
epoch: 63, batch: 600 // loss: 0.041
epoch: 63, batch: 700 // loss: 0.045
epoch: 63, batch: 800 // loss: 0.043
epoch: 63, batch: 900 // loss: 0.050
epoch: 63, batch: 1000 // loss: 0.046
epoch: 63, batch: 1100 // loss: 0.043
epoch: 63, batch: 1200 // loss: 0.045
epoch: 63, batch: 1300 // loss: 0.046
epoch: 63, batch: 1400 // loss: 0.043
epoch: 63, batch: 1500 // loss: 0.049
epoch: 63, batch: 1600 // loss: 0.052
epoch: 63, batch: 1700 // loss: 0.046
epoch: 63, batch: 1800 // loss: 0.053
epoch: 63, batch: 1900 // loss: 0.046
epoch: 63, batch: 2000 // loss: 0.047
epoch: 63, batch: 2100 // loss: 0.047
epoch: 63, batch: 2200 // loss: 0.050
epoch: 63, batch: 2300 // loss: 0.049
epoch: 63, batch: 2400 // loss: 0.043
epoch: 63, batch: 2500 // loss: 0.043
epoch: 63, batch: 2600 // loss: 0.045
epoch: 63, batch: 2700 // loss: 0.043
epoch: 63, batch: 2800 // loss: 0.047
epoch: 63, batch: 2900 // loss: 0.043
epoch: 63, batch: 3000 // loss: 0.046
epoch: 63, batch: 3100 // loss: 0.044
epoch: 63, batch: 3200 // loss: 0.040
epoch: 63, batch: 3300 // loss: 0.039
epoch: 63, batch: 3400 // loss: 0.045
epoch: 63, batch: 3500 // loss: 0.035
epoch: 63, batch: 3600 // loss: 0.044
epoch: 63, batch: 3700 // loss: 0.044

epoch: 64, batch: 0 // loss: 0.054
epoch: 64, batch: 100 // loss: 0.047
epoch: 64, batch: 200 // loss: 0.043
epoch: 64, batch: 300 // loss: 0.049
epoch: 64, batch: 400 // loss: 0.047
epoch: 64, batch: 500 // loss: 0.040
epoch: 64, batch: 600 // loss: 0.041
epoch: 64, batch: 700 // loss: 0.045
epoch: 64, batch: 800 // loss: 0.043
epoch: 64, batch: 900 // loss: 0.049
epoch: 64, batch: 1000 // loss: 0.046
epoch: 64, batch: 1100 // loss: 0.043
epoch: 64, batch: 1200 // loss: 0.045
epoch: 64, batch: 1300 // loss: 0.046
epoch: 64, batch: 1400 // loss: 0.043
epoch: 64, batch: 1500 // loss: 0.049
epoch: 64, batch: 1600 // loss: 0.052
epoch: 64, batch: 1700 // loss: 0.046
epoch: 64, batch: 1800 // loss: 0.053
epoch: 64, batch: 1900 // loss: 0.046
epoch: 64, batch: 2000 // loss: 0.047
epoch: 64, batch: 2100 // loss: 0.047
epoch: 64, batch: 2200 // loss: 0.050
epoch: 64, batch: 2300 // loss: 0.049
epoch: 64, batch: 2400 // loss: 0.043
epoch: 64, batch: 2500 // loss: 0.043
epoch: 64, batch: 2600 // loss: 0.045
epoch: 64, batch: 2700 // loss: 0.043
epoch: 64, batch: 2800 // loss: 0.047
epoch: 64, batch: 2900 // loss: 0.042
epoch: 64, batch: 3000 // loss: 0.046
epoch: 64, batch: 3100 // loss: 0.044
epoch: 64, batch: 3200 // loss: 0.040
epoch: 64, batch: 3300 // loss: 0.039
epoch: 64, batch: 3400 // loss: 0.045
epoch: 64, batch: 3500 // loss: 0.035
epoch: 64, batch: 3600 // loss: 0.044
epoch: 64, batch: 3700 // loss: 0.044

epoch: 65, batch: 0 // loss: 0.054
epoch: 65, batch: 100 // loss: 0.047
epoch: 65, batch: 200 // loss: 0.043
epoch: 65, batch: 300 // loss: 0.049
epoch: 65, batch: 400 // loss: 0.047
epoch: 65, batch: 500 // loss: 0.040
epoch: 65, batch: 600 // loss: 0.041
epoch: 65, batch: 700 // loss: 0.045
epoch: 65, batch: 800 // loss: 0.043
epoch: 65, batch: 900 // loss: 0.049
epoch: 65, batch: 1000 // loss: 0.046
epoch: 65, batch: 1100 // loss: 0.043
epoch: 65, batch: 1200 // loss: 0.044
epoch: 65, batch: 1300 // loss: 0.046
epoch: 65, batch: 1400 // loss: 0.043
epoch: 65, batch: 1500 // loss: 0.049
epoch: 65, batch: 1600 // loss: 0.052
epoch: 65, batch: 1700 // loss: 0.046
epoch: 65, batch: 1800 // loss: 0.053
epoch: 65, batch: 1900 // loss: 0.046
epoch: 65, batch: 2000 // loss: 0.047
epoch: 65, batch: 2100 // loss: 0.047
epoch: 65, batch: 2200 // loss: 0.050
epoch: 65, batch: 2300 // loss: 0.049
epoch: 65, batch: 2400 // loss: 0.042
epoch: 65, batch: 2500 // loss: 0.043
epoch: 65, batch: 2600 // loss: 0.045
epoch: 65, batch: 2700 // loss: 0.043
epoch: 65, batch: 2800 // loss: 0.047
epoch: 65, batch: 2900 // loss: 0.042
epoch: 65, batch: 3000 // loss: 0.046
epoch: 65, batch: 3100 // loss: 0.043
epoch: 65, batch: 3200 // loss: 0.039
epoch: 65, batch: 3300 // loss: 0.039
epoch: 65, batch: 3400 // loss: 0.045
epoch: 65, batch: 3500 // loss: 0.035
epoch: 65, batch: 3600 // loss: 0.044
epoch: 65, batch: 3700 // loss: 0.044

epoch: 66, batch: 0 // loss: 0.054
epoch: 66, batch: 100 // loss: 0.047
epoch: 66, batch: 200 // loss: 0.043
epoch: 66, batch: 300 // loss: 0.049
epoch: 66, batch: 400 // loss: 0.047
epoch: 66, batch: 500 // loss: 0.040
epoch: 66, batch: 600 // loss: 0.041
epoch: 66, batch: 700 // loss: 0.045
epoch: 66, batch: 800 // loss: 0.043
epoch: 66, batch: 900 // loss: 0.049
epoch: 66, batch: 1000 // loss: 0.046
epoch: 66, batch: 1100 // loss: 0.043
epoch: 66, batch: 1200 // loss: 0.044
epoch: 66, batch: 1300 // loss: 0.046
epoch: 66, batch: 1400 // loss: 0.043
epoch: 66, batch: 1500 // loss: 0.048
epoch: 66, batch: 1600 // loss: 0.052
epoch: 66, batch: 1700 // loss: 0.046
epoch: 66, batch: 1800 // loss: 0.053
epoch: 66, batch: 1900 // loss: 0.046
epoch: 66, batch: 2000 // loss: 0.047
epoch: 66, batch: 2100 // loss: 0.047
epoch: 66, batch: 2200 // loss: 0.050
epoch: 66, batch: 2300 // loss: 0.049
epoch: 66, batch: 2400 // loss: 0.042
epoch: 66, batch: 2500 // loss: 0.043
epoch: 66, batch: 2600 // loss: 0.045
epoch: 66, batch: 2700 // loss: 0.043
epoch: 66, batch: 2800 // loss: 0.047
epoch: 66, batch: 2900 // loss: 0.042
epoch: 66, batch: 3000 // loss: 0.046
epoch: 66, batch: 3100 // loss: 0.043
epoch: 66, batch: 3200 // loss: 0.039
epoch: 66, batch: 3300 // loss: 0.039
epoch: 66, batch: 3400 // loss: 0.045
epoch: 66, batch: 3500 // loss: 0.035
epoch: 66, batch: 3600 // loss: 0.043
epoch: 66, batch: 3700 // loss: 0.044

epoch: 67, batch: 0 // loss: 0.054
epoch: 67, batch: 100 // loss: 0.047
epoch: 67, batch: 200 // loss: 0.043
epoch: 67, batch: 300 // loss: 0.049
epoch: 67, batch: 400 // loss: 0.047
epoch: 67, batch: 500 // loss: 0.040
epoch: 67, batch: 600 // loss: 0.041
epoch: 67, batch: 700 // loss: 0.045
epoch: 67, batch: 800 // loss: 0.043
epoch: 67, batch: 900 // loss: 0.049
epoch: 67, batch: 1000 // loss: 0.046
epoch: 67, batch: 1100 // loss: 0.043
epoch: 67, batch: 1200 // loss: 0.044
epoch: 67, batch: 1300 // loss: 0.046
epoch: 67, batch: 1400 // loss: 0.043
epoch: 67, batch: 1500 // loss: 0.048
epoch: 67, batch: 1600 // loss: 0.052
epoch: 67, batch: 1700 // loss: 0.046
epoch: 67, batch: 1800 // loss: 0.053
epoch: 67, batch: 1900 // loss: 0.046
epoch: 67, batch: 2000 // loss: 0.047
epoch: 67, batch: 2100 // loss: 0.047
epoch: 67, batch: 2200 // loss: 0.050
epoch: 67, batch: 2300 // loss: 0.049
epoch: 67, batch: 2400 // loss: 0.042
epoch: 67, batch: 2500 // loss: 0.043
epoch: 67, batch: 2600 // loss: 0.045
epoch: 67, batch: 2700 // loss: 0.043
epoch: 67, batch: 2800 // loss: 0.046
epoch: 67, batch: 2900 // loss: 0.042
epoch: 67, batch: 3000 // loss: 0.046
epoch: 67, batch: 3100 // loss: 0.043
epoch: 67, batch: 3200 // loss: 0.039
epoch: 67, batch: 3300 // loss: 0.039
epoch: 67, batch: 3400 // loss: 0.044
epoch: 67, batch: 3500 // loss: 0.035
epoch: 67, batch: 3600 // loss: 0.043
epoch: 67, batch: 3700 // loss: 0.044

epoch: 68, batch: 0 // loss: 0.054
epoch: 68, batch: 100 // loss: 0.047
epoch: 68, batch: 200 // loss: 0.043
epoch: 68, batch: 300 // loss: 0.049
epoch: 68, batch: 400 // loss: 0.046
epoch: 68, batch: 500 // loss: 0.040
epoch: 68, batch: 600 // loss: 0.041
epoch: 68, batch: 700 // loss: 0.045
epoch: 68, batch: 800 // loss: 0.042
epoch: 68, batch: 900 // loss: 0.049
epoch: 68, batch: 1000 // loss: 0.046
epoch: 68, batch: 1100 // loss: 0.043
epoch: 68, batch: 1200 // loss: 0.044
epoch: 68, batch: 1300 // loss: 0.046
epoch: 68, batch: 1400 // loss: 0.043
epoch: 68, batch: 1500 // loss: 0.048
epoch: 68, batch: 1600 // loss: 0.052
epoch: 68, batch: 1700 // loss: 0.046
epoch: 68, batch: 1800 // loss: 0.053
epoch: 68, batch: 1900 // loss: 0.046
epoch: 68, batch: 2000 // loss: 0.047
epoch: 68, batch: 2100 // loss: 0.047
epoch: 68, batch: 2200 // loss: 0.050
epoch: 68, batch: 2300 // loss: 0.049
epoch: 68, batch: 2400 // loss: 0.042
epoch: 68, batch: 2500 // loss: 0.043
epoch: 68, batch: 2600 // loss: 0.045
epoch: 68, batch: 2700 // loss: 0.043
epoch: 68, batch: 2800 // loss: 0.046
epoch: 68, batch: 2900 // loss: 0.042
epoch: 68, batch: 3000 // loss: 0.046
epoch: 68, batch: 3100 // loss: 0.043
epoch: 68, batch: 3200 // loss: 0.039
epoch: 68, batch: 3300 // loss: 0.038
epoch: 68, batch: 3400 // loss: 0.044
epoch: 68, batch: 3500 // loss: 0.035
epoch: 68, batch: 3600 // loss: 0.043
epoch: 68, batch: 3700 // loss: 0.044

epoch: 69, batch: 0 // loss: 0.054
epoch: 69, batch: 100 // loss: 0.047
epoch: 69, batch: 200 // loss: 0.043
epoch: 69, batch: 300 // loss: 0.049
epoch: 69, batch: 400 // loss: 0.046
epoch: 69, batch: 500 // loss: 0.040
epoch: 69, batch: 600 // loss: 0.041
epoch: 69, batch: 700 // loss: 0.044
epoch: 69, batch: 800 // loss: 0.042
epoch: 69, batch: 900 // loss: 0.049
epoch: 69, batch: 1000 // loss: 0.046
epoch: 69, batch: 1100 // loss: 0.043
epoch: 69, batch: 1200 // loss: 0.044
epoch: 69, batch: 1300 // loss: 0.046
epoch: 69, batch: 1400 // loss: 0.043
epoch: 69, batch: 1500 // loss: 0.048
epoch: 69, batch: 1600 // loss: 0.052
epoch: 69, batch: 1700 // loss: 0.045
epoch: 69, batch: 1800 // loss: 0.053
epoch: 69, batch: 1900 // loss: 0.045
epoch: 69, batch: 2000 // loss: 0.047
epoch: 69, batch: 2100 // loss: 0.047
epoch: 69, batch: 2200 // loss: 0.050
epoch: 69, batch: 2300 // loss: 0.049
epoch: 69, batch: 2400 // loss: 0.042
epoch: 69, batch: 2500 // loss: 0.043
epoch: 69, batch: 2600 // loss: 0.045
epoch: 69, batch: 2700 // loss: 0.043
epoch: 69, batch: 2800 // loss: 0.046
epoch: 69, batch: 2900 // loss: 0.042
epoch: 69, batch: 3000 // loss: 0.046
epoch: 69, batch: 3100 // loss: 0.043
epoch: 69, batch: 3200 // loss: 0.039
epoch: 69, batch: 3300 // loss: 0.038
epoch: 69, batch: 3400 // loss: 0.044
epoch: 69, batch: 3500 // loss: 0.035
epoch: 69, batch: 3600 // loss: 0.043
epoch: 69, batch: 3700 // loss: 0.044

epoch: 70, batch: 0 // loss: 0.053
epoch: 70, batch: 100 // loss: 0.047
epoch: 70, batch: 200 // loss: 0.043
epoch: 70, batch: 300 // loss: 0.049
epoch: 70, batch: 400 // loss: 0.046
epoch: 70, batch: 500 // loss: 0.040
epoch: 70, batch: 600 // loss: 0.041
epoch: 70, batch: 700 // loss: 0.044
epoch: 70, batch: 800 // loss: 0.042
epoch: 70, batch: 900 // loss: 0.049
epoch: 70, batch: 1000 // loss: 0.046
epoch: 70, batch: 1100 // loss: 0.043
epoch: 70, batch: 1200 // loss: 0.044
epoch: 70, batch: 1300 // loss: 0.046
epoch: 70, batch: 1400 // loss: 0.043
epoch: 70, batch: 1500 // loss: 0.048
epoch: 70, batch: 1600 // loss: 0.052
epoch: 70, batch: 1700 // loss: 0.045
epoch: 70, batch: 1800 // loss: 0.053
epoch: 70, batch: 1900 // loss: 0.045
epoch: 70, batch: 2000 // loss: 0.047
epoch: 70, batch: 2100 // loss: 0.047
epoch: 70, batch: 2200 // loss: 0.050
epoch: 70, batch: 2300 // loss: 0.049
epoch: 70, batch: 2400 // loss: 0.042
epoch: 70, batch: 2500 // loss: 0.043
epoch: 70, batch: 2600 // loss: 0.045
epoch: 70, batch: 2700 // loss: 0.043
epoch: 70, batch: 2800 // loss: 0.046
epoch: 70, batch: 2900 // loss: 0.042
epoch: 70, batch: 3000 // loss: 0.046
epoch: 70, batch: 3100 // loss: 0.043
epoch: 70, batch: 3200 // loss: 0.039
epoch: 70, batch: 3300 // loss: 0.038
epoch: 70, batch: 3400 // loss: 0.044
epoch: 70, batch: 3500 // loss: 0.035
epoch: 70, batch: 3600 // loss: 0.043
epoch: 70, batch: 3700 // loss: 0.044

epoch: 71, batch: 0 // loss: 0.053
epoch: 71, batch: 100 // loss: 0.046
epoch: 71, batch: 200 // loss: 0.043
epoch: 71, batch: 300 // loss: 0.049
epoch: 71, batch: 400 // loss: 0.046
epoch: 71, batch: 500 // loss: 0.040
epoch: 71, batch: 600 // loss: 0.041
epoch: 71, batch: 700 // loss: 0.044
epoch: 71, batch: 800 // loss: 0.042
epoch: 71, batch: 900 // loss: 0.049
epoch: 71, batch: 1000 // loss: 0.046
epoch: 71, batch: 1100 // loss: 0.043
epoch: 71, batch: 1200 // loss: 0.044
epoch: 71, batch: 1300 // loss: 0.046
epoch: 71, batch: 1400 // loss: 0.043
epoch: 71, batch: 1500 // loss: 0.048
epoch: 71, batch: 1600 // loss: 0.052
epoch: 71, batch: 1700 // loss: 0.045
epoch: 71, batch: 1800 // loss: 0.053
epoch: 71, batch: 1900 // loss: 0.045
epoch: 71, batch: 2000 // loss: 0.047
epoch: 71, batch: 2100 // loss: 0.047
epoch: 71, batch: 2200 // loss: 0.050
epoch: 71, batch: 2300 // loss: 0.049
epoch: 71, batch: 2400 // loss: 0.042
epoch: 71, batch: 2500 // loss: 0.043
epoch: 71, batch: 2600 // loss: 0.045
epoch: 71, batch: 2700 // loss: 0.043
epoch: 71, batch: 2800 // loss: 0.046
epoch: 71, batch: 2900 // loss: 0.042
epoch: 71, batch: 3000 // loss: 0.046
epoch: 71, batch: 3100 // loss: 0.043
epoch: 71, batch: 3200 // loss: 0.039
epoch: 71, batch: 3300 // loss: 0.038
epoch: 71, batch: 3400 // loss: 0.044
epoch: 71, batch: 3500 // loss: 0.035
epoch: 71, batch: 3600 // loss: 0.043
epoch: 71, batch: 3700 // loss: 0.043

epoch: 72, batch: 0 // loss: 0.053
epoch: 72, batch: 100 // loss: 0.046
epoch: 72, batch: 200 // loss: 0.043
epoch: 72, batch: 300 // loss: 0.049
epoch: 72, batch: 400 // loss: 0.046
epoch: 72, batch: 500 // loss: 0.040
epoch: 72, batch: 600 // loss: 0.041
epoch: 72, batch: 700 // loss: 0.044
epoch: 72, batch: 800 // loss: 0.042
epoch: 72, batch: 900 // loss: 0.049
epoch: 72, batch: 1000 // loss: 0.046
epoch: 72, batch: 1100 // loss: 0.043
epoch: 72, batch: 1200 // loss: 0.044
epoch: 72, batch: 1300 // loss: 0.046
epoch: 72, batch: 1400 // loss: 0.043
epoch: 72, batch: 1500 // loss: 0.048
epoch: 72, batch: 1600 // loss: 0.052
epoch: 72, batch: 1700 // loss: 0.045
epoch: 72, batch: 1800 // loss: 0.053
epoch: 72, batch: 1900 // loss: 0.045
epoch: 72, batch: 2000 // loss: 0.047
epoch: 72, batch: 2100 // loss: 0.047
epoch: 72, batch: 2200 // loss: 0.050
epoch: 72, batch: 2300 // loss: 0.049
epoch: 72, batch: 2400 // loss: 0.042
epoch: 72, batch: 2500 // loss: 0.043
epoch: 72, batch: 2600 // loss: 0.045
epoch: 72, batch: 2700 // loss: 0.043
epoch: 72, batch: 2800 // loss: 0.046
epoch: 72, batch: 2900 // loss: 0.042
epoch: 72, batch: 3000 // loss: 0.046
epoch: 72, batch: 3100 // loss: 0.043
epoch: 72, batch: 3200 // loss: 0.039
epoch: 72, batch: 3300 // loss: 0.038
epoch: 72, batch: 3400 // loss: 0.044
epoch: 72, batch: 3500 // loss: 0.035
epoch: 72, batch: 3600 // loss: 0.043
epoch: 72, batch: 3700 // loss: 0.043

epoch: 73, batch: 0 // loss: 0.053
epoch: 73, batch: 100 // loss: 0.046
epoch: 73, batch: 200 // loss: 0.043
epoch: 73, batch: 300 // loss: 0.049
epoch: 73, batch: 400 // loss: 0.046
epoch: 73, batch: 500 // loss: 0.040
epoch: 73, batch: 600 // loss: 0.040
epoch: 73, batch: 700 // loss: 0.044
epoch: 73, batch: 800 // loss: 0.042
epoch: 73, batch: 900 // loss: 0.049
epoch: 73, batch: 1000 // loss: 0.046
epoch: 73, batch: 1100 // loss: 0.043
epoch: 73, batch: 1200 // loss: 0.044
epoch: 73, batch: 1300 // loss: 0.046
epoch: 73, batch: 1400 // loss: 0.043
epoch: 73, batch: 1500 // loss: 0.048
epoch: 73, batch: 1600 // loss: 0.052
epoch: 73, batch: 1700 // loss: 0.045
epoch: 73, batch: 1800 // loss: 0.053
epoch: 73, batch: 1900 // loss: 0.045
epoch: 73, batch: 2000 // loss: 0.047
epoch: 73, batch: 2100 // loss: 0.047
epoch: 73, batch: 2200 // loss: 0.050
epoch: 73, batch: 2300 // loss: 0.049
epoch: 73, batch: 2400 // loss: 0.042
epoch: 73, batch: 2500 // loss: 0.042
epoch: 73, batch: 2600 // loss: 0.045
epoch: 73, batch: 2700 // loss: 0.043
epoch: 73, batch: 2800 // loss: 0.046
epoch: 73, batch: 2900 // loss: 0.042
epoch: 73, batch: 3000 // loss: 0.046
epoch: 73, batch: 3100 // loss: 0.043
epoch: 73, batch: 3200 // loss: 0.039
epoch: 73, batch: 3300 // loss: 0.038
epoch: 73, batch: 3400 // loss: 0.044
epoch: 73, batch: 3500 // loss: 0.035
epoch: 73, batch: 3600 // loss: 0.043
epoch: 73, batch: 3700 // loss: 0.043

epoch: 74, batch: 0 // loss: 0.053
epoch: 74, batch: 100 // loss: 0.046
epoch: 74, batch: 200 // loss: 0.043
epoch: 74, batch: 300 // loss: 0.049
epoch: 74, batch: 400 // loss: 0.046
epoch: 74, batch: 500 // loss: 0.040
epoch: 74, batch: 600 // loss: 0.040
epoch: 74, batch: 700 // loss: 0.044
epoch: 74, batch: 800 // loss: 0.042
epoch: 74, batch: 900 // loss: 0.049
epoch: 74, batch: 1000 // loss: 0.046
epoch: 74, batch: 1100 // loss: 0.043
epoch: 74, batch: 1200 // loss: 0.044
epoch: 74, batch: 1300 // loss: 0.046
epoch: 74, batch: 1400 // loss: 0.043
epoch: 74, batch: 1500 // loss: 0.048
epoch: 74, batch: 1600 // loss: 0.052
epoch: 74, batch: 1700 // loss: 0.045
epoch: 74, batch: 1800 // loss: 0.053
epoch: 74, batch: 1900 // loss: 0.045
epoch: 74, batch: 2000 // loss: 0.047
epoch: 74, batch: 2100 // loss: 0.047
epoch: 74, batch: 2200 // loss: 0.050
epoch: 74, batch: 2300 // loss: 0.049
epoch: 74, batch: 2400 // loss: 0.042
epoch: 74, batch: 2500 // loss: 0.042
epoch: 74, batch: 2600 // loss: 0.045
epoch: 74, batch: 2700 // loss: 0.043
epoch: 74, batch: 2800 // loss: 0.046
epoch: 74, batch: 2900 // loss: 0.042
epoch: 74, batch: 3000 // loss: 0.046
epoch: 74, batch: 3100 // loss: 0.043
epoch: 74, batch: 3200 // loss: 0.039
epoch: 74, batch: 3300 // loss: 0.038
epoch: 74, batch: 3400 // loss: 0.044
epoch: 74, batch: 3500 // loss: 0.035
epoch: 74, batch: 3600 // loss: 0.043
epoch: 74, batch: 3700 // loss: 0.043

epoch: 75, batch: 0 // loss: 0.053
epoch: 75, batch: 100 // loss: 0.046
epoch: 75, batch: 200 // loss: 0.043
epoch: 75, batch: 300 // loss: 0.049
epoch: 75, batch: 400 // loss: 0.046
epoch: 75, batch: 500 // loss: 0.040
epoch: 75, batch: 600 // loss: 0.040
epoch: 75, batch: 700 // loss: 0.044
epoch: 75, batch: 800 // loss: 0.042
epoch: 75, batch: 900 // loss: 0.049
epoch: 75, batch: 1000 // loss: 0.046
epoch: 75, batch: 1100 // loss: 0.043
epoch: 75, batch: 1200 // loss: 0.044
epoch: 75, batch: 1300 // loss: 0.046
epoch: 75, batch: 1400 // loss: 0.043
epoch: 75, batch: 1500 // loss: 0.048
epoch: 75, batch: 1600 // loss: 0.052
epoch: 75, batch: 1700 // loss: 0.045
epoch: 75, batch: 1800 // loss: 0.053
epoch: 75, batch: 1900 // loss: 0.045
epoch: 75, batch: 2000 // loss: 0.047
epoch: 75, batch: 2100 // loss: 0.047
epoch: 75, batch: 2200 // loss: 0.050
epoch: 75, batch: 2300 // loss: 0.049
epoch: 75, batch: 2400 // loss: 0.042
epoch: 75, batch: 2500 // loss: 0.042
epoch: 75, batch: 2600 // loss: 0.045
epoch: 75, batch: 2700 // loss: 0.043
epoch: 75, batch: 2800 // loss: 0.046
epoch: 75, batch: 2900 // loss: 0.042
epoch: 75, batch: 3000 // loss: 0.046
epoch: 75, batch: 3100 // loss: 0.043
epoch: 75, batch: 3200 // loss: 0.039
epoch: 75, batch: 3300 // loss: 0.038
epoch: 75, batch: 3400 // loss: 0.044
epoch: 75, batch: 3500 // loss: 0.035
epoch: 75, batch: 3600 // loss: 0.043
epoch: 75, batch: 3700 // loss: 0.043

epoch: 76, batch: 0 // loss: 0.053
epoch: 76, batch: 100 // loss: 0.046
epoch: 76, batch: 200 // loss: 0.043
epoch: 76, batch: 300 // loss: 0.049
epoch: 76, batch: 400 // loss: 0.046
epoch: 76, batch: 500 // loss: 0.040
epoch: 76, batch: 600 // loss: 0.040
epoch: 76, batch: 700 // loss: 0.044
epoch: 76, batch: 800 // loss: 0.042
epoch: 76, batch: 900 // loss: 0.049
epoch: 76, batch: 1000 // loss: 0.046
epoch: 76, batch: 1100 // loss: 0.043
epoch: 76, batch: 1200 // loss: 0.044
epoch: 76, batch: 1300 // loss: 0.046
epoch: 76, batch: 1400 // loss: 0.043
epoch: 76, batch: 1500 // loss: 0.048
epoch: 76, batch: 1600 // loss: 0.052
epoch: 76, batch: 1700 // loss: 0.045
epoch: 76, batch: 1800 // loss: 0.053
epoch: 76, batch: 1900 // loss: 0.045
epoch: 76, batch: 2000 // loss: 0.047
epoch: 76, batch: 2100 // loss: 0.047
epoch: 76, batch: 2200 // loss: 0.050
epoch: 76, batch: 2300 // loss: 0.049
epoch: 76, batch: 2400 // loss: 0.042
epoch: 76, batch: 2500 // loss: 0.042
epoch: 76, batch: 2600 // loss: 0.045
epoch: 76, batch: 2700 // loss: 0.043
epoch: 76, batch: 2800 // loss: 0.046
epoch: 76, batch: 2900 // loss: 0.042
epoch: 76, batch: 3000 // loss: 0.046
epoch: 76, batch: 3100 // loss: 0.043
epoch: 76, batch: 3200 // loss: 0.039
epoch: 76, batch: 3300 // loss: 0.038
epoch: 76, batch: 3400 // loss: 0.044
epoch: 76, batch: 3500 // loss: 0.035
epoch: 76, batch: 3600 // loss: 0.043
epoch: 76, batch: 3700 // loss: 0.043

epoch: 77, batch: 0 // loss: 0.053
epoch: 77, batch: 100 // loss: 0.046
epoch: 77, batch: 200 // loss: 0.043
epoch: 77, batch: 300 // loss: 0.049
epoch: 77, batch: 400 // loss: 0.046
epoch: 77, batch: 500 // loss: 0.040
epoch: 77, batch: 600 // loss: 0.040
epoch: 77, batch: 700 // loss: 0.044
epoch: 77, batch: 800 // loss: 0.042
epoch: 77, batch: 900 // loss: 0.049
epoch: 77, batch: 1000 // loss: 0.046
epoch: 77, batch: 1100 // loss: 0.043
epoch: 77, batch: 1200 // loss: 0.044
epoch: 77, batch: 1300 // loss: 0.046
epoch: 77, batch: 1400 // loss: 0.043
epoch: 77, batch: 1500 // loss: 0.048
epoch: 77, batch: 1600 // loss: 0.052
epoch: 77, batch: 1700 // loss: 0.045
epoch: 77, batch: 1800 // loss: 0.053
epoch: 77, batch: 1900 // loss: 0.045
epoch: 77, batch: 2000 // loss: 0.047
epoch: 77, batch: 2100 // loss: 0.047
epoch: 77, batch: 2200 // loss: 0.050
epoch: 77, batch: 2300 // loss: 0.049
epoch: 77, batch: 2400 // loss: 0.042
epoch: 77, batch: 2500 // loss: 0.042
epoch: 77, batch: 2600 // loss: 0.045
epoch: 77, batch: 2700 // loss: 0.043
epoch: 77, batch: 2800 // loss: 0.046
epoch: 77, batch: 2900 // loss: 0.042
epoch: 77, batch: 3000 // loss: 0.045
epoch: 77, batch: 3100 // loss: 0.043
epoch: 77, batch: 3200 // loss: 0.039
epoch: 77, batch: 3300 // loss: 0.038
epoch: 77, batch: 3400 // loss: 0.044
epoch: 77, batch: 3500 // loss: 0.035
epoch: 77, batch: 3600 // loss: 0.043
epoch: 77, batch: 3700 // loss: 0.043

epoch: 78, batch: 0 // loss: 0.053
epoch: 78, batch: 100 // loss: 0.046
epoch: 78, batch: 200 // loss: 0.043
epoch: 78, batch: 300 // loss: 0.049
epoch: 78, batch: 400 // loss: 0.046
epoch: 78, batch: 500 // loss: 0.040
epoch: 78, batch: 600 // loss: 0.040
epoch: 78, batch: 700 // loss: 0.044
epoch: 78, batch: 800 // loss: 0.042
epoch: 78, batch: 900 // loss: 0.049
epoch: 78, batch: 1000 // loss: 0.046
epoch: 78, batch: 1100 // loss: 0.043
epoch: 78, batch: 1200 // loss: 0.044
epoch: 78, batch: 1300 // loss: 0.046
epoch: 78, batch: 1400 // loss: 0.043
epoch: 78, batch: 1500 // loss: 0.048
epoch: 78, batch: 1600 // loss: 0.052
epoch: 78, batch: 1700 // loss: 0.045
epoch: 78, batch: 1800 // loss: 0.053
epoch: 78, batch: 1900 // loss: 0.045
epoch: 78, batch: 2000 // loss: 0.047
epoch: 78, batch: 2100 // loss: 0.047
epoch: 78, batch: 2200 // loss: 0.050
epoch: 78, batch: 2300 // loss: 0.049
epoch: 78, batch: 2400 // loss: 0.042
epoch: 78, batch: 2500 // loss: 0.042
epoch: 78, batch: 2600 // loss: 0.045
epoch: 78, batch: 2700 // loss: 0.043
epoch: 78, batch: 2800 // loss: 0.046
epoch: 78, batch: 2900 // loss: 0.042
epoch: 78, batch: 3000 // loss: 0.045
epoch: 78, batch: 3100 // loss: 0.043
epoch: 78, batch: 3200 // loss: 0.039
epoch: 78, batch: 3300 // loss: 0.038
epoch: 78, batch: 3400 // loss: 0.044
epoch: 78, batch: 3500 // loss: 0.035
epoch: 78, batch: 3600 // loss: 0.043
epoch: 78, batch: 3700 // loss: 0.043

epoch: 79, batch: 0 // loss: 0.053
epoch: 79, batch: 100 // loss: 0.046
epoch: 79, batch: 200 // loss: 0.043
epoch: 79, batch: 300 // loss: 0.049
epoch: 79, batch: 400 // loss: 0.046
epoch: 79, batch: 500 // loss: 0.040
epoch: 79, batch: 600 // loss: 0.040
epoch: 79, batch: 700 // loss: 0.044
epoch: 79, batch: 800 // loss: 0.042
epoch: 79, batch: 900 // loss: 0.049
epoch: 79, batch: 1000 // loss: 0.046
epoch: 79, batch: 1100 // loss: 0.043
epoch: 79, batch: 1200 // loss: 0.044
epoch: 79, batch: 1300 // loss: 0.046
epoch: 79, batch: 1400 // loss: 0.043
epoch: 79, batch: 1500 // loss: 0.048
epoch: 79, batch: 1600 // loss: 0.052
epoch: 79, batch: 1700 // loss: 0.045
epoch: 79, batch: 1800 // loss: 0.053
epoch: 79, batch: 1900 // loss: 0.045
epoch: 79, batch: 2000 // loss: 0.047
epoch: 79, batch: 2100 // loss: 0.047
epoch: 79, batch: 2200 // loss: 0.050
epoch: 79, batch: 2300 // loss: 0.049
epoch: 79, batch: 2400 // loss: 0.042
epoch: 79, batch: 2500 // loss: 0.042
epoch: 79, batch: 2600 // loss: 0.045
epoch: 79, batch: 2700 // loss: 0.043
epoch: 79, batch: 2800 // loss: 0.046
epoch: 79, batch: 2900 // loss: 0.042
epoch: 79, batch: 3000 // loss: 0.045
epoch: 79, batch: 3100 // loss: 0.043
epoch: 79, batch: 3200 // loss: 0.039
epoch: 79, batch: 3300 // loss: 0.038
epoch: 79, batch: 3400 // loss: 0.044
epoch: 79, batch: 3500 // loss: 0.035
epoch: 79, batch: 3600 // loss: 0.043
epoch: 79, batch: 3700 // loss: 0.043

epoch: 80, batch: 0 // loss: 0.053
epoch: 80, batch: 100 // loss: 0.046
epoch: 80, batch: 200 // loss: 0.043
epoch: 80, batch: 300 // loss: 0.049
epoch: 80, batch: 400 // loss: 0.046
epoch: 80, batch: 500 // loss: 0.040
epoch: 80, batch: 600 // loss: 0.040
epoch: 80, batch: 700 // loss: 0.044
epoch: 80, batch: 800 // loss: 0.042
epoch: 80, batch: 900 // loss: 0.049
epoch: 80, batch: 1000 // loss: 0.046
epoch: 80, batch: 1100 // loss: 0.043
epoch: 80, batch: 1200 // loss: 0.044
epoch: 80, batch: 1300 // loss: 0.046
epoch: 80, batch: 1400 // loss: 0.043
epoch: 80, batch: 1500 // loss: 0.048
epoch: 80, batch: 1600 // loss: 0.052
epoch: 80, batch: 1700 // loss: 0.045
epoch: 80, batch: 1800 // loss: 0.053
epoch: 80, batch: 1900 // loss: 0.045
epoch: 80, batch: 2000 // loss: 0.047
epoch: 80, batch: 2100 // loss: 0.047
epoch: 80, batch: 2200 // loss: 0.050
epoch: 80, batch: 2300 // loss: 0.049
epoch: 80, batch: 2400 // loss: 0.042
epoch: 80, batch: 2500 // loss: 0.042
epoch: 80, batch: 2600 // loss: 0.045
epoch: 80, batch: 2700 // loss: 0.043
epoch: 80, batch: 2800 // loss: 0.046
epoch: 80, batch: 2900 // loss: 0.042
epoch: 80, batch: 3000 // loss: 0.045
epoch: 80, batch: 3100 // loss: 0.043
epoch: 80, batch: 3200 // loss: 0.039
epoch: 80, batch: 3300 // loss: 0.038
epoch: 80, batch: 3400 // loss: 0.044
epoch: 80, batch: 3500 // loss: 0.035
epoch: 80, batch: 3600 // loss: 0.043
epoch: 80, batch: 3700 // loss: 0.043

epoch: 81, batch: 0 // loss: 0.053
epoch: 81, batch: 100 // loss: 0.046
epoch: 81, batch: 200 // loss: 0.043
epoch: 81, batch: 300 // loss: 0.049
epoch: 81, batch: 400 // loss: 0.046
epoch: 81, batch: 500 // loss: 0.040
epoch: 81, batch: 600 // loss: 0.040
epoch: 81, batch: 700 // loss: 0.044
epoch: 81, batch: 800 // loss: 0.042
epoch: 81, batch: 900 // loss: 0.049
epoch: 81, batch: 1000 // loss: 0.046
epoch: 81, batch: 1100 // loss: 0.043
epoch: 81, batch: 1200 // loss: 0.044
epoch: 81, batch: 1300 // loss: 0.046
epoch: 81, batch: 1400 // loss: 0.043
epoch: 81, batch: 1500 // loss: 0.048
epoch: 81, batch: 1600 // loss: 0.052
epoch: 81, batch: 1700 // loss: 0.045
epoch: 81, batch: 1800 // loss: 0.053
epoch: 81, batch: 1900 // loss: 0.045
epoch: 81, batch: 2000 // loss: 0.047
epoch: 81, batch: 2100 // loss: 0.047
epoch: 81, batch: 2200 // loss: 0.050
epoch: 81, batch: 2300 // loss: 0.049
epoch: 81, batch: 2400 // loss: 0.042
epoch: 81, batch: 2500 // loss: 0.042
epoch: 81, batch: 2600 // loss: 0.045
epoch: 81, batch: 2700 // loss: 0.042
epoch: 81, batch: 2800 // loss: 0.046
epoch: 81, batch: 2900 // loss: 0.042
epoch: 81, batch: 3000 // loss: 0.045
epoch: 81, batch: 3100 // loss: 0.043
epoch: 81, batch: 3200 // loss: 0.039
epoch: 81, batch: 3300 // loss: 0.038
epoch: 81, batch: 3400 // loss: 0.044
epoch: 81, batch: 3500 // loss: 0.035
epoch: 81, batch: 3600 // loss: 0.043
epoch: 81, batch: 3700 // loss: 0.043

epoch: 82, batch: 0 // loss: 0.053
epoch: 82, batch: 100 // loss: 0.046
epoch: 82, batch: 200 // loss: 0.043
epoch: 82, batch: 300 // loss: 0.049
epoch: 82, batch: 400 // loss: 0.046
epoch: 82, batch: 500 // loss: 0.040
epoch: 82, batch: 600 // loss: 0.040
epoch: 82, batch: 700 // loss: 0.044
epoch: 82, batch: 800 // loss: 0.042
epoch: 82, batch: 900 // loss: 0.049
epoch: 82, batch: 1000 // loss: 0.046
epoch: 82, batch: 1100 // loss: 0.043
epoch: 82, batch: 1200 // loss: 0.044
epoch: 82, batch: 1300 // loss: 0.046
epoch: 82, batch: 1400 // loss: 0.043
epoch: 82, batch: 1500 // loss: 0.048
epoch: 82, batch: 1600 // loss: 0.052
epoch: 82, batch: 1700 // loss: 0.045
epoch: 82, batch: 1800 // loss: 0.053
epoch: 82, batch: 1900 // loss: 0.045
epoch: 82, batch: 2000 // loss: 0.047
epoch: 82, batch: 2100 // loss: 0.047
epoch: 82, batch: 2200 // loss: 0.050
epoch: 82, batch: 2300 // loss: 0.049
epoch: 82, batch: 2400 // loss: 0.042
epoch: 82, batch: 2500 // loss: 0.042
epoch: 82, batch: 2600 // loss: 0.045
epoch: 82, batch: 2700 // loss: 0.042
epoch: 82, batch: 2800 // loss: 0.046
epoch: 82, batch: 2900 // loss: 0.042
epoch: 82, batch: 3000 // loss: 0.045
epoch: 82, batch: 3100 // loss: 0.043
epoch: 82, batch: 3200 // loss: 0.039
epoch: 82, batch: 3300 // loss: 0.038
epoch: 82, batch: 3400 // loss: 0.044
epoch: 82, batch: 3500 // loss: 0.035
epoch: 82, batch: 3600 // loss: 0.043
epoch: 82, batch: 3700 // loss: 0.043

epoch: 83, batch: 0 // loss: 0.053
epoch: 83, batch: 100 // loss: 0.046
epoch: 83, batch: 200 // loss: 0.043
epoch: 83, batch: 300 // loss: 0.049
epoch: 83, batch: 400 // loss: 0.046
epoch: 83, batch: 500 // loss: 0.040
epoch: 83, batch: 600 // loss: 0.040
epoch: 83, batch: 700 // loss: 0.044
epoch: 83, batch: 800 // loss: 0.042
epoch: 83, batch: 900 // loss: 0.049
epoch: 83, batch: 1000 // loss: 0.046
epoch: 83, batch: 1100 // loss: 0.042
epoch: 83, batch: 1200 // loss: 0.044
epoch: 83, batch: 1300 // loss: 0.046
epoch: 83, batch: 1400 // loss: 0.043
epoch: 83, batch: 1500 // loss: 0.048
epoch: 83, batch: 1600 // loss: 0.052
epoch: 83, batch: 1700 // loss: 0.045
epoch: 83, batch: 1800 // loss: 0.053
epoch: 83, batch: 1900 // loss: 0.045
epoch: 83, batch: 2000 // loss: 0.047
epoch: 83, batch: 2100 // loss: 0.047
epoch: 83, batch: 2200 // loss: 0.050
epoch: 83, batch: 2300 // loss: 0.049
epoch: 83, batch: 2400 // loss: 0.042
epoch: 83, batch: 2500 // loss: 0.042
epoch: 83, batch: 2600 // loss: 0.045
epoch: 83, batch: 2700 // loss: 0.042
epoch: 83, batch: 2800 // loss: 0.046
epoch: 83, batch: 2900 // loss: 0.042
epoch: 83, batch: 3000 // loss: 0.045
epoch: 83, batch: 3100 // loss: 0.043
epoch: 83, batch: 3200 // loss: 0.039
epoch: 83, batch: 3300 // loss: 0.038
epoch: 83, batch: 3400 // loss: 0.044
epoch: 83, batch: 3500 // loss: 0.035
epoch: 83, batch: 3600 // loss: 0.043
epoch: 83, batch: 3700 // loss: 0.043

epoch: 84, batch: 0 // loss: 0.053
epoch: 84, batch: 100 // loss: 0.046
epoch: 84, batch: 200 // loss: 0.043
epoch: 84, batch: 300 // loss: 0.049
epoch: 84, batch: 400 // loss: 0.046
epoch: 84, batch: 500 // loss: 0.040
epoch: 84, batch: 600 // loss: 0.040
epoch: 84, batch: 700 // loss: 0.044
epoch: 84, batch: 800 // loss: 0.042
epoch: 84, batch: 900 // loss: 0.049
epoch: 84, batch: 1000 // loss: 0.046
epoch: 84, batch: 1100 // loss: 0.042
epoch: 84, batch: 1200 // loss: 0.044
epoch: 84, batch: 1300 // loss: 0.046
epoch: 84, batch: 1400 // loss: 0.043
epoch: 84, batch: 1500 // loss: 0.048
epoch: 84, batch: 1600 // loss: 0.052
epoch: 84, batch: 1700 // loss: 0.045
epoch: 84, batch: 1800 // loss: 0.053
epoch: 84, batch: 1900 // loss: 0.045
epoch: 84, batch: 2000 // loss: 0.047
epoch: 84, batch: 2100 // loss: 0.047
epoch: 84, batch: 2200 // loss: 0.050
epoch: 84, batch: 2300 // loss: 0.049
epoch: 84, batch: 2400 // loss: 0.042
epoch: 84, batch: 2500 // loss: 0.042
epoch: 84, batch: 2600 // loss: 0.045
epoch: 84, batch: 2700 // loss: 0.042
epoch: 84, batch: 2800 // loss: 0.046
epoch: 84, batch: 2900 // loss: 0.042
epoch: 84, batch: 3000 // loss: 0.045
epoch: 84, batch: 3100 // loss: 0.043
epoch: 84, batch: 3200 // loss: 0.039
epoch: 84, batch: 3300 // loss: 0.038
epoch: 84, batch: 3400 // loss: 0.044
epoch: 84, batch: 3500 // loss: 0.035
epoch: 84, batch: 3600 // loss: 0.043
epoch: 84, batch: 3700 // loss: 0.043

epoch: 85, batch: 0 // loss: 0.053
epoch: 85, batch: 100 // loss: 0.046
epoch: 85, batch: 200 // loss: 0.043
epoch: 85, batch: 300 // loss: 0.049
epoch: 85, batch: 400 // loss: 0.046
epoch: 85, batch: 500 // loss: 0.040
epoch: 85, batch: 600 // loss: 0.040
epoch: 85, batch: 700 // loss: 0.044
epoch: 85, batch: 800 // loss: 0.042
epoch: 85, batch: 900 // loss: 0.049
epoch: 85, batch: 1000 // loss: 0.046
epoch: 85, batch: 1100 // loss: 0.042
epoch: 85, batch: 1200 // loss: 0.044
epoch: 85, batch: 1300 // loss: 0.046
epoch: 85, batch: 1400 // loss: 0.043
epoch: 85, batch: 1500 // loss: 0.048
epoch: 85, batch: 1600 // loss: 0.052
epoch: 85, batch: 1700 // loss: 0.045
epoch: 85, batch: 1800 // loss: 0.053
epoch: 85, batch: 1900 // loss: 0.045
epoch: 85, batch: 2000 // loss: 0.047
epoch: 85, batch: 2100 // loss: 0.046
epoch: 85, batch: 2200 // loss: 0.050
epoch: 85, batch: 2300 // loss: 0.049
epoch: 85, batch: 2400 // loss: 0.042
epoch: 85, batch: 2500 // loss: 0.042
epoch: 85, batch: 2600 // loss: 0.045
epoch: 85, batch: 2700 // loss: 0.042
epoch: 85, batch: 2800 // loss: 0.046
epoch: 85, batch: 2900 // loss: 0.042
epoch: 85, batch: 3000 // loss: 0.045
epoch: 85, batch: 3100 // loss: 0.043
epoch: 85, batch: 3200 // loss: 0.039
epoch: 85, batch: 3300 // loss: 0.038
epoch: 85, batch: 3400 // loss: 0.044
epoch: 85, batch: 3500 // loss: 0.035
epoch: 85, batch: 3600 // loss: 0.043
epoch: 85, batch: 3700 // loss: 0.043

epoch: 86, batch: 0 // loss: 0.053
epoch: 86, batch: 100 // loss: 0.046
epoch: 86, batch: 200 // loss: 0.043
epoch: 86, batch: 300 // loss: 0.049
epoch: 86, batch: 400 // loss: 0.046
epoch: 86, batch: 500 // loss: 0.040
epoch: 86, batch: 600 // loss: 0.040
epoch: 86, batch: 700 // loss: 0.044
epoch: 86, batch: 800 // loss: 0.042
epoch: 86, batch: 900 // loss: 0.049
epoch: 86, batch: 1000 // loss: 0.046
epoch: 86, batch: 1100 // loss: 0.042
epoch: 86, batch: 1200 // loss: 0.044
epoch: 86, batch: 1300 // loss: 0.046
epoch: 86, batch: 1400 // loss: 0.043
epoch: 86, batch: 1500 // loss: 0.048
epoch: 86, batch: 1600 // loss: 0.052
epoch: 86, batch: 1700 // loss: 0.045
epoch: 86, batch: 1800 // loss: 0.053
epoch: 86, batch: 1900 // loss: 0.045
epoch: 86, batch: 2000 // loss: 0.047
epoch: 86, batch: 2100 // loss: 0.046
epoch: 86, batch: 2200 // loss: 0.050
epoch: 86, batch: 2300 // loss: 0.049
epoch: 86, batch: 2400 // loss: 0.042
epoch: 86, batch: 2500 // loss: 0.042
epoch: 86, batch: 2600 // loss: 0.045
epoch: 86, batch: 2700 // loss: 0.042
epoch: 86, batch: 2800 // loss: 0.046
epoch: 86, batch: 2900 // loss: 0.042
epoch: 86, batch: 3000 // loss: 0.045
epoch: 86, batch: 3100 // loss: 0.043
epoch: 86, batch: 3200 // loss: 0.039
epoch: 86, batch: 3300 // loss: 0.038
epoch: 86, batch: 3400 // loss: 0.044
epoch: 86, batch: 3500 // loss: 0.035
epoch: 86, batch: 3600 // loss: 0.043
epoch: 86, batch: 3700 // loss: 0.043

epoch: 87, batch: 0 // loss: 0.053
epoch: 87, batch: 100 // loss: 0.046
epoch: 87, batch: 200 // loss: 0.043
epoch: 87, batch: 300 // loss: 0.049
epoch: 87, batch: 400 // loss: 0.046
epoch: 87, batch: 500 // loss: 0.040
epoch: 87, batch: 600 // loss: 0.040
epoch: 87, batch: 700 // loss: 0.044
epoch: 87, batch: 800 // loss: 0.042
epoch: 87, batch: 900 // loss: 0.049
epoch: 87, batch: 1000 // loss: 0.046
epoch: 87, batch: 1100 // loss: 0.042
epoch: 87, batch: 1200 // loss: 0.044
epoch: 87, batch: 1300 // loss: 0.046
epoch: 87, batch: 1400 // loss: 0.043
epoch: 87, batch: 1500 // loss: 0.048
epoch: 87, batch: 1600 // loss: 0.052
epoch: 87, batch: 1700 // loss: 0.045
epoch: 87, batch: 1800 // loss: 0.053
epoch: 87, batch: 1900 // loss: 0.045
epoch: 87, batch: 2000 // loss: 0.047
epoch: 87, batch: 2100 // loss: 0.046
epoch: 87, batch: 2200 // loss: 0.050
epoch: 87, batch: 2300 // loss: 0.049
epoch: 87, batch: 2400 // loss: 0.042
epoch: 87, batch: 2500 // loss: 0.042
epoch: 87, batch: 2600 // loss: 0.045
epoch: 87, batch: 2700 // loss: 0.042
epoch: 87, batch: 2800 // loss: 0.046
epoch: 87, batch: 2900 // loss: 0.042
epoch: 87, batch: 3000 // loss: 0.045
epoch: 87, batch: 3100 // loss: 0.043
epoch: 87, batch: 3200 // loss: 0.039
epoch: 87, batch: 3300 // loss: 0.038
epoch: 87, batch: 3400 // loss: 0.044
epoch: 87, batch: 3500 // loss: 0.035
epoch: 87, batch: 3600 // loss: 0.043
epoch: 87, batch: 3700 // loss: 0.043

epoch: 88, batch: 0 // loss: 0.053
epoch: 88, batch: 100 // loss: 0.046
epoch: 88, batch: 200 // loss: 0.043
epoch: 88, batch: 300 // loss: 0.049
epoch: 88, batch: 400 // loss: 0.046
epoch: 88, batch: 500 // loss: 0.040
epoch: 88, batch: 600 // loss: 0.040
epoch: 88, batch: 700 // loss: 0.044
epoch: 88, batch: 800 // loss: 0.042
epoch: 88, batch: 900 // loss: 0.049
epoch: 88, batch: 1000 // loss: 0.046
epoch: 88, batch: 1100 // loss: 0.042
epoch: 88, batch: 1200 // loss: 0.044
epoch: 88, batch: 1300 // loss: 0.046
epoch: 88, batch: 1400 // loss: 0.043
epoch: 88, batch: 1500 // loss: 0.048
epoch: 88, batch: 1600 // loss: 0.052
epoch: 88, batch: 1700 // loss: 0.045
epoch: 88, batch: 1800 // loss: 0.053
epoch: 88, batch: 1900 // loss: 0.045
epoch: 88, batch: 2000 // loss: 0.047
epoch: 88, batch: 2100 // loss: 0.046
epoch: 88, batch: 2200 // loss: 0.050
epoch: 88, batch: 2300 // loss: 0.049
epoch: 88, batch: 2400 // loss: 0.042
epoch: 88, batch: 2500 // loss: 0.042
epoch: 88, batch: 2600 // loss: 0.045
epoch: 88, batch: 2700 // loss: 0.042
epoch: 88, batch: 2800 // loss: 0.046
epoch: 88, batch: 2900 // loss: 0.042
epoch: 88, batch: 3000 // loss: 0.045
epoch: 88, batch: 3100 // loss: 0.043
epoch: 88, batch: 3200 // loss: 0.039
epoch: 88, batch: 3300 // loss: 0.038
epoch: 88, batch: 3400 // loss: 0.044
epoch: 88, batch: 3500 // loss: 0.035
epoch: 88, batch: 3600 // loss: 0.043
epoch: 88, batch: 3700 // loss: 0.043

epoch: 89, batch: 0 // loss: 0.053
epoch: 89, batch: 100 // loss: 0.046
epoch: 89, batch: 200 // loss: 0.043
epoch: 89, batch: 300 // loss: 0.049
epoch: 89, batch: 400 // loss: 0.046
epoch: 89, batch: 500 // loss: 0.040
epoch: 89, batch: 600 // loss: 0.040
epoch: 89, batch: 700 // loss: 0.044
epoch: 89, batch: 800 // loss: 0.042
epoch: 89, batch: 900 // loss: 0.049
epoch: 89, batch: 1000 // loss: 0.046
epoch: 89, batch: 1100 // loss: 0.042
epoch: 89, batch: 1200 // loss: 0.044
epoch: 89, batch: 1300 // loss: 0.046
epoch: 89, batch: 1400 // loss: 0.043
epoch: 89, batch: 1500 // loss: 0.048
epoch: 89, batch: 1600 // loss: 0.052
epoch: 89, batch: 1700 // loss: 0.045
epoch: 89, batch: 1800 // loss: 0.053
epoch: 89, batch: 1900 // loss: 0.045
epoch: 89, batch: 2000 // loss: 0.047
epoch: 89, batch: 2100 // loss: 0.046
epoch: 89, batch: 2200 // loss: 0.050
epoch: 89, batch: 2300 // loss: 0.049
epoch: 89, batch: 2400 // loss: 0.042
epoch: 89, batch: 2500 // loss: 0.042
epoch: 89, batch: 2600 // loss: 0.045
epoch: 89, batch: 2700 // loss: 0.042
epoch: 89, batch: 2800 // loss: 0.046
epoch: 89, batch: 2900 // loss: 0.042
epoch: 89, batch: 3000 // loss: 0.045
epoch: 89, batch: 3100 // loss: 0.043
epoch: 89, batch: 3200 // loss: 0.039
epoch: 89, batch: 3300 // loss: 0.038
epoch: 89, batch: 3400 // loss: 0.044
epoch: 89, batch: 3500 // loss: 0.035
epoch: 89, batch: 3600 // loss: 0.043
epoch: 89, batch: 3700 // loss: 0.043

epoch: 90, batch: 0 // loss: 0.053
epoch: 90, batch: 100 // loss: 0.046
epoch: 90, batch: 200 // loss: 0.043
epoch: 90, batch: 300 // loss: 0.049
epoch: 90, batch: 400 // loss: 0.046
epoch: 90, batch: 500 // loss: 0.040
epoch: 90, batch: 600 // loss: 0.040
epoch: 90, batch: 700 // loss: 0.044
epoch: 90, batch: 800 // loss: 0.042
epoch: 90, batch: 900 // loss: 0.049
epoch: 90, batch: 1000 // loss: 0.046
epoch: 90, batch: 1100 // loss: 0.042
epoch: 90, batch: 1200 // loss: 0.044
epoch: 90, batch: 1300 // loss: 0.046
epoch: 90, batch: 1400 // loss: 0.043
epoch: 90, batch: 1500 // loss: 0.048
epoch: 90, batch: 1600 // loss: 0.052
epoch: 90, batch: 1700 // loss: 0.045
epoch: 90, batch: 1800 // loss: 0.053
epoch: 90, batch: 1900 // loss: 0.045
epoch: 90, batch: 2000 // loss: 0.047
epoch: 90, batch: 2100 // loss: 0.046
epoch: 90, batch: 2200 // loss: 0.050
epoch: 90, batch: 2300 // loss: 0.049
epoch: 90, batch: 2400 // loss: 0.042
epoch: 90, batch: 2500 // loss: 0.042
epoch: 90, batch: 2600 // loss: 0.045
epoch: 90, batch: 2700 // loss: 0.042
epoch: 90, batch: 2800 // loss: 0.046
epoch: 90, batch: 2900 // loss: 0.042
epoch: 90, batch: 3000 // loss: 0.045
epoch: 90, batch: 3100 // loss: 0.043
epoch: 90, batch: 3200 // loss: 0.039
epoch: 90, batch: 3300 // loss: 0.038
epoch: 90, batch: 3400 // loss: 0.044
epoch: 90, batch: 3500 // loss: 0.035
epoch: 90, batch: 3600 // loss: 0.043
epoch: 90, batch: 3700 // loss: 0.043

epoch: 91, batch: 0 // loss: 0.053
epoch: 91, batch: 100 // loss: 0.046
epoch: 91, batch: 200 // loss: 0.043
epoch: 91, batch: 300 // loss: 0.049
epoch: 91, batch: 400 // loss: 0.046
epoch: 91, batch: 500 // loss: 0.040
epoch: 91, batch: 600 // loss: 0.040
epoch: 91, batch: 700 // loss: 0.044
epoch: 91, batch: 800 // loss: 0.042
epoch: 91, batch: 900 // loss: 0.049
epoch: 91, batch: 1000 // loss: 0.046
epoch: 91, batch: 1100 // loss: 0.042
epoch: 91, batch: 1200 // loss: 0.044
epoch: 91, batch: 1300 // loss: 0.046
epoch: 91, batch: 1400 // loss: 0.043
epoch: 91, batch: 1500 // loss: 0.048
epoch: 91, batch: 1600 // loss: 0.052
epoch: 91, batch: 1700 // loss: 0.045
epoch: 91, batch: 1800 // loss: 0.053
epoch: 91, batch: 1900 // loss: 0.045
epoch: 91, batch: 2000 // loss: 0.047
epoch: 91, batch: 2100 // loss: 0.046
epoch: 91, batch: 2200 // loss: 0.050
epoch: 91, batch: 2300 // loss: 0.049
epoch: 91, batch: 2400 // loss: 0.042
epoch: 91, batch: 2500 // loss: 0.042
epoch: 91, batch: 2600 // loss: 0.045
epoch: 91, batch: 2700 // loss: 0.042
epoch: 91, batch: 2800 // loss: 0.046
epoch: 91, batch: 2900 // loss: 0.042
epoch: 91, batch: 3000 // loss: 0.045
epoch: 91, batch: 3100 // loss: 0.043
epoch: 91, batch: 3200 // loss: 0.039
epoch: 91, batch: 3300 // loss: 0.038
epoch: 91, batch: 3400 // loss: 0.044
epoch: 91, batch: 3500 // loss: 0.035
epoch: 91, batch: 3600 // loss: 0.043
epoch: 91, batch: 3700 // loss: 0.043

epoch: 92, batch: 0 // loss: 0.053
epoch: 92, batch: 100 // loss: 0.046
epoch: 92, batch: 200 // loss: 0.043
epoch: 92, batch: 300 // loss: 0.049
epoch: 92, batch: 400 // loss: 0.046
epoch: 92, batch: 500 // loss: 0.040
epoch: 92, batch: 600 // loss: 0.040
epoch: 92, batch: 700 // loss: 0.044
epoch: 92, batch: 800 // loss: 0.042
epoch: 92, batch: 900 // loss: 0.049
epoch: 92, batch: 1000 // loss: 0.046
epoch: 92, batch: 1100 // loss: 0.042
epoch: 92, batch: 1200 // loss: 0.044
epoch: 92, batch: 1300 // loss: 0.046
epoch: 92, batch: 1400 // loss: 0.043
epoch: 92, batch: 1500 // loss: 0.048
epoch: 92, batch: 1600 // loss: 0.052
epoch: 92, batch: 1700 // loss: 0.045
epoch: 92, batch: 1800 // loss: 0.053
epoch: 92, batch: 1900 // loss: 0.045
epoch: 92, batch: 2000 // loss: 0.047
epoch: 92, batch: 2100 // loss: 0.046
epoch: 92, batch: 2200 // loss: 0.050
epoch: 92, batch: 2300 // loss: 0.049
epoch: 92, batch: 2400 // loss: 0.042
epoch: 92, batch: 2500 // loss: 0.042
epoch: 92, batch: 2600 // loss: 0.045
epoch: 92, batch: 2700 // loss: 0.042
epoch: 92, batch: 2800 // loss: 0.046
epoch: 92, batch: 2900 // loss: 0.042
epoch: 92, batch: 3000 // loss: 0.045
epoch: 92, batch: 3100 // loss: 0.043
epoch: 92, batch: 3200 // loss: 0.039
epoch: 92, batch: 3300 // loss: 0.038
epoch: 92, batch: 3400 // loss: 0.044
epoch: 92, batch: 3500 // loss: 0.035
epoch: 92, batch: 3600 // loss: 0.043
epoch: 92, batch: 3700 // loss: 0.043

epoch: 93, batch: 0 // loss: 0.053
epoch: 93, batch: 100 // loss: 0.046
epoch: 93, batch: 200 // loss: 0.043
epoch: 93, batch: 300 // loss: 0.049
epoch: 93, batch: 400 // loss: 0.046
epoch: 93, batch: 500 // loss: 0.040
epoch: 93, batch: 600 // loss: 0.040
epoch: 93, batch: 700 // loss: 0.044
epoch: 93, batch: 800 // loss: 0.042
epoch: 93, batch: 900 // loss: 0.049
epoch: 93, batch: 1000 // loss: 0.046
epoch: 93, batch: 1100 // loss: 0.042
epoch: 93, batch: 1200 // loss: 0.044
epoch: 93, batch: 1300 // loss: 0.046
epoch: 93, batch: 1400 // loss: 0.043
epoch: 93, batch: 1500 // loss: 0.048
epoch: 93, batch: 1600 // loss: 0.052
epoch: 93, batch: 1700 // loss: 0.045
epoch: 93, batch: 1800 // loss: 0.053
epoch: 93, batch: 1900 // loss: 0.045
epoch: 93, batch: 2000 // loss: 0.047
epoch: 93, batch: 2100 // loss: 0.046
epoch: 93, batch: 2200 // loss: 0.050
epoch: 93, batch: 2300 // loss: 0.049
epoch: 93, batch: 2400 // loss: 0.042
epoch: 93, batch: 2500 // loss: 0.042
epoch: 93, batch: 2600 // loss: 0.045
epoch: 93, batch: 2700 // loss: 0.042
epoch: 93, batch: 2800 // loss: 0.046
epoch: 93, batch: 2900 // loss: 0.042
epoch: 93, batch: 3000 // loss: 0.045
epoch: 93, batch: 3100 // loss: 0.043
epoch: 93, batch: 3200 // loss: 0.039
epoch: 93, batch: 3300 // loss: 0.038
epoch: 93, batch: 3400 // loss: 0.044
epoch: 93, batch: 3500 // loss: 0.035
epoch: 93, batch: 3600 // loss: 0.043
epoch: 93, batch: 3700 // loss: 0.043

epoch: 94, batch: 0 // loss: 0.053
epoch: 94, batch: 100 // loss: 0.046
epoch: 94, batch: 200 // loss: 0.043
epoch: 94, batch: 300 // loss: 0.049
epoch: 94, batch: 400 // loss: 0.046
epoch: 94, batch: 500 // loss: 0.040
epoch: 94, batch: 600 // loss: 0.040
epoch: 94, batch: 700 // loss: 0.044
epoch: 94, batch: 800 // loss: 0.042
epoch: 94, batch: 900 // loss: 0.049
epoch: 94, batch: 1000 // loss: 0.046
epoch: 94, batch: 1100 // loss: 0.042
epoch: 94, batch: 1200 // loss: 0.044
epoch: 94, batch: 1300 // loss: 0.046
epoch: 94, batch: 1400 // loss: 0.043
epoch: 94, batch: 1500 // loss: 0.048
epoch: 94, batch: 1600 // loss: 0.052
epoch: 94, batch: 1700 // loss: 0.045
epoch: 94, batch: 1800 // loss: 0.053
epoch: 94, batch: 1900 // loss: 0.045
epoch: 94, batch: 2000 // loss: 0.047
epoch: 94, batch: 2100 // loss: 0.046
epoch: 94, batch: 2200 // loss: 0.050
epoch: 94, batch: 2300 // loss: 0.049
epoch: 94, batch: 2400 // loss: 0.042
epoch: 94, batch: 2500 // loss: 0.042
epoch: 94, batch: 2600 // loss: 0.045
epoch: 94, batch: 2700 // loss: 0.042
epoch: 94, batch: 2800 // loss: 0.046
epoch: 94, batch: 2900 // loss: 0.042
epoch: 94, batch: 3000 // loss: 0.045
epoch: 94, batch: 3100 // loss: 0.043
epoch: 94, batch: 3200 // loss: 0.039
epoch: 94, batch: 3300 // loss: 0.038
epoch: 94, batch: 3400 // loss: 0.044
epoch: 94, batch: 3500 // loss: 0.035
epoch: 94, batch: 3600 // loss: 0.043
epoch: 94, batch: 3700 // loss: 0.043

epoch: 95, batch: 0 // loss: 0.053
epoch: 95, batch: 100 // loss: 0.046
epoch: 95, batch: 200 // loss: 0.043
epoch: 95, batch: 300 // loss: 0.049
epoch: 95, batch: 400 // loss: 0.046
epoch: 95, batch: 500 // loss: 0.040
epoch: 95, batch: 600 // loss: 0.040
epoch: 95, batch: 700 // loss: 0.044
epoch: 95, batch: 800 // loss: 0.042
epoch: 95, batch: 900 // loss: 0.049
epoch: 95, batch: 1000 // loss: 0.046
epoch: 95, batch: 1100 // loss: 0.042
epoch: 95, batch: 1200 // loss: 0.044
epoch: 95, batch: 1300 // loss: 0.046
epoch: 95, batch: 1400 // loss: 0.043
epoch: 95, batch: 1500 // loss: 0.048
epoch: 95, batch: 1600 // loss: 0.052
epoch: 95, batch: 1700 // loss: 0.045
epoch: 95, batch: 1800 // loss: 0.053
epoch: 95, batch: 1900 // loss: 0.045
epoch: 95, batch: 2000 // loss: 0.047
epoch: 95, batch: 2100 // loss: 0.046
epoch: 95, batch: 2200 // loss: 0.050
epoch: 95, batch: 2300 // loss: 0.049
epoch: 95, batch: 2400 // loss: 0.042
epoch: 95, batch: 2500 // loss: 0.042
epoch: 95, batch: 2600 // loss: 0.045
epoch: 95, batch: 2700 // loss: 0.042
epoch: 95, batch: 2800 // loss: 0.046
epoch: 95, batch: 2900 // loss: 0.042
epoch: 95, batch: 3000 // loss: 0.045
epoch: 95, batch: 3100 // loss: 0.043
epoch: 95, batch: 3200 // loss: 0.039
epoch: 95, batch: 3300 // loss: 0.038
epoch: 95, batch: 3400 // loss: 0.044
epoch: 95, batch: 3500 // loss: 0.035
epoch: 95, batch: 3600 // loss: 0.043
epoch: 95, batch: 3700 // loss: 0.043

epoch: 96, batch: 0 // loss: 0.053
epoch: 96, batch: 100 // loss: 0.046
epoch: 96, batch: 200 // loss: 0.043
epoch: 96, batch: 300 // loss: 0.049
epoch: 96, batch: 400 // loss: 0.046
epoch: 96, batch: 500 // loss: 0.040
epoch: 96, batch: 600 // loss: 0.040
epoch: 96, batch: 700 // loss: 0.044
epoch: 96, batch: 800 // loss: 0.042
epoch: 96, batch: 900 // loss: 0.049
epoch: 96, batch: 1000 // loss: 0.046
epoch: 96, batch: 1100 // loss: 0.042
epoch: 96, batch: 1200 // loss: 0.044
epoch: 96, batch: 1300 // loss: 0.046
epoch: 96, batch: 1400 // loss: 0.043
epoch: 96, batch: 1500 // loss: 0.048
epoch: 96, batch: 1600 // loss: 0.052
epoch: 96, batch: 1700 // loss: 0.045
epoch: 96, batch: 1800 // loss: 0.053
epoch: 96, batch: 1900 // loss: 0.045
epoch: 96, batch: 2000 // loss: 0.047
epoch: 96, batch: 2100 // loss: 0.046
epoch: 96, batch: 2200 // loss: 0.050
epoch: 96, batch: 2300 // loss: 0.049
epoch: 96, batch: 2400 // loss: 0.042
epoch: 96, batch: 2500 // loss: 0.042
epoch: 96, batch: 2600 // loss: 0.045
epoch: 96, batch: 2700 // loss: 0.042
epoch: 96, batch: 2800 // loss: 0.046
epoch: 96, batch: 2900 // loss: 0.042
epoch: 96, batch: 3000 // loss: 0.045
epoch: 96, batch: 3100 // loss: 0.043
epoch: 96, batch: 3200 // loss: 0.039
epoch: 96, batch: 3300 // loss: 0.038
epoch: 96, batch: 3400 // loss: 0.044
epoch: 96, batch: 3500 // loss: 0.035
epoch: 96, batch: 3600 // loss: 0.043
epoch: 96, batch: 3700 // loss: 0.043

epoch: 97, batch: 0 // loss: 0.053
epoch: 97, batch: 100 // loss: 0.046
epoch: 97, batch: 200 // loss: 0.043
epoch: 97, batch: 300 // loss: 0.049
epoch: 97, batch: 400 // loss: 0.046
epoch: 97, batch: 500 // loss: 0.040
epoch: 97, batch: 600 // loss: 0.040
epoch: 97, batch: 700 // loss: 0.044
epoch: 97, batch: 800 // loss: 0.042
epoch: 97, batch: 900 // loss: 0.049
epoch: 97, batch: 1000 // loss: 0.046
epoch: 97, batch: 1100 // loss: 0.042
epoch: 97, batch: 1200 // loss: 0.044
epoch: 97, batch: 1300 // loss: 0.046
epoch: 97, batch: 1400 // loss: 0.043
epoch: 97, batch: 1500 // loss: 0.048
epoch: 97, batch: 1600 // loss: 0.052
epoch: 97, batch: 1700 // loss: 0.045
epoch: 97, batch: 1800 // loss: 0.053
epoch: 97, batch: 1900 // loss: 0.045
epoch: 97, batch: 2000 // loss: 0.047
epoch: 97, batch: 2100 // loss: 0.046
epoch: 97, batch: 2200 // loss: 0.050
epoch: 97, batch: 2300 // loss: 0.049
epoch: 97, batch: 2400 // loss: 0.042
epoch: 97, batch: 2500 // loss: 0.042
epoch: 97, batch: 2600 // loss: 0.045
epoch: 97, batch: 2700 // loss: 0.042
epoch: 97, batch: 2800 // loss: 0.046
epoch: 97, batch: 2900 // loss: 0.042
epoch: 97, batch: 3000 // loss: 0.045
epoch: 97, batch: 3100 // loss: 0.043
epoch: 97, batch: 3200 // loss: 0.039
epoch: 97, batch: 3300 // loss: 0.038
epoch: 97, batch: 3400 // loss: 0.044
epoch: 97, batch: 3500 // loss: 0.035
epoch: 97, batch: 3600 // loss: 0.043
epoch: 97, batch: 3700 // loss: 0.043

epoch: 98, batch: 0 // loss: 0.053
epoch: 98, batch: 100 // loss: 0.046
epoch: 98, batch: 200 // loss: 0.043
epoch: 98, batch: 300 // loss: 0.049
epoch: 98, batch: 400 // loss: 0.046
epoch: 98, batch: 500 // loss: 0.040
epoch: 98, batch: 600 // loss: 0.040
epoch: 98, batch: 700 // loss: 0.044
epoch: 98, batch: 800 // loss: 0.042
epoch: 98, batch: 900 // loss: 0.049
epoch: 98, batch: 1000 // loss: 0.046
epoch: 98, batch: 1100 // loss: 0.042
epoch: 98, batch: 1200 // loss: 0.044
epoch: 98, batch: 1300 // loss: 0.046
epoch: 98, batch: 1400 // loss: 0.043
epoch: 98, batch: 1500 // loss: 0.048
epoch: 98, batch: 1600 // loss: 0.052
epoch: 98, batch: 1700 // loss: 0.045
epoch: 98, batch: 1800 // loss: 0.053
epoch: 98, batch: 1900 // loss: 0.045
epoch: 98, batch: 2000 // loss: 0.047
epoch: 98, batch: 2100 // loss: 0.046
epoch: 98, batch: 2200 // loss: 0.050
epoch: 98, batch: 2300 // loss: 0.049
epoch: 98, batch: 2400 // loss: 0.042
epoch: 98, batch: 2500 // loss: 0.042
epoch: 98, batch: 2600 // loss: 0.045
epoch: 98, batch: 2700 // loss: 0.042
epoch: 98, batch: 2800 // loss: 0.046
epoch: 98, batch: 2900 // loss: 0.042
epoch: 98, batch: 3000 // loss: 0.045
epoch: 98, batch: 3100 // loss: 0.043
epoch: 98, batch: 3200 // loss: 0.039
epoch: 98, batch: 3300 // loss: 0.038
epoch: 98, batch: 3400 // loss: 0.044
epoch: 98, batch: 3500 // loss: 0.035
epoch: 98, batch: 3600 // loss: 0.043
epoch: 98, batch: 3700 // loss: 0.043

epoch: 99, batch: 0 // loss: 0.053
epoch: 99, batch: 100 // loss: 0.046
epoch: 99, batch: 200 // loss: 0.043
epoch: 99, batch: 300 // loss: 0.049
epoch: 99, batch: 400 // loss: 0.046
epoch: 99, batch: 500 // loss: 0.040
epoch: 99, batch: 600 // loss: 0.040
epoch: 99, batch: 700 // loss: 0.044
epoch: 99, batch: 800 // loss: 0.042
epoch: 99, batch: 900 // loss: 0.049
epoch: 99, batch: 1000 // loss: 0.046
epoch: 99, batch: 1100 // loss: 0.042
epoch: 99, batch: 1200 // loss: 0.044
epoch: 99, batch: 1300 // loss: 0.046
epoch: 99, batch: 1400 // loss: 0.043
epoch: 99, batch: 1500 // loss: 0.048
epoch: 99, batch: 1600 // loss: 0.052
epoch: 99, batch: 1700 // loss: 0.045
epoch: 99, batch: 1800 // loss: 0.053
epoch: 99, batch: 1900 // loss: 0.045
epoch: 99, batch: 2000 // loss: 0.047
epoch: 99, batch: 2100 // loss: 0.046
epoch: 99, batch: 2200 // loss: 0.050
epoch: 99, batch: 2300 // loss: 0.049
epoch: 99, batch: 2400 // loss: 0.042
epoch: 99, batch: 2500 // loss: 0.042
epoch: 99, batch: 2600 // loss: 0.045
epoch: 99, batch: 2700 // loss: 0.042
epoch: 99, batch: 2800 // loss: 0.046
epoch: 99, batch: 2900 // loss: 0.042
epoch: 99, batch: 3000 // loss: 0.045
epoch: 99, batch: 3100 // loss: 0.043
epoch: 99, batch: 3200 // loss: 0.039
epoch: 99, batch: 3300 // loss: 0.038
epoch: 99, batch: 3400 // loss: 0.044
epoch: 99, batch: 3500 // loss: 0.035
epoch: 99, batch: 3600 // loss: 0.043
epoch: 99, batch: 3700 // loss: 0.043

epoch: 100, batch: 0 // loss: 0.053
epoch: 100, batch: 100 // loss: 0.046
epoch: 100, batch: 200 // loss: 0.043
epoch: 100, batch: 300 // loss: 0.049
epoch: 100, batch: 400 // loss: 0.046
epoch: 100, batch: 500 // loss: 0.040
epoch: 100, batch: 600 // loss: 0.040
epoch: 100, batch: 700 // loss: 0.044
epoch: 100, batch: 800 // loss: 0.042
epoch: 100, batch: 900 // loss: 0.049
epoch: 100, batch: 1000 // loss: 0.046
epoch: 100, batch: 1100 // loss: 0.042
epoch: 100, batch: 1200 // loss: 0.044
epoch: 100, batch: 1300 // loss: 0.046
epoch: 100, batch: 1400 // loss: 0.043
epoch: 100, batch: 1500 // loss: 0.048
epoch: 100, batch: 1600 // loss: 0.052
epoch: 100, batch: 1700 // loss: 0.045
epoch: 100, batch: 1800 // loss: 0.053
epoch: 100, batch: 1900 // loss: 0.045
epoch: 100, batch: 2000 // loss: 0.047
epoch: 100, batch: 2100 // loss: 0.046
epoch: 100, batch: 2200 // loss: 0.050
epoch: 100, batch: 2300 // loss: 0.049
epoch: 100, batch: 2400 // loss: 0.042
epoch: 100, batch: 2500 // loss: 0.042
epoch: 100, batch: 2600 // loss: 0.045
epoch: 100, batch: 2700 // loss: 0.042
epoch: 100, batch: 2800 // loss: 0.046
epoch: 100, batch: 2900 // loss: 0.042
epoch: 100, batch: 3000 // loss: 0.045
epoch: 100, batch: 3100 // loss: 0.043
epoch: 100, batch: 3200 // loss: 0.039
epoch: 100, batch: 3300 // loss: 0.038
epoch: 100, batch: 3400 // loss: 0.044
epoch: 100, batch: 3500 // loss: 0.035
epoch: 100, batch: 3600 // loss: 0.043
epoch: 100, batch: 3700 // loss: 0.043

epoch: 101, batch: 0 // loss: 0.053
epoch: 101, batch: 100 // loss: 0.046
epoch: 101, batch: 200 // loss: 0.043
epoch: 101, batch: 300 // loss: 0.049
epoch: 101, batch: 400 // loss: 0.046
epoch: 101, batch: 500 // loss: 0.040
epoch: 101, batch: 600 // loss: 0.040
epoch: 101, batch: 700 // loss: 0.044
epoch: 101, batch: 800 // loss: 0.042
epoch: 101, batch: 900 // loss: 0.049
epoch: 101, batch: 1000 // loss: 0.046
epoch: 101, batch: 1100 // loss: 0.042
epoch: 101, batch: 1200 // loss: 0.044
epoch: 101, batch: 1300 // loss: 0.046
epoch: 101, batch: 1400 // loss: 0.043
epoch: 101, batch: 1500 // loss: 0.048
epoch: 101, batch: 1600 // loss: 0.052
epoch: 101, batch: 1700 // loss: 0.045
epoch: 101, batch: 1800 // loss: 0.053
epoch: 101, batch: 1900 // loss: 0.045
epoch: 101, batch: 2000 // loss: 0.047
epoch: 101, batch: 2100 // loss: 0.046
epoch: 101, batch: 2200 // loss: 0.050
epoch: 101, batch: 2300 // loss: 0.049
epoch: 101, batch: 2400 // loss: 0.042
epoch: 101, batch: 2500 // loss: 0.042
epoch: 101, batch: 2600 // loss: 0.045
epoch: 101, batch: 2700 // loss: 0.042
epoch: 101, batch: 2800 // loss: 0.046
epoch: 101, batch: 2900 // loss: 0.042
epoch: 101, batch: 3000 // loss: 0.045
epoch: 101, batch: 3100 // loss: 0.043
epoch: 101, batch: 3200 // loss: 0.039
epoch: 101, batch: 3300 // loss: 0.038
epoch: 101, batch: 3400 // loss: 0.044
epoch: 101, batch: 3500 // loss: 0.035
epoch: 101, batch: 3600 // loss: 0.043
epoch: 101, batch: 3700 // loss: 0.043

epoch: 102, batch: 0 // loss: 0.053
epoch: 102, batch: 100 // loss: 0.046
epoch: 102, batch: 200 // loss: 0.043
epoch: 102, batch: 300 // loss: 0.049
epoch: 102, batch: 400 // loss: 0.046
epoch: 102, batch: 500 // loss: 0.040
epoch: 102, batch: 600 // loss: 0.040
epoch: 102, batch: 700 // loss: 0.044
epoch: 102, batch: 800 // loss: 0.042
epoch: 102, batch: 900 // loss: 0.049
epoch: 102, batch: 1000 // loss: 0.046
epoch: 102, batch: 1100 // loss: 0.042
epoch: 102, batch: 1200 // loss: 0.044
epoch: 102, batch: 1300 // loss: 0.046
epoch: 102, batch: 1400 // loss: 0.043
epoch: 102, batch: 1500 // loss: 0.048
epoch: 102, batch: 1600 // loss: 0.052
epoch: 102, batch: 1700 // loss: 0.045
epoch: 102, batch: 1800 // loss: 0.053
epoch: 102, batch: 1900 // loss: 0.045
epoch: 102, batch: 2000 // loss: 0.047
epoch: 102, batch: 2100 // loss: 0.046
epoch: 102, batch: 2200 // loss: 0.050
epoch: 102, batch: 2300 // loss: 0.049
epoch: 102, batch: 2400 // loss: 0.042
epoch: 102, batch: 2500 // loss: 0.042
epoch: 102, batch: 2600 // loss: 0.045
epoch: 102, batch: 2700 // loss: 0.042
epoch: 102, batch: 2800 // loss: 0.046
epoch: 102, batch: 2900 // loss: 0.042
epoch: 102, batch: 3000 // loss: 0.045
epoch: 102, batch: 3100 // loss: 0.043
epoch: 102, batch: 3200 // loss: 0.039
epoch: 102, batch: 3300 // loss: 0.038
epoch: 102, batch: 3400 // loss: 0.044
epoch: 102, batch: 3500 // loss: 0.035
epoch: 102, batch: 3600 // loss: 0.043
epoch: 102, batch: 3700 // loss: 0.043

epoch: 103, batch: 0 // loss: 0.053
epoch: 103, batch: 100 // loss: 0.046
epoch: 103, batch: 200 // loss: 0.043
epoch: 103, batch: 300 // loss: 0.049
epoch: 103, batch: 400 // loss: 0.046
epoch: 103, batch: 500 // loss: 0.040
epoch: 103, batch: 600 // loss: 0.040
epoch: 103, batch: 700 // loss: 0.044
epoch: 103, batch: 800 // loss: 0.042
epoch: 103, batch: 900 // loss: 0.049
epoch: 103, batch: 1000 // loss: 0.046
epoch: 103, batch: 1100 // loss: 0.042
epoch: 103, batch: 1200 // loss: 0.044
epoch: 103, batch: 1300 // loss: 0.046
epoch: 103, batch: 1400 // loss: 0.043
epoch: 103, batch: 1500 // loss: 0.048
epoch: 103, batch: 1600 // loss: 0.052
epoch: 103, batch: 1700 // loss: 0.045
epoch: 103, batch: 1800 // loss: 0.053
epoch: 103, batch: 1900 // loss: 0.045
epoch: 103, batch: 2000 // loss: 0.047
epoch: 103, batch: 2100 // loss: 0.046
epoch: 103, batch: 2200 // loss: 0.050
epoch: 103, batch: 2300 // loss: 0.049
epoch: 103, batch: 2400 // loss: 0.042
epoch: 103, batch: 2500 // loss: 0.042
epoch: 103, batch: 2600 // loss: 0.045
epoch: 103, batch: 2700 // loss: 0.042
epoch: 103, batch: 2800 // loss: 0.046
epoch: 103, batch: 2900 // loss: 0.042
epoch: 103, batch: 3000 // loss: 0.045
epoch: 103, batch: 3100 // loss: 0.043
epoch: 103, batch: 3200 // loss: 0.039
epoch: 103, batch: 3300 // loss: 0.038
epoch: 103, batch: 3400 // loss: 0.044
epoch: 103, batch: 3500 // loss: 0.035
epoch: 103, batch: 3600 // loss: 0.043
epoch: 103, batch: 3700 // loss: 0.043

epoch: 104, batch: 0 // loss: 0.053
epoch: 104, batch: 100 // loss: 0.046
epoch: 104, batch: 200 // loss: 0.043
epoch: 104, batch: 300 // loss: 0.049
epoch: 104, batch: 400 // loss: 0.046
epoch: 104, batch: 500 // loss: 0.040
epoch: 104, batch: 600 // loss: 0.040
epoch: 104, batch: 700 // loss: 0.044
epoch: 104, batch: 800 // loss: 0.042
epoch: 104, batch: 900 // loss: 0.049
epoch: 104, batch: 1000 // loss: 0.046
epoch: 104, batch: 1100 // loss: 0.042
epoch: 104, batch: 1200 // loss: 0.044
epoch: 104, batch: 1300 // loss: 0.046
epoch: 104, batch: 1400 // loss: 0.043
epoch: 104, batch: 1500 // loss: 0.048
epoch: 104, batch: 1600 // loss: 0.052
epoch: 104, batch: 1700 // loss: 0.045
epoch: 104, batch: 1800 // loss: 0.053
epoch: 104, batch: 1900 // loss: 0.045
epoch: 104, batch: 2000 // loss: 0.047
epoch: 104, batch: 2100 // loss: 0.046
epoch: 104, batch: 2200 // loss: 0.050
epoch: 104, batch: 2300 // loss: 0.049
epoch: 104, batch: 2400 // loss: 0.042
epoch: 104, batch: 2500 // loss: 0.042
epoch: 104, batch: 2600 // loss: 0.045
epoch: 104, batch: 2700 // loss: 0.042
epoch: 104, batch: 2800 // loss: 0.046
epoch: 104, batch: 2900 // loss: 0.042
epoch: 104, batch: 3000 // loss: 0.045
epoch: 104, batch: 3100 // loss: 0.043
epoch: 104, batch: 3200 // loss: 0.039
epoch: 104, batch: 3300 // loss: 0.038
epoch: 104, batch: 3400 // loss: 0.044
epoch: 104, batch: 3500 // loss: 0.035
epoch: 104, batch: 3600 // loss: 0.043
epoch: 104, batch: 3700 // loss: 0.043

epoch: 105, batch: 0 // loss: 0.053
epoch: 105, batch: 100 // loss: 0.046
epoch: 105, batch: 200 // loss: 0.043
epoch: 105, batch: 300 // loss: 0.049
epoch: 105, batch: 400 // loss: 0.046
epoch: 105, batch: 500 // loss: 0.040
epoch: 105, batch: 600 // loss: 0.040
epoch: 105, batch: 700 // loss: 0.044
epoch: 105, batch: 800 // loss: 0.042
epoch: 105, batch: 900 // loss: 0.049
epoch: 105, batch: 1000 // loss: 0.046
epoch: 105, batch: 1100 // loss: 0.042
epoch: 105, batch: 1200 // loss: 0.044
epoch: 105, batch: 1300 // loss: 0.046
epoch: 105, batch: 1400 // loss: 0.043
epoch: 105, batch: 1500 // loss: 0.048
epoch: 105, batch: 1600 // loss: 0.052
epoch: 105, batch: 1700 // loss: 0.045
epoch: 105, batch: 1800 // loss: 0.053
epoch: 105, batch: 1900 // loss: 0.045
epoch: 105, batch: 2000 // loss: 0.047
epoch: 105, batch: 2100 // loss: 0.046
epoch: 105, batch: 2200 // loss: 0.050
epoch: 105, batch: 2300 // loss: 0.049
epoch: 105, batch: 2400 // loss: 0.042
epoch: 105, batch: 2500 // loss: 0.042
epoch: 105, batch: 2600 // loss: 0.045
epoch: 105, batch: 2700 // loss: 0.042
epoch: 105, batch: 2800 // loss: 0.046
epoch: 105, batch: 2900 // loss: 0.042
epoch: 105, batch: 3000 // loss: 0.045
epoch: 105, batch: 3100 // loss: 0.043
epoch: 105, batch: 3200 // loss: 0.039
epoch: 105, batch: 3300 // loss: 0.038
epoch: 105, batch: 3400 // loss: 0.044
epoch: 105, batch: 3500 // loss: 0.035
epoch: 105, batch: 3600 // loss: 0.043
epoch: 105, batch: 3700 // loss: 0.043

epoch: 106, batch: 0 // loss: 0.053
epoch: 106, batch: 100 // loss: 0.046
epoch: 106, batch: 200 // loss: 0.043
epoch: 106, batch: 300 // loss: 0.049
epoch: 106, batch: 400 // loss: 0.046
epoch: 106, batch: 500 // loss: 0.040
epoch: 106, batch: 600 // loss: 0.040
epoch: 106, batch: 700 // loss: 0.044
epoch: 106, batch: 800 // loss: 0.042
epoch: 106, batch: 900 // loss: 0.049
epoch: 106, batch: 1000 // loss: 0.046
epoch: 106, batch: 1100 // loss: 0.042
epoch: 106, batch: 1200 // loss: 0.044
epoch: 106, batch: 1300 // loss: 0.046
epoch: 106, batch: 1400 // loss: 0.043
epoch: 106, batch: 1500 // loss: 0.048
epoch: 106, batch: 1600 // loss: 0.052
epoch: 106, batch: 1700 // loss: 0.045
epoch: 106, batch: 1800 // loss: 0.053
epoch: 106, batch: 1900 // loss: 0.045
epoch: 106, batch: 2000 // loss: 0.047
epoch: 106, batch: 2100 // loss: 0.046
epoch: 106, batch: 2200 // loss: 0.050
epoch: 106, batch: 2300 // loss: 0.049
epoch: 106, batch: 2400 // loss: 0.042
epoch: 106, batch: 2500 // loss: 0.042
epoch: 106, batch: 2600 // loss: 0.045
epoch: 106, batch: 2700 // loss: 0.042
epoch: 106, batch: 2800 // loss: 0.046
epoch: 106, batch: 2900 // loss: 0.042
epoch: 106, batch: 3000 // loss: 0.045
epoch: 106, batch: 3100 // loss: 0.043
epoch: 106, batch: 3200 // loss: 0.039
epoch: 106, batch: 3300 // loss: 0.038
epoch: 106, batch: 3400 // loss: 0.044
epoch: 106, batch: 3500 // loss: 0.035
epoch: 106, batch: 3600 // loss: 0.043
epoch: 106, batch: 3700 // loss: 0.043

epoch: 107, batch: 0 // loss: 0.053
epoch: 107, batch: 100 // loss: 0.046
epoch: 107, batch: 200 // loss: 0.043
epoch: 107, batch: 300 // loss: 0.049
epoch: 107, batch: 400 // loss: 0.046
epoch: 107, batch: 500 // loss: 0.040
epoch: 107, batch: 600 // loss: 0.040
epoch: 107, batch: 700 // loss: 0.044
epoch: 107, batch: 800 // loss: 0.042
epoch: 107, batch: 900 // loss: 0.049
epoch: 107, batch: 1000 // loss: 0.046
epoch: 107, batch: 1100 // loss: 0.042
epoch: 107, batch: 1200 // loss: 0.044
epoch: 107, batch: 1300 // loss: 0.046
epoch: 107, batch: 1400 // loss: 0.043
epoch: 107, batch: 1500 // loss: 0.048
epoch: 107, batch: 1600 // loss: 0.052
epoch: 107, batch: 1700 // loss: 0.045
epoch: 107, batch: 1800 // loss: 0.053
epoch: 107, batch: 1900 // loss: 0.045
epoch: 107, batch: 2000 // loss: 0.047
epoch: 107, batch: 2100 // loss: 0.046
epoch: 107, batch: 2200 // loss: 0.050
epoch: 107, batch: 2300 // loss: 0.049
epoch: 107, batch: 2400 // loss: 0.042
epoch: 107, batch: 2500 // loss: 0.042
epoch: 107, batch: 2600 // loss: 0.045
epoch: 107, batch: 2700 // loss: 0.042
epoch: 107, batch: 2800 // loss: 0.046
epoch: 107, batch: 2900 // loss: 0.042
epoch: 107, batch: 3000 // loss: 0.045
epoch: 107, batch: 3100 // loss: 0.043
epoch: 107, batch: 3200 // loss: 0.039
epoch: 107, batch: 3300 // loss: 0.038
epoch: 107, batch: 3400 // loss: 0.044
epoch: 107, batch: 3500 // loss: 0.035
epoch: 107, batch: 3600 // loss: 0.043
epoch: 107, batch: 3700 // loss: 0.043

epoch: 108, batch: 0 // loss: 0.053
epoch: 108, batch: 100 // loss: 0.046
epoch: 108, batch: 200 // loss: 0.043
epoch: 108, batch: 300 // loss: 0.049
epoch: 108, batch: 400 // loss: 0.046
epoch: 108, batch: 500 // loss: 0.040
epoch: 108, batch: 600 // loss: 0.040
epoch: 108, batch: 700 // loss: 0.044
epoch: 108, batch: 800 // loss: 0.042
epoch: 108, batch: 900 // loss: 0.049
epoch: 108, batch: 1000 // loss: 0.046
epoch: 108, batch: 1100 // loss: 0.042
epoch: 108, batch: 1200 // loss: 0.044
epoch: 108, batch: 1300 // loss: 0.046
epoch: 108, batch: 1400 // loss: 0.043
epoch: 108, batch: 1500 // loss: 0.048
epoch: 108, batch: 1600 // loss: 0.052
epoch: 108, batch: 1700 // loss: 0.045
epoch: 108, batch: 1800 // loss: 0.053
epoch: 108, batch: 1900 // loss: 0.045
epoch: 108, batch: 2000 // loss: 0.047
epoch: 108, batch: 2100 // loss: 0.046
epoch: 108, batch: 2200 // loss: 0.050
epoch: 108, batch: 2300 // loss: 0.049
epoch: 108, batch: 2400 // loss: 0.042
epoch: 108, batch: 2500 // loss: 0.042
epoch: 108, batch: 2600 // loss: 0.045
epoch: 108, batch: 2700 // loss: 0.042
epoch: 108, batch: 2800 // loss: 0.046
epoch: 108, batch: 2900 // loss: 0.042
epoch: 108, batch: 3000 // loss: 0.045
epoch: 108, batch: 3100 // loss: 0.043
epoch: 108, batch: 3200 // loss: 0.039
epoch: 108, batch: 3300 // loss: 0.038
epoch: 108, batch: 3400 // loss: 0.044
epoch: 108, batch: 3500 // loss: 0.035
epoch: 108, batch: 3600 // loss: 0.043
epoch: 108, batch: 3700 // loss: 0.043

epoch: 109, batch: 0 // loss: 0.053
epoch: 109, batch: 100 // loss: 0.046
epoch: 109, batch: 200 // loss: 0.043
epoch: 109, batch: 300 // loss: 0.049
epoch: 109, batch: 400 // loss: 0.046
epoch: 109, batch: 500 // loss: 0.040
epoch: 109, batch: 600 // loss: 0.040
epoch: 109, batch: 700 // loss: 0.044
epoch: 109, batch: 800 // loss: 0.042
epoch: 109, batch: 900 // loss: 0.049
epoch: 109, batch: 1000 // loss: 0.046
epoch: 109, batch: 1100 // loss: 0.042
epoch: 109, batch: 1200 // loss: 0.044
epoch: 109, batch: 1300 // loss: 0.046
epoch: 109, batch: 1400 // loss: 0.043
epoch: 109, batch: 1500 // loss: 0.048
epoch: 109, batch: 1600 // loss: 0.052
epoch: 109, batch: 1700 // loss: 0.045
epoch: 109, batch: 1800 // loss: 0.053
epoch: 109, batch: 1900 // loss: 0.045
epoch: 109, batch: 2000 // loss: 0.047
epoch: 109, batch: 2100 // loss: 0.046
epoch: 109, batch: 2200 // loss: 0.050
epoch: 109, batch: 2300 // loss: 0.049
epoch: 109, batch: 2400 // loss: 0.042
epoch: 109, batch: 2500 // loss: 0.042
epoch: 109, batch: 2600 // loss: 0.045
epoch: 109, batch: 2700 // loss: 0.042
epoch: 109, batch: 2800 // loss: 0.046
epoch: 109, batch: 2900 // loss: 0.042
epoch: 109, batch: 3000 // loss: 0.045
epoch: 109, batch: 3100 // loss: 0.043
epoch: 109, batch: 3200 // loss: 0.039
epoch: 109, batch: 3300 // loss: 0.038
epoch: 109, batch: 3400 // loss: 0.044
epoch: 109, batch: 3500 // loss: 0.035
epoch: 109, batch: 3600 // loss: 0.043
epoch: 109, batch: 3700 // loss: 0.043

epoch: 110, batch: 0 // loss: 0.053
epoch: 110, batch: 100 // loss: 0.046
epoch: 110, batch: 200 // loss: 0.043
epoch: 110, batch: 300 // loss: 0.049
epoch: 110, batch: 400 // loss: 0.046
epoch: 110, batch: 500 // loss: 0.040
epoch: 110, batch: 600 // loss: 0.040
epoch: 110, batch: 700 // loss: 0.044
epoch: 110, batch: 800 // loss: 0.042
epoch: 110, batch: 900 // loss: 0.049
epoch: 110, batch: 1000 // loss: 0.046
epoch: 110, batch: 1100 // loss: 0.042
epoch: 110, batch: 1200 // loss: 0.044
epoch: 110, batch: 1300 // loss: 0.046
epoch: 110, batch: 1400 // loss: 0.043
epoch: 110, batch: 1500 // loss: 0.048
epoch: 110, batch: 1600 // loss: 0.052
epoch: 110, batch: 1700 // loss: 0.045
epoch: 110, batch: 1800 // loss: 0.053
epoch: 110, batch: 1900 // loss: 0.045
epoch: 110, batch: 2000 // loss: 0.047
epoch: 110, batch: 2100 // loss: 0.046
epoch: 110, batch: 2200 // loss: 0.050
epoch: 110, batch: 2300 // loss: 0.049
epoch: 110, batch: 2400 // loss: 0.042
epoch: 110, batch: 2500 // loss: 0.042
epoch: 110, batch: 2600 // loss: 0.045
epoch: 110, batch: 2700 // loss: 0.042
epoch: 110, batch: 2800 // loss: 0.046
epoch: 110, batch: 2900 // loss: 0.042
epoch: 110, batch: 3000 // loss: 0.045
epoch: 110, batch: 3100 // loss: 0.043
epoch: 110, batch: 3200 // loss: 0.039
epoch: 110, batch: 3300 // loss: 0.038
epoch: 110, batch: 3400 // loss: 0.044
epoch: 110, batch: 3500 // loss: 0.035
epoch: 110, batch: 3600 // loss: 0.043
epoch: 110, batch: 3700 // loss: 0.043

epoch: 111, batch: 0 // loss: 0.053
epoch: 111, batch: 100 // loss: 0.046
epoch: 111, batch: 200 // loss: 0.043
epoch: 111, batch: 300 // loss: 0.049
epoch: 111, batch: 400 // loss: 0.046
epoch: 111, batch: 500 // loss: 0.040
epoch: 111, batch: 600 // loss: 0.040
epoch: 111, batch: 700 // loss: 0.044
epoch: 111, batch: 800 // loss: 0.042
epoch: 111, batch: 900 // loss: 0.049
epoch: 111, batch: 1000 // loss: 0.046
epoch: 111, batch: 1100 // loss: 0.042
epoch: 111, batch: 1200 // loss: 0.044
epoch: 111, batch: 1300 // loss: 0.046
epoch: 111, batch: 1400 // loss: 0.043
epoch: 111, batch: 1500 // loss: 0.048
epoch: 111, batch: 1600 // loss: 0.052
epoch: 111, batch: 1700 // loss: 0.045
epoch: 111, batch: 1800 // loss: 0.053
epoch: 111, batch: 1900 // loss: 0.045
epoch: 111, batch: 2000 // loss: 0.047
epoch: 111, batch: 2100 // loss: 0.046
epoch: 111, batch: 2200 // loss: 0.050
epoch: 111, batch: 2300 // loss: 0.049
epoch: 111, batch: 2400 // loss: 0.042
epoch: 111, batch: 2500 // loss: 0.042
epoch: 111, batch: 2600 // loss: 0.045
epoch: 111, batch: 2700 // loss: 0.042
epoch: 111, batch: 2800 // loss: 0.046
epoch: 111, batch: 2900 // loss: 0.042
epoch: 111, batch: 3000 // loss: 0.045
epoch: 111, batch: 3100 // loss: 0.043
epoch: 111, batch: 3200 // loss: 0.039
epoch: 111, batch: 3300 // loss: 0.038
epoch: 111, batch: 3400 // loss: 0.044
epoch: 111, batch: 3500 // loss: 0.035
epoch: 111, batch: 3600 // loss: 0.043
epoch: 111, batch: 3700 // loss: 0.043

epoch: 112, batch: 0 // loss: 0.053
epoch: 112, batch: 100 // loss: 0.046
epoch: 112, batch: 200 // loss: 0.043
epoch: 112, batch: 300 // loss: 0.049
epoch: 112, batch: 400 // loss: 0.046
epoch: 112, batch: 500 // loss: 0.040
epoch: 112, batch: 600 // loss: 0.040
epoch: 112, batch: 700 // loss: 0.044
epoch: 112, batch: 800 // loss: 0.042
epoch: 112, batch: 900 // loss: 0.049
epoch: 112, batch: 1000 // loss: 0.046
epoch: 112, batch: 1100 // loss: 0.042
epoch: 112, batch: 1200 // loss: 0.044
epoch: 112, batch: 1300 // loss: 0.046
epoch: 112, batch: 1400 // loss: 0.043
epoch: 112, batch: 1500 // loss: 0.048
epoch: 112, batch: 1600 // loss: 0.052
epoch: 112, batch: 1700 // loss: 0.045
epoch: 112, batch: 1800 // loss: 0.053
epoch: 112, batch: 1900 // loss: 0.045
epoch: 112, batch: 2000 // loss: 0.047
epoch: 112, batch: 2100 // loss: 0.046
epoch: 112, batch: 2200 // loss: 0.050
epoch: 112, batch: 2300 // loss: 0.049
epoch: 112, batch: 2400 // loss: 0.042
epoch: 112, batch: 2500 // loss: 0.042
epoch: 112, batch: 2600 // loss: 0.045
epoch: 112, batch: 2700 // loss: 0.042
epoch: 112, batch: 2800 // loss: 0.046
epoch: 112, batch: 2900 // loss: 0.042
epoch: 112, batch: 3000 // loss: 0.045
epoch: 112, batch: 3100 // loss: 0.043
epoch: 112, batch: 3200 // loss: 0.039
epoch: 112, batch: 3300 // loss: 0.038
epoch: 112, batch: 3400 // loss: 0.044
epoch: 112, batch: 3500 // loss: 0.035
epoch: 112, batch: 3600 // loss: 0.043
epoch: 112, batch: 3700 // loss: 0.043

epoch: 113, batch: 0 // loss: 0.053
epoch: 113, batch: 100 // loss: 0.046
epoch: 113, batch: 200 // loss: 0.043
epoch: 113, batch: 300 // loss: 0.049
epoch: 113, batch: 400 // loss: 0.046
epoch: 113, batch: 500 // loss: 0.040
epoch: 113, batch: 600 // loss: 0.040
epoch: 113, batch: 700 // loss: 0.044
epoch: 113, batch: 800 // loss: 0.042
epoch: 113, batch: 900 // loss: 0.049
epoch: 113, batch: 1000 // loss: 0.046
epoch: 113, batch: 1100 // loss: 0.042
epoch: 113, batch: 1200 // loss: 0.044
epoch: 113, batch: 1300 // loss: 0.046
epoch: 113, batch: 1400 // loss: 0.043
epoch: 113, batch: 1500 // loss: 0.048
epoch: 113, batch: 1600 // loss: 0.052
epoch: 113, batch: 1700 // loss: 0.045
epoch: 113, batch: 1800 // loss: 0.053
epoch: 113, batch: 1900 // loss: 0.045
epoch: 113, batch: 2000 // loss: 0.047
epoch: 113, batch: 2100 // loss: 0.046
epoch: 113, batch: 2200 // loss: 0.050
epoch: 113, batch: 2300 // loss: 0.049
epoch: 113, batch: 2400 // loss: 0.042
epoch: 113, batch: 2500 // loss: 0.042
epoch: 113, batch: 2600 // loss: 0.045
epoch: 113, batch: 2700 // loss: 0.042
epoch: 113, batch: 2800 // loss: 0.046
epoch: 113, batch: 2900 // loss: 0.042
epoch: 113, batch: 3000 // loss: 0.045
epoch: 113, batch: 3100 // loss: 0.043
epoch: 113, batch: 3200 // loss: 0.039
epoch: 113, batch: 3300 // loss: 0.038
epoch: 113, batch: 3400 // loss: 0.044
epoch: 113, batch: 3500 // loss: 0.035
epoch: 113, batch: 3600 // loss: 0.043
epoch: 113, batch: 3700 // loss: 0.043

epoch: 114, batch: 0 // loss: 0.053
epoch: 114, batch: 100 // loss: 0.046
epoch: 114, batch: 200 // loss: 0.043
epoch: 114, batch: 300 // loss: 0.049
epoch: 114, batch: 400 // loss: 0.046
epoch: 114, batch: 500 // loss: 0.040
epoch: 114, batch: 600 // loss: 0.040
epoch: 114, batch: 700 // loss: 0.044
epoch: 114, batch: 800 // loss: 0.042
epoch: 114, batch: 900 // loss: 0.049
epoch: 114, batch: 1000 // loss: 0.046
epoch: 114, batch: 1100 // loss: 0.042
epoch: 114, batch: 1200 // loss: 0.044
epoch: 114, batch: 1300 // loss: 0.046
epoch: 114, batch: 1400 // loss: 0.043
epoch: 114, batch: 1500 // loss: 0.048
epoch: 114, batch: 1600 // loss: 0.052
epoch: 114, batch: 1700 // loss: 0.045
epoch: 114, batch: 1800 // loss: 0.053
epoch: 114, batch: 1900 // loss: 0.045
epoch: 114, batch: 2000 // loss: 0.047
epoch: 114, batch: 2100 // loss: 0.046
epoch: 114, batch: 2200 // loss: 0.050
epoch: 114, batch: 2300 // loss: 0.049
epoch: 114, batch: 2400 // loss: 0.042
epoch: 114, batch: 2500 // loss: 0.042
epoch: 114, batch: 2600 // loss: 0.045
epoch: 114, batch: 2700 // loss: 0.042
epoch: 114, batch: 2800 // loss: 0.046
epoch: 114, batch: 2900 // loss: 0.042
epoch: 114, batch: 3000 // loss: 0.045
epoch: 114, batch: 3100 // loss: 0.043
epoch: 114, batch: 3200 // loss: 0.039
epoch: 114, batch: 3300 // loss: 0.038
epoch: 114, batch: 3400 // loss: 0.044
epoch: 114, batch: 3500 // loss: 0.035
epoch: 114, batch: 3600 // loss: 0.043
epoch: 114, batch: 3700 // loss: 0.043

epoch: 115, batch: 0 // loss: 0.053
epoch: 115, batch: 100 // loss: 0.046
epoch: 115, batch: 200 // loss: 0.043
epoch: 115, batch: 300 // loss: 0.049
epoch: 115, batch: 400 // loss: 0.046
epoch: 115, batch: 500 // loss: 0.040
epoch: 115, batch: 600 // loss: 0.040
epoch: 115, batch: 700 // loss: 0.044
epoch: 115, batch: 800 // loss: 0.042
epoch: 115, batch: 900 // loss: 0.049
epoch: 115, batch: 1000 // loss: 0.046
epoch: 115, batch: 1100 // loss: 0.042
epoch: 115, batch: 1200 // loss: 0.044
epoch: 115, batch: 1300 // loss: 0.046
epoch: 115, batch: 1400 // loss: 0.043
epoch: 115, batch: 1500 // loss: 0.048
epoch: 115, batch: 1600 // loss: 0.052
epoch: 115, batch: 1700 // loss: 0.045
epoch: 115, batch: 1800 // loss: 0.053
epoch: 115, batch: 1900 // loss: 0.045
epoch: 115, batch: 2000 // loss: 0.047
epoch: 115, batch: 2100 // loss: 0.046
epoch: 115, batch: 2200 // loss: 0.050
epoch: 115, batch: 2300 // loss: 0.049
epoch: 115, batch: 2400 // loss: 0.042
epoch: 115, batch: 2500 // loss: 0.042
epoch: 115, batch: 2600 // loss: 0.045
epoch: 115, batch: 2700 // loss: 0.042
epoch: 115, batch: 2800 // loss: 0.046
epoch: 115, batch: 2900 // loss: 0.042
epoch: 115, batch: 3000 // loss: 0.045
epoch: 115, batch: 3100 // loss: 0.043
epoch: 115, batch: 3200 // loss: 0.039
epoch: 115, batch: 3300 // loss: 0.038
epoch: 115, batch: 3400 // loss: 0.044
epoch: 115, batch: 3500 // loss: 0.035
epoch: 115, batch: 3600 // loss: 0.043
epoch: 115, batch: 3700 // loss: 0.043

epoch: 116, batch: 0 // loss: 0.053
epoch: 116, batch: 100 // loss: 0.046
epoch: 116, batch: 200 // loss: 0.043
epoch: 116, batch: 300 // loss: 0.049
epoch: 116, batch: 400 // loss: 0.046
epoch: 116, batch: 500 // loss: 0.040
epoch: 116, batch: 600 // loss: 0.040
epoch: 116, batch: 700 // loss: 0.044
epoch: 116, batch: 800 // loss: 0.042
epoch: 116, batch: 900 // loss: 0.049
epoch: 116, batch: 1000 // loss: 0.046
epoch: 116, batch: 1100 // loss: 0.042
epoch: 116, batch: 1200 // loss: 0.044
epoch: 116, batch: 1300 // loss: 0.046
epoch: 116, batch: 1400 // loss: 0.043
epoch: 116, batch: 1500 // loss: 0.048
epoch: 116, batch: 1600 // loss: 0.052
epoch: 116, batch: 1700 // loss: 0.045
epoch: 116, batch: 1800 // loss: 0.053
epoch: 116, batch: 1900 // loss: 0.045
epoch: 116, batch: 2000 // loss: 0.047
epoch: 116, batch: 2100 // loss: 0.046
epoch: 116, batch: 2200 // loss: 0.050
epoch: 116, batch: 2300 // loss: 0.049
epoch: 116, batch: 2400 // loss: 0.042
epoch: 116, batch: 2500 // loss: 0.042
epoch: 116, batch: 2600 // loss: 0.045
epoch: 116, batch: 2700 // loss: 0.042
epoch: 116, batch: 2800 // loss: 0.046
epoch: 116, batch: 2900 // loss: 0.042
epoch: 116, batch: 3000 // loss: 0.045
epoch: 116, batch: 3100 // loss: 0.043
epoch: 116, batch: 3200 // loss: 0.039
epoch: 116, batch: 3300 // loss: 0.038
epoch: 116, batch: 3400 // loss: 0.044
epoch: 116, batch: 3500 // loss: 0.035
epoch: 116, batch: 3600 // loss: 0.043
epoch: 116, batch: 3700 // loss: 0.043

epoch: 117, batch: 0 // loss: 0.053
epoch: 117, batch: 100 // loss: 0.046
epoch: 117, batch: 200 // loss: 0.043
epoch: 117, batch: 300 // loss: 0.049
epoch: 117, batch: 400 // loss: 0.046
epoch: 117, batch: 500 // loss: 0.040
epoch: 117, batch: 600 // loss: 0.040
epoch: 117, batch: 700 // loss: 0.044
epoch: 117, batch: 800 // loss: 0.042
epoch: 117, batch: 900 // loss: 0.049
epoch: 117, batch: 1000 // loss: 0.046
epoch: 117, batch: 1100 // loss: 0.042
epoch: 117, batch: 1200 // loss: 0.044
epoch: 117, batch: 1300 // loss: 0.046
epoch: 117, batch: 1400 // loss: 0.043
epoch: 117, batch: 1500 // loss: 0.048
epoch: 117, batch: 1600 // loss: 0.052
epoch: 117, batch: 1700 // loss: 0.045
epoch: 117, batch: 1800 // loss: 0.053
epoch: 117, batch: 1900 // loss: 0.045
epoch: 117, batch: 2000 // loss: 0.047
epoch: 117, batch: 2100 // loss: 0.046
epoch: 117, batch: 2200 // loss: 0.050
epoch: 117, batch: 2300 // loss: 0.049
epoch: 117, batch: 2400 // loss: 0.042
epoch: 117, batch: 2500 // loss: 0.042
epoch: 117, batch: 2600 // loss: 0.045
epoch: 117, batch: 2700 // loss: 0.042
epoch: 117, batch: 2800 // loss: 0.046
epoch: 117, batch: 2900 // loss: 0.042
epoch: 117, batch: 3000 // loss: 0.045
epoch: 117, batch: 3100 // loss: 0.043
epoch: 117, batch: 3200 // loss: 0.039
epoch: 117, batch: 3300 // loss: 0.038
epoch: 117, batch: 3400 // loss: 0.044
epoch: 117, batch: 3500 // loss: 0.035
epoch: 117, batch: 3600 // loss: 0.043
epoch: 117, batch: 3700 // loss: 0.043

epoch: 118, batch: 0 // loss: 0.053
epoch: 118, batch: 100 // loss: 0.046
epoch: 118, batch: 200 // loss: 0.043
epoch: 118, batch: 300 // loss: 0.049
epoch: 118, batch: 400 // loss: 0.046
epoch: 118, batch: 500 // loss: 0.040
epoch: 118, batch: 600 // loss: 0.040
epoch: 118, batch: 700 // loss: 0.044
epoch: 118, batch: 800 // loss: 0.042
epoch: 118, batch: 900 // loss: 0.049
epoch: 118, batch: 1000 // loss: 0.046
epoch: 118, batch: 1100 // loss: 0.042
epoch: 118, batch: 1200 // loss: 0.044
epoch: 118, batch: 1300 // loss: 0.046
epoch: 118, batch: 1400 // loss: 0.043
epoch: 118, batch: 1500 // loss: 0.048
epoch: 118, batch: 1600 // loss: 0.052
epoch: 118, batch: 1700 // loss: 0.045
epoch: 118, batch: 1800 // loss: 0.053
epoch: 118, batch: 1900 // loss: 0.045
epoch: 118, batch: 2000 // loss: 0.047
epoch: 118, batch: 2100 // loss: 0.046
epoch: 118, batch: 2200 // loss: 0.050
epoch: 118, batch: 2300 // loss: 0.049
epoch: 118, batch: 2400 // loss: 0.042
epoch: 118, batch: 2500 // loss: 0.042
epoch: 118, batch: 2600 // loss: 0.045
epoch: 118, batch: 2700 // loss: 0.042
epoch: 118, batch: 2800 // loss: 0.046
epoch: 118, batch: 2900 // loss: 0.042
epoch: 118, batch: 3000 // loss: 0.045
epoch: 118, batch: 3100 // loss: 0.043
epoch: 118, batch: 3200 // loss: 0.039
epoch: 118, batch: 3300 // loss: 0.038
epoch: 118, batch: 3400 // loss: 0.044
epoch: 118, batch: 3500 // loss: 0.035
epoch: 118, batch: 3600 // loss: 0.043
epoch: 118, batch: 3700 // loss: 0.043

epoch: 119, batch: 0 // loss: 0.053
epoch: 119, batch: 100 // loss: 0.046
epoch: 119, batch: 200 // loss: 0.043
epoch: 119, batch: 300 // loss: 0.049
epoch: 119, batch: 400 // loss: 0.046
epoch: 119, batch: 500 // loss: 0.040
epoch: 119, batch: 600 // loss: 0.040
epoch: 119, batch: 700 // loss: 0.044
epoch: 119, batch: 800 // loss: 0.042
epoch: 119, batch: 900 // loss: 0.049
epoch: 119, batch: 1000 // loss: 0.046
epoch: 119, batch: 1100 // loss: 0.042
epoch: 119, batch: 1200 // loss: 0.044
epoch: 119, batch: 1300 // loss: 0.046
epoch: 119, batch: 1400 // loss: 0.043
epoch: 119, batch: 1500 // loss: 0.048
epoch: 119, batch: 1600 // loss: 0.052
epoch: 119, batch: 1700 // loss: 0.045
epoch: 119, batch: 1800 // loss: 0.053
epoch: 119, batch: 1900 // loss: 0.045
epoch: 119, batch: 2000 // loss: 0.047
epoch: 119, batch: 2100 // loss: 0.046
epoch: 119, batch: 2200 // loss: 0.050
epoch: 119, batch: 2300 // loss: 0.049
epoch: 119, batch: 2400 // loss: 0.042
epoch: 119, batch: 2500 // loss: 0.042
epoch: 119, batch: 2600 // loss: 0.045
epoch: 119, batch: 2700 // loss: 0.042
epoch: 119, batch: 2800 // loss: 0.046
epoch: 119, batch: 2900 // loss: 0.042
epoch: 119, batch: 3000 // loss: 0.045
epoch: 119, batch: 3100 // loss: 0.043
epoch: 119, batch: 3200 // loss: 0.039
epoch: 119, batch: 3300 // loss: 0.038
epoch: 119, batch: 3400 // loss: 0.044
epoch: 119, batch: 3500 // loss: 0.035
epoch: 119, batch: 3600 // loss: 0.043
epoch: 119, batch: 3700 // loss: 0.043

epoch: 120, batch: 0 // loss: 0.053
epoch: 120, batch: 100 // loss: 0.046
epoch: 120, batch: 200 // loss: 0.043
epoch: 120, batch: 300 // loss: 0.049
epoch: 120, batch: 400 // loss: 0.046
epoch: 120, batch: 500 // loss: 0.040
epoch: 120, batch: 600 // loss: 0.040
epoch: 120, batch: 700 // loss: 0.044
epoch: 120, batch: 800 // loss: 0.042
epoch: 120, batch: 900 // loss: 0.049
epoch: 120, batch: 1000 // loss: 0.046
epoch: 120, batch: 1100 // loss: 0.042
epoch: 120, batch: 1200 // loss: 0.044
epoch: 120, batch: 1300 // loss: 0.046
epoch: 120, batch: 1400 // loss: 0.043
epoch: 120, batch: 1500 // loss: 0.048
epoch: 120, batch: 1600 // loss: 0.052
epoch: 120, batch: 1700 // loss: 0.045
epoch: 120, batch: 1800 // loss: 0.053
epoch: 120, batch: 1900 // loss: 0.045
epoch: 120, batch: 2000 // loss: 0.047
epoch: 120, batch: 2100 // loss: 0.046
epoch: 120, batch: 2200 // loss: 0.050
epoch: 120, batch: 2300 // loss: 0.049
epoch: 120, batch: 2400 // loss: 0.042
epoch: 120, batch: 2500 // loss: 0.042
epoch: 120, batch: 2600 // loss: 0.045
epoch: 120, batch: 2700 // loss: 0.042
epoch: 120, batch: 2800 // loss: 0.046
epoch: 120, batch: 2900 // loss: 0.042
epoch: 120, batch: 3000 // loss: 0.045
epoch: 120, batch: 3100 // loss: 0.043
epoch: 120, batch: 3200 // loss: 0.039
epoch: 120, batch: 3300 // loss: 0.038
epoch: 120, batch: 3400 // loss: 0.044
epoch: 120, batch: 3500 // loss: 0.035
epoch: 120, batch: 3600 // loss: 0.043
epoch: 120, batch: 3700 // loss: 0.043

epoch: 121, batch: 0 // loss: 0.053
epoch: 121, batch: 100 // loss: 0.046
epoch: 121, batch: 200 // loss: 0.043
epoch: 121, batch: 300 // loss: 0.049
epoch: 121, batch: 400 // loss: 0.046
epoch: 121, batch: 500 // loss: 0.040
epoch: 121, batch: 600 // loss: 0.040
epoch: 121, batch: 700 // loss: 0.044
epoch: 121, batch: 800 // loss: 0.042
epoch: 121, batch: 900 // loss: 0.049
epoch: 121, batch: 1000 // loss: 0.046
epoch: 121, batch: 1100 // loss: 0.042
epoch: 121, batch: 1200 // loss: 0.044
epoch: 121, batch: 1300 // loss: 0.046
epoch: 121, batch: 1400 // loss: 0.043
epoch: 121, batch: 1500 // loss: 0.048
epoch: 121, batch: 1600 // loss: 0.052
epoch: 121, batch: 1700 // loss: 0.045
epoch: 121, batch: 1800 // loss: 0.053
epoch: 121, batch: 1900 // loss: 0.045
epoch: 121, batch: 2000 // loss: 0.047
epoch: 121, batch: 2100 // loss: 0.046
epoch: 121, batch: 2200 // loss: 0.050
epoch: 121, batch: 2300 // loss: 0.049
epoch: 121, batch: 2400 // loss: 0.042
epoch: 121, batch: 2500 // loss: 0.042
epoch: 121, batch: 2600 // loss: 0.045
epoch: 121, batch: 2700 // loss: 0.042
epoch: 121, batch: 2800 // loss: 0.046
epoch: 121, batch: 2900 // loss: 0.042
epoch: 121, batch: 3000 // loss: 0.045
epoch: 121, batch: 3100 // loss: 0.043
epoch: 121, batch: 3200 // loss: 0.039
epoch: 121, batch: 3300 // loss: 0.038
epoch: 121, batch: 3400 // loss: 0.044
epoch: 121, batch: 3500 // loss: 0.035
epoch: 121, batch: 3600 // loss: 0.043
epoch: 121, batch: 3700 // loss: 0.043

epoch: 122, batch: 0 // loss: 0.053
epoch: 122, batch: 100 // loss: 0.046
epoch: 122, batch: 200 // loss: 0.043
epoch: 122, batch: 300 // loss: 0.049
epoch: 122, batch: 400 // loss: 0.046
epoch: 122, batch: 500 // loss: 0.040
epoch: 122, batch: 600 // loss: 0.040
epoch: 122, batch: 700 // loss: 0.044
epoch: 122, batch: 800 // loss: 0.042
epoch: 122, batch: 900 // loss: 0.049
epoch: 122, batch: 1000 // loss: 0.046
epoch: 122, batch: 1100 // loss: 0.042
epoch: 122, batch: 1200 // loss: 0.044
epoch: 122, batch: 1300 // loss: 0.046
epoch: 122, batch: 1400 // loss: 0.043
epoch: 122, batch: 1500 // loss: 0.048
epoch: 122, batch: 1600 // loss: 0.052
epoch: 122, batch: 1700 // loss: 0.045
epoch: 122, batch: 1800 // loss: 0.053
epoch: 122, batch: 1900 // loss: 0.045
epoch: 122, batch: 2000 // loss: 0.047
epoch: 122, batch: 2100 // loss: 0.046
epoch: 122, batch: 2200 // loss: 0.050
epoch: 122, batch: 2300 // loss: 0.049
epoch: 122, batch: 2400 // loss: 0.042
epoch: 122, batch: 2500 // loss: 0.042
epoch: 122, batch: 2600 // loss: 0.045
epoch: 122, batch: 2700 // loss: 0.042
epoch: 122, batch: 2800 // loss: 0.046
epoch: 122, batch: 2900 // loss: 0.042
epoch: 122, batch: 3000 // loss: 0.045
epoch: 122, batch: 3100 // loss: 0.043
epoch: 122, batch: 3200 // loss: 0.039
epoch: 122, batch: 3300 // loss: 0.038
epoch: 122, batch: 3400 // loss: 0.044
epoch: 122, batch: 3500 // loss: 0.035
epoch: 122, batch: 3600 // loss: 0.043
epoch: 122, batch: 3700 // loss: 0.043

epoch: 123, batch: 0 // loss: 0.053
epoch: 123, batch: 100 // loss: 0.046
epoch: 123, batch: 200 // loss: 0.043
epoch: 123, batch: 300 // loss: 0.049
epoch: 123, batch: 400 // loss: 0.046
epoch: 123, batch: 500 // loss: 0.040
epoch: 123, batch: 600 // loss: 0.040
epoch: 123, batch: 700 // loss: 0.044
epoch: 123, batch: 800 // loss: 0.042
epoch: 123, batch: 900 // loss: 0.049
epoch: 123, batch: 1000 // loss: 0.046
epoch: 123, batch: 1100 // loss: 0.042
epoch: 123, batch: 1200 // loss: 0.044
epoch: 123, batch: 1300 // loss: 0.046
epoch: 123, batch: 1400 // loss: 0.043
epoch: 123, batch: 1500 // loss: 0.048
epoch: 123, batch: 1600 // loss: 0.052
epoch: 123, batch: 1700 // loss: 0.045
epoch: 123, batch: 1800 // loss: 0.053
epoch: 123, batch: 1900 // loss: 0.045
epoch: 123, batch: 2000 // loss: 0.047
epoch: 123, batch: 2100 // loss: 0.046
epoch: 123, batch: 2200 // loss: 0.050
epoch: 123, batch: 2300 // loss: 0.049
epoch: 123, batch: 2400 // loss: 0.042
epoch: 123, batch: 2500 // loss: 0.042
epoch: 123, batch: 2600 // loss: 0.045
epoch: 123, batch: 2700 // loss: 0.042
epoch: 123, batch: 2800 // loss: 0.046
epoch: 123, batch: 2900 // loss: 0.042
epoch: 123, batch: 3000 // loss: 0.045
epoch: 123, batch: 3100 // loss: 0.043
epoch: 123, batch: 3200 // loss: 0.039
epoch: 123, batch: 3300 // loss: 0.038
epoch: 123, batch: 3400 // loss: 0.044
epoch: 123, batch: 3500 // loss: 0.035
epoch: 123, batch: 3600 // loss: 0.043
epoch: 123, batch: 3700 // loss: 0.043

epoch: 124, batch: 0 // loss: 0.053
epoch: 124, batch: 100 // loss: 0.046
epoch: 124, batch: 200 // loss: 0.043
epoch: 124, batch: 300 // loss: 0.049
epoch: 124, batch: 400 // loss: 0.046
epoch: 124, batch: 500 // loss: 0.040
epoch: 124, batch: 600 // loss: 0.040
epoch: 124, batch: 700 // loss: 0.044
epoch: 124, batch: 800 // loss: 0.042
epoch: 124, batch: 900 // loss: 0.049
epoch: 124, batch: 1000 // loss: 0.046
epoch: 124, batch: 1100 // loss: 0.042
epoch: 124, batch: 1200 // loss: 0.044
epoch: 124, batch: 1300 // loss: 0.046
epoch: 124, batch: 1400 // loss: 0.043
epoch: 124, batch: 1500 // loss: 0.048
epoch: 124, batch: 1600 // loss: 0.052
epoch: 124, batch: 1700 // loss: 0.045
epoch: 124, batch: 1800 // loss: 0.053
epoch: 124, batch: 1900 // loss: 0.045
epoch: 124, batch: 2000 // loss: 0.047
epoch: 124, batch: 2100 // loss: 0.046
epoch: 124, batch: 2200 // loss: 0.050
epoch: 124, batch: 2300 // loss: 0.049
epoch: 124, batch: 2400 // loss: 0.042
epoch: 124, batch: 2500 // loss: 0.042
epoch: 124, batch: 2600 // loss: 0.045
epoch: 124, batch: 2700 // loss: 0.042
epoch: 124, batch: 2800 // loss: 0.046
epoch: 124, batch: 2900 // loss: 0.042
epoch: 124, batch: 3000 // loss: 0.045
epoch: 124, batch: 3100 // loss: 0.043
epoch: 124, batch: 3200 // loss: 0.039
epoch: 124, batch: 3300 // loss: 0.038
epoch: 124, batch: 3400 // loss: 0.044
epoch: 124, batch: 3500 // loss: 0.035
epoch: 124, batch: 3600 // loss: 0.043
epoch: 124, batch: 3700 // loss: 0.043

epoch: 125, batch: 0 // loss: 0.053
epoch: 125, batch: 100 // loss: 0.046
epoch: 125, batch: 200 // loss: 0.043
epoch: 125, batch: 300 // loss: 0.049
epoch: 125, batch: 400 // loss: 0.046
epoch: 125, batch: 500 // loss: 0.040
epoch: 125, batch: 600 // loss: 0.040
epoch: 125, batch: 700 // loss: 0.044
epoch: 125, batch: 800 // loss: 0.042
epoch: 125, batch: 900 // loss: 0.049
epoch: 125, batch: 1000 // loss: 0.046
epoch: 125, batch: 1100 // loss: 0.042
epoch: 125, batch: 1200 // loss: 0.044
epoch: 125, batch: 1300 // loss: 0.046
epoch: 125, batch: 1400 // loss: 0.043
epoch: 125, batch: 1500 // loss: 0.048
epoch: 125, batch: 1600 // loss: 0.052
epoch: 125, batch: 1700 // loss: 0.045
epoch: 125, batch: 1800 // loss: 0.053
epoch: 125, batch: 1900 // loss: 0.045
epoch: 125, batch: 2000 // loss: 0.047
epoch: 125, batch: 2100 // loss: 0.046
epoch: 125, batch: 2200 // loss: 0.050
epoch: 125, batch: 2300 // loss: 0.049
epoch: 125, batch: 2400 // loss: 0.042
epoch: 125, batch: 2500 // loss: 0.042
epoch: 125, batch: 2600 // loss: 0.045
epoch: 125, batch: 2700 // loss: 0.042
epoch: 125, batch: 2800 // loss: 0.046
epoch: 125, batch: 2900 // loss: 0.042
epoch: 125, batch: 3000 // loss: 0.045
epoch: 125, batch: 3100 // loss: 0.043
epoch: 125, batch: 3200 // loss: 0.039
epoch: 125, batch: 3300 // loss: 0.038
epoch: 125, batch: 3400 // loss: 0.044
epoch: 125, batch: 3500 // loss: 0.035
epoch: 125, batch: 3600 // loss: 0.043
epoch: 125, batch: 3700 // loss: 0.043

epoch: 126, batch: 0 // loss: 0.053
epoch: 126, batch: 100 // loss: 0.046
epoch: 126, batch: 200 // loss: 0.043
epoch: 126, batch: 300 // loss: 0.049
epoch: 126, batch: 400 // loss: 0.046
epoch: 126, batch: 500 // loss: 0.040
epoch: 126, batch: 600 // loss: 0.040
epoch: 126, batch: 700 // loss: 0.044
epoch: 126, batch: 800 // loss: 0.042
epoch: 126, batch: 900 // loss: 0.049
epoch: 126, batch: 1000 // loss: 0.046
epoch: 126, batch: 1100 // loss: 0.042
epoch: 126, batch: 1200 // loss: 0.044
epoch: 126, batch: 1300 // loss: 0.046
epoch: 126, batch: 1400 // loss: 0.043
epoch: 126, batch: 1500 // loss: 0.048
epoch: 126, batch: 1600 // loss: 0.052
epoch: 126, batch: 1700 // loss: 0.045
epoch: 126, batch: 1800 // loss: 0.053
epoch: 126, batch: 1900 // loss: 0.045
epoch: 126, batch: 2000 // loss: 0.047
epoch: 126, batch: 2100 // loss: 0.046
epoch: 126, batch: 2200 // loss: 0.050
epoch: 126, batch: 2300 // loss: 0.049
epoch: 126, batch: 2400 // loss: 0.042
epoch: 126, batch: 2500 // loss: 0.042
epoch: 126, batch: 2600 // loss: 0.045
epoch: 126, batch: 2700 // loss: 0.042
epoch: 126, batch: 2800 // loss: 0.046
epoch: 126, batch: 2900 // loss: 0.042
epoch: 126, batch: 3000 // loss: 0.045
epoch: 126, batch: 3100 // loss: 0.043
epoch: 126, batch: 3200 // loss: 0.039
epoch: 126, batch: 3300 // loss: 0.038
epoch: 126, batch: 3400 // loss: 0.044
epoch: 126, batch: 3500 // loss: 0.035
epoch: 126, batch: 3600 // loss: 0.043
epoch: 126, batch: 3700 // loss: 0.043

epoch: 127, batch: 0 // loss: 0.053
epoch: 127, batch: 100 // loss: 0.046
epoch: 127, batch: 200 // loss: 0.043
epoch: 127, batch: 300 // loss: 0.049
epoch: 127, batch: 400 // loss: 0.046
epoch: 127, batch: 500 // loss: 0.040
epoch: 127, batch: 600 // loss: 0.040
epoch: 127, batch: 700 // loss: 0.044
epoch: 127, batch: 800 // loss: 0.042
epoch: 127, batch: 900 // loss: 0.049
epoch: 127, batch: 1000 // loss: 0.046
epoch: 127, batch: 1100 // loss: 0.042
epoch: 127, batch: 1200 // loss: 0.044
epoch: 127, batch: 1300 // loss: 0.046
epoch: 127, batch: 1400 // loss: 0.043
epoch: 127, batch: 1500 // loss: 0.048
epoch: 127, batch: 1600 // loss: 0.052
epoch: 127, batch: 1700 // loss: 0.045
epoch: 127, batch: 1800 // loss: 0.053
epoch: 127, batch: 1900 // loss: 0.045
epoch: 127, batch: 2000 // loss: 0.047
epoch: 127, batch: 2100 // loss: 0.046
epoch: 127, batch: 2200 // loss: 0.050
epoch: 127, batch: 2300 // loss: 0.049
epoch: 127, batch: 2400 // loss: 0.042
epoch: 127, batch: 2500 // loss: 0.042
epoch: 127, batch: 2600 // loss: 0.045
epoch: 127, batch: 2700 // loss: 0.042
epoch: 127, batch: 2800 // loss: 0.046
epoch: 127, batch: 2900 // loss: 0.042
epoch: 127, batch: 3000 // loss: 0.045
epoch: 127, batch: 3100 // loss: 0.043
epoch: 127, batch: 3200 // loss: 0.039
epoch: 127, batch: 3300 // loss: 0.038
epoch: 127, batch: 3400 // loss: 0.044
epoch: 127, batch: 3500 // loss: 0.035
epoch: 127, batch: 3600 // loss: 0.043
epoch: 127, batch: 3700 // loss: 0.043

epoch: 128, batch: 0 // loss: 0.053
epoch: 128, batch: 100 // loss: 0.046
epoch: 128, batch: 200 // loss: 0.043
epoch: 128, batch: 300 // loss: 0.049
epoch: 128, batch: 400 // loss: 0.046
epoch: 128, batch: 500 // loss: 0.040
epoch: 128, batch: 600 // loss: 0.040
epoch: 128, batch: 700 // loss: 0.044
epoch: 128, batch: 800 // loss: 0.042
epoch: 128, batch: 900 // loss: 0.049
epoch: 128, batch: 1000 // loss: 0.046
epoch: 128, batch: 1100 // loss: 0.042
epoch: 128, batch: 1200 // loss: 0.044
epoch: 128, batch: 1300 // loss: 0.046
epoch: 128, batch: 1400 // loss: 0.043
epoch: 128, batch: 1500 // loss: 0.048
epoch: 128, batch: 1600 // loss: 0.052
epoch: 128, batch: 1700 // loss: 0.045
epoch: 128, batch: 1800 // loss: 0.053
epoch: 128, batch: 1900 // loss: 0.045
epoch: 128, batch: 2000 // loss: 0.047
epoch: 128, batch: 2100 // loss: 0.046
epoch: 128, batch: 2200 // loss: 0.050
epoch: 128, batch: 2300 // loss: 0.049
epoch: 128, batch: 2400 // loss: 0.042
epoch: 128, batch: 2500 // loss: 0.042
epoch: 128, batch: 2600 // loss: 0.045
epoch: 128, batch: 2700 // loss: 0.042
epoch: 128, batch: 2800 // loss: 0.046
epoch: 128, batch: 2900 // loss: 0.042
epoch: 128, batch: 3000 // loss: 0.045
epoch: 128, batch: 3100 // loss: 0.043
epoch: 128, batch: 3200 // loss: 0.039
epoch: 128, batch: 3300 // loss: 0.038
epoch: 128, batch: 3400 // loss: 0.044
epoch: 128, batch: 3500 // loss: 0.035
epoch: 128, batch: 3600 // loss: 0.043
epoch: 128, batch: 3700 // loss: 0.043

epoch: 129, batch: 0 // loss: 0.053
epoch: 129, batch: 100 // loss: 0.046
epoch: 129, batch: 200 // loss: 0.043
epoch: 129, batch: 300 // loss: 0.049
epoch: 129, batch: 400 // loss: 0.046
epoch: 129, batch: 500 // loss: 0.040
epoch: 129, batch: 600 // loss: 0.040
epoch: 129, batch: 700 // loss: 0.044
epoch: 129, batch: 800 // loss: 0.042
epoch: 129, batch: 900 // loss: 0.049
epoch: 129, batch: 1000 // loss: 0.046
epoch: 129, batch: 1100 // loss: 0.042
epoch: 129, batch: 1200 // loss: 0.044
epoch: 129, batch: 1300 // loss: 0.046
epoch: 129, batch: 1400 // loss: 0.043
epoch: 129, batch: 1500 // loss: 0.048
epoch: 129, batch: 1600 // loss: 0.052
epoch: 129, batch: 1700 // loss: 0.045
epoch: 129, batch: 1800 // loss: 0.053
epoch: 129, batch: 1900 // loss: 0.045
epoch: 129, batch: 2000 // loss: 0.047
epoch: 129, batch: 2100 // loss: 0.046
epoch: 129, batch: 2200 // loss: 0.050
epoch: 129, batch: 2300 // loss: 0.049
epoch: 129, batch: 2400 // loss: 0.042
epoch: 129, batch: 2500 // loss: 0.042
epoch: 129, batch: 2600 // loss: 0.045
epoch: 129, batch: 2700 // loss: 0.042
epoch: 129, batch: 2800 // loss: 0.046
epoch: 129, batch: 2900 // loss: 0.042
epoch: 129, batch: 3000 // loss: 0.045
epoch: 129, batch: 3100 // loss: 0.043
epoch: 129, batch: 3200 // loss: 0.039
epoch: 129, batch: 3300 // loss: 0.038
epoch: 129, batch: 3400 // loss: 0.044
epoch: 129, batch: 3500 // loss: 0.035
epoch: 129, batch: 3600 // loss: 0.043
epoch: 129, batch: 3700 // loss: 0.043

epoch: 130, batch: 0 // loss: 0.053
epoch: 130, batch: 100 // loss: 0.046
epoch: 130, batch: 200 // loss: 0.043
epoch: 130, batch: 300 // loss: 0.049
epoch: 130, batch: 400 // loss: 0.046
epoch: 130, batch: 500 // loss: 0.040
epoch: 130, batch: 600 // loss: 0.040
epoch: 130, batch: 700 // loss: 0.044
epoch: 130, batch: 800 // loss: 0.042
epoch: 130, batch: 900 // loss: 0.049
epoch: 130, batch: 1000 // loss: 0.046
epoch: 130, batch: 1100 // loss: 0.042
epoch: 130, batch: 1200 // loss: 0.044
epoch: 130, batch: 1300 // loss: 0.046
epoch: 130, batch: 1400 // loss: 0.043
epoch: 130, batch: 1500 // loss: 0.048
epoch: 130, batch: 1600 // loss: 0.052
epoch: 130, batch: 1700 // loss: 0.045
epoch: 130, batch: 1800 // loss: 0.053
epoch: 130, batch: 1900 // loss: 0.045
epoch: 130, batch: 2000 // loss: 0.047
epoch: 130, batch: 2100 // loss: 0.046
epoch: 130, batch: 2200 // loss: 0.050
epoch: 130, batch: 2300 // loss: 0.049
epoch: 130, batch: 2400 // loss: 0.042
epoch: 130, batch: 2500 // loss: 0.042
epoch: 130, batch: 2600 // loss: 0.045
epoch: 130, batch: 2700 // loss: 0.042
epoch: 130, batch: 2800 // loss: 0.046
epoch: 130, batch: 2900 // loss: 0.042
epoch: 130, batch: 3000 // loss: 0.045
epoch: 130, batch: 3100 // loss: 0.043
epoch: 130, batch: 3200 // loss: 0.039
epoch: 130, batch: 3300 // loss: 0.038
epoch: 130, batch: 3400 // loss: 0.044
epoch: 130, batch: 3500 // loss: 0.035
epoch: 130, batch: 3600 // loss: 0.043
epoch: 130, batch: 3700 // loss: 0.043

epoch: 131, batch: 0 // loss: 0.053
epoch: 131, batch: 100 // loss: 0.046
epoch: 131, batch: 200 // loss: 0.043
epoch: 131, batch: 300 // loss: 0.049
epoch: 131, batch: 400 // loss: 0.046
epoch: 131, batch: 500 // loss: 0.040
epoch: 131, batch: 600 // loss: 0.040
epoch: 131, batch: 700 // loss: 0.044
epoch: 131, batch: 800 // loss: 0.042
epoch: 131, batch: 900 // loss: 0.049
epoch: 131, batch: 1000 // loss: 0.046
epoch: 131, batch: 1100 // loss: 0.042
epoch: 131, batch: 1200 // loss: 0.044
epoch: 131, batch: 1300 // loss: 0.046
epoch: 131, batch: 1400 // loss: 0.043
epoch: 131, batch: 1500 // loss: 0.048
epoch: 131, batch: 1600 // loss: 0.052
epoch: 131, batch: 1700 // loss: 0.045
epoch: 131, batch: 1800 // loss: 0.053
epoch: 131, batch: 1900 // loss: 0.045
epoch: 131, batch: 2000 // loss: 0.047
epoch: 131, batch: 2100 // loss: 0.046
epoch: 131, batch: 2200 // loss: 0.050
epoch: 131, batch: 2300 // loss: 0.049
epoch: 131, batch: 2400 // loss: 0.042
epoch: 131, batch: 2500 // loss: 0.042
epoch: 131, batch: 2600 // loss: 0.045
epoch: 131, batch: 2700 // loss: 0.042
epoch: 131, batch: 2800 // loss: 0.046
epoch: 131, batch: 2900 // loss: 0.042
epoch: 131, batch: 3000 // loss: 0.045
epoch: 131, batch: 3100 // loss: 0.043
epoch: 131, batch: 3200 // loss: 0.039
epoch: 131, batch: 3300 // loss: 0.038
epoch: 131, batch: 3400 // loss: 0.044
epoch: 131, batch: 3500 // loss: 0.035
epoch: 131, batch: 3600 // loss: 0.043
epoch: 131, batch: 3700 // loss: 0.043

epoch: 132, batch: 0 // loss: 0.053
epoch: 132, batch: 100 // loss: 0.046
epoch: 132, batch: 200 // loss: 0.043
epoch: 132, batch: 300 // loss: 0.049
epoch: 132, batch: 400 // loss: 0.046
epoch: 132, batch: 500 // loss: 0.040
epoch: 132, batch: 600 // loss: 0.040
epoch: 132, batch: 700 // loss: 0.044
epoch: 132, batch: 800 // loss: 0.042
epoch: 132, batch: 900 // loss: 0.049
epoch: 132, batch: 1000 // loss: 0.046
epoch: 132, batch: 1100 // loss: 0.042
epoch: 132, batch: 1200 // loss: 0.044
epoch: 132, batch: 1300 // loss: 0.046
epoch: 132, batch: 1400 // loss: 0.043
epoch: 132, batch: 1500 // loss: 0.048
epoch: 132, batch: 1600 // loss: 0.052
epoch: 132, batch: 1700 // loss: 0.045
epoch: 132, batch: 1800 // loss: 0.053
epoch: 132, batch: 1900 // loss: 0.045
epoch: 132, batch: 2000 // loss: 0.047
epoch: 132, batch: 2100 // loss: 0.046
epoch: 132, batch: 2200 // loss: 0.050
epoch: 132, batch: 2300 // loss: 0.049
epoch: 132, batch: 2400 // loss: 0.042
epoch: 132, batch: 2500 // loss: 0.042
epoch: 132, batch: 2600 // loss: 0.045
epoch: 132, batch: 2700 // loss: 0.042
epoch: 132, batch: 2800 // loss: 0.046
epoch: 132, batch: 2900 // loss: 0.042
epoch: 132, batch: 3000 // loss: 0.045
epoch: 132, batch: 3100 // loss: 0.043
epoch: 132, batch: 3200 // loss: 0.039
epoch: 132, batch: 3300 // loss: 0.038
epoch: 132, batch: 3400 // loss: 0.044
epoch: 132, batch: 3500 // loss: 0.035
epoch: 132, batch: 3600 // loss: 0.043
epoch: 132, batch: 3700 // loss: 0.043

epoch: 133, batch: 0 // loss: 0.053
epoch: 133, batch: 100 // loss: 0.046
epoch: 133, batch: 200 // loss: 0.043
epoch: 133, batch: 300 // loss: 0.049
epoch: 133, batch: 400 // loss: 0.046
epoch: 133, batch: 500 // loss: 0.040
epoch: 133, batch: 600 // loss: 0.040
epoch: 133, batch: 700 // loss: 0.044
epoch: 133, batch: 800 // loss: 0.042
epoch: 133, batch: 900 // loss: 0.049
epoch: 133, batch: 1000 // loss: 0.046
epoch: 133, batch: 1100 // loss: 0.042
epoch: 133, batch: 1200 // loss: 0.044
epoch: 133, batch: 1300 // loss: 0.046
epoch: 133, batch: 1400 // loss: 0.043
epoch: 133, batch: 1500 // loss: 0.048
epoch: 133, batch: 1600 // loss: 0.052
epoch: 133, batch: 1700 // loss: 0.045
epoch: 133, batch: 1800 // loss: 0.053
epoch: 133, batch: 1900 // loss: 0.045
epoch: 133, batch: 2000 // loss: 0.047
epoch: 133, batch: 2100 // loss: 0.046
epoch: 133, batch: 2200 // loss: 0.050
epoch: 133, batch: 2300 // loss: 0.049
epoch: 133, batch: 2400 // loss: 0.042
epoch: 133, batch: 2500 // loss: 0.042
epoch: 133, batch: 2600 // loss: 0.045
epoch: 133, batch: 2700 // loss: 0.042
epoch: 133, batch: 2800 // loss: 0.046
epoch: 133, batch: 2900 // loss: 0.042
epoch: 133, batch: 3000 // loss: 0.045
epoch: 133, batch: 3100 // loss: 0.043
epoch: 133, batch: 3200 // loss: 0.039
epoch: 133, batch: 3300 // loss: 0.038
epoch: 133, batch: 3400 // loss: 0.044
epoch: 133, batch: 3500 // loss: 0.035
epoch: 133, batch: 3600 // loss: 0.043
epoch: 133, batch: 3700 // loss: 0.043

epoch: 134, batch: 0 // loss: 0.053
epoch: 134, batch: 100 // loss: 0.046
epoch: 134, batch: 200 // loss: 0.043
epoch: 134, batch: 300 // loss: 0.049
epoch: 134, batch: 400 // loss: 0.046
epoch: 134, batch: 500 // loss: 0.040
epoch: 134, batch: 600 // loss: 0.040
epoch: 134, batch: 700 // loss: 0.044
epoch: 134, batch: 800 // loss: 0.042
epoch: 134, batch: 900 // loss: 0.049
epoch: 134, batch: 1000 // loss: 0.046
epoch: 134, batch: 1100 // loss: 0.042
epoch: 134, batch: 1200 // loss: 0.044
epoch: 134, batch: 1300 // loss: 0.046
epoch: 134, batch: 1400 // loss: 0.043
epoch: 134, batch: 1500 // loss: 0.048
epoch: 134, batch: 1600 // loss: 0.052
epoch: 134, batch: 1700 // loss: 0.045
epoch: 134, batch: 1800 // loss: 0.053
epoch: 134, batch: 1900 // loss: 0.045
epoch: 134, batch: 2000 // loss: 0.047
epoch: 134, batch: 2100 // loss: 0.046
epoch: 134, batch: 2200 // loss: 0.050
epoch: 134, batch: 2300 // loss: 0.049
epoch: 134, batch: 2400 // loss: 0.042
epoch: 134, batch: 2500 // loss: 0.042
epoch: 134, batch: 2600 // loss: 0.045
epoch: 134, batch: 2700 // loss: 0.042
epoch: 134, batch: 2800 // loss: 0.046
epoch: 134, batch: 2900 // loss: 0.042
epoch: 134, batch: 3000 // loss: 0.045
epoch: 134, batch: 3100 // loss: 0.043
epoch: 134, batch: 3200 // loss: 0.039
epoch: 134, batch: 3300 // loss: 0.038
epoch: 134, batch: 3400 // loss: 0.044
epoch: 134, batch: 3500 // loss: 0.035
epoch: 134, batch: 3600 // loss: 0.043
epoch: 134, batch: 3700 // loss: 0.043

epoch: 135, batch: 0 // loss: 0.053
epoch: 135, batch: 100 // loss: 0.046
epoch: 135, batch: 200 // loss: 0.043
epoch: 135, batch: 300 // loss: 0.049
epoch: 135, batch: 400 // loss: 0.046
epoch: 135, batch: 500 // loss: 0.040
epoch: 135, batch: 600 // loss: 0.040
epoch: 135, batch: 700 // loss: 0.044
epoch: 135, batch: 800 // loss: 0.042
epoch: 135, batch: 900 // loss: 0.049
epoch: 135, batch: 1000 // loss: 0.046
epoch: 135, batch: 1100 // loss: 0.042
epoch: 135, batch: 1200 // loss: 0.044
epoch: 135, batch: 1300 // loss: 0.046
epoch: 135, batch: 1400 // loss: 0.043
epoch: 135, batch: 1500 // loss: 0.048
epoch: 135, batch: 1600 // loss: 0.052
epoch: 135, batch: 1700 // loss: 0.045
epoch: 135, batch: 1800 // loss: 0.053
epoch: 135, batch: 1900 // loss: 0.045
epoch: 135, batch: 2000 // loss: 0.047
epoch: 135, batch: 2100 // loss: 0.046
epoch: 135, batch: 2200 // loss: 0.050
epoch: 135, batch: 2300 // loss: 0.049
epoch: 135, batch: 2400 // loss: 0.042
epoch: 135, batch: 2500 // loss: 0.042
epoch: 135, batch: 2600 // loss: 0.045
epoch: 135, batch: 2700 // loss: 0.042
epoch: 135, batch: 2800 // loss: 0.046
epoch: 135, batch: 2900 // loss: 0.042
epoch: 135, batch: 3000 // loss: 0.045
epoch: 135, batch: 3100 // loss: 0.043
epoch: 135, batch: 3200 // loss: 0.039
epoch: 135, batch: 3300 // loss: 0.038
epoch: 135, batch: 3400 // loss: 0.044
epoch: 135, batch: 3500 // loss: 0.035
epoch: 135, batch: 3600 // loss: 0.043
epoch: 135, batch: 3700 // loss: 0.043

epoch: 136, batch: 0 // loss: 0.053
epoch: 136, batch: 100 // loss: 0.046
epoch: 136, batch: 200 // loss: 0.043
epoch: 136, batch: 300 // loss: 0.049
epoch: 136, batch: 400 // loss: 0.046
epoch: 136, batch: 500 // loss: 0.040
epoch: 136, batch: 600 // loss: 0.040
epoch: 136, batch: 700 // loss: 0.044
epoch: 136, batch: 800 // loss: 0.042
epoch: 136, batch: 900 // loss: 0.049
epoch: 136, batch: 1000 // loss: 0.046
epoch: 136, batch: 1100 // loss: 0.042
epoch: 136, batch: 1200 // loss: 0.044
epoch: 136, batch: 1300 // loss: 0.046
epoch: 136, batch: 1400 // loss: 0.043
epoch: 136, batch: 1500 // loss: 0.048
epoch: 136, batch: 1600 // loss: 0.052
epoch: 136, batch: 1700 // loss: 0.045
epoch: 136, batch: 1800 // loss: 0.053
epoch: 136, batch: 1900 // loss: 0.045
epoch: 136, batch: 2000 // loss: 0.047
epoch: 136, batch: 2100 // loss: 0.046
epoch: 136, batch: 2200 // loss: 0.050
epoch: 136, batch: 2300 // loss: 0.049
epoch: 136, batch: 2400 // loss: 0.042
epoch: 136, batch: 2500 // loss: 0.042
epoch: 136, batch: 2600 // loss: 0.045
epoch: 136, batch: 2700 // loss: 0.042
epoch: 136, batch: 2800 // loss: 0.046
epoch: 136, batch: 2900 // loss: 0.042
epoch: 136, batch: 3000 // loss: 0.045
epoch: 136, batch: 3100 // loss: 0.043
epoch: 136, batch: 3200 // loss: 0.039
epoch: 136, batch: 3300 // loss: 0.038
epoch: 136, batch: 3400 // loss: 0.044
epoch: 136, batch: 3500 // loss: 0.035
epoch: 136, batch: 3600 // loss: 0.043
epoch: 136, batch: 3700 // loss: 0.043

epoch: 137, batch: 0 // loss: 0.053
epoch: 137, batch: 100 // loss: 0.046
epoch: 137, batch: 200 // loss: 0.043
epoch: 137, batch: 300 // loss: 0.049
epoch: 137, batch: 400 // loss: 0.046
epoch: 137, batch: 500 // loss: 0.040
epoch: 137, batch: 600 // loss: 0.040
epoch: 137, batch: 700 // loss: 0.044
epoch: 137, batch: 800 // loss: 0.042
epoch: 137, batch: 900 // loss: 0.049
epoch: 137, batch: 1000 // loss: 0.046
epoch: 137, batch: 1100 // loss: 0.042
epoch: 137, batch: 1200 // loss: 0.044
epoch: 137, batch: 1300 // loss: 0.046
epoch: 137, batch: 1400 // loss: 0.043
epoch: 137, batch: 1500 // loss: 0.048
epoch: 137, batch: 1600 // loss: 0.052
epoch: 137, batch: 1700 // loss: 0.045
epoch: 137, batch: 1800 // loss: 0.053
epoch: 137, batch: 1900 // loss: 0.045
epoch: 137, batch: 2000 // loss: 0.047
epoch: 137, batch: 2100 // loss: 0.046
epoch: 137, batch: 2200 // loss: 0.050
epoch: 137, batch: 2300 // loss: 0.049
epoch: 137, batch: 2400 // loss: 0.042
epoch: 137, batch: 2500 // loss: 0.042
epoch: 137, batch: 2600 // loss: 0.045
epoch: 137, batch: 2700 // loss: 0.042
epoch: 137, batch: 2800 // loss: 0.046
epoch: 137, batch: 2900 // loss: 0.042
epoch: 137, batch: 3000 // loss: 0.045
epoch: 137, batch: 3100 // loss: 0.043
epoch: 137, batch: 3200 // loss: 0.039
epoch: 137, batch: 3300 // loss: 0.038
epoch: 137, batch: 3400 // loss: 0.044
epoch: 137, batch: 3500 // loss: 0.035
epoch: 137, batch: 3600 // loss: 0.043
epoch: 137, batch: 3700 // loss: 0.043

epoch: 138, batch: 0 // loss: 0.053
epoch: 138, batch: 100 // loss: 0.046
epoch: 138, batch: 200 // loss: 0.043
epoch: 138, batch: 300 // loss: 0.049
epoch: 138, batch: 400 // loss: 0.046
epoch: 138, batch: 500 // loss: 0.040
epoch: 138, batch: 600 // loss: 0.040
epoch: 138, batch: 700 // loss: 0.044
epoch: 138, batch: 800 // loss: 0.042
epoch: 138, batch: 900 // loss: 0.049
epoch: 138, batch: 1000 // loss: 0.046
epoch: 138, batch: 1100 // loss: 0.042
epoch: 138, batch: 1200 // loss: 0.044
epoch: 138, batch: 1300 // loss: 0.046
epoch: 138, batch: 1400 // loss: 0.043
epoch: 138, batch: 1500 // loss: 0.048
epoch: 138, batch: 1600 // loss: 0.052
epoch: 138, batch: 1700 // loss: 0.045
epoch: 138, batch: 1800 // loss: 0.053
epoch: 138, batch: 1900 // loss: 0.045
epoch: 138, batch: 2000 // loss: 0.047
epoch: 138, batch: 2100 // loss: 0.046
epoch: 138, batch: 2200 // loss: 0.050
epoch: 138, batch: 2300 // loss: 0.049
epoch: 138, batch: 2400 // loss: 0.042
epoch: 138, batch: 2500 // loss: 0.042
epoch: 138, batch: 2600 // loss: 0.045
epoch: 138, batch: 2700 // loss: 0.042
epoch: 138, batch: 2800 // loss: 0.046
epoch: 138, batch: 2900 // loss: 0.042
epoch: 138, batch: 3000 // loss: 0.045
epoch: 138, batch: 3100 // loss: 0.043
epoch: 138, batch: 3200 // loss: 0.039
epoch: 138, batch: 3300 // loss: 0.038
epoch: 138, batch: 3400 // loss: 0.044
epoch: 138, batch: 3500 // loss: 0.035
epoch: 138, batch: 3600 // loss: 0.043
epoch: 138, batch: 3700 // loss: 0.043

epoch: 139, batch: 0 // loss: 0.053
epoch: 139, batch: 100 // loss: 0.046
epoch: 139, batch: 200 // loss: 0.043
epoch: 139, batch: 300 // loss: 0.049
epoch: 139, batch: 400 // loss: 0.046
epoch: 139, batch: 500 // loss: 0.040
epoch: 139, batch: 600 // loss: 0.040
epoch: 139, batch: 700 // loss: 0.044
epoch: 139, batch: 800 // loss: 0.042
epoch: 139, batch: 900 // loss: 0.049
epoch: 139, batch: 1000 // loss: 0.046
epoch: 139, batch: 1100 // loss: 0.042
epoch: 139, batch: 1200 // loss: 0.044
epoch: 139, batch: 1300 // loss: 0.046
epoch: 139, batch: 1400 // loss: 0.043
epoch: 139, batch: 1500 // loss: 0.048
epoch: 139, batch: 1600 // loss: 0.052
epoch: 139, batch: 1700 // loss: 0.045
epoch: 139, batch: 1800 // loss: 0.053
epoch: 139, batch: 1900 // loss: 0.045
epoch: 139, batch: 2000 // loss: 0.047
epoch: 139, batch: 2100 // loss: 0.046
epoch: 139, batch: 2200 // loss: 0.050
epoch: 139, batch: 2300 // loss: 0.049
epoch: 139, batch: 2400 // loss: 0.042
epoch: 139, batch: 2500 // loss: 0.042
epoch: 139, batch: 2600 // loss: 0.045
epoch: 139, batch: 2700 // loss: 0.042
epoch: 139, batch: 2800 // loss: 0.046
epoch: 139, batch: 2900 // loss: 0.042
epoch: 139, batch: 3000 // loss: 0.045
epoch: 139, batch: 3100 // loss: 0.043
epoch: 139, batch: 3200 // loss: 0.039
epoch: 139, batch: 3300 // loss: 0.038
epoch: 139, batch: 3400 // loss: 0.044
epoch: 139, batch: 3500 // loss: 0.035
epoch: 139, batch: 3600 // loss: 0.043
epoch: 139, batch: 3700 // loss: 0.043

epoch: 140, batch: 0 // loss: 0.053
epoch: 140, batch: 100 // loss: 0.046
epoch: 140, batch: 200 // loss: 0.043
epoch: 140, batch: 300 // loss: 0.049
epoch: 140, batch: 400 // loss: 0.046
epoch: 140, batch: 500 // loss: 0.040
epoch: 140, batch: 600 // loss: 0.040
epoch: 140, batch: 700 // loss: 0.044
epoch: 140, batch: 800 // loss: 0.042
epoch: 140, batch: 900 // loss: 0.049
epoch: 140, batch: 1000 // loss: 0.046
epoch: 140, batch: 1100 // loss: 0.042
epoch: 140, batch: 1200 // loss: 0.044
epoch: 140, batch: 1300 // loss: 0.046
epoch: 140, batch: 1400 // loss: 0.043
epoch: 140, batch: 1500 // loss: 0.048
epoch: 140, batch: 1600 // loss: 0.052
epoch: 140, batch: 1700 // loss: 0.045
epoch: 140, batch: 1800 // loss: 0.053
epoch: 140, batch: 1900 // loss: 0.045
epoch: 140, batch: 2000 // loss: 0.047
epoch: 140, batch: 2100 // loss: 0.046
epoch: 140, batch: 2200 // loss: 0.050
epoch: 140, batch: 2300 // loss: 0.049
epoch: 140, batch: 2400 // loss: 0.042
epoch: 140, batch: 2500 // loss: 0.042
epoch: 140, batch: 2600 // loss: 0.045
epoch: 140, batch: 2700 // loss: 0.042
epoch: 140, batch: 2800 // loss: 0.046
epoch: 140, batch: 2900 // loss: 0.042
epoch: 140, batch: 3000 // loss: 0.045
epoch: 140, batch: 3100 // loss: 0.043
epoch: 140, batch: 3200 // loss: 0.039
epoch: 140, batch: 3300 // loss: 0.038
epoch: 140, batch: 3400 // loss: 0.044
epoch: 140, batch: 3500 // loss: 0.035
epoch: 140, batch: 3600 // loss: 0.043
epoch: 140, batch: 3700 // loss: 0.043

epoch: 141, batch: 0 // loss: 0.053
epoch: 141, batch: 100 // loss: 0.046
epoch: 141, batch: 200 // loss: 0.043
epoch: 141, batch: 300 // loss: 0.049
epoch: 141, batch: 400 // loss: 0.046
epoch: 141, batch: 500 // loss: 0.040
epoch: 141, batch: 600 // loss: 0.040
epoch: 141, batch: 700 // loss: 0.044
epoch: 141, batch: 800 // loss: 0.042
epoch: 141, batch: 900 // loss: 0.049
epoch: 141, batch: 1000 // loss: 0.046
epoch: 141, batch: 1100 // loss: 0.042
epoch: 141, batch: 1200 // loss: 0.044
epoch: 141, batch: 1300 // loss: 0.046
epoch: 141, batch: 1400 // loss: 0.043
epoch: 141, batch: 1500 // loss: 0.048
epoch: 141, batch: 1600 // loss: 0.052
epoch: 141, batch: 1700 // loss: 0.045
epoch: 141, batch: 1800 // loss: 0.053
epoch: 141, batch: 1900 // loss: 0.045
epoch: 141, batch: 2000 // loss: 0.047
epoch: 141, batch: 2100 // loss: 0.046
epoch: 141, batch: 2200 // loss: 0.050
epoch: 141, batch: 2300 // loss: 0.049
epoch: 141, batch: 2400 // loss: 0.042
epoch: 141, batch: 2500 // loss: 0.042
epoch: 141, batch: 2600 // loss: 0.045
epoch: 141, batch: 2700 // loss: 0.042
epoch: 141, batch: 2800 // loss: 0.046
epoch: 141, batch: 2900 // loss: 0.042
epoch: 141, batch: 3000 // loss: 0.045
epoch: 141, batch: 3100 // loss: 0.043
epoch: 141, batch: 3200 // loss: 0.039
epoch: 141, batch: 3300 // loss: 0.038
epoch: 141, batch: 3400 // loss: 0.044
epoch: 141, batch: 3500 // loss: 0.035
epoch: 141, batch: 3600 // loss: 0.043
epoch: 141, batch: 3700 // loss: 0.043

epoch: 142, batch: 0 // loss: 0.053
epoch: 142, batch: 100 // loss: 0.046
epoch: 142, batch: 200 // loss: 0.043
epoch: 142, batch: 300 // loss: 0.049
epoch: 142, batch: 400 // loss: 0.046
epoch: 142, batch: 500 // loss: 0.040
epoch: 142, batch: 600 // loss: 0.040
epoch: 142, batch: 700 // loss: 0.044
epoch: 142, batch: 800 // loss: 0.042
epoch: 142, batch: 900 // loss: 0.049
epoch: 142, batch: 1000 // loss: 0.046
epoch: 142, batch: 1100 // loss: 0.042
epoch: 142, batch: 1200 // loss: 0.044
epoch: 142, batch: 1300 // loss: 0.046
epoch: 142, batch: 1400 // loss: 0.043
epoch: 142, batch: 1500 // loss: 0.048
epoch: 142, batch: 1600 // loss: 0.052
epoch: 142, batch: 1700 // loss: 0.045
epoch: 142, batch: 1800 // loss: 0.053
epoch: 142, batch: 1900 // loss: 0.045
epoch: 142, batch: 2000 // loss: 0.047
epoch: 142, batch: 2100 // loss: 0.046
epoch: 142, batch: 2200 // loss: 0.050
epoch: 142, batch: 2300 // loss: 0.049
epoch: 142, batch: 2400 // loss: 0.042
epoch: 142, batch: 2500 // loss: 0.042
epoch: 142, batch: 2600 // loss: 0.045
epoch: 142, batch: 2700 // loss: 0.042
epoch: 142, batch: 2800 // loss: 0.046
epoch: 142, batch: 2900 // loss: 0.042
epoch: 142, batch: 3000 // loss: 0.045
epoch: 142, batch: 3100 // loss: 0.043
epoch: 142, batch: 3200 // loss: 0.039
epoch: 142, batch: 3300 // loss: 0.038
epoch: 142, batch: 3400 // loss: 0.044
epoch: 142, batch: 3500 // loss: 0.035
epoch: 142, batch: 3600 // loss: 0.043
epoch: 142, batch: 3700 // loss: 0.043

epoch: 143, batch: 0 // loss: 0.053
epoch: 143, batch: 100 // loss: 0.046
epoch: 143, batch: 200 // loss: 0.043
epoch: 143, batch: 300 // loss: 0.049
epoch: 143, batch: 400 // loss: 0.046
epoch: 143, batch: 500 // loss: 0.040
epoch: 143, batch: 600 // loss: 0.040
epoch: 143, batch: 700 // loss: 0.044
epoch: 143, batch: 800 // loss: 0.042
epoch: 143, batch: 900 // loss: 0.049
epoch: 143, batch: 1000 // loss: 0.046
epoch: 143, batch: 1100 // loss: 0.042
epoch: 143, batch: 1200 // loss: 0.044
epoch: 143, batch: 1300 // loss: 0.046
epoch: 143, batch: 1400 // loss: 0.043
epoch: 143, batch: 1500 // loss: 0.048
epoch: 143, batch: 1600 // loss: 0.052
epoch: 143, batch: 1700 // loss: 0.045
epoch: 143, batch: 1800 // loss: 0.053
epoch: 143, batch: 1900 // loss: 0.045
epoch: 143, batch: 2000 // loss: 0.047
epoch: 143, batch: 2100 // loss: 0.046
epoch: 143, batch: 2200 // loss: 0.050
epoch: 143, batch: 2300 // loss: 0.049
epoch: 143, batch: 2400 // loss: 0.042
epoch: 143, batch: 2500 // loss: 0.042
epoch: 143, batch: 2600 // loss: 0.045
epoch: 143, batch: 2700 // loss: 0.042
epoch: 143, batch: 2800 // loss: 0.046
epoch: 143, batch: 2900 // loss: 0.042
epoch: 143, batch: 3000 // loss: 0.045
epoch: 143, batch: 3100 // loss: 0.043
epoch: 143, batch: 3200 // loss: 0.039
epoch: 143, batch: 3300 // loss: 0.038
epoch: 143, batch: 3400 // loss: 0.044
epoch: 143, batch: 3500 // loss: 0.035
epoch: 143, batch: 3600 // loss: 0.043
epoch: 143, batch: 3700 // loss: 0.043

epoch: 144, batch: 0 // loss: 0.053
epoch: 144, batch: 100 // loss: 0.046
epoch: 144, batch: 200 // loss: 0.043
epoch: 144, batch: 300 // loss: 0.049
epoch: 144, batch: 400 // loss: 0.046
epoch: 144, batch: 500 // loss: 0.040
epoch: 144, batch: 600 // loss: 0.040
epoch: 144, batch: 700 // loss: 0.044
epoch: 144, batch: 800 // loss: 0.042
epoch: 144, batch: 900 // loss: 0.049
epoch: 144, batch: 1000 // loss: 0.046
epoch: 144, batch: 1100 // loss: 0.042
epoch: 144, batch: 1200 // loss: 0.044
epoch: 144, batch: 1300 // loss: 0.046
epoch: 144, batch: 1400 // loss: 0.043
epoch: 144, batch: 1500 // loss: 0.048
epoch: 144, batch: 1600 // loss: 0.052
epoch: 144, batch: 1700 // loss: 0.045
epoch: 144, batch: 1800 // loss: 0.053
epoch: 144, batch: 1900 // loss: 0.045
epoch: 144, batch: 2000 // loss: 0.047
epoch: 144, batch: 2100 // loss: 0.046
epoch: 144, batch: 2200 // loss: 0.050
epoch: 144, batch: 2300 // loss: 0.049
epoch: 144, batch: 2400 // loss: 0.042
epoch: 144, batch: 2500 // loss: 0.042
epoch: 144, batch: 2600 // loss: 0.045
epoch: 144, batch: 2700 // loss: 0.042
epoch: 144, batch: 2800 // loss: 0.046
epoch: 144, batch: 2900 // loss: 0.042
epoch: 144, batch: 3000 // loss: 0.045
epoch: 144, batch: 3100 // loss: 0.043
epoch: 144, batch: 3200 // loss: 0.039
epoch: 144, batch: 3300 // loss: 0.038
epoch: 144, batch: 3400 // loss: 0.044
epoch: 144, batch: 3500 // loss: 0.035
epoch: 144, batch: 3600 // loss: 0.043
epoch: 144, batch: 3700 // loss: 0.043

epoch: 145, batch: 0 // loss: 0.053
epoch: 145, batch: 100 // loss: 0.046
epoch: 145, batch: 200 // loss: 0.043
epoch: 145, batch: 300 // loss: 0.049
epoch: 145, batch: 400 // loss: 0.046
epoch: 145, batch: 500 // loss: 0.040
epoch: 145, batch: 600 // loss: 0.040
epoch: 145, batch: 700 // loss: 0.044
epoch: 145, batch: 800 // loss: 0.042
epoch: 145, batch: 900 // loss: 0.049
epoch: 145, batch: 1000 // loss: 0.046
epoch: 145, batch: 1100 // loss: 0.042
epoch: 145, batch: 1200 // loss: 0.044
epoch: 145, batch: 1300 // loss: 0.046
epoch: 145, batch: 1400 // loss: 0.043
epoch: 145, batch: 1500 // loss: 0.048
epoch: 145, batch: 1600 // loss: 0.052
epoch: 145, batch: 1700 // loss: 0.045
epoch: 145, batch: 1800 // loss: 0.053
epoch: 145, batch: 1900 // loss: 0.045
epoch: 145, batch: 2000 // loss: 0.047
epoch: 145, batch: 2100 // loss: 0.046
epoch: 145, batch: 2200 // loss: 0.050
epoch: 145, batch: 2300 // loss: 0.049
epoch: 145, batch: 2400 // loss: 0.042
epoch: 145, batch: 2500 // loss: 0.042
epoch: 145, batch: 2600 // loss: 0.045
epoch: 145, batch: 2700 // loss: 0.042
epoch: 145, batch: 2800 // loss: 0.046
epoch: 145, batch: 2900 // loss: 0.042
epoch: 145, batch: 3000 // loss: 0.045
epoch: 145, batch: 3100 // loss: 0.043
epoch: 145, batch: 3200 // loss: 0.039
epoch: 145, batch: 3300 // loss: 0.038
epoch: 145, batch: 3400 // loss: 0.044
epoch: 145, batch: 3500 // loss: 0.035
epoch: 145, batch: 3600 // loss: 0.043
epoch: 145, batch: 3700 // loss: 0.043

epoch: 146, batch: 0 // loss: 0.053
epoch: 146, batch: 100 // loss: 0.046
epoch: 146, batch: 200 // loss: 0.043
epoch: 146, batch: 300 // loss: 0.049
epoch: 146, batch: 400 // loss: 0.046
epoch: 146, batch: 500 // loss: 0.040
epoch: 146, batch: 600 // loss: 0.040
epoch: 146, batch: 700 // loss: 0.044
epoch: 146, batch: 800 // loss: 0.042
epoch: 146, batch: 900 // loss: 0.049
epoch: 146, batch: 1000 // loss: 0.046
epoch: 146, batch: 1100 // loss: 0.042
epoch: 146, batch: 1200 // loss: 0.044
epoch: 146, batch: 1300 // loss: 0.046
epoch: 146, batch: 1400 // loss: 0.043
epoch: 146, batch: 1500 // loss: 0.048
epoch: 146, batch: 1600 // loss: 0.052
epoch: 146, batch: 1700 // loss: 0.045
epoch: 146, batch: 1800 // loss: 0.053
epoch: 146, batch: 1900 // loss: 0.045
epoch: 146, batch: 2000 // loss: 0.047
epoch: 146, batch: 2100 // loss: 0.046
epoch: 146, batch: 2200 // loss: 0.050
epoch: 146, batch: 2300 // loss: 0.049
epoch: 146, batch: 2400 // loss: 0.042
epoch: 146, batch: 2500 // loss: 0.042
epoch: 146, batch: 2600 // loss: 0.045
epoch: 146, batch: 2700 // loss: 0.042
epoch: 146, batch: 2800 // loss: 0.046
epoch: 146, batch: 2900 // loss: 0.042
epoch: 146, batch: 3000 // loss: 0.045
epoch: 146, batch: 3100 // loss: 0.043
epoch: 146, batch: 3200 // loss: 0.039
epoch: 146, batch: 3300 // loss: 0.038
epoch: 146, batch: 3400 // loss: 0.044
epoch: 146, batch: 3500 // loss: 0.035
epoch: 146, batch: 3600 // loss: 0.043
epoch: 146, batch: 3700 // loss: 0.043

epoch: 147, batch: 0 // loss: 0.053
epoch: 147, batch: 100 // loss: 0.046
epoch: 147, batch: 200 // loss: 0.043
epoch: 147, batch: 300 // loss: 0.049
epoch: 147, batch: 400 // loss: 0.046
epoch: 147, batch: 500 // loss: 0.040
epoch: 147, batch: 600 // loss: 0.040
epoch: 147, batch: 700 // loss: 0.044
epoch: 147, batch: 800 // loss: 0.042
epoch: 147, batch: 900 // loss: 0.049
epoch: 147, batch: 1000 // loss: 0.046
epoch: 147, batch: 1100 // loss: 0.042
epoch: 147, batch: 1200 // loss: 0.044
epoch: 147, batch: 1300 // loss: 0.046
epoch: 147, batch: 1400 // loss: 0.043
epoch: 147, batch: 1500 // loss: 0.048
epoch: 147, batch: 1600 // loss: 0.052
epoch: 147, batch: 1700 // loss: 0.045
epoch: 147, batch: 1800 // loss: 0.053
epoch: 147, batch: 1900 // loss: 0.045
epoch: 147, batch: 2000 // loss: 0.047
epoch: 147, batch: 2100 // loss: 0.046
epoch: 147, batch: 2200 // loss: 0.050
epoch: 147, batch: 2300 // loss: 0.049
epoch: 147, batch: 2400 // loss: 0.042
epoch: 147, batch: 2500 // loss: 0.042
epoch: 147, batch: 2600 // loss: 0.045
epoch: 147, batch: 2700 // loss: 0.042
epoch: 147, batch: 2800 // loss: 0.046
epoch: 147, batch: 2900 // loss: 0.042
epoch: 147, batch: 3000 // loss: 0.045
epoch: 147, batch: 3100 // loss: 0.043
epoch: 147, batch: 3200 // loss: 0.039
epoch: 147, batch: 3300 // loss: 0.038
epoch: 147, batch: 3400 // loss: 0.044
epoch: 147, batch: 3500 // loss: 0.035
epoch: 147, batch: 3600 // loss: 0.043
epoch: 147, batch: 3700 // loss: 0.043

epoch: 148, batch: 0 // loss: 0.053
epoch: 148, batch: 100 // loss: 0.046
epoch: 148, batch: 200 // loss: 0.043
epoch: 148, batch: 300 // loss: 0.049
epoch: 148, batch: 400 // loss: 0.046
epoch: 148, batch: 500 // loss: 0.040
epoch: 148, batch: 600 // loss: 0.040
epoch: 148, batch: 700 // loss: 0.044
epoch: 148, batch: 800 // loss: 0.042
epoch: 148, batch: 900 // loss: 0.049
epoch: 148, batch: 1000 // loss: 0.046
epoch: 148, batch: 1100 // loss: 0.042
epoch: 148, batch: 1200 // loss: 0.044
epoch: 148, batch: 1300 // loss: 0.046
epoch: 148, batch: 1400 // loss: 0.043
epoch: 148, batch: 1500 // loss: 0.048
epoch: 148, batch: 1600 // loss: 0.052
epoch: 148, batch: 1700 // loss: 0.045
epoch: 148, batch: 1800 // loss: 0.053
epoch: 148, batch: 1900 // loss: 0.045
epoch: 148, batch: 2000 // loss: 0.047
epoch: 148, batch: 2100 // loss: 0.046
epoch: 148, batch: 2200 // loss: 0.050
epoch: 148, batch: 2300 // loss: 0.049
epoch: 148, batch: 2400 // loss: 0.042
epoch: 148, batch: 2500 // loss: 0.042
epoch: 148, batch: 2600 // loss: 0.045
epoch: 148, batch: 2700 // loss: 0.042
epoch: 148, batch: 2800 // loss: 0.046
epoch: 148, batch: 2900 // loss: 0.042
epoch: 148, batch: 3000 // loss: 0.045
epoch: 148, batch: 3100 // loss: 0.043
epoch: 148, batch: 3200 // loss: 0.039
epoch: 148, batch: 3300 // loss: 0.038
epoch: 148, batch: 3400 // loss: 0.044
epoch: 148, batch: 3500 // loss: 0.035
epoch: 148, batch: 3600 // loss: 0.043
epoch: 148, batch: 3700 // loss: 0.043

epoch: 149, batch: 0 // loss: 0.053
epoch: 149, batch: 100 // loss: 0.046
epoch: 149, batch: 200 // loss: 0.043
epoch: 149, batch: 300 // loss: 0.049
epoch: 149, batch: 400 // loss: 0.046
epoch: 149, batch: 500 // loss: 0.040
epoch: 149, batch: 600 // loss: 0.040
epoch: 149, batch: 700 // loss: 0.044
epoch: 149, batch: 800 // loss: 0.042
epoch: 149, batch: 900 // loss: 0.049
epoch: 149, batch: 1000 // loss: 0.046
epoch: 149, batch: 1100 // loss: 0.042
epoch: 149, batch: 1200 // loss: 0.044
epoch: 149, batch: 1300 // loss: 0.046
epoch: 149, batch: 1400 // loss: 0.043
epoch: 149, batch: 1500 // loss: 0.048
epoch: 149, batch: 1600 // loss: 0.052
epoch: 149, batch: 1700 // loss: 0.045
epoch: 149, batch: 1800 // loss: 0.053
epoch: 149, batch: 1900 // loss: 0.045
epoch: 149, batch: 2000 // loss: 0.047
epoch: 149, batch: 2100 // loss: 0.046
epoch: 149, batch: 2200 // loss: 0.050
epoch: 149, batch: 2300 // loss: 0.049
epoch: 149, batch: 2400 // loss: 0.042
epoch: 149, batch: 2500 // loss: 0.042
epoch: 149, batch: 2600 // loss: 0.045
epoch: 149, batch: 2700 // loss: 0.042
epoch: 149, batch: 2800 // loss: 0.046
epoch: 149, batch: 2900 // loss: 0.042
epoch: 149, batch: 3000 // loss: 0.045
epoch: 149, batch: 3100 // loss: 0.043
epoch: 149, batch: 3200 // loss: 0.039
epoch: 149, batch: 3300 // loss: 0.038
epoch: 149, batch: 3400 // loss: 0.044
epoch: 149, batch: 3500 // loss: 0.035
epoch: 149, batch: 3600 // loss: 0.043
epoch: 149, batch: 3700 // loss: 0.043

epoch: 150, batch: 0 // loss: 0.053
epoch: 150, batch: 100 // loss: 0.046
epoch: 150, batch: 200 // loss: 0.043
epoch: 150, batch: 300 // loss: 0.049
epoch: 150, batch: 400 // loss: 0.046
epoch: 150, batch: 500 // loss: 0.040
epoch: 150, batch: 600 // loss: 0.040
epoch: 150, batch: 700 // loss: 0.044
epoch: 150, batch: 800 // loss: 0.042
epoch: 150, batch: 900 // loss: 0.049
epoch: 150, batch: 1000 // loss: 0.046
epoch: 150, batch: 1100 // loss: 0.042
epoch: 150, batch: 1200 // loss: 0.044
epoch: 150, batch: 1300 // loss: 0.046
epoch: 150, batch: 1400 // loss: 0.043
epoch: 150, batch: 1500 // loss: 0.048
epoch: 150, batch: 1600 // loss: 0.052
epoch: 150, batch: 1700 // loss: 0.045
epoch: 150, batch: 1800 // loss: 0.053
epoch: 150, batch: 1900 // loss: 0.045
epoch: 150, batch: 2000 // loss: 0.047
epoch: 150, batch: 2100 // loss: 0.046
epoch: 150, batch: 2200 // loss: 0.050
epoch: 150, batch: 2300 // loss: 0.049
epoch: 150, batch: 2400 // loss: 0.042
epoch: 150, batch: 2500 // loss: 0.042
epoch: 150, batch: 2600 // loss: 0.045
epoch: 150, batch: 2700 // loss: 0.042
epoch: 150, batch: 2800 // loss: 0.046
epoch: 150, batch: 2900 // loss: 0.042
epoch: 150, batch: 3000 // loss: 0.045
epoch: 150, batch: 3100 // loss: 0.043
epoch: 150, batch: 3200 // loss: 0.039
epoch: 150, batch: 3300 // loss: 0.038
epoch: 150, batch: 3400 // loss: 0.044
epoch: 150, batch: 3500 // loss: 0.035
epoch: 150, batch: 3600 // loss: 0.043
epoch: 150, batch: 3700 // loss: 0.043

epoch: 151, batch: 0 // loss: 0.053
epoch: 151, batch: 100 // loss: 0.046
epoch: 151, batch: 200 // loss: 0.043
epoch: 151, batch: 300 // loss: 0.049
epoch: 151, batch: 400 // loss: 0.046
epoch: 151, batch: 500 // loss: 0.040
epoch: 151, batch: 600 // loss: 0.040
epoch: 151, batch: 700 // loss: 0.044
epoch: 151, batch: 800 // loss: 0.042
epoch: 151, batch: 900 // loss: 0.049
epoch: 151, batch: 1000 // loss: 0.046
epoch: 151, batch: 1100 // loss: 0.042
epoch: 151, batch: 1200 // loss: 0.044
epoch: 151, batch: 1300 // loss: 0.046
epoch: 151, batch: 1400 // loss: 0.043
epoch: 151, batch: 1500 // loss: 0.048
epoch: 151, batch: 1600 // loss: 0.052
epoch: 151, batch: 1700 // loss: 0.045
epoch: 151, batch: 1800 // loss: 0.053
epoch: 151, batch: 1900 // loss: 0.045
epoch: 151, batch: 2000 // loss: 0.047
epoch: 151, batch: 2100 // loss: 0.046
epoch: 151, batch: 2200 // loss: 0.050
epoch: 151, batch: 2300 // loss: 0.049
epoch: 151, batch: 2400 // loss: 0.042
epoch: 151, batch: 2500 // loss: 0.042
epoch: 151, batch: 2600 // loss: 0.045
epoch: 151, batch: 2700 // loss: 0.042
epoch: 151, batch: 2800 // loss: 0.046
epoch: 151, batch: 2900 // loss: 0.042
epoch: 151, batch: 3000 // loss: 0.045
epoch: 151, batch: 3100 // loss: 0.043
epoch: 151, batch: 3200 // loss: 0.039
epoch: 151, batch: 3300 // loss: 0.038
epoch: 151, batch: 3400 // loss: 0.044
epoch: 151, batch: 3500 // loss: 0.035
epoch: 151, batch: 3600 // loss: 0.043
epoch: 151, batch: 3700 // loss: 0.043

epoch: 152, batch: 0 // loss: 0.053
epoch: 152, batch: 100 // loss: 0.046
epoch: 152, batch: 200 // loss: 0.043
epoch: 152, batch: 300 // loss: 0.049
epoch: 152, batch: 400 // loss: 0.046
epoch: 152, batch: 500 // loss: 0.040
epoch: 152, batch: 600 // loss: 0.040
epoch: 152, batch: 700 // loss: 0.044
epoch: 152, batch: 800 // loss: 0.042
epoch: 152, batch: 900 // loss: 0.049
epoch: 152, batch: 1000 // loss: 0.046
epoch: 152, batch: 1100 // loss: 0.042
epoch: 152, batch: 1200 // loss: 0.044
epoch: 152, batch: 1300 // loss: 0.046
epoch: 152, batch: 1400 // loss: 0.043
epoch: 152, batch: 1500 // loss: 0.048
epoch: 152, batch: 1600 // loss: 0.052
epoch: 152, batch: 1700 // loss: 0.045
epoch: 152, batch: 1800 // loss: 0.053
epoch: 152, batch: 1900 // loss: 0.045
epoch: 152, batch: 2000 // loss: 0.047
epoch: 152, batch: 2100 // loss: 0.046
epoch: 152, batch: 2200 // loss: 0.050
epoch: 152, batch: 2300 // loss: 0.049
epoch: 152, batch: 2400 // loss: 0.042
epoch: 152, batch: 2500 // loss: 0.042
epoch: 152, batch: 2600 // loss: 0.045
epoch: 152, batch: 2700 // loss: 0.042
epoch: 152, batch: 2800 // loss: 0.046
epoch: 152, batch: 2900 // loss: 0.042
epoch: 152, batch: 3000 // loss: 0.045
epoch: 152, batch: 3100 // loss: 0.043
epoch: 152, batch: 3200 // loss: 0.039
epoch: 152, batch: 3300 // loss: 0.038
epoch: 152, batch: 3400 // loss: 0.044
epoch: 152, batch: 3500 // loss: 0.035
epoch: 152, batch: 3600 // loss: 0.043
epoch: 152, batch: 3700 // loss: 0.043

epoch: 153, batch: 0 // loss: 0.053
epoch: 153, batch: 100 // loss: 0.046
epoch: 153, batch: 200 // loss: 0.043
epoch: 153, batch: 300 // loss: 0.049
epoch: 153, batch: 400 // loss: 0.046
epoch: 153, batch: 500 // loss: 0.040
epoch: 153, batch: 600 // loss: 0.040
epoch: 153, batch: 700 // loss: 0.044
epoch: 153, batch: 800 // loss: 0.042
epoch: 153, batch: 900 // loss: 0.049
epoch: 153, batch: 1000 // loss: 0.046
epoch: 153, batch: 1100 // loss: 0.042
epoch: 153, batch: 1200 // loss: 0.044
epoch: 153, batch: 1300 // loss: 0.046
epoch: 153, batch: 1400 // loss: 0.043
epoch: 153, batch: 1500 // loss: 0.048
epoch: 153, batch: 1600 // loss: 0.052
epoch: 153, batch: 1700 // loss: 0.045
epoch: 153, batch: 1800 // loss: 0.053
epoch: 153, batch: 1900 // loss: 0.045
epoch: 153, batch: 2000 // loss: 0.047
epoch: 153, batch: 2100 // loss: 0.046
epoch: 153, batch: 2200 // loss: 0.050
epoch: 153, batch: 2300 // loss: 0.049
epoch: 153, batch: 2400 // loss: 0.042
epoch: 153, batch: 2500 // loss: 0.042
epoch: 153, batch: 2600 // loss: 0.045
epoch: 153, batch: 2700 // loss: 0.042
epoch: 153, batch: 2800 // loss: 0.046
epoch: 153, batch: 2900 // loss: 0.042
epoch: 153, batch: 3000 // loss: 0.045
epoch: 153, batch: 3100 // loss: 0.043
epoch: 153, batch: 3200 // loss: 0.039
epoch: 153, batch: 3300 // loss: 0.038
epoch: 153, batch: 3400 // loss: 0.044
epoch: 153, batch: 3500 // loss: 0.035
epoch: 153, batch: 3600 // loss: 0.043
epoch: 153, batch: 3700 // loss: 0.043

epoch: 154, batch: 0 // loss: 0.053
epoch: 154, batch: 100 // loss: 0.046
epoch: 154, batch: 200 // loss: 0.043
epoch: 154, batch: 300 // loss: 0.049
epoch: 154, batch: 400 // loss: 0.046
epoch: 154, batch: 500 // loss: 0.040
epoch: 154, batch: 600 // loss: 0.040
epoch: 154, batch: 700 // loss: 0.044
epoch: 154, batch: 800 // loss: 0.042
epoch: 154, batch: 900 // loss: 0.049
epoch: 154, batch: 1000 // loss: 0.046
epoch: 154, batch: 1100 // loss: 0.042
epoch: 154, batch: 1200 // loss: 0.044
epoch: 154, batch: 1300 // loss: 0.046
epoch: 154, batch: 1400 // loss: 0.043
epoch: 154, batch: 1500 // loss: 0.048
epoch: 154, batch: 1600 // loss: 0.052
epoch: 154, batch: 1700 // loss: 0.045
epoch: 154, batch: 1800 // loss: 0.053
epoch: 154, batch: 1900 // loss: 0.045
epoch: 154, batch: 2000 // loss: 0.047
epoch: 154, batch: 2100 // loss: 0.046
epoch: 154, batch: 2200 // loss: 0.050
epoch: 154, batch: 2300 // loss: 0.049
epoch: 154, batch: 2400 // loss: 0.042
epoch: 154, batch: 2500 // loss: 0.042
epoch: 154, batch: 2600 // loss: 0.045
epoch: 154, batch: 2700 // loss: 0.042
epoch: 154, batch: 2800 // loss: 0.046
epoch: 154, batch: 2900 // loss: 0.042
epoch: 154, batch: 3000 // loss: 0.045
epoch: 154, batch: 3100 // loss: 0.043
epoch: 154, batch: 3200 // loss: 0.039
epoch: 154, batch: 3300 // loss: 0.038
epoch: 154, batch: 3400 // loss: 0.044
epoch: 154, batch: 3500 // loss: 0.035
epoch: 154, batch: 3600 // loss: 0.043
epoch: 154, batch: 3700 // loss: 0.043

epoch: 155, batch: 0 // loss: 0.053
epoch: 155, batch: 100 // loss: 0.046
epoch: 155, batch: 200 // loss: 0.043
epoch: 155, batch: 300 // loss: 0.049
epoch: 155, batch: 400 // loss: 0.046
epoch: 155, batch: 500 // loss: 0.040
epoch: 155, batch: 600 // loss: 0.040
epoch: 155, batch: 700 // loss: 0.044
epoch: 155, batch: 800 // loss: 0.042
epoch: 155, batch: 900 // loss: 0.049
epoch: 155, batch: 1000 // loss: 0.046
epoch: 155, batch: 1100 // loss: 0.042
epoch: 155, batch: 1200 // loss: 0.044
epoch: 155, batch: 1300 // loss: 0.046
epoch: 155, batch: 1400 // loss: 0.043
epoch: 155, batch: 1500 // loss: 0.048
epoch: 155, batch: 1600 // loss: 0.052
epoch: 155, batch: 1700 // loss: 0.045
epoch: 155, batch: 1800 // loss: 0.053
epoch: 155, batch: 1900 // loss: 0.045
epoch: 155, batch: 2000 // loss: 0.047
epoch: 155, batch: 2100 // loss: 0.046
epoch: 155, batch: 2200 // loss: 0.050
epoch: 155, batch: 2300 // loss: 0.049
epoch: 155, batch: 2400 // loss: 0.042
epoch: 155, batch: 2500 // loss: 0.042
epoch: 155, batch: 2600 // loss: 0.045
epoch: 155, batch: 2700 // loss: 0.042
epoch: 155, batch: 2800 // loss: 0.046
epoch: 155, batch: 2900 // loss: 0.042
epoch: 155, batch: 3000 // loss: 0.045
epoch: 155, batch: 3100 // loss: 0.043
epoch: 155, batch: 3200 // loss: 0.039
epoch: 155, batch: 3300 // loss: 0.038
epoch: 155, batch: 3400 // loss: 0.044
epoch: 155, batch: 3500 // loss: 0.035
epoch: 155, batch: 3600 // loss: 0.043
epoch: 155, batch: 3700 // loss: 0.043

epoch: 156, batch: 0 // loss: 0.053
epoch: 156, batch: 100 // loss: 0.046
epoch: 156, batch: 200 // loss: 0.043
epoch: 156, batch: 300 // loss: 0.049
epoch: 156, batch: 400 // loss: 0.046
epoch: 156, batch: 500 // loss: 0.040
epoch: 156, batch: 600 // loss: 0.040
epoch: 156, batch: 700 // loss: 0.044
epoch: 156, batch: 800 // loss: 0.042
epoch: 156, batch: 900 // loss: 0.049
epoch: 156, batch: 1000 // loss: 0.046
epoch: 156, batch: 1100 // loss: 0.042
epoch: 156, batch: 1200 // loss: 0.044
epoch: 156, batch: 1300 // loss: 0.046
epoch: 156, batch: 1400 // loss: 0.043
epoch: 156, batch: 1500 // loss: 0.048
epoch: 156, batch: 1600 // loss: 0.052
epoch: 156, batch: 1700 // loss: 0.045
epoch: 156, batch: 1800 // loss: 0.053
epoch: 156, batch: 1900 // loss: 0.045
epoch: 156, batch: 2000 // loss: 0.047
epoch: 156, batch: 2100 // loss: 0.046
epoch: 156, batch: 2200 // loss: 0.050
epoch: 156, batch: 2300 // loss: 0.049
epoch: 156, batch: 2400 // loss: 0.042
epoch: 156, batch: 2500 // loss: 0.042
epoch: 156, batch: 2600 // loss: 0.045
epoch: 156, batch: 2700 // loss: 0.042
epoch: 156, batch: 2800 // loss: 0.046
epoch: 156, batch: 2900 // loss: 0.042
epoch: 156, batch: 3000 // loss: 0.045
epoch: 156, batch: 3100 // loss: 0.043
epoch: 156, batch: 3200 // loss: 0.039
epoch: 156, batch: 3300 // loss: 0.038
epoch: 156, batch: 3400 // loss: 0.044
epoch: 156, batch: 3500 // loss: 0.035
epoch: 156, batch: 3600 // loss: 0.043
epoch: 156, batch: 3700 // loss: 0.043

epoch: 157, batch: 0 // loss: 0.053
epoch: 157, batch: 100 // loss: 0.046
epoch: 157, batch: 200 // loss: 0.043
epoch: 157, batch: 300 // loss: 0.049
epoch: 157, batch: 400 // loss: 0.046
epoch: 157, batch: 500 // loss: 0.040
epoch: 157, batch: 600 // loss: 0.040
epoch: 157, batch: 700 // loss: 0.044
epoch: 157, batch: 800 // loss: 0.042
epoch: 157, batch: 900 // loss: 0.049
epoch: 157, batch: 1000 // loss: 0.046
epoch: 157, batch: 1100 // loss: 0.042
epoch: 157, batch: 1200 // loss: 0.044
epoch: 157, batch: 1300 // loss: 0.046
epoch: 157, batch: 1400 // loss: 0.043
epoch: 157, batch: 1500 // loss: 0.048
epoch: 157, batch: 1600 // loss: 0.052
epoch: 157, batch: 1700 // loss: 0.045
epoch: 157, batch: 1800 // loss: 0.053
epoch: 157, batch: 1900 // loss: 0.045
epoch: 157, batch: 2000 // loss: 0.047
epoch: 157, batch: 2100 // loss: 0.046
epoch: 157, batch: 2200 // loss: 0.050
epoch: 157, batch: 2300 // loss: 0.049
epoch: 157, batch: 2400 // loss: 0.042
epoch: 157, batch: 2500 // loss: 0.042
epoch: 157, batch: 2600 // loss: 0.045
epoch: 157, batch: 2700 // loss: 0.042
epoch: 157, batch: 2800 // loss: 0.046
epoch: 157, batch: 2900 // loss: 0.042
epoch: 157, batch: 3000 // loss: 0.045
epoch: 157, batch: 3100 // loss: 0.043
epoch: 157, batch: 3200 // loss: 0.039
epoch: 157, batch: 3300 // loss: 0.038
epoch: 157, batch: 3400 // loss: 0.044
epoch: 157, batch: 3500 // loss: 0.035
epoch: 157, batch: 3600 // loss: 0.043
epoch: 157, batch: 3700 // loss: 0.043

epoch: 158, batch: 0 // loss: 0.053
epoch: 158, batch: 100 // loss: 0.046
epoch: 158, batch: 200 // loss: 0.043
epoch: 158, batch: 300 // loss: 0.049
epoch: 158, batch: 400 // loss: 0.046
epoch: 158, batch: 500 // loss: 0.040
epoch: 158, batch: 600 // loss: 0.040
epoch: 158, batch: 700 // loss: 0.044
epoch: 158, batch: 800 // loss: 0.042
epoch: 158, batch: 900 // loss: 0.049
epoch: 158, batch: 1000 // loss: 0.046
epoch: 158, batch: 1100 // loss: 0.042
epoch: 158, batch: 1200 // loss: 0.044
epoch: 158, batch: 1300 // loss: 0.046
epoch: 158, batch: 1400 // loss: 0.043
epoch: 158, batch: 1500 // loss: 0.048
epoch: 158, batch: 1600 // loss: 0.052
epoch: 158, batch: 1700 // loss: 0.045
epoch: 158, batch: 1800 // loss: 0.053
epoch: 158, batch: 1900 // loss: 0.045
epoch: 158, batch: 2000 // loss: 0.047
epoch: 158, batch: 2100 // loss: 0.046
epoch: 158, batch: 2200 // loss: 0.050
epoch: 158, batch: 2300 // loss: 0.049
epoch: 158, batch: 2400 // loss: 0.042
epoch: 158, batch: 2500 // loss: 0.042
epoch: 158, batch: 2600 // loss: 0.045
epoch: 158, batch: 2700 // loss: 0.042
epoch: 158, batch: 2800 // loss: 0.046
epoch: 158, batch: 2900 // loss: 0.042
epoch: 158, batch: 3000 // loss: 0.045
epoch: 158, batch: 3100 // loss: 0.043
epoch: 158, batch: 3200 // loss: 0.039
epoch: 158, batch: 3300 // loss: 0.038
epoch: 158, batch: 3400 // loss: 0.044
epoch: 158, batch: 3500 // loss: 0.035
epoch: 158, batch: 3600 // loss: 0.043
epoch: 158, batch: 3700 // loss: 0.043

epoch: 159, batch: 0 // loss: 0.053
epoch: 159, batch: 100 // loss: 0.046
epoch: 159, batch: 200 // loss: 0.043
epoch: 159, batch: 300 // loss: 0.049
epoch: 159, batch: 400 // loss: 0.046
epoch: 159, batch: 500 // loss: 0.040
epoch: 159, batch: 600 // loss: 0.040
epoch: 159, batch: 700 // loss: 0.044
epoch: 159, batch: 800 // loss: 0.042
epoch: 159, batch: 900 // loss: 0.049
epoch: 159, batch: 1000 // loss: 0.046
epoch: 159, batch: 1100 // loss: 0.042
epoch: 159, batch: 1200 // loss: 0.044
epoch: 159, batch: 1300 // loss: 0.046
epoch: 159, batch: 1400 // loss: 0.043
epoch: 159, batch: 1500 // loss: 0.048
epoch: 159, batch: 1600 // loss: 0.052
epoch: 159, batch: 1700 // loss: 0.045
epoch: 159, batch: 1800 // loss: 0.053
epoch: 159, batch: 1900 // loss: 0.045
epoch: 159, batch: 2000 // loss: 0.047
epoch: 159, batch: 2100 // loss: 0.046
epoch: 159, batch: 2200 // loss: 0.050
epoch: 159, batch: 2300 // loss: 0.049
epoch: 159, batch: 2400 // loss: 0.042
epoch: 159, batch: 2500 // loss: 0.042
epoch: 159, batch: 2600 // loss: 0.045
epoch: 159, batch: 2700 // loss: 0.042
epoch: 159, batch: 2800 // loss: 0.046
epoch: 159, batch: 2900 // loss: 0.042
epoch: 159, batch: 3000 // loss: 0.045
epoch: 159, batch: 3100 // loss: 0.043
epoch: 159, batch: 3200 // loss: 0.039
epoch: 159, batch: 3300 // loss: 0.038
epoch: 159, batch: 3400 // loss: 0.044
epoch: 159, batch: 3500 // loss: 0.035
epoch: 159, batch: 3600 // loss: 0.043
epoch: 159, batch: 3700 // loss: 0.043

epoch: 160, batch: 0 // loss: 0.053
epoch: 160, batch: 100 // loss: 0.046
epoch: 160, batch: 200 // loss: 0.043
epoch: 160, batch: 300 // loss: 0.049
epoch: 160, batch: 400 // loss: 0.046
epoch: 160, batch: 500 // loss: 0.040
epoch: 160, batch: 600 // loss: 0.040
epoch: 160, batch: 700 // loss: 0.044
epoch: 160, batch: 800 // loss: 0.042
epoch: 160, batch: 900 // loss: 0.049
epoch: 160, batch: 1000 // loss: 0.046
epoch: 160, batch: 1100 // loss: 0.042
epoch: 160, batch: 1200 // loss: 0.044
epoch: 160, batch: 1300 // loss: 0.046
epoch: 160, batch: 1400 // loss: 0.043
epoch: 160, batch: 1500 // loss: 0.048
epoch: 160, batch: 1600 // loss: 0.052
epoch: 160, batch: 1700 // loss: 0.045
epoch: 160, batch: 1800 // loss: 0.053
epoch: 160, batch: 1900 // loss: 0.045
epoch: 160, batch: 2000 // loss: 0.047
epoch: 160, batch: 2100 // loss: 0.046
epoch: 160, batch: 2200 // loss: 0.050
epoch: 160, batch: 2300 // loss: 0.049
epoch: 160, batch: 2400 // loss: 0.042
epoch: 160, batch: 2500 // loss: 0.042
epoch: 160, batch: 2600 // loss: 0.045
epoch: 160, batch: 2700 // loss: 0.042
epoch: 160, batch: 2800 // loss: 0.046
epoch: 160, batch: 2900 // loss: 0.042
epoch: 160, batch: 3000 // loss: 0.045
epoch: 160, batch: 3100 // loss: 0.043
epoch: 160, batch: 3200 // loss: 0.039
epoch: 160, batch: 3300 // loss: 0.038
epoch: 160, batch: 3400 // loss: 0.044
epoch: 160, batch: 3500 // loss: 0.035
epoch: 160, batch: 3600 // loss: 0.043
epoch: 160, batch: 3700 // loss: 0.043

epoch: 161, batch: 0 // loss: 0.053
epoch: 161, batch: 100 // loss: 0.046
epoch: 161, batch: 200 // loss: 0.043
epoch: 161, batch: 300 // loss: 0.049
epoch: 161, batch: 400 // loss: 0.046
epoch: 161, batch: 500 // loss: 0.040
epoch: 161, batch: 600 // loss: 0.040
epoch: 161, batch: 700 // loss: 0.044
epoch: 161, batch: 800 // loss: 0.042
epoch: 161, batch: 900 // loss: 0.049
epoch: 161, batch: 1000 // loss: 0.046
epoch: 161, batch: 1100 // loss: 0.042
epoch: 161, batch: 1200 // loss: 0.044
epoch: 161, batch: 1300 // loss: 0.046
epoch: 161, batch: 1400 // loss: 0.043
epoch: 161, batch: 1500 // loss: 0.048
epoch: 161, batch: 1600 // loss: 0.052
epoch: 161, batch: 1700 // loss: 0.045
epoch: 161, batch: 1800 // loss: 0.053
epoch: 161, batch: 1900 // loss: 0.045
epoch: 161, batch: 2000 // loss: 0.047
epoch: 161, batch: 2100 // loss: 0.046
epoch: 161, batch: 2200 // loss: 0.050
epoch: 161, batch: 2300 // loss: 0.049
epoch: 161, batch: 2400 // loss: 0.042
epoch: 161, batch: 2500 // loss: 0.042
epoch: 161, batch: 2600 // loss: 0.045
epoch: 161, batch: 2700 // loss: 0.042
epoch: 161, batch: 2800 // loss: 0.046
epoch: 161, batch: 2900 // loss: 0.042
epoch: 161, batch: 3000 // loss: 0.045
epoch: 161, batch: 3100 // loss: 0.043
epoch: 161, batch: 3200 // loss: 0.039
epoch: 161, batch: 3300 // loss: 0.038
epoch: 161, batch: 3400 // loss: 0.044
epoch: 161, batch: 3500 // loss: 0.035
epoch: 161, batch: 3600 // loss: 0.043
epoch: 161, batch: 3700 // loss: 0.043

epoch: 162, batch: 0 // loss: 0.053
epoch: 162, batch: 100 // loss: 0.046
epoch: 162, batch: 200 // loss: 0.043
epoch: 162, batch: 300 // loss: 0.049
epoch: 162, batch: 400 // loss: 0.046
epoch: 162, batch: 500 // loss: 0.040
epoch: 162, batch: 600 // loss: 0.040
epoch: 162, batch: 700 // loss: 0.044
epoch: 162, batch: 800 // loss: 0.042
epoch: 162, batch: 900 // loss: 0.049
epoch: 162, batch: 1000 // loss: 0.046
epoch: 162, batch: 1100 // loss: 0.042
epoch: 162, batch: 1200 // loss: 0.044
epoch: 162, batch: 1300 // loss: 0.046
epoch: 162, batch: 1400 // loss: 0.043
epoch: 162, batch: 1500 // loss: 0.048
epoch: 162, batch: 1600 // loss: 0.052
epoch: 162, batch: 1700 // loss: 0.045
epoch: 162, batch: 1800 // loss: 0.053
epoch: 162, batch: 1900 // loss: 0.045
epoch: 162, batch: 2000 // loss: 0.047
epoch: 162, batch: 2100 // loss: 0.046
epoch: 162, batch: 2200 // loss: 0.050
epoch: 162, batch: 2300 // loss: 0.049
epoch: 162, batch: 2400 // loss: 0.042
epoch: 162, batch: 2500 // loss: 0.042
epoch: 162, batch: 2600 // loss: 0.045
epoch: 162, batch: 2700 // loss: 0.042
epoch: 162, batch: 2800 // loss: 0.046
epoch: 162, batch: 2900 // loss: 0.042
epoch: 162, batch: 3000 // loss: 0.045
epoch: 162, batch: 3100 // loss: 0.043
epoch: 162, batch: 3200 // loss: 0.039
epoch: 162, batch: 3300 // loss: 0.038
epoch: 162, batch: 3400 // loss: 0.044
epoch: 162, batch: 3500 // loss: 0.035
epoch: 162, batch: 3600 // loss: 0.043
epoch: 162, batch: 3700 // loss: 0.043

epoch: 163, batch: 0 // loss: 0.053
epoch: 163, batch: 100 // loss: 0.046
epoch: 163, batch: 200 // loss: 0.043
epoch: 163, batch: 300 // loss: 0.049
epoch: 163, batch: 400 // loss: 0.046
epoch: 163, batch: 500 // loss: 0.040
epoch: 163, batch: 600 // loss: 0.040
epoch: 163, batch: 700 // loss: 0.044
epoch: 163, batch: 800 // loss: 0.042
epoch: 163, batch: 900 // loss: 0.049
epoch: 163, batch: 1000 // loss: 0.046
epoch: 163, batch: 1100 // loss: 0.042
epoch: 163, batch: 1200 // loss: 0.044
epoch: 163, batch: 1300 // loss: 0.046
epoch: 163, batch: 1400 // loss: 0.043
epoch: 163, batch: 1500 // loss: 0.048
epoch: 163, batch: 1600 // loss: 0.052
epoch: 163, batch: 1700 // loss: 0.045
epoch: 163, batch: 1800 // loss: 0.053
epoch: 163, batch: 1900 // loss: 0.045
epoch: 163, batch: 2000 // loss: 0.047
epoch: 163, batch: 2100 // loss: 0.046
epoch: 163, batch: 2200 // loss: 0.050
epoch: 163, batch: 2300 // loss: 0.049
epoch: 163, batch: 2400 // loss: 0.042
epoch: 163, batch: 2500 // loss: 0.042
epoch: 163, batch: 2600 // loss: 0.045
epoch: 163, batch: 2700 // loss: 0.042
epoch: 163, batch: 2800 // loss: 0.046
epoch: 163, batch: 2900 // loss: 0.042
epoch: 163, batch: 3000 // loss: 0.045
epoch: 163, batch: 3100 // loss: 0.043
epoch: 163, batch: 3200 // loss: 0.039
epoch: 163, batch: 3300 // loss: 0.038
epoch: 163, batch: 3400 // loss: 0.044
epoch: 163, batch: 3500 // loss: 0.035
epoch: 163, batch: 3600 // loss: 0.043
epoch: 163, batch: 3700 // loss: 0.043

epoch: 164, batch: 0 // loss: 0.053
epoch: 164, batch: 100 // loss: 0.046
epoch: 164, batch: 200 // loss: 0.043
epoch: 164, batch: 300 // loss: 0.049
epoch: 164, batch: 400 // loss: 0.046
epoch: 164, batch: 500 // loss: 0.040
epoch: 164, batch: 600 // loss: 0.040
epoch: 164, batch: 700 // loss: 0.044
epoch: 164, batch: 800 // loss: 0.042
epoch: 164, batch: 900 // loss: 0.049
epoch: 164, batch: 1000 // loss: 0.046
epoch: 164, batch: 1100 // loss: 0.042
epoch: 164, batch: 1200 // loss: 0.044
epoch: 164, batch: 1300 // loss: 0.046
epoch: 164, batch: 1400 // loss: 0.043
epoch: 164, batch: 1500 // loss: 0.048
epoch: 164, batch: 1600 // loss: 0.052
epoch: 164, batch: 1700 // loss: 0.045
epoch: 164, batch: 1800 // loss: 0.053
epoch: 164, batch: 1900 // loss: 0.045
epoch: 164, batch: 2000 // loss: 0.047
epoch: 164, batch: 2100 // loss: 0.046
epoch: 164, batch: 2200 // loss: 0.050
epoch: 164, batch: 2300 // loss: 0.049
epoch: 164, batch: 2400 // loss: 0.042
epoch: 164, batch: 2500 // loss: 0.042
epoch: 164, batch: 2600 // loss: 0.045
epoch: 164, batch: 2700 // loss: 0.042
epoch: 164, batch: 2800 // loss: 0.046
epoch: 164, batch: 2900 // loss: 0.042
epoch: 164, batch: 3000 // loss: 0.045
epoch: 164, batch: 3100 // loss: 0.043
epoch: 164, batch: 3200 // loss: 0.039
epoch: 164, batch: 3300 // loss: 0.038
epoch: 164, batch: 3400 // loss: 0.044
epoch: 164, batch: 3500 // loss: 0.035
epoch: 164, batch: 3600 // loss: 0.043
epoch: 164, batch: 3700 // loss: 0.043

epoch: 165, batch: 0 // loss: 0.053
epoch: 165, batch: 100 // loss: 0.046
epoch: 165, batch: 200 // loss: 0.043
epoch: 165, batch: 300 // loss: 0.049
epoch: 165, batch: 400 // loss: 0.046
epoch: 165, batch: 500 // loss: 0.040
epoch: 165, batch: 600 // loss: 0.040
epoch: 165, batch: 700 // loss: 0.044
epoch: 165, batch: 800 // loss: 0.042
epoch: 165, batch: 900 // loss: 0.049
epoch: 165, batch: 1000 // loss: 0.046
epoch: 165, batch: 1100 // loss: 0.042
epoch: 165, batch: 1200 // loss: 0.044
epoch: 165, batch: 1300 // loss: 0.046
epoch: 165, batch: 1400 // loss: 0.043
epoch: 165, batch: 1500 // loss: 0.048
epoch: 165, batch: 1600 // loss: 0.052
epoch: 165, batch: 1700 // loss: 0.045
epoch: 165, batch: 1800 // loss: 0.053
epoch: 165, batch: 1900 // loss: 0.045
epoch: 165, batch: 2000 // loss: 0.047
epoch: 165, batch: 2100 // loss: 0.046
epoch: 165, batch: 2200 // loss: 0.050
epoch: 165, batch: 2300 // loss: 0.049
epoch: 165, batch: 2400 // loss: 0.042
epoch: 165, batch: 2500 // loss: 0.042
epoch: 165, batch: 2600 // loss: 0.045
epoch: 165, batch: 2700 // loss: 0.042
epoch: 165, batch: 2800 // loss: 0.046
epoch: 165, batch: 2900 // loss: 0.042
epoch: 165, batch: 3000 // loss: 0.045
epoch: 165, batch: 3100 // loss: 0.043
epoch: 165, batch: 3200 // loss: 0.039
epoch: 165, batch: 3300 // loss: 0.038
epoch: 165, batch: 3400 // loss: 0.044
epoch: 165, batch: 3500 // loss: 0.035
epoch: 165, batch: 3600 // loss: 0.043
epoch: 165, batch: 3700 // loss: 0.043

epoch: 166, batch: 0 // loss: 0.053
epoch: 166, batch: 100 // loss: 0.046
epoch: 166, batch: 200 // loss: 0.043
epoch: 166, batch: 300 // loss: 0.049
epoch: 166, batch: 400 // loss: 0.046
epoch: 166, batch: 500 // loss: 0.040
epoch: 166, batch: 600 // loss: 0.040
epoch: 166, batch: 700 // loss: 0.044
epoch: 166, batch: 800 // loss: 0.042
epoch: 166, batch: 900 // loss: 0.049
epoch: 166, batch: 1000 // loss: 0.046
epoch: 166, batch: 1100 // loss: 0.042
epoch: 166, batch: 1200 // loss: 0.044
epoch: 166, batch: 1300 // loss: 0.046
epoch: 166, batch: 1400 // loss: 0.043
epoch: 166, batch: 1500 // loss: 0.048
epoch: 166, batch: 1600 // loss: 0.052
epoch: 166, batch: 1700 // loss: 0.045
epoch: 166, batch: 1800 // loss: 0.053
epoch: 166, batch: 1900 // loss: 0.045
epoch: 166, batch: 2000 // loss: 0.047
epoch: 166, batch: 2100 // loss: 0.046
epoch: 166, batch: 2200 // loss: 0.050
epoch: 166, batch: 2300 // loss: 0.049
epoch: 166, batch: 2400 // loss: 0.042
epoch: 166, batch: 2500 // loss: 0.042
epoch: 166, batch: 2600 // loss: 0.045
epoch: 166, batch: 2700 // loss: 0.042
epoch: 166, batch: 2800 // loss: 0.046
epoch: 166, batch: 2900 // loss: 0.042
epoch: 166, batch: 3000 // loss: 0.045
epoch: 166, batch: 3100 // loss: 0.043
epoch: 166, batch: 3200 // loss: 0.039
epoch: 166, batch: 3300 // loss: 0.038
epoch: 166, batch: 3400 // loss: 0.044
epoch: 166, batch: 3500 // loss: 0.035
epoch: 166, batch: 3600 // loss: 0.043
epoch: 166, batch: 3700 // loss: 0.043

epoch: 167, batch: 0 // loss: 0.053
epoch: 167, batch: 100 // loss: 0.046
epoch: 167, batch: 200 // loss: 0.043
epoch: 167, batch: 300 // loss: 0.049
epoch: 167, batch: 400 // loss: 0.046
epoch: 167, batch: 500 // loss: 0.040
epoch: 167, batch: 600 // loss: 0.040
epoch: 167, batch: 700 // loss: 0.044
epoch: 167, batch: 800 // loss: 0.042
epoch: 167, batch: 900 // loss: 0.049
epoch: 167, batch: 1000 // loss: 0.046
epoch: 167, batch: 1100 // loss: 0.042
epoch: 167, batch: 1200 // loss: 0.044
epoch: 167, batch: 1300 // loss: 0.046
epoch: 167, batch: 1400 // loss: 0.043
epoch: 167, batch: 1500 // loss: 0.048
epoch: 167, batch: 1600 // loss: 0.052
epoch: 167, batch: 1700 // loss: 0.045
epoch: 167, batch: 1800 // loss: 0.053
epoch: 167, batch: 1900 // loss: 0.045
epoch: 167, batch: 2000 // loss: 0.047
epoch: 167, batch: 2100 // loss: 0.046
epoch: 167, batch: 2200 // loss: 0.050
epoch: 167, batch: 2300 // loss: 0.049
epoch: 167, batch: 2400 // loss: 0.042
epoch: 167, batch: 2500 // loss: 0.042
epoch: 167, batch: 2600 // loss: 0.045
epoch: 167, batch: 2700 // loss: 0.042
epoch: 167, batch: 2800 // loss: 0.046
epoch: 167, batch: 2900 // loss: 0.042
epoch: 167, batch: 3000 // loss: 0.045
epoch: 167, batch: 3100 // loss: 0.043
epoch: 167, batch: 3200 // loss: 0.039
epoch: 167, batch: 3300 // loss: 0.038
epoch: 167, batch: 3400 // loss: 0.044
epoch: 167, batch: 3500 // loss: 0.035
epoch: 167, batch: 3600 // loss: 0.043
epoch: 167, batch: 3700 // loss: 0.043

epoch: 168, batch: 0 // loss: 0.053
epoch: 168, batch: 100 // loss: 0.046
epoch: 168, batch: 200 // loss: 0.043
epoch: 168, batch: 300 // loss: 0.049
epoch: 168, batch: 400 // loss: 0.046
epoch: 168, batch: 500 // loss: 0.040
epoch: 168, batch: 600 // loss: 0.040
epoch: 168, batch: 700 // loss: 0.044
epoch: 168, batch: 800 // loss: 0.042
epoch: 168, batch: 900 // loss: 0.049
epoch: 168, batch: 1000 // loss: 0.046
epoch: 168, batch: 1100 // loss: 0.042
epoch: 168, batch: 1200 // loss: 0.044
epoch: 168, batch: 1300 // loss: 0.046
epoch: 168, batch: 1400 // loss: 0.043
epoch: 168, batch: 1500 // loss: 0.048
epoch: 168, batch: 1600 // loss: 0.052
epoch: 168, batch: 1700 // loss: 0.045
epoch: 168, batch: 1800 // loss: 0.053
epoch: 168, batch: 1900 // loss: 0.045
epoch: 168, batch: 2000 // loss: 0.047
epoch: 168, batch: 2100 // loss: 0.046
epoch: 168, batch: 2200 // loss: 0.050
epoch: 168, batch: 2300 // loss: 0.049
epoch: 168, batch: 2400 // loss: 0.042
epoch: 168, batch: 2500 // loss: 0.042
epoch: 168, batch: 2600 // loss: 0.045
epoch: 168, batch: 2700 // loss: 0.042
epoch: 168, batch: 2800 // loss: 0.046
epoch: 168, batch: 2900 // loss: 0.042
epoch: 168, batch: 3000 // loss: 0.045
epoch: 168, batch: 3100 // loss: 0.043
epoch: 168, batch: 3200 // loss: 0.039
epoch: 168, batch: 3300 // loss: 0.038
epoch: 168, batch: 3400 // loss: 0.044
epoch: 168, batch: 3500 // loss: 0.035
epoch: 168, batch: 3600 // loss: 0.043
epoch: 168, batch: 3700 // loss: 0.043

epoch: 169, batch: 0 // loss: 0.053
epoch: 169, batch: 100 // loss: 0.046
epoch: 169, batch: 200 // loss: 0.043
epoch: 169, batch: 300 // loss: 0.049
epoch: 169, batch: 400 // loss: 0.046
epoch: 169, batch: 500 // loss: 0.040
epoch: 169, batch: 600 // loss: 0.040
epoch: 169, batch: 700 // loss: 0.044
epoch: 169, batch: 800 // loss: 0.042
epoch: 169, batch: 900 // loss: 0.049
epoch: 169, batch: 1000 // loss: 0.046
epoch: 169, batch: 1100 // loss: 0.042
epoch: 169, batch: 1200 // loss: 0.044
epoch: 169, batch: 1300 // loss: 0.046
epoch: 169, batch: 1400 // loss: 0.043
epoch: 169, batch: 1500 // loss: 0.048
epoch: 169, batch: 1600 // loss: 0.052
epoch: 169, batch: 1700 // loss: 0.045
epoch: 169, batch: 1800 // loss: 0.053
epoch: 169, batch: 1900 // loss: 0.045
epoch: 169, batch: 2000 // loss: 0.047
epoch: 169, batch: 2100 // loss: 0.046
epoch: 169, batch: 2200 // loss: 0.050
epoch: 169, batch: 2300 // loss: 0.049
epoch: 169, batch: 2400 // loss: 0.042
epoch: 169, batch: 2500 // loss: 0.042
epoch: 169, batch: 2600 // loss: 0.045
epoch: 169, batch: 2700 // loss: 0.042
epoch: 169, batch: 2800 // loss: 0.046
epoch: 169, batch: 2900 // loss: 0.042
epoch: 169, batch: 3000 // loss: 0.045
epoch: 169, batch: 3100 // loss: 0.043
epoch: 169, batch: 3200 // loss: 0.039
epoch: 169, batch: 3300 // loss: 0.038
epoch: 169, batch: 3400 // loss: 0.044
epoch: 169, batch: 3500 // loss: 0.035
epoch: 169, batch: 3600 // loss: 0.043
epoch: 169, batch: 3700 // loss: 0.043

epoch: 170, batch: 0 // loss: 0.053
epoch: 170, batch: 100 // loss: 0.046
epoch: 170, batch: 200 // loss: 0.043
epoch: 170, batch: 300 // loss: 0.049
epoch: 170, batch: 400 // loss: 0.046
epoch: 170, batch: 500 // loss: 0.040
epoch: 170, batch: 600 // loss: 0.040
epoch: 170, batch: 700 // loss: 0.044
epoch: 170, batch: 800 // loss: 0.042
epoch: 170, batch: 900 // loss: 0.049
epoch: 170, batch: 1000 // loss: 0.046
epoch: 170, batch: 1100 // loss: 0.042
epoch: 170, batch: 1200 // loss: 0.044
epoch: 170, batch: 1300 // loss: 0.046
epoch: 170, batch: 1400 // loss: 0.043
epoch: 170, batch: 1500 // loss: 0.048
epoch: 170, batch: 1600 // loss: 0.052
epoch: 170, batch: 1700 // loss: 0.045
epoch: 170, batch: 1800 // loss: 0.053
epoch: 170, batch: 1900 // loss: 0.045
epoch: 170, batch: 2000 // loss: 0.047
epoch: 170, batch: 2100 // loss: 0.046
epoch: 170, batch: 2200 // loss: 0.050
epoch: 170, batch: 2300 // loss: 0.049
epoch: 170, batch: 2400 // loss: 0.042
epoch: 170, batch: 2500 // loss: 0.042
epoch: 170, batch: 2600 // loss: 0.045
epoch: 170, batch: 2700 // loss: 0.042
epoch: 170, batch: 2800 // loss: 0.046
epoch: 170, batch: 2900 // loss: 0.042
epoch: 170, batch: 3000 // loss: 0.045
epoch: 170, batch: 3100 // loss: 0.043
epoch: 170, batch: 3200 // loss: 0.039
epoch: 170, batch: 3300 // loss: 0.038
epoch: 170, batch: 3400 // loss: 0.044
epoch: 170, batch: 3500 // loss: 0.035
epoch: 170, batch: 3600 // loss: 0.043
epoch: 170, batch: 3700 // loss: 0.043

epoch: 171, batch: 0 // loss: 0.053
epoch: 171, batch: 100 // loss: 0.046
epoch: 171, batch: 200 // loss: 0.043
epoch: 171, batch: 300 // loss: 0.049
epoch: 171, batch: 400 // loss: 0.046
epoch: 171, batch: 500 // loss: 0.040
epoch: 171, batch: 600 // loss: 0.040
epoch: 171, batch: 700 // loss: 0.044
epoch: 171, batch: 800 // loss: 0.042
epoch: 171, batch: 900 // loss: 0.049
epoch: 171, batch: 1000 // loss: 0.046
epoch: 171, batch: 1100 // loss: 0.042
epoch: 171, batch: 1200 // loss: 0.044
epoch: 171, batch: 1300 // loss: 0.046
epoch: 171, batch: 1400 // loss: 0.043
epoch: 171, batch: 1500 // loss: 0.048
epoch: 171, batch: 1600 // loss: 0.052
epoch: 171, batch: 1700 // loss: 0.045
epoch: 171, batch: 1800 // loss: 0.053
epoch: 171, batch: 1900 // loss: 0.045
epoch: 171, batch: 2000 // loss: 0.047
epoch: 171, batch: 2100 // loss: 0.046
epoch: 171, batch: 2200 // loss: 0.050
epoch: 171, batch: 2300 // loss: 0.049
epoch: 171, batch: 2400 // loss: 0.042
epoch: 171, batch: 2500 // loss: 0.042
epoch: 171, batch: 2600 // loss: 0.045
epoch: 171, batch: 2700 // loss: 0.042
epoch: 171, batch: 2800 // loss: 0.046
epoch: 171, batch: 2900 // loss: 0.042
epoch: 171, batch: 3000 // loss: 0.045
epoch: 171, batch: 3100 // loss: 0.043
epoch: 171, batch: 3200 // loss: 0.039
epoch: 171, batch: 3300 // loss: 0.038
epoch: 171, batch: 3400 // loss: 0.044
epoch: 171, batch: 3500 // loss: 0.035
epoch: 171, batch: 3600 // loss: 0.043
epoch: 171, batch: 3700 // loss: 0.043

epoch: 172, batch: 0 // loss: 0.053
epoch: 172, batch: 100 // loss: 0.046
epoch: 172, batch: 200 // loss: 0.043
epoch: 172, batch: 300 // loss: 0.049
epoch: 172, batch: 400 // loss: 0.046
epoch: 172, batch: 500 // loss: 0.040
epoch: 172, batch: 600 // loss: 0.040
epoch: 172, batch: 700 // loss: 0.044
epoch: 172, batch: 800 // loss: 0.042
epoch: 172, batch: 900 // loss: 0.049
epoch: 172, batch: 1000 // loss: 0.046
epoch: 172, batch: 1100 // loss: 0.042
epoch: 172, batch: 1200 // loss: 0.044
epoch: 172, batch: 1300 // loss: 0.046
epoch: 172, batch: 1400 // loss: 0.043
epoch: 172, batch: 1500 // loss: 0.048
epoch: 172, batch: 1600 // loss: 0.052
epoch: 172, batch: 1700 // loss: 0.045
epoch: 172, batch: 1800 // loss: 0.053
epoch: 172, batch: 1900 // loss: 0.045
epoch: 172, batch: 2000 // loss: 0.047
epoch: 172, batch: 2100 // loss: 0.046
epoch: 172, batch: 2200 // loss: 0.050
epoch: 172, batch: 2300 // loss: 0.049
epoch: 172, batch: 2400 // loss: 0.042
epoch: 172, batch: 2500 // loss: 0.042
epoch: 172, batch: 2600 // loss: 0.045
epoch: 172, batch: 2700 // loss: 0.042
epoch: 172, batch: 2800 // loss: 0.046
epoch: 172, batch: 2900 // loss: 0.042
epoch: 172, batch: 3000 // loss: 0.045
epoch: 172, batch: 3100 // loss: 0.043
epoch: 172, batch: 3200 // loss: 0.039
epoch: 172, batch: 3300 // loss: 0.038
epoch: 172, batch: 3400 // loss: 0.044
epoch: 172, batch: 3500 // loss: 0.035
epoch: 172, batch: 3600 // loss: 0.043
epoch: 172, batch: 3700 // loss: 0.043

epoch: 173, batch: 0 // loss: 0.053
epoch: 173, batch: 100 // loss: 0.046
epoch: 173, batch: 200 // loss: 0.043
epoch: 173, batch: 300 // loss: 0.049
epoch: 173, batch: 400 // loss: 0.046
epoch: 173, batch: 500 // loss: 0.040
epoch: 173, batch: 600 // loss: 0.040
epoch: 173, batch: 700 // loss: 0.044
epoch: 173, batch: 800 // loss: 0.042
epoch: 173, batch: 900 // loss: 0.049
epoch: 173, batch: 1000 // loss: 0.046
epoch: 173, batch: 1100 // loss: 0.042
epoch: 173, batch: 1200 // loss: 0.044
epoch: 173, batch: 1300 // loss: 0.046
epoch: 173, batch: 1400 // loss: 0.043
epoch: 173, batch: 1500 // loss: 0.048
epoch: 173, batch: 1600 // loss: 0.052
epoch: 173, batch: 1700 // loss: 0.045
epoch: 173, batch: 1800 // loss: 0.053
epoch: 173, batch: 1900 // loss: 0.045
epoch: 173, batch: 2000 // loss: 0.047
epoch: 173, batch: 2100 // loss: 0.046
epoch: 173, batch: 2200 // loss: 0.050
epoch: 173, batch: 2300 // loss: 0.049
epoch: 173, batch: 2400 // loss: 0.042
epoch: 173, batch: 2500 // loss: 0.042
epoch: 173, batch: 2600 // loss: 0.045
epoch: 173, batch: 2700 // loss: 0.042
epoch: 173, batch: 2800 // loss: 0.046
epoch: 173, batch: 2900 // loss: 0.042
epoch: 173, batch: 3000 // loss: 0.045
epoch: 173, batch: 3100 // loss: 0.043
epoch: 173, batch: 3200 // loss: 0.039
epoch: 173, batch: 3300 // loss: 0.038
epoch: 173, batch: 3400 // loss: 0.044
epoch: 173, batch: 3500 // loss: 0.035
epoch: 173, batch: 3600 // loss: 0.043
epoch: 173, batch: 3700 // loss: 0.043

epoch: 174, batch: 0 // loss: 0.053
epoch: 174, batch: 100 // loss: 0.046
epoch: 174, batch: 200 // loss: 0.043
epoch: 174, batch: 300 // loss: 0.049
epoch: 174, batch: 400 // loss: 0.046
epoch: 174, batch: 500 // loss: 0.040
epoch: 174, batch: 600 // loss: 0.040
epoch: 174, batch: 700 // loss: 0.044
epoch: 174, batch: 800 // loss: 0.042
epoch: 174, batch: 900 // loss: 0.049
epoch: 174, batch: 1000 // loss: 0.046
epoch: 174, batch: 1100 // loss: 0.042
epoch: 174, batch: 1200 // loss: 0.044
epoch: 174, batch: 1300 // loss: 0.046
epoch: 174, batch: 1400 // loss: 0.043
epoch: 174, batch: 1500 // loss: 0.048
epoch: 174, batch: 1600 // loss: 0.052
epoch: 174, batch: 1700 // loss: 0.045
epoch: 174, batch: 1800 // loss: 0.053
epoch: 174, batch: 1900 // loss: 0.045
epoch: 174, batch: 2000 // loss: 0.047
epoch: 174, batch: 2100 // loss: 0.046
epoch: 174, batch: 2200 // loss: 0.050
epoch: 174, batch: 2300 // loss: 0.049
epoch: 174, batch: 2400 // loss: 0.042
epoch: 174, batch: 2500 // loss: 0.042
epoch: 174, batch: 2600 // loss: 0.045
epoch: 174, batch: 2700 // loss: 0.042
epoch: 174, batch: 2800 // loss: 0.046
epoch: 174, batch: 2900 // loss: 0.042
epoch: 174, batch: 3000 // loss: 0.045
epoch: 174, batch: 3100 // loss: 0.043
epoch: 174, batch: 3200 // loss: 0.039
epoch: 174, batch: 3300 // loss: 0.038
epoch: 174, batch: 3400 // loss: 0.044
epoch: 174, batch: 3500 // loss: 0.035
epoch: 174, batch: 3600 // loss: 0.043
epoch: 174, batch: 3700 // loss: 0.043

epoch: 175, batch: 0 // loss: 0.053
epoch: 175, batch: 100 // loss: 0.046
epoch: 175, batch: 200 // loss: 0.043
epoch: 175, batch: 300 // loss: 0.049
epoch: 175, batch: 400 // loss: 0.046
epoch: 175, batch: 500 // loss: 0.040
epoch: 175, batch: 600 // loss: 0.040
epoch: 175, batch: 700 // loss: 0.044
epoch: 175, batch: 800 // loss: 0.042
epoch: 175, batch: 900 // loss: 0.049
epoch: 175, batch: 1000 // loss: 0.046
epoch: 175, batch: 1100 // loss: 0.042
epoch: 175, batch: 1200 // loss: 0.044
epoch: 175, batch: 1300 // loss: 0.046
epoch: 175, batch: 1400 // loss: 0.043
epoch: 175, batch: 1500 // loss: 0.048
epoch: 175, batch: 1600 // loss: 0.052
epoch: 175, batch: 1700 // loss: 0.045
epoch: 175, batch: 1800 // loss: 0.053
epoch: 175, batch: 1900 // loss: 0.045
epoch: 175, batch: 2000 // loss: 0.047
epoch: 175, batch: 2100 // loss: 0.046
epoch: 175, batch: 2200 // loss: 0.050
epoch: 175, batch: 2300 // loss: 0.049
epoch: 175, batch: 2400 // loss: 0.042
epoch: 175, batch: 2500 // loss: 0.042
epoch: 175, batch: 2600 // loss: 0.045
epoch: 175, batch: 2700 // loss: 0.042
epoch: 175, batch: 2800 // loss: 0.046
epoch: 175, batch: 2900 // loss: 0.042
epoch: 175, batch: 3000 // loss: 0.045
epoch: 175, batch: 3100 // loss: 0.043
epoch: 175, batch: 3200 // loss: 0.039
epoch: 175, batch: 3300 // loss: 0.038
epoch: 175, batch: 3400 // loss: 0.044
epoch: 175, batch: 3500 // loss: 0.035
epoch: 175, batch: 3600 // loss: 0.043
epoch: 175, batch: 3700 // loss: 0.043

epoch: 176, batch: 0 // loss: 0.053
epoch: 176, batch: 100 // loss: 0.046
epoch: 176, batch: 200 // loss: 0.043
epoch: 176, batch: 300 // loss: 0.049
epoch: 176, batch: 400 // loss: 0.046
epoch: 176, batch: 500 // loss: 0.040
epoch: 176, batch: 600 // loss: 0.040
epoch: 176, batch: 700 // loss: 0.044
epoch: 176, batch: 800 // loss: 0.042
epoch: 176, batch: 900 // loss: 0.049
epoch: 176, batch: 1000 // loss: 0.046
epoch: 176, batch: 1100 // loss: 0.042
epoch: 176, batch: 1200 // loss: 0.044
epoch: 176, batch: 1300 // loss: 0.046
epoch: 176, batch: 1400 // loss: 0.043
epoch: 176, batch: 1500 // loss: 0.048
epoch: 176, batch: 1600 // loss: 0.052
epoch: 176, batch: 1700 // loss: 0.045
epoch: 176, batch: 1800 // loss: 0.053
epoch: 176, batch: 1900 // loss: 0.045
epoch: 176, batch: 2000 // loss: 0.047
epoch: 176, batch: 2100 // loss: 0.046
epoch: 176, batch: 2200 // loss: 0.050
epoch: 176, batch: 2300 // loss: 0.049
epoch: 176, batch: 2400 // loss: 0.042
epoch: 176, batch: 2500 // loss: 0.042
epoch: 176, batch: 2600 // loss: 0.045
epoch: 176, batch: 2700 // loss: 0.042
epoch: 176, batch: 2800 // loss: 0.046
epoch: 176, batch: 2900 // loss: 0.042
epoch: 176, batch: 3000 // loss: 0.045
epoch: 176, batch: 3100 // loss: 0.043
epoch: 176, batch: 3200 // loss: 0.039
epoch: 176, batch: 3300 // loss: 0.038
epoch: 176, batch: 3400 // loss: 0.044
epoch: 176, batch: 3500 // loss: 0.035
epoch: 176, batch: 3600 // loss: 0.043
epoch: 176, batch: 3700 // loss: 0.043

epoch: 177, batch: 0 // loss: 0.053
epoch: 177, batch: 100 // loss: 0.046
epoch: 177, batch: 200 // loss: 0.043
epoch: 177, batch: 300 // loss: 0.049
epoch: 177, batch: 400 // loss: 0.046
epoch: 177, batch: 500 // loss: 0.040
epoch: 177, batch: 600 // loss: 0.040
epoch: 177, batch: 700 // loss: 0.044
epoch: 177, batch: 800 // loss: 0.042
epoch: 177, batch: 900 // loss: 0.049
epoch: 177, batch: 1000 // loss: 0.046
epoch: 177, batch: 1100 // loss: 0.042
epoch: 177, batch: 1200 // loss: 0.044
epoch: 177, batch: 1300 // loss: 0.046
epoch: 177, batch: 1400 // loss: 0.043
epoch: 177, batch: 1500 // loss: 0.048
epoch: 177, batch: 1600 // loss: 0.052
epoch: 177, batch: 1700 // loss: 0.045
epoch: 177, batch: 1800 // loss: 0.053
epoch: 177, batch: 1900 // loss: 0.045
epoch: 177, batch: 2000 // loss: 0.047
epoch: 177, batch: 2100 // loss: 0.046
epoch: 177, batch: 2200 // loss: 0.050
epoch: 177, batch: 2300 // loss: 0.049
epoch: 177, batch: 2400 // loss: 0.042
epoch: 177, batch: 2500 // loss: 0.042
epoch: 177, batch: 2600 // loss: 0.045
epoch: 177, batch: 2700 // loss: 0.042
epoch: 177, batch: 2800 // loss: 0.046
epoch: 177, batch: 2900 // loss: 0.042
epoch: 177, batch: 3000 // loss: 0.045
epoch: 177, batch: 3100 // loss: 0.043
epoch: 177, batch: 3200 // loss: 0.039
epoch: 177, batch: 3300 // loss: 0.038
epoch: 177, batch: 3400 // loss: 0.044
epoch: 177, batch: 3500 // loss: 0.035
epoch: 177, batch: 3600 // loss: 0.043
epoch: 177, batch: 3700 // loss: 0.043

epoch: 178, batch: 0 // loss: 0.053
epoch: 178, batch: 100 // loss: 0.046
epoch: 178, batch: 200 // loss: 0.043
epoch: 178, batch: 300 // loss: 0.049
epoch: 178, batch: 400 // loss: 0.046
epoch: 178, batch: 500 // loss: 0.040
epoch: 178, batch: 600 // loss: 0.040
epoch: 178, batch: 700 // loss: 0.044
epoch: 178, batch: 800 // loss: 0.042
epoch: 178, batch: 900 // loss: 0.049
epoch: 178, batch: 1000 // loss: 0.046
epoch: 178, batch: 1100 // loss: 0.042
epoch: 178, batch: 1200 // loss: 0.044
epoch: 178, batch: 1300 // loss: 0.046
epoch: 178, batch: 1400 // loss: 0.043
epoch: 178, batch: 1500 // loss: 0.048
epoch: 178, batch: 1600 // loss: 0.052
epoch: 178, batch: 1700 // loss: 0.045
epoch: 178, batch: 1800 // loss: 0.053
epoch: 178, batch: 1900 // loss: 0.045
epoch: 178, batch: 2000 // loss: 0.047
epoch: 178, batch: 2100 // loss: 0.046
epoch: 178, batch: 2200 // loss: 0.050
epoch: 178, batch: 2300 // loss: 0.049
epoch: 178, batch: 2400 // loss: 0.042
epoch: 178, batch: 2500 // loss: 0.042
epoch: 178, batch: 2600 // loss: 0.045
epoch: 178, batch: 2700 // loss: 0.042
epoch: 178, batch: 2800 // loss: 0.046
epoch: 178, batch: 2900 // loss: 0.042
epoch: 178, batch: 3000 // loss: 0.045
epoch: 178, batch: 3100 // loss: 0.043
epoch: 178, batch: 3200 // loss: 0.039
epoch: 178, batch: 3300 // loss: 0.038
epoch: 178, batch: 3400 // loss: 0.044
epoch: 178, batch: 3500 // loss: 0.035
epoch: 178, batch: 3600 // loss: 0.043
epoch: 178, batch: 3700 // loss: 0.043

epoch: 179, batch: 0 // loss: 0.053
epoch: 179, batch: 100 // loss: 0.046
epoch: 179, batch: 200 // loss: 0.043
epoch: 179, batch: 300 // loss: 0.049
epoch: 179, batch: 400 // loss: 0.046
epoch: 179, batch: 500 // loss: 0.040
epoch: 179, batch: 600 // loss: 0.040
epoch: 179, batch: 700 // loss: 0.044
epoch: 179, batch: 800 // loss: 0.042
epoch: 179, batch: 900 // loss: 0.049
epoch: 179, batch: 1000 // loss: 0.046
epoch: 179, batch: 1100 // loss: 0.042
epoch: 179, batch: 1200 // loss: 0.044
epoch: 179, batch: 1300 // loss: 0.046
epoch: 179, batch: 1400 // loss: 0.043
epoch: 179, batch: 1500 // loss: 0.048
epoch: 179, batch: 1600 // loss: 0.052
epoch: 179, batch: 1700 // loss: 0.045
epoch: 179, batch: 1800 // loss: 0.053
epoch: 179, batch: 1900 // loss: 0.045
epoch: 179, batch: 2000 // loss: 0.047
epoch: 179, batch: 2100 // loss: 0.046
epoch: 179, batch: 2200 // loss: 0.050
epoch: 179, batch: 2300 // loss: 0.049
epoch: 179, batch: 2400 // loss: 0.042
epoch: 179, batch: 2500 // loss: 0.042
epoch: 179, batch: 2600 // loss: 0.045
epoch: 179, batch: 2700 // loss: 0.042
epoch: 179, batch: 2800 // loss: 0.046
epoch: 179, batch: 2900 // loss: 0.042
epoch: 179, batch: 3000 // loss: 0.045
epoch: 179, batch: 3100 // loss: 0.043
epoch: 179, batch: 3200 // loss: 0.039
epoch: 179, batch: 3300 // loss: 0.038
epoch: 179, batch: 3400 // loss: 0.044
epoch: 179, batch: 3500 // loss: 0.035
epoch: 179, batch: 3600 // loss: 0.043
epoch: 179, batch: 3700 // loss: 0.043

epoch: 180, batch: 0 // loss: 0.053
epoch: 180, batch: 100 // loss: 0.046
epoch: 180, batch: 200 // loss: 0.043
epoch: 180, batch: 300 // loss: 0.049
epoch: 180, batch: 400 // loss: 0.046
epoch: 180, batch: 500 // loss: 0.040
epoch: 180, batch: 600 // loss: 0.040
epoch: 180, batch: 700 // loss: 0.044
epoch: 180, batch: 800 // loss: 0.042
epoch: 180, batch: 900 // loss: 0.049
epoch: 180, batch: 1000 // loss: 0.046
epoch: 180, batch: 1100 // loss: 0.042
epoch: 180, batch: 1200 // loss: 0.044
epoch: 180, batch: 1300 // loss: 0.046
epoch: 180, batch: 1400 // loss: 0.043
epoch: 180, batch: 1500 // loss: 0.048
epoch: 180, batch: 1600 // loss: 0.052
epoch: 180, batch: 1700 // loss: 0.045
epoch: 180, batch: 1800 // loss: 0.053
epoch: 180, batch: 1900 // loss: 0.045
epoch: 180, batch: 2000 // loss: 0.047
epoch: 180, batch: 2100 // loss: 0.046
epoch: 180, batch: 2200 // loss: 0.050
epoch: 180, batch: 2300 // loss: 0.049
epoch: 180, batch: 2400 // loss: 0.042
epoch: 180, batch: 2500 // loss: 0.042
epoch: 180, batch: 2600 // loss: 0.045
epoch: 180, batch: 2700 // loss: 0.042
epoch: 180, batch: 2800 // loss: 0.046
epoch: 180, batch: 2900 // loss: 0.042
epoch: 180, batch: 3000 // loss: 0.045
epoch: 180, batch: 3100 // loss: 0.043
epoch: 180, batch: 3200 // loss: 0.039
epoch: 180, batch: 3300 // loss: 0.038
epoch: 180, batch: 3400 // loss: 0.044
epoch: 180, batch: 3500 // loss: 0.035
epoch: 180, batch: 3600 // loss: 0.043
epoch: 180, batch: 3700 // loss: 0.043

epoch: 181, batch: 0 // loss: 0.053
epoch: 181, batch: 100 // loss: 0.046
epoch: 181, batch: 200 // loss: 0.043
epoch: 181, batch: 300 // loss: 0.049
epoch: 181, batch: 400 // loss: 0.046
epoch: 181, batch: 500 // loss: 0.040
epoch: 181, batch: 600 // loss: 0.040
epoch: 181, batch: 700 // loss: 0.044
epoch: 181, batch: 800 // loss: 0.042
epoch: 181, batch: 900 // loss: 0.049
epoch: 181, batch: 1000 // loss: 0.046
epoch: 181, batch: 1100 // loss: 0.042
epoch: 181, batch: 1200 // loss: 0.044
epoch: 181, batch: 1300 // loss: 0.046
epoch: 181, batch: 1400 // loss: 0.043
epoch: 181, batch: 1500 // loss: 0.048
epoch: 181, batch: 1600 // loss: 0.052
epoch: 181, batch: 1700 // loss: 0.045
epoch: 181, batch: 1800 // loss: 0.053
epoch: 181, batch: 1900 // loss: 0.045
epoch: 181, batch: 2000 // loss: 0.047
epoch: 181, batch: 2100 // loss: 0.046
epoch: 181, batch: 2200 // loss: 0.050
epoch: 181, batch: 2300 // loss: 0.049
epoch: 181, batch: 2400 // loss: 0.042
epoch: 181, batch: 2500 // loss: 0.042
epoch: 181, batch: 2600 // loss: 0.045
epoch: 181, batch: 2700 // loss: 0.042
epoch: 181, batch: 2800 // loss: 0.046
epoch: 181, batch: 2900 // loss: 0.042
epoch: 181, batch: 3000 // loss: 0.045
epoch: 181, batch: 3100 // loss: 0.043
epoch: 181, batch: 3200 // loss: 0.039
epoch: 181, batch: 3300 // loss: 0.038
epoch: 181, batch: 3400 // loss: 0.044
epoch: 181, batch: 3500 // loss: 0.035
epoch: 181, batch: 3600 // loss: 0.043
epoch: 181, batch: 3700 // loss: 0.043

epoch: 182, batch: 0 // loss: 0.053
epoch: 182, batch: 100 // loss: 0.046
epoch: 182, batch: 200 // loss: 0.043
epoch: 182, batch: 300 // loss: 0.049
epoch: 182, batch: 400 // loss: 0.046
epoch: 182, batch: 500 // loss: 0.040
epoch: 182, batch: 600 // loss: 0.040
epoch: 182, batch: 700 // loss: 0.044
epoch: 182, batch: 800 // loss: 0.042
epoch: 182, batch: 900 // loss: 0.049
epoch: 182, batch: 1000 // loss: 0.046
epoch: 182, batch: 1100 // loss: 0.042
epoch: 182, batch: 1200 // loss: 0.044
epoch: 182, batch: 1300 // loss: 0.046
epoch: 182, batch: 1400 // loss: 0.043
epoch: 182, batch: 1500 // loss: 0.048
epoch: 182, batch: 1600 // loss: 0.052
epoch: 182, batch: 1700 // loss: 0.045
epoch: 182, batch: 1800 // loss: 0.053
epoch: 182, batch: 1900 // loss: 0.045
epoch: 182, batch: 2000 // loss: 0.047
epoch: 182, batch: 2100 // loss: 0.046
epoch: 182, batch: 2200 // loss: 0.050
epoch: 182, batch: 2300 // loss: 0.049
epoch: 182, batch: 2400 // loss: 0.042
epoch: 182, batch: 2500 // loss: 0.042
epoch: 182, batch: 2600 // loss: 0.045
epoch: 182, batch: 2700 // loss: 0.042
epoch: 182, batch: 2800 // loss: 0.046
epoch: 182, batch: 2900 // loss: 0.042
epoch: 182, batch: 3000 // loss: 0.045
epoch: 182, batch: 3100 // loss: 0.043
epoch: 182, batch: 3200 // loss: 0.039
epoch: 182, batch: 3300 // loss: 0.038
epoch: 182, batch: 3400 // loss: 0.044
epoch: 182, batch: 3500 // loss: 0.035
epoch: 182, batch: 3600 // loss: 0.043
epoch: 182, batch: 3700 // loss: 0.043

epoch: 183, batch: 0 // loss: 0.053
epoch: 183, batch: 100 // loss: 0.046
epoch: 183, batch: 200 // loss: 0.043
epoch: 183, batch: 300 // loss: 0.049
epoch: 183, batch: 400 // loss: 0.046
epoch: 183, batch: 500 // loss: 0.040
epoch: 183, batch: 600 // loss: 0.040
epoch: 183, batch: 700 // loss: 0.044
epoch: 183, batch: 800 // loss: 0.042
epoch: 183, batch: 900 // loss: 0.049
epoch: 183, batch: 1000 // loss: 0.046
epoch: 183, batch: 1100 // loss: 0.042
epoch: 183, batch: 1200 // loss: 0.044
epoch: 183, batch: 1300 // loss: 0.046
epoch: 183, batch: 1400 // loss: 0.043
epoch: 183, batch: 1500 // loss: 0.048
epoch: 183, batch: 1600 // loss: 0.052
epoch: 183, batch: 1700 // loss: 0.045
epoch: 183, batch: 1800 // loss: 0.053
epoch: 183, batch: 1900 // loss: 0.045
epoch: 183, batch: 2000 // loss: 0.047
epoch: 183, batch: 2100 // loss: 0.046
epoch: 183, batch: 2200 // loss: 0.050
epoch: 183, batch: 2300 // loss: 0.049
epoch: 183, batch: 2400 // loss: 0.042
epoch: 183, batch: 2500 // loss: 0.042
epoch: 183, batch: 2600 // loss: 0.045
epoch: 183, batch: 2700 // loss: 0.042
epoch: 183, batch: 2800 // loss: 0.046
epoch: 183, batch: 2900 // loss: 0.042
epoch: 183, batch: 3000 // loss: 0.045
epoch: 183, batch: 3100 // loss: 0.043
epoch: 183, batch: 3200 // loss: 0.039
epoch: 183, batch: 3300 // loss: 0.038
epoch: 183, batch: 3400 // loss: 0.044
epoch: 183, batch: 3500 // loss: 0.035
epoch: 183, batch: 3600 // loss: 0.043
epoch: 183, batch: 3700 // loss: 0.043

epoch: 184, batch: 0 // loss: 0.053
epoch: 184, batch: 100 // loss: 0.046
epoch: 184, batch: 200 // loss: 0.043
epoch: 184, batch: 300 // loss: 0.049
epoch: 184, batch: 400 // loss: 0.046
epoch: 184, batch: 500 // loss: 0.040
epoch: 184, batch: 600 // loss: 0.040
epoch: 184, batch: 700 // loss: 0.044
epoch: 184, batch: 800 // loss: 0.042
epoch: 184, batch: 900 // loss: 0.049
epoch: 184, batch: 1000 // loss: 0.046
epoch: 184, batch: 1100 // loss: 0.042
epoch: 184, batch: 1200 // loss: 0.044
epoch: 184, batch: 1300 // loss: 0.046
epoch: 184, batch: 1400 // loss: 0.043
epoch: 184, batch: 1500 // loss: 0.048
epoch: 184, batch: 1600 // loss: 0.052
epoch: 184, batch: 1700 // loss: 0.045
epoch: 184, batch: 1800 // loss: 0.053
epoch: 184, batch: 1900 // loss: 0.045
epoch: 184, batch: 2000 // loss: 0.047
epoch: 184, batch: 2100 // loss: 0.046
epoch: 184, batch: 2200 // loss: 0.050
epoch: 184, batch: 2300 // loss: 0.049
epoch: 184, batch: 2400 // loss: 0.042
epoch: 184, batch: 2500 // loss: 0.042
epoch: 184, batch: 2600 // loss: 0.045
epoch: 184, batch: 2700 // loss: 0.042
epoch: 184, batch: 2800 // loss: 0.046
epoch: 184, batch: 2900 // loss: 0.042
epoch: 184, batch: 3000 // loss: 0.045
epoch: 184, batch: 3100 // loss: 0.043
epoch: 184, batch: 3200 // loss: 0.039
epoch: 184, batch: 3300 // loss: 0.038
epoch: 184, batch: 3400 // loss: 0.044
epoch: 184, batch: 3500 // loss: 0.035
epoch: 184, batch: 3600 // loss: 0.043
epoch: 184, batch: 3700 // loss: 0.043

epoch: 185, batch: 0 // loss: 0.053
epoch: 185, batch: 100 // loss: 0.046
epoch: 185, batch: 200 // loss: 0.043
epoch: 185, batch: 300 // loss: 0.049
epoch: 185, batch: 400 // loss: 0.046
epoch: 185, batch: 500 // loss: 0.040
epoch: 185, batch: 600 // loss: 0.040
epoch: 185, batch: 700 // loss: 0.044
epoch: 185, batch: 800 // loss: 0.042
epoch: 185, batch: 900 // loss: 0.049
epoch: 185, batch: 1000 // loss: 0.046
epoch: 185, batch: 1100 // loss: 0.042
epoch: 185, batch: 1200 // loss: 0.044
epoch: 185, batch: 1300 // loss: 0.046
epoch: 185, batch: 1400 // loss: 0.043
epoch: 185, batch: 1500 // loss: 0.048
epoch: 185, batch: 1600 // loss: 0.052
epoch: 185, batch: 1700 // loss: 0.045
epoch: 185, batch: 1800 // loss: 0.053
epoch: 185, batch: 1900 // loss: 0.045
epoch: 185, batch: 2000 // loss: 0.047
epoch: 185, batch: 2100 // loss: 0.046
epoch: 185, batch: 2200 // loss: 0.050
epoch: 185, batch: 2300 // loss: 0.049
epoch: 185, batch: 2400 // loss: 0.042
epoch: 185, batch: 2500 // loss: 0.042
epoch: 185, batch: 2600 // loss: 0.045
epoch: 185, batch: 2700 // loss: 0.042
epoch: 185, batch: 2800 // loss: 0.046
epoch: 185, batch: 2900 // loss: 0.042
epoch: 185, batch: 3000 // loss: 0.045
epoch: 185, batch: 3100 // loss: 0.043
epoch: 185, batch: 3200 // loss: 0.039
epoch: 185, batch: 3300 // loss: 0.038
epoch: 185, batch: 3400 // loss: 0.044
epoch: 185, batch: 3500 // loss: 0.035
epoch: 185, batch: 3600 // loss: 0.043
epoch: 185, batch: 3700 // loss: 0.043

epoch: 186, batch: 0 // loss: 0.053
epoch: 186, batch: 100 // loss: 0.046
epoch: 186, batch: 200 // loss: 0.043
epoch: 186, batch: 300 // loss: 0.049
epoch: 186, batch: 400 // loss: 0.046
epoch: 186, batch: 500 // loss: 0.040
epoch: 186, batch: 600 // loss: 0.040
epoch: 186, batch: 700 // loss: 0.044
epoch: 186, batch: 800 // loss: 0.042
epoch: 186, batch: 900 // loss: 0.049
epoch: 186, batch: 1000 // loss: 0.046
epoch: 186, batch: 1100 // loss: 0.042
epoch: 186, batch: 1200 // loss: 0.044
epoch: 186, batch: 1300 // loss: 0.046
epoch: 186, batch: 1400 // loss: 0.043
epoch: 186, batch: 1500 // loss: 0.048
epoch: 186, batch: 1600 // loss: 0.052
epoch: 186, batch: 1700 // loss: 0.045
epoch: 186, batch: 1800 // loss: 0.053
epoch: 186, batch: 1900 // loss: 0.045
epoch: 186, batch: 2000 // loss: 0.047
epoch: 186, batch: 2100 // loss: 0.046
epoch: 186, batch: 2200 // loss: 0.050
epoch: 186, batch: 2300 // loss: 0.049
epoch: 186, batch: 2400 // loss: 0.042
epoch: 186, batch: 2500 // loss: 0.042
epoch: 186, batch: 2600 // loss: 0.045
epoch: 186, batch: 2700 // loss: 0.042
epoch: 186, batch: 2800 // loss: 0.046
epoch: 186, batch: 2900 // loss: 0.042
epoch: 186, batch: 3000 // loss: 0.045
epoch: 186, batch: 3100 // loss: 0.043
epoch: 186, batch: 3200 // loss: 0.039
epoch: 186, batch: 3300 // loss: 0.038
epoch: 186, batch: 3400 // loss: 0.044
epoch: 186, batch: 3500 // loss: 0.035
epoch: 186, batch: 3600 // loss: 0.043
epoch: 186, batch: 3700 // loss: 0.043

epoch: 187, batch: 0 // loss: 0.053
epoch: 187, batch: 100 // loss: 0.046
epoch: 187, batch: 200 // loss: 0.043
epoch: 187, batch: 300 // loss: 0.049
epoch: 187, batch: 400 // loss: 0.046
epoch: 187, batch: 500 // loss: 0.040
epoch: 187, batch: 600 // loss: 0.040
epoch: 187, batch: 700 // loss: 0.044
epoch: 187, batch: 800 // loss: 0.042
epoch: 187, batch: 900 // loss: 0.049
epoch: 187, batch: 1000 // loss: 0.046
epoch: 187, batch: 1100 // loss: 0.042
epoch: 187, batch: 1200 // loss: 0.044
epoch: 187, batch: 1300 // loss: 0.046
epoch: 187, batch: 1400 // loss: 0.043
epoch: 187, batch: 1500 // loss: 0.048
epoch: 187, batch: 1600 // loss: 0.052
epoch: 187, batch: 1700 // loss: 0.045
epoch: 187, batch: 1800 // loss: 0.053
epoch: 187, batch: 1900 // loss: 0.045
epoch: 187, batch: 2000 // loss: 0.047
epoch: 187, batch: 2100 // loss: 0.046
epoch: 187, batch: 2200 // loss: 0.050
epoch: 187, batch: 2300 // loss: 0.049
epoch: 187, batch: 2400 // loss: 0.042
epoch: 187, batch: 2500 // loss: 0.042
epoch: 187, batch: 2600 // loss: 0.045
epoch: 187, batch: 2700 // loss: 0.042
epoch: 187, batch: 2800 // loss: 0.046
epoch: 187, batch: 2900 // loss: 0.042
epoch: 187, batch: 3000 // loss: 0.045
epoch: 187, batch: 3100 // loss: 0.043
epoch: 187, batch: 3200 // loss: 0.039
epoch: 187, batch: 3300 // loss: 0.038
epoch: 187, batch: 3400 // loss: 0.044
epoch: 187, batch: 3500 // loss: 0.035
epoch: 187, batch: 3600 // loss: 0.043
epoch: 187, batch: 3700 // loss: 0.043

epoch: 188, batch: 0 // loss: 0.053
epoch: 188, batch: 100 // loss: 0.046
epoch: 188, batch: 200 // loss: 0.043
epoch: 188, batch: 300 // loss: 0.049
epoch: 188, batch: 400 // loss: 0.046
epoch: 188, batch: 500 // loss: 0.040
epoch: 188, batch: 600 // loss: 0.040
epoch: 188, batch: 700 // loss: 0.044
epoch: 188, batch: 800 // loss: 0.042
epoch: 188, batch: 900 // loss: 0.049
epoch: 188, batch: 1000 // loss: 0.046
epoch: 188, batch: 1100 // loss: 0.042
epoch: 188, batch: 1200 // loss: 0.044
epoch: 188, batch: 1300 // loss: 0.046
epoch: 188, batch: 1400 // loss: 0.043
epoch: 188, batch: 1500 // loss: 0.048
epoch: 188, batch: 1600 // loss: 0.052
epoch: 188, batch: 1700 // loss: 0.045
epoch: 188, batch: 1800 // loss: 0.053
epoch: 188, batch: 1900 // loss: 0.045
epoch: 188, batch: 2000 // loss: 0.047
epoch: 188, batch: 2100 // loss: 0.046
epoch: 188, batch: 2200 // loss: 0.050
epoch: 188, batch: 2300 // loss: 0.049
epoch: 188, batch: 2400 // loss: 0.042
epoch: 188, batch: 2500 // loss: 0.042
epoch: 188, batch: 2600 // loss: 0.045
epoch: 188, batch: 2700 // loss: 0.042
epoch: 188, batch: 2800 // loss: 0.046
epoch: 188, batch: 2900 // loss: 0.042
epoch: 188, batch: 3000 // loss: 0.045
epoch: 188, batch: 3100 // loss: 0.043
epoch: 188, batch: 3200 // loss: 0.039
epoch: 188, batch: 3300 // loss: 0.038
epoch: 188, batch: 3400 // loss: 0.044
epoch: 188, batch: 3500 // loss: 0.035
epoch: 188, batch: 3600 // loss: 0.043
epoch: 188, batch: 3700 // loss: 0.043

epoch: 189, batch: 0 // loss: 0.053
epoch: 189, batch: 100 // loss: 0.046
epoch: 189, batch: 200 // loss: 0.043
epoch: 189, batch: 300 // loss: 0.049
epoch: 189, batch: 400 // loss: 0.046
epoch: 189, batch: 500 // loss: 0.040
epoch: 189, batch: 600 // loss: 0.040
epoch: 189, batch: 700 // loss: 0.044
epoch: 189, batch: 800 // loss: 0.042
epoch: 189, batch: 900 // loss: 0.049
epoch: 189, batch: 1000 // loss: 0.046
epoch: 189, batch: 1100 // loss: 0.042
epoch: 189, batch: 1200 // loss: 0.044
epoch: 189, batch: 1300 // loss: 0.046
epoch: 189, batch: 1400 // loss: 0.043
epoch: 189, batch: 1500 // loss: 0.048
epoch: 189, batch: 1600 // loss: 0.052
epoch: 189, batch: 1700 // loss: 0.045
epoch: 189, batch: 1800 // loss: 0.053
epoch: 189, batch: 1900 // loss: 0.045
epoch: 189, batch: 2000 // loss: 0.047
epoch: 189, batch: 2100 // loss: 0.046
epoch: 189, batch: 2200 // loss: 0.050
epoch: 189, batch: 2300 // loss: 0.049
epoch: 189, batch: 2400 // loss: 0.042
epoch: 189, batch: 2500 // loss: 0.042
epoch: 189, batch: 2600 // loss: 0.045
epoch: 189, batch: 2700 // loss: 0.042
epoch: 189, batch: 2800 // loss: 0.046
epoch: 189, batch: 2900 // loss: 0.042
epoch: 189, batch: 3000 // loss: 0.045
epoch: 189, batch: 3100 // loss: 0.043
epoch: 189, batch: 3200 // loss: 0.039
epoch: 189, batch: 3300 // loss: 0.038
epoch: 189, batch: 3400 // loss: 0.044
epoch: 189, batch: 3500 // loss: 0.035
epoch: 189, batch: 3600 // loss: 0.043
epoch: 189, batch: 3700 // loss: 0.043

epoch: 190, batch: 0 // loss: 0.053
epoch: 190, batch: 100 // loss: 0.046
epoch: 190, batch: 200 // loss: 0.043
epoch: 190, batch: 300 // loss: 0.049
epoch: 190, batch: 400 // loss: 0.046
epoch: 190, batch: 500 // loss: 0.040
epoch: 190, batch: 600 // loss: 0.040
epoch: 190, batch: 700 // loss: 0.044
epoch: 190, batch: 800 // loss: 0.042
epoch: 190, batch: 900 // loss: 0.049
epoch: 190, batch: 1000 // loss: 0.046
epoch: 190, batch: 1100 // loss: 0.042
epoch: 190, batch: 1200 // loss: 0.044
epoch: 190, batch: 1300 // loss: 0.046
epoch: 190, batch: 1400 // loss: 0.043
epoch: 190, batch: 1500 // loss: 0.048
epoch: 190, batch: 1600 // loss: 0.052
epoch: 190, batch: 1700 // loss: 0.045
epoch: 190, batch: 1800 // loss: 0.053
epoch: 190, batch: 1900 // loss: 0.045
epoch: 190, batch: 2000 // loss: 0.047
epoch: 190, batch: 2100 // loss: 0.046
epoch: 190, batch: 2200 // loss: 0.050
epoch: 190, batch: 2300 // loss: 0.049
epoch: 190, batch: 2400 // loss: 0.042
epoch: 190, batch: 2500 // loss: 0.042
epoch: 190, batch: 2600 // loss: 0.045
epoch: 190, batch: 2700 // loss: 0.042
epoch: 190, batch: 2800 // loss: 0.046
epoch: 190, batch: 2900 // loss: 0.042
epoch: 190, batch: 3000 // loss: 0.045
epoch: 190, batch: 3100 // loss: 0.043
epoch: 190, batch: 3200 // loss: 0.039
epoch: 190, batch: 3300 // loss: 0.038
epoch: 190, batch: 3400 // loss: 0.044
epoch: 190, batch: 3500 // loss: 0.035
epoch: 190, batch: 3600 // loss: 0.043
epoch: 190, batch: 3700 // loss: 0.043

epoch: 191, batch: 0 // loss: 0.053
epoch: 191, batch: 100 // loss: 0.046
epoch: 191, batch: 200 // loss: 0.043
epoch: 191, batch: 300 // loss: 0.049
epoch: 191, batch: 400 // loss: 0.046
epoch: 191, batch: 500 // loss: 0.040
epoch: 191, batch: 600 // loss: 0.040
epoch: 191, batch: 700 // loss: 0.044
epoch: 191, batch: 800 // loss: 0.042
epoch: 191, batch: 900 // loss: 0.049
epoch: 191, batch: 1000 // loss: 0.046
epoch: 191, batch: 1100 // loss: 0.042
epoch: 191, batch: 1200 // loss: 0.044
epoch: 191, batch: 1300 // loss: 0.046
epoch: 191, batch: 1400 // loss: 0.043
epoch: 191, batch: 1500 // loss: 0.048
epoch: 191, batch: 1600 // loss: 0.052
epoch: 191, batch: 1700 // loss: 0.045
epoch: 191, batch: 1800 // loss: 0.053
epoch: 191, batch: 1900 // loss: 0.045
epoch: 191, batch: 2000 // loss: 0.047
epoch: 191, batch: 2100 // loss: 0.046
epoch: 191, batch: 2200 // loss: 0.050
epoch: 191, batch: 2300 // loss: 0.049
epoch: 191, batch: 2400 // loss: 0.042
epoch: 191, batch: 2500 // loss: 0.042
epoch: 191, batch: 2600 // loss: 0.045
epoch: 191, batch: 2700 // loss: 0.042
epoch: 191, batch: 2800 // loss: 0.046
epoch: 191, batch: 2900 // loss: 0.042
epoch: 191, batch: 3000 // loss: 0.045
epoch: 191, batch: 3100 // loss: 0.043
epoch: 191, batch: 3200 // loss: 0.039
epoch: 191, batch: 3300 // loss: 0.038
epoch: 191, batch: 3400 // loss: 0.044
epoch: 191, batch: 3500 // loss: 0.035
epoch: 191, batch: 3600 // loss: 0.043
epoch: 191, batch: 3700 // loss: 0.043

epoch: 192, batch: 0 // loss: 0.053
epoch: 192, batch: 100 // loss: 0.046
epoch: 192, batch: 200 // loss: 0.043
epoch: 192, batch: 300 // loss: 0.049
epoch: 192, batch: 400 // loss: 0.046
epoch: 192, batch: 500 // loss: 0.040
epoch: 192, batch: 600 // loss: 0.040
epoch: 192, batch: 700 // loss: 0.044
epoch: 192, batch: 800 // loss: 0.042
epoch: 192, batch: 900 // loss: 0.049
epoch: 192, batch: 1000 // loss: 0.046
epoch: 192, batch: 1100 // loss: 0.042
epoch: 192, batch: 1200 // loss: 0.044
epoch: 192, batch: 1300 // loss: 0.046
epoch: 192, batch: 1400 // loss: 0.043
epoch: 192, batch: 1500 // loss: 0.048
epoch: 192, batch: 1600 // loss: 0.052
epoch: 192, batch: 1700 // loss: 0.045
epoch: 192, batch: 1800 // loss: 0.053
epoch: 192, batch: 1900 // loss: 0.045
epoch: 192, batch: 2000 // loss: 0.047
epoch: 192, batch: 2100 // loss: 0.046
epoch: 192, batch: 2200 // loss: 0.050
epoch: 192, batch: 2300 // loss: 0.049
epoch: 192, batch: 2400 // loss: 0.042
epoch: 192, batch: 2500 // loss: 0.042
epoch: 192, batch: 2600 // loss: 0.045
epoch: 192, batch: 2700 // loss: 0.042
epoch: 192, batch: 2800 // loss: 0.046
epoch: 192, batch: 2900 // loss: 0.042
epoch: 192, batch: 3000 // loss: 0.045
epoch: 192, batch: 3100 // loss: 0.043
epoch: 192, batch: 3200 // loss: 0.039
epoch: 192, batch: 3300 // loss: 0.038
epoch: 192, batch: 3400 // loss: 0.044
epoch: 192, batch: 3500 // loss: 0.035
epoch: 192, batch: 3600 // loss: 0.043
epoch: 192, batch: 3700 // loss: 0.043

epoch: 193, batch: 0 // loss: 0.053
epoch: 193, batch: 100 // loss: 0.046
epoch: 193, batch: 200 // loss: 0.043
epoch: 193, batch: 300 // loss: 0.049
epoch: 193, batch: 400 // loss: 0.046
epoch: 193, batch: 500 // loss: 0.040
epoch: 193, batch: 600 // loss: 0.040
epoch: 193, batch: 700 // loss: 0.044
epoch: 193, batch: 800 // loss: 0.042
epoch: 193, batch: 900 // loss: 0.049
epoch: 193, batch: 1000 // loss: 0.046
epoch: 193, batch: 1100 // loss: 0.042
epoch: 193, batch: 1200 // loss: 0.044
epoch: 193, batch: 1300 // loss: 0.046
epoch: 193, batch: 1400 // loss: 0.043
epoch: 193, batch: 1500 // loss: 0.048
epoch: 193, batch: 1600 // loss: 0.052
epoch: 193, batch: 1700 // loss: 0.045
epoch: 193, batch: 1800 // loss: 0.053
epoch: 193, batch: 1900 // loss: 0.045
epoch: 193, batch: 2000 // loss: 0.047
epoch: 193, batch: 2100 // loss: 0.046
epoch: 193, batch: 2200 // loss: 0.050
epoch: 193, batch: 2300 // loss: 0.049
epoch: 193, batch: 2400 // loss: 0.042
epoch: 193, batch: 2500 // loss: 0.042
epoch: 193, batch: 2600 // loss: 0.045
epoch: 193, batch: 2700 // loss: 0.042
epoch: 193, batch: 2800 // loss: 0.046
epoch: 193, batch: 2900 // loss: 0.042
epoch: 193, batch: 3000 // loss: 0.045
epoch: 193, batch: 3100 // loss: 0.043
epoch: 193, batch: 3200 // loss: 0.039
epoch: 193, batch: 3300 // loss: 0.038
epoch: 193, batch: 3400 // loss: 0.044
epoch: 193, batch: 3500 // loss: 0.035
epoch: 193, batch: 3600 // loss: 0.043
epoch: 193, batch: 3700 // loss: 0.043

epoch: 194, batch: 0 // loss: 0.053
epoch: 194, batch: 100 // loss: 0.046
epoch: 194, batch: 200 // loss: 0.043
epoch: 194, batch: 300 // loss: 0.049
epoch: 194, batch: 400 // loss: 0.046
epoch: 194, batch: 500 // loss: 0.040
epoch: 194, batch: 600 // loss: 0.040
epoch: 194, batch: 700 // loss: 0.044
epoch: 194, batch: 800 // loss: 0.042
epoch: 194, batch: 900 // loss: 0.049
epoch: 194, batch: 1000 // loss: 0.046
epoch: 194, batch: 1100 // loss: 0.042
epoch: 194, batch: 1200 // loss: 0.044
epoch: 194, batch: 1300 // loss: 0.046
epoch: 194, batch: 1400 // loss: 0.043
epoch: 194, batch: 1500 // loss: 0.048
epoch: 194, batch: 1600 // loss: 0.052
epoch: 194, batch: 1700 // loss: 0.045
epoch: 194, batch: 1800 // loss: 0.053
epoch: 194, batch: 1900 // loss: 0.045
epoch: 194, batch: 2000 // loss: 0.047
epoch: 194, batch: 2100 // loss: 0.046
epoch: 194, batch: 2200 // loss: 0.050
epoch: 194, batch: 2300 // loss: 0.049
epoch: 194, batch: 2400 // loss: 0.042
epoch: 194, batch: 2500 // loss: 0.042
epoch: 194, batch: 2600 // loss: 0.045
epoch: 194, batch: 2700 // loss: 0.042
epoch: 194, batch: 2800 // loss: 0.046
epoch: 194, batch: 2900 // loss: 0.042
epoch: 194, batch: 3000 // loss: 0.045
epoch: 194, batch: 3100 // loss: 0.043
epoch: 194, batch: 3200 // loss: 0.039
epoch: 194, batch: 3300 // loss: 0.038
epoch: 194, batch: 3400 // loss: 0.044
epoch: 194, batch: 3500 // loss: 0.035
epoch: 194, batch: 3600 // loss: 0.043
epoch: 194, batch: 3700 // loss: 0.043

epoch: 195, batch: 0 // loss: 0.053
epoch: 195, batch: 100 // loss: 0.046
epoch: 195, batch: 200 // loss: 0.043
epoch: 195, batch: 300 // loss: 0.049
epoch: 195, batch: 400 // loss: 0.046
epoch: 195, batch: 500 // loss: 0.040
epoch: 195, batch: 600 // loss: 0.040
epoch: 195, batch: 700 // loss: 0.044
epoch: 195, batch: 800 // loss: 0.042
epoch: 195, batch: 900 // loss: 0.049
epoch: 195, batch: 1000 // loss: 0.046
epoch: 195, batch: 1100 // loss: 0.042
epoch: 195, batch: 1200 // loss: 0.044
epoch: 195, batch: 1300 // loss: 0.046
epoch: 195, batch: 1400 // loss: 0.043
epoch: 195, batch: 1500 // loss: 0.048
epoch: 195, batch: 1600 // loss: 0.052
epoch: 195, batch: 1700 // loss: 0.045
epoch: 195, batch: 1800 // loss: 0.053
epoch: 195, batch: 1900 // loss: 0.045
epoch: 195, batch: 2000 // loss: 0.047
epoch: 195, batch: 2100 // loss: 0.046
epoch: 195, batch: 2200 // loss: 0.050
epoch: 195, batch: 2300 // loss: 0.049
epoch: 195, batch: 2400 // loss: 0.042
epoch: 195, batch: 2500 // loss: 0.042
epoch: 195, batch: 2600 // loss: 0.045
epoch: 195, batch: 2700 // loss: 0.042
epoch: 195, batch: 2800 // loss: 0.046
epoch: 195, batch: 2900 // loss: 0.042
epoch: 195, batch: 3000 // loss: 0.045
epoch: 195, batch: 3100 // loss: 0.043
epoch: 195, batch: 3200 // loss: 0.039
epoch: 195, batch: 3300 // loss: 0.038
epoch: 195, batch: 3400 // loss: 0.044
epoch: 195, batch: 3500 // loss: 0.035
epoch: 195, batch: 3600 // loss: 0.043
epoch: 195, batch: 3700 // loss: 0.043

epoch: 196, batch: 0 // loss: 0.053
epoch: 196, batch: 100 // loss: 0.046
epoch: 196, batch: 200 // loss: 0.043
epoch: 196, batch: 300 // loss: 0.049
epoch: 196, batch: 400 // loss: 0.046
epoch: 196, batch: 500 // loss: 0.040
epoch: 196, batch: 600 // loss: 0.040
epoch: 196, batch: 700 // loss: 0.044
epoch: 196, batch: 800 // loss: 0.042
epoch: 196, batch: 900 // loss: 0.049
epoch: 196, batch: 1000 // loss: 0.046
epoch: 196, batch: 1100 // loss: 0.042
epoch: 196, batch: 1200 // loss: 0.044
epoch: 196, batch: 1300 // loss: 0.046
epoch: 196, batch: 1400 // loss: 0.043
epoch: 196, batch: 1500 // loss: 0.048
epoch: 196, batch: 1600 // loss: 0.052
epoch: 196, batch: 1700 // loss: 0.045
epoch: 196, batch: 1800 // loss: 0.053
epoch: 196, batch: 1900 // loss: 0.045
epoch: 196, batch: 2000 // loss: 0.047
epoch: 196, batch: 2100 // loss: 0.046
epoch: 196, batch: 2200 // loss: 0.050
epoch: 196, batch: 2300 // loss: 0.049
epoch: 196, batch: 2400 // loss: 0.042
epoch: 196, batch: 2500 // loss: 0.042
epoch: 196, batch: 2600 // loss: 0.045
epoch: 196, batch: 2700 // loss: 0.042
epoch: 196, batch: 2800 // loss: 0.046
epoch: 196, batch: 2900 // loss: 0.042
epoch: 196, batch: 3000 // loss: 0.045
epoch: 196, batch: 3100 // loss: 0.043
epoch: 196, batch: 3200 // loss: 0.039
epoch: 196, batch: 3300 // loss: 0.038
epoch: 196, batch: 3400 // loss: 0.044
epoch: 196, batch: 3500 // loss: 0.035
epoch: 196, batch: 3600 // loss: 0.043
epoch: 196, batch: 3700 // loss: 0.043

epoch: 197, batch: 0 // loss: 0.053
epoch: 197, batch: 100 // loss: 0.046
epoch: 197, batch: 200 // loss: 0.043
epoch: 197, batch: 300 // loss: 0.049
epoch: 197, batch: 400 // loss: 0.046
epoch: 197, batch: 500 // loss: 0.040
epoch: 197, batch: 600 // loss: 0.040
epoch: 197, batch: 700 // loss: 0.044
epoch: 197, batch: 800 // loss: 0.042
epoch: 197, batch: 900 // loss: 0.049
epoch: 197, batch: 1000 // loss: 0.046
epoch: 197, batch: 1100 // loss: 0.042
epoch: 197, batch: 1200 // loss: 0.044
epoch: 197, batch: 1300 // loss: 0.046
epoch: 197, batch: 1400 // loss: 0.043
epoch: 197, batch: 1500 // loss: 0.048
epoch: 197, batch: 1600 // loss: 0.052
epoch: 197, batch: 1700 // loss: 0.045
epoch: 197, batch: 1800 // loss: 0.053
epoch: 197, batch: 1900 // loss: 0.045
epoch: 197, batch: 2000 // loss: 0.047
epoch: 197, batch: 2100 // loss: 0.046
epoch: 197, batch: 2200 // loss: 0.050
epoch: 197, batch: 2300 // loss: 0.049
epoch: 197, batch: 2400 // loss: 0.042
epoch: 197, batch: 2500 // loss: 0.042
epoch: 197, batch: 2600 // loss: 0.045
epoch: 197, batch: 2700 // loss: 0.042
epoch: 197, batch: 2800 // loss: 0.046
epoch: 197, batch: 2900 // loss: 0.042
epoch: 197, batch: 3000 // loss: 0.045
epoch: 197, batch: 3100 // loss: 0.043
epoch: 197, batch: 3200 // loss: 0.039
epoch: 197, batch: 3300 // loss: 0.038
epoch: 197, batch: 3400 // loss: 0.044
epoch: 197, batch: 3500 // loss: 0.035
epoch: 197, batch: 3600 // loss: 0.043
epoch: 197, batch: 3700 // loss: 0.043

epoch: 198, batch: 0 // loss: 0.053
epoch: 198, batch: 100 // loss: 0.046
epoch: 198, batch: 200 // loss: 0.043
epoch: 198, batch: 300 // loss: 0.049
epoch: 198, batch: 400 // loss: 0.046
epoch: 198, batch: 500 // loss: 0.040
epoch: 198, batch: 600 // loss: 0.040
epoch: 198, batch: 700 // loss: 0.044
epoch: 198, batch: 800 // loss: 0.042
epoch: 198, batch: 900 // loss: 0.049
epoch: 198, batch: 1000 // loss: 0.046
epoch: 198, batch: 1100 // loss: 0.042
epoch: 198, batch: 1200 // loss: 0.044
epoch: 198, batch: 1300 // loss: 0.046
epoch: 198, batch: 1400 // loss: 0.043
epoch: 198, batch: 1500 // loss: 0.048
epoch: 198, batch: 1600 // loss: 0.052
epoch: 198, batch: 1700 // loss: 0.045
epoch: 198, batch: 1800 // loss: 0.053
epoch: 198, batch: 1900 // loss: 0.045
epoch: 198, batch: 2000 // loss: 0.047
epoch: 198, batch: 2100 // loss: 0.046
epoch: 198, batch: 2200 // loss: 0.050
epoch: 198, batch: 2300 // loss: 0.049
epoch: 198, batch: 2400 // loss: 0.042
epoch: 198, batch: 2500 // loss: 0.042
epoch: 198, batch: 2600 // loss: 0.045
epoch: 198, batch: 2700 // loss: 0.042
epoch: 198, batch: 2800 // loss: 0.046
epoch: 198, batch: 2900 // loss: 0.042
epoch: 198, batch: 3000 // loss: 0.045
epoch: 198, batch: 3100 // loss: 0.043
epoch: 198, batch: 3200 // loss: 0.039
epoch: 198, batch: 3300 // loss: 0.038
epoch: 198, batch: 3400 // loss: 0.044
epoch: 198, batch: 3500 // loss: 0.035
epoch: 198, batch: 3600 // loss: 0.043
epoch: 198, batch: 3700 // loss: 0.043

epoch: 199, batch: 0 // loss: 0.053
epoch: 199, batch: 100 // loss: 0.046
epoch: 199, batch: 200 // loss: 0.043
epoch: 199, batch: 300 // loss: 0.049
epoch: 199, batch: 400 // loss: 0.046
epoch: 199, batch: 500 // loss: 0.040
epoch: 199, batch: 600 // loss: 0.040
epoch: 199, batch: 700 // loss: 0.044
epoch: 199, batch: 800 // loss: 0.042
epoch: 199, batch: 900 // loss: 0.049
epoch: 199, batch: 1000 // loss: 0.046
epoch: 199, batch: 1100 // loss: 0.042
epoch: 199, batch: 1200 // loss: 0.044
epoch: 199, batch: 1300 // loss: 0.046
epoch: 199, batch: 1400 // loss: 0.043
epoch: 199, batch: 1500 // loss: 0.048
epoch: 199, batch: 1600 // loss: 0.052
epoch: 199, batch: 1700 // loss: 0.045
epoch: 199, batch: 1800 // loss: 0.053
epoch: 199, batch: 1900 // loss: 0.045
epoch: 199, batch: 2000 // loss: 0.047
epoch: 199, batch: 2100 // loss: 0.046
epoch: 199, batch: 2200 // loss: 0.050
epoch: 199, batch: 2300 // loss: 0.049
epoch: 199, batch: 2400 // loss: 0.042
epoch: 199, batch: 2500 // loss: 0.042
epoch: 199, batch: 2600 // loss: 0.045
epoch: 199, batch: 2700 // loss: 0.042
epoch: 199, batch: 2800 // loss: 0.046
epoch: 199, batch: 2900 // loss: 0.042
epoch: 199, batch: 3000 // loss: 0.045
epoch: 199, batch: 3100 // loss: 0.043
epoch: 199, batch: 3200 // loss: 0.039
epoch: 199, batch: 3300 // loss: 0.038
epoch: 199, batch: 3400 // loss: 0.044
epoch: 199, batch: 3500 // loss: 0.035
epoch: 199, batch: 3600 // loss: 0.043
epoch: 199, batch: 3700 // loss: 0.043
In [196]:
# Reconstruct the first 5,000 rows of X with the trained `auto` model and
# convert the result to a NumPy array (detached from the autograd graph).
X_tilde = (
    auto(X[:5000].float())
    .detach()
    .numpy()
)
In [197]:
# Display input sample 3 as a 28x28 grayscale image.
imshow(np.reshape(np.asarray(X[3]), (28, 28)), cmap='gray')
Out[197]:
<matplotlib.image.AxesImage at 0x7f8f43d165c0>
In [198]:
# Display the reconstruction of sample 3 as a 28x28 grayscale image.
# The index matches the input shown in the previous cell (X[3]) so the two
# images are a like-for-like comparison; X_tilde was computed from X[:5000],
# so row 3 of X_tilde corresponds to row 3 of X.
imshow(np.asarray(X_tilde[3]).reshape(28, 28), cmap='gray')
Out[198]:
<matplotlib.image.AxesImage at 0x7f8f44507f28>

Pull out the hidden representations (for the first 5k points) and plot them.

In [199]:
# Ask the model for its hidden codes (return_z=True) on the first 5,000
# rows of X, then detach from autograd and convert to NumPy for plotting.
Zs = (
    auto(X[:5000].float(), return_z=True)
    .detach()
    .numpy()
)
In [200]:
# One matplotlib colour per class label (10 entries, indexed by label).
# 'brown' is used instead of white ('w'), which would be invisible on the
# default white axes background and silently hide one whole class.
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k', 'brown', 'orange', 'purple']
# Per-point colour for the first 5,000 labels; int() makes the lookup work
# whether each label is a plain int, a NumPy integer or a 0-d tensor.
c = [colors[int(y_i)] for y_i in y[:5000]]
In [201]:
# Scatter the first two hidden dimensions, one point per sample, coloured
# by the per-sample colour list `c`.
plt.scatter(x=Zs[:, 0], y=Zs[:, 1], c=c)
Out[201]:
<matplotlib.collections.PathCollection at 0x7f8f445308d0>

Let's implement AE2, which extends the autoencoder above by adding a non-linear activation function (try Sigmoid).

In [202]:
class AE2(nn.Module):
    '''
    Autoencoder with one hidden layer and a sigmoid non-linearity.

    Encoder: Linear(input_size -> hidden_size) followed by Sigmoid.
    Decoder: Linear(hidden_size -> input_size) with no output activation.
    '''

    def __init__(self, input_size=784, hidden_size=16):
        '''
        Create the encoder layer, the activation and the decoder layer.

        input_size:  number of features in each input vector.
        hidden_size: number of units in the hidden code z.
        '''
        super().__init__()

        self.input_size, self.hidden_size = input_size, hidden_size

        # encoder; from x -> pre-activation hidden code
        self.i = nn.Linear(in_features=input_size, out_features=hidden_size)

        # non-linearity applied to the encoder output
        self.a = nn.Sigmoid()

        # decoder; from z -> reconstruction of x
        self.o = nn.Linear(in_features=hidden_size, out_features=input_size)

    def forward(self, X, return_z=False):
        '''
        Encode X and return either the hidden code or the reconstruction.

        X:        input tensor whose last dimension is input_size.
        return_z: when truthy, return the sigmoid-activated hidden code
                  instead of decoding it.
        '''
        code = self.a(self.i(X))
        return code if return_z else self.o(code)
In [203]:
# Fresh sigmoid autoencoder with its default layer sizes, plus an SGD
# optimizer (with momentum) over its parameters.
auto2 = AE2()
optimizer = optim.SGD(
    params=auto2.parameters(),
    lr=0.001,
    momentum=0.9,
)
In [204]:
# Train the sigmoid autoencoder `auto2`. X is passed as both of the first two
# arguments (presumably inputs and reconstruction targets -- confirm against
# train_AE's signature). Uses the `optimizer` and `loss_function` globals
# defined in earlier cells.
train_AE(X, X, auto2, optimizer, loss_function)
epoch: 0, batch: 0 // loss: 0.309
epoch: 0, batch: 100 // loss: 0.292
epoch: 0, batch: 200 // loss: 0.255
epoch: 0, batch: 300 // loss: 0.236
epoch: 0, batch: 400 // loss: 0.238
epoch: 0, batch: 500 // loss: 0.233
epoch: 0, batch: 600 // loss: 0.235
epoch: 0, batch: 700 // loss: 0.242
epoch: 0, batch: 800 // loss: 0.213
epoch: 0, batch: 900 // loss: 0.255
epoch: 0, batch: 1000 // loss: 0.230
epoch: 0, batch: 1100 // loss: 0.260
epoch: 0, batch: 1200 // loss: 0.204
epoch: 0, batch: 1300 // loss: 0.248
epoch: 0, batch: 1400 // loss: 0.205
epoch: 0, batch: 1500 // loss: 0.204
epoch: 0, batch: 1600 // loss: 0.234
epoch: 0, batch: 1700 // loss: 0.219
epoch: 0, batch: 1800 // loss: 0.253
epoch: 0, batch: 1900 // loss: 0.223
epoch: 0, batch: 2000 // loss: 0.197
epoch: 0, batch: 2100 // loss: 0.197
epoch: 0, batch: 2200 // loss: 0.244
epoch: 0, batch: 2300 // loss: 0.216
epoch: 0, batch: 2400 // loss: 0.168
epoch: 0, batch: 2500 // loss: 0.184
epoch: 0, batch: 2600 // loss: 0.233
epoch: 0, batch: 2700 // loss: 0.183
epoch: 0, batch: 2800 // loss: 0.236
epoch: 0, batch: 2900 // loss: 0.172
epoch: 0, batch: 3000 // loss: 0.190
epoch: 0, batch: 3100 // loss: 0.226
epoch: 0, batch: 3200 // loss: 0.164
epoch: 0, batch: 3300 // loss: 0.192
epoch: 0, batch: 3400 // loss: 0.194
epoch: 0, batch: 3500 // loss: 0.205
epoch: 0, batch: 3600 // loss: 0.203
epoch: 0, batch: 3700 // loss: 0.224

epoch: 1, batch: 0 // loss: 0.204
epoch: 1, batch: 100 // loss: 0.213
epoch: 1, batch: 200 // loss: 0.194
epoch: 1, batch: 300 // loss: 0.182
epoch: 1, batch: 400 // loss: 0.191
epoch: 1, batch: 500 // loss: 0.189
epoch: 1, batch: 600 // loss: 0.193
epoch: 1, batch: 700 // loss: 0.201
epoch: 1, batch: 800 // loss: 0.176
epoch: 1, batch: 900 // loss: 0.217
epoch: 1, batch: 1000 // loss: 0.193
epoch: 1, batch: 1100 // loss: 0.222
epoch: 1, batch: 1200 // loss: 0.171
epoch: 1, batch: 1300 // loss: 0.212
epoch: 1, batch: 1400 // loss: 0.172
epoch: 1, batch: 1500 // loss: 0.172
epoch: 1, batch: 1600 // loss: 0.199
epoch: 1, batch: 1700 // loss: 0.186
epoch: 1, batch: 1800 // loss: 0.217
epoch: 1, batch: 1900 // loss: 0.189
epoch: 1, batch: 2000 // loss: 0.166
epoch: 1, batch: 2100 // loss: 0.167
epoch: 1, batch: 2200 // loss: 0.209
epoch: 1, batch: 2300 // loss: 0.184
epoch: 1, batch: 2400 // loss: 0.141
epoch: 1, batch: 2500 // loss: 0.154
epoch: 1, batch: 2600 // loss: 0.198
epoch: 1, batch: 2700 // loss: 0.154
epoch: 1, batch: 2800 // loss: 0.201
epoch: 1, batch: 2900 // loss: 0.145
epoch: 1, batch: 3000 // loss: 0.160
epoch: 1, batch: 3100 // loss: 0.191
epoch: 1, batch: 3200 // loss: 0.138
epoch: 1, batch: 3300 // loss: 0.161
epoch: 1, batch: 3400 // loss: 0.162
epoch: 1, batch: 3500 // loss: 0.171
epoch: 1, batch: 3600 // loss: 0.171
epoch: 1, batch: 3700 // loss: 0.189

epoch: 2, batch: 0 // loss: 0.174
epoch: 2, batch: 100 // loss: 0.178
epoch: 2, batch: 200 // loss: 0.163
epoch: 2, batch: 300 // loss: 0.153
epoch: 2, batch: 400 // loss: 0.160
epoch: 2, batch: 500 // loss: 0.156
epoch: 2, batch: 600 // loss: 0.160
epoch: 2, batch: 700 // loss: 0.167
epoch: 2, batch: 800 // loss: 0.147
epoch: 2, batch: 900 // loss: 0.181
epoch: 2, batch: 1000 // loss: 0.158
epoch: 2, batch: 1100 // loss: 0.183
epoch: 2, batch: 1200 // loss: 0.143
epoch: 2, batch: 1300 // loss: 0.176
epoch: 2, batch: 1400 // loss: 0.142
epoch: 2, batch: 1500 // loss: 0.142
epoch: 2, batch: 1600 // loss: 0.164
epoch: 2, batch: 1700 // loss: 0.153
epoch: 2, batch: 1800 // loss: 0.178
epoch: 2, batch: 1900 // loss: 0.155
epoch: 2, batch: 2000 // loss: 0.136
epoch: 2, batch: 2100 // loss: 0.139
epoch: 2, batch: 2200 // loss: 0.172
epoch: 2, batch: 2300 // loss: 0.152
epoch: 2, batch: 2400 // loss: 0.116
epoch: 2, batch: 2500 // loss: 0.126
epoch: 2, batch: 2600 // loss: 0.161
epoch: 2, batch: 2700 // loss: 0.125
epoch: 2, batch: 2800 // loss: 0.165
epoch: 2, batch: 2900 // loss: 0.118
epoch: 2, batch: 3000 // loss: 0.130
epoch: 2, batch: 3100 // loss: 0.154
epoch: 2, batch: 3200 // loss: 0.113
epoch: 2, batch: 3300 // loss: 0.129
epoch: 2, batch: 3400 // loss: 0.130
epoch: 2, batch: 3500 // loss: 0.137
epoch: 2, batch: 3600 // loss: 0.139
epoch: 2, batch: 3700 // loss: 0.153

epoch: 3, batch: 0 // loss: 0.144
epoch: 3, batch: 100 // loss: 0.143
epoch: 3, batch: 200 // loss: 0.134
epoch: 3, batch: 300 // loss: 0.126
epoch: 3, batch: 400 // loss: 0.130
epoch: 3, batch: 500 // loss: 0.125
epoch: 3, batch: 600 // loss: 0.127
epoch: 3, batch: 700 // loss: 0.133
epoch: 3, batch: 800 // loss: 0.120
epoch: 3, batch: 900 // loss: 0.147
epoch: 3, batch: 1000 // loss: 0.125
epoch: 3, batch: 1100 // loss: 0.145
epoch: 3, batch: 1200 // loss: 0.116
epoch: 3, batch: 1300 // loss: 0.141
epoch: 3, batch: 1400 // loss: 0.114
epoch: 3, batch: 1500 // loss: 0.115
epoch: 3, batch: 1600 // loss: 0.132
epoch: 3, batch: 1700 // loss: 0.124
epoch: 3, batch: 1800 // loss: 0.142
epoch: 3, batch: 1900 // loss: 0.125
epoch: 3, batch: 2000 // loss: 0.110
epoch: 3, batch: 2100 // loss: 0.115
epoch: 3, batch: 2200 // loss: 0.139
epoch: 3, batch: 2300 // loss: 0.123
epoch: 3, batch: 2400 // loss: 0.096
epoch: 3, batch: 2500 // loss: 0.102
epoch: 3, batch: 2600 // loss: 0.129
epoch: 3, batch: 2700 // loss: 0.101
epoch: 3, batch: 2800 // loss: 0.133
epoch: 3, batch: 2900 // loss: 0.096
epoch: 3, batch: 3000 // loss: 0.106
epoch: 3, batch: 3100 // loss: 0.122
epoch: 3, batch: 3200 // loss: 0.094
epoch: 3, batch: 3300 // loss: 0.104
epoch: 3, batch: 3400 // loss: 0.103
epoch: 3, batch: 3500 // loss: 0.109
epoch: 3, batch: 3600 // loss: 0.114
epoch: 3, batch: 3700 // loss: 0.124

epoch: 4, batch: 0 // loss: 0.120
epoch: 4, batch: 100 // loss: 0.115
epoch: 4, batch: 200 // loss: 0.112
epoch: 4, batch: 300 // loss: 0.105
epoch: 4, batch: 400 // loss: 0.108
epoch: 4, batch: 500 // loss: 0.101
epoch: 4, batch: 600 // loss: 0.103
epoch: 4, batch: 700 // loss: 0.108
epoch: 4, batch: 800 // loss: 0.101
epoch: 4, batch: 900 // loss: 0.121
epoch: 4, batch: 1000 // loss: 0.100
epoch: 4, batch: 1100 // loss: 0.117
epoch: 4, batch: 1200 // loss: 0.097
epoch: 4, batch: 1300 // loss: 0.116
epoch: 4, batch: 1400 // loss: 0.095
epoch: 4, batch: 1500 // loss: 0.096
epoch: 4, batch: 1600 // loss: 0.109
epoch: 4, batch: 1700 // loss: 0.104
epoch: 4, batch: 1800 // loss: 0.116
epoch: 4, batch: 1900 // loss: 0.104
epoch: 4, batch: 2000 // loss: 0.092
epoch: 4, batch: 2100 // loss: 0.098
epoch: 4, batch: 2200 // loss: 0.117
epoch: 4, batch: 2300 // loss: 0.104
epoch: 4, batch: 2400 // loss: 0.084
epoch: 4, batch: 2500 // loss: 0.086
epoch: 4, batch: 2600 // loss: 0.108
epoch: 4, batch: 2700 // loss: 0.086
epoch: 4, batch: 2800 // loss: 0.112
epoch: 4, batch: 2900 // loss: 0.082
epoch: 4, batch: 3000 // loss: 0.090
epoch: 4, batch: 3100 // loss: 0.102
epoch: 4, batch: 3200 // loss: 0.082
epoch: 4, batch: 3300 // loss: 0.088
epoch: 4, batch: 3400 // loss: 0.086
epoch: 4, batch: 3500 // loss: 0.091
epoch: 4, batch: 3600 // loss: 0.098
epoch: 4, batch: 3700 // loss: 0.105

epoch: 5, batch: 0 // loss: 0.105
epoch: 5, batch: 100 // loss: 0.098
epoch: 5, batch: 200 // loss: 0.098
epoch: 5, batch: 300 // loss: 0.092
epoch: 5, batch: 400 // loss: 0.094
epoch: 5, batch: 500 // loss: 0.086
epoch: 5, batch: 600 // loss: 0.089
epoch: 5, batch: 700 // loss: 0.093
epoch: 5, batch: 800 // loss: 0.089
epoch: 5, batch: 900 // loss: 0.105
epoch: 5, batch: 1000 // loss: 0.086
epoch: 5, batch: 1100 // loss: 0.099
epoch: 5, batch: 1200 // loss: 0.087
epoch: 5, batch: 1300 // loss: 0.100
epoch: 5, batch: 1400 // loss: 0.084
epoch: 5, batch: 1500 // loss: 0.086
epoch: 5, batch: 1600 // loss: 0.095
epoch: 5, batch: 1700 // loss: 0.092
epoch: 5, batch: 1800 // loss: 0.100
epoch: 5, batch: 1900 // loss: 0.091
epoch: 5, batch: 2000 // loss: 0.083
epoch: 5, batch: 2100 // loss: 0.089
epoch: 5, batch: 2200 // loss: 0.103
epoch: 5, batch: 2300 // loss: 0.093
epoch: 5, batch: 2400 // loss: 0.078
epoch: 5, batch: 2500 // loss: 0.078
epoch: 5, batch: 2600 // loss: 0.095
epoch: 5, batch: 2700 // loss: 0.078
epoch: 5, batch: 2800 // loss: 0.099
epoch: 5, batch: 2900 // loss: 0.075
epoch: 5, batch: 3000 // loss: 0.081
epoch: 5, batch: 3100 // loss: 0.090
epoch: 5, batch: 3200 // loss: 0.076
epoch: 5, batch: 3300 // loss: 0.079
epoch: 5, batch: 3400 // loss: 0.077
epoch: 5, batch: 3500 // loss: 0.081
epoch: 5, batch: 3600 // loss: 0.089
epoch: 5, batch: 3700 // loss: 0.094

epoch: 6, batch: 0 // loss: 0.097
epoch: 6, batch: 100 // loss: 0.088
epoch: 6, batch: 200 // loss: 0.091
epoch: 6, batch: 300 // loss: 0.085
epoch: 6, batch: 400 // loss: 0.087
epoch: 6, batch: 500 // loss: 0.078
epoch: 6, batch: 600 // loss: 0.080
epoch: 6, batch: 700 // loss: 0.084
epoch: 6, batch: 800 // loss: 0.083
epoch: 6, batch: 900 // loss: 0.096
epoch: 6, batch: 1000 // loss: 0.077
epoch: 6, batch: 1100 // loss: 0.089
epoch: 6, batch: 1200 // loss: 0.081
epoch: 6, batch: 1300 // loss: 0.092
epoch: 6, batch: 1400 // loss: 0.078
epoch: 6, batch: 1500 // loss: 0.081
epoch: 6, batch: 1600 // loss: 0.088
epoch: 6, batch: 1700 // loss: 0.085
epoch: 6, batch: 1800 // loss: 0.092
epoch: 6, batch: 1900 // loss: 0.085
epoch: 6, batch: 2000 // loss: 0.078
epoch: 6, batch: 2100 // loss: 0.085
epoch: 6, batch: 2200 // loss: 0.096
epoch: 6, batch: 2300 // loss: 0.087
epoch: 6, batch: 2400 // loss: 0.075
epoch: 6, batch: 2500 // loss: 0.073
epoch: 6, batch: 2600 // loss: 0.088
epoch: 6, batch: 2700 // loss: 0.073
epoch: 6, batch: 2800 // loss: 0.092
epoch: 6, batch: 2900 // loss: 0.071
epoch: 6, batch: 3000 // loss: 0.077
epoch: 6, batch: 3100 // loss: 0.083
epoch: 6, batch: 3200 // loss: 0.073
epoch: 6, batch: 3300 // loss: 0.075
epoch: 6, batch: 3400 // loss: 0.072
epoch: 6, batch: 3500 // loss: 0.076
epoch: 6, batch: 3600 // loss: 0.084
epoch: 6, batch: 3700 // loss: 0.088

epoch: 7, batch: 0 // loss: 0.092
epoch: 7, batch: 100 // loss: 0.083
epoch: 7, batch: 200 // loss: 0.086
epoch: 7, batch: 300 // loss: 0.082
epoch: 7, batch: 400 // loss: 0.083
epoch: 7, batch: 500 // loss: 0.073
epoch: 7, batch: 600 // loss: 0.076
epoch: 7, batch: 700 // loss: 0.079
epoch: 7, batch: 800 // loss: 0.080
epoch: 7, batch: 900 // loss: 0.090
epoch: 7, batch: 1000 // loss: 0.073
epoch: 7, batch: 1100 // loss: 0.083
epoch: 7, batch: 1200 // loss: 0.079
epoch: 7, batch: 1300 // loss: 0.087
epoch: 7, batch: 1400 // loss: 0.075
epoch: 7, batch: 1500 // loss: 0.078
epoch: 7, batch: 1600 // loss: 0.084
epoch: 7, batch: 1700 // loss: 0.081
epoch: 7, batch: 1800 // loss: 0.087
epoch: 7, batch: 1900 // loss: 0.081
epoch: 7, batch: 2000 // loss: 0.075
epoch: 7, batch: 2100 // loss: 0.083
epoch: 7, batch: 2200 // loss: 0.091
epoch: 7, batch: 2300 // loss: 0.084
epoch: 7, batch: 2400 // loss: 0.073
epoch: 7, batch: 2500 // loss: 0.071
epoch: 7, batch: 2600 // loss: 0.084
epoch: 7, batch: 2700 // loss: 0.071
epoch: 7, batch: 2800 // loss: 0.088
epoch: 7, batch: 2900 // loss: 0.069
epoch: 7, batch: 3000 // loss: 0.074
epoch: 7, batch: 3100 // loss: 0.079
epoch: 7, batch: 3200 // loss: 0.072
epoch: 7, batch: 3300 // loss: 0.072
epoch: 7, batch: 3400 // loss: 0.069
epoch: 7, batch: 3500 // loss: 0.072
epoch: 7, batch: 3600 // loss: 0.081
epoch: 7, batch: 3700 // loss: 0.085

epoch: 8, batch: 0 // loss: 0.089
epoch: 8, batch: 100 // loss: 0.080
epoch: 8, batch: 200 // loss: 0.084
epoch: 8, batch: 300 // loss: 0.080
epoch: 8, batch: 400 // loss: 0.081
epoch: 8, batch: 500 // loss: 0.071
epoch: 8, batch: 600 // loss: 0.073
epoch: 8, batch: 700 // loss: 0.076
epoch: 8, batch: 800 // loss: 0.078
epoch: 8, batch: 900 // loss: 0.087
epoch: 8, batch: 1000 // loss: 0.070
epoch: 8, batch: 1100 // loss: 0.079
epoch: 8, batch: 1200 // loss: 0.077
epoch: 8, batch: 1300 // loss: 0.084
epoch: 8, batch: 1400 // loss: 0.074
epoch: 8, batch: 1500 // loss: 0.077
epoch: 8, batch: 1600 // loss: 0.081
epoch: 8, batch: 1700 // loss: 0.079
epoch: 8, batch: 1800 // loss: 0.084
epoch: 8, batch: 1900 // loss: 0.079
epoch: 8, batch: 2000 // loss: 0.074
epoch: 8, batch: 2100 // loss: 0.081
epoch: 8, batch: 2200 // loss: 0.089
epoch: 8, batch: 2300 // loss: 0.082
epoch: 8, batch: 2400 // loss: 0.072
epoch: 8, batch: 2500 // loss: 0.070
epoch: 8, batch: 2600 // loss: 0.081
epoch: 8, batch: 2700 // loss: 0.070
epoch: 8, batch: 2800 // loss: 0.085
epoch: 8, batch: 2900 // loss: 0.068
epoch: 8, batch: 3000 // loss: 0.072
epoch: 8, batch: 3100 // loss: 0.077
epoch: 8, batch: 3200 // loss: 0.071
epoch: 8, batch: 3300 // loss: 0.070
epoch: 8, batch: 3400 // loss: 0.068
epoch: 8, batch: 3500 // loss: 0.070
epoch: 8, batch: 3600 // loss: 0.079
epoch: 8, batch: 3700 // loss: 0.082

epoch: 9, batch: 0 // loss: 0.088
epoch: 9, batch: 100 // loss: 0.078
epoch: 9, batch: 200 // loss: 0.082
epoch: 9, batch: 300 // loss: 0.079
epoch: 9, batch: 400 // loss: 0.079
epoch: 9, batch: 500 // loss: 0.069
epoch: 9, batch: 600 // loss: 0.071
epoch: 9, batch: 700 // loss: 0.074
epoch: 9, batch: 800 // loss: 0.077
epoch: 9, batch: 900 // loss: 0.085
epoch: 9, batch: 1000 // loss: 0.069
epoch: 9, batch: 1100 // loss: 0.077
epoch: 9, batch: 1200 // loss: 0.076
epoch: 9, batch: 1300 // loss: 0.082
epoch: 9, batch: 1400 // loss: 0.072
epoch: 9, batch: 1500 // loss: 0.076
epoch: 9, batch: 1600 // loss: 0.080
epoch: 9, batch: 1700 // loss: 0.077
epoch: 9, batch: 1800 // loss: 0.082
epoch: 9, batch: 1900 // loss: 0.077
epoch: 9, batch: 2000 // loss: 0.073
epoch: 9, batch: 2100 // loss: 0.080
epoch: 9, batch: 2200 // loss: 0.087
epoch: 9, batch: 2300 // loss: 0.080
epoch: 9, batch: 2400 // loss: 0.072
epoch: 9, batch: 2500 // loss: 0.069
epoch: 9, batch: 2600 // loss: 0.079
epoch: 9, batch: 2700 // loss: 0.069
epoch: 9, batch: 2800 // loss: 0.084
epoch: 9, batch: 2900 // loss: 0.068
epoch: 9, batch: 3000 // loss: 0.071
epoch: 9, batch: 3100 // loss: 0.075
epoch: 9, batch: 3200 // loss: 0.071
epoch: 9, batch: 3300 // loss: 0.069
epoch: 9, batch: 3400 // loss: 0.067
epoch: 9, batch: 3500 // loss: 0.068
epoch: 9, batch: 3600 // loss: 0.077
epoch: 9, batch: 3700 // loss: 0.081
In [205]:
# Run the first 5000 rows of X through auto2 with return_z=True and convert the
# result to a NumPy array (presumably the hidden codes; auto2 is defined outside this chunk).
Zs = auto2(X[:5000].float(), return_z=True).detach().numpy()
In [206]:
# Scatter columns 0 and 1 of Zs; `c` comes from an earlier cell
# (presumably per-example colors/labels -- TODO confirm).
plt.scatter(Zs[:,0], Zs[:,1], c=c)
Out[206]:
<matplotlib.collections.PathCollection at 0x7f8f4460fba8>
In [207]:
# Inspect the dimensions of Zs (the scatter above only uses columns 0 and 1).
Zs.shape
Out[207]:
(5000, 16)

TODO 3

Denoising auto-encoder. Now let's take as our input corrupted versions of the examples, and train the model to reconstruct the original (uncorrupted) examples as its target. To create a corrupt version we will perturb the input pixel values by some random noise.

In [208]:
def corrupt(x, var=0.01):
    '''
    Return x with independent zero-mean Gaussian noise added to every entry.

    Note: despite its name, `var` is handed to np.random.normal as the
    `scale` argument, so it acts as the standard deviation of the noise
    rather than its variance.
    '''
    zero_mean = np.zeros(x.shape)
    noise = np.random.normal(loc=zero_mean, scale=var)
    return x + noise
In [209]:
# Peek at the first ten values of the first row of X.
X[0,:10]
Out[209]:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=torch.float64)
In [210]:
# The same ten positions after adding noise with the default var=0.01.
corrupt(X[0])[:10]
Out[210]:
tensor([ 0.0073,  0.0166, -0.0076,  0.0072,  0.0170,  0.0171,  0.0167,  0.0069,
         0.0050,  0.0150], dtype=torch.float64)
In [211]:
# Display row 0 of X reshaped to a 28x28 grayscale image.
imshow(np.asarray(X[0].reshape((28,28))), cmap='gray')
Out[211]:
<matplotlib.image.AxesImage at 0x7f8f445eabe0>
In [212]:
# The same row with noise added using var=0.1, larger than the default of 0.01.
imshow(np.asarray(corrupt(X[0], var=0.1).reshape((28,28))), cmap='gray')
Out[212]:
<matplotlib.image.AxesImage at 0x7f8f724245c0>
In [213]:
# Noisy version of all of X (default var=0.01). The noise is drawn once here,
# so every later use of X_corrupt sees the same corrupted values.
X_corrupt = corrupt(X)
In [214]:
# Fresh AE2 with hidden_size=16, optimized with SGD plus momentum.
# X_corrupt is passed before X: presumably (input, target), mirroring the
# (X_in, X_target) order of train_regularized_AE -- train_AE itself is defined
# outside this chunk, TODO confirm.
auto3 = AE2(hidden_size=16)
optimizer = optim.SGD(auto3.parameters(), lr=0.01, momentum=0.9)
train_AE(X_corrupt, X, auto3, optimizer, loss_function)
epoch: 0, batch: 0 // loss: 0.306
epoch: 0, batch: 100 // loss: 0.235
epoch: 0, batch: 200 // loss: 0.204
epoch: 0, batch: 300 // loss: 0.184
epoch: 0, batch: 400 // loss: 0.186
epoch: 0, batch: 500 // loss: 0.175
epoch: 0, batch: 600 // loss: 0.171
epoch: 0, batch: 700 // loss: 0.168
epoch: 0, batch: 800 // loss: 0.141
epoch: 0, batch: 900 // loss: 0.162
epoch: 0, batch: 1000 // loss: 0.131
epoch: 0, batch: 1100 // loss: 0.140
epoch: 0, batch: 1200 // loss: 0.108
epoch: 0, batch: 1300 // loss: 0.120
epoch: 0, batch: 1400 // loss: 0.095
epoch: 0, batch: 1500 // loss: 0.092
epoch: 0, batch: 1600 // loss: 0.097
epoch: 0, batch: 1700 // loss: 0.090
epoch: 0, batch: 1800 // loss: 0.095
epoch: 0, batch: 1900 // loss: 0.086
epoch: 0, batch: 2000 // loss: 0.077
epoch: 0, batch: 2100 // loss: 0.084
epoch: 0, batch: 2200 // loss: 0.091
epoch: 0, batch: 2300 // loss: 0.083
epoch: 0, batch: 2400 // loss: 0.073
epoch: 0, batch: 2500 // loss: 0.070
epoch: 0, batch: 2600 // loss: 0.080
epoch: 0, batch: 2700 // loss: 0.070
epoch: 0, batch: 2800 // loss: 0.084
epoch: 0, batch: 2900 // loss: 0.068
epoch: 0, batch: 3000 // loss: 0.070
epoch: 0, batch: 3100 // loss: 0.075
epoch: 0, batch: 3200 // loss: 0.071
epoch: 0, batch: 3300 // loss: 0.068
epoch: 0, batch: 3400 // loss: 0.066
epoch: 0, batch: 3500 // loss: 0.067
epoch: 0, batch: 3600 // loss: 0.076
epoch: 0, batch: 3700 // loss: 0.079

epoch: 1, batch: 0 // loss: 0.085
epoch: 1, batch: 100 // loss: 0.074
epoch: 1, batch: 200 // loss: 0.078
epoch: 1, batch: 300 // loss: 0.077
epoch: 1, batch: 400 // loss: 0.076
epoch: 1, batch: 500 // loss: 0.065
epoch: 1, batch: 600 // loss: 0.067
epoch: 1, batch: 700 // loss: 0.071
epoch: 1, batch: 800 // loss: 0.074
epoch: 1, batch: 900 // loss: 0.080
epoch: 1, batch: 1000 // loss: 0.065
epoch: 1, batch: 1100 // loss: 0.071
epoch: 1, batch: 1200 // loss: 0.074
epoch: 1, batch: 1300 // loss: 0.075
epoch: 1, batch: 1400 // loss: 0.069
epoch: 1, batch: 1500 // loss: 0.074
epoch: 1, batch: 1600 // loss: 0.076
epoch: 1, batch: 1700 // loss: 0.071
epoch: 1, batch: 1800 // loss: 0.076
epoch: 1, batch: 1900 // loss: 0.072
epoch: 1, batch: 2000 // loss: 0.069
epoch: 1, batch: 2100 // loss: 0.076
epoch: 1, batch: 2200 // loss: 0.079
epoch: 1, batch: 2300 // loss: 0.074
epoch: 1, batch: 2400 // loss: 0.068
epoch: 1, batch: 2500 // loss: 0.065
epoch: 1, batch: 2600 // loss: 0.071
epoch: 1, batch: 2700 // loss: 0.065
epoch: 1, batch: 2800 // loss: 0.075
epoch: 1, batch: 2900 // loss: 0.063
epoch: 1, batch: 3000 // loss: 0.065
epoch: 1, batch: 3100 // loss: 0.068
epoch: 1, batch: 3200 // loss: 0.065
epoch: 1, batch: 3300 // loss: 0.063
epoch: 1, batch: 3400 // loss: 0.062
epoch: 1, batch: 3500 // loss: 0.060
epoch: 1, batch: 3600 // loss: 0.069
epoch: 1, batch: 3700 // loss: 0.072

epoch: 2, batch: 0 // loss: 0.078
epoch: 2, batch: 100 // loss: 0.069
epoch: 2, batch: 200 // loss: 0.071
epoch: 2, batch: 300 // loss: 0.071
epoch: 2, batch: 400 // loss: 0.070
epoch: 2, batch: 500 // loss: 0.060
epoch: 2, batch: 600 // loss: 0.062
epoch: 2, batch: 700 // loss: 0.065
epoch: 2, batch: 800 // loss: 0.067
epoch: 2, batch: 900 // loss: 0.073
epoch: 2, batch: 1000 // loss: 0.061
epoch: 2, batch: 1100 // loss: 0.065
epoch: 2, batch: 1200 // loss: 0.068
epoch: 2, batch: 1300 // loss: 0.068
epoch: 2, batch: 1400 // loss: 0.063
epoch: 2, batch: 1500 // loss: 0.069
epoch: 2, batch: 1600 // loss: 0.070
epoch: 2, batch: 1700 // loss: 0.064
epoch: 2, batch: 1800 // loss: 0.071
epoch: 2, batch: 1900 // loss: 0.065
epoch: 2, batch: 2000 // loss: 0.064
epoch: 2, batch: 2100 // loss: 0.069
epoch: 2, batch: 2200 // loss: 0.072
epoch: 2, batch: 2300 // loss: 0.068
epoch: 2, batch: 2400 // loss: 0.061
epoch: 2, batch: 2500 // loss: 0.059
epoch: 2, batch: 2600 // loss: 0.065
epoch: 2, batch: 2700 // loss: 0.059
epoch: 2, batch: 2800 // loss: 0.067
epoch: 2, batch: 2900 // loss: 0.057
epoch: 2, batch: 3000 // loss: 0.060
epoch: 2, batch: 3100 // loss: 0.061
epoch: 2, batch: 3200 // loss: 0.057
epoch: 2, batch: 3300 // loss: 0.056
epoch: 2, batch: 3400 // loss: 0.056
epoch: 2, batch: 3500 // loss: 0.053
epoch: 2, batch: 3600 // loss: 0.061
epoch: 2, batch: 3700 // loss: 0.064

epoch: 3, batch: 0 // loss: 0.070
epoch: 3, batch: 100 // loss: 0.062
epoch: 3, batch: 200 // loss: 0.064
epoch: 3, batch: 300 // loss: 0.064
epoch: 3, batch: 400 // loss: 0.062
epoch: 3, batch: 500 // loss: 0.054
epoch: 3, batch: 600 // loss: 0.055
epoch: 3, batch: 700 // loss: 0.059
epoch: 3, batch: 800 // loss: 0.059
epoch: 3, batch: 900 // loss: 0.065
epoch: 3, batch: 1000 // loss: 0.056
epoch: 3, batch: 1100 // loss: 0.058
epoch: 3, batch: 1200 // loss: 0.060
epoch: 3, batch: 1300 // loss: 0.061
epoch: 3, batch: 1400 // loss: 0.057
epoch: 3, batch: 1500 // loss: 0.062
epoch: 3, batch: 1600 // loss: 0.064
epoch: 3, batch: 1700 // loss: 0.058
epoch: 3, batch: 1800 // loss: 0.065
epoch: 3, batch: 1900 // loss: 0.058
epoch: 3, batch: 2000 // loss: 0.058
epoch: 3, batch: 2100 // loss: 0.060
epoch: 3, batch: 2200 // loss: 0.065
epoch: 3, batch: 2300 // loss: 0.061
epoch: 3, batch: 2400 // loss: 0.054
epoch: 3, batch: 2500 // loss: 0.053
epoch: 3, batch: 2600 // loss: 0.058
epoch: 3, batch: 2700 // loss: 0.054
epoch: 3, batch: 2800 // loss: 0.059
epoch: 3, batch: 2900 // loss: 0.051
epoch: 3, batch: 3000 // loss: 0.055
epoch: 3, batch: 3100 // loss: 0.054
epoch: 3, batch: 3200 // loss: 0.050
epoch: 3, batch: 3300 // loss: 0.049
epoch: 3, batch: 3400 // loss: 0.051
epoch: 3, batch: 3500 // loss: 0.047
epoch: 3, batch: 3600 // loss: 0.055
epoch: 3, batch: 3700 // loss: 0.056

epoch: 4, batch: 0 // loss: 0.064
epoch: 4, batch: 100 // loss: 0.056
epoch: 4, batch: 200 // loss: 0.057
epoch: 4, batch: 300 // loss: 0.058
epoch: 4, batch: 400 // loss: 0.056
epoch: 4, batch: 500 // loss: 0.048
epoch: 4, batch: 600 // loss: 0.050
epoch: 4, batch: 700 // loss: 0.054
epoch: 4, batch: 800 // loss: 0.052
epoch: 4, batch: 900 // loss: 0.059
epoch: 4, batch: 1000 // loss: 0.052
epoch: 4, batch: 1100 // loss: 0.052
epoch: 4, batch: 1200 // loss: 0.054
epoch: 4, batch: 1300 // loss: 0.055
epoch: 4, batch: 1400 // loss: 0.052
epoch: 4, batch: 1500 // loss: 0.057
epoch: 4, batch: 1600 // loss: 0.060
epoch: 4, batch: 1700 // loss: 0.053
epoch: 4, batch: 1800 // loss: 0.060
epoch: 4, batch: 1900 // loss: 0.053
epoch: 4, batch: 2000 // loss: 0.053
epoch: 4, batch: 2100 // loss: 0.054
epoch: 4, batch: 2200 // loss: 0.060
epoch: 4, batch: 2300 // loss: 0.056
epoch: 4, batch: 2400 // loss: 0.048
epoch: 4, batch: 2500 // loss: 0.048
epoch: 4, batch: 2600 // loss: 0.053
epoch: 4, batch: 2700 // loss: 0.050
epoch: 4, batch: 2800 // loss: 0.053
epoch: 4, batch: 2900 // loss: 0.047
epoch: 4, batch: 3000 // loss: 0.051
epoch: 4, batch: 3100 // loss: 0.050
epoch: 4, batch: 3200 // loss: 0.045
epoch: 4, batch: 3300 // loss: 0.045
epoch: 4, batch: 3400 // loss: 0.048
epoch: 4, batch: 3500 // loss: 0.042
epoch: 4, batch: 3600 // loss: 0.050
epoch: 4, batch: 3700 // loss: 0.051

epoch: 5, batch: 0 // loss: 0.059
epoch: 5, batch: 100 // loss: 0.052
epoch: 5, batch: 200 // loss: 0.052
epoch: 5, batch: 300 // loss: 0.054
epoch: 5, batch: 400 // loss: 0.052
epoch: 5, batch: 500 // loss: 0.045
epoch: 5, batch: 600 // loss: 0.046
epoch: 5, batch: 700 // loss: 0.050
epoch: 5, batch: 800 // loss: 0.048
epoch: 5, batch: 900 // loss: 0.055
epoch: 5, batch: 1000 // loss: 0.049
epoch: 5, batch: 1100 // loss: 0.049
epoch: 5, batch: 1200 // loss: 0.050
epoch: 5, batch: 1300 // loss: 0.051
epoch: 5, batch: 1400 // loss: 0.048
epoch: 5, batch: 1500 // loss: 0.054
epoch: 5, batch: 1600 // loss: 0.056
epoch: 5, batch: 1700 // loss: 0.050
epoch: 5, batch: 1800 // loss: 0.057
epoch: 5, batch: 1900 // loss: 0.050
epoch: 5, batch: 2000 // loss: 0.050
epoch: 5, batch: 2100 // loss: 0.051
epoch: 5, batch: 2200 // loss: 0.056
epoch: 5, batch: 2300 // loss: 0.053
epoch: 5, batch: 2400 // loss: 0.045
epoch: 5, batch: 2500 // loss: 0.045
epoch: 5, batch: 2600 // loss: 0.050
epoch: 5, batch: 2700 // loss: 0.047
epoch: 5, batch: 2800 // loss: 0.050
epoch: 5, batch: 2900 // loss: 0.044
epoch: 5, batch: 3000 // loss: 0.049
epoch: 5, batch: 3100 // loss: 0.047
epoch: 5, batch: 3200 // loss: 0.042
epoch: 5, batch: 3300 // loss: 0.042
epoch: 5, batch: 3400 // loss: 0.045
epoch: 5, batch: 3500 // loss: 0.039
epoch: 5, batch: 3600 // loss: 0.047
epoch: 5, batch: 3700 // loss: 0.048

epoch: 6, batch: 0 // loss: 0.056
epoch: 6, batch: 100 // loss: 0.049
epoch: 6, batch: 200 // loss: 0.049
epoch: 6, batch: 300 // loss: 0.052
epoch: 6, batch: 400 // loss: 0.049
epoch: 6, batch: 500 // loss: 0.043
epoch: 6, batch: 600 // loss: 0.044
epoch: 6, batch: 700 // loss: 0.048
epoch: 6, batch: 800 // loss: 0.045
epoch: 6, batch: 900 // loss: 0.052
epoch: 6, batch: 1000 // loss: 0.047
epoch: 6, batch: 1100 // loss: 0.046
epoch: 6, batch: 1200 // loss: 0.048
epoch: 6, batch: 1300 // loss: 0.048
epoch: 6, batch: 1400 // loss: 0.046
epoch: 6, batch: 1500 // loss: 0.052
epoch: 6, batch: 1600 // loss: 0.054
epoch: 6, batch: 1700 // loss: 0.048
epoch: 6, batch: 1800 // loss: 0.055
epoch: 6, batch: 1900 // loss: 0.047
epoch: 6, batch: 2000 // loss: 0.048
epoch: 6, batch: 2100 // loss: 0.049
epoch: 6, batch: 2200 // loss: 0.053
epoch: 6, batch: 2300 // loss: 0.051
epoch: 6, batch: 2400 // loss: 0.043
epoch: 6, batch: 2500 // loss: 0.043
epoch: 6, batch: 2600 // loss: 0.048
epoch: 6, batch: 2700 // loss: 0.046
epoch: 6, batch: 2800 // loss: 0.048
epoch: 6, batch: 2900 // loss: 0.043
epoch: 6, batch: 3000 // loss: 0.047
epoch: 6, batch: 3100 // loss: 0.045
epoch: 6, batch: 3200 // loss: 0.040
epoch: 6, batch: 3300 // loss: 0.040
epoch: 6, batch: 3400 // loss: 0.044
epoch: 6, batch: 3500 // loss: 0.037
epoch: 6, batch: 3600 // loss: 0.045
epoch: 6, batch: 3700 // loss: 0.045

epoch: 7, batch: 0 // loss: 0.054
epoch: 7, batch: 100 // loss: 0.047
epoch: 7, batch: 200 // loss: 0.046
epoch: 7, batch: 300 // loss: 0.050
epoch: 7, batch: 400 // loss: 0.047
epoch: 7, batch: 500 // loss: 0.041
epoch: 7, batch: 600 // loss: 0.042
epoch: 7, batch: 700 // loss: 0.046
epoch: 7, batch: 800 // loss: 0.043
epoch: 7, batch: 900 // loss: 0.050
epoch: 7, batch: 1000 // loss: 0.046
epoch: 7, batch: 1100 // loss: 0.044
epoch: 7, batch: 1200 // loss: 0.046
epoch: 7, batch: 1300 // loss: 0.046
epoch: 7, batch: 1400 // loss: 0.044
epoch: 7, batch: 1500 // loss: 0.050
epoch: 7, batch: 1600 // loss: 0.052
epoch: 7, batch: 1700 // loss: 0.046
epoch: 7, batch: 1800 // loss: 0.053
epoch: 7, batch: 1900 // loss: 0.046
epoch: 7, batch: 2000 // loss: 0.046
epoch: 7, batch: 2100 // loss: 0.047
epoch: 7, batch: 2200 // loss: 0.051
epoch: 7, batch: 2300 // loss: 0.049
epoch: 7, batch: 2400 // loss: 0.042
epoch: 7, batch: 2500 // loss: 0.042
epoch: 7, batch: 2600 // loss: 0.046
epoch: 7, batch: 2700 // loss: 0.044
epoch: 7, batch: 2800 // loss: 0.046
epoch: 7, batch: 2900 // loss: 0.041
epoch: 7, batch: 3000 // loss: 0.046
epoch: 7, batch: 3100 // loss: 0.044
epoch: 7, batch: 3200 // loss: 0.039
epoch: 7, batch: 3300 // loss: 0.038
epoch: 7, batch: 3400 // loss: 0.042
epoch: 7, batch: 3500 // loss: 0.035
epoch: 7, batch: 3600 // loss: 0.044
epoch: 7, batch: 3700 // loss: 0.044

epoch: 8, batch: 0 // loss: 0.052
epoch: 8, batch: 100 // loss: 0.046
epoch: 8, batch: 200 // loss: 0.045
epoch: 8, batch: 300 // loss: 0.048
epoch: 8, batch: 400 // loss: 0.045
epoch: 8, batch: 500 // loss: 0.040
epoch: 8, batch: 600 // loss: 0.041
epoch: 8, batch: 700 // loss: 0.045
epoch: 8, batch: 800 // loss: 0.041
epoch: 8, batch: 900 // loss: 0.049
epoch: 8, batch: 1000 // loss: 0.044
epoch: 8, batch: 1100 // loss: 0.043
epoch: 8, batch: 1200 // loss: 0.045
epoch: 8, batch: 1300 // loss: 0.044
epoch: 8, batch: 1400 // loss: 0.042
epoch: 8, batch: 1500 // loss: 0.048
epoch: 8, batch: 1600 // loss: 0.051
epoch: 8, batch: 1700 // loss: 0.045
epoch: 8, batch: 1800 // loss: 0.052
epoch: 8, batch: 1900 // loss: 0.044
epoch: 8, batch: 2000 // loss: 0.045
epoch: 8, batch: 2100 // loss: 0.046
epoch: 8, batch: 2200 // loss: 0.050
epoch: 8, batch: 2300 // loss: 0.048
epoch: 8, batch: 2400 // loss: 0.041
epoch: 8, batch: 2500 // loss: 0.041
epoch: 8, batch: 2600 // loss: 0.045
epoch: 8, batch: 2700 // loss: 0.043
epoch: 8, batch: 2800 // loss: 0.044
epoch: 8, batch: 2900 // loss: 0.040
epoch: 8, batch: 3000 // loss: 0.044
epoch: 8, batch: 3100 // loss: 0.042
epoch: 8, batch: 3200 // loss: 0.037
epoch: 8, batch: 3300 // loss: 0.037
epoch: 8, batch: 3400 // loss: 0.041
epoch: 8, batch: 3500 // loss: 0.034
epoch: 8, batch: 3600 // loss: 0.043
epoch: 8, batch: 3700 // loss: 0.042

epoch: 9, batch: 0 // loss: 0.051
epoch: 9, batch: 100 // loss: 0.045
epoch: 9, batch: 200 // loss: 0.043
epoch: 9, batch: 300 // loss: 0.047
epoch: 9, batch: 400 // loss: 0.044
epoch: 9, batch: 500 // loss: 0.039
epoch: 9, batch: 600 // loss: 0.039
epoch: 9, batch: 700 // loss: 0.044
epoch: 9, batch: 800 // loss: 0.040
epoch: 9, batch: 900 // loss: 0.048
epoch: 9, batch: 1000 // loss: 0.043
epoch: 9, batch: 1100 // loss: 0.042
epoch: 9, batch: 1200 // loss: 0.043
epoch: 9, batch: 1300 // loss: 0.043
epoch: 9, batch: 1400 // loss: 0.041
epoch: 9, batch: 1500 // loss: 0.047
epoch: 9, batch: 1600 // loss: 0.049
epoch: 9, batch: 1700 // loss: 0.044
epoch: 9, batch: 1800 // loss: 0.051
epoch: 9, batch: 1900 // loss: 0.043
epoch: 9, batch: 2000 // loss: 0.043
epoch: 9, batch: 2100 // loss: 0.045
epoch: 9, batch: 2200 // loss: 0.048
epoch: 9, batch: 2300 // loss: 0.047
epoch: 9, batch: 2400 // loss: 0.040
epoch: 9, batch: 2500 // loss: 0.040
epoch: 9, batch: 2600 // loss: 0.044
epoch: 9, batch: 2700 // loss: 0.042
epoch: 9, batch: 2800 // loss: 0.043
epoch: 9, batch: 2900 // loss: 0.039
epoch: 9, batch: 3000 // loss: 0.043
epoch: 9, batch: 3100 // loss: 0.042
epoch: 9, batch: 3200 // loss: 0.036
epoch: 9, batch: 3300 // loss: 0.036
epoch: 9, batch: 3400 // loss: 0.040
epoch: 9, batch: 3500 // loss: 0.033
epoch: 9, batch: 3600 // loss: 0.041
epoch: 9, batch: 3700 // loss: 0.041
In [215]:
# Run the first 5000 rows of the uncorrupted X through auto3 with return_z=True,
# then scatter columns 0 and 1 of the result, colored by `c` (defined in an earlier cell).
Zs = auto3(X[:5000].float(), return_z=True).detach().numpy()
plt.scatter(Zs[:,0], Zs[:,1], c=c)
Out[215]:
<matplotlib.collections.PathCollection at 0x7f8f8205c9e8>
In [216]:
# Output of auto3 on the first 5000 rows of X without return_z
# (presumably the reconstructions -- AE2.forward is outside this chunk).
X_tilde =  auto3(X[:5000].float()).detach().numpy()
In [222]:
# Show row 2 of X_tilde as a 28x28 grayscale image.
imshow(np.asarray(X_tilde[2]).reshape(28,28), cmap='gray')
Out[222]:
<matplotlib.image.AxesImage at 0x7f8f22f93f60>

Finally, let's add a regularization penalty on the hidden layer

First, let's define the model appropriately. Consider: What do we need to change from the above variants? Think about the training loop (below).

In [223]:
class AE_regularized(nn.Module):
    '''
    Single-hidden-layer autoencoder whose forward pass also hands back the
    hidden code, so that a training loop can add a penalty on it.
    '''

    def __init__(self, input_size=784, hidden_size=2):
        '''
        Build the encoder layer, the activation and the decoder layer.
        '''
        super().__init__()

        self.input_size, self.hidden_size = input_size, hidden_size

        # encoder: x -> pre-activation of z
        self.i = nn.Linear(in_features=self.input_size, out_features=self.hidden_size)

        # sigmoid keeps every hidden unit in (0, 1)
        self.a = nn.Sigmoid()

        # decoder: z -> reconstruction of x
        self.o = nn.Linear(in_features=self.hidden_size, out_features=self.input_size)

    def forward(self, X):
        '''
        Encode X, decode it again, and return the pair (reconstruction, z).
        '''
        hidden_code = self.a(self.i(X))
        reconstruction = self.o(hidden_code)
        # Unlike a forward pass that returns only the output, z is always returned too.
        return reconstruction, hidden_code

Now update the training loop to incorporate regularization. This will take a parameter lambda_ that encodes how much weight to put on the regularization penalty (vs typical/reconstruction loss).

Two hints:

(1) Consider that we want to incur a loss associated with our regularization (an l1 norm); where should we do that?

(2) See torch.norm (https://pytorch.org/docs/stable/torch.html#torch.norm).

In [228]:
def train_regularized_AE(X_in, X_target, model, optimizer, loss_function, lambda_, EPOCHS=10, batch_size=16):
    '''
    Train an autoencoder with an l1 penalty on its hidden code.

    X_in:          tensor of model inputs, one example per row.
    X_target:      tensor of reconstruction targets, row-aligned with X_in.
    model:         module whose forward pass returns (reconstruction, z).
    optimizer:     torch optimizer built over model's parameters.
    loss_function: callable (reconstruction, target) -> scalar loss tensor.
    lambda_:       weight on the regularization term.
    EPOCHS:        number of full passes over the data.
    batch_size:    number of rows per mini-batch (the last batch may be smaller).

    Progress is printed every 100 batches; nothing is returned (the model is
    updated in place through the optimizer).
    '''
    # Walk over the rows that actually exist rather than assuming 60000 of
    # them: slicing past the end yields empty batches, whose mean loss is NaN
    # and would poison every parameter.
    n_examples = min(len(X_in), len(X_target))

    for epoch in range(EPOCHS):
        print("")
        for batch_num, start in enumerate(range(0, n_examples, batch_size)):
            stop = min(start + batch_size, n_examples)

            # zero the parameter gradients
            optimizer.zero_grad()

            X_batch = X_in[start:stop].float()
            X_target_batch = X_target[start:stop].float()

            # run the batch forward; the model hands back the code z as well
            X_tilde_batch, z = model(X_batch)
            output_loss = loss_function(X_tilde_batch, X_target_batch)

            # l1 penalty: sum of |z| over every entry of the batch's code
            # (summed, not averaged, so its scale grows with batch_size).
            reg_loss = torch.norm(z, 1)

            loss = output_loss + lambda_ * reg_loss
            loss.backward()
            optimizer.step()

            # print out both loss components
            if batch_num % 100 == 0:
                # "\\lambda" prints a literal backslash; a bare "\l" is an
                # invalid escape sequence.
                print("epoch: {}, batch: {} // loss: {:.3f} // reg. loss (* \\lambda): {:.3f}".format(
                        epoch, batch_num, output_loss.item(), lambda_ * reg_loss.item()))
In [229]:
# Train a 16-unit regularized autoencoder with X_corrupt as input and X as the
# reconstruction target. lambda_=1 weights the l1 term as heavily as the
# reconstruction loss; in the log below the penalty falls to ~0 within the
# first hundred batches.
AER = AE_regularized(hidden_size=16)
optimizer = optim.SGD(AER.parameters(), momentum=0.9, lr=0.01)
train_regularized_AE(
    X_in=X_corrupt,
    X_target=X,
    model=AER,
    optimizer=optimizer,
    loss_function=loss_function,
    lambda_=1,
)
epoch: 0, batch: 0 // loss: 0.311 // reg. loss (* \lambda): 125.579
epoch: 0, batch: 100 // loss: 0.239 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 200 // loss: 0.207 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 300 // loss: 0.192 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 400 // loss: 0.196 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 500 // loss: 0.191 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 600 // loss: 0.189 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 700 // loss: 0.195 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 800 // loss: 0.162 // reg. loss (* \lambda): 0.005
epoch: 0, batch: 900 // loss: 0.198 // reg. loss (* \lambda): 0.184
epoch: 0, batch: 1000 // loss: 0.173 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 1100 // loss: 0.196 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 1200 // loss: 0.149 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 1300 // loss: 0.179 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 1400 // loss: 0.140 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 1500 // loss: 0.140 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 1600 // loss: 0.160 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 1700 // loss: 0.144 // reg. loss (* \lambda): 0.006
epoch: 0, batch: 1800 // loss: 0.170 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 1900 // loss: 0.144 // reg. loss (* \lambda): 0.001
epoch: 0, batch: 2000 // loss: 0.122 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 2100 // loss: 0.125 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 2200 // loss: 0.155 // reg. loss (* \lambda): 0.001
epoch: 0, batch: 2300 // loss: 0.132 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 2400 // loss: 0.099 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 2500 // loss: 0.106 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 2600 // loss: 0.140 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 2700 // loss: 0.103 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 2800 // loss: 0.139 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 2900 // loss: 0.094 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 3000 // loss: 0.106 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 3100 // loss: 0.127 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 3200 // loss: 0.090 // reg. loss (* \lambda): 0.002
epoch: 0, batch: 3300 // loss: 0.103 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 3400 // loss: 0.101 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 3500 // loss: 0.108 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 3600 // loss: 0.111 // reg. loss (* \lambda): 0.000
epoch: 0, batch: 3700 // loss: 0.122 // reg. loss (* \lambda): 0.000

epoch: 1, batch: 0 // loss: 0.115 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 100 // loss: 0.110 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 200 // loss: 0.108 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 300 // loss: 0.097 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 400 // loss: 0.101 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 500 // loss: 0.095 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 600 // loss: 0.097 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 700 // loss: 0.102 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 800 // loss: 0.092 // reg. loss (* \lambda): 0.004
epoch: 1, batch: 900 // loss: 0.113 // reg. loss (* \lambda): 0.161
epoch: 1, batch: 1000 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 1100 // loss: 0.110 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 1200 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 1300 // loss: 0.107 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 1400 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 1500 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 1600 // loss: 0.101 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 1700 // loss: 0.097 // reg. loss (* \lambda): 0.005
epoch: 1, batch: 1800 // loss: 0.107 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 1900 // loss: 0.095 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2000 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2100 // loss: 0.091 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2200 // loss: 0.108 // reg. loss (* \lambda): 0.001
epoch: 1, batch: 2300 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2400 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2500 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2600 // loss: 0.102 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2700 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2800 // loss: 0.105 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 2900 // loss: 0.075 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 3000 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 3100 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 3200 // loss: 0.077 // reg. loss (* \lambda): 0.001
epoch: 1, batch: 3300 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 3400 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 3500 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 3600 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 1, batch: 3700 // loss: 0.099 // reg. loss (* \lambda): 0.000

epoch: 2, batch: 0 // loss: 0.100 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 400 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 500 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 600 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 700 // loss: 0.087 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 800 // loss: 0.085 // reg. loss (* \lambda): 0.003
epoch: 2, batch: 900 // loss: 0.099 // reg. loss (* \lambda): 0.144
epoch: 2, batch: 1000 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 1100 // loss: 0.094 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 1300 // loss: 0.095 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 1400 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 1600 // loss: 0.092 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.004
epoch: 2, batch: 1800 // loss: 0.095 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 1900 // loss: 0.087 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2200 // loss: 0.100 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2300 // loss: 0.091 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2400 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2500 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2600 // loss: 0.095 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2800 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 2900 // loss: 0.075 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 3000 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 3100 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 3200 // loss: 0.078 // reg. loss (* \lambda): 0.001
epoch: 2, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 3500 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 3600 // loss: 0.091 // reg. loss (* \lambda): 0.000
epoch: 2, batch: 3700 // loss: 0.094 // reg. loss (* \lambda): 0.000

epoch: 3, batch: 0 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 100 // loss: 0.085 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 200 // loss: 0.095 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 400 // loss: 0.087 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 500 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 700 // loss: 0.085 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 800 // loss: 0.085 // reg. loss (* \lambda): 0.003
epoch: 3, batch: 900 // loss: 0.097 // reg. loss (* \lambda): 0.130
epoch: 3, batch: 1000 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 1100 // loss: 0.091 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 1200 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 1300 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 1400 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.004
epoch: 3, batch: 1800 // loss: 0.092 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2400 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2500 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2800 // loss: 0.097 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 3000 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 3200 // loss: 0.079 // reg. loss (* \lambda): 0.001
epoch: 3, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 3500 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 3, batch: 3700 // loss: 0.093 // reg. loss (* \lambda): 0.000

epoch: 4, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 800 // loss: 0.085 // reg. loss (* \lambda): 0.002
epoch: 4, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.119
epoch: 4, batch: 1000 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 1100 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 1400 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.003
epoch: 4, batch: 1800 // loss: 0.091 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2400 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2500 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001
epoch: 4, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 4, batch: 3700 // loss: 0.093 // reg. loss (* \lambda): 0.000

epoch: 5, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.002
epoch: 5, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.110
epoch: 5, batch: 1000 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.003
epoch: 5, batch: 1800 // loss: 0.091 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2500 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001
epoch: 5, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 5, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000

epoch: 6, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.002
epoch: 6, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.102
epoch: 6, batch: 1000 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.003
epoch: 6, batch: 1800 // loss: 0.091 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2500 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001
epoch: 6, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 6, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000

epoch: 7, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 300 // loss: 0.087 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.001
epoch: 7, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.096
epoch: 7, batch: 1000 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.003
epoch: 7, batch: 1800 // loss: 0.091 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2500 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001
epoch: 7, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 7, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000

epoch: 8, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 300 // loss: 0.087 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.001
epoch: 8, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.090
epoch: 8, batch: 1000 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.002
epoch: 8, batch: 1800 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2500 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001
epoch: 8, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 8, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000

epoch: 9, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 300 // loss: 0.087 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.001
epoch: 9, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.085
epoch: 9, batch: 1000 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.002
epoch: 9, batch: 1800 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2100 // loss: 0.089 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2500 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001
epoch: 9, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000
epoch: 9, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000

Variational auto-encoders

First, let's review on the board...

In [176]:
from torch.nn import functional as F

class VAE(nn.Module):
    '''
    Small fully-connected variational autoencoder.

    The encoder maps an input row to the mean and log-variance of a diagonal
    Gaussian over the latent code; the decoder maps a sampled code back to
    input space and squashes it into (0, 1) with a sigmoid.
    '''

    def __init__(self, input_size=784, hidden_size1=32, hidden_size2=32):
        '''
        In the initializer we setup model parameters/layers.

        input_size:   number of features per input row (also the output width).
        hidden_size1: width of the intermediate encoder/decoder layer.
        hidden_size2: dimensionality of the latent code z.
        '''
        super(VAE, self).__init__()

        self.input_size = input_size

        ### encoder layers
        # input_size is honoured here; it used to be hard-coded to 784.
        self.fc_e = nn.Linear(input_size, hidden_size1)
        self.fc_mean = nn.Linear(hidden_size1, hidden_size2)
        self.fc_logvar = nn.Linear(hidden_size1, hidden_size2)

        ### decoder layers
        self.fc_d1 = nn.Linear(hidden_size2, hidden_size1)
        self.fc_d2 = nn.Linear(hidden_size1, input_size)

    def encoder(self, x_in):
        '''Return (mean, logvar) of the latent Gaussian for each row of x_in.'''
        # NOTE(review): no non-linearity follows fc_e, so mean and logvar are
        # affine functions of x_in -- confirm this is intended.
        x = self.fc_e(x_in)
        mean = self.fc_mean(x)
        logvar = self.fc_logvar(x)
        return mean, logvar

    def decoder(self, z):
        '''Map latent codes z to reconstructions with entries in (0, 1).'''
        z = F.relu(self.fc_d1(z))
        # torch.sigmoid replaces the deprecated F.sigmoid.
        x_out = torch.sigmoid(self.fc_d2(z))
        return x_out

    def sample_normal(self, mean, logvar):
        '''
        Draw z ~ N(mean, exp(logvar)) in a way gradients can flow through.
        '''
        # Using torch.normal(means,sds) returns a stochastic tensor which we cannot backpropogate through.
        # Instead we utilize the 'reparameterization trick'.
        # http://stats.stackexchange.com/a/205336
        # http://dpkingma.com/wordpress/wp-content/uploads/2015/12/talk_nips_workshop_2015.pdf
        sd = torch.exp(logvar * 0.5)
        # randn_like samples standard-normal noise with sd's shape, dtype and
        # device, and avoids the copy-construct warning torch.tensor(tensor) raises.
        e = torch.randn_like(sd)
        return mean + e * sd

    def forward(self, x_in):
        '''Return (reconstruction, z_mean, z_logvar) for the batch x_in.'''
        z_mean, z_logvar = self.encoder(x_in)
        z = self.sample_normal(z_mean, z_logvar)
        x_out = self.decoder(z)
        return x_out, z_mean, z_logvar
In [177]:
def train_VAE(X_in, X_target, model, optimizer, loss_function, EPOCHS=10,
              batch_size=16, kl_weight=0.0):
    '''
    Train `model` with mini-batch gradient steps, visiting rows in order.

    X_in:          tensor of inputs, one example per row.
    X_target:      tensor of reconstruction targets, same number of rows.
    model:         callable returning (reconstruction, z_mean, z_logvar).
    optimizer:     torch optimizer over the model's parameters.
    loss_function: callable (reconstruction, target) -> scalar loss tensor.
    EPOCHS:        number of full passes over the data.
    batch_size:    rows per gradient step (the last batch may be smaller).
    kl_weight:     weight on the KL divergence between the encoder's Gaussian
                   and a standard normal prior. The default of 0.0 reproduces
                   the previous behaviour, where only the reconstruction loss
                   is optimized and z_mean / z_logvar are ignored.

    Prints the loss for every 100th batch; returns None.
    Raises ValueError on mismatched input/target lengths or batch_size < 1.
    '''
    # Derive the dataset size from the data rather than assuming 60000 rows.
    n_examples = len(X_in)
    if len(X_target) != n_examples:
        raise ValueError("X_in and X_target must have the same number of rows")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    for epoch in range(EPOCHS):
        print("")
        for batch_num, start in enumerate(range(0, n_examples, batch_size)):
            stop = start + batch_size

            # zero the parameter gradients
            optimizer.zero_grad()

            X_batch = X_in[start:stop].float()
            X_target_batch = X_target[start:stop].float()

            # now run our X's forward, get preds, incur
            # loss, backprop, and step the optimizer.
            X_tilde_batch, z_mean, z_logvar = model(X_batch)
            loss = loss_function(X_tilde_batch, X_target_batch)
            if kl_weight:
                # KL(N(mean, exp(logvar)) || N(0, I)): summed over latent
                # dimensions, averaged over the examples in the batch.
                kl = -0.5 * torch.sum(
                    1 + z_logvar - z_mean.pow(2) - z_logvar.exp(), dim=-1
                ).mean()
                loss = loss + kl_weight * kl
            loss.backward()
            optimizer.step()

            # print out loss
            if batch_num % 100 == 0:
                print("epoch: {}, batch: {} // loss: {:.3f}".format(epoch, batch_num, loss.item()))
In [178]:
# Build a VAE with the default layer sizes and fit it for 20 epochs with SGD
# plus momentum. X is passed as both input and target, so the model is trained
# to reconstruct its own input.
# NOTE(review): `optim`, `loss_function` and `X` are not defined in this cell --
# presumably they come from earlier cells; confirm on a fresh Restart & Run All.
m = VAE()
optimizer = optim.SGD(m.parameters(), lr=0.01, momentum=0.9)
train_VAE(X, X, m, optimizer, loss_function,  EPOCHS=20)
epoch: 0, batch: 0 // loss: 0.186
epoch: 0, batch: 100 // loss: 0.157
/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:39: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
epoch: 0, batch: 200 // loss: 0.187
epoch: 0, batch: 300 // loss: 0.175
epoch: 0, batch: 400 // loss: 0.172
epoch: 0, batch: 500 // loss: 0.162
epoch: 0, batch: 600 // loss: 0.167
epoch: 0, batch: 700 // loss: 0.164
epoch: 0, batch: 800 // loss: 0.173
epoch: 0, batch: 900 // loss: 0.167
epoch: 0, batch: 1000 // loss: 0.152
epoch: 0, batch: 1100 // loss: 0.157
epoch: 0, batch: 1200 // loss: 0.153
epoch: 0, batch: 1300 // loss: 0.148
epoch: 0, batch: 1400 // loss: 0.155
epoch: 0, batch: 1500 // loss: 0.148
epoch: 0, batch: 1600 // loss: 0.147
epoch: 0, batch: 1700 // loss: 0.144
epoch: 0, batch: 1800 // loss: 0.132
epoch: 0, batch: 1900 // loss: 0.135
epoch: 0, batch: 2000 // loss: 0.119
epoch: 0, batch: 2100 // loss: 0.123
epoch: 0, batch: 2200 // loss: 0.127
epoch: 0, batch: 2300 // loss: 0.119
epoch: 0, batch: 2400 // loss: 0.109
epoch: 0, batch: 2500 // loss: 0.100
epoch: 0, batch: 2600 // loss: 0.107
epoch: 0, batch: 2700 // loss: 0.094
epoch: 0, batch: 2800 // loss: 0.104
epoch: 0, batch: 2900 // loss: 0.091
epoch: 0, batch: 3000 // loss: 0.094
epoch: 0, batch: 3100 // loss: 0.089
epoch: 0, batch: 3200 // loss: 0.093
epoch: 0, batch: 3300 // loss: 0.088
epoch: 0, batch: 3400 // loss: 0.078
epoch: 0, batch: 3500 // loss: 0.084
epoch: 0, batch: 3600 // loss: 0.099
epoch: 0, batch: 3700 // loss: 0.092

epoch: 1, batch: 0 // loss: 0.108
epoch: 1, batch: 100 // loss: 0.089
epoch: 1, batch: 200 // loss: 0.101
epoch: 1, batch: 300 // loss: 0.090
epoch: 1, batch: 400 // loss: 0.094
epoch: 1, batch: 500 // loss: 0.080
epoch: 1, batch: 600 // loss: 0.086
epoch: 1, batch: 700 // loss: 0.086
epoch: 1, batch: 800 // loss: 0.089
epoch: 1, batch: 900 // loss: 0.093
epoch: 1, batch: 1000 // loss: 0.075
epoch: 1, batch: 1100 // loss: 0.086
epoch: 1, batch: 1200 // loss: 0.084
epoch: 1, batch: 1300 // loss: 0.086
epoch: 1, batch: 1400 // loss: 0.084
epoch: 1, batch: 1500 // loss: 0.079
epoch: 1, batch: 1600 // loss: 0.089
epoch: 1, batch: 1700 // loss: 0.089
epoch: 1, batch: 1800 // loss: 0.086
epoch: 1, batch: 1900 // loss: 0.082
epoch: 1, batch: 2000 // loss: 0.071
epoch: 1, batch: 2100 // loss: 0.085
epoch: 1, batch: 2200 // loss: 0.093
epoch: 1, batch: 2300 // loss: 0.087
epoch: 1, batch: 2400 // loss: 0.075
epoch: 1, batch: 2500 // loss: 0.072
epoch: 1, batch: 2600 // loss: 0.083
epoch: 1, batch: 2700 // loss: 0.074
epoch: 1, batch: 2800 // loss: 0.084
epoch: 1, batch: 2900 // loss: 0.073
epoch: 1, batch: 3000 // loss: 0.075
epoch: 1, batch: 3100 // loss: 0.071
epoch: 1, batch: 3200 // loss: 0.070
epoch: 1, batch: 3300 // loss: 0.068
epoch: 1, batch: 3400 // loss: 0.062
epoch: 1, batch: 3500 // loss: 0.063
epoch: 1, batch: 3600 // loss: 0.074
epoch: 1, batch: 3700 // loss: 0.071

epoch: 2, batch: 0 // loss: 0.086
epoch: 2, batch: 100 // loss: 0.075
epoch: 2, batch: 200 // loss: 0.070
epoch: 2, batch: 300 // loss: 0.072
epoch: 2, batch: 400 // loss: 0.073
epoch: 2, batch: 500 // loss: 0.059
epoch: 2, batch: 600 // loss: 0.065
epoch: 2, batch: 700 // loss: 0.065
epoch: 2, batch: 800 // loss: 0.067
epoch: 2, batch: 900 // loss: 0.072
epoch: 2, batch: 1000 // loss: 0.056
epoch: 2, batch: 1100 // loss: 0.062
epoch: 2, batch: 1200 // loss: 0.060
epoch: 2, batch: 1300 // loss: 0.067
epoch: 2, batch: 1400 // loss: 0.060
epoch: 2, batch: 1500 // loss: 0.061
epoch: 2, batch: 1600 // loss: 0.068
epoch: 2, batch: 1700 // loss: 0.063
epoch: 2, batch: 1800 // loss: 0.066
epoch: 2, batch: 1900 // loss: 0.060
epoch: 2, batch: 2000 // loss: 0.057
epoch: 2, batch: 2100 // loss: 0.062
epoch: 2, batch: 2200 // loss: 0.070
epoch: 2, batch: 2300 // loss: 0.067
epoch: 2, batch: 2400 // loss: 0.049
epoch: 2, batch: 2500 // loss: 0.054
epoch: 2, batch: 2600 // loss: 0.058
epoch: 2, batch: 2700 // loss: 0.055
epoch: 2, batch: 2800 // loss: 0.059
epoch: 2, batch: 2900 // loss: 0.056
epoch: 2, batch: 3000 // loss: 0.057
epoch: 2, batch: 3100 // loss: 0.050
epoch: 2, batch: 3200 // loss: 0.050
epoch: 2, batch: 3300 // loss: 0.049
epoch: 2, batch: 3400 // loss: 0.048
epoch: 2, batch: 3500 // loss: 0.042
epoch: 2, batch: 3600 // loss: 0.052
epoch: 2, batch: 3700 // loss: 0.051

epoch: 3, batch: 0 // loss: 0.067
epoch: 3, batch: 100 // loss: 0.060
epoch: 3, batch: 200 // loss: 0.051
epoch: 3, batch: 300 // loss: 0.058
epoch: 3, batch: 400 // loss: 0.056
epoch: 3, batch: 500 // loss: 0.045
epoch: 3, batch: 600 // loss: 0.048
epoch: 3, batch: 700 // loss: 0.051
epoch: 3, batch: 800 // loss: 0.052
epoch: 3, batch: 900 // loss: 0.058
epoch: 3, batch: 1000 // loss: 0.046
epoch: 3, batch: 1100 // loss: 0.048
epoch: 3, batch: 1200 // loss: 0.050
epoch: 3, batch: 1300 // loss: 0.053
epoch: 3, batch: 1400 // loss: 0.047
epoch: 3, batch: 1500 // loss: 0.052
epoch: 3, batch: 1600 // loss: 0.059
epoch: 3, batch: 1700 // loss: 0.050
epoch: 3, batch: 1800 // loss: 0.055
epoch: 3, batch: 1900 // loss: 0.050
epoch: 3, batch: 2000 // loss: 0.049
epoch: 3, batch: 2100 // loss: 0.052
epoch: 3, batch: 2200 // loss: 0.055
epoch: 3, batch: 2300 // loss: 0.054
epoch: 3, batch: 2400 // loss: 0.043
epoch: 3, batch: 2500 // loss: 0.046
epoch: 3, batch: 2600 // loss: 0.048
epoch: 3, batch: 2700 // loss: 0.046
epoch: 3, batch: 2800 // loss: 0.048
epoch: 3, batch: 2900 // loss: 0.048
epoch: 3, batch: 3000 // loss: 0.050
epoch: 3, batch: 3100 // loss: 0.043
epoch: 3, batch: 3200 // loss: 0.043
epoch: 3, batch: 3300 // loss: 0.041
epoch: 3, batch: 3400 // loss: 0.044
epoch: 3, batch: 3500 // loss: 0.035
epoch: 3, batch: 3600 // loss: 0.044
epoch: 3, batch: 3700 // loss: 0.044

epoch: 4, batch: 0 // loss: 0.057
epoch: 4, batch: 100 // loss: 0.049
epoch: 4, batch: 200 // loss: 0.044
epoch: 4, batch: 300 // loss: 0.052
epoch: 4, batch: 400 // loss: 0.047
epoch: 4, batch: 500 // loss: 0.040
epoch: 4, batch: 600 // loss: 0.041
epoch: 4, batch: 700 // loss: 0.045
epoch: 4, batch: 800 // loss: 0.042
epoch: 4, batch: 900 // loss: 0.050
epoch: 4, batch: 1000 // loss: 0.043
epoch: 4, batch: 1100 // loss: 0.043
epoch: 4, batch: 1200 // loss: 0.045
epoch: 4, batch: 1300 // loss: 0.046
epoch: 4, batch: 1400 // loss: 0.042
epoch: 4, batch: 1500 // loss: 0.046
epoch: 4, batch: 1600 // loss: 0.053
epoch: 4, batch: 1700 // loss: 0.043
epoch: 4, batch: 1800 // loss: 0.050
epoch: 4, batch: 1900 // loss: 0.044
epoch: 4, batch: 2000 // loss: 0.045
epoch: 4, batch: 2100 // loss: 0.046
epoch: 4, batch: 2200 // loss: 0.048
epoch: 4, batch: 2300 // loss: 0.049
epoch: 4, batch: 2400 // loss: 0.041
epoch: 4, batch: 2500 // loss: 0.042
epoch: 4, batch: 2600 // loss: 0.044
epoch: 4, batch: 2700 // loss: 0.042
epoch: 4, batch: 2800 // loss: 0.043
epoch: 4, batch: 2900 // loss: 0.044
epoch: 4, batch: 3000 // loss: 0.046
epoch: 4, batch: 3100 // loss: 0.040
epoch: 4, batch: 3200 // loss: 0.038
epoch: 4, batch: 3300 // loss: 0.038
epoch: 4, batch: 3400 // loss: 0.042
epoch: 4, batch: 3500 // loss: 0.032
epoch: 4, batch: 3600 // loss: 0.041
epoch: 4, batch: 3700 // loss: 0.040

epoch: 5, batch: 0 // loss: 0.052
epoch: 5, batch: 100 // loss: 0.045
epoch: 5, batch: 200 // loss: 0.041
epoch: 5, batch: 300 // loss: 0.048
epoch: 5, batch: 400 // loss: 0.044
epoch: 5, batch: 500 // loss: 0.038
epoch: 5, batch: 600 // loss: 0.038
epoch: 5, batch: 700 // loss: 0.041
epoch: 5, batch: 800 // loss: 0.039
epoch: 5, batch: 900 // loss: 0.046
epoch: 5, batch: 1000 // loss: 0.042
epoch: 5, batch: 1100 // loss: 0.041
epoch: 5, batch: 1200 // loss: 0.043
epoch: 5, batch: 1300 // loss: 0.043
epoch: 5, batch: 1400 // loss: 0.040
epoch: 5, batch: 1500 // loss: 0.043
epoch: 5, batch: 1600 // loss: 0.050
epoch: 5, batch: 1700 // loss: 0.040
epoch: 5, batch: 1800 // loss: 0.048
epoch: 5, batch: 1900 // loss: 0.042
epoch: 5, batch: 2000 // loss: 0.043
epoch: 5, batch: 2100 // loss: 0.044
epoch: 5, batch: 2200 // loss: 0.045
epoch: 5, batch: 2300 // loss: 0.046
epoch: 5, batch: 2400 // loss: 0.039
epoch: 5, batch: 2500 // loss: 0.040
epoch: 5, batch: 2600 // loss: 0.042
epoch: 5, batch: 2700 // loss: 0.041
epoch: 5, batch: 2800 // loss: 0.041
epoch: 5, batch: 2900 // loss: 0.042
epoch: 5, batch: 3000 // loss: 0.044
epoch: 5, batch: 3100 // loss: 0.040
epoch: 5, batch: 3200 // loss: 0.036
epoch: 5, batch: 3300 // loss: 0.036
epoch: 5, batch: 3400 // loss: 0.041
epoch: 5, batch: 3500 // loss: 0.031
epoch: 5, batch: 3600 // loss: 0.040
epoch: 5, batch: 3700 // loss: 0.038

epoch: 6, batch: 0 // loss: 0.051
epoch: 6, batch: 100 // loss: 0.043
epoch: 6, batch: 200 // loss: 0.039
epoch: 6, batch: 300 // loss: 0.047
epoch: 6, batch: 400 // loss: 0.043
epoch: 6, batch: 500 // loss: 0.037
epoch: 6, batch: 600 // loss: 0.037
epoch: 6, batch: 700 // loss: 0.041
epoch: 6, batch: 800 // loss: 0.037
epoch: 6, batch: 900 // loss: 0.045
epoch: 6, batch: 1000 // loss: 0.042
epoch: 6, batch: 1100 // loss: 0.041
epoch: 6, batch: 1200 // loss: 0.042
epoch: 6, batch: 1300 // loss: 0.042
epoch: 6, batch: 1400 // loss: 0.040
epoch: 6, batch: 1500 // loss: 0.042
epoch: 6, batch: 1600 // loss: 0.049
epoch: 6, batch: 1700 // loss: 0.039
epoch: 6, batch: 1800 // loss: 0.047
epoch: 6, batch: 1900 // loss: 0.041
epoch: 6, batch: 2000 // loss: 0.042
epoch: 6, batch: 2100 // loss: 0.042
epoch: 6, batch: 2200 // loss: 0.044
epoch: 6, batch: 2300 // loss: 0.045
epoch: 6, batch: 2400 // loss: 0.038
epoch: 6, batch: 2500 // loss: 0.039
epoch: 6, batch: 2600 // loss: 0.042
epoch: 6, batch: 2700 // loss: 0.041
epoch: 6, batch: 2800 // loss: 0.040
epoch: 6, batch: 2900 // loss: 0.041
epoch: 6, batch: 3000 // loss: 0.044
epoch: 6, batch: 3100 // loss: 0.039
epoch: 6, batch: 3200 // loss: 0.036
epoch: 6, batch: 3300 // loss: 0.035
epoch: 6, batch: 3400 // loss: 0.040
epoch: 6, batch: 3500 // loss: 0.030
epoch: 6, batch: 3600 // loss: 0.039
epoch: 6, batch: 3700 // loss: 0.037

epoch: 7, batch: 0 // loss: 0.049
epoch: 7, batch: 100 // loss: 0.042
epoch: 7, batch: 200 // loss: 0.038
epoch: 7, batch: 300 // loss: 0.046
epoch: 7, batch: 400 // loss: 0.042
epoch: 7, batch: 500 // loss: 0.036
epoch: 7, batch: 600 // loss: 0.036
epoch: 7, batch: 700 // loss: 0.040
epoch: 7, batch: 800 // loss: 0.036
epoch: 7, batch: 900 // loss: 0.044
epoch: 7, batch: 1000 // loss: 0.042
epoch: 7, batch: 1100 // loss: 0.040
epoch: 7, batch: 1200 // loss: 0.041
epoch: 7, batch: 1300 // loss: 0.042
epoch: 7, batch: 1400 // loss: 0.039
epoch: 7, batch: 1500 // loss: 0.042
epoch: 7, batch: 1600 // loss: 0.048
epoch: 7, batch: 1700 // loss: 0.038
epoch: 7, batch: 1800 // loss: 0.046
epoch: 7, batch: 1900 // loss: 0.041
epoch: 7, batch: 2000 // loss: 0.042
epoch: 7, batch: 2100 // loss: 0.041
epoch: 7, batch: 2200 // loss: 0.043
epoch: 7, batch: 2300 // loss: 0.044
epoch: 7, batch: 2400 // loss: 0.037
epoch: 7, batch: 2500 // loss: 0.039
epoch: 7, batch: 2600 // loss: 0.041
epoch: 7, batch: 2700 // loss: 0.040
epoch: 7, batch: 2800 // loss: 0.039
epoch: 7, batch: 2900 // loss: 0.040
epoch: 7, batch: 3000 // loss: 0.043
epoch: 7, batch: 3100 // loss: 0.039
epoch: 7, batch: 3200 // loss: 0.035
epoch: 7, batch: 3300 // loss: 0.034
epoch: 7, batch: 3400 // loss: 0.040
epoch: 7, batch: 3500 // loss: 0.030
epoch: 7, batch: 3600 // loss: 0.038
epoch: 7, batch: 3700 // loss: 0.037

epoch: 8, batch: 0 // loss: 0.048
epoch: 8, batch: 100 // loss: 0.041
epoch: 8, batch: 200 // loss: 0.037
epoch: 8, batch: 300 // loss: 0.045
epoch: 8, batch: 400 // loss: 0.041
epoch: 8, batch: 500 // loss: 0.035
epoch: 8, batch: 600 // loss: 0.036
epoch: 8, batch: 700 // loss: 0.039
epoch: 8, batch: 800 // loss: 0.036
epoch: 8, batch: 900 // loss: 0.043
epoch: 8, batch: 1000 // loss: 0.041
epoch: 8, batch: 1100 // loss: 0.039
epoch: 8, batch: 1200 // loss: 0.040
epoch: 8, batch: 1300 // loss: 0.041
epoch: 8, batch: 1400 // loss: 0.038
epoch: 8, batch: 1500 // loss: 0.041
epoch: 8, batch: 1600 // loss: 0.047
epoch: 8, batch: 1700 // loss: 0.037
epoch: 8, batch: 1800 // loss: 0.046
epoch: 8, batch: 1900 // loss: 0.039
epoch: 8, batch: 2000 // loss: 0.041
epoch: 8, batch: 2100 // loss: 0.040
epoch: 8, batch: 2200 // loss: 0.042
epoch: 8, batch: 2300 // loss: 0.043
epoch: 8, batch: 2400 // loss: 0.037
epoch: 8, batch: 2500 // loss: 0.038
epoch: 8, batch: 2600 // loss: 0.041
epoch: 8, batch: 2700 // loss: 0.039
epoch: 8, batch: 2800 // loss: 0.038
epoch: 8, batch: 2900 // loss: 0.039
epoch: 8, batch: 3000 // loss: 0.042
epoch: 8, batch: 3100 // loss: 0.038
epoch: 8, batch: 3200 // loss: 0.034
epoch: 8, batch: 3300 // loss: 0.034
epoch: 8, batch: 3400 // loss: 0.039
epoch: 8, batch: 3500 // loss: 0.029
epoch: 8, batch: 3600 // loss: 0.037
epoch: 8, batch: 3700 // loss: 0.036

epoch: 9, batch: 0 // loss: 0.047
epoch: 9, batch: 100 // loss: 0.040
epoch: 9, batch: 200 // loss: 0.036
epoch: 9, batch: 300 // loss: 0.044
epoch: 9, batch: 400 // loss: 0.040
epoch: 9, batch: 500 // loss: 0.035
epoch: 9, batch: 600 // loss: 0.035
epoch: 9, batch: 700 // loss: 0.038
epoch: 9, batch: 800 // loss: 0.035
epoch: 9, batch: 900 // loss: 0.042
epoch: 9, batch: 1000 // loss: 0.040
epoch: 9, batch: 1100 // loss: 0.038
epoch: 9, batch: 1200 // loss: 0.039
epoch: 9, batch: 1300 // loss: 0.040
epoch: 9, batch: 1400 // loss: 0.037
epoch: 9, batch: 1500 // loss: 0.039
epoch: 9, batch: 1600 // loss: 0.046
epoch: 9, batch: 1700 // loss: 0.036
epoch: 9, batch: 1800 // loss: 0.044
epoch: 9, batch: 1900 // loss: 0.037
epoch: 9, batch: 2000 // loss: 0.040
epoch: 9, batch: 2100 // loss: 0.039
epoch: 9, batch: 2200 // loss: 0.041
epoch: 9, batch: 2300 // loss: 0.041
epoch: 9, batch: 2400 // loss: 0.036
epoch: 9, batch: 2500 // loss: 0.037
epoch: 9, batch: 2600 // loss: 0.040
epoch: 9, batch: 2700 // loss: 0.037
epoch: 9, batch: 2800 // loss: 0.036
epoch: 9, batch: 2900 // loss: 0.038
epoch: 9, batch: 3000 // loss: 0.041
epoch: 9, batch: 3100 // loss: 0.038
epoch: 9, batch: 3200 // loss: 0.033
epoch: 9, batch: 3300 // loss: 0.033
epoch: 9, batch: 3400 // loss: 0.037
epoch: 9, batch: 3500 // loss: 0.028
epoch: 9, batch: 3600 // loss: 0.036
epoch: 9, batch: 3700 // loss: 0.035

epoch: 10, batch: 0 // loss: 0.045
epoch: 10, batch: 100 // loss: 0.039
epoch: 10, batch: 200 // loss: 0.035
epoch: 10, batch: 300 // loss: 0.043
epoch: 10, batch: 400 // loss: 0.039
epoch: 10, batch: 500 // loss: 0.034
epoch: 10, batch: 600 // loss: 0.034
epoch: 10, batch: 700 // loss: 0.037
epoch: 10, batch: 800 // loss: 0.034
epoch: 10, batch: 900 // loss: 0.041
epoch: 10, batch: 1000 // loss: 0.039
epoch: 10, batch: 1100 // loss: 0.037
epoch: 10, batch: 1200 // loss: 0.037
epoch: 10, batch: 1300 // loss: 0.039
epoch: 10, batch: 1400 // loss: 0.036
epoch: 10, batch: 1500 // loss: 0.038
epoch: 10, batch: 1600 // loss: 0.044
epoch: 10, batch: 1700 // loss: 0.035
epoch: 10, batch: 1800 // loss: 0.043
epoch: 10, batch: 1900 // loss: 0.035
epoch: 10, batch: 2000 // loss: 0.039
epoch: 10, batch: 2100 // loss: 0.038
epoch: 10, batch: 2200 // loss: 0.039
epoch: 10, batch: 2300 // loss: 0.040
epoch: 10, batch: 2400 // loss: 0.034
epoch: 10, batch: 2500 // loss: 0.035
epoch: 10, batch: 2600 // loss: 0.038
epoch: 10, batch: 2700 // loss: 0.036
epoch: 10, batch: 2800 // loss: 0.034
epoch: 10, batch: 2900 // loss: 0.037
epoch: 10, batch: 3000 // loss: 0.040
epoch: 10, batch: 3100 // loss: 0.037
epoch: 10, batch: 3200 // loss: 0.031
epoch: 10, batch: 3300 // loss: 0.031
epoch: 10, batch: 3400 // loss: 0.036
epoch: 10, batch: 3500 // loss: 0.027
epoch: 10, batch: 3600 // loss: 0.034
epoch: 10, batch: 3700 // loss: 0.033

epoch: 11, batch: 0 // loss: 0.044
epoch: 11, batch: 100 // loss: 0.037
epoch: 11, batch: 200 // loss: 0.034
epoch: 11, batch: 300 // loss: 0.041
epoch: 11, batch: 400 // loss: 0.037
epoch: 11, batch: 500 // loss: 0.033
epoch: 11, batch: 600 // loss: 0.033
epoch: 11, batch: 700 // loss: 0.035
epoch: 11, batch: 800 // loss: 0.032
epoch: 11, batch: 900 // loss: 0.040
epoch: 11, batch: 1000 // loss: 0.038
epoch: 11, batch: 1100 // loss: 0.036
epoch: 11, batch: 1200 // loss: 0.036
epoch: 11, batch: 1300 // loss: 0.039
epoch: 11, batch: 1400 // loss: 0.035
epoch: 11, batch: 1500 // loss: 0.037
epoch: 11, batch: 1600 // loss: 0.042
epoch: 11, batch: 1700 // loss: 0.033
epoch: 11, batch: 1800 // loss: 0.041
epoch: 11, batch: 1900 // loss: 0.033
epoch: 11, batch: 2000 // loss: 0.038
epoch: 11, batch: 2100 // loss: 0.036
epoch: 11, batch: 2200 // loss: 0.038
epoch: 11, batch: 2300 // loss: 0.039
epoch: 11, batch: 2400 // loss: 0.033
epoch: 11, batch: 2500 // loss: 0.034
epoch: 11, batch: 2600 // loss: 0.037
epoch: 11, batch: 2700 // loss: 0.035
epoch: 11, batch: 2800 // loss: 0.033
epoch: 11, batch: 2900 // loss: 0.036
epoch: 11, batch: 3000 // loss: 0.039
epoch: 11, batch: 3100 // loss: 0.037
epoch: 11, batch: 3200 // loss: 0.030
epoch: 11, batch: 3300 // loss: 0.030
epoch: 11, batch: 3400 // loss: 0.035
epoch: 11, batch: 3500 // loss: 0.026
epoch: 11, batch: 3600 // loss: 0.033
epoch: 11, batch: 3700 // loss: 0.032

epoch: 12, batch: 0 // loss: 0.042
epoch: 12, batch: 100 // loss: 0.036
epoch: 12, batch: 200 // loss: 0.033
epoch: 12, batch: 300 // loss: 0.040
epoch: 12, batch: 400 // loss: 0.036
epoch: 12, batch: 500 // loss: 0.033
epoch: 12, batch: 600 // loss: 0.033
epoch: 12, batch: 700 // loss: 0.035
epoch: 12, batch: 800 // loss: 0.031
epoch: 12, batch: 900 // loss: 0.039
epoch: 12, batch: 1000 // loss: 0.037
epoch: 12, batch: 1100 // loss: 0.036
epoch: 12, batch: 1200 // loss: 0.035
epoch: 12, batch: 1300 // loss: 0.037
epoch: 12, batch: 1400 // loss: 0.034
epoch: 12, batch: 1500 // loss: 0.035
epoch: 12, batch: 1600 // loss: 0.041
epoch: 12, batch: 1700 // loss: 0.033
epoch: 12, batch: 1800 // loss: 0.040
epoch: 12, batch: 1900 // loss: 0.032
epoch: 12, batch: 2000 // loss: 0.037
epoch: 12, batch: 2100 // loss: 0.035
epoch: 12, batch: 2200 // loss: 0.037
epoch: 12, batch: 2300 // loss: 0.038
epoch: 12, batch: 2400 // loss: 0.032
epoch: 12, batch: 2500 // loss: 0.034
epoch: 12, batch: 2600 // loss: 0.037
epoch: 12, batch: 2700 // loss: 0.034
epoch: 12, batch: 2800 // loss: 0.031
epoch: 12, batch: 2900 // loss: 0.035
epoch: 12, batch: 3000 // loss: 0.038
epoch: 12, batch: 3100 // loss: 0.036
epoch: 12, batch: 3200 // loss: 0.030
epoch: 12, batch: 3300 // loss: 0.030
epoch: 12, batch: 3400 // loss: 0.034
epoch: 12, batch: 3500 // loss: 0.025
epoch: 12, batch: 3600 // loss: 0.032
epoch: 12, batch: 3700 // loss: 0.032

epoch: 13, batch: 0 // loss: 0.042
epoch: 13, batch: 100 // loss: 0.035
epoch: 13, batch: 200 // loss: 0.032
epoch: 13, batch: 300 // loss: 0.039
epoch: 13, batch: 400 // loss: 0.035
epoch: 13, batch: 500 // loss: 0.032
epoch: 13, batch: 600 // loss: 0.032
epoch: 13, batch: 700 // loss: 0.034
epoch: 13, batch: 800 // loss: 0.031
epoch: 13, batch: 900 // loss: 0.039
epoch: 13, batch: 1000 // loss: 0.036
epoch: 13, batch: 1100 // loss: 0.035
epoch: 13, batch: 1200 // loss: 0.034
epoch: 13, batch: 1300 // loss: 0.037
epoch: 13, batch: 1400 // loss: 0.033
epoch: 13, batch: 1500 // loss: 0.035
epoch: 13, batch: 1600 // loss: 0.040
epoch: 13, batch: 1700 // loss: 0.032
epoch: 13, batch: 1800 // loss: 0.039
epoch: 13, batch: 1900 // loss: 0.031
epoch: 13, batch: 2000 // loss: 0.037
epoch: 13, batch: 2100 // loss: 0.034
epoch: 13, batch: 2200 // loss: 0.037
epoch: 13, batch: 2300 // loss: 0.037
epoch: 13, batch: 2400 // loss: 0.032
epoch: 13, batch: 2500 // loss: 0.033
epoch: 13, batch: 2600 // loss: 0.036
epoch: 13, batch: 2700 // loss: 0.034
epoch: 13, batch: 2800 // loss: 0.030
epoch: 13, batch: 2900 // loss: 0.035
epoch: 13, batch: 3000 // loss: 0.037
epoch: 13, batch: 3100 // loss: 0.036
epoch: 13, batch: 3200 // loss: 0.029
epoch: 13, batch: 3300 // loss: 0.029
epoch: 13, batch: 3400 // loss: 0.034
epoch: 13, batch: 3500 // loss: 0.025
epoch: 13, batch: 3600 // loss: 0.031
epoch: 13, batch: 3700 // loss: 0.031

epoch: 14, batch: 0 // loss: 0.041
epoch: 14, batch: 100 // loss: 0.035
epoch: 14, batch: 200 // loss: 0.031
epoch: 14, batch: 300 // loss: 0.038
epoch: 14, batch: 400 // loss: 0.034
epoch: 14, batch: 500 // loss: 0.032
epoch: 14, batch: 600 // loss: 0.032
epoch: 14, batch: 700 // loss: 0.033
epoch: 14, batch: 800 // loss: 0.030
epoch: 14, batch: 900 // loss: 0.038
epoch: 14, batch: 1000 // loss: 0.036
epoch: 14, batch: 1100 // loss: 0.034
epoch: 14, batch: 1200 // loss: 0.033
epoch: 14, batch: 1300 // loss: 0.036
epoch: 14, batch: 1400 // loss: 0.033
epoch: 14, batch: 1500 // loss: 0.034
epoch: 14, batch: 1600 // loss: 0.039
epoch: 14, batch: 1700 // loss: 0.031
epoch: 14, batch: 1800 // loss: 0.038
epoch: 14, batch: 1900 // loss: 0.030
epoch: 14, batch: 2000 // loss: 0.036
epoch: 14, batch: 2100 // loss: 0.034
epoch: 14, batch: 2200 // loss: 0.036
epoch: 14, batch: 2300 // loss: 0.036
epoch: 14, batch: 2400 // loss: 0.031
epoch: 14, batch: 2500 // loss: 0.033
epoch: 14, batch: 2600 // loss: 0.035
epoch: 14, batch: 2700 // loss: 0.033
epoch: 14, batch: 2800 // loss: 0.030
epoch: 14, batch: 2900 // loss: 0.034
epoch: 14, batch: 3000 // loss: 0.037
epoch: 14, batch: 3100 // loss: 0.035
epoch: 14, batch: 3200 // loss: 0.029
epoch: 14, batch: 3300 // loss: 0.029
epoch: 14, batch: 3400 // loss: 0.033
epoch: 14, batch: 3500 // loss: 0.025
epoch: 14, batch: 3600 // loss: 0.031
epoch: 14, batch: 3700 // loss: 0.030

epoch: 15, batch: 0 // loss: 0.040
epoch: 15, batch: 100 // loss: 0.034
epoch: 15, batch: 200 // loss: 0.031
epoch: 15, batch: 300 // loss: 0.038
epoch: 15, batch: 400 // loss: 0.034
epoch: 15, batch: 500 // loss: 0.032
epoch: 15, batch: 600 // loss: 0.031
epoch: 15, batch: 700 // loss: 0.033
epoch: 15, batch: 800 // loss: 0.029
epoch: 15, batch: 900 // loss: 0.038
epoch: 15, batch: 1000 // loss: 0.036
epoch: 15, batch: 1100 // loss: 0.034
epoch: 15, batch: 1200 // loss: 0.033
epoch: 15, batch: 1300 // loss: 0.036
epoch: 15, batch: 1400 // loss: 0.033
epoch: 15, batch: 1500 // loss: 0.034
epoch: 15, batch: 1600 // loss: 0.039
epoch: 15, batch: 1700 // loss: 0.031
epoch: 15, batch: 1800 // loss: 0.037
epoch: 15, batch: 1900 // loss: 0.030
epoch: 15, batch: 2000 // loss: 0.035
epoch: 15, batch: 2100 // loss: 0.033
epoch: 15, batch: 2200 // loss: 0.036
epoch: 15, batch: 2300 // loss: 0.036
epoch: 15, batch: 2400 // loss: 0.031
epoch: 15, batch: 2500 // loss: 0.032
epoch: 15, batch: 2600 // loss: 0.035
epoch: 15, batch: 2700 // loss: 0.033
epoch: 15, batch: 2800 // loss: 0.029
epoch: 15, batch: 2900 // loss: 0.034
epoch: 15, batch: 3000 // loss: 0.036
epoch: 15, batch: 3100 // loss: 0.035
epoch: 15, batch: 3200 // loss: 0.028
epoch: 15, batch: 3300 // loss: 0.028
epoch: 15, batch: 3400 // loss: 0.033
epoch: 15, batch: 3500 // loss: 0.024
epoch: 15, batch: 3600 // loss: 0.030
epoch: 15, batch: 3700 // loss: 0.030

epoch: 16, batch: 0 // loss: 0.040
epoch: 16, batch: 100 // loss: 0.033
epoch: 16, batch: 200 // loss: 0.030
epoch: 16, batch: 300 // loss: 0.037
epoch: 16, batch: 400 // loss: 0.033
epoch: 16, batch: 500 // loss: 0.031
epoch: 16, batch: 600 // loss: 0.031
epoch: 16, batch: 700 // loss: 0.032
epoch: 16, batch: 800 // loss: 0.029
epoch: 16, batch: 900 // loss: 0.038
epoch: 16, batch: 1000 // loss: 0.035
epoch: 16, batch: 1100 // loss: 0.033
epoch: 16, batch: 1200 // loss: 0.032
epoch: 16, batch: 1300 // loss: 0.036
epoch: 16, batch: 1400 // loss: 0.032
epoch: 16, batch: 1500 // loss: 0.034
epoch: 16, batch: 1600 // loss: 0.038
epoch: 16, batch: 1700 // loss: 0.030
epoch: 16, batch: 1800 // loss: 0.036
epoch: 16, batch: 1900 // loss: 0.029
epoch: 16, batch: 2000 // loss: 0.035
epoch: 16, batch: 2100 // loss: 0.033
epoch: 16, batch: 2200 // loss: 0.035
epoch: 16, batch: 2300 // loss: 0.035
epoch: 16, batch: 2400 // loss: 0.031
epoch: 16, batch: 2500 // loss: 0.031
epoch: 16, batch: 2600 // loss: 0.035
epoch: 16, batch: 2700 // loss: 0.032
epoch: 16, batch: 2800 // loss: 0.029
epoch: 16, batch: 2900 // loss: 0.033
epoch: 16, batch: 3000 // loss: 0.036
epoch: 16, batch: 3100 // loss: 0.034
epoch: 16, batch: 3200 // loss: 0.027
epoch: 16, batch: 3300 // loss: 0.028
epoch: 16, batch: 3400 // loss: 0.032
epoch: 16, batch: 3500 // loss: 0.024
epoch: 16, batch: 3600 // loss: 0.029
epoch: 16, batch: 3700 // loss: 0.029

epoch: 17, batch: 0 // loss: 0.039
epoch: 17, batch: 100 // loss: 0.032
epoch: 17, batch: 200 // loss: 0.029
epoch: 17, batch: 300 // loss: 0.037
epoch: 17, batch: 400 // loss: 0.033
epoch: 17, batch: 500 // loss: 0.031
epoch: 17, batch: 600 // loss: 0.030
epoch: 17, batch: 700 // loss: 0.032
epoch: 17, batch: 800 // loss: 0.028
epoch: 17, batch: 900 // loss: 0.037
epoch: 17, batch: 1000 // loss: 0.035
epoch: 17, batch: 1100 // loss: 0.032
epoch: 17, batch: 1200 // loss: 0.031
epoch: 17, batch: 1300 // loss: 0.035
epoch: 17, batch: 1400 // loss: 0.031
epoch: 17, batch: 1500 // loss: 0.033
epoch: 17, batch: 1600 // loss: 0.037
epoch: 17, batch: 1700 // loss: 0.030
epoch: 17, batch: 1800 // loss: 0.035
epoch: 17, batch: 1900 // loss: 0.029
epoch: 17, batch: 2000 // loss: 0.034
epoch: 17, batch: 2100 // loss: 0.032
epoch: 17, batch: 2200 // loss: 0.034
epoch: 17, batch: 2300 // loss: 0.033
epoch: 17, batch: 2400 // loss: 0.030
epoch: 17, batch: 2500 // loss: 0.030
epoch: 17, batch: 2600 // loss: 0.034
epoch: 17, batch: 2700 // loss: 0.031
epoch: 17, batch: 2800 // loss: 0.028
epoch: 17, batch: 2900 // loss: 0.032
epoch: 17, batch: 3000 // loss: 0.035
epoch: 17, batch: 3100 // loss: 0.034
epoch: 17, batch: 3200 // loss: 0.027
epoch: 17, batch: 3300 // loss: 0.027
epoch: 17, batch: 3400 // loss: 0.031
epoch: 17, batch: 3500 // loss: 0.024
epoch: 17, batch: 3600 // loss: 0.029
epoch: 17, batch: 3700 // loss: 0.028

epoch: 18, batch: 0 // loss: 0.039
epoch: 18, batch: 100 // loss: 0.031
epoch: 18, batch: 200 // loss: 0.028
epoch: 18, batch: 300 // loss: 0.036
epoch: 18, batch: 400 // loss: 0.032
epoch: 18, batch: 500 // loss: 0.031
epoch: 18, batch: 600 // loss: 0.030
epoch: 18, batch: 700 // loss: 0.031
epoch: 18, batch: 800 // loss: 0.027
epoch: 18, batch: 900 // loss: 0.036
epoch: 18, batch: 1000 // loss: 0.034
epoch: 18, batch: 1100 // loss: 0.031
epoch: 18, batch: 1200 // loss: 0.031
epoch: 18, batch: 1300 // loss: 0.034
epoch: 18, batch: 1400 // loss: 0.031
epoch: 18, batch: 1500 // loss: 0.033
epoch: 18, batch: 1600 // loss: 0.036
epoch: 18, batch: 1700 // loss: 0.030
epoch: 18, batch: 1800 // loss: 0.033
epoch: 18, batch: 1900 // loss: 0.028
epoch: 18, batch: 2000 // loss: 0.033
epoch: 18, batch: 2100 // loss: 0.031
epoch: 18, batch: 2200 // loss: 0.033
epoch: 18, batch: 2300 // loss: 0.032
epoch: 18, batch: 2400 // loss: 0.029
epoch: 18, batch: 2500 // loss: 0.029
epoch: 18, batch: 2600 // loss: 0.033
epoch: 18, batch: 2700 // loss: 0.030
epoch: 18, batch: 2800 // loss: 0.027
epoch: 18, batch: 2900 // loss: 0.031
epoch: 18, batch: 3000 // loss: 0.034
epoch: 18, batch: 3100 // loss: 0.033
epoch: 18, batch: 3200 // loss: 0.026
epoch: 18, batch: 3300 // loss: 0.026
epoch: 18, batch: 3400 // loss: 0.030
epoch: 18, batch: 3500 // loss: 0.023
epoch: 18, batch: 3600 // loss: 0.028
epoch: 18, batch: 3700 // loss: 0.027

epoch: 19, batch: 0 // loss: 0.038
epoch: 19, batch: 100 // loss: 0.030
epoch: 19, batch: 200 // loss: 0.027
epoch: 19, batch: 300 // loss: 0.035
epoch: 19, batch: 400 // loss: 0.031
epoch: 19, batch: 500 // loss: 0.030
epoch: 19, batch: 600 // loss: 0.029
epoch: 19, batch: 700 // loss: 0.030
epoch: 19, batch: 800 // loss: 0.026
epoch: 19, batch: 900 // loss: 0.036
epoch: 19, batch: 1000 // loss: 0.033
epoch: 19, batch: 1100 // loss: 0.031
epoch: 19, batch: 1200 // loss: 0.030
epoch: 19, batch: 1300 // loss: 0.034
epoch: 19, batch: 1400 // loss: 0.030
epoch: 19, batch: 1500 // loss: 0.033
epoch: 19, batch: 1600 // loss: 0.035
epoch: 19, batch: 1700 // loss: 0.029
epoch: 19, batch: 1800 // loss: 0.032
epoch: 19, batch: 1900 // loss: 0.027
epoch: 19, batch: 2000 // loss: 0.033
epoch: 19, batch: 2100 // loss: 0.030
epoch: 19, batch: 2200 // loss: 0.031
epoch: 19, batch: 2300 // loss: 0.031
epoch: 19, batch: 2400 // loss: 0.029
epoch: 19, batch: 2500 // loss: 0.028
epoch: 19, batch: 2600 // loss: 0.033
epoch: 19, batch: 2700 // loss: 0.029
epoch: 19, batch: 2800 // loss: 0.026
epoch: 19, batch: 2900 // loss: 0.030
epoch: 19, batch: 3000 // loss: 0.033
epoch: 19, batch: 3100 // loss: 0.032
epoch: 19, batch: 3200 // loss: 0.025
epoch: 19, batch: 3300 // loss: 0.025
epoch: 19, batch: 3400 // loss: 0.029
epoch: 19, batch: 3500 // loss: 0.023
epoch: 19, batch: 3600 // loss: 0.027
epoch: 19, batch: 3700 // loss: 0.026
In [169]:
# Push the same example (row 2 of X) through the trained model five times and
# keep only the reconstruction (element 0 of the returned tuple) as a NumPy
# array. The forward pass draws new random noise on each call, so the five
# reconstructions are not identical.
samples = [m(X[2,:].float())[0].detach().numpy() for _ in range(5)]
/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:39: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
In [174]:
# Display the last of the five reconstructions, reshaped from a flat vector
# back into a 28x28 grayscale image.
imshow(np.asarray(samples[4]).reshape(28,28), cmap='gray')
Out[174]:
<matplotlib.image.AxesImage at 0x7f8f309202b0>
In [157]:
# Sanity check: shape of a single example (one row of X).
X[0,:].shape
Out[157]:
torch.Size([784])
In [156]:
# Sanity check: shape of the full data matrix (examples x features).
X.shape
Out[156]:
torch.Size([60000, 784])
In [ ]: