Your name:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
# conda install -c pytorch torchvision
import torchvision
# note: if you cannot get torchvision installed
# using the above sequence, you can resort to
# the colab version here:
# -- just be sure to download and then upload
# the notebook to blackboard when complete.
# Load the training split of Fashion-MNIST into ./data/FashionMNIST
# (download is enabled, so the files are fetched if needed).
fMNIST = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST', train=True, download=True
)
Once again, we are playing with Fashion-MNIST here, following the last few lectures.
from IPython.display import Image
from matplotlib.pyplot import imshow
%matplotlib inline
# Preview a single example (index 6) as a grayscale image.
sample_image = np.asarray(fMNIST.data[6])
imshow(sample_image, cmap='gray')
<matplotlib.image.AxesImage at 0x7f8f71d81b70>
# Flatten every image into one row vector with a single reshape instead of
# a Python-level loop over all examples; the result is the same NumPy array
# the per-image comprehension produced, just built in one vectorized step.
X = fMNIST.data
X = X.reshape(X.shape[0], -1).numpy()
# Dividing by 255 allocates a new floating-point array, so X no longer
# aliases fMNIST.data after this line.
X = X / 255 # normalize
X.shape
(60000, 784)
We're going to implement a few autoencoder (AE) variants in `torch`.
Given that for some of you this may serve as something of an introduction to (or at least a refresher for) `torch`, here is one way to define and train a simple model.
Note that you can also use the simple `Sequential` pipeline to build such straightforward models, but this style affords more flexibility (though it is overkill for something like this).
class SimpleMLP(nn.Module):
    '''
    A one-hidden-layer MLP classifier: Linear (no bias) -> ReLU -> Linear.

    `forward` returns the raw output-layer scores; the softmax module stored
    in `self.sm` is NOT applied there, so the scores can be fed straight to a
    loss such as `nn.CrossEntropyLoss`. Apply `self.sm` explicitly if you
    want per-class probabilities.
    '''

    def __init__(self, input_size=784, hidden_size=32, n_labels=10):
        '''
        In the initializer we setup model parameters/layers.

        input_size:  number of features per input row.
        hidden_size: width of the hidden layer.
        n_labels:    number of output classes (size of the output layer).
        '''
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # honor the constructor argument rather than hard-coding 10
        self.n_labels = n_labels

        # input layer; from x -> z
        self.i = nn.Linear(self.input_size, self.hidden_size, bias=False)
        # nonlinear activation
        self.a = nn.ReLU()
        # output layer; sized by n_labels so the model works for any
        # number of classes
        self.o = nn.Linear(self.hidden_size, self.n_labels)
        # explicit dim: normalize over the class (last) axis; omitting dim
        # relies on deprecated implicit-dimension behavior
        self.sm = nn.Softmax(dim=-1)

    def forward(self, X):
        '''
        The forward pass defines how inputs flow forward through
        the model (linking layers together).

        Returns the unnormalized output-layer scores, one column per label.
        '''
        z = self.i(X)
        z = self.a(z)
        y_hat = self.o(z)
        return y_hat
Now to actually train the model, we need to define an optimizer
and a loss function.
from torch import optim

# Instantiate the classifier with its default sizes, in float32.
model = SimpleMLP().float()

# Cross-entropy over the model's output scores, optimized with
# SGD + momentum over all model parameters.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Integer class labels that accompany the images in fMNIST.data.
y = fMNIST.targets
y
tensor([9, 0, 0, ..., 3, 0, 5])
# convert X to a torch tensor
# (the tensor keeps whatever dtype the NumPy array had; the code below
# calls .float() on each slice before passing it to the model)
X = torch.tensor(X)
Let's take a look at making predictions and calculating a loss.
# Push the first 5 rows through the model in one call -- the model
# operates on a whole batch at a time, so this is a batch of size 5.
first_five = X[:5, :].float()
y_hat = model(first_five)
# Score those predictions against the matching 5 labels.
loss = loss_function(y_hat, y[:5])
print(loss)
tensor(2.3589, grad_fn=<NllLossBackward>)
And now take some number of passes over our training data, incurring loss, and performing backprop.
EPOCHS = 100

for epoch in range(EPOCHS):
    # Per-epoch bookkeeping: reset the row cursor, the batch counter and
    # the accumulated loss at the start of every pass.
    batch_size = 16
    batch_num = 0
    idx = 0
    running_loss = 0.0
    print("")

    # Only rows [0, 20000) of X / y are visited in each epoch.
    while idx < 20000:
        # Carve out the next mini-batch, then advance the cursor.
        rows = slice(idx, idx + batch_size)
        X_batch = X[rows].float()
        y_batch = y[rows]
        idx += batch_size

        # Clear gradients left over from the previous step before the
        # forward / backward pass for this batch.
        optimizer.zero_grad()
        y_hat_batch = model(X_batch)
        loss = loss_function(y_hat_batch, y_batch)
        loss.backward()
        optimizer.step()

        # Track the loss and report it on every 100th batch.
        batch_loss = loss.item()
        running_loss += batch_loss
        if batch_num % 100 == 0:
            print("epoch: {}, batch: {} // loss: {:.3f}".format(epoch, batch_num, batch_loss))
        batch_num += 1
epoch: 0, batch: 0 // loss: 2.305 epoch: 0, batch: 100 // loss: 1.929 epoch: 0, batch: 200 // loss: 1.576 epoch: 0, batch: 300 // loss: 1.377 epoch: 0, batch: 400 // loss: 1.068 epoch: 0, batch: 500 // loss: 1.100 epoch: 0, batch: 600 // loss: 0.984 epoch: 0, batch: 700 // loss: 0.792 epoch: 0, batch: 800 // loss: 0.861 epoch: 0, batch: 900 // loss: 0.682 epoch: 0, batch: 1000 // loss: 0.832 epoch: 0, batch: 1100 // loss: 0.547 epoch: 0, batch: 1200 // loss: 0.574 epoch: 1, batch: 0 // loss: 0.745 epoch: 1, batch: 100 // loss: 0.593 epoch: 1, batch: 200 // loss: 0.689 epoch: 1, batch: 300 // loss: 0.644 epoch: 1, batch: 400 // loss: 0.532 epoch: 1, batch: 500 // loss: 0.638 epoch: 1, batch: 600 // loss: 0.727 epoch: 1, batch: 700 // loss: 0.405 epoch: 1, batch: 800 // loss: 0.587 epoch: 1, batch: 900 // loss: 0.369 epoch: 1, batch: 1000 // loss: 0.702 epoch: 1, batch: 1100 // loss: 0.356 epoch: 1, batch: 1200 // loss: 0.446 epoch: 2, batch: 0 // loss: 0.595 epoch: 2, batch: 100 // loss: 0.479 epoch: 2, batch: 200 // loss: 0.678 epoch: 2, batch: 300 // loss: 0.514 epoch: 2, batch: 400 // loss: 0.383 epoch: 2, batch: 500 // loss: 0.534 epoch: 2, batch: 600 // loss: 0.659 epoch: 2, batch: 700 // loss: 0.322 epoch: 2, batch: 800 // loss: 0.469 epoch: 2, batch: 900 // loss: 0.271 epoch: 2, batch: 1000 // loss: 0.668 epoch: 2, batch: 1100 // loss: 0.271 epoch: 2, batch: 1200 // loss: 0.404 epoch: 3, batch: 0 // loss: 0.523 epoch: 3, batch: 100 // loss: 0.408 epoch: 3, batch: 200 // loss: 0.694 epoch: 3, batch: 300 // loss: 0.457 epoch: 3, batch: 400 // loss: 0.311 epoch: 3, batch: 500 // loss: 0.498 epoch: 3, batch: 600 // loss: 0.619 epoch: 3, batch: 700 // loss: 0.296 epoch: 3, batch: 800 // loss: 0.407 epoch: 3, batch: 900 // loss: 0.221 epoch: 3, batch: 1000 // loss: 0.658 epoch: 3, batch: 1100 // loss: 0.227 epoch: 3, batch: 1200 // loss: 0.379 epoch: 4, batch: 0 // loss: 0.470 epoch: 4, batch: 100 // loss: 0.372 epoch: 4, batch: 200 // loss: 0.691 epoch: 4, batch: 
300 // loss: 0.421 epoch: 4, batch: 400 // loss: 0.277 epoch: 4, batch: 500 // loss: 0.480 epoch: 4, batch: 600 // loss: 0.593 epoch: 4, batch: 700 // loss: 0.285 epoch: 4, batch: 800 // loss: 0.368 epoch: 4, batch: 900 // loss: 0.191 epoch: 4, batch: 1000 // loss: 0.654 epoch: 4, batch: 1100 // loss: 0.201 epoch: 4, batch: 1200 // loss: 0.352 epoch: 5, batch: 0 // loss: 0.428 epoch: 5, batch: 100 // loss: 0.352 epoch: 5, batch: 200 // loss: 0.670 epoch: 5, batch: 300 // loss: 0.397 epoch: 5, batch: 400 // loss: 0.258 epoch: 5, batch: 500 // loss: 0.471 epoch: 5, batch: 600 // loss: 0.576 epoch: 5, batch: 700 // loss: 0.279 epoch: 5, batch: 800 // loss: 0.340 epoch: 5, batch: 900 // loss: 0.173 epoch: 5, batch: 1000 // loss: 0.653 epoch: 5, batch: 1100 // loss: 0.185 epoch: 5, batch: 1200 // loss: 0.330 epoch: 6, batch: 0 // loss: 0.399 epoch: 6, batch: 100 // loss: 0.343 epoch: 6, batch: 200 // loss: 0.645 epoch: 6, batch: 300 // loss: 0.381 epoch: 6, batch: 400 // loss: 0.244 epoch: 6, batch: 500 // loss: 0.459 epoch: 6, batch: 600 // loss: 0.566 epoch: 6, batch: 700 // loss: 0.273 epoch: 6, batch: 800 // loss: 0.322 epoch: 6, batch: 900 // loss: 0.161 epoch: 6, batch: 1000 // loss: 0.650 epoch: 6, batch: 1100 // loss: 0.174 epoch: 6, batch: 1200 // loss: 0.310 epoch: 7, batch: 0 // loss: 0.375 epoch: 7, batch: 100 // loss: 0.336 epoch: 7, batch: 200 // loss: 0.620 epoch: 7, batch: 300 // loss: 0.372 epoch: 7, batch: 400 // loss: 0.234 epoch: 7, batch: 500 // loss: 0.452 epoch: 7, batch: 600 // loss: 0.556 epoch: 7, batch: 700 // loss: 0.265 epoch: 7, batch: 800 // loss: 0.311 epoch: 7, batch: 900 // loss: 0.149 epoch: 7, batch: 1000 // loss: 0.646 epoch: 7, batch: 1100 // loss: 0.166 epoch: 7, batch: 1200 // loss: 0.293 epoch: 8, batch: 0 // loss: 0.358 epoch: 8, batch: 100 // loss: 0.331 epoch: 8, batch: 200 // loss: 0.600 epoch: 8, batch: 300 // loss: 0.357 epoch: 8, batch: 400 // loss: 0.226 epoch: 8, batch: 500 // loss: 0.443 epoch: 8, batch: 600 // loss: 
0.546 epoch: 8, batch: 700 // loss: 0.259 epoch: 8, batch: 800 // loss: 0.302 epoch: 8, batch: 900 // loss: 0.139 epoch: 8, batch: 1000 // loss: 0.639 epoch: 8, batch: 1100 // loss: 0.160 epoch: 8, batch: 1200 // loss: 0.280 epoch: 9, batch: 0 // loss: 0.345 epoch: 9, batch: 100 // loss: 0.331 epoch: 9, batch: 200 // loss: 0.582 epoch: 9, batch: 300 // loss: 0.345 epoch: 9, batch: 400 // loss: 0.220 epoch: 9, batch: 500 // loss: 0.432 epoch: 9, batch: 600 // loss: 0.535 epoch: 9, batch: 700 // loss: 0.257 epoch: 9, batch: 800 // loss: 0.295 epoch: 9, batch: 900 // loss: 0.132 epoch: 9, batch: 1000 // loss: 0.633 epoch: 9, batch: 1100 // loss: 0.154 epoch: 9, batch: 1200 // loss: 0.266 epoch: 10, batch: 0 // loss: 0.332 epoch: 10, batch: 100 // loss: 0.330 epoch: 10, batch: 200 // loss: 0.571 epoch: 10, batch: 300 // loss: 0.337 epoch: 10, batch: 400 // loss: 0.213 epoch: 10, batch: 500 // loss: 0.429 epoch: 10, batch: 600 // loss: 0.527 epoch: 10, batch: 700 // loss: 0.256 epoch: 10, batch: 800 // loss: 0.291 epoch: 10, batch: 900 // loss: 0.126 epoch: 10, batch: 1000 // loss: 0.627 epoch: 10, batch: 1100 // loss: 0.151 epoch: 10, batch: 1200 // loss: 0.253 epoch: 11, batch: 0 // loss: 0.323 epoch: 11, batch: 100 // loss: 0.330 epoch: 11, batch: 200 // loss: 0.550 epoch: 11, batch: 300 // loss: 0.328 epoch: 11, batch: 400 // loss: 0.207 epoch: 11, batch: 500 // loss: 0.423 epoch: 11, batch: 600 // loss: 0.517 epoch: 11, batch: 700 // loss: 0.252 epoch: 11, batch: 800 // loss: 0.286 epoch: 11, batch: 900 // loss: 0.122 epoch: 11, batch: 1000 // loss: 0.618 epoch: 11, batch: 1100 // loss: 0.149 epoch: 11, batch: 1200 // loss: 0.245 epoch: 12, batch: 0 // loss: 0.319 epoch: 12, batch: 100 // loss: 0.330 epoch: 12, batch: 200 // loss: 0.537 epoch: 12, batch: 300 // loss: 0.324 epoch: 12, batch: 400 // loss: 0.200 epoch: 12, batch: 500 // loss: 0.414 epoch: 12, batch: 600 // loss: 0.513 epoch: 12, batch: 700 // loss: 0.251 epoch: 12, batch: 800 // loss: 0.284 epoch: 12, 
batch: 900 // loss: 0.120 epoch: 12, batch: 1000 // loss: 0.609 epoch: 12, batch: 1100 // loss: 0.147 epoch: 12, batch: 1200 // loss: 0.232 epoch: 13, batch: 0 // loss: 0.311 epoch: 13, batch: 100 // loss: 0.328 epoch: 13, batch: 200 // loss: 0.523 epoch: 13, batch: 300 // loss: 0.325 epoch: 13, batch: 400 // loss: 0.194 epoch: 13, batch: 500 // loss: 0.409 epoch: 13, batch: 600 // loss: 0.501 epoch: 13, batch: 700 // loss: 0.248 epoch: 13, batch: 800 // loss: 0.281 epoch: 13, batch: 900 // loss: 0.117 epoch: 13, batch: 1000 // loss: 0.603 epoch: 13, batch: 1100 // loss: 0.145 epoch: 13, batch: 1200 // loss: 0.222 epoch: 14, batch: 0 // loss: 0.307 epoch: 14, batch: 100 // loss: 0.328 epoch: 14, batch: 200 // loss: 0.508 epoch: 14, batch: 300 // loss: 0.322 epoch: 14, batch: 400 // loss: 0.188 epoch: 14, batch: 500 // loss: 0.407 epoch: 14, batch: 600 // loss: 0.490 epoch: 14, batch: 700 // loss: 0.249 epoch: 14, batch: 800 // loss: 0.286 epoch: 14, batch: 900 // loss: 0.112 epoch: 14, batch: 1000 // loss: 0.597 epoch: 14, batch: 1100 // loss: 0.143 epoch: 14, batch: 1200 // loss: 0.215 epoch: 15, batch: 0 // loss: 0.303 epoch: 15, batch: 100 // loss: 0.326 epoch: 15, batch: 200 // loss: 0.487 epoch: 15, batch: 300 // loss: 0.315 epoch: 15, batch: 400 // loss: 0.184 epoch: 15, batch: 500 // loss: 0.405 epoch: 15, batch: 600 // loss: 0.484 epoch: 15, batch: 700 // loss: 0.250 epoch: 15, batch: 800 // loss: 0.284 epoch: 15, batch: 900 // loss: 0.109 epoch: 15, batch: 1000 // loss: 0.593 epoch: 15, batch: 1100 // loss: 0.139 epoch: 15, batch: 1200 // loss: 0.209 epoch: 16, batch: 0 // loss: 0.297 epoch: 16, batch: 100 // loss: 0.325 epoch: 16, batch: 200 // loss: 0.476 epoch: 16, batch: 300 // loss: 0.316 epoch: 16, batch: 400 // loss: 0.180 epoch: 16, batch: 500 // loss: 0.406 epoch: 16, batch: 600 // loss: 0.477 epoch: 16, batch: 700 // loss: 0.247 epoch: 16, batch: 800 // loss: 0.287 epoch: 16, batch: 900 // loss: 0.109 epoch: 16, batch: 1000 // loss: 0.590 epoch: 
16, batch: 1100 // loss: 0.135 epoch: 16, batch: 1200 // loss: 0.200 epoch: 17, batch: 0 // loss: 0.294 epoch: 17, batch: 100 // loss: 0.323 epoch: 17, batch: 200 // loss: 0.467 epoch: 17, batch: 300 // loss: 0.314 epoch: 17, batch: 400 // loss: 0.176 epoch: 17, batch: 500 // loss: 0.406 epoch: 17, batch: 600 // loss: 0.471 epoch: 17, batch: 700 // loss: 0.247 epoch: 17, batch: 800 // loss: 0.289 epoch: 17, batch: 900 // loss: 0.106 epoch: 17, batch: 1000 // loss: 0.585 epoch: 17, batch: 1100 // loss: 0.135 epoch: 17, batch: 1200 // loss: 0.198 epoch: 18, batch: 0 // loss: 0.291 epoch: 18, batch: 100 // loss: 0.325 epoch: 18, batch: 200 // loss: 0.458 epoch: 18, batch: 300 // loss: 0.313 epoch: 18, batch: 400 // loss: 0.173 epoch: 18, batch: 500 // loss: 0.412 epoch: 18, batch: 600 // loss: 0.465 epoch: 18, batch: 700 // loss: 0.245 epoch: 18, batch: 800 // loss: 0.287 epoch: 18, batch: 900 // loss: 0.102 epoch: 18, batch: 1000 // loss: 0.576 epoch: 18, batch: 1100 // loss: 0.133 epoch: 18, batch: 1200 // loss: 0.193 epoch: 19, batch: 0 // loss: 0.287 epoch: 19, batch: 100 // loss: 0.324 epoch: 19, batch: 200 // loss: 0.450 epoch: 19, batch: 300 // loss: 0.312 epoch: 19, batch: 400 // loss: 0.169 epoch: 19, batch: 500 // loss: 0.418 epoch: 19, batch: 600 // loss: 0.459 epoch: 19, batch: 700 // loss: 0.241 epoch: 19, batch: 800 // loss: 0.281 epoch: 19, batch: 900 // loss: 0.099 epoch: 19, batch: 1000 // loss: 0.569 epoch: 19, batch: 1100 // loss: 0.133 epoch: 19, batch: 1200 // loss: 0.187 epoch: 20, batch: 0 // loss: 0.287 epoch: 20, batch: 100 // loss: 0.325 epoch: 20, batch: 200 // loss: 0.440 epoch: 20, batch: 300 // loss: 0.309 epoch: 20, batch: 400 // loss: 0.165 epoch: 20, batch: 500 // loss: 0.416 epoch: 20, batch: 600 // loss: 0.453 epoch: 20, batch: 700 // loss: 0.238 epoch: 20, batch: 800 // loss: 0.286 epoch: 20, batch: 900 // loss: 0.096 epoch: 20, batch: 1000 // loss: 0.568 epoch: 20, batch: 1100 // loss: 0.131 epoch: 20, batch: 1200 // loss: 0.185 
epoch: 21, batch: 0 // loss: 0.285 epoch: 21, batch: 100 // loss: 0.324 epoch: 21, batch: 200 // loss: 0.434 epoch: 21, batch: 300 // loss: 0.306 epoch: 21, batch: 400 // loss: 0.162 epoch: 21, batch: 500 // loss: 0.418 epoch: 21, batch: 600 // loss: 0.445 epoch: 21, batch: 700 // loss: 0.237 epoch: 21, batch: 800 // loss: 0.286 epoch: 21, batch: 900 // loss: 0.096 epoch: 21, batch: 1000 // loss: 0.566 epoch: 21, batch: 1100 // loss: 0.130 epoch: 21, batch: 1200 // loss: 0.181 epoch: 22, batch: 0 // loss: 0.281 epoch: 22, batch: 100 // loss: 0.320 epoch: 22, batch: 200 // loss: 0.422 epoch: 22, batch: 300 // loss: 0.302 epoch: 22, batch: 400 // loss: 0.160 epoch: 22, batch: 500 // loss: 0.420 epoch: 22, batch: 600 // loss: 0.439 epoch: 22, batch: 700 // loss: 0.235 epoch: 22, batch: 800 // loss: 0.294 epoch: 22, batch: 900 // loss: 0.092 epoch: 22, batch: 1000 // loss: 0.565 epoch: 22, batch: 1100 // loss: 0.128 epoch: 22, batch: 1200 // loss: 0.176 epoch: 23, batch: 0 // loss: 0.285 epoch: 23, batch: 100 // loss: 0.318 epoch: 23, batch: 200 // loss: 0.424 epoch: 23, batch: 300 // loss: 0.298 epoch: 23, batch: 400 // loss: 0.157 epoch: 23, batch: 500 // loss: 0.423 epoch: 23, batch: 600 // loss: 0.436 epoch: 23, batch: 700 // loss: 0.239 epoch: 23, batch: 800 // loss: 0.292 epoch: 23, batch: 900 // loss: 0.091 epoch: 23, batch: 1000 // loss: 0.562 epoch: 23, batch: 1100 // loss: 0.122 epoch: 23, batch: 1200 // loss: 0.172 epoch: 24, batch: 0 // loss: 0.285 epoch: 24, batch: 100 // loss: 0.310 epoch: 24, batch: 200 // loss: 0.418 epoch: 24, batch: 300 // loss: 0.299 epoch: 24, batch: 400 // loss: 0.154 epoch: 24, batch: 500 // loss: 0.425 epoch: 24, batch: 600 // loss: 0.431 epoch: 24, batch: 700 // loss: 0.228 epoch: 24, batch: 800 // loss: 0.309 epoch: 24, batch: 900 // loss: 0.088 epoch: 24, batch: 1000 // loss: 0.561 epoch: 24, batch: 1100 // loss: 0.121 epoch: 24, batch: 1200 // loss: 0.170 epoch: 25, batch: 0 // loss: 0.283 epoch: 25, batch: 100 // loss: 0.309 
epoch: 25, batch: 200 // loss: 0.409 epoch: 25, batch: 300 // loss: 0.296 epoch: 25, batch: 400 // loss: 0.151 epoch: 25, batch: 500 // loss: 0.425 epoch: 25, batch: 600 // loss: 0.427 epoch: 25, batch: 700 // loss: 0.220 epoch: 25, batch: 800 // loss: 0.311 epoch: 25, batch: 900 // loss: 0.085 epoch: 25, batch: 1000 // loss: 0.559 epoch: 25, batch: 1100 // loss: 0.121 epoch: 25, batch: 1200 // loss: 0.169 epoch: 26, batch: 0 // loss: 0.286 epoch: 26, batch: 100 // loss: 0.309 epoch: 26, batch: 200 // loss: 0.401 epoch: 26, batch: 300 // loss: 0.291 epoch: 26, batch: 400 // loss: 0.149 epoch: 26, batch: 500 // loss: 0.428 epoch: 26, batch: 600 // loss: 0.423 epoch: 26, batch: 700 // loss: 0.236 epoch: 26, batch: 800 // loss: 0.310 epoch: 26, batch: 900 // loss: 0.084 epoch: 26, batch: 1000 // loss: 0.558 epoch: 26, batch: 1100 // loss: 0.119 epoch: 26, batch: 1200 // loss: 0.166 epoch: 27, batch: 0 // loss: 0.282 epoch: 27, batch: 100 // loss: 0.310 epoch: 27, batch: 200 // loss: 0.394 epoch: 27, batch: 300 // loss: 0.293 epoch: 27, batch: 400 // loss: 0.146 epoch: 27, batch: 500 // loss: 0.428 epoch: 27, batch: 600 // loss: 0.420 epoch: 27, batch: 700 // loss: 0.242 epoch: 27, batch: 800 // loss: 0.312 epoch: 27, batch: 900 // loss: 0.083 epoch: 27, batch: 1000 // loss: 0.558 epoch: 27, batch: 1100 // loss: 0.118 epoch: 27, batch: 1200 // loss: 0.163 epoch: 28, batch: 0 // loss: 0.285 epoch: 28, batch: 100 // loss: 0.310 epoch: 28, batch: 200 // loss: 0.389 epoch: 28, batch: 300 // loss: 0.287 epoch: 28, batch: 400 // loss: 0.144 epoch: 28, batch: 500 // loss: 0.429 epoch: 28, batch: 600 // loss: 0.419 epoch: 28, batch: 700 // loss: 0.242 epoch: 28, batch: 800 // loss: 0.310 epoch: 28, batch: 900 // loss: 0.081 epoch: 28, batch: 1000 // loss: 0.551 epoch: 28, batch: 1100 // loss: 0.115 epoch: 28, batch: 1200 // loss: 0.160 epoch: 29, batch: 0 // loss: 0.282 epoch: 29, batch: 100 // loss: 0.298 epoch: 29, batch: 200 // loss: 0.400 epoch: 29, batch: 300 // loss: 
0.283 epoch: 29, batch: 400 // loss: 0.143 epoch: 29, batch: 500 // loss: 0.427 epoch: 29, batch: 600 // loss: 0.420 epoch: 29, batch: 700 // loss: 0.248 epoch: 29, batch: 800 // loss: 0.311 epoch: 29, batch: 900 // loss: 0.080 epoch: 29, batch: 1000 // loss: 0.549 epoch: 29, batch: 1100 // loss: 0.113 epoch: 29, batch: 1200 // loss: 0.157 epoch: 30, batch: 0 // loss: 0.283 epoch: 30, batch: 100 // loss: 0.294 epoch: 30, batch: 200 // loss: 0.406 epoch: 30, batch: 300 // loss: 0.281 epoch: 30, batch: 400 // loss: 0.141 epoch: 30, batch: 500 // loss: 0.421 epoch: 30, batch: 600 // loss: 0.419 epoch: 30, batch: 700 // loss: 0.250 epoch: 30, batch: 800 // loss: 0.312 epoch: 30, batch: 900 // loss: 0.080 epoch: 30, batch: 1000 // loss: 0.548 epoch: 30, batch: 1100 // loss: 0.114 epoch: 30, batch: 1200 // loss: 0.154 epoch: 31, batch: 0 // loss: 0.279 epoch: 31, batch: 100 // loss: 0.290 epoch: 31, batch: 200 // loss: 0.401 epoch: 31, batch: 300 // loss: 0.278 epoch: 31, batch: 400 // loss: 0.140 epoch: 31, batch: 500 // loss: 0.418 epoch: 31, batch: 600 // loss: 0.419 epoch: 31, batch: 700 // loss: 0.251 epoch: 31, batch: 800 // loss: 0.313 epoch: 31, batch: 900 // loss: 0.079 epoch: 31, batch: 1000 // loss: 0.544 epoch: 31, batch: 1100 // loss: 0.114 epoch: 31, batch: 1200 // loss: 0.154 epoch: 32, batch: 0 // loss: 0.281 epoch: 32, batch: 100 // loss: 0.284 epoch: 32, batch: 200 // loss: 0.404 epoch: 32, batch: 300 // loss: 0.277 epoch: 32, batch: 400 // loss: 0.138 epoch: 32, batch: 500 // loss: 0.424 epoch: 32, batch: 600 // loss: 0.419 epoch: 32, batch: 700 // loss: 0.254 epoch: 32, batch: 800 // loss: 0.309 epoch: 32, batch: 900 // loss: 0.077 epoch: 32, batch: 1000 // loss: 0.547 epoch: 32, batch: 1100 // loss: 0.114 epoch: 32, batch: 1200 // loss: 0.151 epoch: 33, batch: 0 // loss: 0.281 epoch: 33, batch: 100 // loss: 0.287 epoch: 33, batch: 200 // loss: 0.402 epoch: 33, batch: 300 // loss: 0.279 epoch: 33, batch: 400 // loss: 0.137 epoch: 33, batch: 500 // 
loss: 0.424 epoch: 33, batch: 600 // loss: 0.416 epoch: 33, batch: 700 // loss: 0.254 epoch: 33, batch: 800 // loss: 0.313 epoch: 33, batch: 900 // loss: 0.077 epoch: 33, batch: 1000 // loss: 0.543 epoch: 33, batch: 1100 // loss: 0.116 epoch: 33, batch: 1200 // loss: 0.149 epoch: 34, batch: 0 // loss: 0.286 epoch: 34, batch: 100 // loss: 0.278 epoch: 34, batch: 200 // loss: 0.394 epoch: 34, batch: 300 // loss: 0.277 epoch: 34, batch: 400 // loss: 0.137 epoch: 34, batch: 500 // loss: 0.429 epoch: 34, batch: 600 // loss: 0.413 epoch: 34, batch: 700 // loss: 0.257 epoch: 34, batch: 800 // loss: 0.309 epoch: 34, batch: 900 // loss: 0.075 epoch: 34, batch: 1000 // loss: 0.539 epoch: 34, batch: 1100 // loss: 0.116 epoch: 34, batch: 1200 // loss: 0.147 epoch: 35, batch: 0 // loss: 0.279 epoch: 35, batch: 100 // loss: 0.279 epoch: 35, batch: 200 // loss: 0.397 epoch: 35, batch: 300 // loss: 0.277 epoch: 35, batch: 400 // loss: 0.137 epoch: 35, batch: 500 // loss: 0.426 epoch: 35, batch: 600 // loss: 0.409 epoch: 35, batch: 700 // loss: 0.258 epoch: 35, batch: 800 // loss: 0.316 epoch: 35, batch: 900 // loss: 0.075 epoch: 35, batch: 1000 // loss: 0.541 epoch: 35, batch: 1100 // loss: 0.115 epoch: 35, batch: 1200 // loss: 0.145 epoch: 36, batch: 0 // loss: 0.276 epoch: 36, batch: 100 // loss: 0.274 epoch: 36, batch: 200 // loss: 0.400 epoch: 36, batch: 300 // loss: 0.277 epoch: 36, batch: 400 // loss: 0.136 epoch: 36, batch: 500 // loss: 0.434 epoch: 36, batch: 600 // loss: 0.406 epoch: 36, batch: 700 // loss: 0.259 epoch: 36, batch: 800 // loss: 0.315 epoch: 36, batch: 900 // loss: 0.073 epoch: 36, batch: 1000 // loss: 0.540 epoch: 36, batch: 1100 // loss: 0.115 epoch: 36, batch: 1200 // loss: 0.143 epoch: 37, batch: 0 // loss: 0.272 epoch: 37, batch: 100 // loss: 0.269 epoch: 37, batch: 200 // loss: 0.386 epoch: 37, batch: 300 // loss: 0.271 epoch: 37, batch: 400 // loss: 0.138 epoch: 37, batch: 500 // loss: 0.432 epoch: 37, batch: 600 // loss: 0.404 epoch: 37, batch: 700 
// loss: 0.265 epoch: 37, batch: 800 // loss: 0.309 epoch: 37, batch: 900 // loss: 0.074 epoch: 37, batch: 1000 // loss: 0.534 epoch: 37, batch: 1100 // loss: 0.116 epoch: 37, batch: 1200 // loss: 0.143 epoch: 38, batch: 0 // loss: 0.272 epoch: 38, batch: 100 // loss: 0.264 epoch: 38, batch: 200 // loss: 0.391 epoch: 38, batch: 300 // loss: 0.269 epoch: 38, batch: 400 // loss: 0.136 epoch: 38, batch: 500 // loss: 0.439 epoch: 38, batch: 600 // loss: 0.402 epoch: 38, batch: 700 // loss: 0.266 epoch: 38, batch: 800 // loss: 0.304 epoch: 38, batch: 900 // loss: 0.072 epoch: 38, batch: 1000 // loss: 0.531 epoch: 38, batch: 1100 // loss: 0.117 epoch: 38, batch: 1200 // loss: 0.140 epoch: 39, batch: 0 // loss: 0.273 epoch: 39, batch: 100 // loss: 0.260 epoch: 39, batch: 200 // loss: 0.385 epoch: 39, batch: 300 // loss: 0.267 epoch: 39, batch: 400 // loss: 0.137 epoch: 39, batch: 500 // loss: 0.435 epoch: 39, batch: 600 // loss: 0.404 epoch: 39, batch: 700 // loss: 0.269 epoch: 39, batch: 800 // loss: 0.303 epoch: 39, batch: 900 // loss: 0.072 epoch: 39, batch: 1000 // loss: 0.525 epoch: 39, batch: 1100 // loss: 0.118 epoch: 39, batch: 1200 // loss: 0.139 epoch: 40, batch: 0 // loss: 0.268 epoch: 40, batch: 100 // loss: 0.256 epoch: 40, batch: 200 // loss: 0.383 epoch: 40, batch: 300 // loss: 0.267 epoch: 40, batch: 400 // loss: 0.136 epoch: 40, batch: 500 // loss: 0.433 epoch: 40, batch: 600 // loss: 0.400 epoch: 40, batch: 700 // loss: 0.277 epoch: 40, batch: 800 // loss: 0.299 epoch: 40, batch: 900 // loss: 0.073 epoch: 40, batch: 1000 // loss: 0.524 epoch: 40, batch: 1100 // loss: 0.118 epoch: 40, batch: 1200 // loss: 0.139 epoch: 41, batch: 0 // loss: 0.268 epoch: 41, batch: 100 // loss: 0.255 epoch: 41, batch: 200 // loss: 0.390 epoch: 41, batch: 300 // loss: 0.267 epoch: 41, batch: 400 // loss: 0.138 epoch: 41, batch: 500 // loss: 0.434 epoch: 41, batch: 600 // loss: 0.396 epoch: 41, batch: 700 // loss: 0.277 epoch: 41, batch: 800 // loss: 0.297 epoch: 41, batch: 
900 // loss: 0.070 epoch: 41, batch: 1000 // loss: 0.524 epoch: 41, batch: 1100 // loss: 0.120 epoch: 41, batch: 1200 // loss: 0.137 epoch: 42, batch: 0 // loss: 0.264 epoch: 42, batch: 100 // loss: 0.252 epoch: 42, batch: 200 // loss: 0.388 epoch: 42, batch: 300 // loss: 0.263 epoch: 42, batch: 400 // loss: 0.139 epoch: 42, batch: 500 // loss: 0.431 epoch: 42, batch: 600 // loss: 0.400 epoch: 42, batch: 700 // loss: 0.277 epoch: 42, batch: 800 // loss: 0.294 epoch: 42, batch: 900 // loss: 0.070 epoch: 42, batch: 1000 // loss: 0.530 epoch: 42, batch: 1100 // loss: 0.125 epoch: 42, batch: 1200 // loss: 0.137 epoch: 43, batch: 0 // loss: 0.259 epoch: 43, batch: 100 // loss: 0.247 epoch: 43, batch: 200 // loss: 0.382 epoch: 43, batch: 300 // loss: 0.264 epoch: 43, batch: 400 // loss: 0.138 epoch: 43, batch: 500 // loss: 0.437 epoch: 43, batch: 600 // loss: 0.403 epoch: 43, batch: 700 // loss: 0.267 epoch: 43, batch: 800 // loss: 0.301 epoch: 43, batch: 900 // loss: 0.068 epoch: 43, batch: 1000 // loss: 0.525 epoch: 43, batch: 1100 // loss: 0.124 epoch: 43, batch: 1200 // loss: 0.139 epoch: 44, batch: 0 // loss: 0.258 epoch: 44, batch: 100 // loss: 0.245 epoch: 44, batch: 200 // loss: 0.384 epoch: 44, batch: 300 // loss: 0.261 epoch: 44, batch: 400 // loss: 0.138 epoch: 44, batch: 500 // loss: 0.434 epoch: 44, batch: 600 // loss: 0.404 epoch: 44, batch: 700 // loss: 0.267 epoch: 44, batch: 800 // loss: 0.292 epoch: 44, batch: 900 // loss: 0.072 epoch: 44, batch: 1000 // loss: 0.528 epoch: 44, batch: 1100 // loss: 0.126 epoch: 44, batch: 1200 // loss: 0.137 epoch: 45, batch: 0 // loss: 0.254 epoch: 45, batch: 100 // loss: 0.243 epoch: 45, batch: 200 // loss: 0.371 epoch: 45, batch: 300 // loss: 0.257 epoch: 45, batch: 400 // loss: 0.137 epoch: 45, batch: 500 // loss: 0.436 epoch: 45, batch: 600 // loss: 0.404 epoch: 45, batch: 700 // loss: 0.269 epoch: 45, batch: 800 // loss: 0.286 epoch: 45, batch: 900 // loss: 0.069 epoch: 45, batch: 1000 // loss: 0.531 epoch: 45, 
batch: 1100 // loss: 0.124 epoch: 45, batch: 1200 // loss: 0.138 epoch: 46, batch: 0 // loss: 0.248 epoch: 46, batch: 100 // loss: 0.243 epoch: 46, batch: 200 // loss: 0.372 epoch: 46, batch: 300 // loss: 0.257 epoch: 46, batch: 400 // loss: 0.137 epoch: 46, batch: 500 // loss: 0.440 epoch: 46, batch: 600 // loss: 0.406 epoch: 46, batch: 700 // loss: 0.262 epoch: 46, batch: 800 // loss: 0.285 epoch: 46, batch: 900 // loss: 0.070 epoch: 46, batch: 1000 // loss: 0.531 epoch: 46, batch: 1100 // loss: 0.127 epoch: 46, batch: 1200 // loss: 0.140 epoch: 47, batch: 0 // loss: 0.247 epoch: 47, batch: 100 // loss: 0.243 epoch: 47, batch: 200 // loss: 0.366 epoch: 47, batch: 300 // loss: 0.255 epoch: 47, batch: 400 // loss: 0.137 epoch: 47, batch: 500 // loss: 0.438 epoch: 47, batch: 600 // loss: 0.407 epoch: 47, batch: 700 // loss: 0.263 epoch: 47, batch: 800 // loss: 0.284 epoch: 47, batch: 900 // loss: 0.068 epoch: 47, batch: 1000 // loss: 0.531 epoch: 47, batch: 1100 // loss: 0.127 epoch: 47, batch: 1200 // loss: 0.140 epoch: 48, batch: 0 // loss: 0.243 epoch: 48, batch: 100 // loss: 0.243 epoch: 48, batch: 200 // loss: 0.361 epoch: 48, batch: 300 // loss: 0.250 epoch: 48, batch: 400 // loss: 0.136 epoch: 48, batch: 500 // loss: 0.440 epoch: 48, batch: 600 // loss: 0.410 epoch: 48, batch: 700 // loss: 0.268 epoch: 48, batch: 800 // loss: 0.279 epoch: 48, batch: 900 // loss: 0.065 epoch: 48, batch: 1000 // loss: 0.528 epoch: 48, batch: 1100 // loss: 0.129 epoch: 48, batch: 1200 // loss: 0.132 epoch: 49, batch: 0 // loss: 0.239 epoch: 49, batch: 100 // loss: 0.240 epoch: 49, batch: 200 // loss: 0.361 epoch: 49, batch: 300 // loss: 0.253 epoch: 49, batch: 400 // loss: 0.135 epoch: 49, batch: 500 // loss: 0.439 epoch: 49, batch: 600 // loss: 0.411 epoch: 49, batch: 700 // loss: 0.271 epoch: 49, batch: 800 // loss: 0.271 epoch: 49, batch: 900 // loss: 0.066 epoch: 49, batch: 1000 // loss: 0.527 epoch: 49, batch: 1100 // loss: 0.130 epoch: 49, batch: 1200 // loss: 0.132 epoch: 
50, batch: 0 // loss: 0.231 epoch: 50, batch: 100 // loss: 0.237 epoch: 50, batch: 200 // loss: 0.363 epoch: 50, batch: 300 // loss: 0.245 epoch: 50, batch: 400 // loss: 0.135 epoch: 50, batch: 500 // loss: 0.444 epoch: 50, batch: 600 // loss: 0.409 epoch: 50, batch: 700 // loss: 0.269 epoch: 50, batch: 800 // loss: 0.269 epoch: 50, batch: 900 // loss: 0.065 epoch: 50, batch: 1000 // loss: 0.525 epoch: 50, batch: 1100 // loss: 0.130 epoch: 50, batch: 1200 // loss: 0.136 epoch: 51, batch: 0 // loss: 0.231 epoch: 51, batch: 100 // loss: 0.233 epoch: 51, batch: 200 // loss: 0.350 epoch: 51, batch: 300 // loss: 0.243 epoch: 51, batch: 400 // loss: 0.135 epoch: 51, batch: 500 // loss: 0.444 epoch: 51, batch: 600 // loss: 0.408 epoch: 51, batch: 700 // loss: 0.266 epoch: 51, batch: 800 // loss: 0.269 epoch: 51, batch: 900 // loss: 0.064 epoch: 51, batch: 1000 // loss: 0.524 epoch: 51, batch: 1100 // loss: 0.131 epoch: 51, batch: 1200 // loss: 0.130 epoch: 52, batch: 0 // loss: 0.225 epoch: 52, batch: 100 // loss: 0.232 epoch: 52, batch: 200 // loss: 0.349 epoch: 52, batch: 300 // loss: 0.243 epoch: 52, batch: 400 // loss: 0.135 epoch: 52, batch: 500 // loss: 0.443 epoch: 52, batch: 600 // loss: 0.404 epoch: 52, batch: 700 // loss: 0.268 epoch: 52, batch: 800 // loss: 0.264 epoch: 52, batch: 900 // loss: 0.064 epoch: 52, batch: 1000 // loss: 0.522 epoch: 52, batch: 1100 // loss: 0.133 epoch: 52, batch: 1200 // loss: 0.137 epoch: 53, batch: 0 // loss: 0.224 epoch: 53, batch: 100 // loss: 0.225 epoch: 53, batch: 200 // loss: 0.341 epoch: 53, batch: 300 // loss: 0.236 epoch: 53, batch: 400 // loss: 0.135 epoch: 53, batch: 500 // loss: 0.448 epoch: 53, batch: 600 // loss: 0.403 epoch: 53, batch: 700 // loss: 0.266 epoch: 53, batch: 800 // loss: 0.259 epoch: 53, batch: 900 // loss: 0.064 epoch: 53, batch: 1000 // loss: 0.526 epoch: 53, batch: 1100 // loss: 0.133 epoch: 53, batch: 1200 // loss: 0.134 epoch: 54, batch: 0 // loss: 0.220 epoch: 54, batch: 100 // loss: 0.222 epoch: 
54, batch: 200 // loss: 0.352 epoch: 54, batch: 300 // loss: 0.241 epoch: 54, batch: 400 // loss: 0.131 epoch: 54, batch: 500 // loss: 0.440 epoch: 54, batch: 600 // loss: 0.401 epoch: 54, batch: 700 // loss: 0.273 epoch: 54, batch: 800 // loss: 0.257 epoch: 54, batch: 900 // loss: 0.063 epoch: 54, batch: 1000 // loss: 0.527 epoch: 54, batch: 1100 // loss: 0.134 epoch: 54, batch: 1200 // loss: 0.134 epoch: 55, batch: 0 // loss: 0.220 epoch: 55, batch: 100 // loss: 0.226 epoch: 55, batch: 200 // loss: 0.347 epoch: 55, batch: 300 // loss: 0.240 epoch: 55, batch: 400 // loss: 0.131 epoch: 55, batch: 500 // loss: 0.451 epoch: 55, batch: 600 // loss: 0.402 epoch: 55, batch: 700 // loss: 0.268 epoch: 55, batch: 800 // loss: 0.256 epoch: 55, batch: 900 // loss: 0.064 epoch: 55, batch: 1000 // loss: 0.526 epoch: 55, batch: 1100 // loss: 0.131 epoch: 55, batch: 1200 // loss: 0.134 epoch: 56, batch: 0 // loss: 0.218 epoch: 56, batch: 100 // loss: 0.220 epoch: 56, batch: 200 // loss: 0.343 epoch: 56, batch: 300 // loss: 0.234 epoch: 56, batch: 400 // loss: 0.132 epoch: 56, batch: 500 // loss: 0.451 epoch: 56, batch: 600 // loss: 0.400 epoch: 56, batch: 700 // loss: 0.260 epoch: 56, batch: 800 // loss: 0.250 epoch: 56, batch: 900 // loss: 0.063 epoch: 56, batch: 1000 // loss: 0.528 epoch: 56, batch: 1100 // loss: 0.135 epoch: 56, batch: 1200 // loss: 0.133 epoch: 57, batch: 0 // loss: 0.217 epoch: 57, batch: 100 // loss: 0.219 epoch: 57, batch: 200 // loss: 0.343 epoch: 57, batch: 300 // loss: 0.235 epoch: 57, batch: 400 // loss: 0.131 epoch: 57, batch: 500 // loss: 0.443 epoch: 57, batch: 600 // loss: 0.396 epoch: 57, batch: 700 // loss: 0.261 epoch: 57, batch: 800 // loss: 0.242 epoch: 57, batch: 900 // loss: 0.063 epoch: 57, batch: 1000 // loss: 0.524 epoch: 57, batch: 1100 // loss: 0.134 epoch: 57, batch: 1200 // loss: 0.130 epoch: 58, batch: 0 // loss: 0.217 epoch: 58, batch: 100 // loss: 0.220 epoch: 58, batch: 200 // loss: 0.348 epoch: 58, batch: 300 // loss: 0.233 
epoch: 58, batch: 400 // loss: 0.131 epoch: 58, batch: 500 // loss: 0.446 epoch: 58, batch: 600 // loss: 0.398 epoch: 58, batch: 700 // loss: 0.258 epoch: 58, batch: 800 // loss: 0.245 epoch: 58, batch: 900 // loss: 0.063 epoch: 58, batch: 1000 // loss: 0.525 epoch: 58, batch: 1100 // loss: 0.133 epoch: 58, batch: 1200 // loss: 0.130 epoch: 59, batch: 0 // loss: 0.218 epoch: 59, batch: 100 // loss: 0.215 epoch: 59, batch: 200 // loss: 0.345 epoch: 59, batch: 300 // loss: 0.233 epoch: 59, batch: 400 // loss: 0.131 epoch: 59, batch: 500 // loss: 0.445 epoch: 59, batch: 600 // loss: 0.399 epoch: 59, batch: 700 // loss: 0.256 epoch: 59, batch: 800 // loss: 0.237 epoch: 59, batch: 900 // loss: 0.064 epoch: 59, batch: 1000 // loss: 0.529 epoch: 59, batch: 1100 // loss: 0.142 epoch: 59, batch: 1200 // loss: 0.127 epoch: 60, batch: 0 // loss: 0.211 epoch: 60, batch: 100 // loss: 0.217 epoch: 60, batch: 200 // loss: 0.335 epoch: 60, batch: 300 // loss: 0.233 epoch: 60, batch: 400 // loss: 0.130 epoch: 60, batch: 500 // loss: 0.450 epoch: 60, batch: 600 // loss: 0.397 epoch: 60, batch: 700 // loss: 0.255 epoch: 60, batch: 800 // loss: 0.235 epoch: 60, batch: 900 // loss: 0.063 epoch: 60, batch: 1000 // loss: 0.522 epoch: 60, batch: 1100 // loss: 0.139 epoch: 60, batch: 1200 // loss: 0.130 epoch: 61, batch: 0 // loss: 0.208 epoch: 61, batch: 100 // loss: 0.220 epoch: 61, batch: 200 // loss: 0.329 epoch: 61, batch: 300 // loss: 0.229 epoch: 61, batch: 400 // loss: 0.132 epoch: 61, batch: 500 // loss: 0.449 epoch: 61, batch: 600 // loss: 0.394 epoch: 61, batch: 700 // loss: 0.253 epoch: 61, batch: 800 // loss: 0.229 epoch: 61, batch: 900 // loss: 0.062 epoch: 61, batch: 1000 // loss: 0.523 epoch: 61, batch: 1100 // loss: 0.139 epoch: 61, batch: 1200 // loss: 0.127 epoch: 62, batch: 0 // loss: 0.204 epoch: 62, batch: 100 // loss: 0.215 epoch: 62, batch: 200 // loss: 0.331 epoch: 62, batch: 300 // loss: 0.229 epoch: 62, batch: 400 // loss: 0.131 epoch: 62, batch: 500 // loss: 
0.449 epoch: 62, batch: 600 // loss: 0.391 epoch: 62, batch: 700 // loss: 0.251 epoch: 62, batch: 800 // loss: 0.226 epoch: 62, batch: 900 // loss: 0.062 epoch: 62, batch: 1000 // loss: 0.522 epoch: 62, batch: 1100 // loss: 0.141 epoch: 62, batch: 1200 // loss: 0.129 epoch: 63, batch: 0 // loss: 0.207 epoch: 63, batch: 100 // loss: 0.215 epoch: 63, batch: 200 // loss: 0.323 epoch: 63, batch: 300 // loss: 0.230 epoch: 63, batch: 400 // loss: 0.130 epoch: 63, batch: 500 // loss: 0.455 epoch: 63, batch: 600 // loss: 0.391 epoch: 63, batch: 700 // loss: 0.245 epoch: 63, batch: 800 // loss: 0.224 epoch: 63, batch: 900 // loss: 0.060 epoch: 63, batch: 1000 // loss: 0.520 epoch: 63, batch: 1100 // loss: 0.140 epoch: 63, batch: 1200 // loss: 0.127 epoch: 64, batch: 0 // loss: 0.206 epoch: 64, batch: 100 // loss: 0.211 epoch: 64, batch: 200 // loss: 0.316 epoch: 64, batch: 300 // loss: 0.224 epoch: 64, batch: 400 // loss: 0.132 epoch: 64, batch: 500 // loss: 0.453 epoch: 64, batch: 600 // loss: 0.388 epoch: 64, batch: 700 // loss: 0.244 epoch: 64, batch: 800 // loss: 0.219 epoch: 64, batch: 900 // loss: 0.059 epoch: 64, batch: 1000 // loss: 0.506 epoch: 64, batch: 1100 // loss: 0.140 epoch: 64, batch: 1200 // loss: 0.127 epoch: 65, batch: 0 // loss: 0.202 epoch: 65, batch: 100 // loss: 0.211 epoch: 65, batch: 200 // loss: 0.308 epoch: 65, batch: 300 // loss: 0.226 epoch: 65, batch: 400 // loss: 0.131 epoch: 65, batch: 500 // loss: 0.453 epoch: 65, batch: 600 // loss: 0.385 epoch: 65, batch: 700 // loss: 0.245 epoch: 65, batch: 800 // loss: 0.219 epoch: 65, batch: 900 // loss: 0.059 epoch: 65, batch: 1000 // loss: 0.518 epoch: 65, batch: 1100 // loss: 0.142 epoch: 65, batch: 1200 // loss: 0.128 epoch: 66, batch: 0 // loss: 0.203 epoch: 66, batch: 100 // loss: 0.205 epoch: 66, batch: 200 // loss: 0.305 epoch: 66, batch: 300 // loss: 0.226 epoch: 66, batch: 400 // loss: 0.130 epoch: 66, batch: 500 // loss: 0.443 epoch: 66, batch: 600 // loss: 0.383 epoch: 66, batch: 700 // 
loss: 0.242 epoch: 66, batch: 800 // loss: 0.215 epoch: 66, batch: 900 // loss: 0.058 epoch: 66, batch: 1000 // loss: 0.509 epoch: 66, batch: 1100 // loss: 0.140 epoch: 66, batch: 1200 // loss: 0.130 epoch: 67, batch: 0 // loss: 0.201 epoch: 67, batch: 100 // loss: 0.210 epoch: 67, batch: 200 // loss: 0.303 epoch: 67, batch: 300 // loss: 0.223 epoch: 67, batch: 400 // loss: 0.129 epoch: 67, batch: 500 // loss: 0.439 epoch: 67, batch: 600 // loss: 0.379 epoch: 67, batch: 700 // loss: 0.237 epoch: 67, batch: 800 // loss: 0.207 epoch: 67, batch: 900 // loss: 0.057 epoch: 67, batch: 1000 // loss: 0.505 epoch: 67, batch: 1100 // loss: 0.140 epoch: 67, batch: 1200 // loss: 0.125 epoch: 68, batch: 0 // loss: 0.202 epoch: 68, batch: 100 // loss: 0.210 epoch: 68, batch: 200 // loss: 0.297 epoch: 68, batch: 300 // loss: 0.218 epoch: 68, batch: 400 // loss: 0.130 epoch: 68, batch: 500 // loss: 0.439 epoch: 68, batch: 600 // loss: 0.378 epoch: 68, batch: 700 // loss: 0.245 epoch: 68, batch: 800 // loss: 0.208 epoch: 68, batch: 900 // loss: 0.058 epoch: 68, batch: 1000 // loss: 0.504 epoch: 68, batch: 1100 // loss: 0.140 epoch: 68, batch: 1200 // loss: 0.126 epoch: 69, batch: 0 // loss: 0.197 epoch: 69, batch: 100 // loss: 0.205 epoch: 69, batch: 200 // loss: 0.295 epoch: 69, batch: 300 // loss: 0.221 epoch: 69, batch: 400 // loss: 0.133 epoch: 69, batch: 500 // loss: 0.441 epoch: 69, batch: 600 // loss: 0.377 epoch: 69, batch: 700 // loss: 0.235 epoch: 69, batch: 800 // loss: 0.202 epoch: 69, batch: 900 // loss: 0.057 epoch: 69, batch: 1000 // loss: 0.499 epoch: 69, batch: 1100 // loss: 0.140 epoch: 69, batch: 1200 // loss: 0.127 epoch: 70, batch: 0 // loss: 0.199 epoch: 70, batch: 100 // loss: 0.204 epoch: 70, batch: 200 // loss: 0.289 epoch: 70, batch: 300 // loss: 0.228 epoch: 70, batch: 400 // loss: 0.133 epoch: 70, batch: 500 // loss: 0.440 epoch: 70, batch: 600 // loss: 0.372 epoch: 70, batch: 700 // loss: 0.236 epoch: 70, batch: 800 // loss: 0.205 epoch: 70, batch: 900 
// loss: 0.057 epoch: 70, batch: 1000 // loss: 0.507 epoch: 70, batch: 1100 // loss: 0.137 epoch: 70, batch: 1200 // loss: 0.127 epoch: 71, batch: 0 // loss: 0.198 epoch: 71, batch: 100 // loss: 0.199 epoch: 71, batch: 200 // loss: 0.278 epoch: 71, batch: 300 // loss: 0.218 epoch: 71, batch: 400 // loss: 0.133 epoch: 71, batch: 500 // loss: 0.441 epoch: 71, batch: 600 // loss: 0.370 epoch: 71, batch: 700 // loss: 0.238 epoch: 71, batch: 800 // loss: 0.204 epoch: 71, batch: 900 // loss: 0.056 epoch: 71, batch: 1000 // loss: 0.511 epoch: 71, batch: 1100 // loss: 0.137 epoch: 71, batch: 1200 // loss: 0.123 epoch: 72, batch: 0 // loss: 0.197 epoch: 72, batch: 100 // loss: 0.197 epoch: 72, batch: 200 // loss: 0.274 epoch: 72, batch: 300 // loss: 0.215 epoch: 72, batch: 400 // loss: 0.133 epoch: 72, batch: 500 // loss: 0.426 epoch: 72, batch: 600 // loss: 0.366 epoch: 72, batch: 700 // loss: 0.238 epoch: 72, batch: 800 // loss: 0.196 epoch: 72, batch: 900 // loss: 0.057 epoch: 72, batch: 1000 // loss: 0.493 epoch: 72, batch: 1100 // loss: 0.139 epoch: 72, batch: 1200 // loss: 0.125 epoch: 73, batch: 0 // loss: 0.198 epoch: 73, batch: 100 // loss: 0.199 epoch: 73, batch: 200 // loss: 0.273 epoch: 73, batch: 300 // loss: 0.213 epoch: 73, batch: 400 // loss: 0.133 epoch: 73, batch: 500 // loss: 0.421 epoch: 73, batch: 600 // loss: 0.365 epoch: 73, batch: 700 // loss: 0.240 epoch: 73, batch: 800 // loss: 0.196 epoch: 73, batch: 900 // loss: 0.058 epoch: 73, batch: 1000 // loss: 0.507 epoch: 73, batch: 1100 // loss: 0.137 epoch: 73, batch: 1200 // loss: 0.123 epoch: 74, batch: 0 // loss: 0.197 epoch: 74, batch: 100 // loss: 0.196 epoch: 74, batch: 200 // loss: 0.262 epoch: 74, batch: 300 // loss: 0.214 epoch: 74, batch: 400 // loss: 0.131 epoch: 74, batch: 500 // loss: 0.422 epoch: 74, batch: 600 // loss: 0.359 epoch: 74, batch: 700 // loss: 0.235 epoch: 74, batch: 800 // loss: 0.197 epoch: 74, batch: 900 // loss: 0.057 epoch: 74, batch: 1000 // loss: 0.490 epoch: 74, batch: 
1100 // loss: 0.139 epoch: 74, batch: 1200 // loss: 0.125 epoch: 75, batch: 0 // loss: 0.194 epoch: 75, batch: 100 // loss: 0.193 epoch: 75, batch: 200 // loss: 0.263 epoch: 75, batch: 300 // loss: 0.209 epoch: 75, batch: 400 // loss: 0.132 epoch: 75, batch: 500 // loss: 0.414 epoch: 75, batch: 600 // loss: 0.362 epoch: 75, batch: 700 // loss: 0.239 epoch: 75, batch: 800 // loss: 0.197 epoch: 75, batch: 900 // loss: 0.058 epoch: 75, batch: 1000 // loss: 0.506 epoch: 75, batch: 1100 // loss: 0.131 epoch: 75, batch: 1200 // loss: 0.124 epoch: 76, batch: 0 // loss: 0.189 epoch: 76, batch: 100 // loss: 0.193 epoch: 76, batch: 200 // loss: 0.262 epoch: 76, batch: 300 // loss: 0.213 epoch: 76, batch: 400 // loss: 0.132 epoch: 76, batch: 500 // loss: 0.418 epoch: 76, batch: 600 // loss: 0.359 epoch: 76, batch: 700 // loss: 0.232 epoch: 76, batch: 800 // loss: 0.193 epoch: 76, batch: 900 // loss: 0.057 epoch: 76, batch: 1000 // loss: 0.490 epoch: 76, batch: 1100 // loss: 0.137 epoch: 76, batch: 1200 // loss: 0.122 epoch: 77, batch: 0 // loss: 0.192 epoch: 77, batch: 100 // loss: 0.195 epoch: 77, batch: 200 // loss: 0.259 epoch: 77, batch: 300 // loss: 0.209 epoch: 77, batch: 400 // loss: 0.136 epoch: 77, batch: 500 // loss: 0.421 epoch: 77, batch: 600 // loss: 0.355 epoch: 77, batch: 700 // loss: 0.235 epoch: 77, batch: 800 // loss: 0.194 epoch: 77, batch: 900 // loss: 0.059 epoch: 77, batch: 1000 // loss: 0.507 epoch: 77, batch: 1100 // loss: 0.133 epoch: 77, batch: 1200 // loss: 0.121 epoch: 78, batch: 0 // loss: 0.186 epoch: 78, batch: 100 // loss: 0.191 epoch: 78, batch: 200 // loss: 0.258 epoch: 78, batch: 300 // loss: 0.210 epoch: 78, batch: 400 // loss: 0.135 epoch: 78, batch: 500 // loss: 0.414 epoch: 78, batch: 600 // loss: 0.354 epoch: 78, batch: 700 // loss: 0.233 epoch: 78, batch: 800 // loss: 0.190 epoch: 78, batch: 900 // loss: 0.060 epoch: 78, batch: 1000 // loss: 0.503 epoch: 78, batch: 1100 // loss: 0.139 epoch: 78, batch: 1200 // loss: 0.123 epoch: 79, 
batch: 0 // loss: 0.187 epoch: 79, batch: 100 // loss: 0.187 epoch: 79, batch: 200 // loss: 0.252 epoch: 79, batch: 300 // loss: 0.213 epoch: 79, batch: 400 // loss: 0.136 epoch: 79, batch: 500 // loss: 0.417 epoch: 79, batch: 600 // loss: 0.351 epoch: 79, batch: 700 // loss: 0.229 epoch: 79, batch: 800 // loss: 0.191 epoch: 79, batch: 900 // loss: 0.061 epoch: 79, batch: 1000 // loss: 0.501 epoch: 79, batch: 1100 // loss: 0.133 epoch: 79, batch: 1200 // loss: 0.124 epoch: 80, batch: 0 // loss: 0.191 epoch: 80, batch: 100 // loss: 0.189 epoch: 80, batch: 200 // loss: 0.245 epoch: 80, batch: 300 // loss: 0.213 epoch: 80, batch: 400 // loss: 0.137 epoch: 80, batch: 500 // loss: 0.412 epoch: 80, batch: 600 // loss: 0.348 epoch: 80, batch: 700 // loss: 0.226 epoch: 80, batch: 800 // loss: 0.189 epoch: 80, batch: 900 // loss: 0.061 epoch: 80, batch: 1000 // loss: 0.484 epoch: 80, batch: 1100 // loss: 0.137 epoch: 80, batch: 1200 // loss: 0.126 epoch: 81, batch: 0 // loss: 0.189 epoch: 81, batch: 100 // loss: 0.184 epoch: 81, batch: 200 // loss: 0.251 epoch: 81, batch: 300 // loss: 0.211 epoch: 81, batch: 400 // loss: 0.135 epoch: 81, batch: 500 // loss: 0.410 epoch: 81, batch: 600 // loss: 0.341 epoch: 81, batch: 700 // loss: 0.224 epoch: 81, batch: 800 // loss: 0.181 epoch: 81, batch: 900 // loss: 0.063 epoch: 81, batch: 1000 // loss: 0.477 epoch: 81, batch: 1100 // loss: 0.134 epoch: 81, batch: 1200 // loss: 0.127 epoch: 82, batch: 0 // loss: 0.185 epoch: 82, batch: 100 // loss: 0.182 epoch: 82, batch: 200 // loss: 0.249 epoch: 82, batch: 300 // loss: 0.206 epoch: 82, batch: 400 // loss: 0.137 epoch: 82, batch: 500 // loss: 0.402 epoch: 82, batch: 600 // loss: 0.342 epoch: 82, batch: 700 // loss: 0.220 epoch: 82, batch: 800 // loss: 0.182 epoch: 82, batch: 900 // loss: 0.063 epoch: 82, batch: 1000 // loss: 0.494 epoch: 82, batch: 1100 // loss: 0.137 epoch: 82, batch: 1200 // loss: 0.121 epoch: 83, batch: 0 // loss: 0.189 epoch: 83, batch: 100 // loss: 0.176 epoch: 83, 
batch: 200 // loss: 0.249 epoch: 83, batch: 300 // loss: 0.204 epoch: 83, batch: 400 // loss: 0.137 epoch: 83, batch: 500 // loss: 0.400 epoch: 83, batch: 600 // loss: 0.332 epoch: 83, batch: 700 // loss: 0.220 epoch: 83, batch: 800 // loss: 0.177 epoch: 83, batch: 900 // loss: 0.064 epoch: 83, batch: 1000 // loss: 0.487 epoch: 83, batch: 1100 // loss: 0.129 epoch: 83, batch: 1200 // loss: 0.121 epoch: 84, batch: 0 // loss: 0.188 epoch: 84, batch: 100 // loss: 0.177 epoch: 84, batch: 200 // loss: 0.239 epoch: 84, batch: 300 // loss: 0.202 epoch: 84, batch: 400 // loss: 0.137 epoch: 84, batch: 500 // loss: 0.405 epoch: 84, batch: 600 // loss: 0.330 epoch: 84, batch: 700 // loss: 0.216 epoch: 84, batch: 800 // loss: 0.176 epoch: 84, batch: 900 // loss: 0.064 epoch: 84, batch: 1000 // loss: 0.488 epoch: 84, batch: 1100 // loss: 0.136 epoch: 84, batch: 1200 // loss: 0.122 epoch: 85, batch: 0 // loss: 0.185 epoch: 85, batch: 100 // loss: 0.175 epoch: 85, batch: 200 // loss: 0.241 epoch: 85, batch: 300 // loss: 0.207 epoch: 85, batch: 400 // loss: 0.139 epoch: 85, batch: 500 // loss: 0.398 epoch: 85, batch: 600 // loss: 0.325 epoch: 85, batch: 700 // loss: 0.219 epoch: 85, batch: 800 // loss: 0.170 epoch: 85, batch: 900 // loss: 0.067 epoch: 85, batch: 1000 // loss: 0.471 epoch: 85, batch: 1100 // loss: 0.138 epoch: 85, batch: 1200 // loss: 0.123 epoch: 86, batch: 0 // loss: 0.181 epoch: 86, batch: 100 // loss: 0.177 epoch: 86, batch: 200 // loss: 0.237 epoch: 86, batch: 300 // loss: 0.210 epoch: 86, batch: 400 // loss: 0.141 epoch: 86, batch: 500 // loss: 0.394 epoch: 86, batch: 600 // loss: 0.321 epoch: 86, batch: 700 // loss: 0.206 epoch: 86, batch: 800 // loss: 0.165 epoch: 86, batch: 900 // loss: 0.064 epoch: 86, batch: 1000 // loss: 0.481 epoch: 86, batch: 1100 // loss: 0.130 epoch: 86, batch: 1200 // loss: 0.122 epoch: 87, batch: 0 // loss: 0.184 epoch: 87, batch: 100 // loss: 0.180 epoch: 87, batch: 200 // loss: 0.240 epoch: 87, batch: 300 // loss: 0.208 epoch: 
87, batch: 400 // loss: 0.140 epoch: 87, batch: 500 // loss: 0.382 epoch: 87, batch: 600 // loss: 0.323 epoch: 87, batch: 700 // loss: 0.211 epoch: 87, batch: 800 // loss: 0.164 epoch: 87, batch: 900 // loss: 0.064 epoch: 87, batch: 1000 // loss: 0.480 epoch: 87, batch: 1100 // loss: 0.136 epoch: 87, batch: 1200 // loss: 0.126 epoch: 88, batch: 0 // loss: 0.182 epoch: 88, batch: 100 // loss: 0.179 epoch: 88, batch: 200 // loss: 0.231 epoch: 88, batch: 300 // loss: 0.205 epoch: 88, batch: 400 // loss: 0.140 epoch: 88, batch: 500 // loss: 0.388 epoch: 88, batch: 600 // loss: 0.317 epoch: 88, batch: 700 // loss: 0.214 epoch: 88, batch: 800 // loss: 0.166 epoch: 88, batch: 900 // loss: 0.067 epoch: 88, batch: 1000 // loss: 0.466 epoch: 88, batch: 1100 // loss: 0.134 epoch: 88, batch: 1200 // loss: 0.124 epoch: 89, batch: 0 // loss: 0.186 epoch: 89, batch: 100 // loss: 0.175 epoch: 89, batch: 200 // loss: 0.224 epoch: 89, batch: 300 // loss: 0.209 epoch: 89, batch: 400 // loss: 0.138 epoch: 89, batch: 500 // loss: 0.385 epoch: 89, batch: 600 // loss: 0.315 epoch: 89, batch: 700 // loss: 0.215 epoch: 89, batch: 800 // loss: 0.162 epoch: 89, batch: 900 // loss: 0.065 epoch: 89, batch: 1000 // loss: 0.461 epoch: 89, batch: 1100 // loss: 0.133 epoch: 89, batch: 1200 // loss: 0.124 epoch: 90, batch: 0 // loss: 0.179 epoch: 90, batch: 100 // loss: 0.175 epoch: 90, batch: 200 // loss: 0.225 epoch: 90, batch: 300 // loss: 0.207 epoch: 90, batch: 400 // loss: 0.140 epoch: 90, batch: 500 // loss: 0.379 epoch: 90, batch: 600 // loss: 0.308 epoch: 90, batch: 700 // loss: 0.208 epoch: 90, batch: 800 // loss: 0.156 epoch: 90, batch: 900 // loss: 0.065 epoch: 90, batch: 1000 // loss: 0.473 epoch: 90, batch: 1100 // loss: 0.131 epoch: 90, batch: 1200 // loss: 0.125 epoch: 91, batch: 0 // loss: 0.174 epoch: 91, batch: 100 // loss: 0.173 epoch: 91, batch: 200 // loss: 0.222 epoch: 91, batch: 300 // loss: 0.200 epoch: 91, batch: 400 // loss: 0.139 epoch: 91, batch: 500 // loss: 0.379 
epoch: 91, batch: 600 // loss: 0.303 epoch: 91, batch: 700 // loss: 0.199 epoch: 91, batch: 800 // loss: 0.153 epoch: 91, batch: 900 // loss: 0.067 epoch: 91, batch: 1000 // loss: 0.455 epoch: 91, batch: 1100 // loss: 0.124 epoch: 91, batch: 1200 // loss: 0.123 epoch: 92, batch: 0 // loss: 0.176 epoch: 92, batch: 100 // loss: 0.175 epoch: 92, batch: 200 // loss: 0.230 epoch: 92, batch: 300 // loss: 0.205 epoch: 92, batch: 400 // loss: 0.139 epoch: 92, batch: 500 // loss: 0.376 epoch: 92, batch: 600 // loss: 0.295 epoch: 92, batch: 700 // loss: 0.199 epoch: 92, batch: 800 // loss: 0.155 epoch: 92, batch: 900 // loss: 0.068 epoch: 92, batch: 1000 // loss: 0.455 epoch: 92, batch: 1100 // loss: 0.129 epoch: 92, batch: 1200 // loss: 0.123 epoch: 93, batch: 0 // loss: 0.177 epoch: 93, batch: 100 // loss: 0.176 epoch: 93, batch: 200 // loss: 0.216 epoch: 93, batch: 300 // loss: 0.205 epoch: 93, batch: 400 // loss: 0.137 epoch: 93, batch: 500 // loss: 0.380 epoch: 93, batch: 600 // loss: 0.298 epoch: 93, batch: 700 // loss: 0.211 epoch: 93, batch: 800 // loss: 0.155 epoch: 93, batch: 900 // loss: 0.067 epoch: 93, batch: 1000 // loss: 0.464 epoch: 93, batch: 1100 // loss: 0.127 epoch: 93, batch: 1200 // loss: 0.124 epoch: 94, batch: 0 // loss: 0.171 epoch: 94, batch: 100 // loss: 0.175 epoch: 94, batch: 200 // loss: 0.228 epoch: 94, batch: 300 // loss: 0.206 epoch: 94, batch: 400 // loss: 0.139 epoch: 94, batch: 500 // loss: 0.372 epoch: 94, batch: 600 // loss: 0.292 epoch: 94, batch: 700 // loss: 0.207 epoch: 94, batch: 800 // loss: 0.156 epoch: 94, batch: 900 // loss: 0.067 epoch: 94, batch: 1000 // loss: 0.446 epoch: 94, batch: 1100 // loss: 0.125 epoch: 94, batch: 1200 // loss: 0.120 epoch: 95, batch: 0 // loss: 0.178 epoch: 95, batch: 100 // loss: 0.172 epoch: 95, batch: 200 // loss: 0.229 epoch: 95, batch: 300 // loss: 0.205 epoch: 95, batch: 400 // loss: 0.138 epoch: 95, batch: 500 // loss: 0.365 epoch: 95, batch: 600 // loss: 0.292 epoch: 95, batch: 700 // loss: 
0.204 epoch: 95, batch: 800 // loss: 0.155 epoch: 95, batch: 900 // loss: 0.068 epoch: 95, batch: 1000 // loss: 0.463 epoch: 95, batch: 1100 // loss: 0.128 epoch: 95, batch: 1200 // loss: 0.123 epoch: 96, batch: 0 // loss: 0.172 epoch: 96, batch: 100 // loss: 0.178 epoch: 96, batch: 200 // loss: 0.225 epoch: 96, batch: 300 // loss: 0.207 epoch: 96, batch: 400 // loss: 0.140 epoch: 96, batch: 500 // loss: 0.372 epoch: 96, batch: 600 // loss: 0.289 epoch: 96, batch: 700 // loss: 0.199 epoch: 96, batch: 800 // loss: 0.154 epoch: 96, batch: 900 // loss: 0.067 epoch: 96, batch: 1000 // loss: 0.439 epoch: 96, batch: 1100 // loss: 0.124 epoch: 96, batch: 1200 // loss: 0.125 epoch: 97, batch: 0 // loss: 0.177 epoch: 97, batch: 100 // loss: 0.172 epoch: 97, batch: 200 // loss: 0.231 epoch: 97, batch: 300 // loss: 0.212 epoch: 97, batch: 400 // loss: 0.139 epoch: 97, batch: 500 // loss: 0.364 epoch: 97, batch: 600 // loss: 0.285 epoch: 97, batch: 700 // loss: 0.204 epoch: 97, batch: 800 // loss: 0.155 epoch: 97, batch: 900 // loss: 0.066 epoch: 97, batch: 1000 // loss: 0.446 epoch: 97, batch: 1100 // loss: 0.126 epoch: 97, batch: 1200 // loss: 0.124 epoch: 98, batch: 0 // loss: 0.185 epoch: 98, batch: 100 // loss: 0.177 epoch: 98, batch: 200 // loss: 0.226 epoch: 98, batch: 300 // loss: 0.211 epoch: 98, batch: 400 // loss: 0.140 epoch: 98, batch: 500 // loss: 0.361 epoch: 98, batch: 600 // loss: 0.286 epoch: 98, batch: 700 // loss: 0.198 epoch: 98, batch: 800 // loss: 0.153 epoch: 98, batch: 900 // loss: 0.066 epoch: 98, batch: 1000 // loss: 0.460 epoch: 98, batch: 1100 // loss: 0.120 epoch: 98, batch: 1200 // loss: 0.126 epoch: 99, batch: 0 // loss: 0.178 epoch: 99, batch: 100 // loss: 0.176 epoch: 99, batch: 200 // loss: 0.223 epoch: 99, batch: 300 // loss: 0.201 epoch: 99, batch: 400 // loss: 0.140 epoch: 99, batch: 500 // loss: 0.357 epoch: 99, batch: 600 // loss: 0.282 epoch: 99, batch: 700 // loss: 0.197 epoch: 99, batch: 800 // loss: 0.150 epoch: 99, batch: 900 // 
loss: 0.067 epoch: 99, batch: 1000 // loss: 0.450 epoch: 99, batch: 1100 // loss: 0.120 epoch: 99, batch: 1200 // loss: 0.123
Implement a simple autoencoder in `torch`. In particular, let's start with a vanilla linear autoencoder that maps the input to a two-dimensional hidden space.
class AE(nn.Module):
    '''
    Linear autoencoder: one linear layer encodes the input into a
    hidden code and a second linear layer decodes that code back to
    the input dimensionality. No nonlinearity is applied in between.
    '''

    def __init__(self, input_size=784, hidden_size=2):
        '''
        Create the encoder and decoder layers.

        input_size: number of features in each input row.
        hidden_size: dimensionality of the hidden code.
        '''
        super().__init__()
        ### REMOVE BELOW
        self.input_size, self.hidden_size = input_size, hidden_size
        # encoder; from x -> z
        self.i = nn.Linear(in_features=input_size, out_features=hidden_size)
        # decoder; from z -> reconstruction of x
        self.o = nn.Linear(in_features=hidden_size, out_features=input_size)

    def forward(self, X, return_z=False):
        '''
        Encode X and, unless return_z is truthy, decode it again.

        Returns the hidden code when return_z is truthy, otherwise the
        reconstruction produced by the decoder.
        '''
        ### REMOVE BELOW
        code = self.i(X)
        return code if return_z else self.o(code)
# Smoke test: run the first five rows of X through an untrained AE
# that uses a 50-unit hidden layer.
auto = AE(hidden_size=50)
# NOTE(review): `.float()` is called on the slice, so X is presumably a
# torch tensor at this point -- confirm against the earlier cells.
X_tilde = auto(X[:5,:].float())
# The decoder maps back to input_size (784 by default), so the
# reconstruction should have one 784-wide row per input row.
X_tilde.shape
torch.Size([5, 784])
Define a training loop, following the example above.
Hint: check out https://pytorch.org/docs/stable/nn.html#loss-functions
def train_AE(X_in, X_target, model, optimizer, loss_function, EPOCHS=10, batch_size=16):
    '''
    Train `model` to map X_in onto X_target with mini-batch updates.

    Every epoch walks over the rows of X_in in their stored order (no
    shuffling), in consecutive slices of `batch_size` rows; the last
    slice of an epoch may be shorter. For each slice the gradients are
    zeroed, the model is run forward, the loss is back-propagated and
    the optimizer takes one step. The loss of every 100th batch is
    printed.

    X_in: input rows; must support len(), slicing, and `.float()` on
        the slices (e.g. a torch tensor).
    X_target: targets, sliced with the same indices as X_in, so it is
        expected to have at least as many rows.
    model: callable mapping an input batch to predictions.
    optimizer: object exposing zero_grad() and step().
    loss_function: callable (predictions, targets) -> scalar loss
        supporting backward() and item().
    EPOCHS: number of full passes over X_in.
    batch_size: number of rows per mini-batch; must be positive.

    Raises ValueError if batch_size is not positive.
    '''
    if batch_size <= 0:
        # a non-positive step would never advance through the data
        raise ValueError("batch_size must be a positive integer")

    # Use the real number of rows rather than a hard-coded 60000, so
    # that smaller datasets do not produce empty batches and larger
    # ones are not silently truncated.
    n_samples = len(X_in)

    for epoch in range(EPOCHS):
        print("")
        for batch_num, idx in enumerate(range(0, n_samples, batch_size)):
            # zero the parameter gradients
            optimizer.zero_grad()

            X_batch = X_in[idx: idx + batch_size].float()
            X_target_batch = X_target[idx: idx + batch_size].float()

            # now run our X's forward, get preds, incur
            # loss, backprop, and step the optimizer.
            X_tilde_batch = model(X_batch)
            loss = loss_function(X_tilde_batch, X_target_batch)
            loss.backward()
            optimizer.step()

            # print out loss
            if batch_num % 100 == 0:
                print("epoch: {}, batch: {} // loss: {:.3f}".format(epoch, batch_num, loss.item()))
# Reconstruction objective: mean-squared error between output and target.
loss_function = nn.MSELoss()
# Fresh autoencoder with the defaults (784 inputs, 2-dimensional hidden code).
auto = AE()
# SGD with momentum over all of the autoencoder's parameters.
optimizer = optim.SGD(auto.parameters(), lr=0.01, momentum=0.9)
# X is passed as both input and target, so the model is trained to
# reproduce its own input.
train_AE(X, X, auto, optimizer, loss_function, EPOCHS=50)
epoch: 0, batch: 0 // loss: 0.383 epoch: 0, batch: 100 // loss: 0.357 epoch: 0, batch: 200 // loss: 0.321 epoch: 0, batch: 300 // loss: 0.303 epoch: 0, batch: 400 // loss: 0.298 epoch: 0, batch: 500 // loss: 0.288 epoch: 0, batch: 600 // loss: 0.284 epoch: 0, batch: 700 // loss: 0.283 epoch: 0, batch: 800 // loss: 0.248 epoch: 0, batch: 900 // loss: 0.278 epoch: 0, batch: 1000 // loss: 0.245 epoch: 0, batch: 1100 // loss: 0.265 epoch: 0, batch: 1200 // loss: 0.218 epoch: 0, batch: 1300 // loss: 0.243 epoch: 0, batch: 1400 // loss: 0.203 epoch: 0, batch: 1500 // loss: 0.199 epoch: 0, batch: 1600 // loss: 0.214 epoch: 0, batch: 1700 // loss: 0.196 epoch: 0, batch: 1800 // loss: 0.219 epoch: 0, batch: 1900 // loss: 0.192 epoch: 0, batch: 2000 // loss: 0.168 epoch: 0, batch: 2100 // loss: 0.169 epoch: 0, batch: 2200 // loss: 0.195 epoch: 0, batch: 2300 // loss: 0.171 epoch: 0, batch: 2400 // loss: 0.138 epoch: 0, batch: 2500 // loss: 0.140 epoch: 0, batch: 2600 // loss: 0.172 epoch: 0, batch: 2700 // loss: 0.134 epoch: 0, batch: 2800 // loss: 0.167 epoch: 0, batch: 2900 // loss: 0.124 epoch: 0, batch: 3000 // loss: 0.134 epoch: 0, batch: 3100 // loss: 0.151 epoch: 0, batch: 3200 // loss: 0.116 epoch: 0, batch: 3300 // loss: 0.126 epoch: 0, batch: 3400 // loss: 0.123 epoch: 0, batch: 3500 // loss: 0.127 epoch: 0, batch: 3600 // loss: 0.131 epoch: 0, batch: 3700 // loss: 0.140 epoch: 1, batch: 0 // loss: 0.134 epoch: 1, batch: 100 // loss: 0.127 epoch: 1, batch: 200 // loss: 0.124 epoch: 1, batch: 300 // loss: 0.114 epoch: 1, batch: 400 // loss: 0.116 epoch: 1, batch: 500 // loss: 0.109 epoch: 1, batch: 600 // loss: 0.111 epoch: 1, batch: 700 // loss: 0.115 epoch: 1, batch: 800 // loss: 0.105 epoch: 1, batch: 900 // loss: 0.125 epoch: 1, batch: 1000 // loss: 0.103 epoch: 1, batch: 1100 // loss: 0.120 epoch: 1, batch: 1200 // loss: 0.099 epoch: 1, batch: 1300 // loss: 0.116 epoch: 1, batch: 1400 // loss: 0.097 epoch: 1, batch: 1500 // loss: 0.097 epoch: 1, batch: 1600 // 
loss: 0.109 epoch: 1, batch: 1700 // loss: 0.105 epoch: 1, batch: 1800 // loss: 0.114 epoch: 1, batch: 1900 // loss: 0.102 epoch: 1, batch: 2000 // loss: 0.090 epoch: 1, batch: 2100 // loss: 0.097 epoch: 1, batch: 2200 // loss: 0.114 epoch: 1, batch: 2300 // loss: 0.101 epoch: 1, batch: 2400 // loss: 0.084 epoch: 1, batch: 2500 // loss: 0.083 epoch: 1, batch: 2600 // loss: 0.106 epoch: 1, batch: 2700 // loss: 0.083 epoch: 1, batch: 2800 // loss: 0.108 epoch: 1, batch: 2900 // loss: 0.080 epoch: 1, batch: 3000 // loss: 0.088 epoch: 1, batch: 3100 // loss: 0.098 epoch: 1, batch: 3200 // loss: 0.081 epoch: 1, batch: 3300 // loss: 0.086 epoch: 1, batch: 3400 // loss: 0.082 epoch: 1, batch: 3500 // loss: 0.089 epoch: 1, batch: 3600 // loss: 0.095 epoch: 1, batch: 3700 // loss: 0.101 epoch: 2, batch: 0 // loss: 0.103 epoch: 2, batch: 100 // loss: 0.091 epoch: 2, batch: 200 // loss: 0.098 epoch: 2, batch: 300 // loss: 0.089 epoch: 2, batch: 400 // loss: 0.090 epoch: 2, batch: 500 // loss: 0.083 epoch: 2, batch: 600 // loss: 0.085 epoch: 2, batch: 700 // loss: 0.089 epoch: 2, batch: 800 // loss: 0.087 epoch: 2, batch: 900 // loss: 0.101 epoch: 2, batch: 1000 // loss: 0.082 epoch: 2, batch: 1100 // loss: 0.095 epoch: 2, batch: 1200 // loss: 0.084 epoch: 2, batch: 1300 // loss: 0.096 epoch: 2, batch: 1400 // loss: 0.083 epoch: 2, batch: 1500 // loss: 0.083 epoch: 2, batch: 1600 // loss: 0.093 epoch: 2, batch: 1700 // loss: 0.093 epoch: 2, batch: 1800 // loss: 0.095 epoch: 2, batch: 1900 // loss: 0.088 epoch: 2, batch: 2000 // loss: 0.079 epoch: 2, batch: 2100 // loss: 0.088 epoch: 2, batch: 2200 // loss: 0.100 epoch: 2, batch: 2300 // loss: 0.091 epoch: 2, batch: 2400 // loss: 0.080 epoch: 2, batch: 2500 // loss: 0.076 epoch: 2, batch: 2600 // loss: 0.095 epoch: 2, batch: 2700 // loss: 0.077 epoch: 2, batch: 2800 // loss: 0.098 epoch: 2, batch: 2900 // loss: 0.076 epoch: 2, batch: 3000 // loss: 0.083 epoch: 2, batch: 3100 // loss: 0.090 epoch: 2, batch: 3200 // loss: 0.079 
epoch: 2, batch: 3300 // loss: 0.080 epoch: 2, batch: 3400 // loss: 0.076 epoch: 2, batch: 3500 // loss: 0.083 epoch: 2, batch: 3600 // loss: 0.091 epoch: 2, batch: 3700 // loss: 0.094 epoch: 3, batch: 0 // loss: 0.099 epoch: 3, batch: 100 // loss: 0.085 epoch: 3, batch: 200 // loss: 0.095 epoch: 3, batch: 300 // loss: 0.086 epoch: 3, batch: 400 // loss: 0.087 epoch: 3, batch: 500 // loss: 0.079 epoch: 3, batch: 600 // loss: 0.081 epoch: 3, batch: 700 // loss: 0.085 epoch: 3, batch: 800 // loss: 0.085 epoch: 3, batch: 900 // loss: 0.096 epoch: 3, batch: 1000 // loss: 0.078 epoch: 3, batch: 1100 // loss: 0.090 epoch: 3, batch: 1200 // loss: 0.082 epoch: 3, batch: 1300 // loss: 0.092 epoch: 3, batch: 1400 // loss: 0.082 epoch: 3, batch: 1500 // loss: 0.082 epoch: 3, batch: 1600 // loss: 0.090 epoch: 3, batch: 1700 // loss: 0.091 epoch: 3, batch: 1800 // loss: 0.092 epoch: 3, batch: 1900 // loss: 0.086 epoch: 3, batch: 2000 // loss: 0.078 epoch: 3, batch: 2100 // loss: 0.088 epoch: 3, batch: 2200 // loss: 0.098 epoch: 3, batch: 2300 // loss: 0.090 epoch: 3, batch: 2400 // loss: 0.081 epoch: 3, batch: 2500 // loss: 0.076 epoch: 3, batch: 2600 // loss: 0.093 epoch: 3, batch: 2700 // loss: 0.077 epoch: 3, batch: 2800 // loss: 0.096 epoch: 3, batch: 2900 // loss: 0.076 epoch: 3, batch: 3000 // loss: 0.082 epoch: 3, batch: 3100 // loss: 0.088 epoch: 3, batch: 3200 // loss: 0.079 epoch: 3, batch: 3300 // loss: 0.079 epoch: 3, batch: 3400 // loss: 0.076 epoch: 3, batch: 3500 // loss: 0.082 epoch: 3, batch: 3600 // loss: 0.090 epoch: 3, batch: 3700 // loss: 0.092 epoch: 4, batch: 0 // loss: 0.098 epoch: 4, batch: 100 // loss: 0.084 epoch: 4, batch: 200 // loss: 0.095 epoch: 4, batch: 300 // loss: 0.086 epoch: 4, batch: 400 // loss: 0.087 epoch: 4, batch: 500 // loss: 0.078 epoch: 4, batch: 600 // loss: 0.081 epoch: 4, batch: 700 // loss: 0.084 epoch: 4, batch: 800 // loss: 0.085 epoch: 4, batch: 900 // loss: 0.096 epoch: 4, batch: 1000 // loss: 0.077 epoch: 4, batch: 1100 // 
loss: 0.089 epoch: 4, batch: 1200 // loss: 0.082 epoch: 4, batch: 1300 // loss: 0.091 epoch: 4, batch: 1400 // loss: 0.082 epoch: 4, batch: 1500 // loss: 0.082 epoch: 4, batch: 1600 // loss: 0.090 epoch: 4, batch: 1700 // loss: 0.091 epoch: 4, batch: 1800 // loss: 0.090 epoch: 4, batch: 1900 // loss: 0.085 epoch: 4, batch: 2000 // loss: 0.078 epoch: 4, batch: 2100 // loss: 0.088 epoch: 4, batch: 2200 // loss: 0.097 epoch: 4, batch: 2300 // loss: 0.089 epoch: 4, batch: 2400 // loss: 0.081 epoch: 4, batch: 2500 // loss: 0.076 epoch: 4, batch: 2600 // loss: 0.092 epoch: 4, batch: 2700 // loss: 0.077 epoch: 4, batch: 2800 // loss: 0.096 epoch: 4, batch: 2900 // loss: 0.076 epoch: 4, batch: 3000 // loss: 0.082 epoch: 4, batch: 3100 // loss: 0.087 epoch: 4, batch: 3200 // loss: 0.080 epoch: 4, batch: 3300 // loss: 0.079 epoch: 4, batch: 3400 // loss: 0.075 epoch: 4, batch: 3500 // loss: 0.082 epoch: 4, batch: 3600 // loss: 0.090 epoch: 4, batch: 3700 // loss: 0.092 epoch: 5, batch: 0 // loss: 0.098 epoch: 5, batch: 100 // loss: 0.083 epoch: 5, batch: 200 // loss: 0.095 epoch: 5, batch: 300 // loss: 0.086 epoch: 5, batch: 400 // loss: 0.087 epoch: 5, batch: 500 // loss: 0.078 epoch: 5, batch: 600 // loss: 0.080 epoch: 5, batch: 700 // loss: 0.084 epoch: 5, batch: 800 // loss: 0.085 epoch: 5, batch: 900 // loss: 0.095 epoch: 5, batch: 1000 // loss: 0.077 epoch: 5, batch: 1100 // loss: 0.088 epoch: 5, batch: 1200 // loss: 0.082 epoch: 5, batch: 1300 // loss: 0.091 epoch: 5, batch: 1400 // loss: 0.082 epoch: 5, batch: 1500 // loss: 0.082 epoch: 5, batch: 1600 // loss: 0.090 epoch: 5, batch: 1700 // loss: 0.091 epoch: 5, batch: 1800 // loss: 0.090 epoch: 5, batch: 1900 // loss: 0.085 epoch: 5, batch: 2000 // loss: 0.078 epoch: 5, batch: 2100 // loss: 0.088 epoch: 5, batch: 2200 // loss: 0.097 epoch: 5, batch: 2300 // loss: 0.089 epoch: 5, batch: 2400 // loss: 0.081 epoch: 5, batch: 2500 // loss: 0.076 epoch: 5, batch: 2600 // loss: 0.092 epoch: 5, batch: 2700 // loss: 0.077 
epoch: 5, batch: 2800 // loss: 0.096 epoch: 5, batch: 2900 // loss: 0.076 epoch: 5, batch: 3000 // loss: 0.082 epoch: 5, batch: 3100 // loss: 0.087 epoch: 5, batch: 3200 // loss: 0.080 epoch: 5, batch: 3300 // loss: 0.079 epoch: 5, batch: 3400 // loss: 0.075 epoch: 5, batch: 3500 // loss: 0.082 epoch: 5, batch: 3600 // loss: 0.090 epoch: 5, batch: 3700 // loss: 0.092 epoch: 6, batch: 0 // loss: 0.098 epoch: 6, batch: 100 // loss: 0.083 epoch: 6, batch: 200 // loss: 0.095 epoch: 6, batch: 300 // loss: 0.086 epoch: 6, batch: 400 // loss: 0.087 epoch: 6, batch: 500 // loss: 0.078 epoch: 6, batch: 600 // loss: 0.080 epoch: 6, batch: 700 // loss: 0.083 epoch: 6, batch: 800 // loss: 0.085 epoch: 6, batch: 900 // loss: 0.095 epoch: 6, batch: 1000 // loss: 0.077 epoch: 6, batch: 1100 // loss: 0.088 epoch: 6, batch: 1200 // loss: 0.082 epoch: 6, batch: 1300 // loss: 0.091 epoch: 6, batch: 1400 // loss: 0.082 epoch: 6, batch: 1500 // loss: 0.082 epoch: 6, batch: 1600 // loss: 0.089 epoch: 6, batch: 1700 // loss: 0.091 epoch: 6, batch: 1800 // loss: 0.090 epoch: 6, batch: 1900 // loss: 0.085 epoch: 6, batch: 2000 // loss: 0.078 epoch: 6, batch: 2100 // loss: 0.088 epoch: 6, batch: 2200 // loss: 0.096 epoch: 6, batch: 2300 // loss: 0.089 epoch: 6, batch: 2400 // loss: 0.081 epoch: 6, batch: 2500 // loss: 0.076 epoch: 6, batch: 2600 // loss: 0.092 epoch: 6, batch: 2700 // loss: 0.077 epoch: 6, batch: 2800 // loss: 0.095 epoch: 6, batch: 2900 // loss: 0.076 epoch: 6, batch: 3000 // loss: 0.082 epoch: 6, batch: 3100 // loss: 0.087 epoch: 6, batch: 3200 // loss: 0.080 epoch: 6, batch: 3300 // loss: 0.079 epoch: 6, batch: 3400 // loss: 0.075 epoch: 6, batch: 3500 // loss: 0.081 epoch: 6, batch: 3600 // loss: 0.090 epoch: 6, batch: 3700 // loss: 0.092 epoch: 7, batch: 0 // loss: 0.098 epoch: 7, batch: 100 // loss: 0.083 epoch: 7, batch: 200 // loss: 0.095 epoch: 7, batch: 300 // loss: 0.086 epoch: 7, batch: 400 // loss: 0.087 epoch: 7, batch: 500 // loss: 0.078 epoch: 7, batch: 600 
// loss: 0.080 epoch: 7, batch: 700 // loss: 0.083 epoch: 7, batch: 800 // loss: 0.085 epoch: 7, batch: 900 // loss: 0.095 epoch: 7, batch: 1000 // loss: 0.077 epoch: 7, batch: 1100 // loss: 0.088 epoch: 7, batch: 1200 // loss: 0.082 epoch: 7, batch: 1300 // loss: 0.090 epoch: 7, batch: 1400 // loss: 0.082 epoch: 7, batch: 1500 // loss: 0.082 epoch: 7, batch: 1600 // loss: 0.089 epoch: 7, batch: 1700 // loss: 0.090 epoch: 7, batch: 1800 // loss: 0.090 epoch: 7, batch: 1900 // loss: 0.085 epoch: 7, batch: 2000 // loss: 0.078 epoch: 7, batch: 2100 // loss: 0.088 epoch: 7, batch: 2200 // loss: 0.096 epoch: 7, batch: 2300 // loss: 0.089 epoch: 7, batch: 2400 // loss: 0.081 epoch: 7, batch: 2500 // loss: 0.076 epoch: 7, batch: 2600 // loss: 0.091 epoch: 7, batch: 2700 // loss: 0.077 epoch: 7, batch: 2800 // loss: 0.095 epoch: 7, batch: 2900 // loss: 0.076 epoch: 7, batch: 3000 // loss: 0.082 epoch: 7, batch: 3100 // loss: 0.087 epoch: 7, batch: 3200 // loss: 0.080 epoch: 7, batch: 3300 // loss: 0.079 epoch: 7, batch: 3400 // loss: 0.075 epoch: 7, batch: 3500 // loss: 0.081 epoch: 7, batch: 3600 // loss: 0.089 epoch: 7, batch: 3700 // loss: 0.091 epoch: 8, batch: 0 // loss: 0.098 epoch: 8, batch: 100 // loss: 0.082 epoch: 8, batch: 200 // loss: 0.095 epoch: 8, batch: 300 // loss: 0.086 epoch: 8, batch: 400 // loss: 0.087 epoch: 8, batch: 500 // loss: 0.078 epoch: 8, batch: 600 // loss: 0.080 epoch: 8, batch: 700 // loss: 0.083 epoch: 8, batch: 800 // loss: 0.085 epoch: 8, batch: 900 // loss: 0.094 epoch: 8, batch: 1000 // loss: 0.077 epoch: 8, batch: 1100 // loss: 0.088 epoch: 8, batch: 1200 // loss: 0.082 epoch: 8, batch: 1300 // loss: 0.090 epoch: 8, batch: 1400 // loss: 0.082 epoch: 8, batch: 1500 // loss: 0.081 epoch: 8, batch: 1600 // loss: 0.089 epoch: 8, batch: 1700 // loss: 0.090 epoch: 8, batch: 1800 // loss: 0.089 epoch: 8, batch: 1900 // loss: 0.085 epoch: 8, batch: 2000 // loss: 0.078 epoch: 8, batch: 2100 // loss: 0.087 epoch: 8, batch: 2200 // loss: 0.096 
epoch: 8, batch: 2300 // loss: 0.089 epoch: 8, batch: 2400 // loss: 0.081 epoch: 8, batch: 2500 // loss: 0.076 epoch: 8, batch: 2600 // loss: 0.091 epoch: 8, batch: 2700 // loss: 0.076 epoch: 8, batch: 2800 // loss: 0.095 epoch: 8, batch: 2900 // loss: 0.076 epoch: 8, batch: 3000 // loss: 0.081 epoch: 8, batch: 3100 // loss: 0.086 epoch: 8, batch: 3200 // loss: 0.079 epoch: 8, batch: 3300 // loss: 0.079 epoch: 8, batch: 3400 // loss: 0.075 epoch: 8, batch: 3500 // loss: 0.081 epoch: 8, batch: 3600 // loss: 0.089 epoch: 8, batch: 3700 // loss: 0.091 epoch: 9, batch: 0 // loss: 0.097 epoch: 9, batch: 100 // loss: 0.082 epoch: 9, batch: 200 // loss: 0.094 epoch: 9, batch: 300 // loss: 0.085 epoch: 9, batch: 400 // loss: 0.086 epoch: 9, batch: 500 // loss: 0.077 epoch: 9, batch: 600 // loss: 0.079 epoch: 9, batch: 700 // loss: 0.083 epoch: 9, batch: 800 // loss: 0.084 epoch: 9, batch: 900 // loss: 0.094 epoch: 9, batch: 1000 // loss: 0.076 epoch: 9, batch: 1100 // loss: 0.087 epoch: 9, batch: 1200 // loss: 0.082 epoch: 9, batch: 1300 // loss: 0.090 epoch: 9, batch: 1400 // loss: 0.082 epoch: 9, batch: 1500 // loss: 0.081 epoch: 9, batch: 1600 // loss: 0.089 epoch: 9, batch: 1700 // loss: 0.090 epoch: 9, batch: 1800 // loss: 0.089 epoch: 9, batch: 1900 // loss: 0.084 epoch: 9, batch: 2000 // loss: 0.078 epoch: 9, batch: 2100 // loss: 0.087 epoch: 9, batch: 2200 // loss: 0.095 epoch: 9, batch: 2300 // loss: 0.088 epoch: 9, batch: 2400 // loss: 0.081 epoch: 9, batch: 2500 // loss: 0.075 epoch: 9, batch: 2600 // loss: 0.091 epoch: 9, batch: 2700 // loss: 0.076 epoch: 9, batch: 2800 // loss: 0.094 epoch: 9, batch: 2900 // loss: 0.075 epoch: 9, batch: 3000 // loss: 0.081 epoch: 9, batch: 3100 // loss: 0.086 epoch: 9, batch: 3200 // loss: 0.079 epoch: 9, batch: 3300 // loss: 0.078 epoch: 9, batch: 3400 // loss: 0.075 epoch: 9, batch: 3500 // loss: 0.080 epoch: 9, batch: 3600 // loss: 0.089 epoch: 9, batch: 3700 // loss: 0.091 epoch: 10, batch: 0 // loss: 0.097 epoch: 10, 
batch: 100 // loss: 0.082 epoch: 10, batch: 200 // loss: 0.094 epoch: 10, batch: 300 // loss: 0.085 epoch: 10, batch: 400 // loss: 0.086 epoch: 10, batch: 500 // loss: 0.077 epoch: 10, batch: 600 // loss: 0.079 epoch: 10, batch: 700 // loss: 0.082 epoch: 10, batch: 800 // loss: 0.084 epoch: 10, batch: 900 // loss: 0.094 epoch: 10, batch: 1000 // loss: 0.076 epoch: 10, batch: 1100 // loss: 0.087 epoch: 10, batch: 1200 // loss: 0.081 epoch: 10, batch: 1300 // loss: 0.089 epoch: 10, batch: 1400 // loss: 0.081 epoch: 10, batch: 1500 // loss: 0.081 epoch: 10, batch: 1600 // loss: 0.088 epoch: 10, batch: 1700 // loss: 0.089 epoch: 10, batch: 1800 // loss: 0.089 epoch: 10, batch: 1900 // loss: 0.084 epoch: 10, batch: 2000 // loss: 0.077 epoch: 10, batch: 2100 // loss: 0.087 epoch: 10, batch: 2200 // loss: 0.095 epoch: 10, batch: 2300 // loss: 0.088 epoch: 10, batch: 2400 // loss: 0.081 epoch: 10, batch: 2500 // loss: 0.075 epoch: 10, batch: 2600 // loss: 0.090 epoch: 10, batch: 2700 // loss: 0.076 epoch: 10, batch: 2800 // loss: 0.094 epoch: 10, batch: 2900 // loss: 0.075 epoch: 10, batch: 3000 // loss: 0.081 epoch: 10, batch: 3100 // loss: 0.085 epoch: 10, batch: 3200 // loss: 0.079 epoch: 10, batch: 3300 // loss: 0.078 epoch: 10, batch: 3400 // loss: 0.074 epoch: 10, batch: 3500 // loss: 0.080 epoch: 10, batch: 3600 // loss: 0.088 epoch: 10, batch: 3700 // loss: 0.090 epoch: 11, batch: 0 // loss: 0.096 epoch: 11, batch: 100 // loss: 0.081 epoch: 11, batch: 200 // loss: 0.093 epoch: 11, batch: 300 // loss: 0.085 epoch: 11, batch: 400 // loss: 0.085 epoch: 11, batch: 500 // loss: 0.076 epoch: 11, batch: 600 // loss: 0.078 epoch: 11, batch: 700 // loss: 0.082 epoch: 11, batch: 800 // loss: 0.083 epoch: 11, batch: 900 // loss: 0.093 epoch: 11, batch: 1000 // loss: 0.076 epoch: 11, batch: 1100 // loss: 0.086 epoch: 11, batch: 1200 // loss: 0.081 epoch: 11, batch: 1300 // loss: 0.088 epoch: 11, batch: 1400 // loss: 0.081 epoch: 11, batch: 1500 // loss: 0.081 epoch: 11, batch: 
1600 // loss: 0.088 epoch: 11, batch: 1700 // loss: 0.088 epoch: 11, batch: 1800 // loss: 0.088 epoch: 11, batch: 1900 // loss: 0.084 epoch: 11, batch: 2000 // loss: 0.077 epoch: 11, batch: 2100 // loss: 0.086 epoch: 11, batch: 2200 // loss: 0.094 epoch: 11, batch: 2300 // loss: 0.087 epoch: 11, batch: 2400 // loss: 0.080 epoch: 11, batch: 2500 // loss: 0.075 epoch: 11, batch: 2600 // loss: 0.089 epoch: 11, batch: 2700 // loss: 0.075 epoch: 11, batch: 2800 // loss: 0.093 epoch: 11, batch: 2900 // loss: 0.075 epoch: 11, batch: 3000 // loss: 0.080 epoch: 11, batch: 3100 // loss: 0.085 epoch: 11, batch: 3200 // loss: 0.078 epoch: 11, batch: 3300 // loss: 0.077 epoch: 11, batch: 3400 // loss: 0.074 epoch: 11, batch: 3500 // loss: 0.079 epoch: 11, batch: 3600 // loss: 0.087 epoch: 11, batch: 3700 // loss: 0.089 epoch: 12, batch: 0 // loss: 0.096 epoch: 12, batch: 100 // loss: 0.081 epoch: 12, batch: 200 // loss: 0.092 epoch: 12, batch: 300 // loss: 0.084 epoch: 12, batch: 400 // loss: 0.085 epoch: 12, batch: 500 // loss: 0.076 epoch: 12, batch: 600 // loss: 0.078 epoch: 12, batch: 700 // loss: 0.081 epoch: 12, batch: 800 // loss: 0.083 epoch: 12, batch: 900 // loss: 0.092 epoch: 12, batch: 1000 // loss: 0.075 epoch: 12, batch: 1100 // loss: 0.085 epoch: 12, batch: 1200 // loss: 0.081 epoch: 12, batch: 1300 // loss: 0.088 epoch: 12, batch: 1400 // loss: 0.080 epoch: 12, batch: 1500 // loss: 0.080 epoch: 12, batch: 1600 // loss: 0.087 epoch: 12, batch: 1700 // loss: 0.087 epoch: 12, batch: 1800 // loss: 0.088 epoch: 12, batch: 1900 // loss: 0.083 epoch: 12, batch: 2000 // loss: 0.077 epoch: 12, batch: 2100 // loss: 0.086 epoch: 12, batch: 2200 // loss: 0.093 epoch: 12, batch: 2300 // loss: 0.086 epoch: 12, batch: 2400 // loss: 0.080 epoch: 12, batch: 2500 // loss: 0.074 epoch: 12, batch: 2600 // loss: 0.088 epoch: 12, batch: 2700 // loss: 0.075 epoch: 12, batch: 2800 // loss: 0.092 epoch: 12, batch: 2900 // loss: 0.074 epoch: 12, batch: 3000 // loss: 0.079 epoch: 12, 
batch: 3100 // loss: 0.084 epoch: 12, batch: 3200 // loss: 0.078 epoch: 12, batch: 3300 // loss: 0.077 epoch: 12, batch: 3400 // loss: 0.073 epoch: 12, batch: 3500 // loss: 0.078 epoch: 12, batch: 3600 // loss: 0.086 epoch: 12, batch: 3700 // loss: 0.089 epoch: 13, batch: 0 // loss: 0.095 epoch: 13, batch: 100 // loss: 0.080 epoch: 13, batch: 200 // loss: 0.091 epoch: 13, batch: 300 // loss: 0.084 epoch: 13, batch: 400 // loss: 0.084 epoch: 13, batch: 500 // loss: 0.075 epoch: 13, batch: 600 // loss: 0.077 epoch: 13, batch: 700 // loss: 0.080 epoch: 13, batch: 800 // loss: 0.082 epoch: 13, batch: 900 // loss: 0.091 epoch: 13, batch: 1000 // loss: 0.075 epoch: 13, batch: 1100 // loss: 0.084 epoch: 13, batch: 1200 // loss: 0.080 epoch: 13, batch: 1300 // loss: 0.087 epoch: 13, batch: 1400 // loss: 0.079 epoch: 13, batch: 1500 // loss: 0.080 epoch: 13, batch: 1600 // loss: 0.087 epoch: 13, batch: 1700 // loss: 0.086 epoch: 13, batch: 1800 // loss: 0.087 epoch: 13, batch: 1900 // loss: 0.082 epoch: 13, batch: 2000 // loss: 0.076 epoch: 13, batch: 2100 // loss: 0.085 epoch: 13, batch: 2200 // loss: 0.092 epoch: 13, batch: 2300 // loss: 0.085 epoch: 13, batch: 2400 // loss: 0.079 epoch: 13, batch: 2500 // loss: 0.073 epoch: 13, batch: 2600 // loss: 0.087 epoch: 13, batch: 2700 // loss: 0.074 epoch: 13, batch: 2800 // loss: 0.091 epoch: 13, batch: 2900 // loss: 0.073 epoch: 13, batch: 3000 // loss: 0.078 epoch: 13, batch: 3100 // loss: 0.083 epoch: 13, batch: 3200 // loss: 0.077 epoch: 13, batch: 3300 // loss: 0.076 epoch: 13, batch: 3400 // loss: 0.073 epoch: 13, batch: 3500 // loss: 0.077 epoch: 13, batch: 3600 // loss: 0.085 epoch: 13, batch: 3700 // loss: 0.088 epoch: 14, batch: 0 // loss: 0.094 epoch: 14, batch: 100 // loss: 0.079 epoch: 14, batch: 200 // loss: 0.090 epoch: 14, batch: 300 // loss: 0.083 epoch: 14, batch: 400 // loss: 0.083 epoch: 14, batch: 500 // loss: 0.074 epoch: 14, batch: 600 // loss: 0.076 epoch: 14, batch: 700 // loss: 0.079 epoch: 14, batch: 
800 // loss: 0.081 epoch: 14, batch: 900 // loss: 0.090 epoch: 14, batch: 1000 // loss: 0.074 epoch: 14, batch: 1100 // loss: 0.083 epoch: 14, batch: 1200 // loss: 0.079 epoch: 14, batch: 1300 // loss: 0.085 epoch: 14, batch: 1400 // loss: 0.079 epoch: 14, batch: 1500 // loss: 0.079 epoch: 14, batch: 1600 // loss: 0.086 epoch: 14, batch: 1700 // loss: 0.085 epoch: 14, batch: 1800 // loss: 0.086 epoch: 14, batch: 1900 // loss: 0.081 epoch: 14, batch: 2000 // loss: 0.076 epoch: 14, batch: 2100 // loss: 0.084 epoch: 14, batch: 2200 // loss: 0.091 epoch: 14, batch: 2300 // loss: 0.084 epoch: 14, batch: 2400 // loss: 0.078 epoch: 14, batch: 2500 // loss: 0.073 epoch: 14, batch: 2600 // loss: 0.086 epoch: 14, batch: 2700 // loss: 0.073 epoch: 14, batch: 2800 // loss: 0.090 epoch: 14, batch: 2900 // loss: 0.073 epoch: 14, batch: 3000 // loss: 0.077 epoch: 14, batch: 3100 // loss: 0.082 epoch: 14, batch: 3200 // loss: 0.076 epoch: 14, batch: 3300 // loss: 0.075 epoch: 14, batch: 3400 // loss: 0.072 epoch: 14, batch: 3500 // loss: 0.075 epoch: 14, batch: 3600 // loss: 0.084 epoch: 14, batch: 3700 // loss: 0.086 epoch: 15, batch: 0 // loss: 0.092 epoch: 15, batch: 100 // loss: 0.078 epoch: 15, batch: 200 // loss: 0.088 epoch: 15, batch: 300 // loss: 0.082 epoch: 15, batch: 400 // loss: 0.082 epoch: 15, batch: 500 // loss: 0.073 epoch: 15, batch: 600 // loss: 0.074 epoch: 15, batch: 700 // loss: 0.078 epoch: 15, batch: 800 // loss: 0.080 epoch: 15, batch: 900 // loss: 0.089 epoch: 15, batch: 1000 // loss: 0.073 epoch: 15, batch: 1100 // loss: 0.081 epoch: 15, batch: 1200 // loss: 0.079 epoch: 15, batch: 1300 // loss: 0.084 epoch: 15, batch: 1400 // loss: 0.077 epoch: 15, batch: 1500 // loss: 0.079 epoch: 15, batch: 1600 // loss: 0.085 epoch: 15, batch: 1700 // loss: 0.083 epoch: 15, batch: 1800 // loss: 0.085 epoch: 15, batch: 1900 // loss: 0.080 epoch: 15, batch: 2000 // loss: 0.075 epoch: 15, batch: 2100 // loss: 0.083 epoch: 15, batch: 2200 // loss: 0.089 epoch: 15, batch: 
2300 // loss: 0.083 epoch: 15, batch: 2400 // loss: 0.077 epoch: 15, batch: 2500 // loss: 0.072 epoch: 15, batch: 2600 // loss: 0.084 epoch: 15, batch: 2700 // loss: 0.072 epoch: 15, batch: 2800 // loss: 0.089 epoch: 15, batch: 2900 // loss: 0.072 epoch: 15, batch: 3000 // loss: 0.076 epoch: 15, batch: 3100 // loss: 0.081 epoch: 15, batch: 3200 // loss: 0.075 epoch: 15, batch: 3300 // loss: 0.073 epoch: 15, batch: 3400 // loss: 0.071 epoch: 15, batch: 3500 // loss: 0.074 epoch: 15, batch: 3600 // loss: 0.082 epoch: 15, batch: 3700 // loss: 0.085 epoch: 16, batch: 0 // loss: 0.091 epoch: 16, batch: 100 // loss: 0.077 epoch: 16, batch: 200 // loss: 0.086 epoch: 16, batch: 300 // loss: 0.081 epoch: 16, batch: 400 // loss: 0.081 epoch: 16, batch: 500 // loss: 0.072 epoch: 16, batch: 600 // loss: 0.073 epoch: 16, batch: 700 // loss: 0.077 epoch: 16, batch: 800 // loss: 0.078 epoch: 16, batch: 900 // loss: 0.087 epoch: 16, batch: 1000 // loss: 0.072 epoch: 16, batch: 1100 // loss: 0.080 epoch: 16, batch: 1200 // loss: 0.078 epoch: 16, batch: 1300 // loss: 0.083 epoch: 16, batch: 1400 // loss: 0.076 epoch: 16, batch: 1500 // loss: 0.078 epoch: 16, batch: 1600 // loss: 0.083 epoch: 16, batch: 1700 // loss: 0.081 epoch: 16, batch: 1800 // loss: 0.084 epoch: 16, batch: 1900 // loss: 0.079 epoch: 16, batch: 2000 // loss: 0.074 epoch: 16, batch: 2100 // loss: 0.082 epoch: 16, batch: 2200 // loss: 0.087 epoch: 16, batch: 2300 // loss: 0.082 epoch: 16, batch: 2400 // loss: 0.076 epoch: 16, batch: 2500 // loss: 0.071 epoch: 16, batch: 2600 // loss: 0.082 epoch: 16, batch: 2700 // loss: 0.071 epoch: 16, batch: 2800 // loss: 0.087 epoch: 16, batch: 2900 // loss: 0.071 epoch: 16, batch: 3000 // loss: 0.074 epoch: 16, batch: 3100 // loss: 0.079 epoch: 16, batch: 3200 // loss: 0.074 epoch: 16, batch: 3300 // loss: 0.072 epoch: 16, batch: 3400 // loss: 0.070 epoch: 16, batch: 3500 // loss: 0.072 epoch: 16, batch: 3600 // loss: 0.080 epoch: 16, batch: 3700 // loss: 0.083 epoch: 17, 
batch: 0 // loss: 0.089 epoch: 17, batch: 100 // loss: 0.076 epoch: 17, batch: 200 // loss: 0.084 epoch: 17, batch: 300 // loss: 0.079 epoch: 17, batch: 400 // loss: 0.079 epoch: 17, batch: 500 // loss: 0.070 epoch: 17, batch: 600 // loss: 0.071 epoch: 17, batch: 700 // loss: 0.075 epoch: 17, batch: 800 // loss: 0.077 epoch: 17, batch: 900 // loss: 0.086 epoch: 17, batch: 1000 // loss: 0.071 epoch: 17, batch: 1100 // loss: 0.078 epoch: 17, batch: 1200 // loss: 0.076 epoch: 17, batch: 1300 // loss: 0.081 epoch: 17, batch: 1400 // loss: 0.075 epoch: 17, batch: 1500 // loss: 0.077 epoch: 17, batch: 1600 // loss: 0.082 epoch: 17, batch: 1700 // loss: 0.079 epoch: 17, batch: 1800 // loss: 0.082 epoch: 17, batch: 1900 // loss: 0.078 epoch: 17, batch: 2000 // loss: 0.073 epoch: 17, batch: 2100 // loss: 0.080 epoch: 17, batch: 2200 // loss: 0.085 epoch: 17, batch: 2300 // loss: 0.080 epoch: 17, batch: 2400 // loss: 0.074 epoch: 17, batch: 2500 // loss: 0.069 epoch: 17, batch: 2600 // loss: 0.080 epoch: 17, batch: 2700 // loss: 0.069 epoch: 17, batch: 2800 // loss: 0.085 epoch: 17, batch: 2900 // loss: 0.069 epoch: 17, batch: 3000 // loss: 0.072 epoch: 17, batch: 3100 // loss: 0.077 epoch: 17, batch: 3200 // loss: 0.072 epoch: 17, batch: 3300 // loss: 0.070 epoch: 17, batch: 3400 // loss: 0.069 epoch: 17, batch: 3500 // loss: 0.070 epoch: 17, batch: 3600 // loss: 0.078 epoch: 17, batch: 3700 // loss: 0.082 epoch: 18, batch: 0 // loss: 0.087 epoch: 18, batch: 100 // loss: 0.075 epoch: 18, batch: 200 // loss: 0.081 epoch: 18, batch: 300 // loss: 0.078 epoch: 18, batch: 400 // loss: 0.078 epoch: 18, batch: 500 // loss: 0.069 epoch: 18, batch: 600 // loss: 0.069 epoch: 18, batch: 700 // loss: 0.073 epoch: 18, batch: 800 // loss: 0.075 epoch: 18, batch: 900 // loss: 0.084 epoch: 18, batch: 1000 // loss: 0.070 epoch: 18, batch: 1100 // loss: 0.075 epoch: 18, batch: 1200 // loss: 0.075 epoch: 18, batch: 1300 // loss: 0.079 epoch: 18, batch: 1400 // loss: 0.073 epoch: 18, batch: 
1500 // loss: 0.076 epoch: 18, batch: 1600 // loss: 0.080 epoch: 18, batch: 1700 // loss: 0.076 epoch: 18, batch: 1800 // loss: 0.081 epoch: 18, batch: 1900 // loss: 0.076 epoch: 18, batch: 2000 // loss: 0.072 epoch: 18, batch: 2100 // loss: 0.079 epoch: 18, batch: 2200 // loss: 0.083 epoch: 18, batch: 2300 // loss: 0.078 epoch: 18, batch: 2400 // loss: 0.073 epoch: 18, batch: 2500 // loss: 0.068 epoch: 18, batch: 2600 // loss: 0.078 epoch: 18, batch: 2700 // loss: 0.068 epoch: 18, batch: 2800 // loss: 0.083 epoch: 18, batch: 2900 // loss: 0.068 epoch: 18, batch: 3000 // loss: 0.070 epoch: 18, batch: 3100 // loss: 0.076 epoch: 18, batch: 3200 // loss: 0.071 epoch: 18, batch: 3300 // loss: 0.069 epoch: 18, batch: 3400 // loss: 0.068 epoch: 18, batch: 3500 // loss: 0.068 epoch: 18, batch: 3600 // loss: 0.076 epoch: 18, batch: 3700 // loss: 0.080 epoch: 19, batch: 0 // loss: 0.085 epoch: 19, batch: 100 // loss: 0.073 epoch: 19, batch: 200 // loss: 0.078 epoch: 19, batch: 300 // loss: 0.076 epoch: 19, batch: 400 // loss: 0.076 epoch: 19, batch: 500 // loss: 0.067 epoch: 19, batch: 600 // loss: 0.067 epoch: 19, batch: 700 // loss: 0.071 epoch: 19, batch: 800 // loss: 0.073 epoch: 19, batch: 900 // loss: 0.082 epoch: 19, batch: 1000 // loss: 0.068 epoch: 19, batch: 1100 // loss: 0.073 epoch: 19, batch: 1200 // loss: 0.074 epoch: 19, batch: 1300 // loss: 0.077 epoch: 19, batch: 1400 // loss: 0.071 epoch: 19, batch: 1500 // loss: 0.074 epoch: 19, batch: 1600 // loss: 0.078 epoch: 19, batch: 1700 // loss: 0.074 epoch: 19, batch: 1800 // loss: 0.079 epoch: 19, batch: 1900 // loss: 0.075 epoch: 19, batch: 2000 // loss: 0.071 epoch: 19, batch: 2100 // loss: 0.077 epoch: 19, batch: 2200 // loss: 0.081 epoch: 19, batch: 2300 // loss: 0.076 epoch: 19, batch: 2400 // loss: 0.071 epoch: 19, batch: 2500 // loss: 0.067 epoch: 19, batch: 2600 // loss: 0.075 epoch: 19, batch: 2700 // loss: 0.067 epoch: 19, batch: 2800 // loss: 0.081 epoch: 19, batch: 2900 // loss: 0.067 epoch: 19, 
batch: 3000 // loss: 0.068 epoch: 19, batch: 3100 // loss: 0.074 epoch: 19, batch: 3200 // loss: 0.069 epoch: 19, batch: 3300 // loss: 0.067 epoch: 19, batch: 3400 // loss: 0.067 epoch: 19, batch: 3500 // loss: 0.065 epoch: 19, batch: 3600 // loss: 0.073 epoch: 19, batch: 3700 // loss: 0.077 epoch: 20, batch: 0 // loss: 0.083 epoch: 20, batch: 100 // loss: 0.072 epoch: 20, batch: 200 // loss: 0.076 epoch: 20, batch: 300 // loss: 0.075 epoch: 20, batch: 400 // loss: 0.074 epoch: 20, batch: 500 // loss: 0.065 epoch: 20, batch: 600 // loss: 0.065 epoch: 20, batch: 700 // loss: 0.069 epoch: 20, batch: 800 // loss: 0.071 epoch: 20, batch: 900 // loss: 0.080 epoch: 20, batch: 1000 // loss: 0.067 epoch: 20, batch: 1100 // loss: 0.071 epoch: 20, batch: 1200 // loss: 0.072 epoch: 20, batch: 1300 // loss: 0.075 epoch: 20, batch: 1400 // loss: 0.069 epoch: 20, batch: 1500 // loss: 0.073 epoch: 20, batch: 1600 // loss: 0.077 epoch: 20, batch: 1700 // loss: 0.071 epoch: 20, batch: 1800 // loss: 0.077 epoch: 20, batch: 1900 // loss: 0.073 epoch: 20, batch: 2000 // loss: 0.070 epoch: 20, batch: 2100 // loss: 0.075 epoch: 20, batch: 2200 // loss: 0.079 epoch: 20, batch: 2300 // loss: 0.074 epoch: 20, batch: 2400 // loss: 0.069 epoch: 20, batch: 2500 // loss: 0.065 epoch: 20, batch: 2600 // loss: 0.073 epoch: 20, batch: 2700 // loss: 0.065 epoch: 20, batch: 2800 // loss: 0.079 epoch: 20, batch: 2900 // loss: 0.065 epoch: 20, batch: 3000 // loss: 0.066 epoch: 20, batch: 3100 // loss: 0.072 epoch: 20, batch: 3200 // loss: 0.067 epoch: 20, batch: 3300 // loss: 0.065 epoch: 20, batch: 3400 // loss: 0.066 epoch: 20, batch: 3500 // loss: 0.063 epoch: 20, batch: 3600 // loss: 0.071 epoch: 20, batch: 3700 // loss: 0.075 epoch: 21, batch: 0 // loss: 0.081 epoch: 21, batch: 100 // loss: 0.071 epoch: 21, batch: 200 // loss: 0.073 epoch: 21, batch: 300 // loss: 0.073 epoch: 21, batch: 400 // loss: 0.072 epoch: 21, batch: 500 // loss: 0.063 epoch: 21, batch: 600 // loss: 0.063 epoch: 21, batch: 
700 // loss: 0.067 epoch: 21, batch: 800 // loss: 0.069 epoch: 21, batch: 900 // loss: 0.078 epoch: 21, batch: 1000 // loss: 0.065 epoch: 21, batch: 1100 // loss: 0.068 epoch: 21, batch: 1200 // loss: 0.071 epoch: 21, batch: 1300 // loss: 0.074 epoch: 21, batch: 1400 // loss: 0.068 epoch: 21, batch: 1500 // loss: 0.072 epoch: 21, batch: 1600 // loss: 0.075 epoch: 21, batch: 1700 // loss: 0.069 epoch: 21, batch: 1800 // loss: 0.076 epoch: 21, batch: 1900 // loss: 0.071 epoch: 21, batch: 2000 // loss: 0.069 epoch: 21, batch: 2100 // loss: 0.074 epoch: 21, batch: 2200 // loss: 0.077 epoch: 21, batch: 2300 // loss: 0.072 epoch: 21, batch: 2400 // loss: 0.067 epoch: 21, batch: 2500 // loss: 0.064 epoch: 21, batch: 2600 // loss: 0.070 epoch: 21, batch: 2700 // loss: 0.064 epoch: 21, batch: 2800 // loss: 0.077 epoch: 21, batch: 2900 // loss: 0.064 epoch: 21, batch: 3000 // loss: 0.065 epoch: 21, batch: 3100 // loss: 0.070 epoch: 21, batch: 3200 // loss: 0.065 epoch: 21, batch: 3300 // loss: 0.063 epoch: 21, batch: 3400 // loss: 0.064 epoch: 21, batch: 3500 // loss: 0.061 epoch: 21, batch: 3600 // loss: 0.068 epoch: 21, batch: 3700 // loss: 0.073 epoch: 22, batch: 0 // loss: 0.079 epoch: 22, batch: 100 // loss: 0.070 epoch: 22, batch: 200 // loss: 0.070 epoch: 22, batch: 300 // loss: 0.072 epoch: 22, batch: 400 // loss: 0.071 epoch: 22, batch: 500 // loss: 0.061 epoch: 22, batch: 600 // loss: 0.061 epoch: 22, batch: 700 // loss: 0.065 epoch: 22, batch: 800 // loss: 0.068 epoch: 22, batch: 900 // loss: 0.076 epoch: 22, batch: 1000 // loss: 0.064 epoch: 22, batch: 1100 // loss: 0.066 epoch: 22, batch: 1200 // loss: 0.069 epoch: 22, batch: 1300 // loss: 0.072 epoch: 22, batch: 1400 // loss: 0.066 epoch: 22, batch: 1500 // loss: 0.071 epoch: 22, batch: 1600 // loss: 0.073 epoch: 22, batch: 1700 // loss: 0.067 epoch: 22, batch: 1800 // loss: 0.074 epoch: 22, batch: 1900 // loss: 0.070 epoch: 22, batch: 2000 // loss: 0.069 epoch: 22, batch: 2100 // loss: 0.072 epoch: 22, batch: 
2200 // loss: 0.075 epoch: 22, batch: 2300 // loss: 0.071 epoch: 22, batch: 2400 // loss: 0.066 epoch: 22, batch: 2500 // loss: 0.063 epoch: 22, batch: 2600 // loss: 0.068 epoch: 22, batch: 2700 // loss: 0.062 epoch: 22, batch: 2800 // loss: 0.076 epoch: 22, batch: 2900 // loss: 0.062 epoch: 22, batch: 3000 // loss: 0.063 epoch: 22, batch: 3100 // loss: 0.068 epoch: 22, batch: 3200 // loss: 0.064 epoch: 22, batch: 3300 // loss: 0.061 epoch: 22, batch: 3400 // loss: 0.063 epoch: 22, batch: 3500 // loss: 0.059 epoch: 22, batch: 3600 // loss: 0.066 epoch: 22, batch: 3700 // loss: 0.071 epoch: 23, batch: 0 // loss: 0.077 epoch: 23, batch: 100 // loss: 0.069 epoch: 23, batch: 200 // loss: 0.068 epoch: 23, batch: 300 // loss: 0.070 epoch: 23, batch: 400 // loss: 0.069 epoch: 23, batch: 500 // loss: 0.059 epoch: 23, batch: 600 // loss: 0.059 epoch: 23, batch: 700 // loss: 0.063 epoch: 23, batch: 800 // loss: 0.066 epoch: 23, batch: 900 // loss: 0.074 epoch: 23, batch: 1000 // loss: 0.063 epoch: 23, batch: 1100 // loss: 0.064 epoch: 23, batch: 1200 // loss: 0.068 epoch: 23, batch: 1300 // loss: 0.070 epoch: 23, batch: 1400 // loss: 0.064 epoch: 23, batch: 1500 // loss: 0.069 epoch: 23, batch: 1600 // loss: 0.072 epoch: 23, batch: 1700 // loss: 0.065 epoch: 23, batch: 1800 // loss: 0.073 epoch: 23, batch: 1900 // loss: 0.069 epoch: 23, batch: 2000 // loss: 0.068 epoch: 23, batch: 2100 // loss: 0.071 epoch: 23, batch: 2200 // loss: 0.073 epoch: 23, batch: 2300 // loss: 0.069 epoch: 23, batch: 2400 // loss: 0.064 epoch: 23, batch: 2500 // loss: 0.061 epoch: 23, batch: 2600 // loss: 0.066 epoch: 23, batch: 2700 // loss: 0.061 epoch: 23, batch: 2800 // loss: 0.074 epoch: 23, batch: 2900 // loss: 0.061 epoch: 23, batch: 3000 // loss: 0.061 epoch: 23, batch: 3100 // loss: 0.067 epoch: 23, batch: 3200 // loss: 0.062 epoch: 23, batch: 3300 // loss: 0.060 epoch: 23, batch: 3400 // loss: 0.062 epoch: 23, batch: 3500 // loss: 0.057 epoch: 23, batch: 3600 // loss: 0.064 epoch: 23, 
batch: 3700 // loss: 0.070 epoch: 24, batch: 0 // loss: 0.076 epoch: 24, batch: 100 // loss: 0.068 epoch: 24, batch: 200 // loss: 0.065 epoch: 24, batch: 300 // loss: 0.069 epoch: 24, batch: 400 // loss: 0.068 epoch: 24, batch: 500 // loss: 0.058 epoch: 24, batch: 600 // loss: 0.058 epoch: 24, batch: 700 // loss: 0.062 epoch: 24, batch: 800 // loss: 0.065 epoch: 24, batch: 900 // loss: 0.072 epoch: 24, batch: 1000 // loss: 0.061 epoch: 24, batch: 1100 // loss: 0.062 epoch: 24, batch: 1200 // loss: 0.067 epoch: 24, batch: 1300 // loss: 0.069 epoch: 24, batch: 1400 // loss: 0.063 epoch: 24, batch: 1500 // loss: 0.068 epoch: 24, batch: 1600 // loss: 0.070 epoch: 24, batch: 1700 // loss: 0.063 epoch: 24, batch: 1800 // loss: 0.071 epoch: 24, batch: 1900 // loss: 0.067 epoch: 24, batch: 2000 // loss: 0.067 epoch: 24, batch: 2100 // loss: 0.070 epoch: 24, batch: 2200 // loss: 0.072 epoch: 24, batch: 2300 // loss: 0.068 epoch: 24, batch: 2400 // loss: 0.062 epoch: 24, batch: 2500 // loss: 0.060 epoch: 24, batch: 2600 // loss: 0.065 epoch: 24, batch: 2700 // loss: 0.060 epoch: 24, batch: 2800 // loss: 0.072 epoch: 24, batch: 2900 // loss: 0.060 epoch: 24, batch: 3000 // loss: 0.060 epoch: 24, batch: 3100 // loss: 0.065 epoch: 24, batch: 3200 // loss: 0.061 epoch: 24, batch: 3300 // loss: 0.059 epoch: 24, batch: 3400 // loss: 0.061 epoch: 24, batch: 3500 // loss: 0.056 epoch: 24, batch: 3600 // loss: 0.062 epoch: 24, batch: 3700 // loss: 0.068 epoch: 25, batch: 0 // loss: 0.075 epoch: 25, batch: 100 // loss: 0.067 epoch: 25, batch: 200 // loss: 0.063 epoch: 25, batch: 300 // loss: 0.068 epoch: 25, batch: 400 // loss: 0.067 epoch: 25, batch: 500 // loss: 0.057 epoch: 25, batch: 600 // loss: 0.057 epoch: 25, batch: 700 // loss: 0.060 epoch: 25, batch: 800 // loss: 0.063 epoch: 25, batch: 900 // loss: 0.071 epoch: 25, batch: 1000 // loss: 0.060 epoch: 25, batch: 1100 // loss: 0.061 epoch: 25, batch: 1200 // loss: 0.066 epoch: 25, batch: 1300 // loss: 0.068 epoch: 25, batch: 
1400 // loss: 0.061 epoch: 25, batch: 1500 // loss: 0.068 epoch: 25, batch: 1600 // loss: 0.069 epoch: 25, batch: 1700 // loss: 0.061 epoch: 25, batch: 1800 // loss: 0.070 epoch: 25, batch: 1900 // loss: 0.066 epoch: 25, batch: 2000 // loss: 0.066 epoch: 25, batch: 2100 // loss: 0.069 epoch: 25, batch: 2200 // loss: 0.071 epoch: 25, batch: 2300 // loss: 0.067 epoch: 25, batch: 2400 // loss: 0.061 epoch: 25, batch: 2500 // loss: 0.060 epoch: 25, batch: 2600 // loss: 0.063 epoch: 25, batch: 2700 // loss: 0.059 epoch: 25, batch: 2800 // loss: 0.071 epoch: 25, batch: 2900 // loss: 0.059 epoch: 25, batch: 3000 // loss: 0.059 epoch: 25, batch: 3100 // loss: 0.064 epoch: 25, batch: 3200 // loss: 0.059 epoch: 25, batch: 3300 // loss: 0.057 epoch: 25, batch: 3400 // loss: 0.061 epoch: 25, batch: 3500 // loss: 0.055 epoch: 25, batch: 3600 // loss: 0.060 epoch: 25, batch: 3700 // loss: 0.067 epoch: 26, batch: 0 // loss: 0.073 epoch: 26, batch: 100 // loss: 0.067 epoch: 26, batch: 200 // loss: 0.062 epoch: 26, batch: 300 // loss: 0.067 epoch: 26, batch: 400 // loss: 0.066 epoch: 26, batch: 500 // loss: 0.055 epoch: 26, batch: 600 // loss: 0.055 epoch: 26, batch: 700 // loss: 0.059 epoch: 26, batch: 800 // loss: 0.062 epoch: 26, batch: 900 // loss: 0.070 epoch: 26, batch: 1000 // loss: 0.060 epoch: 26, batch: 1100 // loss: 0.060 epoch: 26, batch: 1200 // loss: 0.065 epoch: 26, batch: 1300 // loss: 0.067 epoch: 26, batch: 1400 // loss: 0.060 epoch: 26, batch: 1500 // loss: 0.067 epoch: 26, batch: 1600 // loss: 0.068 epoch: 26, batch: 1700 // loss: 0.060 epoch: 26, batch: 1800 // loss: 0.069 epoch: 26, batch: 1900 // loss: 0.065 epoch: 26, batch: 2000 // loss: 0.066 epoch: 26, batch: 2100 // loss: 0.068 epoch: 26, batch: 2200 // loss: 0.070 epoch: 26, batch: 2300 // loss: 0.066 epoch: 26, batch: 2400 // loss: 0.060 epoch: 26, batch: 2500 // loss: 0.059 epoch: 26, batch: 2600 // loss: 0.062 epoch: 26, batch: 2700 // loss: 0.058 epoch: 26, batch: 2800 // loss: 0.070 epoch: 26, 
batch: 2900 // loss: 0.058 epoch: 26, batch: 3000 // loss: 0.058 epoch: 26, batch: 3100 // loss: 0.063 epoch: 26, batch: 3200 // loss: 0.058 epoch: 26, batch: 3300 // loss: 0.056 epoch: 26, batch: 3400 // loss: 0.060 epoch: 26, batch: 3500 // loss: 0.053 epoch: 26, batch: 3600 // loss: 0.059 epoch: 26, batch: 3700 // loss: 0.066 epoch: 27, batch: 0 // loss: 0.072 epoch: 27, batch: 100 // loss: 0.066 epoch: 27, batch: 200 // loss: 0.060 epoch: 27, batch: 300 // loss: 0.066 epoch: 27, batch: 400 // loss: 0.065 epoch: 27, batch: 500 // loss: 0.054 epoch: 27, batch: 600 // loss: 0.054 epoch: 27, batch: 700 // loss: 0.058 epoch: 27, batch: 800 // loss: 0.062 epoch: 27, batch: 900 // loss: 0.069 epoch: 27, batch: 1000 // loss: 0.059 epoch: 27, batch: 1100 // loss: 0.058 epoch: 27, batch: 1200 // loss: 0.064 epoch: 27, batch: 1300 // loss: 0.067 epoch: 27, batch: 1400 // loss: 0.059 epoch: 27, batch: 1500 // loss: 0.066 epoch: 27, batch: 1600 // loss: 0.067 epoch: 27, batch: 1700 // loss: 0.059 epoch: 27, batch: 1800 // loss: 0.069 epoch: 27, batch: 1900 // loss: 0.065 epoch: 27, batch: 2000 // loss: 0.065 epoch: 27, batch: 2100 // loss: 0.067 epoch: 27, batch: 2200 // loss: 0.069 epoch: 27, batch: 2300 // loss: 0.065 epoch: 27, batch: 2400 // loss: 0.059 epoch: 27, batch: 2500 // loss: 0.058 epoch: 27, batch: 2600 // loss: 0.061 epoch: 27, batch: 2700 // loss: 0.058 epoch: 27, batch: 2800 // loss: 0.069 epoch: 27, batch: 2900 // loss: 0.058 epoch: 27, batch: 3000 // loss: 0.057 epoch: 27, batch: 3100 // loss: 0.062 epoch: 27, batch: 3200 // loss: 0.057 epoch: 27, batch: 3300 // loss: 0.056 epoch: 27, batch: 3400 // loss: 0.059 epoch: 27, batch: 3500 // loss: 0.053 epoch: 27, batch: 3600 // loss: 0.058 epoch: 27, batch: 3700 // loss: 0.065 epoch: 28, batch: 0 // loss: 0.071 epoch: 28, batch: 100 // loss: 0.066 epoch: 28, batch: 200 // loss: 0.059 epoch: 28, batch: 300 // loss: 0.066 epoch: 28, batch: 400 // loss: 0.064 epoch: 28, batch: 500 // loss: 0.054 epoch: 28, 
batch: 600 // loss: 0.054 epoch: 28, batch: 700 // loss: 0.057 epoch: 28, batch: 800 // loss: 0.061 epoch: 28, batch: 900 // loss: 0.068 epoch: 28, batch: 1000 // loss: 0.058 epoch: 28, batch: 1100 // loss: 0.058 epoch: 28, batch: 1200 // loss: 0.063 epoch: 28, batch: 1300 // loss: 0.066 epoch: 28, batch: 1400 // loss: 0.059 epoch: 28, batch: 1500 // loss: 0.065 epoch: 28, batch: 1600 // loss: 0.066 epoch: 28, batch: 1700 // loss: 0.059 epoch: 28, batch: 1800 // loss: 0.068 epoch: 28, batch: 1900 // loss: 0.064 epoch: 28, batch: 2000 // loss: 0.065 epoch: 28, batch: 2100 // loss: 0.066 epoch: 28, batch: 2200 // loss: 0.068 epoch: 28, batch: 2300 // loss: 0.065 epoch: 28, batch: 2400 // loss: 0.058 epoch: 28, batch: 2500 // loss: 0.058 epoch: 28, batch: 2600 // loss: 0.060 epoch: 28, batch: 2700 // loss: 0.057 epoch: 28, batch: 2800 // loss: 0.068 epoch: 28, batch: 2900 // loss: 0.057 epoch: 28, batch: 3000 // loss: 0.056 epoch: 28, batch: 3100 // loss: 0.061 epoch: 28, batch: 3200 // loss: 0.056 epoch: 28, batch: 3300 // loss: 0.055 epoch: 28, batch: 3400 // loss: 0.059 epoch: 28, batch: 3500 // loss: 0.052 epoch: 28, batch: 3600 // loss: 0.057 epoch: 28, batch: 3700 // loss: 0.064 epoch: 29, batch: 0 // loss: 0.071 epoch: 29, batch: 100 // loss: 0.065 epoch: 29, batch: 200 // loss: 0.058 epoch: 29, batch: 300 // loss: 0.065 epoch: 29, batch: 400 // loss: 0.063 epoch: 29, batch: 500 // loss: 0.053 epoch: 29, batch: 600 // loss: 0.053 epoch: 29, batch: 700 // loss: 0.056 epoch: 29, batch: 800 // loss: 0.060 epoch: 29, batch: 900 // loss: 0.068 epoch: 29, batch: 1000 // loss: 0.058 epoch: 29, batch: 1100 // loss: 0.057 epoch: 29, batch: 1200 // loss: 0.063 epoch: 29, batch: 1300 // loss: 0.066 epoch: 29, batch: 1400 // loss: 0.058 epoch: 29, batch: 1500 // loss: 0.065 epoch: 29, batch: 1600 // loss: 0.066 epoch: 29, batch: 1700 // loss: 0.058 epoch: 29, batch: 1800 // loss: 0.067 epoch: 29, batch: 1900 // loss: 0.063 epoch: 29, batch: 2000 // loss: 0.064 epoch: 29, 
batch: 2100 // loss: 0.066 epoch: 29, batch: 2200 // loss: 0.068 epoch: 29, batch: 2300 // loss: 0.064 epoch: 29, batch: 2400 // loss: 0.057 epoch: 29, batch: 2500 // loss: 0.057 epoch: 29, batch: 2600 // loss: 0.059 epoch: 29, batch: 2700 // loss: 0.057 epoch: 29, batch: 2800 // loss: 0.068 epoch: 29, batch: 2900 // loss: 0.057 epoch: 29, batch: 3000 // loss: 0.056 epoch: 29, batch: 3100 // loss: 0.061 epoch: 29, batch: 3200 // loss: 0.056 epoch: 29, batch: 3300 // loss: 0.054 epoch: 29, batch: 3400 // loss: 0.059 epoch: 29, batch: 3500 // loss: 0.051 epoch: 29, batch: 3600 // loss: 0.056 epoch: 29, batch: 3700 // loss: 0.063 epoch: 30, batch: 0 // loss: 0.070 epoch: 30, batch: 100 // loss: 0.065 epoch: 30, batch: 200 // loss: 0.058 epoch: 30, batch: 300 // loss: 0.065 epoch: 30, batch: 400 // loss: 0.063 epoch: 30, batch: 500 // loss: 0.052 epoch: 30, batch: 600 // loss: 0.052 epoch: 30, batch: 700 // loss: 0.056 epoch: 30, batch: 800 // loss: 0.060 epoch: 30, batch: 900 // loss: 0.067 epoch: 30, batch: 1000 // loss: 0.057 epoch: 30, batch: 1100 // loss: 0.056 epoch: 30, batch: 1200 // loss: 0.062 epoch: 30, batch: 1300 // loss: 0.065 epoch: 30, batch: 1400 // loss: 0.057 epoch: 30, batch: 1500 // loss: 0.064 epoch: 30, batch: 1600 // loss: 0.065 epoch: 30, batch: 1700 // loss: 0.058 epoch: 30, batch: 1800 // loss: 0.067 epoch: 30, batch: 1900 // loss: 0.063 epoch: 30, batch: 2000 // loss: 0.064 epoch: 30, batch: 2100 // loss: 0.065 epoch: 30, batch: 2200 // loss: 0.067 epoch: 30, batch: 2300 // loss: 0.064 epoch: 30, batch: 2400 // loss: 0.056 epoch: 30, batch: 2500 // loss: 0.057 epoch: 30, batch: 2600 // loss: 0.058 epoch: 30, batch: 2700 // loss: 0.056 epoch: 30, batch: 2800 // loss: 0.067 epoch: 30, batch: 2900 // loss: 0.056 epoch: 30, batch: 3000 // loss: 0.055 epoch: 30, batch: 3100 // loss: 0.060 epoch: 30, batch: 3200 // loss: 0.055 epoch: 30, batch: 3300 // loss: 0.054 epoch: 30, batch: 3400 // loss: 0.058 epoch: 30, batch: 3500 // loss: 0.051 epoch: 
30, batch: 3600 // loss: 0.055 epoch: 30, batch: 3700 // loss: 0.063 epoch: 31, batch: 0 // loss: 0.070 epoch: 31, batch: 100 // loss: 0.065 epoch: 31, batch: 200 // loss: 0.057 epoch: 31, batch: 300 // loss: 0.064 epoch: 31, batch: 400 // loss: 0.062 epoch: 31, batch: 500 // loss: 0.052 epoch: 31, batch: 600 // loss: 0.052 epoch: 31, batch: 700 // loss: 0.055 epoch: 31, batch: 800 // loss: 0.059 epoch: 31, batch: 900 // loss: 0.066 epoch: 31, batch: 1000 // loss: 0.057 epoch: 31, batch: 1100 // loss: 0.056 epoch: 31, batch: 1200 // loss: 0.062 epoch: 31, batch: 1300 // loss: 0.065 epoch: 31, batch: 1400 // loss: 0.057 epoch: 31, batch: 1500 // loss: 0.064 epoch: 31, batch: 1600 // loss: 0.064 epoch: 31, batch: 1700 // loss: 0.057 epoch: 31, batch: 1800 // loss: 0.067 epoch: 31, batch: 1900 // loss: 0.062 epoch: 31, batch: 2000 // loss: 0.064 epoch: 31, batch: 2100 // loss: 0.065 epoch: 31, batch: 2200 // loss: 0.067 epoch: 31, batch: 2300 // loss: 0.063 epoch: 31, batch: 2400 // loss: 0.056 epoch: 31, batch: 2500 // loss: 0.056 epoch: 31, batch: 2600 // loss: 0.058 epoch: 31, batch: 2700 // loss: 0.056 epoch: 31, batch: 2800 // loss: 0.067 epoch: 31, batch: 2900 // loss: 0.056 epoch: 31, batch: 3000 // loss: 0.055 epoch: 31, batch: 3100 // loss: 0.059 epoch: 31, batch: 3200 // loss: 0.054 epoch: 31, batch: 3300 // loss: 0.053 epoch: 31, batch: 3400 // loss: 0.058 epoch: 31, batch: 3500 // loss: 0.050 epoch: 31, batch: 3600 // loss: 0.055 epoch: 31, batch: 3700 // loss: 0.062 epoch: 32, batch: 0 // loss: 0.069 epoch: 32, batch: 100 // loss: 0.065 epoch: 32, batch: 200 // loss: 0.056 epoch: 32, batch: 300 // loss: 0.064 epoch: 32, batch: 400 // loss: 0.062 epoch: 32, batch: 500 // loss: 0.051 epoch: 32, batch: 600 // loss: 0.052 epoch: 32, batch: 700 // loss: 0.055 epoch: 32, batch: 800 // loss: 0.059 epoch: 32, batch: 900 // loss: 0.066 epoch: 32, batch: 1000 // loss: 0.056 epoch: 32, batch: 1100 // loss: 0.055 epoch: 32, batch: 1200 // loss: 0.061 epoch: 32, 
batch: 1300 // loss: 0.065 epoch: 32, batch: 1400 // loss: 0.056 epoch: 32, batch: 1500 // loss: 0.063 epoch: 32, batch: 1600 // loss: 0.064 epoch: 32, batch: 1700 // loss: 0.057 epoch: 32, batch: 1800 // loss: 0.066 epoch: 32, batch: 1900 // loss: 0.062 epoch: 32, batch: 2000 // loss: 0.063 epoch: 32, batch: 2100 // loss: 0.064 epoch: 32, batch: 2200 // loss: 0.067 epoch: 32, batch: 2300 // loss: 0.063 epoch: 32, batch: 2400 // loss: 0.055 epoch: 32, batch: 2500 // loss: 0.056 epoch: 32, batch: 2600 // loss: 0.057 epoch: 32, batch: 2700 // loss: 0.056 epoch: 32, batch: 2800 // loss: 0.066 epoch: 32, batch: 2900 // loss: 0.055 epoch: 32, batch: 3000 // loss: 0.055 epoch: 32, batch: 3100 // loss: 0.059 epoch: 32, batch: 3200 // loss: 0.054 epoch: 32, batch: 3300 // loss: 0.053 epoch: 32, batch: 3400 // loss: 0.057 epoch: 32, batch: 3500 // loss: 0.050 epoch: 32, batch: 3600 // loss: 0.054 epoch: 32, batch: 3700 // loss: 0.061 epoch: 33, batch: 0 // loss: 0.069 epoch: 33, batch: 100 // loss: 0.064 epoch: 33, batch: 200 // loss: 0.056 epoch: 33, batch: 300 // loss: 0.063 epoch: 33, batch: 400 // loss: 0.061 epoch: 33, batch: 500 // loss: 0.051 epoch: 33, batch: 600 // loss: 0.051 epoch: 33, batch: 700 // loss: 0.054 epoch: 33, batch: 800 // loss: 0.059 epoch: 33, batch: 900 // loss: 0.065 epoch: 33, batch: 1000 // loss: 0.056 epoch: 33, batch: 1100 // loss: 0.055 epoch: 33, batch: 1200 // loss: 0.061 epoch: 33, batch: 1300 // loss: 0.064 epoch: 33, batch: 1400 // loss: 0.056 epoch: 33, batch: 1500 // loss: 0.063 epoch: 33, batch: 1600 // loss: 0.064 epoch: 33, batch: 1700 // loss: 0.056 epoch: 33, batch: 1800 // loss: 0.066 epoch: 33, batch: 1900 // loss: 0.062 epoch: 33, batch: 2000 // loss: 0.063 epoch: 33, batch: 2100 // loss: 0.064 epoch: 33, batch: 2200 // loss: 0.066 epoch: 33, batch: 2300 // loss: 0.063 epoch: 33, batch: 2400 // loss: 0.055 epoch: 33, batch: 2500 // loss: 0.056 epoch: 33, batch: 2600 // loss: 0.057 epoch: 33, batch: 2700 // loss: 0.055 epoch: 
33, batch: 2800 // loss: 0.065 epoch: 33, batch: 2900 // loss: 0.055 epoch: 33, batch: 3000 // loss: 0.054 epoch: 33, batch: 3100 // loss: 0.058 epoch: 33, batch: 3200 // loss: 0.053 epoch: 33, batch: 3300 // loss: 0.052 epoch: 33, batch: 3400 // loss: 0.057 epoch: 33, batch: 3500 // loss: 0.049 epoch: 33, batch: 3600 // loss: 0.054 epoch: 33, batch: 3700 // loss: 0.061 epoch: 34, batch: 0 // loss: 0.068 epoch: 34, batch: 100 // loss: 0.064 epoch: 34, batch: 200 // loss: 0.055 epoch: 34, batch: 300 // loss: 0.063 epoch: 34, batch: 400 // loss: 0.061 epoch: 34, batch: 500 // loss: 0.050 epoch: 34, batch: 600 // loss: 0.051 epoch: 34, batch: 700 // loss: 0.054 epoch: 34, batch: 800 // loss: 0.058 epoch: 34, batch: 900 // loss: 0.065 epoch: 34, batch: 1000 // loss: 0.055 epoch: 34, batch: 1100 // loss: 0.054 epoch: 34, batch: 1200 // loss: 0.060 epoch: 34, batch: 1300 // loss: 0.064 epoch: 34, batch: 1400 // loss: 0.055 epoch: 34, batch: 1500 // loss: 0.062 epoch: 34, batch: 1600 // loss: 0.063 epoch: 34, batch: 1700 // loss: 0.056 epoch: 34, batch: 1800 // loss: 0.065 epoch: 34, batch: 1900 // loss: 0.061 epoch: 34, batch: 2000 // loss: 0.062 epoch: 34, batch: 2100 // loss: 0.063 epoch: 34, batch: 2200 // loss: 0.066 epoch: 34, batch: 2300 // loss: 0.062 epoch: 34, batch: 2400 // loss: 0.054 epoch: 34, batch: 2500 // loss: 0.055 epoch: 34, batch: 2600 // loss: 0.057 epoch: 34, batch: 2700 // loss: 0.055 epoch: 34, batch: 2800 // loss: 0.065 epoch: 34, batch: 2900 // loss: 0.054 epoch: 34, batch: 3000 // loss: 0.054 epoch: 34, batch: 3100 // loss: 0.058 epoch: 34, batch: 3200 // loss: 0.053 epoch: 34, batch: 3300 // loss: 0.052 epoch: 34, batch: 3400 // loss: 0.057 epoch: 34, batch: 3500 // loss: 0.049 epoch: 34, batch: 3600 // loss: 0.053 epoch: 34, batch: 3700 // loss: 0.060 epoch: 35, batch: 0 // loss: 0.068 epoch: 35, batch: 100 // loss: 0.064 epoch: 35, batch: 200 // loss: 0.055 epoch: 35, batch: 300 // loss: 0.062 epoch: 35, batch: 400 // loss: 0.061 epoch: 35, 
batch: 500 // loss: 0.050 epoch: 35, batch: 600 // loss: 0.050 epoch: 35, batch: 700 // loss: 0.054 epoch: 35, batch: 800 // loss: 0.058 epoch: 35, batch: 900 // loss: 0.064 epoch: 35, batch: 1000 // loss: 0.055 epoch: 35, batch: 1100 // loss: 0.054 epoch: 35, batch: 1200 // loss: 0.060 epoch: 35, batch: 1300 // loss: 0.063 epoch: 35, batch: 1400 // loss: 0.055 epoch: 35, batch: 1500 // loss: 0.062 epoch: 35, batch: 1600 // loss: 0.063 epoch: 35, batch: 1700 // loss: 0.056 epoch: 35, batch: 1800 // loss: 0.065 epoch: 35, batch: 1900 // loss: 0.061 epoch: 35, batch: 2000 // loss: 0.062 epoch: 35, batch: 2100 // loss: 0.063 epoch: 35, batch: 2200 // loss: 0.065 epoch: 35, batch: 2300 // loss: 0.062 epoch: 35, batch: 2400 // loss: 0.054 epoch: 35, batch: 2500 // loss: 0.055 epoch: 35, batch: 2600 // loss: 0.056 epoch: 35, batch: 2700 // loss: 0.054 epoch: 35, batch: 2800 // loss: 0.064 epoch: 35, batch: 2900 // loss: 0.054 epoch: 35, batch: 3000 // loss: 0.054 epoch: 35, batch: 3100 // loss: 0.057 epoch: 35, batch: 3200 // loss: 0.052 epoch: 35, batch: 3300 // loss: 0.051 epoch: 35, batch: 3400 // loss: 0.056 epoch: 35, batch: 3500 // loss: 0.048 epoch: 35, batch: 3600 // loss: 0.053 epoch: 35, batch: 3700 // loss: 0.060 epoch: 36, batch: 0 // loss: 0.067 epoch: 36, batch: 100 // loss: 0.063 epoch: 36, batch: 200 // loss: 0.054 epoch: 36, batch: 300 // loss: 0.062 epoch: 36, batch: 400 // loss: 0.060 epoch: 36, batch: 500 // loss: 0.050 epoch: 36, batch: 600 // loss: 0.050 epoch: 36, batch: 700 // loss: 0.053 epoch: 36, batch: 800 // loss: 0.057 epoch: 36, batch: 900 // loss: 0.064 epoch: 36, batch: 1000 // loss: 0.055 epoch: 36, batch: 1100 // loss: 0.054 epoch: 36, batch: 1200 // loss: 0.059 epoch: 36, batch: 1300 // loss: 0.063 epoch: 36, batch: 1400 // loss: 0.054 epoch: 36, batch: 1500 // loss: 0.061 epoch: 36, batch: 1600 // loss: 0.062 epoch: 36, batch: 1700 // loss: 0.056 epoch: 36, batch: 1800 // loss: 0.064 epoch: 36, batch: 1900 // loss: 0.060 epoch: 36, 
batch: 2000 // loss: 0.061 epoch: 36, batch: 2100 // loss: 0.062 epoch: 36, batch: 2200 // loss: 0.065 epoch: 36, batch: 2300 // loss: 0.062 epoch: 36, batch: 2400 // loss: 0.053 epoch: 36, batch: 2500 // loss: 0.055 epoch: 36, batch: 2600 // loss: 0.056 epoch: 36, batch: 2700 // loss: 0.054 epoch: 36, batch: 2800 // loss: 0.064 epoch: 36, batch: 2900 // loss: 0.054 epoch: 36, batch: 3000 // loss: 0.053 epoch: 36, batch: 3100 // loss: 0.057 epoch: 36, batch: 3200 // loss: 0.051 epoch: 36, batch: 3300 // loss: 0.051 epoch: 36, batch: 3400 // loss: 0.056 epoch: 36, batch: 3500 // loss: 0.048 epoch: 36, batch: 3600 // loss: 0.053 epoch: 36, batch: 3700 // loss: 0.059 epoch: 37, batch: 0 // loss: 0.067 epoch: 37, batch: 100 // loss: 0.063 epoch: 37, batch: 200 // loss: 0.054 epoch: 37, batch: 300 // loss: 0.061 epoch: 37, batch: 400 // loss: 0.060 epoch: 37, batch: 500 // loss: 0.049 epoch: 37, batch: 600 // loss: 0.050 epoch: 37, batch: 700 // loss: 0.053 epoch: 37, batch: 800 // loss: 0.057 epoch: 37, batch: 900 // loss: 0.063 epoch: 37, batch: 1000 // loss: 0.054 epoch: 37, batch: 1100 // loss: 0.053 epoch: 37, batch: 1200 // loss: 0.058 epoch: 37, batch: 1300 // loss: 0.062 epoch: 37, batch: 1400 // loss: 0.054 epoch: 37, batch: 1500 // loss: 0.061 epoch: 37, batch: 1600 // loss: 0.062 epoch: 37, batch: 1700 // loss: 0.055 epoch: 37, batch: 1800 // loss: 0.064 epoch: 37, batch: 1900 // loss: 0.060 epoch: 37, batch: 2000 // loss: 0.061 epoch: 37, batch: 2100 // loss: 0.062 epoch: 37, batch: 2200 // loss: 0.065 epoch: 37, batch: 2300 // loss: 0.061 epoch: 37, batch: 2400 // loss: 0.052 epoch: 37, batch: 2500 // loss: 0.054 epoch: 37, batch: 2600 // loss: 0.055 epoch: 37, batch: 2700 // loss: 0.054 epoch: 37, batch: 2800 // loss: 0.063 epoch: 37, batch: 2900 // loss: 0.053 epoch: 37, batch: 3000 // loss: 0.053 epoch: 37, batch: 3100 // loss: 0.056 epoch: 37, batch: 3200 // loss: 0.051 epoch: 37, batch: 3300 // loss: 0.050 epoch: 37, batch: 3400 // loss: 0.055 epoch: 
37, batch: 3500 // loss: 0.047 epoch: 37, batch: 3600 // loss: 0.052 epoch: 37, batch: 3700 // loss: 0.058 epoch: 38, batch: 0 // loss: 0.066 epoch: 38, batch: 100 // loss: 0.062 epoch: 38, batch: 200 // loss: 0.053 epoch: 38, batch: 300 // loss: 0.061 epoch: 38, batch: 400 // loss: 0.059 epoch: 38, batch: 500 // loss: 0.049 epoch: 38, batch: 600 // loss: 0.049 epoch: 38, batch: 700 // loss: 0.052 epoch: 38, batch: 800 // loss: 0.056 epoch: 38, batch: 900 // loss: 0.063 epoch: 38, batch: 1000 // loss: 0.054 epoch: 38, batch: 1100 // loss: 0.053 epoch: 38, batch: 1200 // loss: 0.058 epoch: 38, batch: 1300 // loss: 0.062 epoch: 38, batch: 1400 // loss: 0.053 epoch: 38, batch: 1500 // loss: 0.060 epoch: 38, batch: 1600 // loss: 0.061 epoch: 38, batch: 1700 // loss: 0.055 epoch: 38, batch: 1800 // loss: 0.063 epoch: 38, batch: 1900 // loss: 0.059 epoch: 38, batch: 2000 // loss: 0.060 epoch: 38, batch: 2100 // loss: 0.061 epoch: 38, batch: 2200 // loss: 0.064 epoch: 38, batch: 2300 // loss: 0.061 epoch: 38, batch: 2400 // loss: 0.052 epoch: 38, batch: 2500 // loss: 0.054 epoch: 38, batch: 2600 // loss: 0.055 epoch: 38, batch: 2700 // loss: 0.053 epoch: 38, batch: 2800 // loss: 0.062 epoch: 38, batch: 2900 // loss: 0.053 epoch: 38, batch: 3000 // loss: 0.053 epoch: 38, batch: 3100 // loss: 0.056 epoch: 38, batch: 3200 // loss: 0.050 epoch: 38, batch: 3300 // loss: 0.050 epoch: 38, batch: 3400 // loss: 0.055 epoch: 38, batch: 3500 // loss: 0.047 epoch: 38, batch: 3600 // loss: 0.052 epoch: 38, batch: 3700 // loss: 0.058 epoch: 39, batch: 0 // loss: 0.066 epoch: 39, batch: 100 // loss: 0.062 epoch: 39, batch: 200 // loss: 0.053 epoch: 39, batch: 300 // loss: 0.060 epoch: 39, batch: 400 // loss: 0.059 epoch: 39, batch: 500 // loss: 0.048 epoch: 39, batch: 600 // loss: 0.049 epoch: 39, batch: 700 // loss: 0.052 epoch: 39, batch: 800 // loss: 0.056 epoch: 39, batch: 900 // loss: 0.062 epoch: 39, batch: 1000 // loss: 0.053 epoch: 39, batch: 1100 // loss: 0.052 epoch: 39, 
batch: 1200 // loss: 0.057 epoch: 39, batch: 1300 // loss: 0.061 epoch: 39, batch: 1400 // loss: 0.053 epoch: 39, batch: 1500 // loss: 0.060 epoch: 39, batch: 1600 // loss: 0.061 epoch: 39, batch: 1700 // loss: 0.055 epoch: 39, batch: 1800 // loss: 0.063 epoch: 39, batch: 1900 // loss: 0.058 epoch: 39, batch: 2000 // loss: 0.060 epoch: 39, batch: 2100 // loss: 0.060 epoch: 39, batch: 2200 // loss: 0.063 epoch: 39, batch: 2300 // loss: 0.060 epoch: 39, batch: 2400 // loss: 0.051 epoch: 39, batch: 2500 // loss: 0.053 epoch: 39, batch: 2600 // loss: 0.054 epoch: 39, batch: 2700 // loss: 0.053 epoch: 39, batch: 2800 // loss: 0.061 epoch: 39, batch: 2900 // loss: 0.052 epoch: 39, batch: 3000 // loss: 0.052 epoch: 39, batch: 3100 // loss: 0.055 epoch: 39, batch: 3200 // loss: 0.050 epoch: 39, batch: 3300 // loss: 0.049 epoch: 39, batch: 3400 // loss: 0.054 epoch: 39, batch: 3500 // loss: 0.046 epoch: 39, batch: 3600 // loss: 0.051 epoch: 39, batch: 3700 // loss: 0.057 epoch: 40, batch: 0 // loss: 0.065 epoch: 40, batch: 100 // loss: 0.061 epoch: 40, batch: 200 // loss: 0.052 epoch: 40, batch: 300 // loss: 0.060 epoch: 40, batch: 400 // loss: 0.058 epoch: 40, batch: 500 // loss: 0.048 epoch: 40, batch: 600 // loss: 0.048 epoch: 40, batch: 700 // loss: 0.052 epoch: 40, batch: 800 // loss: 0.055 epoch: 40, batch: 900 // loss: 0.061 epoch: 40, batch: 1000 // loss: 0.053 epoch: 40, batch: 1100 // loss: 0.052 epoch: 40, batch: 1200 // loss: 0.056 epoch: 40, batch: 1300 // loss: 0.060 epoch: 40, batch: 1400 // loss: 0.052 epoch: 40, batch: 1500 // loss: 0.059 epoch: 40, batch: 1600 // loss: 0.060 epoch: 40, batch: 1700 // loss: 0.054 epoch: 40, batch: 1800 // loss: 0.062 epoch: 40, batch: 1900 // loss: 0.058 epoch: 40, batch: 2000 // loss: 0.059 epoch: 40, batch: 2100 // loss: 0.060 epoch: 40, batch: 2200 // loss: 0.063 epoch: 40, batch: 2300 // loss: 0.060 epoch: 40, batch: 2400 // loss: 0.051 epoch: 40, batch: 2500 // loss: 0.053 epoch: 40, batch: 2600 // loss: 0.054 epoch: 
40, batch: 2700 // loss: 0.052 epoch: 40, batch: 2800 // loss: 0.060 epoch: 40, batch: 2900 // loss: 0.052 epoch: 40, batch: 3000 // loss: 0.052 epoch: 40, batch: 3100 // loss: 0.054 epoch: 40, batch: 3200 // loss: 0.049 epoch: 40, batch: 3300 // loss: 0.048 epoch: 40, batch: 3400 // loss: 0.054 epoch: 40, batch: 3500 // loss: 0.046 epoch: 40, batch: 3600 // loss: 0.051 epoch: 40, batch: 3700 // loss: 0.056 epoch: 41, batch: 0 // loss: 0.065 epoch: 41, batch: 100 // loss: 0.060 epoch: 41, batch: 200 // loss: 0.052 epoch: 41, batch: 300 // loss: 0.059 epoch: 41, batch: 400 // loss: 0.057 epoch: 41, batch: 500 // loss: 0.047 epoch: 41, batch: 600 // loss: 0.048 epoch: 41, batch: 700 // loss: 0.051 epoch: 41, batch: 800 // loss: 0.054 epoch: 41, batch: 900 // loss: 0.061 epoch: 41, batch: 1000 // loss: 0.052 epoch: 41, batch: 1100 // loss: 0.051 epoch: 41, batch: 1200 // loss: 0.056 epoch: 41, batch: 1300 // loss: 0.060 epoch: 41, batch: 1400 // loss: 0.051 epoch: 41, batch: 1500 // loss: 0.058 epoch: 41, batch: 1600 // loss: 0.060 epoch: 41, batch: 1700 // loss: 0.054 epoch: 41, batch: 1800 // loss: 0.062 epoch: 41, batch: 1900 // loss: 0.057 epoch: 41, batch: 2000 // loss: 0.058 epoch: 41, batch: 2100 // loss: 0.059 epoch: 41, batch: 2200 // loss: 0.062 epoch: 41, batch: 2300 // loss: 0.059 epoch: 41, batch: 2400 // loss: 0.050 epoch: 41, batch: 2500 // loss: 0.052 epoch: 41, batch: 2600 // loss: 0.053 epoch: 41, batch: 2700 // loss: 0.051 epoch: 41, batch: 2800 // loss: 0.060 epoch: 41, batch: 2900 // loss: 0.051 epoch: 41, batch: 3000 // loss: 0.052 epoch: 41, batch: 3100 // loss: 0.054 epoch: 41, batch: 3200 // loss: 0.048 epoch: 41, batch: 3300 // loss: 0.048 epoch: 41, batch: 3400 // loss: 0.053 epoch: 41, batch: 3500 // loss: 0.045 epoch: 41, batch: 3600 // loss: 0.050 epoch: 41, batch: 3700 // loss: 0.055 epoch: 42, batch: 0 // loss: 0.064 epoch: 42, batch: 100 // loss: 0.060 epoch: 42, batch: 200 // loss: 0.051 epoch: 42, batch: 300 // loss: 0.058 epoch: 42, 
batch: 400 // loss: 0.057 epoch: 42, batch: 500 // loss: 0.047 epoch: 42, batch: 600 // loss: 0.048 epoch: 42, batch: 700 // loss: 0.051 epoch: 42, batch: 800 // loss: 0.053 epoch: 42, batch: 900 // loss: 0.060 epoch: 42, batch: 1000 // loss: 0.052 epoch: 42, batch: 1100 // loss: 0.051 epoch: 42, batch: 1200 // loss: 0.055 epoch: 42, batch: 1300 // loss: 0.059 epoch: 42, batch: 1400 // loss: 0.051 epoch: 42, batch: 1500 // loss: 0.058 epoch: 42, batch: 1600 // loss: 0.059 epoch: 42, batch: 1700 // loss: 0.053 epoch: 42, batch: 1800 // loss: 0.061 epoch: 42, batch: 1900 // loss: 0.056 epoch: 42, batch: 2000 // loss: 0.057 epoch: 42, batch: 2100 // loss: 0.058 epoch: 42, batch: 2200 // loss: 0.061 epoch: 42, batch: 2300 // loss: 0.059 epoch: 42, batch: 2400 // loss: 0.050 epoch: 42, batch: 2500 // loss: 0.052 epoch: 42, batch: 2600 // loss: 0.053 epoch: 42, batch: 2700 // loss: 0.051 epoch: 42, batch: 2800 // loss: 0.059 epoch: 42, batch: 2900 // loss: 0.050 epoch: 42, batch: 3000 // loss: 0.051 epoch: 42, batch: 3100 // loss: 0.053 epoch: 42, batch: 3200 // loss: 0.048 epoch: 42, batch: 3300 // loss: 0.047 epoch: 42, batch: 3400 // loss: 0.052 epoch: 42, batch: 3500 // loss: 0.044 epoch: 42, batch: 3600 // loss: 0.050 epoch: 42, batch: 3700 // loss: 0.055 epoch: 43, batch: 0 // loss: 0.063 epoch: 43, batch: 100 // loss: 0.059 epoch: 43, batch: 200 // loss: 0.050 epoch: 43, batch: 300 // loss: 0.058 epoch: 43, batch: 400 // loss: 0.056 epoch: 43, batch: 500 // loss: 0.046 epoch: 43, batch: 600 // loss: 0.047 epoch: 43, batch: 700 // loss: 0.050 epoch: 43, batch: 800 // loss: 0.053 epoch: 43, batch: 900 // loss: 0.059 epoch: 43, batch: 1000 // loss: 0.051 epoch: 43, batch: 1100 // loss: 0.050 epoch: 43, batch: 1200 // loss: 0.054 epoch: 43, batch: 1300 // loss: 0.058 epoch: 43, batch: 1400 // loss: 0.050 epoch: 43, batch: 1500 // loss: 0.057 epoch: 43, batch: 1600 // loss: 0.059 epoch: 43, batch: 1700 // loss: 0.053 epoch: 43, batch: 1800 // loss: 0.060 epoch: 43, 
batch: 1900 // loss: 0.055 epoch: 43, batch: 2000 // loss: 0.056 epoch: 43, batch: 2100 // loss: 0.057 epoch: 43, batch: 2200 // loss: 0.060 epoch: 43, batch: 2300 // loss: 0.058 epoch: 43, batch: 2400 // loss: 0.049 epoch: 43, batch: 2500 // loss: 0.051 epoch: 43, batch: 2600 // loss: 0.052 epoch: 43, batch: 2700 // loss: 0.050 epoch: 43, batch: 2800 // loss: 0.058 epoch: 43, batch: 2900 // loss: 0.050 epoch: 43, batch: 3000 // loss: 0.051 epoch: 43, batch: 3100 // loss: 0.052 epoch: 43, batch: 3200 // loss: 0.047 epoch: 43, batch: 3300 // loss: 0.046 epoch: 43, batch: 3400 // loss: 0.052 epoch: 43, batch: 3500 // loss: 0.044 epoch: 43, batch: 3600 // loss: 0.049 epoch: 43, batch: 3700 // loss: 0.054 epoch: 44, batch: 0 // loss: 0.063 epoch: 44, batch: 100 // loss: 0.058 epoch: 44, batch: 200 // loss: 0.050 epoch: 44, batch: 300 // loss: 0.057 epoch: 44, batch: 400 // loss: 0.055 epoch: 44, batch: 500 // loss: 0.046 epoch: 44, batch: 600 // loss: 0.047 epoch: 44, batch: 700 // loss: 0.050 epoch: 44, batch: 800 // loss: 0.052 epoch: 44, batch: 900 // loss: 0.058 epoch: 44, batch: 1000 // loss: 0.051 epoch: 44, batch: 1100 // loss: 0.050 epoch: 44, batch: 1200 // loss: 0.053 epoch: 44, batch: 1300 // loss: 0.057 epoch: 44, batch: 1400 // loss: 0.050 epoch: 44, batch: 1500 // loss: 0.056 epoch: 44, batch: 1600 // loss: 0.058 epoch: 44, batch: 1700 // loss: 0.052 epoch: 44, batch: 1800 // loss: 0.060 epoch: 44, batch: 1900 // loss: 0.055 epoch: 44, batch: 2000 // loss: 0.056 epoch: 44, batch: 2100 // loss: 0.056 epoch: 44, batch: 2200 // loss: 0.060 epoch: 44, batch: 2300 // loss: 0.057 epoch: 44, batch: 2400 // loss: 0.048 epoch: 44, batch: 2500 // loss: 0.050 epoch: 44, batch: 2600 // loss: 0.052 epoch: 44, batch: 2700 // loss: 0.050 epoch: 44, batch: 2800 // loss: 0.057 epoch: 44, batch: 2900 // loss: 0.049 epoch: 44, batch: 3000 // loss: 0.051 epoch: 44, batch: 3100 // loss: 0.051 epoch: 44, batch: 3200 // loss: 0.046 epoch: 44, batch: 3300 // loss: 0.046 epoch: 
44, batch: 3400 // loss: 0.051 epoch: 44, batch: 3500 // loss: 0.043 epoch: 44, batch: 3600 // loss: 0.049 epoch: 44, batch: 3700 // loss: 0.053 epoch: 45, batch: 0 // loss: 0.062 epoch: 45, batch: 100 // loss: 0.057 epoch: 45, batch: 200 // loss: 0.049 epoch: 45, batch: 300 // loss: 0.056 epoch: 45, batch: 400 // loss: 0.055 epoch: 45, batch: 500 // loss: 0.045 epoch: 45, batch: 600 // loss: 0.046 epoch: 45, batch: 700 // loss: 0.049 epoch: 45, batch: 800 // loss: 0.051 epoch: 45, batch: 900 // loss: 0.057 epoch: 45, batch: 1000 // loss: 0.050 epoch: 45, batch: 1100 // loss: 0.049 epoch: 45, batch: 1200 // loss: 0.052 epoch: 45, batch: 1300 // loss: 0.056 epoch: 45, batch: 1400 // loss: 0.049 epoch: 45, batch: 1500 // loss: 0.056 epoch: 45, batch: 1600 // loss: 0.057 epoch: 45, batch: 1700 // loss: 0.052 epoch: 45, batch: 1800 // loss: 0.059 epoch: 45, batch: 1900 // loss: 0.054 epoch: 45, batch: 2000 // loss: 0.055 epoch: 45, batch: 2100 // loss: 0.056 epoch: 45, batch: 2200 // loss: 0.059 epoch: 45, batch: 2300 // loss: 0.057 epoch: 45, batch: 2400 // loss: 0.048 epoch: 45, batch: 2500 // loss: 0.050 epoch: 45, batch: 2600 // loss: 0.051 epoch: 45, batch: 2700 // loss: 0.049 epoch: 45, batch: 2800 // loss: 0.056 epoch: 45, batch: 2900 // loss: 0.049 epoch: 45, batch: 3000 // loss: 0.050 epoch: 45, batch: 3100 // loss: 0.051 epoch: 45, batch: 3200 // loss: 0.046 epoch: 45, batch: 3300 // loss: 0.045 epoch: 45, batch: 3400 // loss: 0.051 epoch: 45, batch: 3500 // loss: 0.042 epoch: 45, batch: 3600 // loss: 0.048 epoch: 45, batch: 3700 // loss: 0.052 epoch: 46, batch: 0 // loss: 0.061 epoch: 46, batch: 100 // loss: 0.056 epoch: 46, batch: 200 // loss: 0.049 epoch: 46, batch: 300 // loss: 0.055 epoch: 46, batch: 400 // loss: 0.054 epoch: 46, batch: 500 // loss: 0.045 epoch: 46, batch: 600 // loss: 0.046 epoch: 46, batch: 700 // loss: 0.049 epoch: 46, batch: 800 // loss: 0.050 epoch: 46, batch: 900 // loss: 0.057 epoch: 46, batch: 1000 // loss: 0.050 epoch: 46, 
batch: 1100 // loss: 0.049 epoch: 46, batch: 1200 // loss: 0.052 epoch: 46, batch: 1300 // loss: 0.055 epoch: 46, batch: 1400 // loss: 0.048 epoch: 46, batch: 1500 // loss: 0.055 epoch: 46, batch: 1600 // loss: 0.057 epoch: 46, batch: 1700 // loss: 0.051 epoch: 46, batch: 1800 // loss: 0.059 epoch: 46, batch: 1900 // loss: 0.053 epoch: 46, batch: 2000 // loss: 0.054 epoch: 46, batch: 2100 // loss: 0.055 epoch: 46, batch: 2200 // loss: 0.058 epoch: 46, batch: 2300 // loss: 0.056 epoch: 46, batch: 2400 // loss: 0.047 epoch: 46, batch: 2500 // loss: 0.049 epoch: 46, batch: 2600 // loss: 0.051 epoch: 46, batch: 2700 // loss: 0.048 epoch: 46, batch: 2800 // loss: 0.055 epoch: 46, batch: 2900 // loss: 0.048 epoch: 46, batch: 3000 // loss: 0.050 epoch: 46, batch: 3100 // loss: 0.050 epoch: 46, batch: 3200 // loss: 0.045 epoch: 46, batch: 3300 // loss: 0.044 epoch: 46, batch: 3400 // loss: 0.050 epoch: 46, batch: 3500 // loss: 0.042 epoch: 46, batch: 3600 // loss: 0.048 epoch: 46, batch: 3700 // loss: 0.051 epoch: 47, batch: 0 // loss: 0.060 epoch: 47, batch: 100 // loss: 0.055 epoch: 47, batch: 200 // loss: 0.048 epoch: 47, batch: 300 // loss: 0.055 epoch: 47, batch: 400 // loss: 0.053 epoch: 47, batch: 500 // loss: 0.044 epoch: 47, batch: 600 // loss: 0.045 epoch: 47, batch: 700 // loss: 0.048 epoch: 47, batch: 800 // loss: 0.050 epoch: 47, batch: 900 // loss: 0.056 epoch: 47, batch: 1000 // loss: 0.050 epoch: 47, batch: 1100 // loss: 0.048 epoch: 47, batch: 1200 // loss: 0.051 epoch: 47, batch: 1300 // loss: 0.054 epoch: 47, batch: 1400 // loss: 0.048 epoch: 47, batch: 1500 // loss: 0.054 epoch: 47, batch: 1600 // loss: 0.056 epoch: 47, batch: 1700 // loss: 0.050 epoch: 47, batch: 1800 // loss: 0.058 epoch: 47, batch: 1900 // loss: 0.052 epoch: 47, batch: 2000 // loss: 0.053 epoch: 47, batch: 2100 // loss: 0.054 epoch: 47, batch: 2200 // loss: 0.057 epoch: 47, batch: 2300 // loss: 0.055 epoch: 47, batch: 2400 // loss: 0.047 epoch: 47, batch: 2500 // loss: 0.049 epoch: 
47, batch: 2600 // loss: 0.050 epoch: 47, batch: 2700 // loss: 0.048 epoch: 47, batch: 2800 // loss: 0.054 epoch: 47, batch: 2900 // loss: 0.047 epoch: 47, batch: 3000 // loss: 0.049 epoch: 47, batch: 3100 // loss: 0.049 epoch: 47, batch: 3200 // loss: 0.044 epoch: 47, batch: 3300 // loss: 0.044 epoch: 47, batch: 3400 // loss: 0.049 epoch: 47, batch: 3500 // loss: 0.041 epoch: 47, batch: 3600 // loss: 0.047 epoch: 47, batch: 3700 // loss: 0.050 epoch: 48, batch: 0 // loss: 0.060 epoch: 48, batch: 100 // loss: 0.055 epoch: 48, batch: 200 // loss: 0.047 epoch: 48, batch: 300 // loss: 0.054 epoch: 48, batch: 400 // loss: 0.052 epoch: 48, batch: 500 // loss: 0.044 epoch: 48, batch: 600 // loss: 0.045 epoch: 48, batch: 700 // loss: 0.048 epoch: 48, batch: 800 // loss: 0.049 epoch: 48, batch: 900 // loss: 0.055 epoch: 48, batch: 1000 // loss: 0.049 epoch: 48, batch: 1100 // loss: 0.048 epoch: 48, batch: 1200 // loss: 0.050 epoch: 48, batch: 1300 // loss: 0.053 epoch: 48, batch: 1400 // loss: 0.047 epoch: 48, batch: 1500 // loss: 0.054 epoch: 48, batch: 1600 // loss: 0.056 epoch: 48, batch: 1700 // loss: 0.050 epoch: 48, batch: 1800 // loss: 0.057 epoch: 48, batch: 1900 // loss: 0.052 epoch: 48, batch: 2000 // loss: 0.052 epoch: 48, batch: 2100 // loss: 0.053 epoch: 48, batch: 2200 // loss: 0.056 epoch: 48, batch: 2300 // loss: 0.055 epoch: 48, batch: 2400 // loss: 0.046 epoch: 48, batch: 2500 // loss: 0.048 epoch: 48, batch: 2600 // loss: 0.049 epoch: 48, batch: 2700 // loss: 0.047 epoch: 48, batch: 2800 // loss: 0.053 epoch: 48, batch: 2900 // loss: 0.047 epoch: 48, batch: 3000 // loss: 0.049 epoch: 48, batch: 3100 // loss: 0.049 epoch: 48, batch: 3200 // loss: 0.044 epoch: 48, batch: 3300 // loss: 0.043 epoch: 48, batch: 3400 // loss: 0.049 epoch: 48, batch: 3500 // loss: 0.040 epoch: 48, batch: 3600 // loss: 0.047 epoch: 48, batch: 3700 // loss: 0.050 epoch: 49, batch: 0 // loss: 0.059 epoch: 49, batch: 100 // loss: 0.054 epoch: 49, batch: 200 // loss: 0.047 epoch: 
49, batch: 300 // loss: 0.054 epoch: 49, batch: 400 // loss: 0.052 epoch: 49, batch: 500 // loss: 0.043 epoch: 49, batch: 600 // loss: 0.044 epoch: 49, batch: 700 // loss: 0.048 epoch: 49, batch: 800 // loss: 0.048 epoch: 49, batch: 900 // loss: 0.054 epoch: 49, batch: 1000 // loss: 0.049 epoch: 49, batch: 1100 // loss: 0.047 epoch: 49, batch: 1200 // loss: 0.049 epoch: 49, batch: 1300 // loss: 0.052 epoch: 49, batch: 1400 // loss: 0.047 epoch: 49, batch: 1500 // loss: 0.053 epoch: 49, batch: 1600 // loss: 0.055 epoch: 49, batch: 1700 // loss: 0.049 epoch: 49, batch: 1800 // loss: 0.057 epoch: 49, batch: 1900 // loss: 0.051 epoch: 49, batch: 2000 // loss: 0.052 epoch: 49, batch: 2100 // loss: 0.053 epoch: 49, batch: 2200 // loss: 0.056 epoch: 49, batch: 2300 // loss: 0.054 epoch: 49, batch: 2400 // loss: 0.046 epoch: 49, batch: 2500 // loss: 0.047 epoch: 49, batch: 2600 // loss: 0.049 epoch: 49, batch: 2700 // loss: 0.047 epoch: 49, batch: 2800 // loss: 0.052 epoch: 49, batch: 2900 // loss: 0.046 epoch: 49, batch: 3000 // loss: 0.049 epoch: 49, batch: 3100 // loss: 0.048 epoch: 49, batch: 3200 // loss: 0.043 epoch: 49, batch: 3300 // loss: 0.043 epoch: 49, batch: 3400 // loss: 0.048 epoch: 49, batch: 3500 // loss: 0.040 epoch: 49, batch: 3600 // loss: 0.047 epoch: 49, batch: 3700 // loss: 0.049 epoch: 50, batch: 0 // loss: 0.058 epoch: 50, batch: 100 // loss: 0.053 epoch: 50, batch: 200 // loss: 0.046 epoch: 50, batch: 300 // loss: 0.053 epoch: 50, batch: 400 // loss: 0.051 epoch: 50, batch: 500 // loss: 0.043 epoch: 50, batch: 600 // loss: 0.044 epoch: 50, batch: 700 // loss: 0.047 epoch: 50, batch: 800 // loss: 0.047 epoch: 50, batch: 900 // loss: 0.054 epoch: 50, batch: 1000 // loss: 0.048 epoch: 50, batch: 1100 // loss: 0.047 epoch: 50, batch: 1200 // loss: 0.049 epoch: 50, batch: 1300 // loss: 0.051 epoch: 50, batch: 1400 // loss: 0.046 epoch: 50, batch: 1500 // loss: 0.052 epoch: 50, batch: 1600 // loss: 0.055 epoch: 50, batch: 1700 // loss: 0.049 epoch: 50, 
batch: 1800 // loss: 0.056 epoch: 50, batch: 1900 // loss: 0.050 epoch: 50, batch: 2000 // loss: 0.051 epoch: 50, batch: 2100 // loss: 0.052 epoch: 50, batch: 2200 // loss: 0.055 epoch: 50, batch: 2300 // loss: 0.053 epoch: 50, batch: 2400 // loss: 0.045 epoch: 50, batch: 2500 // loss: 0.047 epoch: 50, batch: 2600 // loss: 0.048 epoch: 50, batch: 2700 // loss: 0.046 epoch: 50, batch: 2800 // loss: 0.051 epoch: 50, batch: 2900 // loss: 0.046 epoch: 50, batch: 3000 // loss: 0.048 epoch: 50, batch: 3100 // loss: 0.047 epoch: 50, batch: 3200 // loss: 0.043 epoch: 50, batch: 3300 // loss: 0.042 epoch: 50, batch: 3400 // loss: 0.048 epoch: 50, batch: 3500 // loss: 0.039 epoch: 50, batch: 3600 // loss: 0.046 epoch: 50, batch: 3700 // loss: 0.048 epoch: 51, batch: 0 // loss: 0.058 epoch: 51, batch: 100 // loss: 0.052 epoch: 51, batch: 200 // loss: 0.046 epoch: 51, batch: 300 // loss: 0.052 epoch: 51, batch: 400 // loss: 0.051 epoch: 51, batch: 500 // loss: 0.042 epoch: 51, batch: 600 // loss: 0.043 epoch: 51, batch: 700 // loss: 0.047 epoch: 51, batch: 800 // loss: 0.047 epoch: 51, batch: 900 // loss: 0.053 epoch: 51, batch: 1000 // loss: 0.048 epoch: 51, batch: 1100 // loss: 0.046 epoch: 51, batch: 1200 // loss: 0.048 epoch: 51, batch: 1300 // loss: 0.051 epoch: 51, batch: 1400 // loss: 0.046 epoch: 51, batch: 1500 // loss: 0.052 epoch: 51, batch: 1600 // loss: 0.054 epoch: 51, batch: 1700 // loss: 0.049 epoch: 51, batch: 1800 // loss: 0.056 epoch: 51, batch: 1900 // loss: 0.050 epoch: 51, batch: 2000 // loss: 0.051 epoch: 51, batch: 2100 // loss: 0.051 epoch: 51, batch: 2200 // loss: 0.054 epoch: 51, batch: 2300 // loss: 0.053 epoch: 51, batch: 2400 // loss: 0.045 epoch: 51, batch: 2500 // loss: 0.046 epoch: 51, batch: 2600 // loss: 0.048 epoch: 51, batch: 2700 // loss: 0.046 epoch: 51, batch: 2800 // loss: 0.051 epoch: 51, batch: 2900 // loss: 0.045 epoch: 51, batch: 3000 // loss: 0.048 epoch: 51, batch: 3100 // loss: 0.047 epoch: 51, batch: 3200 // loss: 0.042 epoch: 
51, batch: 3300 // loss: 0.042 epoch: 51, batch: 3400 // loss: 0.047 epoch: 51, batch: 3500 // loss: 0.039 epoch: 51, batch: 3600 // loss: 0.046 epoch: 51, batch: 3700 // loss: 0.048 epoch: 52, batch: 0 // loss: 0.057 epoch: 52, batch: 100 // loss: 0.051 epoch: 52, batch: 200 // loss: 0.046 epoch: 52, batch: 300 // loss: 0.052 epoch: 52, batch: 400 // loss: 0.050 epoch: 52, batch: 500 // loss: 0.042 epoch: 52, batch: 600 // loss: 0.043 epoch: 52, batch: 700 // loss: 0.047 epoch: 52, batch: 800 // loss: 0.046 epoch: 52, batch: 900 // loss: 0.052 epoch: 52, batch: 1000 // loss: 0.048 epoch: 52, batch: 1100 // loss: 0.046 epoch: 52, batch: 1200 // loss: 0.048 epoch: 52, batch: 1300 // loss: 0.050 epoch: 52, batch: 1400 // loss: 0.045 epoch: 52, batch: 1500 // loss: 0.051 epoch: 52, batch: 1600 // loss: 0.054 epoch: 52, batch: 1700 // loss: 0.048 epoch: 52, batch: 1800 // loss: 0.055 epoch: 52, batch: 1900 // loss: 0.049 epoch: 52, batch: 2000 // loss: 0.050 epoch: 52, batch: 2100 // loss: 0.051 epoch: 52, batch: 2200 // loss: 0.054 epoch: 52, batch: 2300 // loss: 0.052 epoch: 52, batch: 2400 // loss: 0.044 epoch: 52, batch: 2500 // loss: 0.046 epoch: 52, batch: 2600 // loss: 0.048 epoch: 52, batch: 2700 // loss: 0.045 epoch: 52, batch: 2800 // loss: 0.050 epoch: 52, batch: 2900 // loss: 0.045 epoch: 52, batch: 3000 // loss: 0.048 epoch: 52, batch: 3100 // loss: 0.046 epoch: 52, batch: 3200 // loss: 0.042 epoch: 52, batch: 3300 // loss: 0.041 epoch: 52, batch: 3400 // loss: 0.047 epoch: 52, batch: 3500 // loss: 0.038 epoch: 52, batch: 3600 // loss: 0.046 epoch: 52, batch: 3700 // loss: 0.047 epoch: 53, batch: 0 // loss: 0.057 epoch: 53, batch: 100 // loss: 0.051 epoch: 53, batch: 200 // loss: 0.045 epoch: 53, batch: 300 // loss: 0.051 epoch: 53, batch: 400 // loss: 0.050 epoch: 53, batch: 500 // loss: 0.042 epoch: 53, batch: 600 // loss: 0.043 epoch: 53, batch: 700 // loss: 0.046 epoch: 53, batch: 800 // loss: 0.046 epoch: 53, batch: 900 // loss: 0.052 epoch: 53, 
batch: 1000 // loss: 0.047 epoch: 53, batch: 1100 // loss: 0.045 epoch: 53, batch: 1200 // loss: 0.047 epoch: 53, batch: 1300 // loss: 0.049 epoch: 53, batch: 1400 // loss: 0.045 epoch: 53, batch: 1500 // loss: 0.051 epoch: 53, batch: 1600 // loss: 0.054 epoch: 53, batch: 1700 // loss: 0.048 epoch: 53, batch: 1800 // loss: 0.055 epoch: 53, batch: 1900 // loss: 0.049 epoch: 53, batch: 2000 // loss: 0.050 epoch: 53, batch: 2100 // loss: 0.050 epoch: 53, batch: 2200 // loss: 0.053 epoch: 53, batch: 2300 // loss: 0.052 epoch: 53, batch: 2400 // loss: 0.044 epoch: 53, batch: 2500 // loss: 0.045 epoch: 53, batch: 2600 // loss: 0.047 epoch: 53, batch: 2700 // loss: 0.045 epoch: 53, batch: 2800 // loss: 0.050 epoch: 53, batch: 2900 // loss: 0.044 epoch: 53, batch: 3000 // loss: 0.047 epoch: 53, batch: 3100 // loss: 0.046 epoch: 53, batch: 3200 // loss: 0.041 epoch: 53, batch: 3300 // loss: 0.041 epoch: 53, batch: 3400 // loss: 0.046 epoch: 53, batch: 3500 // loss: 0.038 epoch: 53, batch: 3600 // loss: 0.045 epoch: 53, batch: 3700 // loss: 0.047 epoch: 54, batch: 0 // loss: 0.056 epoch: 54, batch: 100 // loss: 0.050 epoch: 54, batch: 200 // loss: 0.045 epoch: 54, batch: 300 // loss: 0.051 epoch: 54, batch: 400 // loss: 0.049 epoch: 54, batch: 500 // loss: 0.041 epoch: 54, batch: 600 // loss: 0.042 epoch: 54, batch: 700 // loss: 0.046 epoch: 54, batch: 800 // loss: 0.045 epoch: 54, batch: 900 // loss: 0.052 epoch: 54, batch: 1000 // loss: 0.047 epoch: 54, batch: 1100 // loss: 0.045 epoch: 54, batch: 1200 // loss: 0.047 epoch: 54, batch: 1300 // loss: 0.049 epoch: 54, batch: 1400 // loss: 0.045 epoch: 54, batch: 1500 // loss: 0.051 epoch: 54, batch: 1600 // loss: 0.053 epoch: 54, batch: 1700 // loss: 0.047 epoch: 54, batch: 1800 // loss: 0.055 epoch: 54, batch: 1900 // loss: 0.048 epoch: 54, batch: 2000 // loss: 0.049 epoch: 54, batch: 2100 // loss: 0.050 epoch: 54, batch: 2200 // loss: 0.053 epoch: 54, batch: 2300 // loss: 0.051 epoch: 54, batch: 2400 // loss: 0.044 epoch: 
54, batch: 2500 // loss: 0.045 epoch: 54, batch: 2600 // loss: 0.047 epoch: 54, batch: 2700 // loss: 0.044 epoch: 54, batch: 2800 // loss: 0.049 epoch: 54, batch: 2900 // loss: 0.044 epoch: 54, batch: 3000 // loss: 0.047 epoch: 54, batch: 3100 // loss: 0.045 epoch: 54, batch: 3200 // loss: 0.041 epoch: 54, batch: 3300 // loss: 0.040 epoch: 54, batch: 3400 // loss: 0.046 epoch: 54, batch: 3500 // loss: 0.037 epoch: 54, batch: 3600 // loss: 0.045 epoch: 54, batch: 3700 // loss: 0.046 epoch: 55, batch: 0 // loss: 0.056 epoch: 55, batch: 100 // loss: 0.050 epoch: 55, batch: 200 // loss: 0.045 epoch: 55, batch: 300 // loss: 0.051 epoch: 55, batch: 400 // loss: 0.049 epoch: 55, batch: 500 // loss: 0.041 epoch: 55, batch: 600 // loss: 0.042 epoch: 55, batch: 700 // loss: 0.046 epoch: 55, batch: 800 // loss: 0.045 epoch: 55, batch: 900 // loss: 0.051 epoch: 55, batch: 1000 // loss: 0.047 epoch: 55, batch: 1100 // loss: 0.045 epoch: 55, batch: 1200 // loss: 0.046 epoch: 55, batch: 1300 // loss: 0.048 epoch: 55, batch: 1400 // loss: 0.044 epoch: 55, batch: 1500 // loss: 0.050 epoch: 55, batch: 1600 // loss: 0.053 epoch: 55, batch: 1700 // loss: 0.047 epoch: 55, batch: 1800 // loss: 0.054 epoch: 55, batch: 1900 // loss: 0.048 epoch: 55, batch: 2000 // loss: 0.049 epoch: 55, batch: 2100 // loss: 0.049 epoch: 55, batch: 2200 // loss: 0.052 epoch: 55, batch: 2300 // loss: 0.051 epoch: 55, batch: 2400 // loss: 0.044 epoch: 55, batch: 2500 // loss: 0.045 epoch: 55, batch: 2600 // loss: 0.047 epoch: 55, batch: 2700 // loss: 0.044 epoch: 55, batch: 2800 // loss: 0.049 epoch: 55, batch: 2900 // loss: 0.044 epoch: 55, batch: 3000 // loss: 0.047 epoch: 55, batch: 3100 // loss: 0.045 epoch: 55, batch: 3200 // loss: 0.041 epoch: 55, batch: 3300 // loss: 0.040 epoch: 55, batch: 3400 // loss: 0.046 epoch: 55, batch: 3500 // loss: 0.037 epoch: 55, batch: 3600 // loss: 0.045 epoch: 55, batch: 3700 // loss: 0.046 epoch: 56, batch: 0 // loss: 0.056 epoch: 56, batch: 100 // loss: 0.049 epoch: 
56, batch: 200 // loss: 0.044 epoch: 56, batch: 300 // loss: 0.050 epoch: 56, batch: 400 // loss: 0.048 epoch: 56, batch: 500 // loss: 0.041 epoch: 56, batch: 600 // loss: 0.042 epoch: 56, batch: 700 // loss: 0.046 epoch: 56, batch: 800 // loss: 0.044 epoch: 56, batch: 900 // loss: 0.051 epoch: 56, batch: 1000 // loss: 0.047 epoch: 56, batch: 1100 // loss: 0.044 epoch: 56, batch: 1200 // loss: 0.046 epoch: 56, batch: 1300 // loss: 0.048 epoch: 56, batch: 1400 // loss: 0.044 epoch: 56, batch: 1500 // loss: 0.050 epoch: 56, batch: 1600 // loss: 0.053 epoch: 56, batch: 1700 // loss: 0.047 epoch: 56, batch: 1800 // loss: 0.054 epoch: 56, batch: 1900 // loss: 0.047 epoch: 56, batch: 2000 // loss: 0.048 epoch: 56, batch: 2100 // loss: 0.049 epoch: 56, batch: 2200 // loss: 0.052 epoch: 56, batch: 2300 // loss: 0.051 epoch: 56, batch: 2400 // loss: 0.043 epoch: 56, batch: 2500 // loss: 0.044 epoch: 56, batch: 2600 // loss: 0.046 epoch: 56, batch: 2700 // loss: 0.044 epoch: 56, batch: 2800 // loss: 0.048 epoch: 56, batch: 2900 // loss: 0.044 epoch: 56, batch: 3000 // loss: 0.047 epoch: 56, batch: 3100 // loss: 0.045 epoch: 56, batch: 3200 // loss: 0.041 epoch: 56, batch: 3300 // loss: 0.040 epoch: 56, batch: 3400 // loss: 0.046 epoch: 56, batch: 3500 // loss: 0.037 epoch: 56, batch: 3600 // loss: 0.045 epoch: 56, batch: 3700 // loss: 0.045 epoch: 57, batch: 0 // loss: 0.055 epoch: 57, batch: 100 // loss: 0.049 epoch: 57, batch: 200 // loss: 0.044 epoch: 57, batch: 300 // loss: 0.050 epoch: 57, batch: 400 // loss: 0.048 epoch: 57, batch: 500 // loss: 0.041 epoch: 57, batch: 600 // loss: 0.042 epoch: 57, batch: 700 // loss: 0.045 epoch: 57, batch: 800 // loss: 0.044 epoch: 57, batch: 900 // loss: 0.051 epoch: 57, batch: 1000 // loss: 0.046 epoch: 57, batch: 1100 // loss: 0.044 epoch: 57, batch: 1200 // loss: 0.046 epoch: 57, batch: 1300 // loss: 0.047 epoch: 57, batch: 1400 // loss: 0.044 epoch: 57, batch: 1500 // loss: 0.050 epoch: 57, batch: 1600 // loss: 0.053 epoch: 57, 
batch: 1700 // loss: 0.047 epoch: 57, batch: 1800 // loss: 0.054 epoch: 57, batch: 1900 // loss: 0.047 epoch: 57, batch: 2000 // loss: 0.048 epoch: 57, batch: 2100 // loss: 0.049 epoch: 57, batch: 2200 // loss: 0.051 epoch: 57, batch: 2300 // loss: 0.051 epoch: 57, batch: 2400 // loss: 0.043 epoch: 57, batch: 2500 // loss: 0.044 epoch: 57, batch: 2600 // loss: 0.046 epoch: 57, batch: 2700 // loss: 0.044 epoch: 57, batch: 2800 // loss: 0.048 epoch: 57, batch: 2900 // loss: 0.043 epoch: 57, batch: 3000 // loss: 0.047 epoch: 57, batch: 3100 // loss: 0.045 epoch: 57, batch: 3200 // loss: 0.040 epoch: 57, batch: 3300 // loss: 0.040 epoch: 57, batch: 3400 // loss: 0.045 epoch: 57, batch: 3500 // loss: 0.036 epoch: 57, batch: 3600 // loss: 0.044 epoch: 57, batch: 3700 // loss: 0.045 epoch: 58, batch: 0 // loss: 0.055 epoch: 58, batch: 100 // loss: 0.048 epoch: 58, batch: 200 // loss: 0.044 epoch: 58, batch: 300 // loss: 0.050 epoch: 58, batch: 400 // loss: 0.048 epoch: 58, batch: 500 // loss: 0.041 epoch: 58, batch: 600 // loss: 0.041 epoch: 58, batch: 700 // loss: 0.045 epoch: 58, batch: 800 // loss: 0.044 epoch: 58, batch: 900 // loss: 0.050 epoch: 58, batch: 1000 // loss: 0.046 epoch: 58, batch: 1100 // loss: 0.044 epoch: 58, batch: 1200 // loss: 0.045 epoch: 58, batch: 1300 // loss: 0.047 epoch: 58, batch: 1400 // loss: 0.044 epoch: 58, batch: 1500 // loss: 0.049 epoch: 58, batch: 1600 // loss: 0.053 epoch: 58, batch: 1700 // loss: 0.046 epoch: 58, batch: 1800 // loss: 0.054 epoch: 58, batch: 1900 // loss: 0.047 epoch: 58, batch: 2000 // loss: 0.048 epoch: 58, batch: 2100 // loss: 0.048 epoch: 58, batch: 2200 // loss: 0.051 epoch: 58, batch: 2300 // loss: 0.050 epoch: 58, batch: 2400 // loss: 0.043 epoch: 58, batch: 2500 // loss: 0.044 epoch: 58, batch: 2600 // loss: 0.046 epoch: 58, batch: 2700 // loss: 0.044 epoch: 58, batch: 2800 // loss: 0.048 epoch: 58, batch: 2900 // loss: 0.043 epoch: 58, batch: 3000 // loss: 0.046 epoch: 58, batch: 3100 // loss: 0.044 epoch: 
58, batch: 3200 // loss: 0.040 epoch: 58, batch: 3300 // loss: 0.039 epoch: 58, batch: 3400 // loss: 0.045 epoch: 58, batch: 3500 // loss: 0.036 epoch: 58, batch: 3600 // loss: 0.044 epoch: 58, batch: 3700 // loss: 0.045 epoch: 59, batch: 0 // loss: 0.055 epoch: 59, batch: 100 // loss: 0.048 epoch: 59, batch: 200 // loss: 0.044 epoch: 59, batch: 300 // loss: 0.050 epoch: 59, batch: 400 // loss: 0.048 epoch: 59, batch: 500 // loss: 0.040 epoch: 59, batch: 600 // loss: 0.041 epoch: 59, batch: 700 // loss: 0.045 epoch: 59, batch: 800 // loss: 0.044 epoch: 59, batch: 900 // loss: 0.050 epoch: 59, batch: 1000 // loss: 0.046 epoch: 59, batch: 1100 // loss: 0.044 epoch: 59, batch: 1200 // loss: 0.045 epoch: 59, batch: 1300 // loss: 0.047 epoch: 59, batch: 1400 // loss: 0.043 epoch: 59, batch: 1500 // loss: 0.049 epoch: 59, batch: 1600 // loss: 0.052 epoch: 59, batch: 1700 // loss: 0.046 epoch: 59, batch: 1800 // loss: 0.054 epoch: 59, batch: 1900 // loss: 0.047 epoch: 59, batch: 2000 // loss: 0.048 epoch: 59, batch: 2100 // loss: 0.048 epoch: 59, batch: 2200 // loss: 0.051 epoch: 59, batch: 2300 // loss: 0.050 epoch: 59, batch: 2400 // loss: 0.043 epoch: 59, batch: 2500 // loss: 0.044 epoch: 59, batch: 2600 // loss: 0.046 epoch: 59, batch: 2700 // loss: 0.043 epoch: 59, batch: 2800 // loss: 0.047 epoch: 59, batch: 2900 // loss: 0.043 epoch: 59, batch: 3000 // loss: 0.046 epoch: 59, batch: 3100 // loss: 0.044 epoch: 59, batch: 3200 // loss: 0.040 epoch: 59, batch: 3300 // loss: 0.039 epoch: 59, batch: 3400 // loss: 0.045 epoch: 59, batch: 3500 // loss: 0.036 epoch: 59, batch: 3600 // loss: 0.044 epoch: 59, batch: 3700 // loss: 0.045 epoch: 60, batch: 0 // loss: 0.054 epoch: 60, batch: 100 // loss: 0.048 epoch: 60, batch: 200 // loss: 0.044 epoch: 60, batch: 300 // loss: 0.050 epoch: 60, batch: 400 // loss: 0.047 epoch: 60, batch: 500 // loss: 0.040 epoch: 60, batch: 600 // loss: 0.041 epoch: 60, batch: 700 // loss: 0.045 epoch: 60, batch: 800 // loss: 0.043 epoch: 60, 
batch: 900 // loss: 0.050 epoch: 60, batch: 1000 // loss: 0.046 epoch: 60, batch: 1100 // loss: 0.044 epoch: 60, batch: 1200 // loss: 0.045 epoch: 60, batch: 1300 // loss: 0.047 epoch: 60, batch: 1400 // loss: 0.043 epoch: 60, batch: 1500 // loss: 0.049 epoch: 60, batch: 1600 // loss: 0.052 epoch: 60, batch: 1700 // loss: 0.046 epoch: 60, batch: 1800 // loss: 0.053 epoch: 60, batch: 1900 // loss: 0.046 epoch: 60, batch: 2000 // loss: 0.048 epoch: 60, batch: 2100 // loss: 0.048 epoch: 60, batch: 2200 // loss: 0.051 epoch: 60, batch: 2300 // loss: 0.050 epoch: 60, batch: 2400 // loss: 0.043 epoch: 60, batch: 2500 // loss: 0.044 epoch: 60, batch: 2600 // loss: 0.046 epoch: 60, batch: 2700 // loss: 0.043 epoch: 60, batch: 2800 // loss: 0.047 epoch: 60, batch: 2900 // loss: 0.043 epoch: 60, batch: 3000 // loss: 0.046 epoch: 60, batch: 3100 // loss: 0.044 epoch: 60, batch: 3200 // loss: 0.040 epoch: 60, batch: 3300 // loss: 0.039 epoch: 60, batch: 3400 // loss: 0.045 epoch: 60, batch: 3500 // loss: 0.036 epoch: 60, batch: 3600 // loss: 0.044 epoch: 60, batch: 3700 // loss: 0.044 epoch: 61, batch: 0 // loss: 0.054 epoch: 61, batch: 100 // loss: 0.048 epoch: 61, batch: 200 // loss: 0.044 epoch: 61, batch: 300 // loss: 0.050 epoch: 61, batch: 400 // loss: 0.047 epoch: 61, batch: 500 // loss: 0.040 epoch: 61, batch: 600 // loss: 0.041 epoch: 61, batch: 700 // loss: 0.045 epoch: 61, batch: 800 // loss: 0.043 epoch: 61, batch: 900 // loss: 0.050 epoch: 61, batch: 1000 // loss: 0.046 epoch: 61, batch: 1100 // loss: 0.043 epoch: 61, batch: 1200 // loss: 0.045 epoch: 61, batch: 1300 // loss: 0.046 epoch: 61, batch: 1400 // loss: 0.043 epoch: 61, batch: 1500 // loss: 0.049 epoch: 61, batch: 1600 // loss: 0.052 epoch: 61, batch: 1700 // loss: 0.046 epoch: 61, batch: 1800 // loss: 0.053 epoch: 61, batch: 1900 // loss: 0.046 epoch: 61, batch: 2000 // loss: 0.047 epoch: 61, batch: 2100 // loss: 0.048 epoch: 61, batch: 2200 // loss: 0.051 epoch: 61, batch: 2300 // loss: 0.050 epoch: 
61, batch: 2400 // loss: 0.043 epoch: 61, batch: 2500 // loss: 0.043 epoch: 61, batch: 2600 // loss: 0.046 epoch: 61, batch: 2700 // loss: 0.043 epoch: 61, batch: 2800 // loss: 0.047 epoch: 61, batch: 2900 // loss: 0.043 epoch: 61, batch: 3000 // loss: 0.046 epoch: 61, batch: 3100 // loss: 0.044 epoch: 61, batch: 3200 // loss: 0.040 epoch: 61, batch: 3300 // loss: 0.039 epoch: 61, batch: 3400 // loss: 0.045 epoch: 61, batch: 3500 // loss: 0.036 epoch: 61, batch: 3600 // loss: 0.044 epoch: 61, batch: 3700 // loss: 0.044 epoch: 62, batch: 0 // loss: 0.054 epoch: 62, batch: 100 // loss: 0.047 epoch: 62, batch: 200 // loss: 0.043 epoch: 62, batch: 300 // loss: 0.049 epoch: 62, batch: 400 // loss: 0.047 epoch: 62, batch: 500 // loss: 0.040 epoch: 62, batch: 600 // loss: 0.041 epoch: 62, batch: 700 // loss: 0.045 epoch: 62, batch: 800 // loss: 0.043 epoch: 62, batch: 900 // loss: 0.050 epoch: 62, batch: 1000 // loss: 0.046 epoch: 62, batch: 1100 // loss: 0.043 epoch: 62, batch: 1200 // loss: 0.045 epoch: 62, batch: 1300 // loss: 0.046 epoch: 62, batch: 1400 // loss: 0.043 epoch: 62, batch: 1500 // loss: 0.049 epoch: 62, batch: 1600 // loss: 0.052 epoch: 62, batch: 1700 // loss: 0.046 epoch: 62, batch: 1800 // loss: 0.053 epoch: 62, batch: 1900 // loss: 0.046 epoch: 62, batch: 2000 // loss: 0.047 epoch: 62, batch: 2100 // loss: 0.047 epoch: 62, batch: 2200 // loss: 0.050 epoch: 62, batch: 2300 // loss: 0.050 epoch: 62, batch: 2400 // loss: 0.043 epoch: 62, batch: 2500 // loss: 0.043 epoch: 62, batch: 2600 // loss: 0.045 epoch: 62, batch: 2700 // loss: 0.043 epoch: 62, batch: 2800 // loss: 0.047 epoch: 62, batch: 2900 // loss: 0.043 epoch: 62, batch: 3000 // loss: 0.046 epoch: 62, batch: 3100 // loss: 0.044 epoch: 62, batch: 3200 // loss: 0.040 epoch: 62, batch: 3300 // loss: 0.039 epoch: 62, batch: 3400 // loss: 0.045 epoch: 62, batch: 3500 // loss: 0.035 epoch: 62, batch: 3600 // loss: 0.044 epoch: 62, batch: 3700 // loss: 0.044 epoch: 63, batch: 0 // loss: 0.054 epoch: 
63, batch: 100 // loss: 0.047 epoch: 63, batch: 200 // loss: 0.043 epoch: 63, batch: 300 // loss: 0.049 epoch: 63, batch: 400 // loss: 0.047 epoch: 63, batch: 500 // loss: 0.040 epoch: 63, batch: 600 // loss: 0.041 epoch: 63, batch: 700 // loss: 0.045 epoch: 63, batch: 800 // loss: 0.043 epoch: 63, batch: 900 // loss: 0.050 epoch: 63, batch: 1000 // loss: 0.046 epoch: 63, batch: 1100 // loss: 0.043 epoch: 63, batch: 1200 // loss: 0.045 epoch: 63, batch: 1300 // loss: 0.046 epoch: 63, batch: 1400 // loss: 0.043 epoch: 63, batch: 1500 // loss: 0.049 epoch: 63, batch: 1600 // loss: 0.052 epoch: 63, batch: 1700 // loss: 0.046 epoch: 63, batch: 1800 // loss: 0.053 epoch: 63, batch: 1900 // loss: 0.046 epoch: 63, batch: 2000 // loss: 0.047 epoch: 63, batch: 2100 // loss: 0.047 epoch: 63, batch: 2200 // loss: 0.050 epoch: 63, batch: 2300 // loss: 0.049 epoch: 63, batch: 2400 // loss: 0.043 epoch: 63, batch: 2500 // loss: 0.043 epoch: 63, batch: 2600 // loss: 0.045 epoch: 63, batch: 2700 // loss: 0.043 epoch: 63, batch: 2800 // loss: 0.047 epoch: 63, batch: 2900 // loss: 0.043 epoch: 63, batch: 3000 // loss: 0.046 epoch: 63, batch: 3100 // loss: 0.044 epoch: 63, batch: 3200 // loss: 0.040 epoch: 63, batch: 3300 // loss: 0.039 epoch: 63, batch: 3400 // loss: 0.045 epoch: 63, batch: 3500 // loss: 0.035 epoch: 63, batch: 3600 // loss: 0.044 epoch: 63, batch: 3700 // loss: 0.044 epoch: 64, batch: 0 // loss: 0.054 epoch: 64, batch: 100 // loss: 0.047 epoch: 64, batch: 200 // loss: 0.043 epoch: 64, batch: 300 // loss: 0.049 epoch: 64, batch: 400 // loss: 0.047 epoch: 64, batch: 500 // loss: 0.040 epoch: 64, batch: 600 // loss: 0.041 epoch: 64, batch: 700 // loss: 0.045 epoch: 64, batch: 800 // loss: 0.043 epoch: 64, batch: 900 // loss: 0.049 epoch: 64, batch: 1000 // loss: 0.046 epoch: 64, batch: 1100 // loss: 0.043 epoch: 64, batch: 1200 // loss: 0.045 epoch: 64, batch: 1300 // loss: 0.046 epoch: 64, batch: 1400 // loss: 0.043 epoch: 64, batch: 1500 // loss: 0.049 epoch: 64, 
batch: 1600 // loss: 0.052 epoch: 64, batch: 1700 // loss: 0.046 epoch: 64, batch: 1800 // loss: 0.053 epoch: 64, batch: 1900 // loss: 0.046 epoch: 64, batch: 2000 // loss: 0.047 epoch: 64, batch: 2100 // loss: 0.047 epoch: 64, batch: 2200 // loss: 0.050 epoch: 64, batch: 2300 // loss: 0.049 epoch: 64, batch: 2400 // loss: 0.043 epoch: 64, batch: 2500 // loss: 0.043 epoch: 64, batch: 2600 // loss: 0.045 epoch: 64, batch: 2700 // loss: 0.043 epoch: 64, batch: 2800 // loss: 0.047 epoch: 64, batch: 2900 // loss: 0.042 epoch: 64, batch: 3000 // loss: 0.046 epoch: 64, batch: 3100 // loss: 0.044 epoch: 64, batch: 3200 // loss: 0.040 epoch: 64, batch: 3300 // loss: 0.039 epoch: 64, batch: 3400 // loss: 0.045 epoch: 64, batch: 3500 // loss: 0.035 epoch: 64, batch: 3600 // loss: 0.044 epoch: 64, batch: 3700 // loss: 0.044 epoch: 65, batch: 0 // loss: 0.054 epoch: 65, batch: 100 // loss: 0.047 epoch: 65, batch: 200 // loss: 0.043 epoch: 65, batch: 300 // loss: 0.049 epoch: 65, batch: 400 // loss: 0.047 epoch: 65, batch: 500 // loss: 0.040 epoch: 65, batch: 600 // loss: 0.041 epoch: 65, batch: 700 // loss: 0.045 epoch: 65, batch: 800 // loss: 0.043 epoch: 65, batch: 900 // loss: 0.049 epoch: 65, batch: 1000 // loss: 0.046 epoch: 65, batch: 1100 // loss: 0.043 epoch: 65, batch: 1200 // loss: 0.044 epoch: 65, batch: 1300 // loss: 0.046 epoch: 65, batch: 1400 // loss: 0.043 epoch: 65, batch: 1500 // loss: 0.049 epoch: 65, batch: 1600 // loss: 0.052 epoch: 65, batch: 1700 // loss: 0.046 epoch: 65, batch: 1800 // loss: 0.053 epoch: 65, batch: 1900 // loss: 0.046 epoch: 65, batch: 2000 // loss: 0.047 epoch: 65, batch: 2100 // loss: 0.047 epoch: 65, batch: 2200 // loss: 0.050 epoch: 65, batch: 2300 // loss: 0.049 epoch: 65, batch: 2400 // loss: 0.042 epoch: 65, batch: 2500 // loss: 0.043 epoch: 65, batch: 2600 // loss: 0.045 epoch: 65, batch: 2700 // loss: 0.043 epoch: 65, batch: 2800 // loss: 0.047 epoch: 65, batch: 2900 // loss: 0.042 epoch: 65, batch: 3000 // loss: 0.046 epoch: 
65, batch: 3100 // loss: 0.043 epoch: 65, batch: 3200 // loss: 0.039 epoch: 65, batch: 3300 // loss: 0.039 epoch: 65, batch: 3400 // loss: 0.045 epoch: 65, batch: 3500 // loss: 0.035 epoch: 65, batch: 3600 // loss: 0.044 epoch: 65, batch: 3700 // loss: 0.044 epoch: 66, batch: 0 // loss: 0.054 epoch: 66, batch: 100 // loss: 0.047 epoch: 66, batch: 200 // loss: 0.043 epoch: 66, batch: 300 // loss: 0.049 epoch: 66, batch: 400 // loss: 0.047 epoch: 66, batch: 500 // loss: 0.040 epoch: 66, batch: 600 // loss: 0.041 epoch: 66, batch: 700 // loss: 0.045 epoch: 66, batch: 800 // loss: 0.043 epoch: 66, batch: 900 // loss: 0.049 epoch: 66, batch: 1000 // loss: 0.046 epoch: 66, batch: 1100 // loss: 0.043 epoch: 66, batch: 1200 // loss: 0.044 epoch: 66, batch: 1300 // loss: 0.046 epoch: 66, batch: 1400 // loss: 0.043 epoch: 66, batch: 1500 // loss: 0.048 epoch: 66, batch: 1600 // loss: 0.052 epoch: 66, batch: 1700 // loss: 0.046 epoch: 66, batch: 1800 // loss: 0.053 epoch: 66, batch: 1900 // loss: 0.046 epoch: 66, batch: 2000 // loss: 0.047 epoch: 66, batch: 2100 // loss: 0.047 epoch: 66, batch: 2200 // loss: 0.050 epoch: 66, batch: 2300 // loss: 0.049 epoch: 66, batch: 2400 // loss: 0.042 epoch: 66, batch: 2500 // loss: 0.043 epoch: 66, batch: 2600 // loss: 0.045 epoch: 66, batch: 2700 // loss: 0.043 epoch: 66, batch: 2800 // loss: 0.047 epoch: 66, batch: 2900 // loss: 0.042 epoch: 66, batch: 3000 // loss: 0.046 epoch: 66, batch: 3100 // loss: 0.043 epoch: 66, batch: 3200 // loss: 0.039 epoch: 66, batch: 3300 // loss: 0.039 epoch: 66, batch: 3400 // loss: 0.045 epoch: 66, batch: 3500 // loss: 0.035 epoch: 66, batch: 3600 // loss: 0.043 epoch: 66, batch: 3700 // loss: 0.044 epoch: 67, batch: 0 // loss: 0.054 epoch: 67, batch: 100 // loss: 0.047 epoch: 67, batch: 200 // loss: 0.043 epoch: 67, batch: 300 // loss: 0.049 epoch: 67, batch: 400 // loss: 0.047 epoch: 67, batch: 500 // loss: 0.040 epoch: 67, batch: 600 // loss: 0.041 epoch: 67, batch: 700 // loss: 0.045 epoch: 67, 
batch: 800 // loss: 0.043 epoch: 67, batch: 900 // loss: 0.049 epoch: 67, batch: 1000 // loss: 0.046 epoch: 67, batch: 1100 // loss: 0.043 epoch: 67, batch: 1200 // loss: 0.044 epoch: 67, batch: 1300 // loss: 0.046 epoch: 67, batch: 1400 // loss: 0.043 epoch: 67, batch: 1500 // loss: 0.048 epoch: 67, batch: 1600 // loss: 0.052 epoch: 67, batch: 1700 // loss: 0.046 epoch: 67, batch: 1800 // loss: 0.053 epoch: 67, batch: 1900 // loss: 0.046 epoch: 67, batch: 2000 // loss: 0.047 epoch: 67, batch: 2100 // loss: 0.047 epoch: 67, batch: 2200 // loss: 0.050 epoch: 67, batch: 2300 // loss: 0.049 epoch: 67, batch: 2400 // loss: 0.042 epoch: 67, batch: 2500 // loss: 0.043 epoch: 67, batch: 2600 // loss: 0.045 epoch: 67, batch: 2700 // loss: 0.043 epoch: 67, batch: 2800 // loss: 0.046 epoch: 67, batch: 2900 // loss: 0.042 epoch: 67, batch: 3000 // loss: 0.046 epoch: 67, batch: 3100 // loss: 0.043 epoch: 67, batch: 3200 // loss: 0.039 epoch: 67, batch: 3300 // loss: 0.039 epoch: 67, batch: 3400 // loss: 0.044 epoch: 67, batch: 3500 // loss: 0.035 epoch: 67, batch: 3600 // loss: 0.043 epoch: 67, batch: 3700 // loss: 0.044 epoch: 68, batch: 0 // loss: 0.054 epoch: 68, batch: 100 // loss: 0.047 epoch: 68, batch: 200 // loss: 0.043 epoch: 68, batch: 300 // loss: 0.049 epoch: 68, batch: 400 // loss: 0.046 epoch: 68, batch: 500 // loss: 0.040 epoch: 68, batch: 600 // loss: 0.041 epoch: 68, batch: 700 // loss: 0.045 epoch: 68, batch: 800 // loss: 0.042 epoch: 68, batch: 900 // loss: 0.049 epoch: 68, batch: 1000 // loss: 0.046 epoch: 68, batch: 1100 // loss: 0.043 epoch: 68, batch: 1200 // loss: 0.044 epoch: 68, batch: 1300 // loss: 0.046 epoch: 68, batch: 1400 // loss: 0.043 epoch: 68, batch: 1500 // loss: 0.048 epoch: 68, batch: 1600 // loss: 0.052 epoch: 68, batch: 1700 // loss: 0.046 epoch: 68, batch: 1800 // loss: 0.053 epoch: 68, batch: 1900 // loss: 0.046 epoch: 68, batch: 2000 // loss: 0.047 epoch: 68, batch: 2100 // loss: 0.047 epoch: 68, batch: 2200 // loss: 0.050 epoch: 68, 
batch: 2300 // loss: 0.049 epoch: 68, batch: 2400 // loss: 0.042 epoch: 68, batch: 2500 // loss: 0.043 epoch: 68, batch: 2600 // loss: 0.045 epoch: 68, batch: 2700 // loss: 0.043 epoch: 68, batch: 2800 // loss: 0.046 epoch: 68, batch: 2900 // loss: 0.042 epoch: 68, batch: 3000 // loss: 0.046 epoch: 68, batch: 3100 // loss: 0.043 epoch: 68, batch: 3200 // loss: 0.039 epoch: 68, batch: 3300 // loss: 0.038 epoch: 68, batch: 3400 // loss: 0.044 epoch: 68, batch: 3500 // loss: 0.035 epoch: 68, batch: 3600 // loss: 0.043 epoch: 68, batch: 3700 // loss: 0.044 epoch: 69, batch: 0 // loss: 0.054 epoch: 69, batch: 100 // loss: 0.047 epoch: 69, batch: 200 // loss: 0.043 epoch: 69, batch: 300 // loss: 0.049 epoch: 69, batch: 400 // loss: 0.046 epoch: 69, batch: 500 // loss: 0.040 epoch: 69, batch: 600 // loss: 0.041 epoch: 69, batch: 700 // loss: 0.044 epoch: 69, batch: 800 // loss: 0.042 epoch: 69, batch: 900 // loss: 0.049 epoch: 69, batch: 1000 // loss: 0.046 epoch: 69, batch: 1100 // loss: 0.043 epoch: 69, batch: 1200 // loss: 0.044 epoch: 69, batch: 1300 // loss: 0.046 epoch: 69, batch: 1400 // loss: 0.043 epoch: 69, batch: 1500 // loss: 0.048 epoch: 69, batch: 1600 // loss: 0.052 epoch: 69, batch: 1700 // loss: 0.045 epoch: 69, batch: 1800 // loss: 0.053 epoch: 69, batch: 1900 // loss: 0.045 epoch: 69, batch: 2000 // loss: 0.047 epoch: 69, batch: 2100 // loss: 0.047 epoch: 69, batch: 2200 // loss: 0.050 epoch: 69, batch: 2300 // loss: 0.049 epoch: 69, batch: 2400 // loss: 0.042 epoch: 69, batch: 2500 // loss: 0.043 epoch: 69, batch: 2600 // loss: 0.045 epoch: 69, batch: 2700 // loss: 0.043 epoch: 69, batch: 2800 // loss: 0.046 epoch: 69, batch: 2900 // loss: 0.042 epoch: 69, batch: 3000 // loss: 0.046 epoch: 69, batch: 3100 // loss: 0.043 epoch: 69, batch: 3200 // loss: 0.039 epoch: 69, batch: 3300 // loss: 0.038 epoch: 69, batch: 3400 // loss: 0.044 epoch: 69, batch: 3500 // loss: 0.035 epoch: 69, batch: 3600 // loss: 0.043 epoch: 69, batch: 3700 // loss: 0.044 epoch: 
70, batch: 0 // loss: 0.053 epoch: 70, batch: 100 // loss: 0.047 epoch: 70, batch: 200 // loss: 0.043 epoch: 70, batch: 300 // loss: 0.049 epoch: 70, batch: 400 // loss: 0.046 epoch: 70, batch: 500 // loss: 0.040 epoch: 70, batch: 600 // loss: 0.041 epoch: 70, batch: 700 // loss: 0.044 epoch: 70, batch: 800 // loss: 0.042 epoch: 70, batch: 900 // loss: 0.049 epoch: 70, batch: 1000 // loss: 0.046 epoch: 70, batch: 1100 // loss: 0.043 epoch: 70, batch: 1200 // loss: 0.044 epoch: 70, batch: 1300 // loss: 0.046 epoch: 70, batch: 1400 // loss: 0.043 epoch: 70, batch: 1500 // loss: 0.048 epoch: 70, batch: 1600 // loss: 0.052 epoch: 70, batch: 1700 // loss: 0.045 epoch: 70, batch: 1800 // loss: 0.053 epoch: 70, batch: 1900 // loss: 0.045 epoch: 70, batch: 2000 // loss: 0.047 epoch: 70, batch: 2100 // loss: 0.047 epoch: 70, batch: 2200 // loss: 0.050 epoch: 70, batch: 2300 // loss: 0.049 epoch: 70, batch: 2400 // loss: 0.042 epoch: 70, batch: 2500 // loss: 0.043 epoch: 70, batch: 2600 // loss: 0.045 epoch: 70, batch: 2700 // loss: 0.043 epoch: 70, batch: 2800 // loss: 0.046 epoch: 70, batch: 2900 // loss: 0.042 epoch: 70, batch: 3000 // loss: 0.046 epoch: 70, batch: 3100 // loss: 0.043 epoch: 70, batch: 3200 // loss: 0.039 epoch: 70, batch: 3300 // loss: 0.038 epoch: 70, batch: 3400 // loss: 0.044 epoch: 70, batch: 3500 // loss: 0.035 epoch: 70, batch: 3600 // loss: 0.043 epoch: 70, batch: 3700 // loss: 0.044 epoch: 71, batch: 0 // loss: 0.053 epoch: 71, batch: 100 // loss: 0.046 epoch: 71, batch: 200 // loss: 0.043 epoch: 71, batch: 300 // loss: 0.049 epoch: 71, batch: 400 // loss: 0.046 epoch: 71, batch: 500 // loss: 0.040 epoch: 71, batch: 600 // loss: 0.041 epoch: 71, batch: 700 // loss: 0.044 epoch: 71, batch: 800 // loss: 0.042 epoch: 71, batch: 900 // loss: 0.049 epoch: 71, batch: 1000 // loss: 0.046 epoch: 71, batch: 1100 // loss: 0.043 epoch: 71, batch: 1200 // loss: 0.044 epoch: 71, batch: 1300 // loss: 0.046 epoch: 71, batch: 1400 // loss: 0.043 epoch: 71, 
batch: 1500 // loss: 0.048 epoch: 71, batch: 1600 // loss: 0.052 epoch: 71, batch: 1700 // loss: 0.045 epoch: 71, batch: 1800 // loss: 0.053 epoch: 71, batch: 1900 // loss: 0.045 epoch: 71, batch: 2000 // loss: 0.047 epoch: 71, batch: 2100 // loss: 0.047 epoch: 71, batch: 2200 // loss: 0.050 epoch: 71, batch: 2300 // loss: 0.049 epoch: 71, batch: 2400 // loss: 0.042 epoch: 71, batch: 2500 // loss: 0.043 epoch: 71, batch: 2600 // loss: 0.045 epoch: 71, batch: 2700 // loss: 0.043 epoch: 71, batch: 2800 // loss: 0.046 epoch: 71, batch: 2900 // loss: 0.042 epoch: 71, batch: 3000 // loss: 0.046 epoch: 71, batch: 3100 // loss: 0.043 epoch: 71, batch: 3200 // loss: 0.039 epoch: 71, batch: 3300 // loss: 0.038 epoch: 71, batch: 3400 // loss: 0.044 epoch: 71, batch: 3500 // loss: 0.035 epoch: 71, batch: 3600 // loss: 0.043 epoch: 71, batch: 3700 // loss: 0.043 epoch: 72, batch: 0 // loss: 0.053 epoch: 72, batch: 100 // loss: 0.046 epoch: 72, batch: 200 // loss: 0.043 epoch: 72, batch: 300 // loss: 0.049 epoch: 72, batch: 400 // loss: 0.046 epoch: 72, batch: 500 // loss: 0.040 epoch: 72, batch: 600 // loss: 0.041 epoch: 72, batch: 700 // loss: 0.044 epoch: 72, batch: 800 // loss: 0.042 epoch: 72, batch: 900 // loss: 0.049 epoch: 72, batch: 1000 // loss: 0.046 epoch: 72, batch: 1100 // loss: 0.043 epoch: 72, batch: 1200 // loss: 0.044 epoch: 72, batch: 1300 // loss: 0.046 epoch: 72, batch: 1400 // loss: 0.043 epoch: 72, batch: 1500 // loss: 0.048 epoch: 72, batch: 1600 // loss: 0.052 epoch: 72, batch: 1700 // loss: 0.045 epoch: 72, batch: 1800 // loss: 0.053 epoch: 72, batch: 1900 // loss: 0.045 epoch: 72, batch: 2000 // loss: 0.047 epoch: 72, batch: 2100 // loss: 0.047 epoch: 72, batch: 2200 // loss: 0.050 epoch: 72, batch: 2300 // loss: 0.049 epoch: 72, batch: 2400 // loss: 0.042 epoch: 72, batch: 2500 // loss: 0.043 epoch: 72, batch: 2600 // loss: 0.045 epoch: 72, batch: 2700 // loss: 0.043 epoch: 72, batch: 2800 // loss: 0.046 epoch: 72, batch: 2900 // loss: 0.042 epoch: 
72, batch: 3000 // loss: 0.046 epoch: 72, batch: 3100 // loss: 0.043 epoch: 72, batch: 3200 // loss: 0.039 epoch: 72, batch: 3300 // loss: 0.038 epoch: 72, batch: 3400 // loss: 0.044 epoch: 72, batch: 3500 // loss: 0.035 epoch: 72, batch: 3600 // loss: 0.043 epoch: 72, batch: 3700 // loss: 0.043 epoch: 73, batch: 0 // loss: 0.053 epoch: 73, batch: 100 // loss: 0.046 epoch: 73, batch: 200 // loss: 0.043 epoch: 73, batch: 300 // loss: 0.049 epoch: 73, batch: 400 // loss: 0.046 epoch: 73, batch: 500 // loss: 0.040 epoch: 73, batch: 600 // loss: 0.040 epoch: 73, batch: 700 // loss: 0.044 epoch: 73, batch: 800 // loss: 0.042 epoch: 73, batch: 900 // loss: 0.049 epoch: 73, batch: 1000 // loss: 0.046 epoch: 73, batch: 1100 // loss: 0.043 epoch: 73, batch: 1200 // loss: 0.044 epoch: 73, batch: 1300 // loss: 0.046 epoch: 73, batch: 1400 // loss: 0.043 epoch: 73, batch: 1500 // loss: 0.048 epoch: 73, batch: 1600 // loss: 0.052 epoch: 73, batch: 1700 // loss: 0.045 epoch: 73, batch: 1800 // loss: 0.053 epoch: 73, batch: 1900 // loss: 0.045 epoch: 73, batch: 2000 // loss: 0.047 epoch: 73, batch: 2100 // loss: 0.047 epoch: 73, batch: 2200 // loss: 0.050 epoch: 73, batch: 2300 // loss: 0.049 epoch: 73, batch: 2400 // loss: 0.042 epoch: 73, batch: 2500 // loss: 0.042 epoch: 73, batch: 2600 // loss: 0.045 epoch: 73, batch: 2700 // loss: 0.043 epoch: 73, batch: 2800 // loss: 0.046 epoch: 73, batch: 2900 // loss: 0.042 epoch: 73, batch: 3000 // loss: 0.046 epoch: 73, batch: 3100 // loss: 0.043 epoch: 73, batch: 3200 // loss: 0.039 epoch: 73, batch: 3300 // loss: 0.038 epoch: 73, batch: 3400 // loss: 0.044 epoch: 73, batch: 3500 // loss: 0.035 epoch: 73, batch: 3600 // loss: 0.043 epoch: 73, batch: 3700 // loss: 0.043 epoch: 74, batch: 0 // loss: 0.053 epoch: 74, batch: 100 // loss: 0.046 epoch: 74, batch: 200 // loss: 0.043 epoch: 74, batch: 300 // loss: 0.049 epoch: 74, batch: 400 // loss: 0.046 epoch: 74, batch: 500 // loss: 0.040 epoch: 74, batch: 600 // loss: 0.040 epoch: 74, 
batch: 700 // loss: 0.044 epoch: 74, batch: 800 // loss: 0.042 epoch: 74, batch: 900 // loss: 0.049 epoch: 74, batch: 1000 // loss: 0.046 epoch: 74, batch: 1100 // loss: 0.043 epoch: 74, batch: 1200 // loss: 0.044 epoch: 74, batch: 1300 // loss: 0.046 epoch: 74, batch: 1400 // loss: 0.043 epoch: 74, batch: 1500 // loss: 0.048 epoch: 74, batch: 1600 // loss: 0.052 epoch: 74, batch: 1700 // loss: 0.045 epoch: 74, batch: 1800 // loss: 0.053 epoch: 74, batch: 1900 // loss: 0.045 epoch: 74, batch: 2000 // loss: 0.047 epoch: 74, batch: 2100 // loss: 0.047 epoch: 74, batch: 2200 // loss: 0.050 epoch: 74, batch: 2300 // loss: 0.049 epoch: 74, batch: 2400 // loss: 0.042 epoch: 74, batch: 2500 // loss: 0.042 epoch: 74, batch: 2600 // loss: 0.045 epoch: 74, batch: 2700 // loss: 0.043 epoch: 74, batch: 2800 // loss: 0.046 epoch: 74, batch: 2900 // loss: 0.042 epoch: 74, batch: 3000 // loss: 0.046 epoch: 74, batch: 3100 // loss: 0.043 epoch: 74, batch: 3200 // loss: 0.039 epoch: 74, batch: 3300 // loss: 0.038 epoch: 74, batch: 3400 // loss: 0.044 epoch: 74, batch: 3500 // loss: 0.035 epoch: 74, batch: 3600 // loss: 0.043 epoch: 74, batch: 3700 // loss: 0.043 epoch: 75, batch: 0 // loss: 0.053 epoch: 75, batch: 100 // loss: 0.046 epoch: 75, batch: 200 // loss: 0.043 epoch: 75, batch: 300 // loss: 0.049 epoch: 75, batch: 400 // loss: 0.046 epoch: 75, batch: 500 // loss: 0.040 epoch: 75, batch: 600 // loss: 0.040 epoch: 75, batch: 700 // loss: 0.044 epoch: 75, batch: 800 // loss: 0.042 epoch: 75, batch: 900 // loss: 0.049 epoch: 75, batch: 1000 // loss: 0.046 epoch: 75, batch: 1100 // loss: 0.043 epoch: 75, batch: 1200 // loss: 0.044 epoch: 75, batch: 1300 // loss: 0.046 epoch: 75, batch: 1400 // loss: 0.043 epoch: 75, batch: 1500 // loss: 0.048 epoch: 75, batch: 1600 // loss: 0.052 epoch: 75, batch: 1700 // loss: 0.045 epoch: 75, batch: 1800 // loss: 0.053 epoch: 75, batch: 1900 // loss: 0.045 epoch: 75, batch: 2000 // loss: 0.047 epoch: 75, batch: 2100 // loss: 0.047 epoch: 75, 
batch: 2200 // loss: 0.050 epoch: 75, batch: 2300 // loss: 0.049 epoch: 75, batch: 2400 // loss: 0.042 epoch: 75, batch: 2500 // loss: 0.042 epoch: 75, batch: 2600 // loss: 0.045 epoch: 75, batch: 2700 // loss: 0.043 epoch: 75, batch: 2800 // loss: 0.046 epoch: 75, batch: 2900 // loss: 0.042 epoch: 75, batch: 3000 // loss: 0.046 epoch: 75, batch: 3100 // loss: 0.043 epoch: 75, batch: 3200 // loss: 0.039 epoch: 75, batch: 3300 // loss: 0.038 epoch: 75, batch: 3400 // loss: 0.044 epoch: 75, batch: 3500 // loss: 0.035 epoch: 75, batch: 3600 // loss: 0.043 epoch: 75, batch: 3700 // loss: 0.043 epoch: 76, batch: 0 // loss: 0.053 epoch: 76, batch: 100 // loss: 0.046 epoch: 76, batch: 200 // loss: 0.043 epoch: 76, batch: 300 // loss: 0.049 epoch: 76, batch: 400 // loss: 0.046 epoch: 76, batch: 500 // loss: 0.040 epoch: 76, batch: 600 // loss: 0.040 epoch: 76, batch: 700 // loss: 0.044 epoch: 76, batch: 800 // loss: 0.042 epoch: 76, batch: 900 // loss: 0.049 epoch: 76, batch: 1000 // loss: 0.046 epoch: 76, batch: 1100 // loss: 0.043 epoch: 76, batch: 1200 // loss: 0.044 epoch: 76, batch: 1300 // loss: 0.046 epoch: 76, batch: 1400 // loss: 0.043 epoch: 76, batch: 1500 // loss: 0.048 epoch: 76, batch: 1600 // loss: 0.052 epoch: 76, batch: 1700 // loss: 0.045 epoch: 76, batch: 1800 // loss: 0.053 epoch: 76, batch: 1900 // loss: 0.045 epoch: 76, batch: 2000 // loss: 0.047 epoch: 76, batch: 2100 // loss: 0.047 epoch: 76, batch: 2200 // loss: 0.050 epoch: 76, batch: 2300 // loss: 0.049 epoch: 76, batch: 2400 // loss: 0.042 epoch: 76, batch: 2500 // loss: 0.042 epoch: 76, batch: 2600 // loss: 0.045 epoch: 76, batch: 2700 // loss: 0.043 epoch: 76, batch: 2800 // loss: 0.046 epoch: 76, batch: 2900 // loss: 0.042 epoch: 76, batch: 3000 // loss: 0.046 epoch: 76, batch: 3100 // loss: 0.043 epoch: 76, batch: 3200 // loss: 0.039 epoch: 76, batch: 3300 // loss: 0.038 epoch: 76, batch: 3400 // loss: 0.044 epoch: 76, batch: 3500 // loss: 0.035 epoch: 76, batch: 3600 // loss: 0.043 epoch: 
76, batch: 3700 // loss: 0.043 epoch: 77, batch: 0 // loss: 0.053 epoch: 77, batch: 100 // loss: 0.046 epoch: 77, batch: 200 // loss: 0.043 epoch: 77, batch: 300 // loss: 0.049 epoch: 77, batch: 400 // loss: 0.046 epoch: 77, batch: 500 // loss: 0.040 epoch: 77, batch: 600 // loss: 0.040 epoch: 77, batch: 700 // loss: 0.044 epoch: 77, batch: 800 // loss: 0.042 epoch: 77, batch: 900 // loss: 0.049 epoch: 77, batch: 1000 // loss: 0.046 epoch: 77, batch: 1100 // loss: 0.043 epoch: 77, batch: 1200 // loss: 0.044 epoch: 77, batch: 1300 // loss: 0.046 epoch: 77, batch: 1400 // loss: 0.043 epoch: 77, batch: 1500 // loss: 0.048 epoch: 77, batch: 1600 // loss: 0.052 epoch: 77, batch: 1700 // loss: 0.045 epoch: 77, batch: 1800 // loss: 0.053 epoch: 77, batch: 1900 // loss: 0.045 epoch: 77, batch: 2000 // loss: 0.047 epoch: 77, batch: 2100 // loss: 0.047 epoch: 77, batch: 2200 // loss: 0.050 epoch: 77, batch: 2300 // loss: 0.049 epoch: 77, batch: 2400 // loss: 0.042 epoch: 77, batch: 2500 // loss: 0.042 epoch: 77, batch: 2600 // loss: 0.045 epoch: 77, batch: 2700 // loss: 0.043 epoch: 77, batch: 2800 // loss: 0.046 epoch: 77, batch: 2900 // loss: 0.042 epoch: 77, batch: 3000 // loss: 0.045 epoch: 77, batch: 3100 // loss: 0.043 epoch: 77, batch: 3200 // loss: 0.039 epoch: 77, batch: 3300 // loss: 0.038 epoch: 77, batch: 3400 // loss: 0.044 epoch: 77, batch: 3500 // loss: 0.035 epoch: 77, batch: 3600 // loss: 0.043 epoch: 77, batch: 3700 // loss: 0.043 epoch: 78, batch: 0 // loss: 0.053 epoch: 78, batch: 100 // loss: 0.046 epoch: 78, batch: 200 // loss: 0.043 epoch: 78, batch: 300 // loss: 0.049 epoch: 78, batch: 400 // loss: 0.046 epoch: 78, batch: 500 // loss: 0.040 epoch: 78, batch: 600 // loss: 0.040 epoch: 78, batch: 700 // loss: 0.044 epoch: 78, batch: 800 // loss: 0.042 epoch: 78, batch: 900 // loss: 0.049 epoch: 78, batch: 1000 // loss: 0.046 epoch: 78, batch: 1100 // loss: 0.043 epoch: 78, batch: 1200 // loss: 0.044 epoch: 78, batch: 1300 // loss: 0.046 epoch: 78, 
batch: 1400 // loss: 0.043 epoch: 78, batch: 1500 // loss: 0.048 epoch: 78, batch: 1600 // loss: 0.052 epoch: 78, batch: 1700 // loss: 0.045 epoch: 78, batch: 1800 // loss: 0.053 epoch: 78, batch: 1900 // loss: 0.045 epoch: 78, batch: 2000 // loss: 0.047 epoch: 78, batch: 2100 // loss: 0.047 epoch: 78, batch: 2200 // loss: 0.050 epoch: 78, batch: 2300 // loss: 0.049 epoch: 78, batch: 2400 // loss: 0.042 epoch: 78, batch: 2500 // loss: 0.042 epoch: 78, batch: 2600 // loss: 0.045 epoch: 78, batch: 2700 // loss: 0.043 epoch: 78, batch: 2800 // loss: 0.046 epoch: 78, batch: 2900 // loss: 0.042 epoch: 78, batch: 3000 // loss: 0.045 epoch: 78, batch: 3100 // loss: 0.043 epoch: 78, batch: 3200 // loss: 0.039 epoch: 78, batch: 3300 // loss: 0.038 epoch: 78, batch: 3400 // loss: 0.044 epoch: 78, batch: 3500 // loss: 0.035 epoch: 78, batch: 3600 // loss: 0.043 epoch: 78, batch: 3700 // loss: 0.043 epoch: 79, batch: 0 // loss: 0.053 epoch: 79, batch: 100 // loss: 0.046 epoch: 79, batch: 200 // loss: 0.043 epoch: 79, batch: 300 // loss: 0.049 epoch: 79, batch: 400 // loss: 0.046 epoch: 79, batch: 500 // loss: 0.040 epoch: 79, batch: 600 // loss: 0.040 epoch: 79, batch: 700 // loss: 0.044 epoch: 79, batch: 800 // loss: 0.042 epoch: 79, batch: 900 // loss: 0.049 epoch: 79, batch: 1000 // loss: 0.046 epoch: 79, batch: 1100 // loss: 0.043 epoch: 79, batch: 1200 // loss: 0.044 epoch: 79, batch: 1300 // loss: 0.046 epoch: 79, batch: 1400 // loss: 0.043 epoch: 79, batch: 1500 // loss: 0.048 epoch: 79, batch: 1600 // loss: 0.052 epoch: 79, batch: 1700 // loss: 0.045 epoch: 79, batch: 1800 // loss: 0.053 epoch: 79, batch: 1900 // loss: 0.045 epoch: 79, batch: 2000 // loss: 0.047 epoch: 79, batch: 2100 // loss: 0.047 epoch: 79, batch: 2200 // loss: 0.050 epoch: 79, batch: 2300 // loss: 0.049 epoch: 79, batch: 2400 // loss: 0.042 epoch: 79, batch: 2500 // loss: 0.042 epoch: 79, batch: 2600 // loss: 0.045 epoch: 79, batch: 2700 // loss: 0.043 epoch: 79, batch: 2800 // loss: 0.046 epoch: 
79, batch: 2900 // loss: 0.042 epoch: 79, batch: 3000 // loss: 0.045 epoch: 79, batch: 3100 // loss: 0.043 epoch: 79, batch: 3200 // loss: 0.039 epoch: 79, batch: 3300 // loss: 0.038 epoch: 79, batch: 3400 // loss: 0.044 epoch: 79, batch: 3500 // loss: 0.035 epoch: 79, batch: 3600 // loss: 0.043 epoch: 79, batch: 3700 // loss: 0.043 epoch: 80, batch: 0 // loss: 0.053 epoch: 80, batch: 100 // loss: 0.046 epoch: 80, batch: 200 // loss: 0.043 epoch: 80, batch: 300 // loss: 0.049 epoch: 80, batch: 400 // loss: 0.046 epoch: 80, batch: 500 // loss: 0.040 epoch: 80, batch: 600 // loss: 0.040 epoch: 80, batch: 700 // loss: 0.044 epoch: 80, batch: 800 // loss: 0.042 epoch: 80, batch: 900 // loss: 0.049 epoch: 80, batch: 1000 // loss: 0.046 epoch: 80, batch: 1100 // loss: 0.043 epoch: 80, batch: 1200 // loss: 0.044 epoch: 80, batch: 1300 // loss: 0.046 epoch: 80, batch: 1400 // loss: 0.043 epoch: 80, batch: 1500 // loss: 0.048 epoch: 80, batch: 1600 // loss: 0.052 epoch: 80, batch: 1700 // loss: 0.045 epoch: 80, batch: 1800 // loss: 0.053 epoch: 80, batch: 1900 // loss: 0.045 epoch: 80, batch: 2000 // loss: 0.047 epoch: 80, batch: 2100 // loss: 0.047 epoch: 80, batch: 2200 // loss: 0.050 epoch: 80, batch: 2300 // loss: 0.049 epoch: 80, batch: 2400 // loss: 0.042 epoch: 80, batch: 2500 // loss: 0.042 epoch: 80, batch: 2600 // loss: 0.045 epoch: 80, batch: 2700 // loss: 0.043 epoch: 80, batch: 2800 // loss: 0.046 epoch: 80, batch: 2900 // loss: 0.042 epoch: 80, batch: 3000 // loss: 0.045 epoch: 80, batch: 3100 // loss: 0.043 epoch: 80, batch: 3200 // loss: 0.039 epoch: 80, batch: 3300 // loss: 0.038 epoch: 80, batch: 3400 // loss: 0.044 epoch: 80, batch: 3500 // loss: 0.035 epoch: 80, batch: 3600 // loss: 0.043 epoch: 80, batch: 3700 // loss: 0.043 epoch: 81, batch: 0 // loss: 0.053 epoch: 81, batch: 100 // loss: 0.046 epoch: 81, batch: 200 // loss: 0.043 epoch: 81, batch: 300 // loss: 0.049 epoch: 81, batch: 400 // loss: 0.046 epoch: 81, batch: 500 // loss: 0.040 epoch: 81, 
batch: 600 // loss: 0.040 epoch: 81, batch: 700 // loss: 0.044 epoch: 81, batch: 800 // loss: 0.042 epoch: 81, batch: 900 // loss: 0.049 epoch: 81, batch: 1000 // loss: 0.046 epoch: 81, batch: 1100 // loss: 0.043 epoch: 81, batch: 1200 // loss: 0.044 epoch: 81, batch: 1300 // loss: 0.046 epoch: 81, batch: 1400 // loss: 0.043 epoch: 81, batch: 1500 // loss: 0.048 epoch: 81, batch: 1600 // loss: 0.052 epoch: 81, batch: 1700 // loss: 0.045 epoch: 81, batch: 1800 // loss: 0.053 epoch: 81, batch: 1900 // loss: 0.045 epoch: 81, batch: 2000 // loss: 0.047 epoch: 81, batch: 2100 // loss: 0.047 epoch: 81, batch: 2200 // loss: 0.050 epoch: 81, batch: 2300 // loss: 0.049 epoch: 81, batch: 2400 // loss: 0.042 epoch: 81, batch: 2500 // loss: 0.042 epoch: 81, batch: 2600 // loss: 0.045 epoch: 81, batch: 2700 // loss: 0.042 epoch: 81, batch: 2800 // loss: 0.046 epoch: 81, batch: 2900 // loss: 0.042 epoch: 81, batch: 3000 // loss: 0.045 epoch: 81, batch: 3100 // loss: 0.043 epoch: 81, batch: 3200 // loss: 0.039 epoch: 81, batch: 3300 // loss: 0.038 epoch: 81, batch: 3400 // loss: 0.044 epoch: 81, batch: 3500 // loss: 0.035 epoch: 81, batch: 3600 // loss: 0.043 epoch: 81, batch: 3700 // loss: 0.043 epoch: 82, batch: 0 // loss: 0.053 epoch: 82, batch: 100 // loss: 0.046 epoch: 82, batch: 200 // loss: 0.043 epoch: 82, batch: 300 // loss: 0.049 epoch: 82, batch: 400 // loss: 0.046 epoch: 82, batch: 500 // loss: 0.040 epoch: 82, batch: 600 // loss: 0.040 epoch: 82, batch: 700 // loss: 0.044 epoch: 82, batch: 800 // loss: 0.042 epoch: 82, batch: 900 // loss: 0.049 epoch: 82, batch: 1000 // loss: 0.046 epoch: 82, batch: 1100 // loss: 0.043 epoch: 82, batch: 1200 // loss: 0.044 epoch: 82, batch: 1300 // loss: 0.046 epoch: 82, batch: 1400 // loss: 0.043 epoch: 82, batch: 1500 // loss: 0.048 epoch: 82, batch: 1600 // loss: 0.052 epoch: 82, batch: 1700 // loss: 0.045 epoch: 82, batch: 1800 // loss: 0.053 epoch: 82, batch: 1900 // loss: 0.045 epoch: 82, batch: 2000 // loss: 0.047 epoch: 82, 
batch: 2100 // loss: 0.047 epoch: 82, batch: 2200 // loss: 0.050 epoch: 82, batch: 2300 // loss: 0.049 epoch: 82, batch: 2400 // loss: 0.042 epoch: 82, batch: 2500 // loss: 0.042 epoch: 82, batch: 2600 // loss: 0.045 epoch: 82, batch: 2700 // loss: 0.042 epoch: 82, batch: 2800 // loss: 0.046 epoch: 82, batch: 2900 // loss: 0.042 epoch: 82, batch: 3000 // loss: 0.045 epoch: 82, batch: 3100 // loss: 0.043 epoch: 82, batch: 3200 // loss: 0.039 epoch: 82, batch: 3300 // loss: 0.038 epoch: 82, batch: 3400 // loss: 0.044 epoch: 82, batch: 3500 // loss: 0.035 epoch: 82, batch: 3600 // loss: 0.043 epoch: 82, batch: 3700 // loss: 0.043 epoch: 83, batch: 0 // loss: 0.053 epoch: 83, batch: 100 // loss: 0.046 epoch: 83, batch: 200 // loss: 0.043 epoch: 83, batch: 300 // loss: 0.049 epoch: 83, batch: 400 // loss: 0.046 epoch: 83, batch: 500 // loss: 0.040 epoch: 83, batch: 600 // loss: 0.040 epoch: 83, batch: 700 // loss: 0.044 epoch: 83, batch: 800 // loss: 0.042 epoch: 83, batch: 900 // loss: 0.049 epoch: 83, batch: 1000 // loss: 0.046 epoch: 83, batch: 1100 // loss: 0.042 epoch: 83, batch: 1200 // loss: 0.044 epoch: 83, batch: 1300 // loss: 0.046 epoch: 83, batch: 1400 // loss: 0.043 epoch: 83, batch: 1500 // loss: 0.048 epoch: 83, batch: 1600 // loss: 0.052 epoch: 83, batch: 1700 // loss: 0.045 epoch: 83, batch: 1800 // loss: 0.053 epoch: 83, batch: 1900 // loss: 0.045 epoch: 83, batch: 2000 // loss: 0.047 epoch: 83, batch: 2100 // loss: 0.047 epoch: 83, batch: 2200 // loss: 0.050 epoch: 83, batch: 2300 // loss: 0.049 epoch: 83, batch: 2400 // loss: 0.042 epoch: 83, batch: 2500 // loss: 0.042 epoch: 83, batch: 2600 // loss: 0.045 epoch: 83, batch: 2700 // loss: 0.042 epoch: 83, batch: 2800 // loss: 0.046 epoch: 83, batch: 2900 // loss: 0.042 epoch: 83, batch: 3000 // loss: 0.045 epoch: 83, batch: 3100 // loss: 0.043 epoch: 83, batch: 3200 // loss: 0.039 epoch: 83, batch: 3300 // loss: 0.038 epoch: 83, batch: 3400 // loss: 0.044 epoch: 83, batch: 3500 // loss: 0.035 epoch: 
83, batch: 3600 // loss: 0.043 epoch: 83, batch: 3700 // loss: 0.043 epoch: 84, batch: 0 // loss: 0.053 epoch: 84, batch: 100 // loss: 0.046 epoch: 84, batch: 200 // loss: 0.043 epoch: 84, batch: 300 // loss: 0.049 epoch: 84, batch: 400 // loss: 0.046 epoch: 84, batch: 500 // loss: 0.040 epoch: 84, batch: 600 // loss: 0.040 epoch: 84, batch: 700 // loss: 0.044 epoch: 84, batch: 800 // loss: 0.042 epoch: 84, batch: 900 // loss: 0.049 epoch: 84, batch: 1000 // loss: 0.046 epoch: 84, batch: 1100 // loss: 0.042 epoch: 84, batch: 1200 // loss: 0.044 epoch: 84, batch: 1300 // loss: 0.046 epoch: 84, batch: 1400 // loss: 0.043 epoch: 84, batch: 1500 // loss: 0.048 epoch: 84, batch: 1600 // loss: 0.052 epoch: 84, batch: 1700 // loss: 0.045 epoch: 84, batch: 1800 // loss: 0.053 epoch: 84, batch: 1900 // loss: 0.045 epoch: 84, batch: 2000 // loss: 0.047 epoch: 84, batch: 2100 // loss: 0.047 epoch: 84, batch: 2200 // loss: 0.050 epoch: 84, batch: 2300 // loss: 0.049 epoch: 84, batch: 2400 // loss: 0.042 epoch: 84, batch: 2500 // loss: 0.042 epoch: 84, batch: 2600 // loss: 0.045 epoch: 84, batch: 2700 // loss: 0.042 epoch: 84, batch: 2800 // loss: 0.046 epoch: 84, batch: 2900 // loss: 0.042 epoch: 84, batch: 3000 // loss: 0.045 epoch: 84, batch: 3100 // loss: 0.043 epoch: 84, batch: 3200 // loss: 0.039 epoch: 84, batch: 3300 // loss: 0.038 epoch: 84, batch: 3400 // loss: 0.044 epoch: 84, batch: 3500 // loss: 0.035 epoch: 84, batch: 3600 // loss: 0.043 epoch: 84, batch: 3700 // loss: 0.043 epoch: 85, batch: 0 // loss: 0.053 epoch: 85, batch: 100 // loss: 0.046 epoch: 85, batch: 200 // loss: 0.043 epoch: 85, batch: 300 // loss: 0.049 epoch: 85, batch: 400 // loss: 0.046 epoch: 85, batch: 500 // loss: 0.040 epoch: 85, batch: 600 // loss: 0.040 epoch: 85, batch: 700 // loss: 0.044 epoch: 85, batch: 800 // loss: 0.042 epoch: 85, batch: 900 // loss: 0.049 epoch: 85, batch: 1000 // loss: 0.046 epoch: 85, batch: 1100 // loss: 0.042 epoch: 85, batch: 1200 // loss: 0.044 epoch: 85, 
batch: 1300 // loss: 0.046 epoch: 85, batch: 1400 // loss: 0.043 epoch: 85, batch: 1500 // loss: 0.048 epoch: 85, batch: 1600 // loss: 0.052 epoch: 85, batch: 1700 // loss: 0.045 epoch: 85, batch: 1800 // loss: 0.053 epoch: 85, batch: 1900 // loss: 0.045 epoch: 85, batch: 2000 // loss: 0.047 epoch: 85, batch: 2100 // loss: 0.046 epoch: 85, batch: 2200 // loss: 0.050 epoch: 85, batch: 2300 // loss: 0.049 epoch: 85, batch: 2400 // loss: 0.042 epoch: 85, batch: 2500 // loss: 0.042 epoch: 85, batch: 2600 // loss: 0.045 epoch: 85, batch: 2700 // loss: 0.042 epoch: 85, batch: 2800 // loss: 0.046 epoch: 85, batch: 2900 // loss: 0.042 epoch: 85, batch: 3000 // loss: 0.045 epoch: 85, batch: 3100 // loss: 0.043 epoch: 85, batch: 3200 // loss: 0.039 epoch: 85, batch: 3300 // loss: 0.038 epoch: 85, batch: 3400 // loss: 0.044 epoch: 85, batch: 3500 // loss: 0.035 epoch: 85, batch: 3600 // loss: 0.043 epoch: 85, batch: 3700 // loss: 0.043 epoch: 86, batch: 0 // loss: 0.053 epoch: 86, batch: 100 // loss: 0.046 epoch: 86, batch: 200 // loss: 0.043 epoch: 86, batch: 300 // loss: 0.049 epoch: 86, batch: 400 // loss: 0.046 epoch: 86, batch: 500 // loss: 0.040 epoch: 86, batch: 600 // loss: 0.040 epoch: 86, batch: 700 // loss: 0.044 epoch: 86, batch: 800 // loss: 0.042 epoch: 86, batch: 900 // loss: 0.049 epoch: 86, batch: 1000 // loss: 0.046 epoch: 86, batch: 1100 // loss: 0.042 epoch: 86, batch: 1200 // loss: 0.044 epoch: 86, batch: 1300 // loss: 0.046 epoch: 86, batch: 1400 // loss: 0.043 epoch: 86, batch: 1500 // loss: 0.048 epoch: 86, batch: 1600 // loss: 0.052 epoch: 86, batch: 1700 // loss: 0.045 epoch: 86, batch: 1800 // loss: 0.053 epoch: 86, batch: 1900 // loss: 0.045 epoch: 86, batch: 2000 // loss: 0.047 epoch: 86, batch: 2100 // loss: 0.046 epoch: 86, batch: 2200 // loss: 0.050 epoch: 86, batch: 2300 // loss: 0.049 epoch: 86, batch: 2400 // loss: 0.042 epoch: 86, batch: 2500 // loss: 0.042 epoch: 86, batch: 2600 // loss: 0.045 epoch: 86, batch: 2700 // loss: 0.042 epoch: 
86, batch: 2800 // loss: 0.046 epoch: 86, batch: 2900 // loss: 0.042 epoch: 86, batch: 3000 // loss: 0.045 epoch: 86, batch: 3100 // loss: 0.043 epoch: 86, batch: 3200 // loss: 0.039 epoch: 86, batch: 3300 // loss: 0.038 epoch: 86, batch: 3400 // loss: 0.044 epoch: 86, batch: 3500 // loss: 0.035 epoch: 86, batch: 3600 // loss: 0.043 epoch: 86, batch: 3700 // loss: 0.043 epoch: 87, batch: 0 // loss: 0.053 epoch: 87, batch: 100 // loss: 0.046 epoch: 87, batch: 200 // loss: 0.043 epoch: 87, batch: 300 // loss: 0.049 epoch: 87, batch: 400 // loss: 0.046 epoch: 87, batch: 500 // loss: 0.040 epoch: 87, batch: 600 // loss: 0.040 epoch: 87, batch: 700 // loss: 0.044 epoch: 87, batch: 800 // loss: 0.042 epoch: 87, batch: 900 // loss: 0.049 epoch: 87, batch: 1000 // loss: 0.046 epoch: 87, batch: 1100 // loss: 0.042 epoch: 87, batch: 1200 // loss: 0.044 epoch: 87, batch: 1300 // loss: 0.046 epoch: 87, batch: 1400 // loss: 0.043 epoch: 87, batch: 1500 // loss: 0.048 epoch: 87, batch: 1600 // loss: 0.052 epoch: 87, batch: 1700 // loss: 0.045 epoch: 87, batch: 1800 // loss: 0.053 epoch: 87, batch: 1900 // loss: 0.045 epoch: 87, batch: 2000 // loss: 0.047 epoch: 87, batch: 2100 // loss: 0.046 epoch: 87, batch: 2200 // loss: 0.050 epoch: 87, batch: 2300 // loss: 0.049 epoch: 87, batch: 2400 // loss: 0.042 epoch: 87, batch: 2500 // loss: 0.042 epoch: 87, batch: 2600 // loss: 0.045 epoch: 87, batch: 2700 // loss: 0.042 epoch: 87, batch: 2800 // loss: 0.046 epoch: 87, batch: 2900 // loss: 0.042 epoch: 87, batch: 3000 // loss: 0.045 epoch: 87, batch: 3100 // loss: 0.043 epoch: 87, batch: 3200 // loss: 0.039 epoch: 87, batch: 3300 // loss: 0.038 epoch: 87, batch: 3400 // loss: 0.044 epoch: 87, batch: 3500 // loss: 0.035 epoch: 87, batch: 3600 // loss: 0.043 epoch: 87, batch: 3700 // loss: 0.043 epoch: 88, batch: 0 // loss: 0.053 epoch: 88, batch: 100 // loss: 0.046 epoch: 88, batch: 200 // loss: 0.043 epoch: 88, batch: 300 // loss: 0.049 epoch: 88, batch: 400 // loss: 0.046 epoch: 88, 
batch: 500 // loss: 0.040 epoch: 88, batch: 600 // loss: 0.040 epoch: 88, batch: 700 // loss: 0.044 epoch: 88, batch: 800 // loss: 0.042 epoch: 88, batch: 900 // loss: 0.049 epoch: 88, batch: 1000 // loss: 0.046 epoch: 88, batch: 1100 // loss: 0.042 epoch: 88, batch: 1200 // loss: 0.044 epoch: 88, batch: 1300 // loss: 0.046 epoch: 88, batch: 1400 // loss: 0.043 epoch: 88, batch: 1500 // loss: 0.048 epoch: 88, batch: 1600 // loss: 0.052 epoch: 88, batch: 1700 // loss: 0.045 epoch: 88, batch: 1800 // loss: 0.053 epoch: 88, batch: 1900 // loss: 0.045 epoch: 88, batch: 2000 // loss: 0.047 epoch: 88, batch: 2100 // loss: 0.046 epoch: 88, batch: 2200 // loss: 0.050 epoch: 88, batch: 2300 // loss: 0.049 epoch: 88, batch: 2400 // loss: 0.042 epoch: 88, batch: 2500 // loss: 0.042 epoch: 88, batch: 2600 // loss: 0.045 epoch: 88, batch: 2700 // loss: 0.042 epoch: 88, batch: 2800 // loss: 0.046 epoch: 88, batch: 2900 // loss: 0.042 epoch: 88, batch: 3000 // loss: 0.045 epoch: 88, batch: 3100 // loss: 0.043 epoch: 88, batch: 3200 // loss: 0.039 epoch: 88, batch: 3300 // loss: 0.038 epoch: 88, batch: 3400 // loss: 0.044 epoch: 88, batch: 3500 // loss: 0.035 epoch: 88, batch: 3600 // loss: 0.043 epoch: 88, batch: 3700 // loss: 0.043 epoch: 89, batch: 0 // loss: 0.053 epoch: 89, batch: 100 // loss: 0.046 epoch: 89, batch: 200 // loss: 0.043 epoch: 89, batch: 300 // loss: 0.049 epoch: 89, batch: 400 // loss: 0.046 epoch: 89, batch: 500 // loss: 0.040 epoch: 89, batch: 600 // loss: 0.040 epoch: 89, batch: 700 // loss: 0.044 epoch: 89, batch: 800 // loss: 0.042 epoch: 89, batch: 900 // loss: 0.049 epoch: 89, batch: 1000 // loss: 0.046 epoch: 89, batch: 1100 // loss: 0.042 epoch: 89, batch: 1200 // loss: 0.044 epoch: 89, batch: 1300 // loss: 0.046 epoch: 89, batch: 1400 // loss: 0.043 epoch: 89, batch: 1500 // loss: 0.048 epoch: 89, batch: 1600 // loss: 0.052 epoch: 89, batch: 1700 // loss: 0.045 epoch: 89, batch: 1800 // loss: 0.053 epoch: 89, batch: 1900 // loss: 0.045 epoch: 89, 
batch: 2000 // loss: 0.047 epoch: 89, batch: 2100 // loss: 0.046 epoch: 89, batch: 2200 // loss: 0.050 epoch: 89, batch: 2300 // loss: 0.049 epoch: 89, batch: 2400 // loss: 0.042 epoch: 89, batch: 2500 // loss: 0.042 epoch: 89, batch: 2600 // loss: 0.045 epoch: 89, batch: 2700 // loss: 0.042 epoch: 89, batch: 2800 // loss: 0.046 epoch: 89, batch: 2900 // loss: 0.042 epoch: 89, batch: 3000 // loss: 0.045 epoch: 89, batch: 3100 // loss: 0.043 epoch: 89, batch: 3200 // loss: 0.039 epoch: 89, batch: 3300 // loss: 0.038 epoch: 89, batch: 3400 // loss: 0.044 epoch: 89, batch: 3500 // loss: 0.035 epoch: 89, batch: 3600 // loss: 0.043 epoch: 89, batch: 3700 // loss: 0.043 epoch: 90, batch: 0 // loss: 0.053 epoch: 90, batch: 100 // loss: 0.046 epoch: 90, batch: 200 // loss: 0.043 epoch: 90, batch: 300 // loss: 0.049 epoch: 90, batch: 400 // loss: 0.046 epoch: 90, batch: 500 // loss: 0.040 epoch: 90, batch: 600 // loss: 0.040 epoch: 90, batch: 700 // loss: 0.044 epoch: 90, batch: 800 // loss: 0.042 epoch: 90, batch: 900 // loss: 0.049 epoch: 90, batch: 1000 // loss: 0.046 epoch: 90, batch: 1100 // loss: 0.042 epoch: 90, batch: 1200 // loss: 0.044 epoch: 90, batch: 1300 // loss: 0.046 epoch: 90, batch: 1400 // loss: 0.043 epoch: 90, batch: 1500 // loss: 0.048 epoch: 90, batch: 1600 // loss: 0.052 epoch: 90, batch: 1700 // loss: 0.045 epoch: 90, batch: 1800 // loss: 0.053 epoch: 90, batch: 1900 // loss: 0.045 epoch: 90, batch: 2000 // loss: 0.047 epoch: 90, batch: 2100 // loss: 0.046 epoch: 90, batch: 2200 // loss: 0.050 epoch: 90, batch: 2300 // loss: 0.049 epoch: 90, batch: 2400 // loss: 0.042 epoch: 90, batch: 2500 // loss: 0.042 epoch: 90, batch: 2600 // loss: 0.045 epoch: 90, batch: 2700 // loss: 0.042 epoch: 90, batch: 2800 // loss: 0.046 epoch: 90, batch: 2900 // loss: 0.042 epoch: 90, batch: 3000 // loss: 0.045 epoch: 90, batch: 3100 // loss: 0.043 epoch: 90, batch: 3200 // loss: 0.039 epoch: 90, batch: 3300 // loss: 0.038 epoch: 90, batch: 3400 // loss: 0.044 epoch: 
90, batch: 3500 // loss: 0.035 epoch: 90, batch: 3600 // loss: 0.043 epoch: 90, batch: 3700 // loss: 0.043 epoch: 91, batch: 0 // loss: 0.053 epoch: 91, batch: 100 // loss: 0.046 epoch: 91, batch: 200 // loss: 0.043 epoch: 91, batch: 300 // loss: 0.049 epoch: 91, batch: 400 // loss: 0.046 epoch: 91, batch: 500 // loss: 0.040 epoch: 91, batch: 600 // loss: 0.040 epoch: 91, batch: 700 // loss: 0.044 epoch: 91, batch: 800 // loss: 0.042 epoch: 91, batch: 900 // loss: 0.049 epoch: 91, batch: 1000 // loss: 0.046 epoch: 91, batch: 1100 // loss: 0.042 epoch: 91, batch: 1200 // loss: 0.044 epoch: 91, batch: 1300 // loss: 0.046 epoch: 91, batch: 1400 // loss: 0.043 epoch: 91, batch: 1500 // loss: 0.048 epoch: 91, batch: 1600 // loss: 0.052 epoch: 91, batch: 1700 // loss: 0.045 epoch: 91, batch: 1800 // loss: 0.053 epoch: 91, batch: 1900 // loss: 0.045 epoch: 91, batch: 2000 // loss: 0.047 epoch: 91, batch: 2100 // loss: 0.046 epoch: 91, batch: 2200 // loss: 0.050 epoch: 91, batch: 2300 // loss: 0.049 epoch: 91, batch: 2400 // loss: 0.042 epoch: 91, batch: 2500 // loss: 0.042 epoch: 91, batch: 2600 // loss: 0.045 epoch: 91, batch: 2700 // loss: 0.042 epoch: 91, batch: 2800 // loss: 0.046 epoch: 91, batch: 2900 // loss: 0.042 epoch: 91, batch: 3000 // loss: 0.045 epoch: 91, batch: 3100 // loss: 0.043 epoch: 91, batch: 3200 // loss: 0.039 epoch: 91, batch: 3300 // loss: 0.038 epoch: 91, batch: 3400 // loss: 0.044 epoch: 91, batch: 3500 // loss: 0.035 epoch: 91, batch: 3600 // loss: 0.043 epoch: 91, batch: 3700 // loss: 0.043 epoch: 92, batch: 0 // loss: 0.053 epoch: 92, batch: 100 // loss: 0.046 epoch: 92, batch: 200 // loss: 0.043 epoch: 92, batch: 300 // loss: 0.049 epoch: 92, batch: 400 // loss: 0.046 epoch: 92, batch: 500 // loss: 0.040 epoch: 92, batch: 600 // loss: 0.040 epoch: 92, batch: 700 // loss: 0.044 epoch: 92, batch: 800 // loss: 0.042 epoch: 92, batch: 900 // loss: 0.049 epoch: 92, batch: 1000 // loss: 0.046 epoch: 92, batch: 1100 // loss: 0.042 epoch: 92, 
batch: 1200 // loss: 0.044 epoch: 92, batch: 1300 // loss: 0.046 epoch: 92, batch: 1400 // loss: 0.043 epoch: 92, batch: 1500 // loss: 0.048 epoch: 92, batch: 1600 // loss: 0.052 epoch: 92, batch: 1700 // loss: 0.045 epoch: 92, batch: 1800 // loss: 0.053 epoch: 92, batch: 1900 // loss: 0.045 epoch: 92, batch: 2000 // loss: 0.047 epoch: 92, batch: 2100 // loss: 0.046 epoch: 92, batch: 2200 // loss: 0.050 epoch: 92, batch: 2300 // loss: 0.049 epoch: 92, batch: 2400 // loss: 0.042 epoch: 92, batch: 2500 // loss: 0.042 epoch: 92, batch: 2600 // loss: 0.045 epoch: 92, batch: 2700 // loss: 0.042 epoch: 92, batch: 2800 // loss: 0.046 epoch: 92, batch: 2900 // loss: 0.042 epoch: 92, batch: 3000 // loss: 0.045 epoch: 92, batch: 3100 // loss: 0.043 epoch: 92, batch: 3200 // loss: 0.039 epoch: 92, batch: 3300 // loss: 0.038 epoch: 92, batch: 3400 // loss: 0.044 epoch: 92, batch: 3500 // loss: 0.035 epoch: 92, batch: 3600 // loss: 0.043 epoch: 92, batch: 3700 // loss: 0.043 epoch: 93, batch: 0 // loss: 0.053 epoch: 93, batch: 100 // loss: 0.046 epoch: 93, batch: 200 // loss: 0.043 epoch: 93, batch: 300 // loss: 0.049 epoch: 93, batch: 400 // loss: 0.046 epoch: 93, batch: 500 // loss: 0.040 epoch: 93, batch: 600 // loss: 0.040 epoch: 93, batch: 700 // loss: 0.044 epoch: 93, batch: 800 // loss: 0.042 epoch: 93, batch: 900 // loss: 0.049 epoch: 93, batch: 1000 // loss: 0.046 epoch: 93, batch: 1100 // loss: 0.042 epoch: 93, batch: 1200 // loss: 0.044 epoch: 93, batch: 1300 // loss: 0.046 epoch: 93, batch: 1400 // loss: 0.043 epoch: 93, batch: 1500 // loss: 0.048 epoch: 93, batch: 1600 // loss: 0.052 epoch: 93, batch: 1700 // loss: 0.045 epoch: 93, batch: 1800 // loss: 0.053 epoch: 93, batch: 1900 // loss: 0.045 epoch: 93, batch: 2000 // loss: 0.047 epoch: 93, batch: 2100 // loss: 0.046 epoch: 93, batch: 2200 // loss: 0.050 epoch: 93, batch: 2300 // loss: 0.049 epoch: 93, batch: 2400 // loss: 0.042 epoch: 93, batch: 2500 // loss: 0.042 epoch: 93, batch: 2600 // loss: 0.045 epoch: 
93, batch: 2700 // loss: 0.042 epoch: 93, batch: 2800 // loss: 0.046 epoch: 93, batch: 2900 // loss: 0.042 epoch: 93, batch: 3000 // loss: 0.045 epoch: 93, batch: 3100 // loss: 0.043 epoch: 93, batch: 3200 // loss: 0.039 epoch: 93, batch: 3300 // loss: 0.038 epoch: 93, batch: 3400 // loss: 0.044 epoch: 93, batch: 3500 // loss: 0.035 epoch: 93, batch: 3600 // loss: 0.043 epoch: 93, batch: 3700 // loss: 0.043 epoch: 94, batch: 0 // loss: 0.053 epoch: 94, batch: 100 // loss: 0.046 epoch: 94, batch: 200 // loss: 0.043 epoch: 94, batch: 300 // loss: 0.049 epoch: 94, batch: 400 // loss: 0.046 epoch: 94, batch: 500 // loss: 0.040 epoch: 94, batch: 600 // loss: 0.040 epoch: 94, batch: 700 // loss: 0.044 epoch: 94, batch: 800 // loss: 0.042 epoch: 94, batch: 900 // loss: 0.049 epoch: 94, batch: 1000 // loss: 0.046 epoch: 94, batch: 1100 // loss: 0.042 epoch: 94, batch: 1200 // loss: 0.044 epoch: 94, batch: 1300 // loss: 0.046 epoch: 94, batch: 1400 // loss: 0.043 epoch: 94, batch: 1500 // loss: 0.048 epoch: 94, batch: 1600 // loss: 0.052 epoch: 94, batch: 1700 // loss: 0.045 epoch: 94, batch: 1800 // loss: 0.053 epoch: 94, batch: 1900 // loss: 0.045 epoch: 94, batch: 2000 // loss: 0.047 epoch: 94, batch: 2100 // loss: 0.046 epoch: 94, batch: 2200 // loss: 0.050 epoch: 94, batch: 2300 // loss: 0.049 epoch: 94, batch: 2400 // loss: 0.042 epoch: 94, batch: 2500 // loss: 0.042 epoch: 94, batch: 2600 // loss: 0.045 epoch: 94, batch: 2700 // loss: 0.042 epoch: 94, batch: 2800 // loss: 0.046 epoch: 94, batch: 2900 // loss: 0.042 epoch: 94, batch: 3000 // loss: 0.045 epoch: 94, batch: 3100 // loss: 0.043 epoch: 94, batch: 3200 // loss: 0.039 epoch: 94, batch: 3300 // loss: 0.038 epoch: 94, batch: 3400 // loss: 0.044 epoch: 94, batch: 3500 // loss: 0.035 epoch: 94, batch: 3600 // loss: 0.043 epoch: 94, batch: 3700 // loss: 0.043 epoch: 95, batch: 0 // loss: 0.053 epoch: 95, batch: 100 // loss: 0.046 epoch: 95, batch: 200 // loss: 0.043 epoch: 95, batch: 300 // loss: 0.049 epoch: 95, 
batch: 400 // loss: 0.046 epoch: 95, batch: 500 // loss: 0.040 epoch: 95, batch: 600 // loss: 0.040 epoch: 95, batch: 700 // loss: 0.044 epoch: 95, batch: 800 // loss: 0.042 epoch: 95, batch: 900 // loss: 0.049 epoch: 95, batch: 1000 // loss: 0.046 epoch: 95, batch: 1100 // loss: 0.042 epoch: 95, batch: 1200 // loss: 0.044 epoch: 95, batch: 1300 // loss: 0.046 epoch: 95, batch: 1400 // loss: 0.043 epoch: 95, batch: 1500 // loss: 0.048 epoch: 95, batch: 1600 // loss: 0.052 epoch: 95, batch: 1700 // loss: 0.045 epoch: 95, batch: 1800 // loss: 0.053 epoch: 95, batch: 1900 // loss: 0.045 epoch: 95, batch: 2000 // loss: 0.047 epoch: 95, batch: 2100 // loss: 0.046 epoch: 95, batch: 2200 // loss: 0.050 epoch: 95, batch: 2300 // loss: 0.049 epoch: 95, batch: 2400 // loss: 0.042 epoch: 95, batch: 2500 // loss: 0.042 epoch: 95, batch: 2600 // loss: 0.045 epoch: 95, batch: 2700 // loss: 0.042 epoch: 95, batch: 2800 // loss: 0.046 epoch: 95, batch: 2900 // loss: 0.042 epoch: 95, batch: 3000 // loss: 0.045 epoch: 95, batch: 3100 // loss: 0.043 epoch: 95, batch: 3200 // loss: 0.039 epoch: 95, batch: 3300 // loss: 0.038 epoch: 95, batch: 3400 // loss: 0.044 epoch: 95, batch: 3500 // loss: 0.035 epoch: 95, batch: 3600 // loss: 0.043 epoch: 95, batch: 3700 // loss: 0.043 epoch: 96, batch: 0 // loss: 0.053 epoch: 96, batch: 100 // loss: 0.046 epoch: 96, batch: 200 // loss: 0.043 epoch: 96, batch: 300 // loss: 0.049 epoch: 96, batch: 400 // loss: 0.046 epoch: 96, batch: 500 // loss: 0.040 epoch: 96, batch: 600 // loss: 0.040 epoch: 96, batch: 700 // loss: 0.044 epoch: 96, batch: 800 // loss: 0.042 epoch: 96, batch: 900 // loss: 0.049 epoch: 96, batch: 1000 // loss: 0.046 epoch: 96, batch: 1100 // loss: 0.042 epoch: 96, batch: 1200 // loss: 0.044 epoch: 96, batch: 1300 // loss: 0.046 epoch: 96, batch: 1400 // loss: 0.043 epoch: 96, batch: 1500 // loss: 0.048 epoch: 96, batch: 1600 // loss: 0.052 epoch: 96, batch: 1700 // loss: 0.045 epoch: 96, batch: 1800 // loss: 0.053 epoch: 96, 
batch: 1900 // loss: 0.045 epoch: 96, batch: 2000 // loss: 0.047 epoch: 96, batch: 2100 // loss: 0.046 epoch: 96, batch: 2200 // loss: 0.050 epoch: 96, batch: 2300 // loss: 0.049 epoch: 96, batch: 2400 // loss: 0.042 epoch: 96, batch: 2500 // loss: 0.042 epoch: 96, batch: 2600 // loss: 0.045 epoch: 96, batch: 2700 // loss: 0.042 epoch: 96, batch: 2800 // loss: 0.046 epoch: 96, batch: 2900 // loss: 0.042 epoch: 96, batch: 3000 // loss: 0.045 epoch: 96, batch: 3100 // loss: 0.043 epoch: 96, batch: 3200 // loss: 0.039 epoch: 96, batch: 3300 // loss: 0.038 epoch: 96, batch: 3400 // loss: 0.044 epoch: 96, batch: 3500 // loss: 0.035 epoch: 96, batch: 3600 // loss: 0.043 epoch: 96, batch: 3700 // loss: 0.043 epoch: 97, batch: 0 // loss: 0.053 epoch: 97, batch: 100 // loss: 0.046 epoch: 97, batch: 200 // loss: 0.043 epoch: 97, batch: 300 // loss: 0.049 epoch: 97, batch: 400 // loss: 0.046 epoch: 97, batch: 500 // loss: 0.040 epoch: 97, batch: 600 // loss: 0.040 epoch: 97, batch: 700 // loss: 0.044 epoch: 97, batch: 800 // loss: 0.042 epoch: 97, batch: 900 // loss: 0.049 epoch: 97, batch: 1000 // loss: 0.046 epoch: 97, batch: 1100 // loss: 0.042 epoch: 97, batch: 1200 // loss: 0.044 epoch: 97, batch: 1300 // loss: 0.046 epoch: 97, batch: 1400 // loss: 0.043 epoch: 97, batch: 1500 // loss: 0.048 epoch: 97, batch: 1600 // loss: 0.052 epoch: 97, batch: 1700 // loss: 0.045 epoch: 97, batch: 1800 // loss: 0.053 epoch: 97, batch: 1900 // loss: 0.045 epoch: 97, batch: 2000 // loss: 0.047 epoch: 97, batch: 2100 // loss: 0.046 epoch: 97, batch: 2200 // loss: 0.050 epoch: 97, batch: 2300 // loss: 0.049 epoch: 97, batch: 2400 // loss: 0.042 epoch: 97, batch: 2500 // loss: 0.042 epoch: 97, batch: 2600 // loss: 0.045 epoch: 97, batch: 2700 // loss: 0.042 epoch: 97, batch: 2800 // loss: 0.046 epoch: 97, batch: 2900 // loss: 0.042 epoch: 97, batch: 3000 // loss: 0.045 epoch: 97, batch: 3100 // loss: 0.043 epoch: 97, batch: 3200 // loss: 0.039 epoch: 97, batch: 3300 // loss: 0.038 epoch: 
97, batch: 3400 // loss: 0.044 epoch: 97, batch: 3500 // loss: 0.035 epoch: 97, batch: 3600 // loss: 0.043 epoch: 97, batch: 3700 // loss: 0.043 epoch: 98, batch: 0 // loss: 0.053 epoch: 98, batch: 100 // loss: 0.046 epoch: 98, batch: 200 // loss: 0.043 epoch: 98, batch: 300 // loss: 0.049 epoch: 98, batch: 400 // loss: 0.046 epoch: 98, batch: 500 // loss: 0.040 epoch: 98, batch: 600 // loss: 0.040 epoch: 98, batch: 700 // loss: 0.044 epoch: 98, batch: 800 // loss: 0.042 epoch: 98, batch: 900 // loss: 0.049 epoch: 98, batch: 1000 // loss: 0.046 epoch: 98, batch: 1100 // loss: 0.042 epoch: 98, batch: 1200 // loss: 0.044 epoch: 98, batch: 1300 // loss: 0.046 epoch: 98, batch: 1400 // loss: 0.043 epoch: 98, batch: 1500 // loss: 0.048 epoch: 98, batch: 1600 // loss: 0.052 epoch: 98, batch: 1700 // loss: 0.045 epoch: 98, batch: 1800 // loss: 0.053 epoch: 98, batch: 1900 // loss: 0.045 epoch: 98, batch: 2000 // loss: 0.047 epoch: 98, batch: 2100 // loss: 0.046 epoch: 98, batch: 2200 // loss: 0.050 epoch: 98, batch: 2300 // loss: 0.049 epoch: 98, batch: 2400 // loss: 0.042 epoch: 98, batch: 2500 // loss: 0.042 epoch: 98, batch: 2600 // loss: 0.045 epoch: 98, batch: 2700 // loss: 0.042 epoch: 98, batch: 2800 // loss: 0.046 epoch: 98, batch: 2900 // loss: 0.042 epoch: 98, batch: 3000 // loss: 0.045 epoch: 98, batch: 3100 // loss: 0.043 epoch: 98, batch: 3200 // loss: 0.039 epoch: 98, batch: 3300 // loss: 0.038 epoch: 98, batch: 3400 // loss: 0.044 epoch: 98, batch: 3500 // loss: 0.035 epoch: 98, batch: 3600 // loss: 0.043 epoch: 98, batch: 3700 // loss: 0.043 epoch: 99, batch: 0 // loss: 0.053 epoch: 99, batch: 100 // loss: 0.046 epoch: 99, batch: 200 // loss: 0.043 epoch: 99, batch: 300 // loss: 0.049 epoch: 99, batch: 400 // loss: 0.046 epoch: 99, batch: 500 // loss: 0.040 epoch: 99, batch: 600 // loss: 0.040 epoch: 99, batch: 700 // loss: 0.044 epoch: 99, batch: 800 // loss: 0.042 epoch: 99, batch: 900 // loss: 0.049 epoch: 99, batch: 1000 // loss: 0.046 epoch: 99, 
batch: 1100 // loss: 0.042 epoch: 99, batch: 1200 // loss: 0.044 epoch: 99, batch: 1300 // loss: 0.046 epoch: 99, batch: 1400 // loss: 0.043 epoch: 99, batch: 1500 // loss: 0.048 epoch: 99, batch: 1600 // loss: 0.052 epoch: 99, batch: 1700 // loss: 0.045 epoch: 99, batch: 1800 // loss: 0.053 epoch: 99, batch: 1900 // loss: 0.045 epoch: 99, batch: 2000 // loss: 0.047 epoch: 99, batch: 2100 // loss: 0.046 epoch: 99, batch: 2200 // loss: 0.050 epoch: 99, batch: 2300 // loss: 0.049 epoch: 99, batch: 2400 // loss: 0.042 epoch: 99, batch: 2500 // loss: 0.042 epoch: 99, batch: 2600 // loss: 0.045 epoch: 99, batch: 2700 // loss: 0.042 epoch: 99, batch: 2800 // loss: 0.046 epoch: 99, batch: 2900 // loss: 0.042 epoch: 99, batch: 3000 // loss: 0.045 epoch: 99, batch: 3100 // loss: 0.043 epoch: 99, batch: 3200 // loss: 0.039 epoch: 99, batch: 3300 // loss: 0.038 epoch: 99, batch: 3400 // loss: 0.044 epoch: 99, batch: 3500 // loss: 0.035 epoch: 99, batch: 3600 // loss: 0.043 epoch: 99, batch: 3700 // loss: 0.043 epoch: 100, batch: 0 // loss: 0.053 epoch: 100, batch: 100 // loss: 0.046 epoch: 100, batch: 200 // loss: 0.043 epoch: 100, batch: 300 // loss: 0.049 epoch: 100, batch: 400 // loss: 0.046 epoch: 100, batch: 500 // loss: 0.040 epoch: 100, batch: 600 // loss: 0.040 epoch: 100, batch: 700 // loss: 0.044 epoch: 100, batch: 800 // loss: 0.042 epoch: 100, batch: 900 // loss: 0.049 epoch: 100, batch: 1000 // loss: 0.046 epoch: 100, batch: 1100 // loss: 0.042 epoch: 100, batch: 1200 // loss: 0.044 epoch: 100, batch: 1300 // loss: 0.046 epoch: 100, batch: 1400 // loss: 0.043 epoch: 100, batch: 1500 // loss: 0.048 epoch: 100, batch: 1600 // loss: 0.052 epoch: 100, batch: 1700 // loss: 0.045 epoch: 100, batch: 1800 // loss: 0.053 epoch: 100, batch: 1900 // loss: 0.045 epoch: 100, batch: 2000 // loss: 0.047 epoch: 100, batch: 2100 // loss: 0.046 epoch: 100, batch: 2200 // loss: 0.050 epoch: 100, batch: 2300 // loss: 0.049 epoch: 100, batch: 2400 // loss: 0.042 epoch: 100, batch: 
2500 // loss: 0.042 epoch: 100, batch: 2600 // loss: 0.045 epoch: 100, batch: 2700 // loss: 0.042 epoch: 100, batch: 2800 // loss: 0.046 epoch: 100, batch: 2900 // loss: 0.042 epoch: 100, batch: 3000 // loss: 0.045 epoch: 100, batch: 3100 // loss: 0.043 epoch: 100, batch: 3200 // loss: 0.039 epoch: 100, batch: 3300 // loss: 0.038 epoch: 100, batch: 3400 // loss: 0.044 epoch: 100, batch: 3500 // loss: 0.035 epoch: 100, batch: 3600 // loss: 0.043 epoch: 100, batch: 3700 // loss: 0.043 epoch: 101, batch: 0 // loss: 0.053 epoch: 101, batch: 100 // loss: 0.046 epoch: 101, batch: 200 // loss: 0.043 epoch: 101, batch: 300 // loss: 0.049 epoch: 101, batch: 400 // loss: 0.046 epoch: 101, batch: 500 // loss: 0.040 epoch: 101, batch: 600 // loss: 0.040 epoch: 101, batch: 700 // loss: 0.044 epoch: 101, batch: 800 // loss: 0.042 epoch: 101, batch: 900 // loss: 0.049 epoch: 101, batch: 1000 // loss: 0.046 epoch: 101, batch: 1100 // loss: 0.042 epoch: 101, batch: 1200 // loss: 0.044 epoch: 101, batch: 1300 // loss: 0.046 epoch: 101, batch: 1400 // loss: 0.043 epoch: 101, batch: 1500 // loss: 0.048 epoch: 101, batch: 1600 // loss: 0.052 epoch: 101, batch: 1700 // loss: 0.045 epoch: 101, batch: 1800 // loss: 0.053 epoch: 101, batch: 1900 // loss: 0.045 epoch: 101, batch: 2000 // loss: 0.047 epoch: 101, batch: 2100 // loss: 0.046 epoch: 101, batch: 2200 // loss: 0.050 epoch: 101, batch: 2300 // loss: 0.049 epoch: 101, batch: 2400 // loss: 0.042 epoch: 101, batch: 2500 // loss: 0.042 epoch: 101, batch: 2600 // loss: 0.045 epoch: 101, batch: 2700 // loss: 0.042 epoch: 101, batch: 2800 // loss: 0.046 epoch: 101, batch: 2900 // loss: 0.042 epoch: 101, batch: 3000 // loss: 0.045 epoch: 101, batch: 3100 // loss: 0.043 epoch: 101, batch: 3200 // loss: 0.039 epoch: 101, batch: 3300 // loss: 0.038 epoch: 101, batch: 3400 // loss: 0.044 epoch: 101, batch: 3500 // loss: 0.035 epoch: 101, batch: 3600 // loss: 0.043 epoch: 101, batch: 3700 // loss: 0.043 epoch: 102, batch: 0 // loss: 0.053 
epoch: 102, batch: 100 // loss: 0.046 epoch: 102, batch: 200 // loss: 0.043 epoch: 102, batch: 300 // loss: 0.049 epoch: 102, batch: 400 // loss: 0.046 epoch: 102, batch: 500 // loss: 0.040 epoch: 102, batch: 600 // loss: 0.040 epoch: 102, batch: 700 // loss: 0.044 epoch: 102, batch: 800 // loss: 0.042 epoch: 102, batch: 900 // loss: 0.049 epoch: 102, batch: 1000 // loss: 0.046 epoch: 102, batch: 1100 // loss: 0.042 epoch: 102, batch: 1200 // loss: 0.044 epoch: 102, batch: 1300 // loss: 0.046 epoch: 102, batch: 1400 // loss: 0.043 epoch: 102, batch: 1500 // loss: 0.048 epoch: 102, batch: 1600 // loss: 0.052 epoch: 102, batch: 1700 // loss: 0.045 epoch: 102, batch: 1800 // loss: 0.053 epoch: 102, batch: 1900 // loss: 0.045 epoch: 102, batch: 2000 // loss: 0.047 epoch: 102, batch: 2100 // loss: 0.046 epoch: 102, batch: 2200 // loss: 0.050 epoch: 102, batch: 2300 // loss: 0.049 epoch: 102, batch: 2400 // loss: 0.042 epoch: 102, batch: 2500 // loss: 0.042 epoch: 102, batch: 2600 // loss: 0.045 epoch: 102, batch: 2700 // loss: 0.042 epoch: 102, batch: 2800 // loss: 0.046 epoch: 102, batch: 2900 // loss: 0.042 epoch: 102, batch: 3000 // loss: 0.045 epoch: 102, batch: 3100 // loss: 0.043 epoch: 102, batch: 3200 // loss: 0.039 epoch: 102, batch: 3300 // loss: 0.038 epoch: 102, batch: 3400 // loss: 0.044 epoch: 102, batch: 3500 // loss: 0.035 epoch: 102, batch: 3600 // loss: 0.043 epoch: 102, batch: 3700 // loss: 0.043 epoch: 103, batch: 0 // loss: 0.053 epoch: 103, batch: 100 // loss: 0.046 epoch: 103, batch: 200 // loss: 0.043 epoch: 103, batch: 300 // loss: 0.049 epoch: 103, batch: 400 // loss: 0.046 epoch: 103, batch: 500 // loss: 0.040 epoch: 103, batch: 600 // loss: 0.040 epoch: 103, batch: 700 // loss: 0.044 epoch: 103, batch: 800 // loss: 0.042 epoch: 103, batch: 900 // loss: 0.049 epoch: 103, batch: 1000 // loss: 0.046 epoch: 103, batch: 1100 // loss: 0.042 epoch: 103, batch: 1200 // loss: 0.044 epoch: 103, batch: 1300 // loss: 0.046 epoch: 103, batch: 1400 // 
loss: 0.043 epoch: 103, batch: 1500 // loss: 0.048 epoch: 103, batch: 1600 // loss: 0.052 epoch: 103, batch: 1700 // loss: 0.045 epoch: 103, batch: 1800 // loss: 0.053 epoch: 103, batch: 1900 // loss: 0.045 epoch: 103, batch: 2000 // loss: 0.047 epoch: 103, batch: 2100 // loss: 0.046 epoch: 103, batch: 2200 // loss: 0.050 epoch: 103, batch: 2300 // loss: 0.049 epoch: 103, batch: 2400 // loss: 0.042 epoch: 103, batch: 2500 // loss: 0.042 epoch: 103, batch: 2600 // loss: 0.045 epoch: 103, batch: 2700 // loss: 0.042 epoch: 103, batch: 2800 // loss: 0.046 epoch: 103, batch: 2900 // loss: 0.042 epoch: 103, batch: 3000 // loss: 0.045 epoch: 103, batch: 3100 // loss: 0.043 epoch: 103, batch: 3200 // loss: 0.039 epoch: 103, batch: 3300 // loss: 0.038 epoch: 103, batch: 3400 // loss: 0.044 epoch: 103, batch: 3500 // loss: 0.035 epoch: 103, batch: 3600 // loss: 0.043 epoch: 103, batch: 3700 // loss: 0.043 epoch: 104, batch: 0 // loss: 0.053 epoch: 104, batch: 100 // loss: 0.046 epoch: 104, batch: 200 // loss: 0.043 epoch: 104, batch: 300 // loss: 0.049 epoch: 104, batch: 400 // loss: 0.046 epoch: 104, batch: 500 // loss: 0.040 epoch: 104, batch: 600 // loss: 0.040 epoch: 104, batch: 700 // loss: 0.044 epoch: 104, batch: 800 // loss: 0.042 epoch: 104, batch: 900 // loss: 0.049 epoch: 104, batch: 1000 // loss: 0.046 epoch: 104, batch: 1100 // loss: 0.042 epoch: 104, batch: 1200 // loss: 0.044 epoch: 104, batch: 1300 // loss: 0.046 epoch: 104, batch: 1400 // loss: 0.043 epoch: 104, batch: 1500 // loss: 0.048 epoch: 104, batch: 1600 // loss: 0.052 epoch: 104, batch: 1700 // loss: 0.045 epoch: 104, batch: 1800 // loss: 0.053 epoch: 104, batch: 1900 // loss: 0.045 epoch: 104, batch: 2000 // loss: 0.047 epoch: 104, batch: 2100 // loss: 0.046 epoch: 104, batch: 2200 // loss: 0.050 epoch: 104, batch: 2300 // loss: 0.049 epoch: 104, batch: 2400 // loss: 0.042 epoch: 104, batch: 2500 // loss: 0.042 epoch: 104, batch: 2600 // loss: 0.045 epoch: 104, batch: 2700 // loss: 0.042 epoch: 
104, batch: 2800 // loss: 0.046 epoch: 104, batch: 2900 // loss: 0.042 epoch: 104, batch: 3000 // loss: 0.045 epoch: 104, batch: 3100 // loss: 0.043 epoch: 104, batch: 3200 // loss: 0.039 epoch: 104, batch: 3300 // loss: 0.038 epoch: 104, batch: 3400 // loss: 0.044 epoch: 104, batch: 3500 // loss: 0.035 epoch: 104, batch: 3600 // loss: 0.043 epoch: 104, batch: 3700 // loss: 0.043 epoch: 105, batch: 0 // loss: 0.053 epoch: 105, batch: 100 // loss: 0.046 epoch: 105, batch: 200 // loss: 0.043 epoch: 105, batch: 300 // loss: 0.049 epoch: 105, batch: 400 // loss: 0.046 epoch: 105, batch: 500 // loss: 0.040 epoch: 105, batch: 600 // loss: 0.040 epoch: 105, batch: 700 // loss: 0.044 epoch: 105, batch: 800 // loss: 0.042 epoch: 105, batch: 900 // loss: 0.049 epoch: 105, batch: 1000 // loss: 0.046 epoch: 105, batch: 1100 // loss: 0.042 epoch: 105, batch: 1200 // loss: 0.044 epoch: 105, batch: 1300 // loss: 0.046 epoch: 105, batch: 1400 // loss: 0.043 epoch: 105, batch: 1500 // loss: 0.048 epoch: 105, batch: 1600 // loss: 0.052 epoch: 105, batch: 1700 // loss: 0.045 epoch: 105, batch: 1800 // loss: 0.053 epoch: 105, batch: 1900 // loss: 0.045 epoch: 105, batch: 2000 // loss: 0.047 epoch: 105, batch: 2100 // loss: 0.046 epoch: 105, batch: 2200 // loss: 0.050 epoch: 105, batch: 2300 // loss: 0.049 epoch: 105, batch: 2400 // loss: 0.042 epoch: 105, batch: 2500 // loss: 0.042 epoch: 105, batch: 2600 // loss: 0.045 epoch: 105, batch: 2700 // loss: 0.042 epoch: 105, batch: 2800 // loss: 0.046 epoch: 105, batch: 2900 // loss: 0.042 epoch: 105, batch: 3000 // loss: 0.045 epoch: 105, batch: 3100 // loss: 0.043 epoch: 105, batch: 3200 // loss: 0.039 epoch: 105, batch: 3300 // loss: 0.038 epoch: 105, batch: 3400 // loss: 0.044 epoch: 105, batch: 3500 // loss: 0.035 epoch: 105, batch: 3600 // loss: 0.043 epoch: 105, batch: 3700 // loss: 0.043 epoch: 106, batch: 0 // loss: 0.053 epoch: 106, batch: 100 // loss: 0.046 epoch: 106, batch: 200 // loss: 0.043 epoch: 106, batch: 300 // loss: 
0.049 epoch: 106, batch: 400 // loss: 0.046 epoch: 106, batch: 500 // loss: 0.040 epoch: 106, batch: 600 // loss: 0.040 epoch: 106, batch: 700 // loss: 0.044 epoch: 106, batch: 800 // loss: 0.042 epoch: 106, batch: 900 // loss: 0.049 epoch: 106, batch: 1000 // loss: 0.046 epoch: 106, batch: 1100 // loss: 0.042 epoch: 106, batch: 1200 // loss: 0.044 epoch: 106, batch: 1300 // loss: 0.046 epoch: 106, batch: 1400 // loss: 0.043 epoch: 106, batch: 1500 // loss: 0.048 epoch: 106, batch: 1600 // loss: 0.052 epoch: 106, batch: 1700 // loss: 0.045 epoch: 106, batch: 1800 // loss: 0.053 epoch: 106, batch: 1900 // loss: 0.045 epoch: 106, batch: 2000 // loss: 0.047 epoch: 106, batch: 2100 // loss: 0.046 epoch: 106, batch: 2200 // loss: 0.050 epoch: 106, batch: 2300 // loss: 0.049 epoch: 106, batch: 2400 // loss: 0.042 epoch: 106, batch: 2500 // loss: 0.042 epoch: 106, batch: 2600 // loss: 0.045 epoch: 106, batch: 2700 // loss: 0.042 epoch: 106, batch: 2800 // loss: 0.046 epoch: 106, batch: 2900 // loss: 0.042 epoch: 106, batch: 3000 // loss: 0.045 epoch: 106, batch: 3100 // loss: 0.043 epoch: 106, batch: 3200 // loss: 0.039 epoch: 106, batch: 3300 // loss: 0.038 epoch: 106, batch: 3400 // loss: 0.044 epoch: 106, batch: 3500 // loss: 0.035 epoch: 106, batch: 3600 // loss: 0.043 epoch: 106, batch: 3700 // loss: 0.043 epoch: 107, batch: 0 // loss: 0.053 epoch: 107, batch: 100 // loss: 0.046 epoch: 107, batch: 200 // loss: 0.043 epoch: 107, batch: 300 // loss: 0.049 epoch: 107, batch: 400 // loss: 0.046 epoch: 107, batch: 500 // loss: 0.040 epoch: 107, batch: 600 // loss: 0.040 epoch: 107, batch: 700 // loss: 0.044 epoch: 107, batch: 800 // loss: 0.042 epoch: 107, batch: 900 // loss: 0.049 epoch: 107, batch: 1000 // loss: 0.046 epoch: 107, batch: 1100 // loss: 0.042 epoch: 107, batch: 1200 // loss: 0.044 epoch: 107, batch: 1300 // loss: 0.046 epoch: 107, batch: 1400 // loss: 0.043 epoch: 107, batch: 1500 // loss: 0.048 epoch: 107, batch: 1600 // loss: 0.052 epoch: 107, batch: 
1700 // loss: 0.045 epoch: 107, batch: 1800 // loss: 0.053 epoch: 107, batch: 1900 // loss: 0.045 epoch: 107, batch: 2000 // loss: 0.047 epoch: 107, batch: 2100 // loss: 0.046 epoch: 107, batch: 2200 // loss: 0.050 epoch: 107, batch: 2300 // loss: 0.049 epoch: 107, batch: 2400 // loss: 0.042 epoch: 107, batch: 2500 // loss: 0.042 epoch: 107, batch: 2600 // loss: 0.045 epoch: 107, batch: 2700 // loss: 0.042 epoch: 107, batch: 2800 // loss: 0.046 epoch: 107, batch: 2900 // loss: 0.042 epoch: 107, batch: 3000 // loss: 0.045 epoch: 107, batch: 3100 // loss: 0.043 epoch: 107, batch: 3200 // loss: 0.039 epoch: 107, batch: 3300 // loss: 0.038 epoch: 107, batch: 3400 // loss: 0.044 epoch: 107, batch: 3500 // loss: 0.035 epoch: 107, batch: 3600 // loss: 0.043 epoch: 107, batch: 3700 // loss: 0.043 epoch: 108, batch: 0 // loss: 0.053 epoch: 108, batch: 100 // loss: 0.046 epoch: 108, batch: 200 // loss: 0.043 epoch: 108, batch: 300 // loss: 0.049 epoch: 108, batch: 400 // loss: 0.046 epoch: 108, batch: 500 // loss: 0.040 epoch: 108, batch: 600 // loss: 0.040 epoch: 108, batch: 700 // loss: 0.044 epoch: 108, batch: 800 // loss: 0.042 epoch: 108, batch: 900 // loss: 0.049 epoch: 108, batch: 1000 // loss: 0.046 epoch: 108, batch: 1100 // loss: 0.042 epoch: 108, batch: 1200 // loss: 0.044 epoch: 108, batch: 1300 // loss: 0.046 epoch: 108, batch: 1400 // loss: 0.043 epoch: 108, batch: 1500 // loss: 0.048 epoch: 108, batch: 1600 // loss: 0.052 epoch: 108, batch: 1700 // loss: 0.045 epoch: 108, batch: 1800 // loss: 0.053 epoch: 108, batch: 1900 // loss: 0.045 epoch: 108, batch: 2000 // loss: 0.047 epoch: 108, batch: 2100 // loss: 0.046 epoch: 108, batch: 2200 // loss: 0.050 epoch: 108, batch: 2300 // loss: 0.049 epoch: 108, batch: 2400 // loss: 0.042 epoch: 108, batch: 2500 // loss: 0.042 epoch: 108, batch: 2600 // loss: 0.045 epoch: 108, batch: 2700 // loss: 0.042 epoch: 108, batch: 2800 // loss: 0.046 epoch: 108, batch: 2900 // loss: 0.042 epoch: 108, batch: 3000 // loss: 0.045 
epoch: 108, batch: 3100 // loss: 0.043 epoch: 108, batch: 3200 // loss: 0.039 epoch: 108, batch: 3300 // loss: 0.038 epoch: 108, batch: 3400 // loss: 0.044 epoch: 108, batch: 3500 // loss: 0.035 epoch: 108, batch: 3600 // loss: 0.043 epoch: 108, batch: 3700 // loss: 0.043 epoch: 109, batch: 0 // loss: 0.053 epoch: 109, batch: 100 // loss: 0.046 epoch: 109, batch: 200 // loss: 0.043 epoch: 109, batch: 300 // loss: 0.049 epoch: 109, batch: 400 // loss: 0.046 epoch: 109, batch: 500 // loss: 0.040 epoch: 109, batch: 600 // loss: 0.040 epoch: 109, batch: 700 // loss: 0.044 epoch: 109, batch: 800 // loss: 0.042 epoch: 109, batch: 900 // loss: 0.049 epoch: 109, batch: 1000 // loss: 0.046 epoch: 109, batch: 1100 // loss: 0.042 epoch: 109, batch: 1200 // loss: 0.044 epoch: 109, batch: 1300 // loss: 0.046 epoch: 109, batch: 1400 // loss: 0.043 epoch: 109, batch: 1500 // loss: 0.048 epoch: 109, batch: 1600 // loss: 0.052 epoch: 109, batch: 1700 // loss: 0.045 epoch: 109, batch: 1800 // loss: 0.053 epoch: 109, batch: 1900 // loss: 0.045 epoch: 109, batch: 2000 // loss: 0.047 epoch: 109, batch: 2100 // loss: 0.046 epoch: 109, batch: 2200 // loss: 0.050 epoch: 109, batch: 2300 // loss: 0.049 epoch: 109, batch: 2400 // loss: 0.042 epoch: 109, batch: 2500 // loss: 0.042 epoch: 109, batch: 2600 // loss: 0.045 epoch: 109, batch: 2700 // loss: 0.042 epoch: 109, batch: 2800 // loss: 0.046 epoch: 109, batch: 2900 // loss: 0.042 epoch: 109, batch: 3000 // loss: 0.045 epoch: 109, batch: 3100 // loss: 0.043 epoch: 109, batch: 3200 // loss: 0.039 epoch: 109, batch: 3300 // loss: 0.038 epoch: 109, batch: 3400 // loss: 0.044 epoch: 109, batch: 3500 // loss: 0.035 epoch: 109, batch: 3600 // loss: 0.043 epoch: 109, batch: 3700 // loss: 0.043 epoch: 110, batch: 0 // loss: 0.053 epoch: 110, batch: 100 // loss: 0.046 epoch: 110, batch: 200 // loss: 0.043 epoch: 110, batch: 300 // loss: 0.049 epoch: 110, batch: 400 // loss: 0.046 epoch: 110, batch: 500 // loss: 0.040 epoch: 110, batch: 600 // 
loss: 0.040 epoch: 110, batch: 700 // loss: 0.044 epoch: 110, batch: 800 // loss: 0.042 epoch: 110, batch: 900 // loss: 0.049 epoch: 110, batch: 1000 // loss: 0.046 epoch: 110, batch: 1100 // loss: 0.042 epoch: 110, batch: 1200 // loss: 0.044 epoch: 110, batch: 1300 // loss: 0.046 epoch: 110, batch: 1400 // loss: 0.043 epoch: 110, batch: 1500 // loss: 0.048 epoch: 110, batch: 1600 // loss: 0.052 epoch: 110, batch: 1700 // loss: 0.045 epoch: 110, batch: 1800 // loss: 0.053 epoch: 110, batch: 1900 // loss: 0.045 epoch: 110, batch: 2000 // loss: 0.047 epoch: 110, batch: 2100 // loss: 0.046 epoch: 110, batch: 2200 // loss: 0.050 epoch: 110, batch: 2300 // loss: 0.049 epoch: 110, batch: 2400 // loss: 0.042 epoch: 110, batch: 2500 // loss: 0.042 epoch: 110, batch: 2600 // loss: 0.045 epoch: 110, batch: 2700 // loss: 0.042 epoch: 110, batch: 2800 // loss: 0.046 epoch: 110, batch: 2900 // loss: 0.042 epoch: 110, batch: 3000 // loss: 0.045 epoch: 110, batch: 3100 // loss: 0.043 epoch: 110, batch: 3200 // loss: 0.039 epoch: 110, batch: 3300 // loss: 0.038 epoch: 110, batch: 3400 // loss: 0.044 epoch: 110, batch: 3500 // loss: 0.035 epoch: 110, batch: 3600 // loss: 0.043 epoch: 110, batch: 3700 // loss: 0.043 epoch: 111, batch: 0 // loss: 0.053 epoch: 111, batch: 100 // loss: 0.046 epoch: 111, batch: 200 // loss: 0.043 epoch: 111, batch: 300 // loss: 0.049 epoch: 111, batch: 400 // loss: 0.046 epoch: 111, batch: 500 // loss: 0.040 epoch: 111, batch: 600 // loss: 0.040 epoch: 111, batch: 700 // loss: 0.044 epoch: 111, batch: 800 // loss: 0.042 epoch: 111, batch: 900 // loss: 0.049 epoch: 111, batch: 1000 // loss: 0.046 epoch: 111, batch: 1100 // loss: 0.042 epoch: 111, batch: 1200 // loss: 0.044 epoch: 111, batch: 1300 // loss: 0.046 epoch: 111, batch: 1400 // loss: 0.043 epoch: 111, batch: 1500 // loss: 0.048 epoch: 111, batch: 1600 // loss: 0.052 epoch: 111, batch: 1700 // loss: 0.045 epoch: 111, batch: 1800 // loss: 0.053 epoch: 111, batch: 1900 // loss: 0.045 epoch: 111, 
batch: 2000 // loss: 0.047 epoch: 111, batch: 2100 // loss: 0.046 epoch: 111, batch: 2200 // loss: 0.050 epoch: 111, batch: 2300 // loss: 0.049 epoch: 111, batch: 2400 // loss: 0.042 epoch: 111, batch: 2500 // loss: 0.042 epoch: 111, batch: 2600 // loss: 0.045 epoch: 111, batch: 2700 // loss: 0.042 epoch: 111, batch: 2800 // loss: 0.046 epoch: 111, batch: 2900 // loss: 0.042 epoch: 111, batch: 3000 // loss: 0.045 epoch: 111, batch: 3100 // loss: 0.043 epoch: 111, batch: 3200 // loss: 0.039 epoch: 111, batch: 3300 // loss: 0.038 epoch: 111, batch: 3400 // loss: 0.044 epoch: 111, batch: 3500 // loss: 0.035 epoch: 111, batch: 3600 // loss: 0.043 epoch: 111, batch: 3700 // loss: 0.043 epoch: 112, batch: 0 // loss: 0.053 epoch: 112, batch: 100 // loss: 0.046 epoch: 112, batch: 200 // loss: 0.043 epoch: 112, batch: 300 // loss: 0.049 epoch: 112, batch: 400 // loss: 0.046 epoch: 112, batch: 500 // loss: 0.040 epoch: 112, batch: 600 // loss: 0.040 epoch: 112, batch: 700 // loss: 0.044 epoch: 112, batch: 800 // loss: 0.042 epoch: 112, batch: 900 // loss: 0.049 epoch: 112, batch: 1000 // loss: 0.046 epoch: 112, batch: 1100 // loss: 0.042 epoch: 112, batch: 1200 // loss: 0.044 epoch: 112, batch: 1300 // loss: 0.046 epoch: 112, batch: 1400 // loss: 0.043 epoch: 112, batch: 1500 // loss: 0.048 epoch: 112, batch: 1600 // loss: 0.052 epoch: 112, batch: 1700 // loss: 0.045 epoch: 112, batch: 1800 // loss: 0.053 epoch: 112, batch: 1900 // loss: 0.045 epoch: 112, batch: 2000 // loss: 0.047 epoch: 112, batch: 2100 // loss: 0.046 epoch: 112, batch: 2200 // loss: 0.050 epoch: 112, batch: 2300 // loss: 0.049 epoch: 112, batch: 2400 // loss: 0.042 epoch: 112, batch: 2500 // loss: 0.042 epoch: 112, batch: 2600 // loss: 0.045 epoch: 112, batch: 2700 // loss: 0.042 epoch: 112, batch: 2800 // loss: 0.046 epoch: 112, batch: 2900 // loss: 0.042 epoch: 112, batch: 3000 // loss: 0.045 epoch: 112, batch: 3100 // loss: 0.043 epoch: 112, batch: 3200 // loss: 0.039 epoch: 112, batch: 3300 // loss: 
0.038 epoch: 112, batch: 3400 // loss: 0.044 epoch: 112, batch: 3500 // loss: 0.035 epoch: 112, batch: 3600 // loss: 0.043 epoch: 112, batch: 3700 // loss: 0.043 epoch: 113, batch: 0 // loss: 0.053 epoch: 113, batch: 100 // loss: 0.046 epoch: 113, batch: 200 // loss: 0.043 epoch: 113, batch: 300 // loss: 0.049 epoch: 113, batch: 400 // loss: 0.046 epoch: 113, batch: 500 // loss: 0.040 epoch: 113, batch: 600 // loss: 0.040 epoch: 113, batch: 700 // loss: 0.044 epoch: 113, batch: 800 // loss: 0.042 epoch: 113, batch: 900 // loss: 0.049 epoch: 113, batch: 1000 // loss: 0.046 epoch: 113, batch: 1100 // loss: 0.042 epoch: 113, batch: 1200 // loss: 0.044 epoch: 113, batch: 1300 // loss: 0.046 epoch: 113, batch: 1400 // loss: 0.043 epoch: 113, batch: 1500 // loss: 0.048 epoch: 113, batch: 1600 // loss: 0.052 epoch: 113, batch: 1700 // loss: 0.045 epoch: 113, batch: 1800 // loss: 0.053 epoch: 113, batch: 1900 // loss: 0.045 epoch: 113, batch: 2000 // loss: 0.047 epoch: 113, batch: 2100 // loss: 0.046 epoch: 113, batch: 2200 // loss: 0.050 epoch: 113, batch: 2300 // loss: 0.049 epoch: 113, batch: 2400 // loss: 0.042 epoch: 113, batch: 2500 // loss: 0.042 epoch: 113, batch: 2600 // loss: 0.045 epoch: 113, batch: 2700 // loss: 0.042 epoch: 113, batch: 2800 // loss: 0.046 epoch: 113, batch: 2900 // loss: 0.042 epoch: 113, batch: 3000 // loss: 0.045 epoch: 113, batch: 3100 // loss: 0.043 epoch: 113, batch: 3200 // loss: 0.039 epoch: 113, batch: 3300 // loss: 0.038 epoch: 113, batch: 3400 // loss: 0.044 epoch: 113, batch: 3500 // loss: 0.035 epoch: 113, batch: 3600 // loss: 0.043 epoch: 113, batch: 3700 // loss: 0.043 epoch: 114, batch: 0 // loss: 0.053 epoch: 114, batch: 100 // loss: 0.046 epoch: 114, batch: 200 // loss: 0.043 epoch: 114, batch: 300 // loss: 0.049 epoch: 114, batch: 400 // loss: 0.046 epoch: 114, batch: 500 // loss: 0.040 epoch: 114, batch: 600 // loss: 0.040 epoch: 114, batch: 700 // loss: 0.044 epoch: 114, batch: 800 // loss: 0.042 epoch: 114, batch: 900 // 
loss: 0.049 epoch: 114, batch: 1000 // loss: 0.046 epoch: 114, batch: 1100 // loss: 0.042 epoch: 114, batch: 1200 // loss: 0.044 epoch: 114, batch: 1300 // loss: 0.046 epoch: 114, batch: 1400 // loss: 0.043 epoch: 114, batch: 1500 // loss: 0.048 epoch: 114, batch: 1600 // loss: 0.052 epoch: 114, batch: 1700 // loss: 0.045 epoch: 114, batch: 1800 // loss: 0.053 epoch: 114, batch: 1900 // loss: 0.045 epoch: 114, batch: 2000 // loss: 0.047 epoch: 114, batch: 2100 // loss: 0.046 epoch: 114, batch: 2200 // loss: 0.050 epoch: 114, batch: 2300 // loss: 0.049 epoch: 114, batch: 2400 // loss: 0.042 epoch: 114, batch: 2500 // loss: 0.042 epoch: 114, batch: 2600 // loss: 0.045 epoch: 114, batch: 2700 // loss: 0.042 epoch: 114, batch: 2800 // loss: 0.046 epoch: 114, batch: 2900 // loss: 0.042 epoch: 114, batch: 3000 // loss: 0.045 epoch: 114, batch: 3100 // loss: 0.043 epoch: 114, batch: 3200 // loss: 0.039 epoch: 114, batch: 3300 // loss: 0.038 epoch: 114, batch: 3400 // loss: 0.044 epoch: 114, batch: 3500 // loss: 0.035 epoch: 114, batch: 3600 // loss: 0.043 epoch: 114, batch: 3700 // loss: 0.043 epoch: 115, batch: 0 // loss: 0.053 epoch: 115, batch: 100 // loss: 0.046 epoch: 115, batch: 200 // loss: 0.043 epoch: 115, batch: 300 // loss: 0.049 epoch: 115, batch: 400 // loss: 0.046 epoch: 115, batch: 500 // loss: 0.040 epoch: 115, batch: 600 // loss: 0.040 epoch: 115, batch: 700 // loss: 0.044 epoch: 115, batch: 800 // loss: 0.042 epoch: 115, batch: 900 // loss: 0.049 epoch: 115, batch: 1000 // loss: 0.046 epoch: 115, batch: 1100 // loss: 0.042 epoch: 115, batch: 1200 // loss: 0.044 epoch: 115, batch: 1300 // loss: 0.046 epoch: 115, batch: 1400 // loss: 0.043 epoch: 115, batch: 1500 // loss: 0.048 epoch: 115, batch: 1600 // loss: 0.052 epoch: 115, batch: 1700 // loss: 0.045 epoch: 115, batch: 1800 // loss: 0.053 epoch: 115, batch: 1900 // loss: 0.045 epoch: 115, batch: 2000 // loss: 0.047 epoch: 115, batch: 2100 // loss: 0.046 epoch: 115, batch: 2200 // loss: 0.050 epoch: 
115, batch: 2300 // loss: 0.049 epoch: 115, batch: 2400 // loss: 0.042 epoch: 115, batch: 2500 // loss: 0.042 epoch: 115, batch: 2600 // loss: 0.045 epoch: 115, batch: 2700 // loss: 0.042 epoch: 115, batch: 2800 // loss: 0.046 epoch: 115, batch: 2900 // loss: 0.042 epoch: 115, batch: 3000 // loss: 0.045 epoch: 115, batch: 3100 // loss: 0.043 epoch: 115, batch: 3200 // loss: 0.039 epoch: 115, batch: 3300 // loss: 0.038 epoch: 115, batch: 3400 // loss: 0.044 epoch: 115, batch: 3500 // loss: 0.035 epoch: 115, batch: 3600 // loss: 0.043 epoch: 115, batch: 3700 // loss: 0.043 epoch: 116, batch: 0 // loss: 0.053 epoch: 116, batch: 100 // loss: 0.046 epoch: 116, batch: 200 // loss: 0.043 epoch: 116, batch: 300 // loss: 0.049 epoch: 116, batch: 400 // loss: 0.046 epoch: 116, batch: 500 // loss: 0.040 epoch: 116, batch: 600 // loss: 0.040 epoch: 116, batch: 700 // loss: 0.044 epoch: 116, batch: 800 // loss: 0.042 epoch: 116, batch: 900 // loss: 0.049 epoch: 116, batch: 1000 // loss: 0.046 epoch: 116, batch: 1100 // loss: 0.042 epoch: 116, batch: 1200 // loss: 0.044 epoch: 116, batch: 1300 // loss: 0.046 epoch: 116, batch: 1400 // loss: 0.043 epoch: 116, batch: 1500 // loss: 0.048 epoch: 116, batch: 1600 // loss: 0.052 epoch: 116, batch: 1700 // loss: 0.045 epoch: 116, batch: 1800 // loss: 0.053 epoch: 116, batch: 1900 // loss: 0.045 epoch: 116, batch: 2000 // loss: 0.047 epoch: 116, batch: 2100 // loss: 0.046 epoch: 116, batch: 2200 // loss: 0.050 epoch: 116, batch: 2300 // loss: 0.049 epoch: 116, batch: 2400 // loss: 0.042 epoch: 116, batch: 2500 // loss: 0.042 epoch: 116, batch: 2600 // loss: 0.045 epoch: 116, batch: 2700 // loss: 0.042 epoch: 116, batch: 2800 // loss: 0.046 epoch: 116, batch: 2900 // loss: 0.042 epoch: 116, batch: 3000 // loss: 0.045 epoch: 116, batch: 3100 // loss: 0.043 epoch: 116, batch: 3200 // loss: 0.039 epoch: 116, batch: 3300 // loss: 0.038 epoch: 116, batch: 3400 // loss: 0.044 epoch: 116, batch: 3500 // loss: 0.035 epoch: 116, batch: 3600 // 
loss: 0.043 epoch: 116, batch: 3700 // loss: 0.043 epoch: 117, batch: 0 // loss: 0.053 epoch: 117, batch: 100 // loss: 0.046 epoch: 117, batch: 200 // loss: 0.043 epoch: 117, batch: 300 // loss: 0.049 epoch: 117, batch: 400 // loss: 0.046 epoch: 117, batch: 500 // loss: 0.040 epoch: 117, batch: 600 // loss: 0.040 epoch: 117, batch: 700 // loss: 0.044 epoch: 117, batch: 800 // loss: 0.042 epoch: 117, batch: 900 // loss: 0.049 epoch: 117, batch: 1000 // loss: 0.046 epoch: 117, batch: 1100 // loss: 0.042 epoch: 117, batch: 1200 // loss: 0.044 epoch: 117, batch: 1300 // loss: 0.046 epoch: 117, batch: 1400 // loss: 0.043 epoch: 117, batch: 1500 // loss: 0.048 epoch: 117, batch: 1600 // loss: 0.052 epoch: 117, batch: 1700 // loss: 0.045 epoch: 117, batch: 1800 // loss: 0.053 epoch: 117, batch: 1900 // loss: 0.045 epoch: 117, batch: 2000 // loss: 0.047 epoch: 117, batch: 2100 // loss: 0.046 epoch: 117, batch: 2200 // loss: 0.050 epoch: 117, batch: 2300 // loss: 0.049 epoch: 117, batch: 2400 // loss: 0.042 epoch: 117, batch: 2500 // loss: 0.042 epoch: 117, batch: 2600 // loss: 0.045 epoch: 117, batch: 2700 // loss: 0.042 epoch: 117, batch: 2800 // loss: 0.046 epoch: 117, batch: 2900 // loss: 0.042 epoch: 117, batch: 3000 // loss: 0.045 epoch: 117, batch: 3100 // loss: 0.043 epoch: 117, batch: 3200 // loss: 0.039 epoch: 117, batch: 3300 // loss: 0.038 epoch: 117, batch: 3400 // loss: 0.044 epoch: 117, batch: 3500 // loss: 0.035 epoch: 117, batch: 3600 // loss: 0.043 epoch: 117, batch: 3700 // loss: 0.043 epoch: 118, batch: 0 // loss: 0.053 epoch: 118, batch: 100 // loss: 0.046 epoch: 118, batch: 200 // loss: 0.043 epoch: 118, batch: 300 // loss: 0.049 epoch: 118, batch: 400 // loss: 0.046 epoch: 118, batch: 500 // loss: 0.040 epoch: 118, batch: 600 // loss: 0.040 epoch: 118, batch: 700 // loss: 0.044 epoch: 118, batch: 800 // loss: 0.042 epoch: 118, batch: 900 // loss: 0.049 epoch: 118, batch: 1000 // loss: 0.046 epoch: 118, batch: 1100 // loss: 0.042 epoch: 118, batch: 
1200 // loss: 0.044 epoch: 118, batch: 1300 // loss: 0.046 epoch: 118, batch: 1400 // loss: 0.043 epoch: 118, batch: 1500 // loss: 0.048 epoch: 118, batch: 1600 // loss: 0.052 epoch: 118, batch: 1700 // loss: 0.045 epoch: 118, batch: 1800 // loss: 0.053 epoch: 118, batch: 1900 // loss: 0.045 epoch: 118, batch: 2000 // loss: 0.047 epoch: 118, batch: 2100 // loss: 0.046 epoch: 118, batch: 2200 // loss: 0.050 epoch: 118, batch: 2300 // loss: 0.049 epoch: 118, batch: 2400 // loss: 0.042 epoch: 118, batch: 2500 // loss: 0.042 epoch: 118, batch: 2600 // loss: 0.045 epoch: 118, batch: 2700 // loss: 0.042 epoch: 118, batch: 2800 // loss: 0.046 epoch: 118, batch: 2900 // loss: 0.042 epoch: 118, batch: 3000 // loss: 0.045 epoch: 118, batch: 3100 // loss: 0.043 epoch: 118, batch: 3200 // loss: 0.039 epoch: 118, batch: 3300 // loss: 0.038 epoch: 118, batch: 3400 // loss: 0.044 epoch: 118, batch: 3500 // loss: 0.035 epoch: 118, batch: 3600 // loss: 0.043 epoch: 118, batch: 3700 // loss: 0.043 epoch: 119, batch: 0 // loss: 0.053 epoch: 119, batch: 100 // loss: 0.046 epoch: 119, batch: 200 // loss: 0.043 epoch: 119, batch: 300 // loss: 0.049 epoch: 119, batch: 400 // loss: 0.046 epoch: 119, batch: 500 // loss: 0.040 epoch: 119, batch: 600 // loss: 0.040 epoch: 119, batch: 700 // loss: 0.044 epoch: 119, batch: 800 // loss: 0.042 epoch: 119, batch: 900 // loss: 0.049 epoch: 119, batch: 1000 // loss: 0.046 epoch: 119, batch: 1100 // loss: 0.042 epoch: 119, batch: 1200 // loss: 0.044 epoch: 119, batch: 1300 // loss: 0.046 epoch: 119, batch: 1400 // loss: 0.043 epoch: 119, batch: 1500 // loss: 0.048 epoch: 119, batch: 1600 // loss: 0.052 epoch: 119, batch: 1700 // loss: 0.045 epoch: 119, batch: 1800 // loss: 0.053 epoch: 119, batch: 1900 // loss: 0.045 epoch: 119, batch: 2000 // loss: 0.047 epoch: 119, batch: 2100 // loss: 0.046 epoch: 119, batch: 2200 // loss: 0.050 epoch: 119, batch: 2300 // loss: 0.049 epoch: 119, batch: 2400 // loss: 0.042 epoch: 119, batch: 2500 // loss: 0.042 
epoch: 119, batch: 2600 // loss: 0.045 epoch: 119, batch: 2700 // loss: 0.042 epoch: 119, batch: 2800 // loss: 0.046 epoch: 119, batch: 2900 // loss: 0.042 epoch: 119, batch: 3000 // loss: 0.045 epoch: 119, batch: 3100 // loss: 0.043 epoch: 119, batch: 3200 // loss: 0.039 epoch: 119, batch: 3300 // loss: 0.038 epoch: 119, batch: 3400 // loss: 0.044 epoch: 119, batch: 3500 // loss: 0.035 epoch: 119, batch: 3600 // loss: 0.043 epoch: 119, batch: 3700 // loss: 0.043 epoch: 120, batch: 0 // loss: 0.053 epoch: 120, batch: 100 // loss: 0.046 epoch: 120, batch: 200 // loss: 0.043 epoch: 120, batch: 300 // loss: 0.049 epoch: 120, batch: 400 // loss: 0.046 epoch: 120, batch: 500 // loss: 0.040 epoch: 120, batch: 600 // loss: 0.040 epoch: 120, batch: 700 // loss: 0.044 epoch: 120, batch: 800 // loss: 0.042 epoch: 120, batch: 900 // loss: 0.049 epoch: 120, batch: 1000 // loss: 0.046 epoch: 120, batch: 1100 // loss: 0.042 epoch: 120, batch: 1200 // loss: 0.044 epoch: 120, batch: 1300 // loss: 0.046 epoch: 120, batch: 1400 // loss: 0.043 epoch: 120, batch: 1500 // loss: 0.048 epoch: 120, batch: 1600 // loss: 0.052 epoch: 120, batch: 1700 // loss: 0.045 epoch: 120, batch: 1800 // loss: 0.053 epoch: 120, batch: 1900 // loss: 0.045 epoch: 120, batch: 2000 // loss: 0.047 epoch: 120, batch: 2100 // loss: 0.046 epoch: 120, batch: 2200 // loss: 0.050 epoch: 120, batch: 2300 // loss: 0.049 epoch: 120, batch: 2400 // loss: 0.042 epoch: 120, batch: 2500 // loss: 0.042 epoch: 120, batch: 2600 // loss: 0.045 epoch: 120, batch: 2700 // loss: 0.042 epoch: 120, batch: 2800 // loss: 0.046 epoch: 120, batch: 2900 // loss: 0.042 epoch: 120, batch: 3000 // loss: 0.045 epoch: 120, batch: 3100 // loss: 0.043 epoch: 120, batch: 3200 // loss: 0.039 epoch: 120, batch: 3300 // loss: 0.038 epoch: 120, batch: 3400 // loss: 0.044 epoch: 120, batch: 3500 // loss: 0.035 epoch: 120, batch: 3600 // loss: 0.043 epoch: 120, batch: 3700 // loss: 0.043 epoch: 121, batch: 0 // loss: 0.053 epoch: 121, batch: 100 // 
loss: 0.046 epoch: 121, batch: 200 // loss: 0.043 epoch: 121, batch: 300 // loss: 0.049 epoch: 121, batch: 400 // loss: 0.046 epoch: 121, batch: 500 // loss: 0.040 epoch: 121, batch: 600 // loss: 0.040 epoch: 121, batch: 700 // loss: 0.044 epoch: 121, batch: 800 // loss: 0.042 epoch: 121, batch: 900 // loss: 0.049 epoch: 121, batch: 1000 // loss: 0.046 epoch: 121, batch: 1100 // loss: 0.042 epoch: 121, batch: 1200 // loss: 0.044 epoch: 121, batch: 1300 // loss: 0.046 epoch: 121, batch: 1400 // loss: 0.043 epoch: 121, batch: 1500 // loss: 0.048 epoch: 121, batch: 1600 // loss: 0.052 epoch: 121, batch: 1700 // loss: 0.045 epoch: 121, batch: 1800 // loss: 0.053 epoch: 121, batch: 1900 // loss: 0.045 epoch: 121, batch: 2000 // loss: 0.047 epoch: 121, batch: 2100 // loss: 0.046 epoch: 121, batch: 2200 // loss: 0.050 epoch: 121, batch: 2300 // loss: 0.049 epoch: 121, batch: 2400 // loss: 0.042 epoch: 121, batch: 2500 // loss: 0.042 epoch: 121, batch: 2600 // loss: 0.045 epoch: 121, batch: 2700 // loss: 0.042 epoch: 121, batch: 2800 // loss: 0.046 epoch: 121, batch: 2900 // loss: 0.042 epoch: 121, batch: 3000 // loss: 0.045 epoch: 121, batch: 3100 // loss: 0.043 epoch: 121, batch: 3200 // loss: 0.039 epoch: 121, batch: 3300 // loss: 0.038 epoch: 121, batch: 3400 // loss: 0.044 epoch: 121, batch: 3500 // loss: 0.035 epoch: 121, batch: 3600 // loss: 0.043 epoch: 121, batch: 3700 // loss: 0.043 epoch: 122, batch: 0 // loss: 0.053 epoch: 122, batch: 100 // loss: 0.046 epoch: 122, batch: 200 // loss: 0.043 epoch: 122, batch: 300 // loss: 0.049 epoch: 122, batch: 400 // loss: 0.046 epoch: 122, batch: 500 // loss: 0.040 epoch: 122, batch: 600 // loss: 0.040 epoch: 122, batch: 700 // loss: 0.044 epoch: 122, batch: 800 // loss: 0.042 epoch: 122, batch: 900 // loss: 0.049 epoch: 122, batch: 1000 // loss: 0.046 epoch: 122, batch: 1100 // loss: 0.042 epoch: 122, batch: 1200 // loss: 0.044 epoch: 122, batch: 1300 // loss: 0.046 epoch: 122, batch: 1400 // loss: 0.043 epoch: 122, batch: 
1500 // loss: 0.048 epoch: 122, batch: 1600 // loss: 0.052 epoch: 122, batch: 1700 // loss: 0.045 epoch: 122, batch: 1800 // loss: 0.053 epoch: 122, batch: 1900 // loss: 0.045 epoch: 122, batch: 2000 // loss: 0.047 epoch: 122, batch: 2100 // loss: 0.046 epoch: 122, batch: 2200 // loss: 0.050 epoch: 122, batch: 2300 // loss: 0.049 epoch: 122, batch: 2400 // loss: 0.042 epoch: 122, batch: 2500 // loss: 0.042 epoch: 122, batch: 2600 // loss: 0.045 epoch: 122, batch: 2700 // loss: 0.042 epoch: 122, batch: 2800 // loss: 0.046 epoch: 122, batch: 2900 // loss: 0.042 epoch: 122, batch: 3000 // loss: 0.045 epoch: 122, batch: 3100 // loss: 0.043 epoch: 122, batch: 3200 // loss: 0.039 epoch: 122, batch: 3300 // loss: 0.038 epoch: 122, batch: 3400 // loss: 0.044 epoch: 122, batch: 3500 // loss: 0.035 epoch: 122, batch: 3600 // loss: 0.043 epoch: 122, batch: 3700 // loss: 0.043 epoch: 123, batch: 0 // loss: 0.053 epoch: 123, batch: 100 // loss: 0.046 epoch: 123, batch: 200 // loss: 0.043 epoch: 123, batch: 300 // loss: 0.049 epoch: 123, batch: 400 // loss: 0.046 epoch: 123, batch: 500 // loss: 0.040 epoch: 123, batch: 600 // loss: 0.040 epoch: 123, batch: 700 // loss: 0.044 epoch: 123, batch: 800 // loss: 0.042 epoch: 123, batch: 900 // loss: 0.049 epoch: 123, batch: 1000 // loss: 0.046 epoch: 123, batch: 1100 // loss: 0.042 epoch: 123, batch: 1200 // loss: 0.044 epoch: 123, batch: 1300 // loss: 0.046 epoch: 123, batch: 1400 // loss: 0.043 epoch: 123, batch: 1500 // loss: 0.048 epoch: 123, batch: 1600 // loss: 0.052 epoch: 123, batch: 1700 // loss: 0.045 epoch: 123, batch: 1800 // loss: 0.053 epoch: 123, batch: 1900 // loss: 0.045 epoch: 123, batch: 2000 // loss: 0.047 epoch: 123, batch: 2100 // loss: 0.046 epoch: 123, batch: 2200 // loss: 0.050 epoch: 123, batch: 2300 // loss: 0.049 epoch: 123, batch: 2400 // loss: 0.042 epoch: 123, batch: 2500 // loss: 0.042 epoch: 123, batch: 2600 // loss: 0.045 epoch: 123, batch: 2700 // loss: 0.042 epoch: 123, batch: 2800 // loss: 0.046 
epoch: 123, batch: 2900 // loss: 0.042 epoch: 123, batch: 3000 // loss: 0.045 epoch: 123, batch: 3100 // loss: 0.043 epoch: 123, batch: 3200 // loss: 0.039 epoch: 123, batch: 3300 // loss: 0.038 epoch: 123, batch: 3400 // loss: 0.044 epoch: 123, batch: 3500 // loss: 0.035 epoch: 123, batch: 3600 // loss: 0.043 epoch: 123, batch: 3700 // loss: 0.043 epoch: 124, batch: 0 // loss: 0.053 epoch: 124, batch: 100 // loss: 0.046 epoch: 124, batch: 200 // loss: 0.043 epoch: 124, batch: 300 // loss: 0.049 epoch: 124, batch: 400 // loss: 0.046 epoch: 124, batch: 500 // loss: 0.040 epoch: 124, batch: 600 // loss: 0.040 epoch: 124, batch: 700 // loss: 0.044 epoch: 124, batch: 800 // loss: 0.042 epoch: 124, batch: 900 // loss: 0.049 epoch: 124, batch: 1000 // loss: 0.046 epoch: 124, batch: 1100 // loss: 0.042 epoch: 124, batch: 1200 // loss: 0.044 epoch: 124, batch: 1300 // loss: 0.046 epoch: 124, batch: 1400 // loss: 0.043 epoch: 124, batch: 1500 // loss: 0.048 epoch: 124, batch: 1600 // loss: 0.052 epoch: 124, batch: 1700 // loss: 0.045 epoch: 124, batch: 1800 // loss: 0.053 epoch: 124, batch: 1900 // loss: 0.045 epoch: 124, batch: 2000 // loss: 0.047 epoch: 124, batch: 2100 // loss: 0.046 epoch: 124, batch: 2200 // loss: 0.050 epoch: 124, batch: 2300 // loss: 0.049 epoch: 124, batch: 2400 // loss: 0.042 epoch: 124, batch: 2500 // loss: 0.042 epoch: 124, batch: 2600 // loss: 0.045 epoch: 124, batch: 2700 // loss: 0.042 epoch: 124, batch: 2800 // loss: 0.046 epoch: 124, batch: 2900 // loss: 0.042 epoch: 124, batch: 3000 // loss: 0.045 epoch: 124, batch: 3100 // loss: 0.043 epoch: 124, batch: 3200 // loss: 0.039 epoch: 124, batch: 3300 // loss: 0.038 epoch: 124, batch: 3400 // loss: 0.044 epoch: 124, batch: 3500 // loss: 0.035 epoch: 124, batch: 3600 // loss: 0.043 epoch: 124, batch: 3700 // loss: 0.043 epoch: 125, batch: 0 // loss: 0.053 epoch: 125, batch: 100 // loss: 0.046 epoch: 125, batch: 200 // loss: 0.043 epoch: 125, batch: 300 // loss: 0.049 epoch: 125, batch: 400 // 
loss: 0.046 epoch: 125, batch: 500 // loss: 0.040 epoch: 125, batch: 600 // loss: 0.040 epoch: 125, batch: 700 // loss: 0.044 epoch: 125, batch: 800 // loss: 0.042 epoch: 125, batch: 900 // loss: 0.049 epoch: 125, batch: 1000 // loss: 0.046 epoch: 125, batch: 1100 // loss: 0.042 epoch: 125, batch: 1200 // loss: 0.044 epoch: 125, batch: 1300 // loss: 0.046 epoch: 125, batch: 1400 // loss: 0.043 epoch: 125, batch: 1500 // loss: 0.048 epoch: 125, batch: 1600 // loss: 0.052 epoch: 125, batch: 1700 // loss: 0.045 epoch: 125, batch: 1800 // loss: 0.053 epoch: 125, batch: 1900 // loss: 0.045 epoch: 125, batch: 2000 // loss: 0.047 epoch: 125, batch: 2100 // loss: 0.046 epoch: 125, batch: 2200 // loss: 0.050 epoch: 125, batch: 2300 // loss: 0.049 epoch: 125, batch: 2400 // loss: 0.042 epoch: 125, batch: 2500 // loss: 0.042 epoch: 125, batch: 2600 // loss: 0.045 epoch: 125, batch: 2700 // loss: 0.042 epoch: 125, batch: 2800 // loss: 0.046 epoch: 125, batch: 2900 // loss: 0.042 epoch: 125, batch: 3000 // loss: 0.045 epoch: 125, batch: 3100 // loss: 0.043 epoch: 125, batch: 3200 // loss: 0.039 epoch: 125, batch: 3300 // loss: 0.038 epoch: 125, batch: 3400 // loss: 0.044 epoch: 125, batch: 3500 // loss: 0.035 epoch: 125, batch: 3600 // loss: 0.043 epoch: 125, batch: 3700 // loss: 0.043 epoch: 126, batch: 0 // loss: 0.053 epoch: 126, batch: 100 // loss: 0.046 epoch: 126, batch: 200 // loss: 0.043 epoch: 126, batch: 300 // loss: 0.049 epoch: 126, batch: 400 // loss: 0.046 epoch: 126, batch: 500 // loss: 0.040 epoch: 126, batch: 600 // loss: 0.040 epoch: 126, batch: 700 // loss: 0.044 epoch: 126, batch: 800 // loss: 0.042 epoch: 126, batch: 900 // loss: 0.049 epoch: 126, batch: 1000 // loss: 0.046 epoch: 126, batch: 1100 // loss: 0.042 epoch: 126, batch: 1200 // loss: 0.044 epoch: 126, batch: 1300 // loss: 0.046 epoch: 126, batch: 1400 // loss: 0.043 epoch: 126, batch: 1500 // loss: 0.048 epoch: 126, batch: 1600 // loss: 0.052 epoch: 126, batch: 1700 // loss: 0.045 epoch: 126, 
batch: 1800 // loss: 0.053 epoch: 126, batch: 1900 // loss: 0.045 epoch: 126, batch: 2000 // loss: 0.047 epoch: 126, batch: 2100 // loss: 0.046 epoch: 126, batch: 2200 // loss: 0.050 epoch: 126, batch: 2300 // loss: 0.049 epoch: 126, batch: 2400 // loss: 0.042 epoch: 126, batch: 2500 // loss: 0.042 epoch: 126, batch: 2600 // loss: 0.045 epoch: 126, batch: 2700 // loss: 0.042 epoch: 126, batch: 2800 // loss: 0.046 epoch: 126, batch: 2900 // loss: 0.042 epoch: 126, batch: 3000 // loss: 0.045 epoch: 126, batch: 3100 // loss: 0.043 epoch: 126, batch: 3200 // loss: 0.039 epoch: 126, batch: 3300 // loss: 0.038 epoch: 126, batch: 3400 // loss: 0.044 epoch: 126, batch: 3500 // loss: 0.035 epoch: 126, batch: 3600 // loss: 0.043 epoch: 126, batch: 3700 // loss: 0.043 epoch: 127, batch: 0 // loss: 0.053 epoch: 127, batch: 100 // loss: 0.046 epoch: 127, batch: 200 // loss: 0.043 epoch: 127, batch: 300 // loss: 0.049 epoch: 127, batch: 400 // loss: 0.046 epoch: 127, batch: 500 // loss: 0.040 epoch: 127, batch: 600 // loss: 0.040 epoch: 127, batch: 700 // loss: 0.044 epoch: 127, batch: 800 // loss: 0.042 epoch: 127, batch: 900 // loss: 0.049 epoch: 127, batch: 1000 // loss: 0.046 epoch: 127, batch: 1100 // loss: 0.042 epoch: 127, batch: 1200 // loss: 0.044 epoch: 127, batch: 1300 // loss: 0.046 epoch: 127, batch: 1400 // loss: 0.043 epoch: 127, batch: 1500 // loss: 0.048 epoch: 127, batch: 1600 // loss: 0.052 epoch: 127, batch: 1700 // loss: 0.045 epoch: 127, batch: 1800 // loss: 0.053 epoch: 127, batch: 1900 // loss: 0.045 epoch: 127, batch: 2000 // loss: 0.047 epoch: 127, batch: 2100 // loss: 0.046 epoch: 127, batch: 2200 // loss: 0.050 epoch: 127, batch: 2300 // loss: 0.049 epoch: 127, batch: 2400 // loss: 0.042 epoch: 127, batch: 2500 // loss: 0.042 epoch: 127, batch: 2600 // loss: 0.045 epoch: 127, batch: 2700 // loss: 0.042 epoch: 127, batch: 2800 // loss: 0.046 epoch: 127, batch: 2900 // loss: 0.042 epoch: 127, batch: 3000 // loss: 0.045 epoch: 127, batch: 3100 // loss: 
0.043 epoch: 127, batch: 3200 // loss: 0.039 epoch: 127, batch: 3300 // loss: 0.038 epoch: 127, batch: 3400 // loss: 0.044 epoch: 127, batch: 3500 // loss: 0.035 epoch: 127, batch: 3600 // loss: 0.043 epoch: 127, batch: 3700 // loss: 0.043 epoch: 128, batch: 0 // loss: 0.053 epoch: 128, batch: 100 // loss: 0.046 epoch: 128, batch: 200 // loss: 0.043 epoch: 128, batch: 300 // loss: 0.049 epoch: 128, batch: 400 // loss: 0.046 epoch: 128, batch: 500 // loss: 0.040 epoch: 128, batch: 600 // loss: 0.040 epoch: 128, batch: 700 // loss: 0.044 epoch: 128, batch: 800 // loss: 0.042 epoch: 128, batch: 900 // loss: 0.049 epoch: 128, batch: 1000 // loss: 0.046 epoch: 128, batch: 1100 // loss: 0.042 epoch: 128, batch: 1200 // loss: 0.044 epoch: 128, batch: 1300 // loss: 0.046 epoch: 128, batch: 1400 // loss: 0.043 epoch: 128, batch: 1500 // loss: 0.048 epoch: 128, batch: 1600 // loss: 0.052 epoch: 128, batch: 1700 // loss: 0.045 epoch: 128, batch: 1800 // loss: 0.053 epoch: 128, batch: 1900 // loss: 0.045 epoch: 128, batch: 2000 // loss: 0.047 epoch: 128, batch: 2100 // loss: 0.046 epoch: 128, batch: 2200 // loss: 0.050 epoch: 128, batch: 2300 // loss: 0.049 epoch: 128, batch: 2400 // loss: 0.042 epoch: 128, batch: 2500 // loss: 0.042 epoch: 128, batch: 2600 // loss: 0.045 epoch: 128, batch: 2700 // loss: 0.042 epoch: 128, batch: 2800 // loss: 0.046 epoch: 128, batch: 2900 // loss: 0.042 epoch: 128, batch: 3000 // loss: 0.045 epoch: 128, batch: 3100 // loss: 0.043 epoch: 128, batch: 3200 // loss: 0.039 epoch: 128, batch: 3300 // loss: 0.038 epoch: 128, batch: 3400 // loss: 0.044 epoch: 128, batch: 3500 // loss: 0.035 epoch: 128, batch: 3600 // loss: 0.043 epoch: 128, batch: 3700 // loss: 0.043 epoch: 129, batch: 0 // loss: 0.053 epoch: 129, batch: 100 // loss: 0.046 epoch: 129, batch: 200 // loss: 0.043 epoch: 129, batch: 300 // loss: 0.049 epoch: 129, batch: 400 // loss: 0.046 epoch: 129, batch: 500 // loss: 0.040 epoch: 129, batch: 600 // loss: 0.040 epoch: 129, batch: 700 // 
loss: 0.044 epoch: 129, batch: 800 // loss: 0.042 epoch: 129, batch: 900 // loss: 0.049 epoch: 129, batch: 1000 // loss: 0.046 epoch: 129, batch: 1100 // loss: 0.042 epoch: 129, batch: 1200 // loss: 0.044 epoch: 129, batch: 1300 // loss: 0.046 epoch: 129, batch: 1400 // loss: 0.043 epoch: 129, batch: 1500 // loss: 0.048 epoch: 129, batch: 1600 // loss: 0.052 epoch: 129, batch: 1700 // loss: 0.045 epoch: 129, batch: 1800 // loss: 0.053 epoch: 129, batch: 1900 // loss: 0.045 epoch: 129, batch: 2000 // loss: 0.047 epoch: 129, batch: 2100 // loss: 0.046 epoch: 129, batch: 2200 // loss: 0.050 epoch: 129, batch: 2300 // loss: 0.049 epoch: 129, batch: 2400 // loss: 0.042 epoch: 129, batch: 2500 // loss: 0.042 epoch: 129, batch: 2600 // loss: 0.045 epoch: 129, batch: 2700 // loss: 0.042 epoch: 129, batch: 2800 // loss: 0.046 epoch: 129, batch: 2900 // loss: 0.042 epoch: 129, batch: 3000 // loss: 0.045 epoch: 129, batch: 3100 // loss: 0.043 epoch: 129, batch: 3200 // loss: 0.039 epoch: 129, batch: 3300 // loss: 0.038 epoch: 129, batch: 3400 // loss: 0.044 epoch: 129, batch: 3500 // loss: 0.035 epoch: 129, batch: 3600 // loss: 0.043 epoch: 129, batch: 3700 // loss: 0.043 epoch: 130, batch: 0 // loss: 0.053 epoch: 130, batch: 100 // loss: 0.046 epoch: 130, batch: 200 // loss: 0.043 epoch: 130, batch: 300 // loss: 0.049 epoch: 130, batch: 400 // loss: 0.046 epoch: 130, batch: 500 // loss: 0.040 epoch: 130, batch: 600 // loss: 0.040 epoch: 130, batch: 700 // loss: 0.044 epoch: 130, batch: 800 // loss: 0.042 epoch: 130, batch: 900 // loss: 0.049 epoch: 130, batch: 1000 // loss: 0.046 epoch: 130, batch: 1100 // loss: 0.042 epoch: 130, batch: 1200 // loss: 0.044 epoch: 130, batch: 1300 // loss: 0.046 epoch: 130, batch: 1400 // loss: 0.043 epoch: 130, batch: 1500 // loss: 0.048 epoch: 130, batch: 1600 // loss: 0.052 epoch: 130, batch: 1700 // loss: 0.045 epoch: 130, batch: 1800 // loss: 0.053 epoch: 130, batch: 1900 // loss: 0.045 epoch: 130, batch: 2000 // loss: 0.047 epoch: 130, 
batch: 2100 // loss: 0.046 epoch: 130, batch: 2200 // loss: 0.050 epoch: 130, batch: 2300 // loss: 0.049 epoch: 130, batch: 2400 // loss: 0.042 epoch: 130, batch: 2500 // loss: 0.042 epoch: 130, batch: 2600 // loss: 0.045 epoch: 130, batch: 2700 // loss: 0.042 epoch: 130, batch: 2800 // loss: 0.046 epoch: 130, batch: 2900 // loss: 0.042 epoch: 130, batch: 3000 // loss: 0.045 epoch: 130, batch: 3100 // loss: 0.043 epoch: 130, batch: 3200 // loss: 0.039 epoch: 130, batch: 3300 // loss: 0.038 epoch: 130, batch: 3400 // loss: 0.044 epoch: 130, batch: 3500 // loss: 0.035 epoch: 130, batch: 3600 // loss: 0.043 epoch: 130, batch: 3700 // loss: 0.043 epoch: 131, batch: 0 // loss: 0.053 epoch: 131, batch: 100 // loss: 0.046 epoch: 131, batch: 200 // loss: 0.043 epoch: 131, batch: 300 // loss: 0.049 epoch: 131, batch: 400 // loss: 0.046 epoch: 131, batch: 500 // loss: 0.040 epoch: 131, batch: 600 // loss: 0.040 epoch: 131, batch: 700 // loss: 0.044 epoch: 131, batch: 800 // loss: 0.042 epoch: 131, batch: 900 // loss: 0.049 epoch: 131, batch: 1000 // loss: 0.046 epoch: 131, batch: 1100 // loss: 0.042 epoch: 131, batch: 1200 // loss: 0.044 epoch: 131, batch: 1300 // loss: 0.046 epoch: 131, batch: 1400 // loss: 0.043 epoch: 131, batch: 1500 // loss: 0.048 epoch: 131, batch: 1600 // loss: 0.052 epoch: 131, batch: 1700 // loss: 0.045 epoch: 131, batch: 1800 // loss: 0.053 epoch: 131, batch: 1900 // loss: 0.045 epoch: 131, batch: 2000 // loss: 0.047 epoch: 131, batch: 2100 // loss: 0.046 epoch: 131, batch: 2200 // loss: 0.050 epoch: 131, batch: 2300 // loss: 0.049 epoch: 131, batch: 2400 // loss: 0.042 epoch: 131, batch: 2500 // loss: 0.042 epoch: 131, batch: 2600 // loss: 0.045 epoch: 131, batch: 2700 // loss: 0.042 epoch: 131, batch: 2800 // loss: 0.046 epoch: 131, batch: 2900 // loss: 0.042 epoch: 131, batch: 3000 // loss: 0.045 epoch: 131, batch: 3100 // loss: 0.043 epoch: 131, batch: 3200 // loss: 0.039 epoch: 131, batch: 3300 // loss: 0.038 epoch: 131, batch: 3400 // loss: 
0.044 epoch: 131, batch: 3500 // loss: 0.035 epoch: 131, batch: 3600 // loss: 0.043 epoch: 131, batch: 3700 // loss: 0.043 epoch: 132, batch: 0 // loss: 0.053 epoch: 132, batch: 100 // loss: 0.046 epoch: 132, batch: 200 // loss: 0.043 epoch: 132, batch: 300 // loss: 0.049 epoch: 132, batch: 400 // loss: 0.046 epoch: 132, batch: 500 // loss: 0.040 epoch: 132, batch: 600 // loss: 0.040 epoch: 132, batch: 700 // loss: 0.044 epoch: 132, batch: 800 // loss: 0.042 epoch: 132, batch: 900 // loss: 0.049 epoch: 132, batch: 1000 // loss: 0.046 epoch: 132, batch: 1100 // loss: 0.042 epoch: 132, batch: 1200 // loss: 0.044 epoch: 132, batch: 1300 // loss: 0.046 epoch: 132, batch: 1400 // loss: 0.043 epoch: 132, batch: 1500 // loss: 0.048 epoch: 132, batch: 1600 // loss: 0.052 epoch: 132, batch: 1700 // loss: 0.045 epoch: 132, batch: 1800 // loss: 0.053 epoch: 132, batch: 1900 // loss: 0.045 epoch: 132, batch: 2000 // loss: 0.047 epoch: 132, batch: 2100 // loss: 0.046 epoch: 132, batch: 2200 // loss: 0.050 epoch: 132, batch: 2300 // loss: 0.049 epoch: 132, batch: 2400 // loss: 0.042 epoch: 132, batch: 2500 // loss: 0.042 epoch: 132, batch: 2600 // loss: 0.045 epoch: 132, batch: 2700 // loss: 0.042 epoch: 132, batch: 2800 // loss: 0.046 epoch: 132, batch: 2900 // loss: 0.042 epoch: 132, batch: 3000 // loss: 0.045 epoch: 132, batch: 3100 // loss: 0.043 epoch: 132, batch: 3200 // loss: 0.039 epoch: 132, batch: 3300 // loss: 0.038 epoch: 132, batch: 3400 // loss: 0.044 epoch: 132, batch: 3500 // loss: 0.035 epoch: 132, batch: 3600 // loss: 0.043 epoch: 132, batch: 3700 // loss: 0.043 epoch: 133, batch: 0 // loss: 0.053 epoch: 133, batch: 100 // loss: 0.046 epoch: 133, batch: 200 // loss: 0.043 epoch: 133, batch: 300 // loss: 0.049 epoch: 133, batch: 400 // loss: 0.046 epoch: 133, batch: 500 // loss: 0.040 epoch: 133, batch: 600 // loss: 0.040 epoch: 133, batch: 700 // loss: 0.044 epoch: 133, batch: 800 // loss: 0.042 epoch: 133, batch: 900 // loss: 0.049 epoch: 133, batch: 1000 // 
loss: 0.046 epoch: 133, batch: 1100 // loss: 0.042 epoch: 133, batch: 1200 // loss: 0.044 epoch: 133, batch: 1300 // loss: 0.046 epoch: 133, batch: 1400 // loss: 0.043 epoch: 133, batch: 1500 // loss: 0.048 epoch: 133, batch: 1600 // loss: 0.052 epoch: 133, batch: 1700 // loss: 0.045 epoch: 133, batch: 1800 // loss: 0.053 epoch: 133, batch: 1900 // loss: 0.045 epoch: 133, batch: 2000 // loss: 0.047 epoch: 133, batch: 2100 // loss: 0.046 epoch: 133, batch: 2200 // loss: 0.050 epoch: 133, batch: 2300 // loss: 0.049 epoch: 133, batch: 2400 // loss: 0.042 epoch: 133, batch: 2500 // loss: 0.042 epoch: 133, batch: 2600 // loss: 0.045 epoch: 133, batch: 2700 // loss: 0.042 epoch: 133, batch: 2800 // loss: 0.046 epoch: 133, batch: 2900 // loss: 0.042 epoch: 133, batch: 3000 // loss: 0.045 epoch: 133, batch: 3100 // loss: 0.043 epoch: 133, batch: 3200 // loss: 0.039 epoch: 133, batch: 3300 // loss: 0.038 epoch: 133, batch: 3400 // loss: 0.044 epoch: 133, batch: 3500 // loss: 0.035 epoch: 133, batch: 3600 // loss: 0.043 epoch: 133, batch: 3700 // loss: 0.043 epoch: 134, batch: 0 // loss: 0.053 epoch: 134, batch: 100 // loss: 0.046 epoch: 134, batch: 200 // loss: 0.043 epoch: 134, batch: 300 // loss: 0.049 epoch: 134, batch: 400 // loss: 0.046 epoch: 134, batch: 500 // loss: 0.040 epoch: 134, batch: 600 // loss: 0.040 epoch: 134, batch: 700 // loss: 0.044 epoch: 134, batch: 800 // loss: 0.042 epoch: 134, batch: 900 // loss: 0.049 epoch: 134, batch: 1000 // loss: 0.046 epoch: 134, batch: 1100 // loss: 0.042 epoch: 134, batch: 1200 // loss: 0.044 epoch: 134, batch: 1300 // loss: 0.046 epoch: 134, batch: 1400 // loss: 0.043 epoch: 134, batch: 1500 // loss: 0.048 epoch: 134, batch: 1600 // loss: 0.052 epoch: 134, batch: 1700 // loss: 0.045 epoch: 134, batch: 1800 // loss: 0.053 epoch: 134, batch: 1900 // loss: 0.045 epoch: 134, batch: 2000 // loss: 0.047 epoch: 134, batch: 2100 // loss: 0.046 epoch: 134, batch: 2200 // loss: 0.050 epoch: 134, batch: 2300 // loss: 0.049 epoch: 
134, batch: 2400 // loss: 0.042 epoch: 134, batch: 2500 // loss: 0.042 epoch: 134, batch: 2600 // loss: 0.045 epoch: 134, batch: 2700 // loss: 0.042 epoch: 134, batch: 2800 // loss: 0.046 epoch: 134, batch: 2900 // loss: 0.042 epoch: 134, batch: 3000 // loss: 0.045 epoch: 134, batch: 3100 // loss: 0.043 epoch: 134, batch: 3200 // loss: 0.039 epoch: 134, batch: 3300 // loss: 0.038 epoch: 134, batch: 3400 // loss: 0.044 epoch: 134, batch: 3500 // loss: 0.035 epoch: 134, batch: 3600 // loss: 0.043 epoch: 134, batch: 3700 // loss: 0.043 epoch: 135, batch: 0 // loss: 0.053 epoch: 135, batch: 100 // loss: 0.046 epoch: 135, batch: 200 // loss: 0.043 epoch: 135, batch: 300 // loss: 0.049 epoch: 135, batch: 400 // loss: 0.046 epoch: 135, batch: 500 // loss: 0.040 epoch: 135, batch: 600 // loss: 0.040 epoch: 135, batch: 700 // loss: 0.044 epoch: 135, batch: 800 // loss: 0.042 epoch: 135, batch: 900 // loss: 0.049 epoch: 135, batch: 1000 // loss: 0.046 epoch: 135, batch: 1100 // loss: 0.042 epoch: 135, batch: 1200 // loss: 0.044 epoch: 135, batch: 1300 // loss: 0.046 epoch: 135, batch: 1400 // loss: 0.043 epoch: 135, batch: 1500 // loss: 0.048 epoch: 135, batch: 1600 // loss: 0.052 epoch: 135, batch: 1700 // loss: 0.045 epoch: 135, batch: 1800 // loss: 0.053 epoch: 135, batch: 1900 // loss: 0.045 epoch: 135, batch: 2000 // loss: 0.047 epoch: 135, batch: 2100 // loss: 0.046 epoch: 135, batch: 2200 // loss: 0.050 epoch: 135, batch: 2300 // loss: 0.049 epoch: 135, batch: 2400 // loss: 0.042 epoch: 135, batch: 2500 // loss: 0.042 epoch: 135, batch: 2600 // loss: 0.045 epoch: 135, batch: 2700 // loss: 0.042 epoch: 135, batch: 2800 // loss: 0.046 epoch: 135, batch: 2900 // loss: 0.042 epoch: 135, batch: 3000 // loss: 0.045 epoch: 135, batch: 3100 // loss: 0.043 epoch: 135, batch: 3200 // loss: 0.039 epoch: 135, batch: 3300 // loss: 0.038 epoch: 135, batch: 3400 // loss: 0.044 epoch: 135, batch: 3500 // loss: 0.035 epoch: 135, batch: 3600 // loss: 0.043 epoch: 135, batch: 3700 // 
loss: 0.043 epoch: 136, batch: 0 // loss: 0.053 epoch: 136, batch: 100 // loss: 0.046 epoch: 136, batch: 200 // loss: 0.043 epoch: 136, batch: 300 // loss: 0.049 epoch: 136, batch: 400 // loss: 0.046 epoch: 136, batch: 500 // loss: 0.040 epoch: 136, batch: 600 // loss: 0.040 epoch: 136, batch: 700 // loss: 0.044 epoch: 136, batch: 800 // loss: 0.042 epoch: 136, batch: 900 // loss: 0.049 epoch: 136, batch: 1000 // loss: 0.046 epoch: 136, batch: 1100 // loss: 0.042 epoch: 136, batch: 1200 // loss: 0.044 epoch: 136, batch: 1300 // loss: 0.046 epoch: 136, batch: 1400 // loss: 0.043 epoch: 136, batch: 1500 // loss: 0.048 epoch: 136, batch: 1600 // loss: 0.052 epoch: 136, batch: 1700 // loss: 0.045 epoch: 136, batch: 1800 // loss: 0.053 epoch: 136, batch: 1900 // loss: 0.045 epoch: 136, batch: 2000 // loss: 0.047 epoch: 136, batch: 2100 // loss: 0.046 epoch: 136, batch: 2200 // loss: 0.050 epoch: 136, batch: 2300 // loss: 0.049 epoch: 136, batch: 2400 // loss: 0.042 epoch: 136, batch: 2500 // loss: 0.042 epoch: 136, batch: 2600 // loss: 0.045 epoch: 136, batch: 2700 // loss: 0.042 epoch: 136, batch: 2800 // loss: 0.046 epoch: 136, batch: 2900 // loss: 0.042 epoch: 136, batch: 3000 // loss: 0.045 epoch: 136, batch: 3100 // loss: 0.043 epoch: 136, batch: 3200 // loss: 0.039 epoch: 136, batch: 3300 // loss: 0.038 epoch: 136, batch: 3400 // loss: 0.044 epoch: 136, batch: 3500 // loss: 0.035 epoch: 136, batch: 3600 // loss: 0.043 epoch: 136, batch: 3700 // loss: 0.043 epoch: 137, batch: 0 // loss: 0.053 epoch: 137, batch: 100 // loss: 0.046 epoch: 137, batch: 200 // loss: 0.043 epoch: 137, batch: 300 // loss: 0.049 epoch: 137, batch: 400 // loss: 0.046 epoch: 137, batch: 500 // loss: 0.040 epoch: 137, batch: 600 // loss: 0.040 epoch: 137, batch: 700 // loss: 0.044 epoch: 137, batch: 800 // loss: 0.042 epoch: 137, batch: 900 // loss: 0.049 epoch: 137, batch: 1000 // loss: 0.046 epoch: 137, batch: 1100 // loss: 0.042 epoch: 137, batch: 1200 // loss: 0.044 epoch: 137, batch: 
1300 // loss: 0.046 epoch: 137, batch: 1400 // loss: 0.043 epoch: 137, batch: 1500 // loss: 0.048 epoch: 137, batch: 1600 // loss: 0.052 epoch: 137, batch: 1700 // loss: 0.045 epoch: 137, batch: 1800 // loss: 0.053 epoch: 137, batch: 1900 // loss: 0.045 epoch: 137, batch: 2000 // loss: 0.047 epoch: 137, batch: 2100 // loss: 0.046 epoch: 137, batch: 2200 // loss: 0.050 epoch: 137, batch: 2300 // loss: 0.049 epoch: 137, batch: 2400 // loss: 0.042 epoch: 137, batch: 2500 // loss: 0.042 epoch: 137, batch: 2600 // loss: 0.045 epoch: 137, batch: 2700 // loss: 0.042 epoch: 137, batch: 2800 // loss: 0.046 epoch: 137, batch: 2900 // loss: 0.042 epoch: 137, batch: 3000 // loss: 0.045 epoch: 137, batch: 3100 // loss: 0.043 epoch: 137, batch: 3200 // loss: 0.039 epoch: 137, batch: 3300 // loss: 0.038 epoch: 137, batch: 3400 // loss: 0.044 epoch: 137, batch: 3500 // loss: 0.035 epoch: 137, batch: 3600 // loss: 0.043 epoch: 137, batch: 3700 // loss: 0.043 epoch: 138, batch: 0 // loss: 0.053 epoch: 138, batch: 100 // loss: 0.046 epoch: 138, batch: 200 // loss: 0.043 epoch: 138, batch: 300 // loss: 0.049 epoch: 138, batch: 400 // loss: 0.046 epoch: 138, batch: 500 // loss: 0.040 epoch: 138, batch: 600 // loss: 0.040 epoch: 138, batch: 700 // loss: 0.044 epoch: 138, batch: 800 // loss: 0.042 epoch: 138, batch: 900 // loss: 0.049 epoch: 138, batch: 1000 // loss: 0.046 epoch: 138, batch: 1100 // loss: 0.042 epoch: 138, batch: 1200 // loss: 0.044 epoch: 138, batch: 1300 // loss: 0.046 epoch: 138, batch: 1400 // loss: 0.043 epoch: 138, batch: 1500 // loss: 0.048 epoch: 138, batch: 1600 // loss: 0.052 epoch: 138, batch: 1700 // loss: 0.045 epoch: 138, batch: 1800 // loss: 0.053 epoch: 138, batch: 1900 // loss: 0.045 epoch: 138, batch: 2000 // loss: 0.047 epoch: 138, batch: 2100 // loss: 0.046 epoch: 138, batch: 2200 // loss: 0.050 epoch: 138, batch: 2300 // loss: 0.049 epoch: 138, batch: 2400 // loss: 0.042 epoch: 138, batch: 2500 // loss: 0.042 epoch: 138, batch: 2600 // loss: 0.045 
epoch: 138, batch: 2700 // loss: 0.042 epoch: 138, batch: 2800 // loss: 0.046 epoch: 138, batch: 2900 // loss: 0.042 epoch: 138, batch: 3000 // loss: 0.045 epoch: 138, batch: 3100 // loss: 0.043 epoch: 138, batch: 3200 // loss: 0.039 epoch: 138, batch: 3300 // loss: 0.038 epoch: 138, batch: 3400 // loss: 0.044 epoch: 138, batch: 3500 // loss: 0.035 epoch: 138, batch: 3600 // loss: 0.043 epoch: 138, batch: 3700 // loss: 0.043 epoch: 139, batch: 0 // loss: 0.053 epoch: 139, batch: 100 // loss: 0.046 epoch: 139, batch: 200 // loss: 0.043 epoch: 139, batch: 300 // loss: 0.049 epoch: 139, batch: 400 // loss: 0.046 epoch: 139, batch: 500 // loss: 0.040 epoch: 139, batch: 600 // loss: 0.040 epoch: 139, batch: 700 // loss: 0.044 epoch: 139, batch: 800 // loss: 0.042 epoch: 139, batch: 900 // loss: 0.049 epoch: 139, batch: 1000 // loss: 0.046 epoch: 139, batch: 1100 // loss: 0.042 epoch: 139, batch: 1200 // loss: 0.044 epoch: 139, batch: 1300 // loss: 0.046 epoch: 139, batch: 1400 // loss: 0.043 epoch: 139, batch: 1500 // loss: 0.048 epoch: 139, batch: 1600 // loss: 0.052 epoch: 139, batch: 1700 // loss: 0.045 epoch: 139, batch: 1800 // loss: 0.053 epoch: 139, batch: 1900 // loss: 0.045 epoch: 139, batch: 2000 // loss: 0.047 epoch: 139, batch: 2100 // loss: 0.046 epoch: 139, batch: 2200 // loss: 0.050 epoch: 139, batch: 2300 // loss: 0.049 epoch: 139, batch: 2400 // loss: 0.042 epoch: 139, batch: 2500 // loss: 0.042 epoch: 139, batch: 2600 // loss: 0.045 epoch: 139, batch: 2700 // loss: 0.042 epoch: 139, batch: 2800 // loss: 0.046 epoch: 139, batch: 2900 // loss: 0.042 epoch: 139, batch: 3000 // loss: 0.045 epoch: 139, batch: 3100 // loss: 0.043 epoch: 139, batch: 3200 // loss: 0.039 epoch: 139, batch: 3300 // loss: 0.038 epoch: 139, batch: 3400 // loss: 0.044 epoch: 139, batch: 3500 // loss: 0.035 epoch: 139, batch: 3600 // loss: 0.043 epoch: 139, batch: 3700 // loss: 0.043 epoch: 140, batch: 0 // loss: 0.053 epoch: 140, batch: 100 // loss: 0.046 epoch: 140, batch: 200 // 
loss: 0.043 epoch: 140, batch: 300 // loss: 0.049 epoch: 140, batch: 400 // loss: 0.046 epoch: 140, batch: 500 // loss: 0.040 epoch: 140, batch: 600 // loss: 0.040 epoch: 140, batch: 700 // loss: 0.044 epoch: 140, batch: 800 // loss: 0.042 epoch: 140, batch: 900 // loss: 0.049 epoch: 140, batch: 1000 // loss: 0.046 epoch: 140, batch: 1100 // loss: 0.042 epoch: 140, batch: 1200 // loss: 0.044 epoch: 140, batch: 1300 // loss: 0.046 epoch: 140, batch: 1400 // loss: 0.043 epoch: 140, batch: 1500 // loss: 0.048 epoch: 140, batch: 1600 // loss: 0.052 epoch: 140, batch: 1700 // loss: 0.045 epoch: 140, batch: 1800 // loss: 0.053 epoch: 140, batch: 1900 // loss: 0.045 epoch: 140, batch: 2000 // loss: 0.047 epoch: 140, batch: 2100 // loss: 0.046 epoch: 140, batch: 2200 // loss: 0.050 epoch: 140, batch: 2300 // loss: 0.049 epoch: 140, batch: 2400 // loss: 0.042 epoch: 140, batch: 2500 // loss: 0.042 epoch: 140, batch: 2600 // loss: 0.045 epoch: 140, batch: 2700 // loss: 0.042 epoch: 140, batch: 2800 // loss: 0.046 epoch: 140, batch: 2900 // loss: 0.042 epoch: 140, batch: 3000 // loss: 0.045 epoch: 140, batch: 3100 // loss: 0.043 epoch: 140, batch: 3200 // loss: 0.039 epoch: 140, batch: 3300 // loss: 0.038 epoch: 140, batch: 3400 // loss: 0.044 epoch: 140, batch: 3500 // loss: 0.035 epoch: 140, batch: 3600 // loss: 0.043 epoch: 140, batch: 3700 // loss: 0.043 epoch: 141, batch: 0 // loss: 0.053 epoch: 141, batch: 100 // loss: 0.046 epoch: 141, batch: 200 // loss: 0.043 epoch: 141, batch: 300 // loss: 0.049 epoch: 141, batch: 400 // loss: 0.046 epoch: 141, batch: 500 // loss: 0.040 epoch: 141, batch: 600 // loss: 0.040 epoch: 141, batch: 700 // loss: 0.044 epoch: 141, batch: 800 // loss: 0.042 epoch: 141, batch: 900 // loss: 0.049 epoch: 141, batch: 1000 // loss: 0.046 epoch: 141, batch: 1100 // loss: 0.042 epoch: 141, batch: 1200 // loss: 0.044 epoch: 141, batch: 1300 // loss: 0.046 epoch: 141, batch: 1400 // loss: 0.043 epoch: 141, batch: 1500 // loss: 0.048 epoch: 141, 
batch: 1600 // loss: 0.052 epoch: 141, batch: 1700 // loss: 0.045 epoch: 141, batch: 1800 // loss: 0.053 epoch: 141, batch: 1900 // loss: 0.045 epoch: 141, batch: 2000 // loss: 0.047 epoch: 141, batch: 2100 // loss: 0.046 epoch: 141, batch: 2200 // loss: 0.050 epoch: 141, batch: 2300 // loss: 0.049 epoch: 141, batch: 2400 // loss: 0.042 epoch: 141, batch: 2500 // loss: 0.042 epoch: 141, batch: 2600 // loss: 0.045 epoch: 141, batch: 2700 // loss: 0.042 epoch: 141, batch: 2800 // loss: 0.046 epoch: 141, batch: 2900 // loss: 0.042 epoch: 141, batch: 3000 // loss: 0.045 epoch: 141, batch: 3100 // loss: 0.043 epoch: 141, batch: 3200 // loss: 0.039 epoch: 141, batch: 3300 // loss: 0.038 epoch: 141, batch: 3400 // loss: 0.044 epoch: 141, batch: 3500 // loss: 0.035 epoch: 141, batch: 3600 // loss: 0.043 epoch: 141, batch: 3700 // loss: 0.043 epoch: 142, batch: 0 // loss: 0.053 epoch: 142, batch: 100 // loss: 0.046 epoch: 142, batch: 200 // loss: 0.043 epoch: 142, batch: 300 // loss: 0.049 epoch: 142, batch: 400 // loss: 0.046 epoch: 142, batch: 500 // loss: 0.040 epoch: 142, batch: 600 // loss: 0.040 epoch: 142, batch: 700 // loss: 0.044 epoch: 142, batch: 800 // loss: 0.042 epoch: 142, batch: 900 // loss: 0.049 epoch: 142, batch: 1000 // loss: 0.046 epoch: 142, batch: 1100 // loss: 0.042 epoch: 142, batch: 1200 // loss: 0.044 epoch: 142, batch: 1300 // loss: 0.046 epoch: 142, batch: 1400 // loss: 0.043 epoch: 142, batch: 1500 // loss: 0.048 epoch: 142, batch: 1600 // loss: 0.052 epoch: 142, batch: 1700 // loss: 0.045 epoch: 142, batch: 1800 // loss: 0.053 epoch: 142, batch: 1900 // loss: 0.045 epoch: 142, batch: 2000 // loss: 0.047 epoch: 142, batch: 2100 // loss: 0.046 epoch: 142, batch: 2200 // loss: 0.050 epoch: 142, batch: 2300 // loss: 0.049 epoch: 142, batch: 2400 // loss: 0.042 epoch: 142, batch: 2500 // loss: 0.042 epoch: 142, batch: 2600 // loss: 0.045 epoch: 142, batch: 2700 // loss: 0.042 epoch: 142, batch: 2800 // loss: 0.046 epoch: 142, batch: 2900 // loss: 
0.042 epoch: 142, batch: 3000 // loss: 0.045 epoch: 142, batch: 3100 // loss: 0.043 epoch: 142, batch: 3200 // loss: 0.039 epoch: 142, batch: 3300 // loss: 0.038 epoch: 142, batch: 3400 // loss: 0.044 epoch: 142, batch: 3500 // loss: 0.035 epoch: 142, batch: 3600 // loss: 0.043 epoch: 142, batch: 3700 // loss: 0.043 epoch: 143, batch: 0 // loss: 0.053 epoch: 143, batch: 100 // loss: 0.046 epoch: 143, batch: 200 // loss: 0.043 epoch: 143, batch: 300 // loss: 0.049 epoch: 143, batch: 400 // loss: 0.046 epoch: 143, batch: 500 // loss: 0.040 epoch: 143, batch: 600 // loss: 0.040 epoch: 143, batch: 700 // loss: 0.044 epoch: 143, batch: 800 // loss: 0.042 epoch: 143, batch: 900 // loss: 0.049 epoch: 143, batch: 1000 // loss: 0.046 epoch: 143, batch: 1100 // loss: 0.042 epoch: 143, batch: 1200 // loss: 0.044 epoch: 143, batch: 1300 // loss: 0.046 epoch: 143, batch: 1400 // loss: 0.043 epoch: 143, batch: 1500 // loss: 0.048 epoch: 143, batch: 1600 // loss: 0.052 epoch: 143, batch: 1700 // loss: 0.045 epoch: 143, batch: 1800 // loss: 0.053 epoch: 143, batch: 1900 // loss: 0.045 epoch: 143, batch: 2000 // loss: 0.047 epoch: 143, batch: 2100 // loss: 0.046 epoch: 143, batch: 2200 // loss: 0.050 epoch: 143, batch: 2300 // loss: 0.049 epoch: 143, batch: 2400 // loss: 0.042 epoch: 143, batch: 2500 // loss: 0.042 epoch: 143, batch: 2600 // loss: 0.045 epoch: 143, batch: 2700 // loss: 0.042 epoch: 143, batch: 2800 // loss: 0.046 epoch: 143, batch: 2900 // loss: 0.042 epoch: 143, batch: 3000 // loss: 0.045 epoch: 143, batch: 3100 // loss: 0.043 epoch: 143, batch: 3200 // loss: 0.039 epoch: 143, batch: 3300 // loss: 0.038 epoch: 143, batch: 3400 // loss: 0.044 epoch: 143, batch: 3500 // loss: 0.035 epoch: 143, batch: 3600 // loss: 0.043 epoch: 143, batch: 3700 // loss: 0.043 epoch: 144, batch: 0 // loss: 0.053 epoch: 144, batch: 100 // loss: 0.046 epoch: 144, batch: 200 // loss: 0.043 epoch: 144, batch: 300 // loss: 0.049 epoch: 144, batch: 400 // loss: 0.046 epoch: 144, batch: 500 
// loss: 0.040 epoch: 144, batch: 600 // loss: 0.040 epoch: 144, batch: 700 // loss: 0.044 epoch: 144, batch: 800 // loss: 0.042 epoch: 144, batch: 900 // loss: 0.049 epoch: 144, batch: 1000 // loss: 0.046 epoch: 144, batch: 1100 // loss: 0.042 epoch: 144, batch: 1200 // loss: 0.044 epoch: 144, batch: 1300 // loss: 0.046 epoch: 144, batch: 1400 // loss: 0.043 epoch: 144, batch: 1500 // loss: 0.048 epoch: 144, batch: 1600 // loss: 0.052 epoch: 144, batch: 1700 // loss: 0.045 epoch: 144, batch: 1800 // loss: 0.053 epoch: 144, batch: 1900 // loss: 0.045 epoch: 144, batch: 2000 // loss: 0.047 epoch: 144, batch: 2100 // loss: 0.046 epoch: 144, batch: 2200 // loss: 0.050 epoch: 144, batch: 2300 // loss: 0.049 epoch: 144, batch: 2400 // loss: 0.042 epoch: 144, batch: 2500 // loss: 0.042 epoch: 144, batch: 2600 // loss: 0.045 epoch: 144, batch: 2700 // loss: 0.042 epoch: 144, batch: 2800 // loss: 0.046 epoch: 144, batch: 2900 // loss: 0.042 epoch: 144, batch: 3000 // loss: 0.045 epoch: 144, batch: 3100 // loss: 0.043 epoch: 144, batch: 3200 // loss: 0.039 epoch: 144, batch: 3300 // loss: 0.038 epoch: 144, batch: 3400 // loss: 0.044 epoch: 144, batch: 3500 // loss: 0.035 epoch: 144, batch: 3600 // loss: 0.043 epoch: 144, batch: 3700 // loss: 0.043 epoch: 145, batch: 0 // loss: 0.053 epoch: 145, batch: 100 // loss: 0.046 epoch: 145, batch: 200 // loss: 0.043 epoch: 145, batch: 300 // loss: 0.049 epoch: 145, batch: 400 // loss: 0.046 epoch: 145, batch: 500 // loss: 0.040 epoch: 145, batch: 600 // loss: 0.040 epoch: 145, batch: 700 // loss: 0.044 epoch: 145, batch: 800 // loss: 0.042 epoch: 145, batch: 900 // loss: 0.049 epoch: 145, batch: 1000 // loss: 0.046 epoch: 145, batch: 1100 // loss: 0.042 epoch: 145, batch: 1200 // loss: 0.044 epoch: 145, batch: 1300 // loss: 0.046 epoch: 145, batch: 1400 // loss: 0.043 epoch: 145, batch: 1500 // loss: 0.048 epoch: 145, batch: 1600 // loss: 0.052 epoch: 145, batch: 1700 // loss: 0.045 epoch: 145, batch: 1800 // loss: 0.053 epoch: 145, 
batch: 1900 // loss: 0.045 epoch: 145, batch: 2000 // loss: 0.047 epoch: 145, batch: 2100 // loss: 0.046 epoch: 145, batch: 2200 // loss: 0.050 epoch: 145, batch: 2300 // loss: 0.049 epoch: 145, batch: 2400 // loss: 0.042 epoch: 145, batch: 2500 // loss: 0.042 epoch: 145, batch: 2600 // loss: 0.045 epoch: 145, batch: 2700 // loss: 0.042 epoch: 145, batch: 2800 // loss: 0.046 epoch: 145, batch: 2900 // loss: 0.042 epoch: 145, batch: 3000 // loss: 0.045 epoch: 145, batch: 3100 // loss: 0.043 epoch: 145, batch: 3200 // loss: 0.039 epoch: 145, batch: 3300 // loss: 0.038 epoch: 145, batch: 3400 // loss: 0.044 epoch: 145, batch: 3500 // loss: 0.035 epoch: 145, batch: 3600 // loss: 0.043 epoch: 145, batch: 3700 // loss: 0.043 epoch: 146, batch: 0 // loss: 0.053 epoch: 146, batch: 100 // loss: 0.046 epoch: 146, batch: 200 // loss: 0.043 epoch: 146, batch: 300 // loss: 0.049 epoch: 146, batch: 400 // loss: 0.046 epoch: 146, batch: 500 // loss: 0.040 epoch: 146, batch: 600 // loss: 0.040 epoch: 146, batch: 700 // loss: 0.044 epoch: 146, batch: 800 // loss: 0.042 epoch: 146, batch: 900 // loss: 0.049 epoch: 146, batch: 1000 // loss: 0.046 epoch: 146, batch: 1100 // loss: 0.042 epoch: 146, batch: 1200 // loss: 0.044 epoch: 146, batch: 1300 // loss: 0.046 epoch: 146, batch: 1400 // loss: 0.043 epoch: 146, batch: 1500 // loss: 0.048 epoch: 146, batch: 1600 // loss: 0.052 epoch: 146, batch: 1700 // loss: 0.045 epoch: 146, batch: 1800 // loss: 0.053 epoch: 146, batch: 1900 // loss: 0.045 epoch: 146, batch: 2000 // loss: 0.047 epoch: 146, batch: 2100 // loss: 0.046 epoch: 146, batch: 2200 // loss: 0.050 epoch: 146, batch: 2300 // loss: 0.049 epoch: 146, batch: 2400 // loss: 0.042 epoch: 146, batch: 2500 // loss: 0.042 epoch: 146, batch: 2600 // loss: 0.045 epoch: 146, batch: 2700 // loss: 0.042 epoch: 146, batch: 2800 // loss: 0.046 epoch: 146, batch: 2900 // loss: 0.042 epoch: 146, batch: 3000 // loss: 0.045 epoch: 146, batch: 3100 // loss: 0.043 epoch: 146, batch: 3200 // loss: 
0.039 epoch: 146, batch: 3300 // loss: 0.038 epoch: 146, batch: 3400 // loss: 0.044 epoch: 146, batch: 3500 // loss: 0.035 epoch: 146, batch: 3600 // loss: 0.043 epoch: 146, batch: 3700 // loss: 0.043 epoch: 147, batch: 0 // loss: 0.053 epoch: 147, batch: 100 // loss: 0.046 epoch: 147, batch: 200 // loss: 0.043 epoch: 147, batch: 300 // loss: 0.049 epoch: 147, batch: 400 // loss: 0.046 epoch: 147, batch: 500 // loss: 0.040 epoch: 147, batch: 600 // loss: 0.040 epoch: 147, batch: 700 // loss: 0.044 epoch: 147, batch: 800 // loss: 0.042 epoch: 147, batch: 900 // loss: 0.049 epoch: 147, batch: 1000 // loss: 0.046 epoch: 147, batch: 1100 // loss: 0.042 epoch: 147, batch: 1200 // loss: 0.044 epoch: 147, batch: 1300 // loss: 0.046 epoch: 147, batch: 1400 // loss: 0.043 epoch: 147, batch: 1500 // loss: 0.048 epoch: 147, batch: 1600 // loss: 0.052 epoch: 147, batch: 1700 // loss: 0.045 epoch: 147, batch: 1800 // loss: 0.053 epoch: 147, batch: 1900 // loss: 0.045 epoch: 147, batch: 2000 // loss: 0.047 epoch: 147, batch: 2100 // loss: 0.046 epoch: 147, batch: 2200 // loss: 0.050 epoch: 147, batch: 2300 // loss: 0.049 epoch: 147, batch: 2400 // loss: 0.042 epoch: 147, batch: 2500 // loss: 0.042 epoch: 147, batch: 2600 // loss: 0.045 epoch: 147, batch: 2700 // loss: 0.042 epoch: 147, batch: 2800 // loss: 0.046 epoch: 147, batch: 2900 // loss: 0.042 epoch: 147, batch: 3000 // loss: 0.045 epoch: 147, batch: 3100 // loss: 0.043 epoch: 147, batch: 3200 // loss: 0.039 epoch: 147, batch: 3300 // loss: 0.038 epoch: 147, batch: 3400 // loss: 0.044 epoch: 147, batch: 3500 // loss: 0.035 epoch: 147, batch: 3600 // loss: 0.043 epoch: 147, batch: 3700 // loss: 0.043 epoch: 148, batch: 0 // loss: 0.053 epoch: 148, batch: 100 // loss: 0.046 epoch: 148, batch: 200 // loss: 0.043 epoch: 148, batch: 300 // loss: 0.049 epoch: 148, batch: 400 // loss: 0.046 epoch: 148, batch: 500 // loss: 0.040 epoch: 148, batch: 600 // loss: 0.040 epoch: 148, batch: 700 // loss: 0.044 epoch: 148, batch: 800 // 
loss: 0.042 epoch: 148, batch: 900 // loss: 0.049 epoch: 148, batch: 1000 // loss: 0.046 epoch: 148, batch: 1100 // loss: 0.042 epoch: 148, batch: 1200 // loss: 0.044 epoch: 148, batch: 1300 // loss: 0.046 epoch: 148, batch: 1400 // loss: 0.043 epoch: 148, batch: 1500 // loss: 0.048 epoch: 148, batch: 1600 // loss: 0.052 epoch: 148, batch: 1700 // loss: 0.045 epoch: 148, batch: 1800 // loss: 0.053 epoch: 148, batch: 1900 // loss: 0.045 epoch: 148, batch: 2000 // loss: 0.047 epoch: 148, batch: 2100 // loss: 0.046 epoch: 148, batch: 2200 // loss: 0.050 epoch: 148, batch: 2300 // loss: 0.049 epoch: 148, batch: 2400 // loss: 0.042 epoch: 148, batch: 2500 // loss: 0.042 epoch: 148, batch: 2600 // loss: 0.045 epoch: 148, batch: 2700 // loss: 0.042 epoch: 148, batch: 2800 // loss: 0.046 epoch: 148, batch: 2900 // loss: 0.042 epoch: 148, batch: 3000 // loss: 0.045 epoch: 148, batch: 3100 // loss: 0.043 epoch: 148, batch: 3200 // loss: 0.039 epoch: 148, batch: 3300 // loss: 0.038 epoch: 148, batch: 3400 // loss: 0.044 epoch: 148, batch: 3500 // loss: 0.035 epoch: 148, batch: 3600 // loss: 0.043 epoch: 148, batch: 3700 // loss: 0.043 epoch: 149, batch: 0 // loss: 0.053 epoch: 149, batch: 100 // loss: 0.046 epoch: 149, batch: 200 // loss: 0.043 epoch: 149, batch: 300 // loss: 0.049 epoch: 149, batch: 400 // loss: 0.046 epoch: 149, batch: 500 // loss: 0.040 epoch: 149, batch: 600 // loss: 0.040 epoch: 149, batch: 700 // loss: 0.044 epoch: 149, batch: 800 // loss: 0.042 epoch: 149, batch: 900 // loss: 0.049 epoch: 149, batch: 1000 // loss: 0.046 epoch: 149, batch: 1100 // loss: 0.042 epoch: 149, batch: 1200 // loss: 0.044 epoch: 149, batch: 1300 // loss: 0.046 epoch: 149, batch: 1400 // loss: 0.043 epoch: 149, batch: 1500 // loss: 0.048 epoch: 149, batch: 1600 // loss: 0.052 epoch: 149, batch: 1700 // loss: 0.045 epoch: 149, batch: 1800 // loss: 0.053 epoch: 149, batch: 1900 // loss: 0.045 epoch: 149, batch: 2000 // loss: 0.047 epoch: 149, batch: 2100 // loss: 0.046 epoch: 149, 
batch: 2200 // loss: 0.050 epoch: 149, batch: 2300 // loss: 0.049 epoch: 149, batch: 2400 // loss: 0.042 epoch: 149, batch: 2500 // loss: 0.042 epoch: 149, batch: 2600 // loss: 0.045 epoch: 149, batch: 2700 // loss: 0.042 epoch: 149, batch: 2800 // loss: 0.046 epoch: 149, batch: 2900 // loss: 0.042 epoch: 149, batch: 3000 // loss: 0.045 epoch: 149, batch: 3100 // loss: 0.043 epoch: 149, batch: 3200 // loss: 0.039 epoch: 149, batch: 3300 // loss: 0.038 epoch: 149, batch: 3400 // loss: 0.044 epoch: 149, batch: 3500 // loss: 0.035 epoch: 149, batch: 3600 // loss: 0.043 epoch: 149, batch: 3700 // loss: 0.043 epoch: 150, batch: 0 // loss: 0.053 epoch: 150, batch: 100 // loss: 0.046 epoch: 150, batch: 200 // loss: 0.043 epoch: 150, batch: 300 // loss: 0.049 epoch: 150, batch: 400 // loss: 0.046 epoch: 150, batch: 500 // loss: 0.040 epoch: 150, batch: 600 // loss: 0.040 epoch: 150, batch: 700 // loss: 0.044 epoch: 150, batch: 800 // loss: 0.042 epoch: 150, batch: 900 // loss: 0.049 epoch: 150, batch: 1000 // loss: 0.046 epoch: 150, batch: 1100 // loss: 0.042 epoch: 150, batch: 1200 // loss: 0.044 epoch: 150, batch: 1300 // loss: 0.046 epoch: 150, batch: 1400 // loss: 0.043 epoch: 150, batch: 1500 // loss: 0.048 epoch: 150, batch: 1600 // loss: 0.052 epoch: 150, batch: 1700 // loss: 0.045 epoch: 150, batch: 1800 // loss: 0.053 epoch: 150, batch: 1900 // loss: 0.045 epoch: 150, batch: 2000 // loss: 0.047 epoch: 150, batch: 2100 // loss: 0.046 epoch: 150, batch: 2200 // loss: 0.050 epoch: 150, batch: 2300 // loss: 0.049 epoch: 150, batch: 2400 // loss: 0.042 epoch: 150, batch: 2500 // loss: 0.042 epoch: 150, batch: 2600 // loss: 0.045 epoch: 150, batch: 2700 // loss: 0.042 epoch: 150, batch: 2800 // loss: 0.046 epoch: 150, batch: 2900 // loss: 0.042 epoch: 150, batch: 3000 // loss: 0.045 epoch: 150, batch: 3100 // loss: 0.043 epoch: 150, batch: 3200 // loss: 0.039 epoch: 150, batch: 3300 // loss: 0.038 epoch: 150, batch: 3400 // loss: 0.044 epoch: 150, batch: 3500 // loss: 
0.035 epoch: 150, batch: 3600 // loss: 0.043 epoch: 150, batch: 3700 // loss: 0.043 epoch: 151, batch: 0 // loss: 0.053 epoch: 151, batch: 100 // loss: 0.046 epoch: 151, batch: 200 // loss: 0.043 epoch: 151, batch: 300 // loss: 0.049 epoch: 151, batch: 400 // loss: 0.046 epoch: 151, batch: 500 // loss: 0.040 epoch: 151, batch: 600 // loss: 0.040 epoch: 151, batch: 700 // loss: 0.044 epoch: 151, batch: 800 // loss: 0.042 epoch: 151, batch: 900 // loss: 0.049 epoch: 151, batch: 1000 // loss: 0.046 epoch: 151, batch: 1100 // loss: 0.042 epoch: 151, batch: 1200 // loss: 0.044 epoch: 151, batch: 1300 // loss: 0.046 epoch: 151, batch: 1400 // loss: 0.043 epoch: 151, batch: 1500 // loss: 0.048 epoch: 151, batch: 1600 // loss: 0.052 epoch: 151, batch: 1700 // loss: 0.045 epoch: 151, batch: 1800 // loss: 0.053 epoch: 151, batch: 1900 // loss: 0.045 epoch: 151, batch: 2000 // loss: 0.047 epoch: 151, batch: 2100 // loss: 0.046 epoch: 151, batch: 2200 // loss: 0.050 epoch: 151, batch: 2300 // loss: 0.049 epoch: 151, batch: 2400 // loss: 0.042 epoch: 151, batch: 2500 // loss: 0.042 epoch: 151, batch: 2600 // loss: 0.045 epoch: 151, batch: 2700 // loss: 0.042 epoch: 151, batch: 2800 // loss: 0.046 epoch: 151, batch: 2900 // loss: 0.042 epoch: 151, batch: 3000 // loss: 0.045 epoch: 151, batch: 3100 // loss: 0.043 epoch: 151, batch: 3200 // loss: 0.039 epoch: 151, batch: 3300 // loss: 0.038 epoch: 151, batch: 3400 // loss: 0.044 epoch: 151, batch: 3500 // loss: 0.035 epoch: 151, batch: 3600 // loss: 0.043 epoch: 151, batch: 3700 // loss: 0.043 epoch: 152, batch: 0 // loss: 0.053 epoch: 152, batch: 100 // loss: 0.046 epoch: 152, batch: 200 // loss: 0.043 epoch: 152, batch: 300 // loss: 0.049 epoch: 152, batch: 400 // loss: 0.046 epoch: 152, batch: 500 // loss: 0.040 epoch: 152, batch: 600 // loss: 0.040 epoch: 152, batch: 700 // loss: 0.044 epoch: 152, batch: 800 // loss: 0.042 epoch: 152, batch: 900 // loss: 0.049 epoch: 152, batch: 1000 // loss: 0.046 epoch: 152, batch: 1100 // 
loss: 0.042 epoch: 152, batch: 1200 // loss: 0.044 epoch: 152, batch: 1300 // loss: 0.046 epoch: 152, batch: 1400 // loss: 0.043 epoch: 152, batch: 1500 // loss: 0.048 epoch: 152, batch: 1600 // loss: 0.052 epoch: 152, batch: 1700 // loss: 0.045 epoch: 152, batch: 1800 // loss: 0.053 epoch: 152, batch: 1900 // loss: 0.045 epoch: 152, batch: 2000 // loss: 0.047 epoch: 152, batch: 2100 // loss: 0.046 epoch: 152, batch: 2200 // loss: 0.050 epoch: 152, batch: 2300 // loss: 0.049 epoch: 152, batch: 2400 // loss: 0.042 epoch: 152, batch: 2500 // loss: 0.042 epoch: 152, batch: 2600 // loss: 0.045 epoch: 152, batch: 2700 // loss: 0.042 epoch: 152, batch: 2800 // loss: 0.046 epoch: 152, batch: 2900 // loss: 0.042 epoch: 152, batch: 3000 // loss: 0.045 epoch: 152, batch: 3100 // loss: 0.043 epoch: 152, batch: 3200 // loss: 0.039 epoch: 152, batch: 3300 // loss: 0.038 epoch: 152, batch: 3400 // loss: 0.044 epoch: 152, batch: 3500 // loss: 0.035 epoch: 152, batch: 3600 // loss: 0.043 epoch: 152, batch: 3700 // loss: 0.043 epoch: 153, batch: 0 // loss: 0.053 epoch: 153, batch: 100 // loss: 0.046 epoch: 153, batch: 200 // loss: 0.043 epoch: 153, batch: 300 // loss: 0.049 epoch: 153, batch: 400 // loss: 0.046 epoch: 153, batch: 500 // loss: 0.040 epoch: 153, batch: 600 // loss: 0.040 epoch: 153, batch: 700 // loss: 0.044 epoch: 153, batch: 800 // loss: 0.042 epoch: 153, batch: 900 // loss: 0.049 epoch: 153, batch: 1000 // loss: 0.046 epoch: 153, batch: 1100 // loss: 0.042 epoch: 153, batch: 1200 // loss: 0.044 epoch: 153, batch: 1300 // loss: 0.046 epoch: 153, batch: 1400 // loss: 0.043 epoch: 153, batch: 1500 // loss: 0.048 epoch: 153, batch: 1600 // loss: 0.052 epoch: 153, batch: 1700 // loss: 0.045 epoch: 153, batch: 1800 // loss: 0.053 epoch: 153, batch: 1900 // loss: 0.045 epoch: 153, batch: 2000 // loss: 0.047 epoch: 153, batch: 2100 // loss: 0.046 epoch: 153, batch: 2200 // loss: 0.050 epoch: 153, batch: 2300 // loss: 0.049 epoch: 153, batch: 2400 // loss: 0.042 epoch: 
153, batch: 2500 // loss: 0.042 epoch: 153, batch: 2600 // loss: 0.045 epoch: 153, batch: 2700 // loss: 0.042 epoch: 153, batch: 2800 // loss: 0.046 epoch: 153, batch: 2900 // loss: 0.042 epoch: 153, batch: 3000 // loss: 0.045 epoch: 153, batch: 3100 // loss: 0.043 epoch: 153, batch: 3200 // loss: 0.039 epoch: 153, batch: 3300 // loss: 0.038 epoch: 153, batch: 3400 // loss: 0.044 epoch: 153, batch: 3500 // loss: 0.035 epoch: 153, batch: 3600 // loss: 0.043 epoch: 153, batch: 3700 // loss: 0.043 epoch: 154, batch: 0 // loss: 0.053 epoch: 154, batch: 100 // loss: 0.046 epoch: 154, batch: 200 // loss: 0.043 epoch: 154, batch: 300 // loss: 0.049 epoch: 154, batch: 400 // loss: 0.046 epoch: 154, batch: 500 // loss: 0.040 epoch: 154, batch: 600 // loss: 0.040 epoch: 154, batch: 700 // loss: 0.044 epoch: 154, batch: 800 // loss: 0.042 epoch: 154, batch: 900 // loss: 0.049 epoch: 154, batch: 1000 // loss: 0.046 epoch: 154, batch: 1100 // loss: 0.042 epoch: 154, batch: 1200 // loss: 0.044 epoch: 154, batch: 1300 // loss: 0.046 epoch: 154, batch: 1400 // loss: 0.043 epoch: 154, batch: 1500 // loss: 0.048 epoch: 154, batch: 1600 // loss: 0.052 epoch: 154, batch: 1700 // loss: 0.045 epoch: 154, batch: 1800 // loss: 0.053 epoch: 154, batch: 1900 // loss: 0.045 epoch: 154, batch: 2000 // loss: 0.047 epoch: 154, batch: 2100 // loss: 0.046 epoch: 154, batch: 2200 // loss: 0.050 epoch: 154, batch: 2300 // loss: 0.049 epoch: 154, batch: 2400 // loss: 0.042 epoch: 154, batch: 2500 // loss: 0.042 epoch: 154, batch: 2600 // loss: 0.045 epoch: 154, batch: 2700 // loss: 0.042 epoch: 154, batch: 2800 // loss: 0.046 epoch: 154, batch: 2900 // loss: 0.042 epoch: 154, batch: 3000 // loss: 0.045 epoch: 154, batch: 3100 // loss: 0.043 epoch: 154, batch: 3200 // loss: 0.039 epoch: 154, batch: 3300 // loss: 0.038 epoch: 154, batch: 3400 // loss: 0.044 epoch: 154, batch: 3500 // loss: 0.035 epoch: 154, batch: 3600 // loss: 0.043 epoch: 154, batch: 3700 // loss: 0.043 epoch: 155, batch: 0 // loss: 
0.053 epoch: 155, batch: 100 // loss: 0.046 epoch: 155, batch: 200 // loss: 0.043 epoch: 155, batch: 300 // loss: 0.049 epoch: 155, batch: 400 // loss: 0.046 epoch: 155, batch: 500 // loss: 0.040 epoch: 155, batch: 600 // loss: 0.040 epoch: 155, batch: 700 // loss: 0.044 epoch: 155, batch: 800 // loss: 0.042 epoch: 155, batch: 900 // loss: 0.049 epoch: 155, batch: 1000 // loss: 0.046 epoch: 155, batch: 1100 // loss: 0.042 epoch: 155, batch: 1200 // loss: 0.044 epoch: 155, batch: 1300 // loss: 0.046 epoch: 155, batch: 1400 // loss: 0.043 epoch: 155, batch: 1500 // loss: 0.048 epoch: 155, batch: 1600 // loss: 0.052 epoch: 155, batch: 1700 // loss: 0.045 epoch: 155, batch: 1800 // loss: 0.053 epoch: 155, batch: 1900 // loss: 0.045 epoch: 155, batch: 2000 // loss: 0.047 epoch: 155, batch: 2100 // loss: 0.046 epoch: 155, batch: 2200 // loss: 0.050 epoch: 155, batch: 2300 // loss: 0.049 epoch: 155, batch: 2400 // loss: 0.042 epoch: 155, batch: 2500 // loss: 0.042 epoch: 155, batch: 2600 // loss: 0.045 epoch: 155, batch: 2700 // loss: 0.042 epoch: 155, batch: 2800 // loss: 0.046 epoch: 155, batch: 2900 // loss: 0.042 epoch: 155, batch: 3000 // loss: 0.045 epoch: 155, batch: 3100 // loss: 0.043 epoch: 155, batch: 3200 // loss: 0.039 epoch: 155, batch: 3300 // loss: 0.038 epoch: 155, batch: 3400 // loss: 0.044 epoch: 155, batch: 3500 // loss: 0.035 epoch: 155, batch: 3600 // loss: 0.043 epoch: 155, batch: 3700 // loss: 0.043 epoch: 156, batch: 0 // loss: 0.053 epoch: 156, batch: 100 // loss: 0.046 epoch: 156, batch: 200 // loss: 0.043 epoch: 156, batch: 300 // loss: 0.049 epoch: 156, batch: 400 // loss: 0.046 epoch: 156, batch: 500 // loss: 0.040 epoch: 156, batch: 600 // loss: 0.040 epoch: 156, batch: 700 // loss: 0.044 epoch: 156, batch: 800 // loss: 0.042 epoch: 156, batch: 900 // loss: 0.049 epoch: 156, batch: 1000 // loss: 0.046 epoch: 156, batch: 1100 // loss: 0.042 epoch: 156, batch: 1200 // loss: 0.044 epoch: 156, batch: 1300 // loss: 0.046 epoch: 156, batch: 1400 
// loss: 0.043 epoch: 156, batch: 1500 // loss: 0.048 epoch: 156, batch: 1600 // loss: 0.052 epoch: 156, batch: 1700 // loss: 0.045 epoch: 156, batch: 1800 // loss: 0.053 epoch: 156, batch: 1900 // loss: 0.045 epoch: 156, batch: 2000 // loss: 0.047 epoch: 156, batch: 2100 // loss: 0.046 epoch: 156, batch: 2200 // loss: 0.050 epoch: 156, batch: 2300 // loss: 0.049 epoch: 156, batch: 2400 // loss: 0.042 epoch: 156, batch: 2500 // loss: 0.042 epoch: 156, batch: 2600 // loss: 0.045 epoch: 156, batch: 2700 // loss: 0.042 epoch: 156, batch: 2800 // loss: 0.046 epoch: 156, batch: 2900 // loss: 0.042 epoch: 156, batch: 3000 // loss: 0.045 epoch: 156, batch: 3100 // loss: 0.043 epoch: 156, batch: 3200 // loss: 0.039 epoch: 156, batch: 3300 // loss: 0.038 epoch: 156, batch: 3400 // loss: 0.044 epoch: 156, batch: 3500 // loss: 0.035 epoch: 156, batch: 3600 // loss: 0.043 epoch: 156, batch: 3700 // loss: 0.043 epoch: 157, batch: 0 // loss: 0.053 epoch: 157, batch: 100 // loss: 0.046 epoch: 157, batch: 200 // loss: 0.043 epoch: 157, batch: 300 // loss: 0.049 epoch: 157, batch: 400 // loss: 0.046 epoch: 157, batch: 500 // loss: 0.040 epoch: 157, batch: 600 // loss: 0.040 epoch: 157, batch: 700 // loss: 0.044 epoch: 157, batch: 800 // loss: 0.042 epoch: 157, batch: 900 // loss: 0.049 epoch: 157, batch: 1000 // loss: 0.046 epoch: 157, batch: 1100 // loss: 0.042 epoch: 157, batch: 1200 // loss: 0.044 epoch: 157, batch: 1300 // loss: 0.046 epoch: 157, batch: 1400 // loss: 0.043 epoch: 157, batch: 1500 // loss: 0.048 epoch: 157, batch: 1600 // loss: 0.052 epoch: 157, batch: 1700 // loss: 0.045 epoch: 157, batch: 1800 // loss: 0.053 epoch: 157, batch: 1900 // loss: 0.045 epoch: 157, batch: 2000 // loss: 0.047 epoch: 157, batch: 2100 // loss: 0.046 epoch: 157, batch: 2200 // loss: 0.050 epoch: 157, batch: 2300 // loss: 0.049 epoch: 157, batch: 2400 // loss: 0.042 epoch: 157, batch: 2500 // loss: 0.042 epoch: 157, batch: 2600 // loss: 0.045 epoch: 157, batch: 2700 // loss: 0.042 epoch: 
157, batch: 2800 // loss: 0.046 epoch: 157, batch: 2900 // loss: 0.042 epoch: 157, batch: 3000 // loss: 0.045 epoch: 157, batch: 3100 // loss: 0.043 epoch: 157, batch: 3200 // loss: 0.039 epoch: 157, batch: 3300 // loss: 0.038 epoch: 157, batch: 3400 // loss: 0.044 epoch: 157, batch: 3500 // loss: 0.035 epoch: 157, batch: 3600 // loss: 0.043 epoch: 157, batch: 3700 // loss: 0.043 epoch: 158, batch: 0 // loss: 0.053 epoch: 158, batch: 100 // loss: 0.046 epoch: 158, batch: 200 // loss: 0.043 epoch: 158, batch: 300 // loss: 0.049 epoch: 158, batch: 400 // loss: 0.046 epoch: 158, batch: 500 // loss: 0.040 epoch: 158, batch: 600 // loss: 0.040 epoch: 158, batch: 700 // loss: 0.044 epoch: 158, batch: 800 // loss: 0.042 epoch: 158, batch: 900 // loss: 0.049 epoch: 158, batch: 1000 // loss: 0.046 epoch: 158, batch: 1100 // loss: 0.042 epoch: 158, batch: 1200 // loss: 0.044 epoch: 158, batch: 1300 // loss: 0.046 epoch: 158, batch: 1400 // loss: 0.043 epoch: 158, batch: 1500 // loss: 0.048 epoch: 158, batch: 1600 // loss: 0.052 epoch: 158, batch: 1700 // loss: 0.045 epoch: 158, batch: 1800 // loss: 0.053 epoch: 158, batch: 1900 // loss: 0.045 epoch: 158, batch: 2000 // loss: 0.047 epoch: 158, batch: 2100 // loss: 0.046 epoch: 158, batch: 2200 // loss: 0.050 epoch: 158, batch: 2300 // loss: 0.049 epoch: 158, batch: 2400 // loss: 0.042 epoch: 158, batch: 2500 // loss: 0.042 epoch: 158, batch: 2600 // loss: 0.045 epoch: 158, batch: 2700 // loss: 0.042 epoch: 158, batch: 2800 // loss: 0.046 epoch: 158, batch: 2900 // loss: 0.042 epoch: 158, batch: 3000 // loss: 0.045 epoch: 158, batch: 3100 // loss: 0.043 epoch: 158, batch: 3200 // loss: 0.039 epoch: 158, batch: 3300 // loss: 0.038 epoch: 158, batch: 3400 // loss: 0.044 epoch: 158, batch: 3500 // loss: 0.035 epoch: 158, batch: 3600 // loss: 0.043 epoch: 158, batch: 3700 // loss: 0.043 epoch: 159, batch: 0 // loss: 0.053 epoch: 159, batch: 100 // loss: 0.046 epoch: 159, batch: 200 // loss: 0.043 epoch: 159, batch: 300 // loss: 
0.049 epoch: 159, batch: 400 // loss: 0.046 epoch: 159, batch: 500 // loss: 0.040 epoch: 159, batch: 600 // loss: 0.040 epoch: 159, batch: 700 // loss: 0.044 epoch: 159, batch: 800 // loss: 0.042 epoch: 159, batch: 900 // loss: 0.049 epoch: 159, batch: 1000 // loss: 0.046 epoch: 159, batch: 1100 // loss: 0.042 epoch: 159, batch: 1200 // loss: 0.044 epoch: 159, batch: 1300 // loss: 0.046 epoch: 159, batch: 1400 // loss: 0.043 epoch: 159, batch: 1500 // loss: 0.048 epoch: 159, batch: 1600 // loss: 0.052 epoch: 159, batch: 1700 // loss: 0.045 epoch: 159, batch: 1800 // loss: 0.053 epoch: 159, batch: 1900 // loss: 0.045 epoch: 159, batch: 2000 // loss: 0.047 epoch: 159, batch: 2100 // loss: 0.046 epoch: 159, batch: 2200 // loss: 0.050 epoch: 159, batch: 2300 // loss: 0.049 epoch: 159, batch: 2400 // loss: 0.042 epoch: 159, batch: 2500 // loss: 0.042 epoch: 159, batch: 2600 // loss: 0.045 epoch: 159, batch: 2700 // loss: 0.042 epoch: 159, batch: 2800 // loss: 0.046 epoch: 159, batch: 2900 // loss: 0.042 epoch: 159, batch: 3000 // loss: 0.045 epoch: 159, batch: 3100 // loss: 0.043 epoch: 159, batch: 3200 // loss: 0.039 epoch: 159, batch: 3300 // loss: 0.038 epoch: 159, batch: 3400 // loss: 0.044 epoch: 159, batch: 3500 // loss: 0.035 epoch: 159, batch: 3600 // loss: 0.043 epoch: 159, batch: 3700 // loss: 0.043 epoch: 160, batch: 0 // loss: 0.053 epoch: 160, batch: 100 // loss: 0.046 epoch: 160, batch: 200 // loss: 0.043 epoch: 160, batch: 300 // loss: 0.049 epoch: 160, batch: 400 // loss: 0.046 epoch: 160, batch: 500 // loss: 0.040 epoch: 160, batch: 600 // loss: 0.040 epoch: 160, batch: 700 // loss: 0.044 epoch: 160, batch: 800 // loss: 0.042 epoch: 160, batch: 900 // loss: 0.049 epoch: 160, batch: 1000 // loss: 0.046 epoch: 160, batch: 1100 // loss: 0.042 epoch: 160, batch: 1200 // loss: 0.044 epoch: 160, batch: 1300 // loss: 0.046 epoch: 160, batch: 1400 // loss: 0.043 epoch: 160, batch: 1500 // loss: 0.048 epoch: 160, batch: 1600 // loss: 0.052 epoch: 160, batch: 
1700 // loss: 0.045 epoch: 160, batch: 1800 // loss: 0.053 epoch: 160, batch: 1900 // loss: 0.045 epoch: 160, batch: 2000 // loss: 0.047 epoch: 160, batch: 2100 // loss: 0.046 epoch: 160, batch: 2200 // loss: 0.050 epoch: 160, batch: 2300 // loss: 0.049 epoch: 160, batch: 2400 // loss: 0.042 epoch: 160, batch: 2500 // loss: 0.042 epoch: 160, batch: 2600 // loss: 0.045 epoch: 160, batch: 2700 // loss: 0.042 epoch: 160, batch: 2800 // loss: 0.046 epoch: 160, batch: 2900 // loss: 0.042 epoch: 160, batch: 3000 // loss: 0.045 epoch: 160, batch: 3100 // loss: 0.043 epoch: 160, batch: 3200 // loss: 0.039 epoch: 160, batch: 3300 // loss: 0.038 epoch: 160, batch: 3400 // loss: 0.044 epoch: 160, batch: 3500 // loss: 0.035 epoch: 160, batch: 3600 // loss: 0.043 epoch: 160, batch: 3700 // loss: 0.043 epoch: 161, batch: 0 // loss: 0.053 epoch: 161, batch: 100 // loss: 0.046 epoch: 161, batch: 200 // loss: 0.043 epoch: 161, batch: 300 // loss: 0.049 epoch: 161, batch: 400 // loss: 0.046 epoch: 161, batch: 500 // loss: 0.040 epoch: 161, batch: 600 // loss: 0.040 epoch: 161, batch: 700 // loss: 0.044 epoch: 161, batch: 800 // loss: 0.042 epoch: 161, batch: 900 // loss: 0.049 epoch: 161, batch: 1000 // loss: 0.046 epoch: 161, batch: 1100 // loss: 0.042 epoch: 161, batch: 1200 // loss: 0.044 epoch: 161, batch: 1300 // loss: 0.046 epoch: 161, batch: 1400 // loss: 0.043 epoch: 161, batch: 1500 // loss: 0.048 epoch: 161, batch: 1600 // loss: 0.052 epoch: 161, batch: 1700 // loss: 0.045 epoch: 161, batch: 1800 // loss: 0.053 epoch: 161, batch: 1900 // loss: 0.045 epoch: 161, batch: 2000 // loss: 0.047 epoch: 161, batch: 2100 // loss: 0.046 epoch: 161, batch: 2200 // loss: 0.050 epoch: 161, batch: 2300 // loss: 0.049 epoch: 161, batch: 2400 // loss: 0.042 epoch: 161, batch: 2500 // loss: 0.042 epoch: 161, batch: 2600 // loss: 0.045 epoch: 161, batch: 2700 // loss: 0.042 epoch: 161, batch: 2800 // loss: 0.046 epoch: 161, batch: 2900 // loss: 0.042 epoch: 161, batch: 3000 // loss: 0.045 
epoch: 161, batch: 3100 // loss: 0.043 epoch: 161, batch: 3200 // loss: 0.039 epoch: 161, batch: 3300 // loss: 0.038 epoch: 161, batch: 3400 // loss: 0.044 epoch: 161, batch: 3500 // loss: 0.035 epoch: 161, batch: 3600 // loss: 0.043 epoch: 161, batch: 3700 // loss: 0.043 epoch: 162, batch: 0 // loss: 0.053 epoch: 162, batch: 100 // loss: 0.046 epoch: 162, batch: 200 // loss: 0.043 epoch: 162, batch: 300 // loss: 0.049 epoch: 162, batch: 400 // loss: 0.046 epoch: 162, batch: 500 // loss: 0.040 epoch: 162, batch: 600 // loss: 0.040 epoch: 162, batch: 700 // loss: 0.044 epoch: 162, batch: 800 // loss: 0.042 epoch: 162, batch: 900 // loss: 0.049 epoch: 162, batch: 1000 // loss: 0.046 epoch: 162, batch: 1100 // loss: 0.042 epoch: 162, batch: 1200 // loss: 0.044 epoch: 162, batch: 1300 // loss: 0.046 epoch: 162, batch: 1400 // loss: 0.043 epoch: 162, batch: 1500 // loss: 0.048 epoch: 162, batch: 1600 // loss: 0.052 epoch: 162, batch: 1700 // loss: 0.045 epoch: 162, batch: 1800 // loss: 0.053 epoch: 162, batch: 1900 // loss: 0.045 epoch: 162, batch: 2000 // loss: 0.047 epoch: 162, batch: 2100 // loss: 0.046 epoch: 162, batch: 2200 // loss: 0.050 epoch: 162, batch: 2300 // loss: 0.049 epoch: 162, batch: 2400 // loss: 0.042 epoch: 162, batch: 2500 // loss: 0.042 epoch: 162, batch: 2600 // loss: 0.045 epoch: 162, batch: 2700 // loss: 0.042 epoch: 162, batch: 2800 // loss: 0.046 epoch: 162, batch: 2900 // loss: 0.042 epoch: 162, batch: 3000 // loss: 0.045 epoch: 162, batch: 3100 // loss: 0.043 epoch: 162, batch: 3200 // loss: 0.039 epoch: 162, batch: 3300 // loss: 0.038 epoch: 162, batch: 3400 // loss: 0.044 epoch: 162, batch: 3500 // loss: 0.035 epoch: 162, batch: 3600 // loss: 0.043 epoch: 162, batch: 3700 // loss: 0.043 epoch: 163, batch: 0 // loss: 0.053 epoch: 163, batch: 100 // loss: 0.046 epoch: 163, batch: 200 // loss: 0.043 epoch: 163, batch: 300 // loss: 0.049 epoch: 163, batch: 400 // loss: 0.046 epoch: 163, batch: 500 // loss: 0.040 epoch: 163, batch: 600 // 
loss: 0.040 epoch: 163, batch: 700 // loss: 0.044 epoch: 163, batch: 800 // loss: 0.042 epoch: 163, batch: 900 // loss: 0.049 epoch: 163, batch: 1000 // loss: 0.046 epoch: 163, batch: 1100 // loss: 0.042 epoch: 163, batch: 1200 // loss: 0.044 epoch: 163, batch: 1300 // loss: 0.046 epoch: 163, batch: 1400 // loss: 0.043 epoch: 163, batch: 1500 // loss: 0.048 epoch: 163, batch: 1600 // loss: 0.052 epoch: 163, batch: 1700 // loss: 0.045 epoch: 163, batch: 1800 // loss: 0.053 epoch: 163, batch: 1900 // loss: 0.045 epoch: 163, batch: 2000 // loss: 0.047 epoch: 163, batch: 2100 // loss: 0.046 epoch: 163, batch: 2200 // loss: 0.050 epoch: 163, batch: 2300 // loss: 0.049 epoch: 163, batch: 2400 // loss: 0.042 epoch: 163, batch: 2500 // loss: 0.042 epoch: 163, batch: 2600 // loss: 0.045 epoch: 163, batch: 2700 // loss: 0.042 epoch: 163, batch: 2800 // loss: 0.046 epoch: 163, batch: 2900 // loss: 0.042 epoch: 163, batch: 3000 // loss: 0.045 epoch: 163, batch: 3100 // loss: 0.043 epoch: 163, batch: 3200 // loss: 0.039 epoch: 163, batch: 3300 // loss: 0.038 epoch: 163, batch: 3400 // loss: 0.044 epoch: 163, batch: 3500 // loss: 0.035 epoch: 163, batch: 3600 // loss: 0.043 epoch: 163, batch: 3700 // loss: 0.043 epoch: 164, batch: 0 // loss: 0.053 epoch: 164, batch: 100 // loss: 0.046 epoch: 164, batch: 200 // loss: 0.043 epoch: 164, batch: 300 // loss: 0.049 epoch: 164, batch: 400 // loss: 0.046 epoch: 164, batch: 500 // loss: 0.040 epoch: 164, batch: 600 // loss: 0.040 epoch: 164, batch: 700 // loss: 0.044 epoch: 164, batch: 800 // loss: 0.042 epoch: 164, batch: 900 // loss: 0.049 epoch: 164, batch: 1000 // loss: 0.046 epoch: 164, batch: 1100 // loss: 0.042 epoch: 164, batch: 1200 // loss: 0.044 epoch: 164, batch: 1300 // loss: 0.046 epoch: 164, batch: 1400 // loss: 0.043 epoch: 164, batch: 1500 // loss: 0.048 epoch: 164, batch: 1600 // loss: 0.052 epoch: 164, batch: 1700 // loss: 0.045 epoch: 164, batch: 1800 // loss: 0.053 epoch: 164, batch: 1900 // loss: 0.045 epoch: 164, 
batch: 2000 // loss: 0.047 epoch: 164, batch: 2100 // loss: 0.046 epoch: 164, batch: 2200 // loss: 0.050 epoch: 164, batch: 2300 // loss: 0.049 epoch: 164, batch: 2400 // loss: 0.042 epoch: 164, batch: 2500 // loss: 0.042 epoch: 164, batch: 2600 // loss: 0.045 epoch: 164, batch: 2700 // loss: 0.042 epoch: 164, batch: 2800 // loss: 0.046 epoch: 164, batch: 2900 // loss: 0.042 epoch: 164, batch: 3000 // loss: 0.045 epoch: 164, batch: 3100 // loss: 0.043 epoch: 164, batch: 3200 // loss: 0.039 epoch: 164, batch: 3300 // loss: 0.038 epoch: 164, batch: 3400 // loss: 0.044 epoch: 164, batch: 3500 // loss: 0.035 epoch: 164, batch: 3600 // loss: 0.043 epoch: 164, batch: 3700 // loss: 0.043 epoch: 165, batch: 0 // loss: 0.053 epoch: 165, batch: 100 // loss: 0.046 epoch: 165, batch: 200 // loss: 0.043 epoch: 165, batch: 300 // loss: 0.049 epoch: 165, batch: 400 // loss: 0.046 epoch: 165, batch: 500 // loss: 0.040 epoch: 165, batch: 600 // loss: 0.040 epoch: 165, batch: 700 // loss: 0.044 epoch: 165, batch: 800 // loss: 0.042 epoch: 165, batch: 900 // loss: 0.049 epoch: 165, batch: 1000 // loss: 0.046 epoch: 165, batch: 1100 // loss: 0.042 epoch: 165, batch: 1200 // loss: 0.044 epoch: 165, batch: 1300 // loss: 0.046 epoch: 165, batch: 1400 // loss: 0.043 epoch: 165, batch: 1500 // loss: 0.048 epoch: 165, batch: 1600 // loss: 0.052 epoch: 165, batch: 1700 // loss: 0.045 epoch: 165, batch: 1800 // loss: 0.053 epoch: 165, batch: 1900 // loss: 0.045 epoch: 165, batch: 2000 // loss: 0.047 epoch: 165, batch: 2100 // loss: 0.046 epoch: 165, batch: 2200 // loss: 0.050 epoch: 165, batch: 2300 // loss: 0.049 epoch: 165, batch: 2400 // loss: 0.042 epoch: 165, batch: 2500 // loss: 0.042 epoch: 165, batch: 2600 // loss: 0.045 epoch: 165, batch: 2700 // loss: 0.042 epoch: 165, batch: 2800 // loss: 0.046 epoch: 165, batch: 2900 // loss: 0.042 epoch: 165, batch: 3000 // loss: 0.045 epoch: 165, batch: 3100 // loss: 0.043 epoch: 165, batch: 3200 // loss: 0.039 epoch: 165, batch: 3300 // loss: 
0.038 epoch: 165, batch: 3400 // loss: 0.044 epoch: 165, batch: 3500 // loss: 0.035 epoch: 165, batch: 3600 // loss: 0.043 epoch: 165, batch: 3700 // loss: 0.043 epoch: 166, batch: 0 // loss: 0.053 epoch: 166, batch: 100 // loss: 0.046 epoch: 166, batch: 200 // loss: 0.043 epoch: 166, batch: 300 // loss: 0.049 epoch: 166, batch: 400 // loss: 0.046 epoch: 166, batch: 500 // loss: 0.040 epoch: 166, batch: 600 // loss: 0.040 epoch: 166, batch: 700 // loss: 0.044 epoch: 166, batch: 800 // loss: 0.042 epoch: 166, batch: 900 // loss: 0.049 epoch: 166, batch: 1000 // loss: 0.046 epoch: 166, batch: 1100 // loss: 0.042 epoch: 166, batch: 1200 // loss: 0.044 epoch: 166, batch: 1300 // loss: 0.046 epoch: 166, batch: 1400 // loss: 0.043 epoch: 166, batch: 1500 // loss: 0.048 epoch: 166, batch: 1600 // loss: 0.052 epoch: 166, batch: 1700 // loss: 0.045 epoch: 166, batch: 1800 // loss: 0.053 epoch: 166, batch: 1900 // loss: 0.045 epoch: 166, batch: 2000 // loss: 0.047 epoch: 166, batch: 2100 // loss: 0.046 epoch: 166, batch: 2200 // loss: 0.050 epoch: 166, batch: 2300 // loss: 0.049 epoch: 166, batch: 2400 // loss: 0.042 epoch: 166, batch: 2500 // loss: 0.042 epoch: 166, batch: 2600 // loss: 0.045 epoch: 166, batch: 2700 // loss: 0.042 epoch: 166, batch: 2800 // loss: 0.046 epoch: 166, batch: 2900 // loss: 0.042 epoch: 166, batch: 3000 // loss: 0.045 epoch: 166, batch: 3100 // loss: 0.043 epoch: 166, batch: 3200 // loss: 0.039 epoch: 166, batch: 3300 // loss: 0.038 epoch: 166, batch: 3400 // loss: 0.044 epoch: 166, batch: 3500 // loss: 0.035 epoch: 166, batch: 3600 // loss: 0.043 epoch: 166, batch: 3700 // loss: 0.043 epoch: 167, batch: 0 // loss: 0.053 epoch: 167, batch: 100 // loss: 0.046 epoch: 167, batch: 200 // loss: 0.043 epoch: 167, batch: 300 // loss: 0.049 epoch: 167, batch: 400 // loss: 0.046 epoch: 167, batch: 500 // loss: 0.040 epoch: 167, batch: 600 // loss: 0.040 epoch: 167, batch: 700 // loss: 0.044 epoch: 167, batch: 800 // loss: 0.042 epoch: 167, batch: 900 // 
loss: 0.049 epoch: 167, batch: 1000 // loss: 0.046 epoch: 167, batch: 1100 // loss: 0.042 epoch: 167, batch: 1200 // loss: 0.044 epoch: 167, batch: 1300 // loss: 0.046 epoch: 167, batch: 1400 // loss: 0.043 epoch: 167, batch: 1500 // loss: 0.048 epoch: 167, batch: 1600 // loss: 0.052 epoch: 167, batch: 1700 // loss: 0.045 epoch: 167, batch: 1800 // loss: 0.053 epoch: 167, batch: 1900 // loss: 0.045 epoch: 167, batch: 2000 // loss: 0.047 epoch: 167, batch: 2100 // loss: 0.046 epoch: 167, batch: 2200 // loss: 0.050 epoch: 167, batch: 2300 // loss: 0.049 epoch: 167, batch: 2400 // loss: 0.042 epoch: 167, batch: 2500 // loss: 0.042 epoch: 167, batch: 2600 // loss: 0.045 epoch: 167, batch: 2700 // loss: 0.042 epoch: 167, batch: 2800 // loss: 0.046 epoch: 167, batch: 2900 // loss: 0.042 epoch: 167, batch: 3000 // loss: 0.045 epoch: 167, batch: 3100 // loss: 0.043 epoch: 167, batch: 3200 // loss: 0.039 epoch: 167, batch: 3300 // loss: 0.038 epoch: 167, batch: 3400 // loss: 0.044 epoch: 167, batch: 3500 // loss: 0.035 epoch: 167, batch: 3600 // loss: 0.043 epoch: 167, batch: 3700 // loss: 0.043 epoch: 168, batch: 0 // loss: 0.053 epoch: 168, batch: 100 // loss: 0.046 epoch: 168, batch: 200 // loss: 0.043 epoch: 168, batch: 300 // loss: 0.049 epoch: 168, batch: 400 // loss: 0.046 epoch: 168, batch: 500 // loss: 0.040 epoch: 168, batch: 600 // loss: 0.040 epoch: 168, batch: 700 // loss: 0.044 epoch: 168, batch: 800 // loss: 0.042 epoch: 168, batch: 900 // loss: 0.049 epoch: 168, batch: 1000 // loss: 0.046 epoch: 168, batch: 1100 // loss: 0.042 epoch: 168, batch: 1200 // loss: 0.044 epoch: 168, batch: 1300 // loss: 0.046 epoch: 168, batch: 1400 // loss: 0.043 epoch: 168, batch: 1500 // loss: 0.048 epoch: 168, batch: 1600 // loss: 0.052 epoch: 168, batch: 1700 // loss: 0.045 epoch: 168, batch: 1800 // loss: 0.053 epoch: 168, batch: 1900 // loss: 0.045 epoch: 168, batch: 2000 // loss: 0.047 epoch: 168, batch: 2100 // loss: 0.046 epoch: 168, batch: 2200 // loss: 0.050 epoch: 
168, batch: 2300 // loss: 0.049 epoch: 168, batch: 2400 // loss: 0.042 epoch: 168, batch: 2500 // loss: 0.042 epoch: 168, batch: 2600 // loss: 0.045 epoch: 168, batch: 2700 // loss: 0.042 epoch: 168, batch: 2800 // loss: 0.046 epoch: 168, batch: 2900 // loss: 0.042 epoch: 168, batch: 3000 // loss: 0.045 epoch: 168, batch: 3100 // loss: 0.043 epoch: 168, batch: 3200 // loss: 0.039 epoch: 168, batch: 3300 // loss: 0.038 epoch: 168, batch: 3400 // loss: 0.044 epoch: 168, batch: 3500 // loss: 0.035 epoch: 168, batch: 3600 // loss: 0.043 epoch: 168, batch: 3700 // loss: 0.043 epoch: 169, batch: 0 // loss: 0.053 epoch: 169, batch: 100 // loss: 0.046 epoch: 169, batch: 200 // loss: 0.043 epoch: 169, batch: 300 // loss: 0.049 epoch: 169, batch: 400 // loss: 0.046 epoch: 169, batch: 500 // loss: 0.040 epoch: 169, batch: 600 // loss: 0.040 epoch: 169, batch: 700 // loss: 0.044 epoch: 169, batch: 800 // loss: 0.042 epoch: 169, batch: 900 // loss: 0.049 epoch: 169, batch: 1000 // loss: 0.046 epoch: 169, batch: 1100 // loss: 0.042 epoch: 169, batch: 1200 // loss: 0.044 epoch: 169, batch: 1300 // loss: 0.046 epoch: 169, batch: 1400 // loss: 0.043 epoch: 169, batch: 1500 // loss: 0.048 epoch: 169, batch: 1600 // loss: 0.052 epoch: 169, batch: 1700 // loss: 0.045 epoch: 169, batch: 1800 // loss: 0.053 epoch: 169, batch: 1900 // loss: 0.045 epoch: 169, batch: 2000 // loss: 0.047 epoch: 169, batch: 2100 // loss: 0.046 epoch: 169, batch: 2200 // loss: 0.050 epoch: 169, batch: 2300 // loss: 0.049 epoch: 169, batch: 2400 // loss: 0.042 epoch: 169, batch: 2500 // loss: 0.042 epoch: 169, batch: 2600 // loss: 0.045 epoch: 169, batch: 2700 // loss: 0.042 epoch: 169, batch: 2800 // loss: 0.046 epoch: 169, batch: 2900 // loss: 0.042 epoch: 169, batch: 3000 // loss: 0.045 epoch: 169, batch: 3100 // loss: 0.043 epoch: 169, batch: 3200 // loss: 0.039 epoch: 169, batch: 3300 // loss: 0.038 epoch: 169, batch: 3400 // loss: 0.044 epoch: 169, batch: 3500 // loss: 0.035 epoch: 169, batch: 3600 // 
loss: 0.043 epoch: 169, batch: 3700 // loss: 0.043 epoch: 170, batch: 0 // loss: 0.053 epoch: 170, batch: 100 // loss: 0.046 epoch: 170, batch: 200 // loss: 0.043 epoch: 170, batch: 300 // loss: 0.049 epoch: 170, batch: 400 // loss: 0.046 epoch: 170, batch: 500 // loss: 0.040 epoch: 170, batch: 600 // loss: 0.040 epoch: 170, batch: 700 // loss: 0.044 epoch: 170, batch: 800 // loss: 0.042 epoch: 170, batch: 900 // loss: 0.049 epoch: 170, batch: 1000 // loss: 0.046 epoch: 170, batch: 1100 // loss: 0.042 epoch: 170, batch: 1200 // loss: 0.044 epoch: 170, batch: 1300 // loss: 0.046 epoch: 170, batch: 1400 // loss: 0.043 epoch: 170, batch: 1500 // loss: 0.048 epoch: 170, batch: 1600 // loss: 0.052 epoch: 170, batch: 1700 // loss: 0.045 epoch: 170, batch: 1800 // loss: 0.053 epoch: 170, batch: 1900 // loss: 0.045 epoch: 170, batch: 2000 // loss: 0.047 epoch: 170, batch: 2100 // loss: 0.046 epoch: 170, batch: 2200 // loss: 0.050 epoch: 170, batch: 2300 // loss: 0.049 epoch: 170, batch: 2400 // loss: 0.042 epoch: 170, batch: 2500 // loss: 0.042 epoch: 170, batch: 2600 // loss: 0.045 epoch: 170, batch: 2700 // loss: 0.042 epoch: 170, batch: 2800 // loss: 0.046 epoch: 170, batch: 2900 // loss: 0.042 epoch: 170, batch: 3000 // loss: 0.045 epoch: 170, batch: 3100 // loss: 0.043 epoch: 170, batch: 3200 // loss: 0.039 epoch: 170, batch: 3300 // loss: 0.038 epoch: 170, batch: 3400 // loss: 0.044 epoch: 170, batch: 3500 // loss: 0.035 epoch: 170, batch: 3600 // loss: 0.043 epoch: 170, batch: 3700 // loss: 0.043 epoch: 171, batch: 0 // loss: 0.053 epoch: 171, batch: 100 // loss: 0.046 epoch: 171, batch: 200 // loss: 0.043 epoch: 171, batch: 300 // loss: 0.049 epoch: 171, batch: 400 // loss: 0.046 epoch: 171, batch: 500 // loss: 0.040 epoch: 171, batch: 600 // loss: 0.040 epoch: 171, batch: 700 // loss: 0.044 epoch: 171, batch: 800 // loss: 0.042 epoch: 171, batch: 900 // loss: 0.049 epoch: 171, batch: 1000 // loss: 0.046 epoch: 171, batch: 1100 // loss: 0.042 epoch: 171, batch: 
1200 // loss: 0.044 epoch: 171, batch: 1300 // loss: 0.046 epoch: 171, batch: 1400 // loss: 0.043 epoch: 171, batch: 1500 // loss: 0.048 epoch: 171, batch: 1600 // loss: 0.052 epoch: 171, batch: 1700 // loss: 0.045 epoch: 171, batch: 1800 // loss: 0.053 epoch: 171, batch: 1900 // loss: 0.045 epoch: 171, batch: 2000 // loss: 0.047 epoch: 171, batch: 2100 // loss: 0.046 epoch: 171, batch: 2200 // loss: 0.050 epoch: 171, batch: 2300 // loss: 0.049 epoch: 171, batch: 2400 // loss: 0.042 epoch: 171, batch: 2500 // loss: 0.042 epoch: 171, batch: 2600 // loss: 0.045 epoch: 171, batch: 2700 // loss: 0.042 epoch: 171, batch: 2800 // loss: 0.046 epoch: 171, batch: 2900 // loss: 0.042 epoch: 171, batch: 3000 // loss: 0.045 epoch: 171, batch: 3100 // loss: 0.043 epoch: 171, batch: 3200 // loss: 0.039 epoch: 171, batch: 3300 // loss: 0.038 epoch: 171, batch: 3400 // loss: 0.044 epoch: 171, batch: 3500 // loss: 0.035 epoch: 171, batch: 3600 // loss: 0.043 epoch: 171, batch: 3700 // loss: 0.043 epoch: 172, batch: 0 // loss: 0.053 epoch: 172, batch: 100 // loss: 0.046 epoch: 172, batch: 200 // loss: 0.043 epoch: 172, batch: 300 // loss: 0.049 epoch: 172, batch: 400 // loss: 0.046 epoch: 172, batch: 500 // loss: 0.040 epoch: 172, batch: 600 // loss: 0.040 epoch: 172, batch: 700 // loss: 0.044 epoch: 172, batch: 800 // loss: 0.042 epoch: 172, batch: 900 // loss: 0.049 epoch: 172, batch: 1000 // loss: 0.046 epoch: 172, batch: 1100 // loss: 0.042 epoch: 172, batch: 1200 // loss: 0.044 epoch: 172, batch: 1300 // loss: 0.046 epoch: 172, batch: 1400 // loss: 0.043 epoch: 172, batch: 1500 // loss: 0.048 epoch: 172, batch: 1600 // loss: 0.052 epoch: 172, batch: 1700 // loss: 0.045 epoch: 172, batch: 1800 // loss: 0.053 epoch: 172, batch: 1900 // loss: 0.045 epoch: 172, batch: 2000 // loss: 0.047 epoch: 172, batch: 2100 // loss: 0.046 epoch: 172, batch: 2200 // loss: 0.050 epoch: 172, batch: 2300 // loss: 0.049 epoch: 172, batch: 2400 // loss: 0.042 epoch: 172, batch: 2500 // loss: 0.042 
epoch: 172, batch: 2600 // loss: 0.045 epoch: 172, batch: 2700 // loss: 0.042 epoch: 172, batch: 2800 // loss: 0.046 epoch: 172, batch: 2900 // loss: 0.042 epoch: 172, batch: 3000 // loss: 0.045 epoch: 172, batch: 3100 // loss: 0.043 epoch: 172, batch: 3200 // loss: 0.039 epoch: 172, batch: 3300 // loss: 0.038 epoch: 172, batch: 3400 // loss: 0.044 epoch: 172, batch: 3500 // loss: 0.035 epoch: 172, batch: 3600 // loss: 0.043 epoch: 172, batch: 3700 // loss: 0.043 epoch: 173, batch: 0 // loss: 0.053 epoch: 173, batch: 100 // loss: 0.046 epoch: 173, batch: 200 // loss: 0.043 epoch: 173, batch: 300 // loss: 0.049 epoch: 173, batch: 400 // loss: 0.046 epoch: 173, batch: 500 // loss: 0.040 epoch: 173, batch: 600 // loss: 0.040 epoch: 173, batch: 700 // loss: 0.044 epoch: 173, batch: 800 // loss: 0.042 epoch: 173, batch: 900 // loss: 0.049 epoch: 173, batch: 1000 // loss: 0.046 epoch: 173, batch: 1100 // loss: 0.042 epoch: 173, batch: 1200 // loss: 0.044 epoch: 173, batch: 1300 // loss: 0.046 epoch: 173, batch: 1400 // loss: 0.043 epoch: 173, batch: 1500 // loss: 0.048 epoch: 173, batch: 1600 // loss: 0.052 epoch: 173, batch: 1700 // loss: 0.045 epoch: 173, batch: 1800 // loss: 0.053 epoch: 173, batch: 1900 // loss: 0.045 epoch: 173, batch: 2000 // loss: 0.047 epoch: 173, batch: 2100 // loss: 0.046 epoch: 173, batch: 2200 // loss: 0.050 epoch: 173, batch: 2300 // loss: 0.049 epoch: 173, batch: 2400 // loss: 0.042 epoch: 173, batch: 2500 // loss: 0.042 epoch: 173, batch: 2600 // loss: 0.045 epoch: 173, batch: 2700 // loss: 0.042 epoch: 173, batch: 2800 // loss: 0.046 epoch: 173, batch: 2900 // loss: 0.042 epoch: 173, batch: 3000 // loss: 0.045 epoch: 173, batch: 3100 // loss: 0.043 epoch: 173, batch: 3200 // loss: 0.039 epoch: 173, batch: 3300 // loss: 0.038 epoch: 173, batch: 3400 // loss: 0.044 epoch: 173, batch: 3500 // loss: 0.035 epoch: 173, batch: 3600 // loss: 0.043 epoch: 173, batch: 3700 // loss: 0.043 epoch: 174, batch: 0 // loss: 0.053 epoch: 174, batch: 100 // 
loss: 0.046 epoch: 174, batch: 200 // loss: 0.043 epoch: 174, batch: 300 // loss: 0.049 epoch: 174, batch: 400 // loss: 0.046 epoch: 174, batch: 500 // loss: 0.040 epoch: 174, batch: 600 // loss: 0.040 epoch: 174, batch: 700 // loss: 0.044 epoch: 174, batch: 800 // loss: 0.042 epoch: 174, batch: 900 // loss: 0.049 epoch: 174, batch: 1000 // loss: 0.046 epoch: 174, batch: 1100 // loss: 0.042 epoch: 174, batch: 1200 // loss: 0.044 epoch: 174, batch: 1300 // loss: 0.046 epoch: 174, batch: 1400 // loss: 0.043 epoch: 174, batch: 1500 // loss: 0.048 epoch: 174, batch: 1600 // loss: 0.052 epoch: 174, batch: 1700 // loss: 0.045 epoch: 174, batch: 1800 // loss: 0.053 epoch: 174, batch: 1900 // loss: 0.045 epoch: 174, batch: 2000 // loss: 0.047 epoch: 174, batch: 2100 // loss: 0.046 epoch: 174, batch: 2200 // loss: 0.050 epoch: 174, batch: 2300 // loss: 0.049 epoch: 174, batch: 2400 // loss: 0.042 epoch: 174, batch: 2500 // loss: 0.042 epoch: 174, batch: 2600 // loss: 0.045 epoch: 174, batch: 2700 // loss: 0.042 epoch: 174, batch: 2800 // loss: 0.046 epoch: 174, batch: 2900 // loss: 0.042 epoch: 174, batch: 3000 // loss: 0.045 epoch: 174, batch: 3100 // loss: 0.043 epoch: 174, batch: 3200 // loss: 0.039 epoch: 174, batch: 3300 // loss: 0.038 epoch: 174, batch: 3400 // loss: 0.044 epoch: 174, batch: 3500 // loss: 0.035 epoch: 174, batch: 3600 // loss: 0.043 epoch: 174, batch: 3700 // loss: 0.043 epoch: 175, batch: 0 // loss: 0.053 epoch: 175, batch: 100 // loss: 0.046 epoch: 175, batch: 200 // loss: 0.043 epoch: 175, batch: 300 // loss: 0.049 epoch: 175, batch: 400 // loss: 0.046 epoch: 175, batch: 500 // loss: 0.040 epoch: 175, batch: 600 // loss: 0.040 epoch: 175, batch: 700 // loss: 0.044 epoch: 175, batch: 800 // loss: 0.042 epoch: 175, batch: 900 // loss: 0.049 epoch: 175, batch: 1000 // loss: 0.046 epoch: 175, batch: 1100 // loss: 0.042 epoch: 175, batch: 1200 // loss: 0.044 epoch: 175, batch: 1300 // loss: 0.046 epoch: 175, batch: 1400 // loss: 0.043 epoch: 175, batch: 
1500 // loss: 0.048 epoch: 175, batch: 1600 // loss: 0.052 epoch: 175, batch: 1700 // loss: 0.045 epoch: 175, batch: 1800 // loss: 0.053 epoch: 175, batch: 1900 // loss: 0.045 epoch: 175, batch: 2000 // loss: 0.047 epoch: 175, batch: 2100 // loss: 0.046 epoch: 175, batch: 2200 // loss: 0.050 epoch: 175, batch: 2300 // loss: 0.049 epoch: 175, batch: 2400 // loss: 0.042 epoch: 175, batch: 2500 // loss: 0.042 epoch: 175, batch: 2600 // loss: 0.045 epoch: 175, batch: 2700 // loss: 0.042 epoch: 175, batch: 2800 // loss: 0.046 epoch: 175, batch: 2900 // loss: 0.042 epoch: 175, batch: 3000 // loss: 0.045 epoch: 175, batch: 3100 // loss: 0.043 epoch: 175, batch: 3200 // loss: 0.039 epoch: 175, batch: 3300 // loss: 0.038 epoch: 175, batch: 3400 // loss: 0.044 epoch: 175, batch: 3500 // loss: 0.035 epoch: 175, batch: 3600 // loss: 0.043 epoch: 175, batch: 3700 // loss: 0.043 epoch: 176, batch: 0 // loss: 0.053 epoch: 176, batch: 100 // loss: 0.046 epoch: 176, batch: 200 // loss: 0.043 epoch: 176, batch: 300 // loss: 0.049 epoch: 176, batch: 400 // loss: 0.046 epoch: 176, batch: 500 // loss: 0.040 epoch: 176, batch: 600 // loss: 0.040 epoch: 176, batch: 700 // loss: 0.044 epoch: 176, batch: 800 // loss: 0.042 epoch: 176, batch: 900 // loss: 0.049 epoch: 176, batch: 1000 // loss: 0.046 epoch: 176, batch: 1100 // loss: 0.042 epoch: 176, batch: 1200 // loss: 0.044 epoch: 176, batch: 1300 // loss: 0.046 epoch: 176, batch: 1400 // loss: 0.043 epoch: 176, batch: 1500 // loss: 0.048 epoch: 176, batch: 1600 // loss: 0.052 epoch: 176, batch: 1700 // loss: 0.045 epoch: 176, batch: 1800 // loss: 0.053 epoch: 176, batch: 1900 // loss: 0.045 epoch: 176, batch: 2000 // loss: 0.047 epoch: 176, batch: 2100 // loss: 0.046 epoch: 176, batch: 2200 // loss: 0.050 epoch: 176, batch: 2300 // loss: 0.049 epoch: 176, batch: 2400 // loss: 0.042 epoch: 176, batch: 2500 // loss: 0.042 epoch: 176, batch: 2600 // loss: 0.045 epoch: 176, batch: 2700 // loss: 0.042 epoch: 176, batch: 2800 // loss: 0.046 
epoch: 176, batch: 2900 // loss: 0.042 epoch: 176, batch: 3000 // loss: 0.045 epoch: 176, batch: 3100 // loss: 0.043 epoch: 176, batch: 3200 // loss: 0.039 epoch: 176, batch: 3300 // loss: 0.038 epoch: 176, batch: 3400 // loss: 0.044 epoch: 176, batch: 3500 // loss: 0.035 epoch: 176, batch: 3600 // loss: 0.043 epoch: 176, batch: 3700 // loss: 0.043 epoch: 177, batch: 0 // loss: 0.053 epoch: 177, batch: 100 // loss: 0.046 epoch: 177, batch: 200 // loss: 0.043 epoch: 177, batch: 300 // loss: 0.049 epoch: 177, batch: 400 // loss: 0.046 epoch: 177, batch: 500 // loss: 0.040 epoch: 177, batch: 600 // loss: 0.040 epoch: 177, batch: 700 // loss: 0.044 epoch: 177, batch: 800 // loss: 0.042 epoch: 177, batch: 900 // loss: 0.049 epoch: 177, batch: 1000 // loss: 0.046 epoch: 177, batch: 1100 // loss: 0.042 epoch: 177, batch: 1200 // loss: 0.044 epoch: 177, batch: 1300 // loss: 0.046 epoch: 177, batch: 1400 // loss: 0.043 epoch: 177, batch: 1500 // loss: 0.048 epoch: 177, batch: 1600 // loss: 0.052 epoch: 177, batch: 1700 // loss: 0.045 epoch: 177, batch: 1800 // loss: 0.053 epoch: 177, batch: 1900 // loss: 0.045 epoch: 177, batch: 2000 // loss: 0.047 epoch: 177, batch: 2100 // loss: 0.046 epoch: 177, batch: 2200 // loss: 0.050 epoch: 177, batch: 2300 // loss: 0.049 epoch: 177, batch: 2400 // loss: 0.042 epoch: 177, batch: 2500 // loss: 0.042 epoch: 177, batch: 2600 // loss: 0.045 epoch: 177, batch: 2700 // loss: 0.042 epoch: 177, batch: 2800 // loss: 0.046 epoch: 177, batch: 2900 // loss: 0.042 epoch: 177, batch: 3000 // loss: 0.045 epoch: 177, batch: 3100 // loss: 0.043 epoch: 177, batch: 3200 // loss: 0.039 epoch: 177, batch: 3300 // loss: 0.038 epoch: 177, batch: 3400 // loss: 0.044 epoch: 177, batch: 3500 // loss: 0.035 epoch: 177, batch: 3600 // loss: 0.043 epoch: 177, batch: 3700 // loss: 0.043 epoch: 178, batch: 0 // loss: 0.053 epoch: 178, batch: 100 // loss: 0.046 epoch: 178, batch: 200 // loss: 0.043 epoch: 178, batch: 300 // loss: 0.049 epoch: 178, batch: 400 // 
loss: 0.046 epoch: 178, batch: 500 // loss: 0.040 epoch: 178, batch: 600 // loss: 0.040 epoch: 178, batch: 700 // loss: 0.044 epoch: 178, batch: 800 // loss: 0.042 epoch: 178, batch: 900 // loss: 0.049 epoch: 178, batch: 1000 // loss: 0.046 epoch: 178, batch: 1100 // loss: 0.042 epoch: 178, batch: 1200 // loss: 0.044 epoch: 178, batch: 1300 // loss: 0.046 epoch: 178, batch: 1400 // loss: 0.043 epoch: 178, batch: 1500 // loss: 0.048 epoch: 178, batch: 1600 // loss: 0.052 epoch: 178, batch: 1700 // loss: 0.045 epoch: 178, batch: 1800 // loss: 0.053 epoch: 178, batch: 1900 // loss: 0.045 epoch: 178, batch: 2000 // loss: 0.047 epoch: 178, batch: 2100 // loss: 0.046 epoch: 178, batch: 2200 // loss: 0.050 epoch: 178, batch: 2300 // loss: 0.049 epoch: 178, batch: 2400 // loss: 0.042 epoch: 178, batch: 2500 // loss: 0.042 epoch: 178, batch: 2600 // loss: 0.045 epoch: 178, batch: 2700 // loss: 0.042 epoch: 178, batch: 2800 // loss: 0.046 epoch: 178, batch: 2900 // loss: 0.042 epoch: 178, batch: 3000 // loss: 0.045 epoch: 178, batch: 3100 // loss: 0.043 epoch: 178, batch: 3200 // loss: 0.039 epoch: 178, batch: 3300 // loss: 0.038 epoch: 178, batch: 3400 // loss: 0.044 epoch: 178, batch: 3500 // loss: 0.035 epoch: 178, batch: 3600 // loss: 0.043 epoch: 178, batch: 3700 // loss: 0.043 epoch: 179, batch: 0 // loss: 0.053 epoch: 179, batch: 100 // loss: 0.046 epoch: 179, batch: 200 // loss: 0.043 epoch: 179, batch: 300 // loss: 0.049 epoch: 179, batch: 400 // loss: 0.046 epoch: 179, batch: 500 // loss: 0.040 epoch: 179, batch: 600 // loss: 0.040 epoch: 179, batch: 700 // loss: 0.044 epoch: 179, batch: 800 // loss: 0.042 epoch: 179, batch: 900 // loss: 0.049 epoch: 179, batch: 1000 // loss: 0.046 epoch: 179, batch: 1100 // loss: 0.042 epoch: 179, batch: 1200 // loss: 0.044 epoch: 179, batch: 1300 // loss: 0.046 epoch: 179, batch: 1400 // loss: 0.043 epoch: 179, batch: 1500 // loss: 0.048 epoch: 179, batch: 1600 // loss: 0.052 epoch: 179, batch: 1700 // loss: 0.045 epoch: 179, 
batch: 1800 // loss: 0.053 epoch: 179, batch: 1900 // loss: 0.045 epoch: 179, batch: 2000 // loss: 0.047 epoch: 179, batch: 2100 // loss: 0.046 epoch: 179, batch: 2200 // loss: 0.050 epoch: 179, batch: 2300 // loss: 0.049 epoch: 179, batch: 2400 // loss: 0.042 epoch: 179, batch: 2500 // loss: 0.042 epoch: 179, batch: 2600 // loss: 0.045 epoch: 179, batch: 2700 // loss: 0.042 epoch: 179, batch: 2800 // loss: 0.046 epoch: 179, batch: 2900 // loss: 0.042 epoch: 179, batch: 3000 // loss: 0.045 epoch: 179, batch: 3100 // loss: 0.043 epoch: 179, batch: 3200 // loss: 0.039 epoch: 179, batch: 3300 // loss: 0.038 epoch: 179, batch: 3400 // loss: 0.044 epoch: 179, batch: 3500 // loss: 0.035 epoch: 179, batch: 3600 // loss: 0.043 epoch: 179, batch: 3700 // loss: 0.043 epoch: 180, batch: 0 // loss: 0.053 epoch: 180, batch: 100 // loss: 0.046 epoch: 180, batch: 200 // loss: 0.043 epoch: 180, batch: 300 // loss: 0.049 epoch: 180, batch: 400 // loss: 0.046 epoch: 180, batch: 500 // loss: 0.040 epoch: 180, batch: 600 // loss: 0.040 epoch: 180, batch: 700 // loss: 0.044 epoch: 180, batch: 800 // loss: 0.042 epoch: 180, batch: 900 // loss: 0.049 epoch: 180, batch: 1000 // loss: 0.046 epoch: 180, batch: 1100 // loss: 0.042 epoch: 180, batch: 1200 // loss: 0.044 epoch: 180, batch: 1300 // loss: 0.046 epoch: 180, batch: 1400 // loss: 0.043 epoch: 180, batch: 1500 // loss: 0.048 epoch: 180, batch: 1600 // loss: 0.052 epoch: 180, batch: 1700 // loss: 0.045 epoch: 180, batch: 1800 // loss: 0.053 epoch: 180, batch: 1900 // loss: 0.045 epoch: 180, batch: 2000 // loss: 0.047 epoch: 180, batch: 2100 // loss: 0.046 epoch: 180, batch: 2200 // loss: 0.050 epoch: 180, batch: 2300 // loss: 0.049 epoch: 180, batch: 2400 // loss: 0.042 epoch: 180, batch: 2500 // loss: 0.042 epoch: 180, batch: 2600 // loss: 0.045 epoch: 180, batch: 2700 // loss: 0.042 epoch: 180, batch: 2800 // loss: 0.046 epoch: 180, batch: 2900 // loss: 0.042 epoch: 180, batch: 3000 // loss: 0.045 epoch: 180, batch: 3100 // loss: 
0.043 epoch: 180, batch: 3200 // loss: 0.039 epoch: 180, batch: 3300 // loss: 0.038 epoch: 180, batch: 3400 // loss: 0.044 epoch: 180, batch: 3500 // loss: 0.035 epoch: 180, batch: 3600 // loss: 0.043 epoch: 180, batch: 3700 // loss: 0.043 epoch: 181, batch: 0 // loss: 0.053 epoch: 181, batch: 100 // loss: 0.046 epoch: 181, batch: 200 // loss: 0.043 epoch: 181, batch: 300 // loss: 0.049 epoch: 181, batch: 400 // loss: 0.046 epoch: 181, batch: 500 // loss: 0.040 epoch: 181, batch: 600 // loss: 0.040 epoch: 181, batch: 700 // loss: 0.044 epoch: 181, batch: 800 // loss: 0.042 epoch: 181, batch: 900 // loss: 0.049 epoch: 181, batch: 1000 // loss: 0.046 epoch: 181, batch: 1100 // loss: 0.042 epoch: 181, batch: 1200 // loss: 0.044 epoch: 181, batch: 1300 // loss: 0.046 epoch: 181, batch: 1400 // loss: 0.043 epoch: 181, batch: 1500 // loss: 0.048 epoch: 181, batch: 1600 // loss: 0.052 epoch: 181, batch: 1700 // loss: 0.045 epoch: 181, batch: 1800 // loss: 0.053 epoch: 181, batch: 1900 // loss: 0.045 epoch: 181, batch: 2000 // loss: 0.047 epoch: 181, batch: 2100 // loss: 0.046 epoch: 181, batch: 2200 // loss: 0.050 epoch: 181, batch: 2300 // loss: 0.049 epoch: 181, batch: 2400 // loss: 0.042 epoch: 181, batch: 2500 // loss: 0.042 epoch: 181, batch: 2600 // loss: 0.045 epoch: 181, batch: 2700 // loss: 0.042 epoch: 181, batch: 2800 // loss: 0.046 epoch: 181, batch: 2900 // loss: 0.042 epoch: 181, batch: 3000 // loss: 0.045 epoch: 181, batch: 3100 // loss: 0.043 epoch: 181, batch: 3200 // loss: 0.039 epoch: 181, batch: 3300 // loss: 0.038 epoch: 181, batch: 3400 // loss: 0.044 epoch: 181, batch: 3500 // loss: 0.035 epoch: 181, batch: 3600 // loss: 0.043 epoch: 181, batch: 3700 // loss: 0.043 epoch: 182, batch: 0 // loss: 0.053 epoch: 182, batch: 100 // loss: 0.046 epoch: 182, batch: 200 // loss: 0.043 epoch: 182, batch: 300 // loss: 0.049 epoch: 182, batch: 400 // loss: 0.046 epoch: 182, batch: 500 // loss: 0.040 epoch: 182, batch: 600 // loss: 0.040 epoch: 182, batch: 700 // 
loss: 0.044 epoch: 182, batch: 800 // loss: 0.042 epoch: 182, batch: 900 // loss: 0.049 epoch: 182, batch: 1000 // loss: 0.046 epoch: 182, batch: 1100 // loss: 0.042 epoch: 182, batch: 1200 // loss: 0.044 epoch: 182, batch: 1300 // loss: 0.046 epoch: 182, batch: 1400 // loss: 0.043 epoch: 182, batch: 1500 // loss: 0.048 epoch: 182, batch: 1600 // loss: 0.052 epoch: 182, batch: 1700 // loss: 0.045 epoch: 182, batch: 1800 // loss: 0.053 epoch: 182, batch: 1900 // loss: 0.045 epoch: 182, batch: 2000 // loss: 0.047 epoch: 182, batch: 2100 // loss: 0.046 epoch: 182, batch: 2200 // loss: 0.050 epoch: 182, batch: 2300 // loss: 0.049 epoch: 182, batch: 2400 // loss: 0.042 epoch: 182, batch: 2500 // loss: 0.042 epoch: 182, batch: 2600 // loss: 0.045 epoch: 182, batch: 2700 // loss: 0.042 epoch: 182, batch: 2800 // loss: 0.046 epoch: 182, batch: 2900 // loss: 0.042 epoch: 182, batch: 3000 // loss: 0.045 epoch: 182, batch: 3100 // loss: 0.043 epoch: 182, batch: 3200 // loss: 0.039 epoch: 182, batch: 3300 // loss: 0.038 epoch: 182, batch: 3400 // loss: 0.044 epoch: 182, batch: 3500 // loss: 0.035 epoch: 182, batch: 3600 // loss: 0.043 epoch: 182, batch: 3700 // loss: 0.043 epoch: 183, batch: 0 // loss: 0.053 epoch: 183, batch: 100 // loss: 0.046 epoch: 183, batch: 200 // loss: 0.043 epoch: 183, batch: 300 // loss: 0.049 epoch: 183, batch: 400 // loss: 0.046 epoch: 183, batch: 500 // loss: 0.040 epoch: 183, batch: 600 // loss: 0.040 epoch: 183, batch: 700 // loss: 0.044 epoch: 183, batch: 800 // loss: 0.042 epoch: 183, batch: 900 // loss: 0.049 epoch: 183, batch: 1000 // loss: 0.046 epoch: 183, batch: 1100 // loss: 0.042 epoch: 183, batch: 1200 // loss: 0.044 epoch: 183, batch: 1300 // loss: 0.046 epoch: 183, batch: 1400 // loss: 0.043 epoch: 183, batch: 1500 // loss: 0.048 epoch: 183, batch: 1600 // loss: 0.052 epoch: 183, batch: 1700 // loss: 0.045 epoch: 183, batch: 1800 // loss: 0.053 epoch: 183, batch: 1900 // loss: 0.045 epoch: 183, batch: 2000 // loss: 0.047 epoch: 183, 
batch: 2100 // loss: 0.046 epoch: 183, batch: 2200 // loss: 0.050 epoch: 183, batch: 2300 // loss: 0.049 epoch: 183, batch: 2400 // loss: 0.042 epoch: 183, batch: 2500 // loss: 0.042 epoch: 183, batch: 2600 // loss: 0.045 epoch: 183, batch: 2700 // loss: 0.042 epoch: 183, batch: 2800 // loss: 0.046 epoch: 183, batch: 2900 // loss: 0.042 epoch: 183, batch: 3000 // loss: 0.045 epoch: 183, batch: 3100 // loss: 0.043 epoch: 183, batch: 3200 // loss: 0.039 epoch: 183, batch: 3300 // loss: 0.038 epoch: 183, batch: 3400 // loss: 0.044 epoch: 183, batch: 3500 // loss: 0.035 epoch: 183, batch: 3600 // loss: 0.043 epoch: 183, batch: 3700 // loss: 0.043 epoch: 184, batch: 0 // loss: 0.053 epoch: 184, batch: 100 // loss: 0.046 epoch: 184, batch: 200 // loss: 0.043 epoch: 184, batch: 300 // loss: 0.049 epoch: 184, batch: 400 // loss: 0.046 epoch: 184, batch: 500 // loss: 0.040 epoch: 184, batch: 600 // loss: 0.040 epoch: 184, batch: 700 // loss: 0.044 epoch: 184, batch: 800 // loss: 0.042 epoch: 184, batch: 900 // loss: 0.049 epoch: 184, batch: 1000 // loss: 0.046 epoch: 184, batch: 1100 // loss: 0.042 epoch: 184, batch: 1200 // loss: 0.044 epoch: 184, batch: 1300 // loss: 0.046 epoch: 184, batch: 1400 // loss: 0.043 epoch: 184, batch: 1500 // loss: 0.048 epoch: 184, batch: 1600 // loss: 0.052 epoch: 184, batch: 1700 // loss: 0.045 epoch: 184, batch: 1800 // loss: 0.053 epoch: 184, batch: 1900 // loss: 0.045 epoch: 184, batch: 2000 // loss: 0.047 epoch: 184, batch: 2100 // loss: 0.046 epoch: 184, batch: 2200 // loss: 0.050 epoch: 184, batch: 2300 // loss: 0.049 epoch: 184, batch: 2400 // loss: 0.042 epoch: 184, batch: 2500 // loss: 0.042 epoch: 184, batch: 2600 // loss: 0.045 epoch: 184, batch: 2700 // loss: 0.042 epoch: 184, batch: 2800 // loss: 0.046 epoch: 184, batch: 2900 // loss: 0.042 epoch: 184, batch: 3000 // loss: 0.045 epoch: 184, batch: 3100 // loss: 0.043 epoch: 184, batch: 3200 // loss: 0.039 epoch: 184, batch: 3300 // loss: 0.038 epoch: 184, batch: 3400 // loss: 
0.044 epoch: 184, batch: 3500 // loss: 0.035 epoch: 184, batch: 3600 // loss: 0.043 epoch: 184, batch: 3700 // loss: 0.043 epoch: 185, batch: 0 // loss: 0.053 epoch: 185, batch: 100 // loss: 0.046 epoch: 185, batch: 200 // loss: 0.043 epoch: 185, batch: 300 // loss: 0.049 epoch: 185, batch: 400 // loss: 0.046 epoch: 185, batch: 500 // loss: 0.040 epoch: 185, batch: 600 // loss: 0.040 epoch: 185, batch: 700 // loss: 0.044 epoch: 185, batch: 800 // loss: 0.042 epoch: 185, batch: 900 // loss: 0.049 epoch: 185, batch: 1000 // loss: 0.046 epoch: 185, batch: 1100 // loss: 0.042 epoch: 185, batch: 1200 // loss: 0.044 epoch: 185, batch: 1300 // loss: 0.046 epoch: 185, batch: 1400 // loss: 0.043 epoch: 185, batch: 1500 // loss: 0.048 epoch: 185, batch: 1600 // loss: 0.052 epoch: 185, batch: 1700 // loss: 0.045 epoch: 185, batch: 1800 // loss: 0.053 epoch: 185, batch: 1900 // loss: 0.045 epoch: 185, batch: 2000 // loss: 0.047 epoch: 185, batch: 2100 // loss: 0.046 epoch: 185, batch: 2200 // loss: 0.050 epoch: 185, batch: 2300 // loss: 0.049 epoch: 185, batch: 2400 // loss: 0.042 epoch: 185, batch: 2500 // loss: 0.042 epoch: 185, batch: 2600 // loss: 0.045 epoch: 185, batch: 2700 // loss: 0.042 epoch: 185, batch: 2800 // loss: 0.046 epoch: 185, batch: 2900 // loss: 0.042 epoch: 185, batch: 3000 // loss: 0.045 epoch: 185, batch: 3100 // loss: 0.043 epoch: 185, batch: 3200 // loss: 0.039 epoch: 185, batch: 3300 // loss: 0.038 epoch: 185, batch: 3400 // loss: 0.044 epoch: 185, batch: 3500 // loss: 0.035 epoch: 185, batch: 3600 // loss: 0.043 epoch: 185, batch: 3700 // loss: 0.043 epoch: 186, batch: 0 // loss: 0.053 epoch: 186, batch: 100 // loss: 0.046 epoch: 186, batch: 200 // loss: 0.043 epoch: 186, batch: 300 // loss: 0.049 epoch: 186, batch: 400 // loss: 0.046 epoch: 186, batch: 500 // loss: 0.040 epoch: 186, batch: 600 // loss: 0.040 epoch: 186, batch: 700 // loss: 0.044 epoch: 186, batch: 800 // loss: 0.042 epoch: 186, batch: 900 // loss: 0.049 epoch: 186, batch: 1000 // 
loss: 0.046 epoch: 186, batch: 1100 // loss: 0.042 epoch: 186, batch: 1200 // loss: 0.044 epoch: 186, batch: 1300 // loss: 0.046 epoch: 186, batch: 1400 // loss: 0.043 epoch: 186, batch: 1500 // loss: 0.048 epoch: 186, batch: 1600 // loss: 0.052 epoch: 186, batch: 1700 // loss: 0.045 epoch: 186, batch: 1800 // loss: 0.053 epoch: 186, batch: 1900 // loss: 0.045 epoch: 186, batch: 2000 // loss: 0.047 epoch: 186, batch: 2100 // loss: 0.046 epoch: 186, batch: 2200 // loss: 0.050 epoch: 186, batch: 2300 // loss: 0.049 epoch: 186, batch: 2400 // loss: 0.042 epoch: 186, batch: 2500 // loss: 0.042 epoch: 186, batch: 2600 // loss: 0.045 epoch: 186, batch: 2700 // loss: 0.042 epoch: 186, batch: 2800 // loss: 0.046 epoch: 186, batch: 2900 // loss: 0.042 epoch: 186, batch: 3000 // loss: 0.045 epoch: 186, batch: 3100 // loss: 0.043 epoch: 186, batch: 3200 // loss: 0.039 epoch: 186, batch: 3300 // loss: 0.038 epoch: 186, batch: 3400 // loss: 0.044 epoch: 186, batch: 3500 // loss: 0.035 epoch: 186, batch: 3600 // loss: 0.043 epoch: 186, batch: 3700 // loss: 0.043 epoch: 187, batch: 0 // loss: 0.053 epoch: 187, batch: 100 // loss: 0.046 epoch: 187, batch: 200 // loss: 0.043 epoch: 187, batch: 300 // loss: 0.049 epoch: 187, batch: 400 // loss: 0.046 epoch: 187, batch: 500 // loss: 0.040 epoch: 187, batch: 600 // loss: 0.040 epoch: 187, batch: 700 // loss: 0.044 epoch: 187, batch: 800 // loss: 0.042 epoch: 187, batch: 900 // loss: 0.049 epoch: 187, batch: 1000 // loss: 0.046 epoch: 187, batch: 1100 // loss: 0.042 epoch: 187, batch: 1200 // loss: 0.044 epoch: 187, batch: 1300 // loss: 0.046 epoch: 187, batch: 1400 // loss: 0.043 epoch: 187, batch: 1500 // loss: 0.048 epoch: 187, batch: 1600 // loss: 0.052 epoch: 187, batch: 1700 // loss: 0.045 epoch: 187, batch: 1800 // loss: 0.053 epoch: 187, batch: 1900 // loss: 0.045 epoch: 187, batch: 2000 // loss: 0.047 epoch: 187, batch: 2100 // loss: 0.046 epoch: 187, batch: 2200 // loss: 0.050 epoch: 187, batch: 2300 // loss: 0.049 epoch: 
187, batch: 2400 // loss: 0.042 epoch: 187, batch: 2500 // loss: 0.042 epoch: 187, batch: 2600 // loss: 0.045 epoch: 187, batch: 2700 // loss: 0.042 epoch: 187, batch: 2800 // loss: 0.046 epoch: 187, batch: 2900 // loss: 0.042 epoch: 187, batch: 3000 // loss: 0.045 epoch: 187, batch: 3100 // loss: 0.043 epoch: 187, batch: 3200 // loss: 0.039 epoch: 187, batch: 3300 // loss: 0.038 epoch: 187, batch: 3400 // loss: 0.044 epoch: 187, batch: 3500 // loss: 0.035 epoch: 187, batch: 3600 // loss: 0.043 epoch: 187, batch: 3700 // loss: 0.043 epoch: 188, batch: 0 // loss: 0.053 epoch: 188, batch: 100 // loss: 0.046 epoch: 188, batch: 200 // loss: 0.043 epoch: 188, batch: 300 // loss: 0.049 epoch: 188, batch: 400 // loss: 0.046 epoch: 188, batch: 500 // loss: 0.040 epoch: 188, batch: 600 // loss: 0.040 epoch: 188, batch: 700 // loss: 0.044 epoch: 188, batch: 800 // loss: 0.042 epoch: 188, batch: 900 // loss: 0.049 epoch: 188, batch: 1000 // loss: 0.046 epoch: 188, batch: 1100 // loss: 0.042 epoch: 188, batch: 1200 // loss: 0.044 epoch: 188, batch: 1300 // loss: 0.046 epoch: 188, batch: 1400 // loss: 0.043 epoch: 188, batch: 1500 // loss: 0.048 epoch: 188, batch: 1600 // loss: 0.052 epoch: 188, batch: 1700 // loss: 0.045 epoch: 188, batch: 1800 // loss: 0.053 epoch: 188, batch: 1900 // loss: 0.045 epoch: 188, batch: 2000 // loss: 0.047 epoch: 188, batch: 2100 // loss: 0.046 epoch: 188, batch: 2200 // loss: 0.050 epoch: 188, batch: 2300 // loss: 0.049 epoch: 188, batch: 2400 // loss: 0.042 epoch: 188, batch: 2500 // loss: 0.042 epoch: 188, batch: 2600 // loss: 0.045 epoch: 188, batch: 2700 // loss: 0.042 epoch: 188, batch: 2800 // loss: 0.046 epoch: 188, batch: 2900 // loss: 0.042 epoch: 188, batch: 3000 // loss: 0.045 epoch: 188, batch: 3100 // loss: 0.043 epoch: 188, batch: 3200 // loss: 0.039 epoch: 188, batch: 3300 // loss: 0.038 epoch: 188, batch: 3400 // loss: 0.044 epoch: 188, batch: 3500 // loss: 0.035 epoch: 188, batch: 3600 // loss: 0.043 epoch: 188, batch: 3700 // 
loss: 0.043 epoch: 189, batch: 0 // loss: 0.053 epoch: 189, batch: 100 // loss: 0.046 epoch: 189, batch: 200 // loss: 0.043 epoch: 189, batch: 300 // loss: 0.049 epoch: 189, batch: 400 // loss: 0.046 epoch: 189, batch: 500 // loss: 0.040 epoch: 189, batch: 600 // loss: 0.040 epoch: 189, batch: 700 // loss: 0.044 epoch: 189, batch: 800 // loss: 0.042 epoch: 189, batch: 900 // loss: 0.049 epoch: 189, batch: 1000 // loss: 0.046 epoch: 189, batch: 1100 // loss: 0.042 epoch: 189, batch: 1200 // loss: 0.044 epoch: 189, batch: 1300 // loss: 0.046 epoch: 189, batch: 1400 // loss: 0.043 epoch: 189, batch: 1500 // loss: 0.048 epoch: 189, batch: 1600 // loss: 0.052 epoch: 189, batch: 1700 // loss: 0.045 epoch: 189, batch: 1800 // loss: 0.053 epoch: 189, batch: 1900 // loss: 0.045 epoch: 189, batch: 2000 // loss: 0.047 epoch: 189, batch: 2100 // loss: 0.046 epoch: 189, batch: 2200 // loss: 0.050 epoch: 189, batch: 2300 // loss: 0.049 epoch: 189, batch: 2400 // loss: 0.042 epoch: 189, batch: 2500 // loss: 0.042 epoch: 189, batch: 2600 // loss: 0.045 epoch: 189, batch: 2700 // loss: 0.042 epoch: 189, batch: 2800 // loss: 0.046 epoch: 189, batch: 2900 // loss: 0.042 epoch: 189, batch: 3000 // loss: 0.045 epoch: 189, batch: 3100 // loss: 0.043 epoch: 189, batch: 3200 // loss: 0.039 epoch: 189, batch: 3300 // loss: 0.038 epoch: 189, batch: 3400 // loss: 0.044 epoch: 189, batch: 3500 // loss: 0.035 epoch: 189, batch: 3600 // loss: 0.043 epoch: 189, batch: 3700 // loss: 0.043 epoch: 190, batch: 0 // loss: 0.053 epoch: 190, batch: 100 // loss: 0.046 epoch: 190, batch: 200 // loss: 0.043 epoch: 190, batch: 300 // loss: 0.049 epoch: 190, batch: 400 // loss: 0.046 epoch: 190, batch: 500 // loss: 0.040 epoch: 190, batch: 600 // loss: 0.040 epoch: 190, batch: 700 // loss: 0.044 epoch: 190, batch: 800 // loss: 0.042 epoch: 190, batch: 900 // loss: 0.049 epoch: 190, batch: 1000 // loss: 0.046 epoch: 190, batch: 1100 // loss: 0.042 epoch: 190, batch: 1200 // loss: 0.044 epoch: 190, batch: 
1300 // loss: 0.046 epoch: 190, batch: 1400 // loss: 0.043 epoch: 190, batch: 1500 // loss: 0.048 epoch: 190, batch: 1600 // loss: 0.052 epoch: 190, batch: 1700 // loss: 0.045 epoch: 190, batch: 1800 // loss: 0.053 epoch: 190, batch: 1900 // loss: 0.045 epoch: 190, batch: 2000 // loss: 0.047 epoch: 190, batch: 2100 // loss: 0.046 epoch: 190, batch: 2200 // loss: 0.050 epoch: 190, batch: 2300 // loss: 0.049 epoch: 190, batch: 2400 // loss: 0.042 epoch: 190, batch: 2500 // loss: 0.042 epoch: 190, batch: 2600 // loss: 0.045 epoch: 190, batch: 2700 // loss: 0.042 epoch: 190, batch: 2800 // loss: 0.046 epoch: 190, batch: 2900 // loss: 0.042 epoch: 190, batch: 3000 // loss: 0.045 epoch: 190, batch: 3100 // loss: 0.043 epoch: 190, batch: 3200 // loss: 0.039 epoch: 190, batch: 3300 // loss: 0.038 epoch: 190, batch: 3400 // loss: 0.044 epoch: 190, batch: 3500 // loss: 0.035 epoch: 190, batch: 3600 // loss: 0.043 epoch: 190, batch: 3700 // loss: 0.043 epoch: 191, batch: 0 // loss: 0.053 epoch: 191, batch: 100 // loss: 0.046 epoch: 191, batch: 200 // loss: 0.043 epoch: 191, batch: 300 // loss: 0.049 epoch: 191, batch: 400 // loss: 0.046 epoch: 191, batch: 500 // loss: 0.040 epoch: 191, batch: 600 // loss: 0.040 epoch: 191, batch: 700 // loss: 0.044 epoch: 191, batch: 800 // loss: 0.042 epoch: 191, batch: 900 // loss: 0.049 epoch: 191, batch: 1000 // loss: 0.046 epoch: 191, batch: 1100 // loss: 0.042 epoch: 191, batch: 1200 // loss: 0.044 epoch: 191, batch: 1300 // loss: 0.046 epoch: 191, batch: 1400 // loss: 0.043 epoch: 191, batch: 1500 // loss: 0.048 epoch: 191, batch: 1600 // loss: 0.052 epoch: 191, batch: 1700 // loss: 0.045 epoch: 191, batch: 1800 // loss: 0.053 epoch: 191, batch: 1900 // loss: 0.045 epoch: 191, batch: 2000 // loss: 0.047 epoch: 191, batch: 2100 // loss: 0.046 epoch: 191, batch: 2200 // loss: 0.050 epoch: 191, batch: 2300 // loss: 0.049 epoch: 191, batch: 2400 // loss: 0.042 epoch: 191, batch: 2500 // loss: 0.042 epoch: 191, batch: 2600 // loss: 0.045 
epoch: 191, batch: 2700 // loss: 0.042 epoch: 191, batch: 2800 // loss: 0.046 epoch: 191, batch: 2900 // loss: 0.042 epoch: 191, batch: 3000 // loss: 0.045 epoch: 191, batch: 3100 // loss: 0.043 epoch: 191, batch: 3200 // loss: 0.039 epoch: 191, batch: 3300 // loss: 0.038 epoch: 191, batch: 3400 // loss: 0.044 epoch: 191, batch: 3500 // loss: 0.035 epoch: 191, batch: 3600 // loss: 0.043 epoch: 191, batch: 3700 // loss: 0.043 epoch: 192, batch: 0 // loss: 0.053 epoch: 192, batch: 100 // loss: 0.046 epoch: 192, batch: 200 // loss: 0.043 epoch: 192, batch: 300 // loss: 0.049 epoch: 192, batch: 400 // loss: 0.046 epoch: 192, batch: 500 // loss: 0.040 epoch: 192, batch: 600 // loss: 0.040 epoch: 192, batch: 700 // loss: 0.044 epoch: 192, batch: 800 // loss: 0.042 epoch: 192, batch: 900 // loss: 0.049 epoch: 192, batch: 1000 // loss: 0.046 epoch: 192, batch: 1100 // loss: 0.042 epoch: 192, batch: 1200 // loss: 0.044 epoch: 192, batch: 1300 // loss: 0.046 epoch: 192, batch: 1400 // loss: 0.043 epoch: 192, batch: 1500 // loss: 0.048 epoch: 192, batch: 1600 // loss: 0.052 epoch: 192, batch: 1700 // loss: 0.045 epoch: 192, batch: 1800 // loss: 0.053 epoch: 192, batch: 1900 // loss: 0.045 epoch: 192, batch: 2000 // loss: 0.047 epoch: 192, batch: 2100 // loss: 0.046 epoch: 192, batch: 2200 // loss: 0.050 epoch: 192, batch: 2300 // loss: 0.049 epoch: 192, batch: 2400 // loss: 0.042 epoch: 192, batch: 2500 // loss: 0.042 epoch: 192, batch: 2600 // loss: 0.045 epoch: 192, batch: 2700 // loss: 0.042 epoch: 192, batch: 2800 // loss: 0.046 epoch: 192, batch: 2900 // loss: 0.042 epoch: 192, batch: 3000 // loss: 0.045 epoch: 192, batch: 3100 // loss: 0.043 epoch: 192, batch: 3200 // loss: 0.039 epoch: 192, batch: 3300 // loss: 0.038 epoch: 192, batch: 3400 // loss: 0.044 epoch: 192, batch: 3500 // loss: 0.035 epoch: 192, batch: 3600 // loss: 0.043 epoch: 192, batch: 3700 // loss: 0.043 epoch: 193, batch: 0 // loss: 0.053 epoch: 193, batch: 100 // loss: 0.046 epoch: 193, batch: 200 // 
loss: 0.043 epoch: 193, batch: 300 // loss: 0.049 epoch: 193, batch: 400 // loss: 0.046 epoch: 193, batch: 500 // loss: 0.040 epoch: 193, batch: 600 // loss: 0.040 epoch: 193, batch: 700 // loss: 0.044 epoch: 193, batch: 800 // loss: 0.042 epoch: 193, batch: 900 // loss: 0.049 epoch: 193, batch: 1000 // loss: 0.046 epoch: 193, batch: 1100 // loss: 0.042 epoch: 193, batch: 1200 // loss: 0.044 epoch: 193, batch: 1300 // loss: 0.046 epoch: 193, batch: 1400 // loss: 0.043 epoch: 193, batch: 1500 // loss: 0.048 epoch: 193, batch: 1600 // loss: 0.052 epoch: 193, batch: 1700 // loss: 0.045 epoch: 193, batch: 1800 // loss: 0.053 epoch: 193, batch: 1900 // loss: 0.045 epoch: 193, batch: 2000 // loss: 0.047 epoch: 193, batch: 2100 // loss: 0.046 epoch: 193, batch: 2200 // loss: 0.050 epoch: 193, batch: 2300 // loss: 0.049 epoch: 193, batch: 2400 // loss: 0.042 epoch: 193, batch: 2500 // loss: 0.042 epoch: 193, batch: 2600 // loss: 0.045 epoch: 193, batch: 2700 // loss: 0.042 epoch: 193, batch: 2800 // loss: 0.046 epoch: 193, batch: 2900 // loss: 0.042 epoch: 193, batch: 3000 // loss: 0.045 epoch: 193, batch: 3100 // loss: 0.043 epoch: 193, batch: 3200 // loss: 0.039 epoch: 193, batch: 3300 // loss: 0.038 epoch: 193, batch: 3400 // loss: 0.044 epoch: 193, batch: 3500 // loss: 0.035 epoch: 193, batch: 3600 // loss: 0.043 epoch: 193, batch: 3700 // loss: 0.043 epoch: 194, batch: 0 // loss: 0.053 epoch: 194, batch: 100 // loss: 0.046 epoch: 194, batch: 200 // loss: 0.043 epoch: 194, batch: 300 // loss: 0.049 epoch: 194, batch: 400 // loss: 0.046 epoch: 194, batch: 500 // loss: 0.040 epoch: 194, batch: 600 // loss: 0.040 epoch: 194, batch: 700 // loss: 0.044 epoch: 194, batch: 800 // loss: 0.042 epoch: 194, batch: 900 // loss: 0.049 epoch: 194, batch: 1000 // loss: 0.046 epoch: 194, batch: 1100 // loss: 0.042 epoch: 194, batch: 1200 // loss: 0.044 epoch: 194, batch: 1300 // loss: 0.046 epoch: 194, batch: 1400 // loss: 0.043 epoch: 194, batch: 1500 // loss: 0.048 epoch: 194, 
batch: 1600 // loss: 0.052 epoch: 194, batch: 1700 // loss: 0.045 epoch: 194, batch: 1800 // loss: 0.053 epoch: 194, batch: 1900 // loss: 0.045 epoch: 194, batch: 2000 // loss: 0.047 epoch: 194, batch: 2100 // loss: 0.046 epoch: 194, batch: 2200 // loss: 0.050 epoch: 194, batch: 2300 // loss: 0.049 epoch: 194, batch: 2400 // loss: 0.042 epoch: 194, batch: 2500 // loss: 0.042 epoch: 194, batch: 2600 // loss: 0.045 epoch: 194, batch: 2700 // loss: 0.042 epoch: 194, batch: 2800 // loss: 0.046 epoch: 194, batch: 2900 // loss: 0.042 epoch: 194, batch: 3000 // loss: 0.045 epoch: 194, batch: 3100 // loss: 0.043 epoch: 194, batch: 3200 // loss: 0.039 epoch: 194, batch: 3300 // loss: 0.038 epoch: 194, batch: 3400 // loss: 0.044 epoch: 194, batch: 3500 // loss: 0.035 epoch: 194, batch: 3600 // loss: 0.043 epoch: 194, batch: 3700 // loss: 0.043 epoch: 195, batch: 0 // loss: 0.053 epoch: 195, batch: 100 // loss: 0.046 epoch: 195, batch: 200 // loss: 0.043 epoch: 195, batch: 300 // loss: 0.049 epoch: 195, batch: 400 // loss: 0.046 epoch: 195, batch: 500 // loss: 0.040 epoch: 195, batch: 600 // loss: 0.040 epoch: 195, batch: 700 // loss: 0.044 epoch: 195, batch: 800 // loss: 0.042 epoch: 195, batch: 900 // loss: 0.049 epoch: 195, batch: 1000 // loss: 0.046 epoch: 195, batch: 1100 // loss: 0.042 epoch: 195, batch: 1200 // loss: 0.044 epoch: 195, batch: 1300 // loss: 0.046 epoch: 195, batch: 1400 // loss: 0.043 epoch: 195, batch: 1500 // loss: 0.048 epoch: 195, batch: 1600 // loss: 0.052 epoch: 195, batch: 1700 // loss: 0.045 epoch: 195, batch: 1800 // loss: 0.053 epoch: 195, batch: 1900 // loss: 0.045 epoch: 195, batch: 2000 // loss: 0.047 epoch: 195, batch: 2100 // loss: 0.046 epoch: 195, batch: 2200 // loss: 0.050 epoch: 195, batch: 2300 // loss: 0.049 epoch: 195, batch: 2400 // loss: 0.042 epoch: 195, batch: 2500 // loss: 0.042 epoch: 195, batch: 2600 // loss: 0.045 epoch: 195, batch: 2700 // loss: 0.042 epoch: 195, batch: 2800 // loss: 0.046 epoch: 195, batch: 2900 // loss: 
0.042 epoch: 195, batch: 3000 // loss: 0.045 epoch: 195, batch: 3100 // loss: 0.043 epoch: 195, batch: 3200 // loss: 0.039 epoch: 195, batch: 3300 // loss: 0.038 epoch: 195, batch: 3400 // loss: 0.044 epoch: 195, batch: 3500 // loss: 0.035 epoch: 195, batch: 3600 // loss: 0.043 epoch: 195, batch: 3700 // loss: 0.043 epoch: 196, batch: 0 // loss: 0.053 epoch: 196, batch: 100 // loss: 0.046 epoch: 196, batch: 200 // loss: 0.043 epoch: 196, batch: 300 // loss: 0.049 epoch: 196, batch: 400 // loss: 0.046 epoch: 196, batch: 500 // loss: 0.040 epoch: 196, batch: 600 // loss: 0.040 epoch: 196, batch: 700 // loss: 0.044 epoch: 196, batch: 800 // loss: 0.042 epoch: 196, batch: 900 // loss: 0.049 epoch: 196, batch: 1000 // loss: 0.046 epoch: 196, batch: 1100 // loss: 0.042 epoch: 196, batch: 1200 // loss: 0.044 epoch: 196, batch: 1300 // loss: 0.046 epoch: 196, batch: 1400 // loss: 0.043 epoch: 196, batch: 1500 // loss: 0.048 epoch: 196, batch: 1600 // loss: 0.052 epoch: 196, batch: 1700 // loss: 0.045 epoch: 196, batch: 1800 // loss: 0.053 epoch: 196, batch: 1900 // loss: 0.045 epoch: 196, batch: 2000 // loss: 0.047 epoch: 196, batch: 2100 // loss: 0.046 epoch: 196, batch: 2200 // loss: 0.050 epoch: 196, batch: 2300 // loss: 0.049 epoch: 196, batch: 2400 // loss: 0.042 epoch: 196, batch: 2500 // loss: 0.042 epoch: 196, batch: 2600 // loss: 0.045 epoch: 196, batch: 2700 // loss: 0.042 epoch: 196, batch: 2800 // loss: 0.046 epoch: 196, batch: 2900 // loss: 0.042 epoch: 196, batch: 3000 // loss: 0.045 epoch: 196, batch: 3100 // loss: 0.043 epoch: 196, batch: 3200 // loss: 0.039 epoch: 196, batch: 3300 // loss: 0.038 epoch: 196, batch: 3400 // loss: 0.044 epoch: 196, batch: 3500 // loss: 0.035 epoch: 196, batch: 3600 // loss: 0.043 epoch: 196, batch: 3700 // loss: 0.043 epoch: 197, batch: 0 // loss: 0.053 epoch: 197, batch: 100 // loss: 0.046 epoch: 197, batch: 200 // loss: 0.043 epoch: 197, batch: 300 // loss: 0.049 epoch: 197, batch: 400 // loss: 0.046 epoch: 197, batch: 500 
// loss: 0.040 epoch: 197, batch: 600 // loss: 0.040 epoch: 197, batch: 700 // loss: 0.044 epoch: 197, batch: 800 // loss: 0.042 epoch: 197, batch: 900 // loss: 0.049 epoch: 197, batch: 1000 // loss: 0.046 epoch: 197, batch: 1100 // loss: 0.042 epoch: 197, batch: 1200 // loss: 0.044 epoch: 197, batch: 1300 // loss: 0.046 epoch: 197, batch: 1400 // loss: 0.043 epoch: 197, batch: 1500 // loss: 0.048 epoch: 197, batch: 1600 // loss: 0.052 epoch: 197, batch: 1700 // loss: 0.045 epoch: 197, batch: 1800 // loss: 0.053 epoch: 197, batch: 1900 // loss: 0.045 epoch: 197, batch: 2000 // loss: 0.047 epoch: 197, batch: 2100 // loss: 0.046 epoch: 197, batch: 2200 // loss: 0.050 epoch: 197, batch: 2300 // loss: 0.049 epoch: 197, batch: 2400 // loss: 0.042 epoch: 197, batch: 2500 // loss: 0.042 epoch: 197, batch: 2600 // loss: 0.045 epoch: 197, batch: 2700 // loss: 0.042 epoch: 197, batch: 2800 // loss: 0.046 epoch: 197, batch: 2900 // loss: 0.042 epoch: 197, batch: 3000 // loss: 0.045 epoch: 197, batch: 3100 // loss: 0.043 epoch: 197, batch: 3200 // loss: 0.039 epoch: 197, batch: 3300 // loss: 0.038 epoch: 197, batch: 3400 // loss: 0.044 epoch: 197, batch: 3500 // loss: 0.035 epoch: 197, batch: 3600 // loss: 0.043 epoch: 197, batch: 3700 // loss: 0.043 epoch: 198, batch: 0 // loss: 0.053 epoch: 198, batch: 100 // loss: 0.046 epoch: 198, batch: 200 // loss: 0.043 epoch: 198, batch: 300 // loss: 0.049 epoch: 198, batch: 400 // loss: 0.046 epoch: 198, batch: 500 // loss: 0.040 epoch: 198, batch: 600 // loss: 0.040 epoch: 198, batch: 700 // loss: 0.044 epoch: 198, batch: 800 // loss: 0.042 epoch: 198, batch: 900 // loss: 0.049 epoch: 198, batch: 1000 // loss: 0.046 epoch: 198, batch: 1100 // loss: 0.042 epoch: 198, batch: 1200 // loss: 0.044 epoch: 198, batch: 1300 // loss: 0.046 epoch: 198, batch: 1400 // loss: 0.043 epoch: 198, batch: 1500 // loss: 0.048 epoch: 198, batch: 1600 // loss: 0.052 epoch: 198, batch: 1700 // loss: 0.045 epoch: 198, batch: 1800 // loss: 0.053 epoch: 198, 
batch: 1900 // loss: 0.045 epoch: 198, batch: 2000 // loss: 0.047 epoch: 198, batch: 2100 // loss: 0.046 epoch: 198, batch: 2200 // loss: 0.050 epoch: 198, batch: 2300 // loss: 0.049 epoch: 198, batch: 2400 // loss: 0.042 epoch: 198, batch: 2500 // loss: 0.042 epoch: 198, batch: 2600 // loss: 0.045 epoch: 198, batch: 2700 // loss: 0.042 epoch: 198, batch: 2800 // loss: 0.046 epoch: 198, batch: 2900 // loss: 0.042 epoch: 198, batch: 3000 // loss: 0.045 epoch: 198, batch: 3100 // loss: 0.043 epoch: 198, batch: 3200 // loss: 0.039 epoch: 198, batch: 3300 // loss: 0.038 epoch: 198, batch: 3400 // loss: 0.044 epoch: 198, batch: 3500 // loss: 0.035 epoch: 198, batch: 3600 // loss: 0.043 epoch: 198, batch: 3700 // loss: 0.043 epoch: 199, batch: 0 // loss: 0.053 epoch: 199, batch: 100 // loss: 0.046 epoch: 199, batch: 200 // loss: 0.043 epoch: 199, batch: 300 // loss: 0.049 epoch: 199, batch: 400 // loss: 0.046 epoch: 199, batch: 500 // loss: 0.040 epoch: 199, batch: 600 // loss: 0.040 epoch: 199, batch: 700 // loss: 0.044 epoch: 199, batch: 800 // loss: 0.042 epoch: 199, batch: 900 // loss: 0.049 epoch: 199, batch: 1000 // loss: 0.046 epoch: 199, batch: 1100 // loss: 0.042 epoch: 199, batch: 1200 // loss: 0.044 epoch: 199, batch: 1300 // loss: 0.046 epoch: 199, batch: 1400 // loss: 0.043 epoch: 199, batch: 1500 // loss: 0.048 epoch: 199, batch: 1600 // loss: 0.052 epoch: 199, batch: 1700 // loss: 0.045 epoch: 199, batch: 1800 // loss: 0.053 epoch: 199, batch: 1900 // loss: 0.045 epoch: 199, batch: 2000 // loss: 0.047 epoch: 199, batch: 2100 // loss: 0.046 epoch: 199, batch: 2200 // loss: 0.050 epoch: 199, batch: 2300 // loss: 0.049 epoch: 199, batch: 2400 // loss: 0.042 epoch: 199, batch: 2500 // loss: 0.042 epoch: 199, batch: 2600 // loss: 0.045 epoch: 199, batch: 2700 // loss: 0.042 epoch: 199, batch: 2800 // loss: 0.046 epoch: 199, batch: 2900 // loss: 0.042 epoch: 199, batch: 3000 // loss: 0.045 epoch: 199, batch: 3100 // loss: 0.043 epoch: 199, batch: 3200 // loss: 
0.039 epoch: 199, batch: 3300 // loss: 0.038 epoch: 199, batch: 3400 // loss: 0.044 epoch: 199, batch: 3500 // loss: 0.035 epoch: 199, batch: 3600 // loss: 0.043 epoch: 199, batch: 3700 // loss: 0.043
# Push the first 5000 rows of X through `auto` and keep the result as a
# NumPy array. Gradients are not needed here, so the forward pass runs
# under no_grad and the output can be converted directly.
with torch.no_grad():
    X_tilde = auto(X[:5000].float()).numpy()
# Display example 3 of X as a 28x28 grayscale image.
plt.imshow(np.reshape(np.asarray(X[3]), (28, 28)), cmap='gray')
<matplotlib.image.AxesImage at 0x7f8f43d165c0>
# Display the reconstruction of example 3 as a 28x28 grayscale image.
# The index matches the original image shown from X[3] so that the two
# pictures are the same example (this previously showed X_tilde[2]).
imshow(np.asarray(X_tilde[3]).reshape(28, 28), cmap='gray')
<matplotlib.image.AxesImage at 0x7f8f44507f28>
# Hidden representations (return_z=True) for the first 5000 rows of X.
Zs = auto(X[:5000].float(), return_z=True).detach().numpy()
# One colour per class label 0..9. 'w' (white) was replaced by 'brown':
# white markers cannot be seen on the default white axes background, which
# silently hid every point of class 7.
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k', 'brown', 'orange', 'purple']
# int() makes the label a plain Python index regardless of whether y holds
# tensor or NumPy scalars -- assumes y contains integer class ids in 0..9.
c = [colors[int(y_i)] for y_i in y[:5000]]
# Only the first two hidden dimensions are plotted.
plt.scatter(Zs[:, 0], Zs[:, 1], c=c)
<matplotlib.collections.PathCollection at 0x7f8f445308d0>
# AE2: extending the above by adding a non-linear activation function
# (try Sigmoid).
class AE2(nn.Module):
    '''
    Autoencoder with one hidden layer whose code is passed through a
    Sigmoid before being decoded by a linear output layer.
    '''

    def __init__(self, input_size=784, hidden_size=16):
        '''
        Create the layers.

        input_size: number of features in each input row (and in each
            reconstructed row).
        hidden_size: number of units in the hidden code.
        '''
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # encoder: x -> hidden pre-activation
        self.i = nn.Linear(input_size, hidden_size)
        # non-linearity applied to the hidden code
        self.a = nn.Sigmoid()
        # decoder: z -> reconstruction with input_size features
        self.o = nn.Linear(hidden_size, input_size)

    def forward(self, X, return_z=False):
        '''
        Encode X and, unless return_z is set, decode it again.

        Returns the Sigmoid-activated hidden code when return_z is true,
        otherwise the output of the linear decoder applied to that code.
        '''
        code = self.a(self.i(X))
        return code if return_z else self.o(code)
# Fresh AE2 with its default sizes (784 inputs, 16 hidden units).
auto2 = AE2()
# SGD with momentum over every parameter of auto2.
optimizer = optim.SGD(
    params=auto2.parameters(),
    momentum=0.9,
    lr=0.001,
)
# X is passed as both of the first two arguments -- presumably inputs and
# reconstruction targets; train_AE and loss_function are defined elsewhere.
train_AE(X, X, auto2, optimizer, loss_function)
epoch: 0, batch: 0 // loss: 0.309 epoch: 0, batch: 100 // loss: 0.292 epoch: 0, batch: 200 // loss: 0.255 epoch: 0, batch: 300 // loss: 0.236 epoch: 0, batch: 400 // loss: 0.238 epoch: 0, batch: 500 // loss: 0.233 epoch: 0, batch: 600 // loss: 0.235 epoch: 0, batch: 700 // loss: 0.242 epoch: 0, batch: 800 // loss: 0.213 epoch: 0, batch: 900 // loss: 0.255 epoch: 0, batch: 1000 // loss: 0.230 epoch: 0, batch: 1100 // loss: 0.260 epoch: 0, batch: 1200 // loss: 0.204 epoch: 0, batch: 1300 // loss: 0.248 epoch: 0, batch: 1400 // loss: 0.205 epoch: 0, batch: 1500 // loss: 0.204 epoch: 0, batch: 1600 // loss: 0.234 epoch: 0, batch: 1700 // loss: 0.219 epoch: 0, batch: 1800 // loss: 0.253 epoch: 0, batch: 1900 // loss: 0.223 epoch: 0, batch: 2000 // loss: 0.197 epoch: 0, batch: 2100 // loss: 0.197 epoch: 0, batch: 2200 // loss: 0.244 epoch: 0, batch: 2300 // loss: 0.216 epoch: 0, batch: 2400 // loss: 0.168 epoch: 0, batch: 2500 // loss: 0.184 epoch: 0, batch: 2600 // loss: 0.233 epoch: 0, batch: 2700 // loss: 0.183 epoch: 0, batch: 2800 // loss: 0.236 epoch: 0, batch: 2900 // loss: 0.172 epoch: 0, batch: 3000 // loss: 0.190 epoch: 0, batch: 3100 // loss: 0.226 epoch: 0, batch: 3200 // loss: 0.164 epoch: 0, batch: 3300 // loss: 0.192 epoch: 0, batch: 3400 // loss: 0.194 epoch: 0, batch: 3500 // loss: 0.205 epoch: 0, batch: 3600 // loss: 0.203 epoch: 0, batch: 3700 // loss: 0.224 epoch: 1, batch: 0 // loss: 0.204 epoch: 1, batch: 100 // loss: 0.213 epoch: 1, batch: 200 // loss: 0.194 epoch: 1, batch: 300 // loss: 0.182 epoch: 1, batch: 400 // loss: 0.191 epoch: 1, batch: 500 // loss: 0.189 epoch: 1, batch: 600 // loss: 0.193 epoch: 1, batch: 700 // loss: 0.201 epoch: 1, batch: 800 // loss: 0.176 epoch: 1, batch: 900 // loss: 0.217 epoch: 1, batch: 1000 // loss: 0.193 epoch: 1, batch: 1100 // loss: 0.222 epoch: 1, batch: 1200 // loss: 0.171 epoch: 1, batch: 1300 // loss: 0.212 epoch: 1, batch: 1400 // loss: 0.172 epoch: 1, batch: 1500 // loss: 0.172 epoch: 1, batch: 1600 // 
loss: 0.199 epoch: 1, batch: 1700 // loss: 0.186 epoch: 1, batch: 1800 // loss: 0.217 epoch: 1, batch: 1900 // loss: 0.189 epoch: 1, batch: 2000 // loss: 0.166 epoch: 1, batch: 2100 // loss: 0.167 epoch: 1, batch: 2200 // loss: 0.209 epoch: 1, batch: 2300 // loss: 0.184 epoch: 1, batch: 2400 // loss: 0.141 epoch: 1, batch: 2500 // loss: 0.154 epoch: 1, batch: 2600 // loss: 0.198 epoch: 1, batch: 2700 // loss: 0.154 epoch: 1, batch: 2800 // loss: 0.201 epoch: 1, batch: 2900 // loss: 0.145 epoch: 1, batch: 3000 // loss: 0.160 epoch: 1, batch: 3100 // loss: 0.191 epoch: 1, batch: 3200 // loss: 0.138 epoch: 1, batch: 3300 // loss: 0.161 epoch: 1, batch: 3400 // loss: 0.162 epoch: 1, batch: 3500 // loss: 0.171 epoch: 1, batch: 3600 // loss: 0.171 epoch: 1, batch: 3700 // loss: 0.189 epoch: 2, batch: 0 // loss: 0.174 epoch: 2, batch: 100 // loss: 0.178 epoch: 2, batch: 200 // loss: 0.163 epoch: 2, batch: 300 // loss: 0.153 epoch: 2, batch: 400 // loss: 0.160 epoch: 2, batch: 500 // loss: 0.156 epoch: 2, batch: 600 // loss: 0.160 epoch: 2, batch: 700 // loss: 0.167 epoch: 2, batch: 800 // loss: 0.147 epoch: 2, batch: 900 // loss: 0.181 epoch: 2, batch: 1000 // loss: 0.158 epoch: 2, batch: 1100 // loss: 0.183 epoch: 2, batch: 1200 // loss: 0.143 epoch: 2, batch: 1300 // loss: 0.176 epoch: 2, batch: 1400 // loss: 0.142 epoch: 2, batch: 1500 // loss: 0.142 epoch: 2, batch: 1600 // loss: 0.164 epoch: 2, batch: 1700 // loss: 0.153 epoch: 2, batch: 1800 // loss: 0.178 epoch: 2, batch: 1900 // loss: 0.155 epoch: 2, batch: 2000 // loss: 0.136 epoch: 2, batch: 2100 // loss: 0.139 epoch: 2, batch: 2200 // loss: 0.172 epoch: 2, batch: 2300 // loss: 0.152 epoch: 2, batch: 2400 // loss: 0.116 epoch: 2, batch: 2500 // loss: 0.126 epoch: 2, batch: 2600 // loss: 0.161 epoch: 2, batch: 2700 // loss: 0.125 epoch: 2, batch: 2800 // loss: 0.165 epoch: 2, batch: 2900 // loss: 0.118 epoch: 2, batch: 3000 // loss: 0.130 epoch: 2, batch: 3100 // loss: 0.154 epoch: 2, batch: 3200 // loss: 0.113 
epoch: 2, batch: 3300 // loss: 0.129 epoch: 2, batch: 3400 // loss: 0.130 epoch: 2, batch: 3500 // loss: 0.137 epoch: 2, batch: 3600 // loss: 0.139 epoch: 2, batch: 3700 // loss: 0.153 epoch: 3, batch: 0 // loss: 0.144 epoch: 3, batch: 100 // loss: 0.143 epoch: 3, batch: 200 // loss: 0.134 epoch: 3, batch: 300 // loss: 0.126 epoch: 3, batch: 400 // loss: 0.130 epoch: 3, batch: 500 // loss: 0.125 epoch: 3, batch: 600 // loss: 0.127 epoch: 3, batch: 700 // loss: 0.133 epoch: 3, batch: 800 // loss: 0.120 epoch: 3, batch: 900 // loss: 0.147 epoch: 3, batch: 1000 // loss: 0.125 epoch: 3, batch: 1100 // loss: 0.145 epoch: 3, batch: 1200 // loss: 0.116 epoch: 3, batch: 1300 // loss: 0.141 epoch: 3, batch: 1400 // loss: 0.114 epoch: 3, batch: 1500 // loss: 0.115 epoch: 3, batch: 1600 // loss: 0.132 epoch: 3, batch: 1700 // loss: 0.124 epoch: 3, batch: 1800 // loss: 0.142 epoch: 3, batch: 1900 // loss: 0.125 epoch: 3, batch: 2000 // loss: 0.110 epoch: 3, batch: 2100 // loss: 0.115 epoch: 3, batch: 2200 // loss: 0.139 epoch: 3, batch: 2300 // loss: 0.123 epoch: 3, batch: 2400 // loss: 0.096 epoch: 3, batch: 2500 // loss: 0.102 epoch: 3, batch: 2600 // loss: 0.129 epoch: 3, batch: 2700 // loss: 0.101 epoch: 3, batch: 2800 // loss: 0.133 epoch: 3, batch: 2900 // loss: 0.096 epoch: 3, batch: 3000 // loss: 0.106 epoch: 3, batch: 3100 // loss: 0.122 epoch: 3, batch: 3200 // loss: 0.094 epoch: 3, batch: 3300 // loss: 0.104 epoch: 3, batch: 3400 // loss: 0.103 epoch: 3, batch: 3500 // loss: 0.109 epoch: 3, batch: 3600 // loss: 0.114 epoch: 3, batch: 3700 // loss: 0.124 epoch: 4, batch: 0 // loss: 0.120 epoch: 4, batch: 100 // loss: 0.115 epoch: 4, batch: 200 // loss: 0.112 epoch: 4, batch: 300 // loss: 0.105 epoch: 4, batch: 400 // loss: 0.108 epoch: 4, batch: 500 // loss: 0.101 epoch: 4, batch: 600 // loss: 0.103 epoch: 4, batch: 700 // loss: 0.108 epoch: 4, batch: 800 // loss: 0.101 epoch: 4, batch: 900 // loss: 0.121 epoch: 4, batch: 1000 // loss: 0.100 epoch: 4, batch: 1100 // 
loss: 0.117 epoch: 4, batch: 1200 // loss: 0.097 epoch: 4, batch: 1300 // loss: 0.116 epoch: 4, batch: 1400 // loss: 0.095 epoch: 4, batch: 1500 // loss: 0.096 epoch: 4, batch: 1600 // loss: 0.109 epoch: 4, batch: 1700 // loss: 0.104 epoch: 4, batch: 1800 // loss: 0.116 epoch: 4, batch: 1900 // loss: 0.104 epoch: 4, batch: 2000 // loss: 0.092 epoch: 4, batch: 2100 // loss: 0.098 epoch: 4, batch: 2200 // loss: 0.117 epoch: 4, batch: 2300 // loss: 0.104 epoch: 4, batch: 2400 // loss: 0.084 epoch: 4, batch: 2500 // loss: 0.086 epoch: 4, batch: 2600 // loss: 0.108 epoch: 4, batch: 2700 // loss: 0.086 epoch: 4, batch: 2800 // loss: 0.112 epoch: 4, batch: 2900 // loss: 0.082 epoch: 4, batch: 3000 // loss: 0.090 epoch: 4, batch: 3100 // loss: 0.102 epoch: 4, batch: 3200 // loss: 0.082 epoch: 4, batch: 3300 // loss: 0.088 epoch: 4, batch: 3400 // loss: 0.086 epoch: 4, batch: 3500 // loss: 0.091 epoch: 4, batch: 3600 // loss: 0.098 epoch: 4, batch: 3700 // loss: 0.105 epoch: 5, batch: 0 // loss: 0.105 epoch: 5, batch: 100 // loss: 0.098 epoch: 5, batch: 200 // loss: 0.098 epoch: 5, batch: 300 // loss: 0.092 epoch: 5, batch: 400 // loss: 0.094 epoch: 5, batch: 500 // loss: 0.086 epoch: 5, batch: 600 // loss: 0.089 epoch: 5, batch: 700 // loss: 0.093 epoch: 5, batch: 800 // loss: 0.089 epoch: 5, batch: 900 // loss: 0.105 epoch: 5, batch: 1000 // loss: 0.086 epoch: 5, batch: 1100 // loss: 0.099 epoch: 5, batch: 1200 // loss: 0.087 epoch: 5, batch: 1300 // loss: 0.100 epoch: 5, batch: 1400 // loss: 0.084 epoch: 5, batch: 1500 // loss: 0.086 epoch: 5, batch: 1600 // loss: 0.095 epoch: 5, batch: 1700 // loss: 0.092 epoch: 5, batch: 1800 // loss: 0.100 epoch: 5, batch: 1900 // loss: 0.091 epoch: 5, batch: 2000 // loss: 0.083 epoch: 5, batch: 2100 // loss: 0.089 epoch: 5, batch: 2200 // loss: 0.103 epoch: 5, batch: 2300 // loss: 0.093 epoch: 5, batch: 2400 // loss: 0.078 epoch: 5, batch: 2500 // loss: 0.078 epoch: 5, batch: 2600 // loss: 0.095 epoch: 5, batch: 2700 // loss: 0.078 
epoch: 5, batch: 2800 // loss: 0.099 epoch: 5, batch: 2900 // loss: 0.075 epoch: 5, batch: 3000 // loss: 0.081 epoch: 5, batch: 3100 // loss: 0.090 epoch: 5, batch: 3200 // loss: 0.076 epoch: 5, batch: 3300 // loss: 0.079 epoch: 5, batch: 3400 // loss: 0.077 epoch: 5, batch: 3500 // loss: 0.081 epoch: 5, batch: 3600 // loss: 0.089 epoch: 5, batch: 3700 // loss: 0.094 epoch: 6, batch: 0 // loss: 0.097 epoch: 6, batch: 100 // loss: 0.088 epoch: 6, batch: 200 // loss: 0.091 epoch: 6, batch: 300 // loss: 0.085 epoch: 6, batch: 400 // loss: 0.087 epoch: 6, batch: 500 // loss: 0.078 epoch: 6, batch: 600 // loss: 0.080 epoch: 6, batch: 700 // loss: 0.084 epoch: 6, batch: 800 // loss: 0.083 epoch: 6, batch: 900 // loss: 0.096 epoch: 6, batch: 1000 // loss: 0.077 epoch: 6, batch: 1100 // loss: 0.089 epoch: 6, batch: 1200 // loss: 0.081 epoch: 6, batch: 1300 // loss: 0.092 epoch: 6, batch: 1400 // loss: 0.078 epoch: 6, batch: 1500 // loss: 0.081 epoch: 6, batch: 1600 // loss: 0.088 epoch: 6, batch: 1700 // loss: 0.085 epoch: 6, batch: 1800 // loss: 0.092 epoch: 6, batch: 1900 // loss: 0.085 epoch: 6, batch: 2000 // loss: 0.078 epoch: 6, batch: 2100 // loss: 0.085 epoch: 6, batch: 2200 // loss: 0.096 epoch: 6, batch: 2300 // loss: 0.087 epoch: 6, batch: 2400 // loss: 0.075 epoch: 6, batch: 2500 // loss: 0.073 epoch: 6, batch: 2600 // loss: 0.088 epoch: 6, batch: 2700 // loss: 0.073 epoch: 6, batch: 2800 // loss: 0.092 epoch: 6, batch: 2900 // loss: 0.071 epoch: 6, batch: 3000 // loss: 0.077 epoch: 6, batch: 3100 // loss: 0.083 epoch: 6, batch: 3200 // loss: 0.073 epoch: 6, batch: 3300 // loss: 0.075 epoch: 6, batch: 3400 // loss: 0.072 epoch: 6, batch: 3500 // loss: 0.076 epoch: 6, batch: 3600 // loss: 0.084 epoch: 6, batch: 3700 // loss: 0.088 epoch: 7, batch: 0 // loss: 0.092 epoch: 7, batch: 100 // loss: 0.083 epoch: 7, batch: 200 // loss: 0.086 epoch: 7, batch: 300 // loss: 0.082 epoch: 7, batch: 400 // loss: 0.083 epoch: 7, batch: 500 // loss: 0.073 epoch: 7, batch: 600 
// loss: 0.076 epoch: 7, batch: 700 // loss: 0.079 epoch: 7, batch: 800 // loss: 0.080 epoch: 7, batch: 900 // loss: 0.090 epoch: 7, batch: 1000 // loss: 0.073 epoch: 7, batch: 1100 // loss: 0.083 epoch: 7, batch: 1200 // loss: 0.079 epoch: 7, batch: 1300 // loss: 0.087 epoch: 7, batch: 1400 // loss: 0.075 epoch: 7, batch: 1500 // loss: 0.078 epoch: 7, batch: 1600 // loss: 0.084 epoch: 7, batch: 1700 // loss: 0.081 epoch: 7, batch: 1800 // loss: 0.087 epoch: 7, batch: 1900 // loss: 0.081 epoch: 7, batch: 2000 // loss: 0.075 epoch: 7, batch: 2100 // loss: 0.083 epoch: 7, batch: 2200 // loss: 0.091 epoch: 7, batch: 2300 // loss: 0.084 epoch: 7, batch: 2400 // loss: 0.073 epoch: 7, batch: 2500 // loss: 0.071 epoch: 7, batch: 2600 // loss: 0.084 epoch: 7, batch: 2700 // loss: 0.071 epoch: 7, batch: 2800 // loss: 0.088 epoch: 7, batch: 2900 // loss: 0.069 epoch: 7, batch: 3000 // loss: 0.074 epoch: 7, batch: 3100 // loss: 0.079 epoch: 7, batch: 3200 // loss: 0.072 epoch: 7, batch: 3300 // loss: 0.072 epoch: 7, batch: 3400 // loss: 0.069 epoch: 7, batch: 3500 // loss: 0.072 epoch: 7, batch: 3600 // loss: 0.081 epoch: 7, batch: 3700 // loss: 0.085 epoch: 8, batch: 0 // loss: 0.089 epoch: 8, batch: 100 // loss: 0.080 epoch: 8, batch: 200 // loss: 0.084 epoch: 8, batch: 300 // loss: 0.080 epoch: 8, batch: 400 // loss: 0.081 epoch: 8, batch: 500 // loss: 0.071 epoch: 8, batch: 600 // loss: 0.073 epoch: 8, batch: 700 // loss: 0.076 epoch: 8, batch: 800 // loss: 0.078 epoch: 8, batch: 900 // loss: 0.087 epoch: 8, batch: 1000 // loss: 0.070 epoch: 8, batch: 1100 // loss: 0.079 epoch: 8, batch: 1200 // loss: 0.077 epoch: 8, batch: 1300 // loss: 0.084 epoch: 8, batch: 1400 // loss: 0.074 epoch: 8, batch: 1500 // loss: 0.077 epoch: 8, batch: 1600 // loss: 0.081 epoch: 8, batch: 1700 // loss: 0.079 epoch: 8, batch: 1800 // loss: 0.084 epoch: 8, batch: 1900 // loss: 0.079 epoch: 8, batch: 2000 // loss: 0.074 epoch: 8, batch: 2100 // loss: 0.081 epoch: 8, batch: 2200 // loss: 0.089 
epoch: 8, batch: 2300 // loss: 0.082 epoch: 8, batch: 2400 // loss: 0.072 epoch: 8, batch: 2500 // loss: 0.070 epoch: 8, batch: 2600 // loss: 0.081 epoch: 8, batch: 2700 // loss: 0.070 epoch: 8, batch: 2800 // loss: 0.085 epoch: 8, batch: 2900 // loss: 0.068 epoch: 8, batch: 3000 // loss: 0.072 epoch: 8, batch: 3100 // loss: 0.077 epoch: 8, batch: 3200 // loss: 0.071 epoch: 8, batch: 3300 // loss: 0.070 epoch: 8, batch: 3400 // loss: 0.068 epoch: 8, batch: 3500 // loss: 0.070 epoch: 8, batch: 3600 // loss: 0.079 epoch: 8, batch: 3700 // loss: 0.082 epoch: 9, batch: 0 // loss: 0.088 epoch: 9, batch: 100 // loss: 0.078 epoch: 9, batch: 200 // loss: 0.082 epoch: 9, batch: 300 // loss: 0.079 epoch: 9, batch: 400 // loss: 0.079 epoch: 9, batch: 500 // loss: 0.069 epoch: 9, batch: 600 // loss: 0.071 epoch: 9, batch: 700 // loss: 0.074 epoch: 9, batch: 800 // loss: 0.077 epoch: 9, batch: 900 // loss: 0.085 epoch: 9, batch: 1000 // loss: 0.069 epoch: 9, batch: 1100 // loss: 0.077 epoch: 9, batch: 1200 // loss: 0.076 epoch: 9, batch: 1300 // loss: 0.082 epoch: 9, batch: 1400 // loss: 0.072 epoch: 9, batch: 1500 // loss: 0.076 epoch: 9, batch: 1600 // loss: 0.080 epoch: 9, batch: 1700 // loss: 0.077 epoch: 9, batch: 1800 // loss: 0.082 epoch: 9, batch: 1900 // loss: 0.077 epoch: 9, batch: 2000 // loss: 0.073 epoch: 9, batch: 2100 // loss: 0.080 epoch: 9, batch: 2200 // loss: 0.087 epoch: 9, batch: 2300 // loss: 0.080 epoch: 9, batch: 2400 // loss: 0.072 epoch: 9, batch: 2500 // loss: 0.069 epoch: 9, batch: 2600 // loss: 0.079 epoch: 9, batch: 2700 // loss: 0.069 epoch: 9, batch: 2800 // loss: 0.084 epoch: 9, batch: 2900 // loss: 0.068 epoch: 9, batch: 3000 // loss: 0.071 epoch: 9, batch: 3100 // loss: 0.075 epoch: 9, batch: 3200 // loss: 0.071 epoch: 9, batch: 3300 // loss: 0.069 epoch: 9, batch: 3400 // loss: 0.067 epoch: 9, batch: 3500 // loss: 0.068 epoch: 9, batch: 3600 // loss: 0.077 epoch: 9, batch: 3700 // loss: 0.081
# Run the first 5000 examples through auto2. return_z=True presumably makes
# the model hand back the hidden code z rather than the reconstruction
# (auto2 is defined earlier in the notebook -- confirm there).
Zs = (
    auto2(X[:5000].float(), return_z=True)
    .detach()   # drop the autograd graph before converting
    .numpy()
)
# Scatter the first two columns of Zs against each other, colored by the
# externally defined `c`.
plt.scatter(x=Zs[:, 0], y=Zs[:, 1], c=c)
<matplotlib.collections.PathCollection at 0x7f8f4460fba8>
# Display the shape of Zs.
Zs.shape
(5000, 16)
Denoising auto-encoder. Now let's feed the model corrupted versions of the inputs and train it to reconstruct the original (clean) inputs as its targets. To create a corrupted version we will perturb the input pixel values with some random (Gaussian) noise.
def corrupt(x, var=0.01):
    '''
    Return `x` plus elementwise zero-mean Gaussian noise; `x` itself is
    not modified.

    NOTE: despite its name, `var` is passed to np.random.normal as the
    `scale` argument, so it acts as the standard deviation of the noise
    rather than its variance.
    '''
    zero_mean = np.zeros(x.shape)
    noise = np.random.normal(loc=zero_mean, scale=var)
    return x + noise
# Show the first 10 values of the first example, before any corruption.
X[0,:10]
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=torch.float64)
# Show the first 10 values of the first example after corrupt() with its
# default var=0.01.
corrupt(X[0])[:10]
tensor([ 0.0073, 0.0166, -0.0076, 0.0072, 0.0170, 0.0171, 0.0167, 0.0069, 0.0050, 0.0150], dtype=torch.float64)
# Render the first (uncorrupted) example as a 28x28 grayscale image.
imshow(
    np.asarray(X[0].reshape((28, 28))),
    cmap='gray',
)
<matplotlib.image.AxesImage at 0x7f8f445eabe0>
# Render the first example after corruption with var=0.1 (ten times the
# default) as a 28x28 grayscale image.
imshow(
    np.asarray(corrupt(X[0], var=0.1).reshape((28, 28))),
    cmap='gray',
)
<matplotlib.image.AxesImage at 0x7f8f724245c0>
# Corrupt the whole dataset once, up front: the noise is drawn a single time
# here, so X_corrupt stays fixed for everything that uses it afterwards.
X_corrupt = corrupt(X)

# Fresh AE2 model with a 16-unit hidden layer, trained with SGD + momentum.
auto3 = AE2(hidden_size=16)
optimizer = optim.SGD(
    params=auto3.parameters(),
    lr=0.01,
    momentum=0.9,
)

# Presumably train_AE takes (inputs, targets, ...) like train_regularized_AE
# does, i.e. noisy inputs are mapped to clean targets -- confirm against the
# train_AE definition earlier in the notebook.
train_AE(X_corrupt, X, auto3, optimizer, loss_function)
epoch: 0, batch: 0 // loss: 0.306 epoch: 0, batch: 100 // loss: 0.235 epoch: 0, batch: 200 // loss: 0.204 epoch: 0, batch: 300 // loss: 0.184 epoch: 0, batch: 400 // loss: 0.186 epoch: 0, batch: 500 // loss: 0.175 epoch: 0, batch: 600 // loss: 0.171 epoch: 0, batch: 700 // loss: 0.168 epoch: 0, batch: 800 // loss: 0.141 epoch: 0, batch: 900 // loss: 0.162 epoch: 0, batch: 1000 // loss: 0.131 epoch: 0, batch: 1100 // loss: 0.140 epoch: 0, batch: 1200 // loss: 0.108 epoch: 0, batch: 1300 // loss: 0.120 epoch: 0, batch: 1400 // loss: 0.095 epoch: 0, batch: 1500 // loss: 0.092 epoch: 0, batch: 1600 // loss: 0.097 epoch: 0, batch: 1700 // loss: 0.090 epoch: 0, batch: 1800 // loss: 0.095 epoch: 0, batch: 1900 // loss: 0.086 epoch: 0, batch: 2000 // loss: 0.077 epoch: 0, batch: 2100 // loss: 0.084 epoch: 0, batch: 2200 // loss: 0.091 epoch: 0, batch: 2300 // loss: 0.083 epoch: 0, batch: 2400 // loss: 0.073 epoch: 0, batch: 2500 // loss: 0.070 epoch: 0, batch: 2600 // loss: 0.080 epoch: 0, batch: 2700 // loss: 0.070 epoch: 0, batch: 2800 // loss: 0.084 epoch: 0, batch: 2900 // loss: 0.068 epoch: 0, batch: 3000 // loss: 0.070 epoch: 0, batch: 3100 // loss: 0.075 epoch: 0, batch: 3200 // loss: 0.071 epoch: 0, batch: 3300 // loss: 0.068 epoch: 0, batch: 3400 // loss: 0.066 epoch: 0, batch: 3500 // loss: 0.067 epoch: 0, batch: 3600 // loss: 0.076 epoch: 0, batch: 3700 // loss: 0.079 epoch: 1, batch: 0 // loss: 0.085 epoch: 1, batch: 100 // loss: 0.074 epoch: 1, batch: 200 // loss: 0.078 epoch: 1, batch: 300 // loss: 0.077 epoch: 1, batch: 400 // loss: 0.076 epoch: 1, batch: 500 // loss: 0.065 epoch: 1, batch: 600 // loss: 0.067 epoch: 1, batch: 700 // loss: 0.071 epoch: 1, batch: 800 // loss: 0.074 epoch: 1, batch: 900 // loss: 0.080 epoch: 1, batch: 1000 // loss: 0.065 epoch: 1, batch: 1100 // loss: 0.071 epoch: 1, batch: 1200 // loss: 0.074 epoch: 1, batch: 1300 // loss: 0.075 epoch: 1, batch: 1400 // loss: 0.069 epoch: 1, batch: 1500 // loss: 0.074 epoch: 1, batch: 1600 // 
loss: 0.076 epoch: 1, batch: 1700 // loss: 0.071 epoch: 1, batch: 1800 // loss: 0.076 epoch: 1, batch: 1900 // loss: 0.072 epoch: 1, batch: 2000 // loss: 0.069 epoch: 1, batch: 2100 // loss: 0.076 epoch: 1, batch: 2200 // loss: 0.079 epoch: 1, batch: 2300 // loss: 0.074 epoch: 1, batch: 2400 // loss: 0.068 epoch: 1, batch: 2500 // loss: 0.065 epoch: 1, batch: 2600 // loss: 0.071 epoch: 1, batch: 2700 // loss: 0.065 epoch: 1, batch: 2800 // loss: 0.075 epoch: 1, batch: 2900 // loss: 0.063 epoch: 1, batch: 3000 // loss: 0.065 epoch: 1, batch: 3100 // loss: 0.068 epoch: 1, batch: 3200 // loss: 0.065 epoch: 1, batch: 3300 // loss: 0.063 epoch: 1, batch: 3400 // loss: 0.062 epoch: 1, batch: 3500 // loss: 0.060 epoch: 1, batch: 3600 // loss: 0.069 epoch: 1, batch: 3700 // loss: 0.072 epoch: 2, batch: 0 // loss: 0.078 epoch: 2, batch: 100 // loss: 0.069 epoch: 2, batch: 200 // loss: 0.071 epoch: 2, batch: 300 // loss: 0.071 epoch: 2, batch: 400 // loss: 0.070 epoch: 2, batch: 500 // loss: 0.060 epoch: 2, batch: 600 // loss: 0.062 epoch: 2, batch: 700 // loss: 0.065 epoch: 2, batch: 800 // loss: 0.067 epoch: 2, batch: 900 // loss: 0.073 epoch: 2, batch: 1000 // loss: 0.061 epoch: 2, batch: 1100 // loss: 0.065 epoch: 2, batch: 1200 // loss: 0.068 epoch: 2, batch: 1300 // loss: 0.068 epoch: 2, batch: 1400 // loss: 0.063 epoch: 2, batch: 1500 // loss: 0.069 epoch: 2, batch: 1600 // loss: 0.070 epoch: 2, batch: 1700 // loss: 0.064 epoch: 2, batch: 1800 // loss: 0.071 epoch: 2, batch: 1900 // loss: 0.065 epoch: 2, batch: 2000 // loss: 0.064 epoch: 2, batch: 2100 // loss: 0.069 epoch: 2, batch: 2200 // loss: 0.072 epoch: 2, batch: 2300 // loss: 0.068 epoch: 2, batch: 2400 // loss: 0.061 epoch: 2, batch: 2500 // loss: 0.059 epoch: 2, batch: 2600 // loss: 0.065 epoch: 2, batch: 2700 // loss: 0.059 epoch: 2, batch: 2800 // loss: 0.067 epoch: 2, batch: 2900 // loss: 0.057 epoch: 2, batch: 3000 // loss: 0.060 epoch: 2, batch: 3100 // loss: 0.061 epoch: 2, batch: 3200 // loss: 0.057 
epoch: 2, batch: 3300 // loss: 0.056 epoch: 2, batch: 3400 // loss: 0.056 epoch: 2, batch: 3500 // loss: 0.053 epoch: 2, batch: 3600 // loss: 0.061 epoch: 2, batch: 3700 // loss: 0.064 epoch: 3, batch: 0 // loss: 0.070 epoch: 3, batch: 100 // loss: 0.062 epoch: 3, batch: 200 // loss: 0.064 epoch: 3, batch: 300 // loss: 0.064 epoch: 3, batch: 400 // loss: 0.062 epoch: 3, batch: 500 // loss: 0.054 epoch: 3, batch: 600 // loss: 0.055 epoch: 3, batch: 700 // loss: 0.059 epoch: 3, batch: 800 // loss: 0.059 epoch: 3, batch: 900 // loss: 0.065 epoch: 3, batch: 1000 // loss: 0.056 epoch: 3, batch: 1100 // loss: 0.058 epoch: 3, batch: 1200 // loss: 0.060 epoch: 3, batch: 1300 // loss: 0.061 epoch: 3, batch: 1400 // loss: 0.057 epoch: 3, batch: 1500 // loss: 0.062 epoch: 3, batch: 1600 // loss: 0.064 epoch: 3, batch: 1700 // loss: 0.058 epoch: 3, batch: 1800 // loss: 0.065 epoch: 3, batch: 1900 // loss: 0.058 epoch: 3, batch: 2000 // loss: 0.058 epoch: 3, batch: 2100 // loss: 0.060 epoch: 3, batch: 2200 // loss: 0.065 epoch: 3, batch: 2300 // loss: 0.061 epoch: 3, batch: 2400 // loss: 0.054 epoch: 3, batch: 2500 // loss: 0.053 epoch: 3, batch: 2600 // loss: 0.058 epoch: 3, batch: 2700 // loss: 0.054 epoch: 3, batch: 2800 // loss: 0.059 epoch: 3, batch: 2900 // loss: 0.051 epoch: 3, batch: 3000 // loss: 0.055 epoch: 3, batch: 3100 // loss: 0.054 epoch: 3, batch: 3200 // loss: 0.050 epoch: 3, batch: 3300 // loss: 0.049 epoch: 3, batch: 3400 // loss: 0.051 epoch: 3, batch: 3500 // loss: 0.047 epoch: 3, batch: 3600 // loss: 0.055 epoch: 3, batch: 3700 // loss: 0.056 epoch: 4, batch: 0 // loss: 0.064 epoch: 4, batch: 100 // loss: 0.056 epoch: 4, batch: 200 // loss: 0.057 epoch: 4, batch: 300 // loss: 0.058 epoch: 4, batch: 400 // loss: 0.056 epoch: 4, batch: 500 // loss: 0.048 epoch: 4, batch: 600 // loss: 0.050 epoch: 4, batch: 700 // loss: 0.054 epoch: 4, batch: 800 // loss: 0.052 epoch: 4, batch: 900 // loss: 0.059 epoch: 4, batch: 1000 // loss: 0.052 epoch: 4, batch: 1100 // 
loss: 0.052 epoch: 4, batch: 1200 // loss: 0.054 epoch: 4, batch: 1300 // loss: 0.055 epoch: 4, batch: 1400 // loss: 0.052 epoch: 4, batch: 1500 // loss: 0.057 epoch: 4, batch: 1600 // loss: 0.060 epoch: 4, batch: 1700 // loss: 0.053 epoch: 4, batch: 1800 // loss: 0.060 epoch: 4, batch: 1900 // loss: 0.053 epoch: 4, batch: 2000 // loss: 0.053 epoch: 4, batch: 2100 // loss: 0.054 epoch: 4, batch: 2200 // loss: 0.060 epoch: 4, batch: 2300 // loss: 0.056 epoch: 4, batch: 2400 // loss: 0.048 epoch: 4, batch: 2500 // loss: 0.048 epoch: 4, batch: 2600 // loss: 0.053 epoch: 4, batch: 2700 // loss: 0.050 epoch: 4, batch: 2800 // loss: 0.053 epoch: 4, batch: 2900 // loss: 0.047 epoch: 4, batch: 3000 // loss: 0.051 epoch: 4, batch: 3100 // loss: 0.050 epoch: 4, batch: 3200 // loss: 0.045 epoch: 4, batch: 3300 // loss: 0.045 epoch: 4, batch: 3400 // loss: 0.048 epoch: 4, batch: 3500 // loss: 0.042 epoch: 4, batch: 3600 // loss: 0.050 epoch: 4, batch: 3700 // loss: 0.051 epoch: 5, batch: 0 // loss: 0.059 epoch: 5, batch: 100 // loss: 0.052 epoch: 5, batch: 200 // loss: 0.052 epoch: 5, batch: 300 // loss: 0.054 epoch: 5, batch: 400 // loss: 0.052 epoch: 5, batch: 500 // loss: 0.045 epoch: 5, batch: 600 // loss: 0.046 epoch: 5, batch: 700 // loss: 0.050 epoch: 5, batch: 800 // loss: 0.048 epoch: 5, batch: 900 // loss: 0.055 epoch: 5, batch: 1000 // loss: 0.049 epoch: 5, batch: 1100 // loss: 0.049 epoch: 5, batch: 1200 // loss: 0.050 epoch: 5, batch: 1300 // loss: 0.051 epoch: 5, batch: 1400 // loss: 0.048 epoch: 5, batch: 1500 // loss: 0.054 epoch: 5, batch: 1600 // loss: 0.056 epoch: 5, batch: 1700 // loss: 0.050 epoch: 5, batch: 1800 // loss: 0.057 epoch: 5, batch: 1900 // loss: 0.050 epoch: 5, batch: 2000 // loss: 0.050 epoch: 5, batch: 2100 // loss: 0.051 epoch: 5, batch: 2200 // loss: 0.056 epoch: 5, batch: 2300 // loss: 0.053 epoch: 5, batch: 2400 // loss: 0.045 epoch: 5, batch: 2500 // loss: 0.045 epoch: 5, batch: 2600 // loss: 0.050 epoch: 5, batch: 2700 // loss: 0.047 
epoch: 5, batch: 2800 // loss: 0.050 epoch: 5, batch: 2900 // loss: 0.044 epoch: 5, batch: 3000 // loss: 0.049 epoch: 5, batch: 3100 // loss: 0.047 epoch: 5, batch: 3200 // loss: 0.042 epoch: 5, batch: 3300 // loss: 0.042 epoch: 5, batch: 3400 // loss: 0.045 epoch: 5, batch: 3500 // loss: 0.039 epoch: 5, batch: 3600 // loss: 0.047 epoch: 5, batch: 3700 // loss: 0.048 epoch: 6, batch: 0 // loss: 0.056 epoch: 6, batch: 100 // loss: 0.049 epoch: 6, batch: 200 // loss: 0.049 epoch: 6, batch: 300 // loss: 0.052 epoch: 6, batch: 400 // loss: 0.049 epoch: 6, batch: 500 // loss: 0.043 epoch: 6, batch: 600 // loss: 0.044 epoch: 6, batch: 700 // loss: 0.048 epoch: 6, batch: 800 // loss: 0.045 epoch: 6, batch: 900 // loss: 0.052 epoch: 6, batch: 1000 // loss: 0.047 epoch: 6, batch: 1100 // loss: 0.046 epoch: 6, batch: 1200 // loss: 0.048 epoch: 6, batch: 1300 // loss: 0.048 epoch: 6, batch: 1400 // loss: 0.046 epoch: 6, batch: 1500 // loss: 0.052 epoch: 6, batch: 1600 // loss: 0.054 epoch: 6, batch: 1700 // loss: 0.048 epoch: 6, batch: 1800 // loss: 0.055 epoch: 6, batch: 1900 // loss: 0.047 epoch: 6, batch: 2000 // loss: 0.048 epoch: 6, batch: 2100 // loss: 0.049 epoch: 6, batch: 2200 // loss: 0.053 epoch: 6, batch: 2300 // loss: 0.051 epoch: 6, batch: 2400 // loss: 0.043 epoch: 6, batch: 2500 // loss: 0.043 epoch: 6, batch: 2600 // loss: 0.048 epoch: 6, batch: 2700 // loss: 0.046 epoch: 6, batch: 2800 // loss: 0.048 epoch: 6, batch: 2900 // loss: 0.043 epoch: 6, batch: 3000 // loss: 0.047 epoch: 6, batch: 3100 // loss: 0.045 epoch: 6, batch: 3200 // loss: 0.040 epoch: 6, batch: 3300 // loss: 0.040 epoch: 6, batch: 3400 // loss: 0.044 epoch: 6, batch: 3500 // loss: 0.037 epoch: 6, batch: 3600 // loss: 0.045 epoch: 6, batch: 3700 // loss: 0.045 epoch: 7, batch: 0 // loss: 0.054 epoch: 7, batch: 100 // loss: 0.047 epoch: 7, batch: 200 // loss: 0.046 epoch: 7, batch: 300 // loss: 0.050 epoch: 7, batch: 400 // loss: 0.047 epoch: 7, batch: 500 // loss: 0.041 epoch: 7, batch: 600 
// loss: 0.042 epoch: 7, batch: 700 // loss: 0.046 epoch: 7, batch: 800 // loss: 0.043 epoch: 7, batch: 900 // loss: 0.050 epoch: 7, batch: 1000 // loss: 0.046 epoch: 7, batch: 1100 // loss: 0.044 epoch: 7, batch: 1200 // loss: 0.046 epoch: 7, batch: 1300 // loss: 0.046 epoch: 7, batch: 1400 // loss: 0.044 epoch: 7, batch: 1500 // loss: 0.050 epoch: 7, batch: 1600 // loss: 0.052 epoch: 7, batch: 1700 // loss: 0.046 epoch: 7, batch: 1800 // loss: 0.053 epoch: 7, batch: 1900 // loss: 0.046 epoch: 7, batch: 2000 // loss: 0.046 epoch: 7, batch: 2100 // loss: 0.047 epoch: 7, batch: 2200 // loss: 0.051 epoch: 7, batch: 2300 // loss: 0.049 epoch: 7, batch: 2400 // loss: 0.042 epoch: 7, batch: 2500 // loss: 0.042 epoch: 7, batch: 2600 // loss: 0.046 epoch: 7, batch: 2700 // loss: 0.044 epoch: 7, batch: 2800 // loss: 0.046 epoch: 7, batch: 2900 // loss: 0.041 epoch: 7, batch: 3000 // loss: 0.046 epoch: 7, batch: 3100 // loss: 0.044 epoch: 7, batch: 3200 // loss: 0.039 epoch: 7, batch: 3300 // loss: 0.038 epoch: 7, batch: 3400 // loss: 0.042 epoch: 7, batch: 3500 // loss: 0.035 epoch: 7, batch: 3600 // loss: 0.044 epoch: 7, batch: 3700 // loss: 0.044 epoch: 8, batch: 0 // loss: 0.052 epoch: 8, batch: 100 // loss: 0.046 epoch: 8, batch: 200 // loss: 0.045 epoch: 8, batch: 300 // loss: 0.048 epoch: 8, batch: 400 // loss: 0.045 epoch: 8, batch: 500 // loss: 0.040 epoch: 8, batch: 600 // loss: 0.041 epoch: 8, batch: 700 // loss: 0.045 epoch: 8, batch: 800 // loss: 0.041 epoch: 8, batch: 900 // loss: 0.049 epoch: 8, batch: 1000 // loss: 0.044 epoch: 8, batch: 1100 // loss: 0.043 epoch: 8, batch: 1200 // loss: 0.045 epoch: 8, batch: 1300 // loss: 0.044 epoch: 8, batch: 1400 // loss: 0.042 epoch: 8, batch: 1500 // loss: 0.048 epoch: 8, batch: 1600 // loss: 0.051 epoch: 8, batch: 1700 // loss: 0.045 epoch: 8, batch: 1800 // loss: 0.052 epoch: 8, batch: 1900 // loss: 0.044 epoch: 8, batch: 2000 // loss: 0.045 epoch: 8, batch: 2100 // loss: 0.046 epoch: 8, batch: 2200 // loss: 0.050 
epoch: 8, batch: 2300 // loss: 0.048 epoch: 8, batch: 2400 // loss: 0.041 epoch: 8, batch: 2500 // loss: 0.041 epoch: 8, batch: 2600 // loss: 0.045 epoch: 8, batch: 2700 // loss: 0.043 epoch: 8, batch: 2800 // loss: 0.044 epoch: 8, batch: 2900 // loss: 0.040 epoch: 8, batch: 3000 // loss: 0.044 epoch: 8, batch: 3100 // loss: 0.042 epoch: 8, batch: 3200 // loss: 0.037 epoch: 8, batch: 3300 // loss: 0.037 epoch: 8, batch: 3400 // loss: 0.041 epoch: 8, batch: 3500 // loss: 0.034 epoch: 8, batch: 3600 // loss: 0.043 epoch: 8, batch: 3700 // loss: 0.042 epoch: 9, batch: 0 // loss: 0.051 epoch: 9, batch: 100 // loss: 0.045 epoch: 9, batch: 200 // loss: 0.043 epoch: 9, batch: 300 // loss: 0.047 epoch: 9, batch: 400 // loss: 0.044 epoch: 9, batch: 500 // loss: 0.039 epoch: 9, batch: 600 // loss: 0.039 epoch: 9, batch: 700 // loss: 0.044 epoch: 9, batch: 800 // loss: 0.040 epoch: 9, batch: 900 // loss: 0.048 epoch: 9, batch: 1000 // loss: 0.043 epoch: 9, batch: 1100 // loss: 0.042 epoch: 9, batch: 1200 // loss: 0.043 epoch: 9, batch: 1300 // loss: 0.043 epoch: 9, batch: 1400 // loss: 0.041 epoch: 9, batch: 1500 // loss: 0.047 epoch: 9, batch: 1600 // loss: 0.049 epoch: 9, batch: 1700 // loss: 0.044 epoch: 9, batch: 1800 // loss: 0.051 epoch: 9, batch: 1900 // loss: 0.043 epoch: 9, batch: 2000 // loss: 0.043 epoch: 9, batch: 2100 // loss: 0.045 epoch: 9, batch: 2200 // loss: 0.048 epoch: 9, batch: 2300 // loss: 0.047 epoch: 9, batch: 2400 // loss: 0.040 epoch: 9, batch: 2500 // loss: 0.040 epoch: 9, batch: 2600 // loss: 0.044 epoch: 9, batch: 2700 // loss: 0.042 epoch: 9, batch: 2800 // loss: 0.043 epoch: 9, batch: 2900 // loss: 0.039 epoch: 9, batch: 3000 // loss: 0.043 epoch: 9, batch: 3100 // loss: 0.042 epoch: 9, batch: 3200 // loss: 0.036 epoch: 9, batch: 3300 // loss: 0.036 epoch: 9, batch: 3400 // loss: 0.040 epoch: 9, batch: 3500 // loss: 0.033 epoch: 9, batch: 3600 // loss: 0.041 epoch: 9, batch: 3700 // loss: 0.041
# Run the first 5000 clean examples through auto3. return_z=True presumably
# returns the hidden code z (AE2 is defined earlier in the notebook).
Zs = (
    auto3(X[:5000].float(), return_z=True)
    .detach()   # drop the autograd graph before converting
    .numpy()
)
# Scatter the first two columns of Zs against each other, colored by the
# externally defined `c`.
plt.scatter(x=Zs[:, 0], y=Zs[:, 1], c=c)
<matplotlib.collections.PathCollection at 0x7f8f8205c9e8>
# Run the first 5000 clean examples through auto3 (default arguments) and
# keep the result as a NumPy array.
X_tilde = (
    auto3(X[:5000].float())
    .detach()
    .numpy()
)
# Show the third row of X_tilde as a 28x28 grayscale image.
imshow(
    np.asarray(X_tilde[2]).reshape(28, 28),
    cmap='gray',
)
<matplotlib.image.AxesImage at 0x7f8f22f93f60>
First, let's define the model appropriately. Consider: what do we need to change from the above variants? Think about the training loop (below).
class AE_regularized(nn.Module):
    '''
    Autoencoder with one sigmoid hidden layer whose forward pass returns
    the hidden code z together with the reconstruction, so that a training
    loop can penalize z directly.
    '''

    def __init__(self, input_size=784, hidden_size=2):
        '''
        Set up the encoder, the activation and the decoder.

        input_size:  number of features in each input vector.
        hidden_size: number of units in the hidden code z.
        '''
        super().__init__()
        self.input_size, self.hidden_size = input_size, hidden_size

        # encoder: x -> pre-activation of z
        self.i = nn.Linear(input_size, hidden_size)
        # sigmoid keeps every entry of z strictly between 0 and 1
        self.a = nn.Sigmoid()
        # decoder: z -> reconstruction of x
        self.o = nn.Linear(hidden_size, input_size)

    def forward(self, X):
        '''
        Encode X, decode it again, and return the pair
        (reconstruction, z). z is always returned alongside the output.
        '''
        pre_activation = self.i(X)
        z = self.a(pre_activation)
        reconstruction = self.o(z)
        return reconstruction, z
Now update the training loop to incorporate regularization. This will take a parameter lambda_
that encodes how much weight to put on the regularization penalty (vs typical/reconstruction loss).
Two hints:
(1) Consider that we want to incur a loss associated with our regularization (an l1 norm); where should we do that?
(2) See torch.norm
(https://pytorch.org/docs/stable/torch.html#torch.norm).
def train_regularized_AE(X_in, X_target, model, optimizer, loss_function, lambda_, EPOCHS=10, batch_size=16):
    '''
    Train an autoencoder with an l1 penalty on its hidden code.

    For every mini-batch the objective is

        loss_function(reconstruction, target) + lambda_ * ||z||_1

    where (reconstruction, z) is what `model` returns for the batch.

    X_in:          inputs, indexable by slice, whose slices support .float()
                   (e.g. a torch tensor); its length sets the epoch size.
    X_target:      reconstruction targets, aligned row-by-row with X_in.
    model:         callable returning (reconstruction, z) for a batch.
    optimizer:     optimizer over the model's parameters.
    loss_function: callable(reconstruction, target) -> scalar loss tensor.
    lambda_:       weight of the l1 penalty relative to the reconstruction loss.
    EPOCHS:        number of full passes over the data.
    batch_size:    examples per mini-batch (the last batch may be smaller).

    Raises ValueError if batch_size is not positive.
    '''
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Use the real dataset size instead of a hard-coded 60000, so smaller
    # datasets do not yield empty batches and larger ones are not truncated.
    n_samples = len(X_in)

    for epoch in range(EPOCHS):
        print("")
        for batch_num, start in enumerate(range(0, n_samples, batch_size)):
            stop = start + batch_size

            # zero the parameter gradients
            optimizer.zero_grad()

            X_batch = X_in[start:stop].float()
            X_target_batch = X_target[start:stop].float()

            # run the batch forward, incur both losses, backprop, and step.
            X_tilde_batch, z = model(X_batch)
            output_loss = loss_function(X_tilde_batch, X_target_batch)

            # l1 norm of the code: the sum of |z| over every entry in the
            # batch (not averaged), so its scale grows with the batch size.
            reg_loss = torch.norm(z, 1)

            loss = output_loss + lambda_ * reg_loss
            loss.backward()
            optimizer.step()

            # report both loss terms every 100 batches
            if batch_num % 100 == 0:
                print("epoch: {}, batch: {} // loss: {:.3f} // reg. loss (* \\lambda): {:.3f}".format(
                    epoch, batch_num, output_loss.item(), lambda_ * reg_loss.item()))
# Build a regularized autoencoder with a 16-unit code and train it on
# (corrupted input, clean target) pairs with SGD + momentum, weighting the
# l1 penalty on the code with lambda_ = 1.
AER = AE_regularized(hidden_size=16)
optimizer = optim.SGD(
    params=AER.parameters(),
    lr=0.01,
    momentum=0.9,
)
train_regularized_AE(
    X_in=X_corrupt,
    X_target=X,
    model=AER,
    optimizer=optimizer,
    loss_function=loss_function,
    lambda_=1,
)
epoch: 0, batch: 0 // loss: 0.311 // reg. loss (* \lambda): 125.579 epoch: 0, batch: 100 // loss: 0.239 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 200 // loss: 0.207 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 300 // loss: 0.192 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 400 // loss: 0.196 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 500 // loss: 0.191 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 600 // loss: 0.189 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 700 // loss: 0.195 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 800 // loss: 0.162 // reg. loss (* \lambda): 0.005 epoch: 0, batch: 900 // loss: 0.198 // reg. loss (* \lambda): 0.184 epoch: 0, batch: 1000 // loss: 0.173 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 1100 // loss: 0.196 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 1200 // loss: 0.149 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 1300 // loss: 0.179 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 1400 // loss: 0.140 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 1500 // loss: 0.140 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 1600 // loss: 0.160 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 1700 // loss: 0.144 // reg. loss (* \lambda): 0.006 epoch: 0, batch: 1800 // loss: 0.170 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 1900 // loss: 0.144 // reg. loss (* \lambda): 0.001 epoch: 0, batch: 2000 // loss: 0.122 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 2100 // loss: 0.125 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 2200 // loss: 0.155 // reg. loss (* \lambda): 0.001 epoch: 0, batch: 2300 // loss: 0.132 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 2400 // loss: 0.099 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 2500 // loss: 0.106 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 2600 // loss: 0.140 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 2700 // loss: 0.103 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 2800 // loss: 0.139 // reg. 
loss (* \lambda): 0.000 epoch: 0, batch: 2900 // loss: 0.094 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 3000 // loss: 0.106 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 3100 // loss: 0.127 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 3200 // loss: 0.090 // reg. loss (* \lambda): 0.002 epoch: 0, batch: 3300 // loss: 0.103 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 3400 // loss: 0.101 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 3500 // loss: 0.108 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 3600 // loss: 0.111 // reg. loss (* \lambda): 0.000 epoch: 0, batch: 3700 // loss: 0.122 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 0 // loss: 0.115 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 100 // loss: 0.110 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 200 // loss: 0.108 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 300 // loss: 0.097 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 400 // loss: 0.101 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 500 // loss: 0.095 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 600 // loss: 0.097 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 700 // loss: 0.102 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 800 // loss: 0.092 // reg. loss (* \lambda): 0.004 epoch: 1, batch: 900 // loss: 0.113 // reg. loss (* \lambda): 0.161 epoch: 1, batch: 1000 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 1100 // loss: 0.110 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 1200 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 1300 // loss: 0.107 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 1400 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 1500 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 1600 // loss: 0.101 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 1700 // loss: 0.097 // reg. loss (* \lambda): 0.005 epoch: 1, batch: 1800 // loss: 0.107 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 1900 // loss: 0.095 // reg. 
loss (* \lambda): 0.000 epoch: 1, batch: 2000 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 2100 // loss: 0.091 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 2200 // loss: 0.108 // reg. loss (* \lambda): 0.001 epoch: 1, batch: 2300 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 2400 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 2500 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 2600 // loss: 0.102 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 2700 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 2800 // loss: 0.105 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 2900 // loss: 0.075 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 3000 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 3100 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 3200 // loss: 0.077 // reg. loss (* \lambda): 0.001 epoch: 1, batch: 3300 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 3400 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 3500 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 3600 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 1, batch: 3700 // loss: 0.099 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 0 // loss: 0.100 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 400 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 500 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 600 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 700 // loss: 0.087 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 800 // loss: 0.085 // reg. loss (* \lambda): 0.003 epoch: 2, batch: 900 // loss: 0.099 // reg. loss (* \lambda): 0.144 epoch: 2, batch: 1000 // loss: 0.081 // reg. 
loss (* \lambda): 0.000 epoch: 2, batch: 1100 // loss: 0.094 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 1300 // loss: 0.095 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 1400 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 1600 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.004 epoch: 2, batch: 1800 // loss: 0.095 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 1900 // loss: 0.087 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2200 // loss: 0.100 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2300 // loss: 0.091 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2400 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2500 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2600 // loss: 0.095 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2800 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 2900 // loss: 0.075 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 3000 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 3100 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 3200 // loss: 0.078 // reg. loss (* \lambda): 0.001 epoch: 2, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 3500 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 3600 // loss: 0.091 // reg. loss (* \lambda): 0.000 epoch: 2, batch: 3700 // loss: 0.094 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 0 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 100 // loss: 0.085 // reg. 
loss (* \lambda): 0.000 epoch: 3, batch: 200 // loss: 0.095 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 400 // loss: 0.087 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 500 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 700 // loss: 0.085 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 800 // loss: 0.085 // reg. loss (* \lambda): 0.003 epoch: 3, batch: 900 // loss: 0.097 // reg. loss (* \lambda): 0.130 epoch: 3, batch: 1000 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 1100 // loss: 0.091 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 1200 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 1300 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 1400 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.004 epoch: 3, batch: 1800 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2400 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2500 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2800 // loss: 0.097 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 3000 // loss: 0.082 // reg. 
loss (* \lambda): 0.000 epoch: 3, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 3200 // loss: 0.079 // reg. loss (* \lambda): 0.001 epoch: 3, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 3500 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 3, batch: 3700 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 800 // loss: 0.085 // reg. loss (* \lambda): 0.002 epoch: 4, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.119 epoch: 4, batch: 1000 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 1100 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 1400 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.003 epoch: 4, batch: 1800 // loss: 0.091 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2100 // loss: 0.088 // reg. 
loss (* \lambda): 0.000 epoch: 4, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2400 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2500 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001 epoch: 4, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 4, batch: 3700 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 300 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.002 epoch: 5, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.110 epoch: 5, batch: 1000 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 1200 // loss: 0.083 // reg. 
loss (* \lambda): 0.000 epoch: 5, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.003 epoch: 5, batch: 1800 // loss: 0.091 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2500 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001 epoch: 5, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 5, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 300 // loss: 0.086 // reg. 
loss (* \lambda): 0.000 epoch: 6, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.002 epoch: 6, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.102 epoch: 6, batch: 1000 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.003 epoch: 6, batch: 1800 // loss: 0.091 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2500 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 3200 // loss: 0.080 // reg. 
loss (* \lambda): 0.001 epoch: 6, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 6, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 300 // loss: 0.087 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.001 epoch: 7, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.096 epoch: 7, batch: 1000 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.003 epoch: 7, batch: 1800 // loss: 0.091 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2300 // loss: 0.090 // reg. 
loss (* \lambda): 0.000 epoch: 7, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2500 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001 epoch: 7, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 7, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 300 // loss: 0.087 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 500 // loss: 0.078 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.001 epoch: 8, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.090 epoch: 8, batch: 1000 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 1400 // loss: 0.083 // reg. 
loss (* \lambda): 0.000 epoch: 8, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.002 epoch: 8, batch: 1800 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2500 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001 epoch: 8, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 3400 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 8, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 0 // loss: 0.099 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 100 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 200 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 300 // loss: 0.087 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 400 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 500 // loss: 0.078 // reg. 
loss (* \lambda): 0.000 epoch: 9, batch: 600 // loss: 0.081 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 700 // loss: 0.084 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 800 // loss: 0.086 // reg. loss (* \lambda): 0.001 epoch: 9, batch: 900 // loss: 0.096 // reg. loss (* \lambda): 0.085 epoch: 9, batch: 1000 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 1100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 1200 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 1300 // loss: 0.092 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 1400 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 1500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 1600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 1700 // loss: 0.092 // reg. loss (* \lambda): 0.002 epoch: 9, batch: 1800 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 1900 // loss: 0.086 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2000 // loss: 0.079 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2100 // loss: 0.089 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2200 // loss: 0.098 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2300 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2400 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2500 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2600 // loss: 0.093 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2700 // loss: 0.077 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2800 // loss: 0.096 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 2900 // loss: 0.076 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 3000 // loss: 0.083 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 3100 // loss: 0.088 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 3200 // loss: 0.080 // reg. loss (* \lambda): 0.001 epoch: 9, batch: 3300 // loss: 0.080 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 3400 // loss: 0.076 // reg. 
loss (* \lambda): 0.000 epoch: 9, batch: 3500 // loss: 0.082 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 3600 // loss: 0.090 // reg. loss (* \lambda): 0.000 epoch: 9, batch: 3700 // loss: 0.092 // reg. loss (* \lambda): 0.000
First, let's review this on the board...
from torch.nn import functional as F
class VAE(nn.Module):
    '''
    A small fully-connected variational autoencoder.

    The encoder maps an input vector to the mean and log-variance of a
    diagonal Gaussian over the latent code; a latent sample is drawn with
    the reparameterization trick and decoded back to the input dimension
    through a sigmoid, so every reconstructed value lies in [0, 1].

    Parameters
    ----------
    input_size : int
        Length of each (flattened) input vector; also the output length.
    hidden_size1 : int
        Width of the encoder's first layer and of the decoder's hidden layer.
    hidden_size2 : int
        Dimensionality of the latent code z.
    '''

    def __init__(self, input_size=784, hidden_size1=32, hidden_size2=32):
        '''
        In the initializer we setup model parameters/layers.
        '''
        super().__init__()
        self.input_size = input_size
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2

        ### encoder layers
        # Use input_size rather than a literal 784 so the argument is honoured.
        self.fc_e = nn.Linear(input_size, hidden_size1)
        self.fc_mean = nn.Linear(hidden_size1, hidden_size2)
        self.fc_logvar = nn.Linear(hidden_size1, hidden_size2)

        ### decoder layers
        self.fc_d1 = nn.Linear(hidden_size2, hidden_size1)
        self.fc_d2 = nn.Linear(hidden_size1, input_size)

    def encoder(self, x_in):
        '''
        Map inputs to the parameters of the approximate posterior.

        Returns the pair (mean, logvar), each with hidden_size2 features.
        '''
        # NOTE(review): there is no nonlinearity after fc_e, so the encoder
        # is purely affine. Left unchanged to preserve behaviour -- confirm
        # whether an activation was intended here.
        x = self.fc_e(x_in)
        mean = self.fc_mean(x)
        logvar = self.fc_logvar(x)
        return mean, logvar

    def decoder(self, z):
        '''
        Map latent codes back to input space; outputs are squashed to [0, 1].
        '''
        z = F.relu(self.fc_d1(z))
        # torch.sigmoid is the non-deprecated spelling of F.sigmoid.
        x_out = torch.sigmoid(self.fc_d2(z))
        return x_out

    def sample_normal(self, mean, logvar):
        '''
        Draw z ~ N(mean, exp(logvar)) in a way gradients can flow through.
        '''
        # Using torch.normal(means, sds) returns a stochastic tensor which we
        # cannot backpropagate through. Instead we use the
        # 'reparameterization trick': z = mean + sd * e, with e ~ N(0, I).
        # http://stats.stackexchange.com/a/205336
        # http://dpkingma.com/wordpress/wp-content/uploads/2015/12/talk_nips_workshop_2015.pdf
        sd = torch.exp(logvar * 0.5)
        # randn_like matches sd's shape, dtype and device, and avoids the
        # copy-construct warning raised by torch.tensor(torch.randn(...)).
        e = torch.randn_like(sd)  # Sample from standard normal
        z = mean + e * sd
        return z

    def forward(self, x_in):
        '''
        Encode, sample a latent code, and decode.

        Returns (x_out, z_mean, z_logvar) so that callers can form both a
        reconstruction loss and a regularizer on the latent distribution.
        '''
        z_mean, z_logvar = self.encoder(x_in)
        z = self.sample_normal(z_mean, z_logvar)
        x_out = self.decoder(z)
        return x_out, z_mean, z_logvar
def train_VAE(X_in, X_target, model, optimizer, loss_function, EPOCHS=10,
              batch_size=16, kl_weight=0.0):
    '''
    Train an autoencoder-style model with sequential mini-batches.

    Parameters
    ----------
    X_in : tensor or array-like
        Model inputs, indexed along the first axis by sample.
    X_target : tensor or array-like
        Reconstruction targets, aligned row-for-row with X_in.
    model : callable
        Called as model(batch); must return (x_out, z_mean, z_logvar).
    optimizer : torch optimizer
        Optimizer over the model's parameters.
    loss_function : callable
        Called as loss_function(x_out, target_batch); returns a scalar tensor.
    EPOCHS : int
        Number of full passes over the data.
    batch_size : int
        Number of rows per mini-batch (the last batch may be smaller).
    kl_weight : float
        Weight on the KL divergence between N(z_mean, exp(z_logvar)) and a
        standard normal, summed over the last axis and averaged over the
        rest. The default of 0.0 trains on loss_function alone.

    Raises
    ------
    ValueError
        If batch_size is smaller than 1.

    Progress (the total loss, including any KL term) is printed every
    100 batches. Nothing is returned; the model is updated in place.
    '''
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    # Accept numpy arrays / nested lists as well as tensors.
    X_in = torch.as_tensor(X_in)
    X_target = torch.as_tensor(X_target)

    # Use the real dataset length rather than a hard-coded 60000, so that
    # shorter inputs do not yield empty batches and longer ones are not cut.
    n_samples = len(X_in)

    for epoch in range(EPOCHS):
        print("")
        for batch_num, start in enumerate(range(0, n_samples, batch_size)):
            stop = start + batch_size
            X_batch = X_in[start:stop].float()
            X_target_batch = X_target[start:stop].float()

            # zero the parameter gradients
            optimizer.zero_grad()

            # now run our X's forward, get preds, incur
            # loss, backprop, and step the optimizer.
            X_tilde_batch, z_mean, z_logvar = model(X_batch)
            loss = loss_function(X_tilde_batch, X_target_batch)
            if kl_weight:
                # Closed-form KL( N(mean, exp(logvar)) || N(0, I) ).
                kl_per_sample = -0.5 * torch.sum(
                    1 + z_logvar - z_mean.pow(2) - z_logvar.exp(), dim=-1)
                loss = loss + kl_weight * kl_per_sample.mean()
            loss.backward()
            optimizer.step()

            # print out loss
            if batch_num % 100 == 0:
                print("epoch: {}, batch: {} // loss: {:.3f}".format(epoch, batch_num, loss.item()))
# Build a VAE with its default layer sizes and train it to reconstruct X
# (X is passed as both the input and the target).
m = VAE()

# SGD with momentum over all of the VAE's parameters.
optimizer = optim.SGD(
    m.parameters(),
    lr=0.01,
    momentum=0.9,
)

# loss_function is not defined in this cell; presumably it comes from an
# earlier cell of the notebook -- verify before running this in isolation.
train_VAE(
    X_in=X,
    X_target=X,
    model=m,
    optimizer=optimizer,
    loss_function=loss_function,
    EPOCHS=20,
)
epoch: 0, batch: 0 // loss: 0.186 epoch: 0, batch: 100 // loss: 0.157
/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:39: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
epoch: 0, batch: 200 // loss: 0.187 epoch: 0, batch: 300 // loss: 0.175 epoch: 0, batch: 400 // loss: 0.172 epoch: 0, batch: 500 // loss: 0.162 epoch: 0, batch: 600 // loss: 0.167 epoch: 0, batch: 700 // loss: 0.164 epoch: 0, batch: 800 // loss: 0.173 epoch: 0, batch: 900 // loss: 0.167 epoch: 0, batch: 1000 // loss: 0.152 epoch: 0, batch: 1100 // loss: 0.157 epoch: 0, batch: 1200 // loss: 0.153 epoch: 0, batch: 1300 // loss: 0.148 epoch: 0, batch: 1400 // loss: 0.155 epoch: 0, batch: 1500 // loss: 0.148 epoch: 0, batch: 1600 // loss: 0.147 epoch: 0, batch: 1700 // loss: 0.144 epoch: 0, batch: 1800 // loss: 0.132 epoch: 0, batch: 1900 // loss: 0.135 epoch: 0, batch: 2000 // loss: 0.119 epoch: 0, batch: 2100 // loss: 0.123 epoch: 0, batch: 2200 // loss: 0.127 epoch: 0, batch: 2300 // loss: 0.119 epoch: 0, batch: 2400 // loss: 0.109 epoch: 0, batch: 2500 // loss: 0.100 epoch: 0, batch: 2600 // loss: 0.107 epoch: 0, batch: 2700 // loss: 0.094 epoch: 0, batch: 2800 // loss: 0.104 epoch: 0, batch: 2900 // loss: 0.091 epoch: 0, batch: 3000 // loss: 0.094 epoch: 0, batch: 3100 // loss: 0.089 epoch: 0, batch: 3200 // loss: 0.093 epoch: 0, batch: 3300 // loss: 0.088 epoch: 0, batch: 3400 // loss: 0.078 epoch: 0, batch: 3500 // loss: 0.084 epoch: 0, batch: 3600 // loss: 0.099 epoch: 0, batch: 3700 // loss: 0.092 epoch: 1, batch: 0 // loss: 0.108 epoch: 1, batch: 100 // loss: 0.089 epoch: 1, batch: 200 // loss: 0.101 epoch: 1, batch: 300 // loss: 0.090 epoch: 1, batch: 400 // loss: 0.094 epoch: 1, batch: 500 // loss: 0.080 epoch: 1, batch: 600 // loss: 0.086 epoch: 1, batch: 700 // loss: 0.086 epoch: 1, batch: 800 // loss: 0.089 epoch: 1, batch: 900 // loss: 0.093 epoch: 1, batch: 1000 // loss: 0.075 epoch: 1, batch: 1100 // loss: 0.086 epoch: 1, batch: 1200 // loss: 0.084 epoch: 1, batch: 1300 // loss: 0.086 epoch: 1, batch: 1400 // loss: 0.084 epoch: 1, batch: 1500 // loss: 0.079 epoch: 1, batch: 1600 // loss: 0.089 epoch: 1, batch: 1700 // loss: 0.089 epoch: 1, batch: 1800 
// loss: 0.086 epoch: 1, batch: 1900 // loss: 0.082 epoch: 1, batch: 2000 // loss: 0.071 epoch: 1, batch: 2100 // loss: 0.085 epoch: 1, batch: 2200 // loss: 0.093 epoch: 1, batch: 2300 // loss: 0.087 epoch: 1, batch: 2400 // loss: 0.075 epoch: 1, batch: 2500 // loss: 0.072 epoch: 1, batch: 2600 // loss: 0.083 epoch: 1, batch: 2700 // loss: 0.074 epoch: 1, batch: 2800 // loss: 0.084 epoch: 1, batch: 2900 // loss: 0.073 epoch: 1, batch: 3000 // loss: 0.075 epoch: 1, batch: 3100 // loss: 0.071 epoch: 1, batch: 3200 // loss: 0.070 epoch: 1, batch: 3300 // loss: 0.068 epoch: 1, batch: 3400 // loss: 0.062 epoch: 1, batch: 3500 // loss: 0.063 epoch: 1, batch: 3600 // loss: 0.074 epoch: 1, batch: 3700 // loss: 0.071 epoch: 2, batch: 0 // loss: 0.086 epoch: 2, batch: 100 // loss: 0.075 epoch: 2, batch: 200 // loss: 0.070 epoch: 2, batch: 300 // loss: 0.072 epoch: 2, batch: 400 // loss: 0.073 epoch: 2, batch: 500 // loss: 0.059 epoch: 2, batch: 600 // loss: 0.065 epoch: 2, batch: 700 // loss: 0.065 epoch: 2, batch: 800 // loss: 0.067 epoch: 2, batch: 900 // loss: 0.072 epoch: 2, batch: 1000 // loss: 0.056 epoch: 2, batch: 1100 // loss: 0.062 epoch: 2, batch: 1200 // loss: 0.060 epoch: 2, batch: 1300 // loss: 0.067 epoch: 2, batch: 1400 // loss: 0.060 epoch: 2, batch: 1500 // loss: 0.061 epoch: 2, batch: 1600 // loss: 0.068 epoch: 2, batch: 1700 // loss: 0.063 epoch: 2, batch: 1800 // loss: 0.066 epoch: 2, batch: 1900 // loss: 0.060 epoch: 2, batch: 2000 // loss: 0.057 epoch: 2, batch: 2100 // loss: 0.062 epoch: 2, batch: 2200 // loss: 0.070 epoch: 2, batch: 2300 // loss: 0.067 epoch: 2, batch: 2400 // loss: 0.049 epoch: 2, batch: 2500 // loss: 0.054 epoch: 2, batch: 2600 // loss: 0.058 epoch: 2, batch: 2700 // loss: 0.055 epoch: 2, batch: 2800 // loss: 0.059 epoch: 2, batch: 2900 // loss: 0.056 epoch: 2, batch: 3000 // loss: 0.057 epoch: 2, batch: 3100 // loss: 0.050 epoch: 2, batch: 3200 // loss: 0.050 epoch: 2, batch: 3300 // loss: 0.049 epoch: 2, batch: 3400 // loss: 
0.048 epoch: 2, batch: 3500 // loss: 0.042 epoch: 2, batch: 3600 // loss: 0.052 epoch: 2, batch: 3700 // loss: 0.051 epoch: 3, batch: 0 // loss: 0.067 epoch: 3, batch: 100 // loss: 0.060 epoch: 3, batch: 200 // loss: 0.051 epoch: 3, batch: 300 // loss: 0.058 epoch: 3, batch: 400 // loss: 0.056 epoch: 3, batch: 500 // loss: 0.045 epoch: 3, batch: 600 // loss: 0.048 epoch: 3, batch: 700 // loss: 0.051 epoch: 3, batch: 800 // loss: 0.052 epoch: 3, batch: 900 // loss: 0.058 epoch: 3, batch: 1000 // loss: 0.046 epoch: 3, batch: 1100 // loss: 0.048 epoch: 3, batch: 1200 // loss: 0.050 epoch: 3, batch: 1300 // loss: 0.053 epoch: 3, batch: 1400 // loss: 0.047 epoch: 3, batch: 1500 // loss: 0.052 epoch: 3, batch: 1600 // loss: 0.059 epoch: 3, batch: 1700 // loss: 0.050 epoch: 3, batch: 1800 // loss: 0.055 epoch: 3, batch: 1900 // loss: 0.050 epoch: 3, batch: 2000 // loss: 0.049 epoch: 3, batch: 2100 // loss: 0.052 epoch: 3, batch: 2200 // loss: 0.055 epoch: 3, batch: 2300 // loss: 0.054 epoch: 3, batch: 2400 // loss: 0.043 epoch: 3, batch: 2500 // loss: 0.046 epoch: 3, batch: 2600 // loss: 0.048 epoch: 3, batch: 2700 // loss: 0.046 epoch: 3, batch: 2800 // loss: 0.048 epoch: 3, batch: 2900 // loss: 0.048 epoch: 3, batch: 3000 // loss: 0.050 epoch: 3, batch: 3100 // loss: 0.043 epoch: 3, batch: 3200 // loss: 0.043 epoch: 3, batch: 3300 // loss: 0.041 epoch: 3, batch: 3400 // loss: 0.044 epoch: 3, batch: 3500 // loss: 0.035 epoch: 3, batch: 3600 // loss: 0.044 epoch: 3, batch: 3700 // loss: 0.044 epoch: 4, batch: 0 // loss: 0.057 epoch: 4, batch: 100 // loss: 0.049 epoch: 4, batch: 200 // loss: 0.044 epoch: 4, batch: 300 // loss: 0.052 epoch: 4, batch: 400 // loss: 0.047 epoch: 4, batch: 500 // loss: 0.040 epoch: 4, batch: 600 // loss: 0.041 epoch: 4, batch: 700 // loss: 0.045 epoch: 4, batch: 800 // loss: 0.042 epoch: 4, batch: 900 // loss: 0.050 epoch: 4, batch: 1000 // loss: 0.043 epoch: 4, batch: 1100 // loss: 0.043 epoch: 4, batch: 1200 // loss: 0.045 epoch: 4, batch: 
1300 // loss: 0.046 epoch: 4, batch: 1400 // loss: 0.042 epoch: 4, batch: 1500 // loss: 0.046 epoch: 4, batch: 1600 // loss: 0.053 epoch: 4, batch: 1700 // loss: 0.043 epoch: 4, batch: 1800 // loss: 0.050 epoch: 4, batch: 1900 // loss: 0.044 epoch: 4, batch: 2000 // loss: 0.045 epoch: 4, batch: 2100 // loss: 0.046 epoch: 4, batch: 2200 // loss: 0.048 epoch: 4, batch: 2300 // loss: 0.049 epoch: 4, batch: 2400 // loss: 0.041 epoch: 4, batch: 2500 // loss: 0.042 epoch: 4, batch: 2600 // loss: 0.044 epoch: 4, batch: 2700 // loss: 0.042 epoch: 4, batch: 2800 // loss: 0.043 epoch: 4, batch: 2900 // loss: 0.044 epoch: 4, batch: 3000 // loss: 0.046 epoch: 4, batch: 3100 // loss: 0.040 epoch: 4, batch: 3200 // loss: 0.038 epoch: 4, batch: 3300 // loss: 0.038 epoch: 4, batch: 3400 // loss: 0.042 epoch: 4, batch: 3500 // loss: 0.032 epoch: 4, batch: 3600 // loss: 0.041 epoch: 4, batch: 3700 // loss: 0.040 epoch: 5, batch: 0 // loss: 0.052 epoch: 5, batch: 100 // loss: 0.045 epoch: 5, batch: 200 // loss: 0.041 epoch: 5, batch: 300 // loss: 0.048 epoch: 5, batch: 400 // loss: 0.044 epoch: 5, batch: 500 // loss: 0.038 epoch: 5, batch: 600 // loss: 0.038 epoch: 5, batch: 700 // loss: 0.041 epoch: 5, batch: 800 // loss: 0.039 epoch: 5, batch: 900 // loss: 0.046 epoch: 5, batch: 1000 // loss: 0.042 epoch: 5, batch: 1100 // loss: 0.041 epoch: 5, batch: 1200 // loss: 0.043 epoch: 5, batch: 1300 // loss: 0.043 epoch: 5, batch: 1400 // loss: 0.040 epoch: 5, batch: 1500 // loss: 0.043 epoch: 5, batch: 1600 // loss: 0.050 epoch: 5, batch: 1700 // loss: 0.040 epoch: 5, batch: 1800 // loss: 0.048 epoch: 5, batch: 1900 // loss: 0.042 epoch: 5, batch: 2000 // loss: 0.043 epoch: 5, batch: 2100 // loss: 0.044 epoch: 5, batch: 2200 // loss: 0.045 epoch: 5, batch: 2300 // loss: 0.046 epoch: 5, batch: 2400 // loss: 0.039 epoch: 5, batch: 2500 // loss: 0.040 epoch: 5, batch: 2600 // loss: 0.042 epoch: 5, batch: 2700 // loss: 0.041 epoch: 5, batch: 2800 // loss: 0.041 epoch: 5, batch: 2900 // loss: 
0.042 epoch: 5, batch: 3000 // loss: 0.044 epoch: 5, batch: 3100 // loss: 0.040 epoch: 5, batch: 3200 // loss: 0.036 epoch: 5, batch: 3300 // loss: 0.036 epoch: 5, batch: 3400 // loss: 0.041 epoch: 5, batch: 3500 // loss: 0.031 epoch: 5, batch: 3600 // loss: 0.040 epoch: 5, batch: 3700 // loss: 0.038 epoch: 6, batch: 0 // loss: 0.051 epoch: 6, batch: 100 // loss: 0.043 epoch: 6, batch: 200 // loss: 0.039 epoch: 6, batch: 300 // loss: 0.047 epoch: 6, batch: 400 // loss: 0.043 epoch: 6, batch: 500 // loss: 0.037 epoch: 6, batch: 600 // loss: 0.037 epoch: 6, batch: 700 // loss: 0.041 epoch: 6, batch: 800 // loss: 0.037 epoch: 6, batch: 900 // loss: 0.045 epoch: 6, batch: 1000 // loss: 0.042 epoch: 6, batch: 1100 // loss: 0.041 epoch: 6, batch: 1200 // loss: 0.042 epoch: 6, batch: 1300 // loss: 0.042 epoch: 6, batch: 1400 // loss: 0.040 epoch: 6, batch: 1500 // loss: 0.042 epoch: 6, batch: 1600 // loss: 0.049 epoch: 6, batch: 1700 // loss: 0.039 epoch: 6, batch: 1800 // loss: 0.047 epoch: 6, batch: 1900 // loss: 0.041 epoch: 6, batch: 2000 // loss: 0.042 epoch: 6, batch: 2100 // loss: 0.042 epoch: 6, batch: 2200 // loss: 0.044 epoch: 6, batch: 2300 // loss: 0.045 epoch: 6, batch: 2400 // loss: 0.038 epoch: 6, batch: 2500 // loss: 0.039 epoch: 6, batch: 2600 // loss: 0.042 epoch: 6, batch: 2700 // loss: 0.041 epoch: 6, batch: 2800 // loss: 0.040 epoch: 6, batch: 2900 // loss: 0.041 epoch: 6, batch: 3000 // loss: 0.044 epoch: 6, batch: 3100 // loss: 0.039 epoch: 6, batch: 3200 // loss: 0.036 epoch: 6, batch: 3300 // loss: 0.035 epoch: 6, batch: 3400 // loss: 0.040 epoch: 6, batch: 3500 // loss: 0.030 epoch: 6, batch: 3600 // loss: 0.039 epoch: 6, batch: 3700 // loss: 0.037 epoch: 7, batch: 0 // loss: 0.049 epoch: 7, batch: 100 // loss: 0.042 epoch: 7, batch: 200 // loss: 0.038 epoch: 7, batch: 300 // loss: 0.046 epoch: 7, batch: 400 // loss: 0.042 epoch: 7, batch: 500 // loss: 0.036 epoch: 7, batch: 600 // loss: 0.036 epoch: 7, batch: 700 // loss: 0.040 epoch: 7, batch: 
800 // loss: 0.036 epoch: 7, batch: 900 // loss: 0.044 epoch: 7, batch: 1000 // loss: 0.042 epoch: 7, batch: 1100 // loss: 0.040 epoch: 7, batch: 1200 // loss: 0.041 epoch: 7, batch: 1300 // loss: 0.042 epoch: 7, batch: 1400 // loss: 0.039 epoch: 7, batch: 1500 // loss: 0.042 epoch: 7, batch: 1600 // loss: 0.048 epoch: 7, batch: 1700 // loss: 0.038 epoch: 7, batch: 1800 // loss: 0.046 epoch: 7, batch: 1900 // loss: 0.041 epoch: 7, batch: 2000 // loss: 0.042 epoch: 7, batch: 2100 // loss: 0.041 epoch: 7, batch: 2200 // loss: 0.043 epoch: 7, batch: 2300 // loss: 0.044 epoch: 7, batch: 2400 // loss: 0.037 epoch: 7, batch: 2500 // loss: 0.039 epoch: 7, batch: 2600 // loss: 0.041 epoch: 7, batch: 2700 // loss: 0.040 epoch: 7, batch: 2800 // loss: 0.039 epoch: 7, batch: 2900 // loss: 0.040 epoch: 7, batch: 3000 // loss: 0.043 epoch: 7, batch: 3100 // loss: 0.039 epoch: 7, batch: 3200 // loss: 0.035 epoch: 7, batch: 3300 // loss: 0.034 epoch: 7, batch: 3400 // loss: 0.040 epoch: 7, batch: 3500 // loss: 0.030 epoch: 7, batch: 3600 // loss: 0.038 epoch: 7, batch: 3700 // loss: 0.037 epoch: 8, batch: 0 // loss: 0.048 epoch: 8, batch: 100 // loss: 0.041 epoch: 8, batch: 200 // loss: 0.037 epoch: 8, batch: 300 // loss: 0.045 epoch: 8, batch: 400 // loss: 0.041 epoch: 8, batch: 500 // loss: 0.035 epoch: 8, batch: 600 // loss: 0.036 epoch: 8, batch: 700 // loss: 0.039 epoch: 8, batch: 800 // loss: 0.036 epoch: 8, batch: 900 // loss: 0.043 epoch: 8, batch: 1000 // loss: 0.041 epoch: 8, batch: 1100 // loss: 0.039 epoch: 8, batch: 1200 // loss: 0.040 epoch: 8, batch: 1300 // loss: 0.041 epoch: 8, batch: 1400 // loss: 0.038 epoch: 8, batch: 1500 // loss: 0.041 epoch: 8, batch: 1600 // loss: 0.047 epoch: 8, batch: 1700 // loss: 0.037 epoch: 8, batch: 1800 // loss: 0.046 epoch: 8, batch: 1900 // loss: 0.039 epoch: 8, batch: 2000 // loss: 0.041 epoch: 8, batch: 2100 // loss: 0.040 epoch: 8, batch: 2200 // loss: 0.042 epoch: 8, batch: 2300 // loss: 0.043 epoch: 8, batch: 2400 // loss: 
0.037 epoch: 8, batch: 2500 // loss: 0.038 epoch: 8, batch: 2600 // loss: 0.041 epoch: 8, batch: 2700 // loss: 0.039 epoch: 8, batch: 2800 // loss: 0.038 epoch: 8, batch: 2900 // loss: 0.039 epoch: 8, batch: 3000 // loss: 0.042 epoch: 8, batch: 3100 // loss: 0.038 epoch: 8, batch: 3200 // loss: 0.034 epoch: 8, batch: 3300 // loss: 0.034 epoch: 8, batch: 3400 // loss: 0.039 epoch: 8, batch: 3500 // loss: 0.029 epoch: 8, batch: 3600 // loss: 0.037 epoch: 8, batch: 3700 // loss: 0.036 epoch: 9, batch: 0 // loss: 0.047 epoch: 9, batch: 100 // loss: 0.040 epoch: 9, batch: 200 // loss: 0.036 epoch: 9, batch: 300 // loss: 0.044 epoch: 9, batch: 400 // loss: 0.040 epoch: 9, batch: 500 // loss: 0.035 epoch: 9, batch: 600 // loss: 0.035 epoch: 9, batch: 700 // loss: 0.038 epoch: 9, batch: 800 // loss: 0.035 epoch: 9, batch: 900 // loss: 0.042 epoch: 9, batch: 1000 // loss: 0.040 epoch: 9, batch: 1100 // loss: 0.038 epoch: 9, batch: 1200 // loss: 0.039 epoch: 9, batch: 1300 // loss: 0.040 epoch: 9, batch: 1400 // loss: 0.037 epoch: 9, batch: 1500 // loss: 0.039 epoch: 9, batch: 1600 // loss: 0.046 epoch: 9, batch: 1700 // loss: 0.036 epoch: 9, batch: 1800 // loss: 0.044 epoch: 9, batch: 1900 // loss: 0.037 epoch: 9, batch: 2000 // loss: 0.040 epoch: 9, batch: 2100 // loss: 0.039 epoch: 9, batch: 2200 // loss: 0.041 epoch: 9, batch: 2300 // loss: 0.041 epoch: 9, batch: 2400 // loss: 0.036 epoch: 9, batch: 2500 // loss: 0.037 epoch: 9, batch: 2600 // loss: 0.040 epoch: 9, batch: 2700 // loss: 0.037 epoch: 9, batch: 2800 // loss: 0.036 epoch: 9, batch: 2900 // loss: 0.038 epoch: 9, batch: 3000 // loss: 0.041 epoch: 9, batch: 3100 // loss: 0.038 epoch: 9, batch: 3200 // loss: 0.033 epoch: 9, batch: 3300 // loss: 0.033 epoch: 9, batch: 3400 // loss: 0.037 epoch: 9, batch: 3500 // loss: 0.028 epoch: 9, batch: 3600 // loss: 0.036 epoch: 9, batch: 3700 // loss: 0.035 epoch: 10, batch: 0 // loss: 0.045 epoch: 10, batch: 100 // loss: 0.039 epoch: 10, batch: 200 // loss: 0.035 epoch: 
10, batch: 300 // loss: 0.043 epoch: 10, batch: 400 // loss: 0.039 epoch: 10, batch: 500 // loss: 0.034 epoch: 10, batch: 600 // loss: 0.034 epoch: 10, batch: 700 // loss: 0.037 epoch: 10, batch: 800 // loss: 0.034 epoch: 10, batch: 900 // loss: 0.041 epoch: 10, batch: 1000 // loss: 0.039 epoch: 10, batch: 1100 // loss: 0.037 epoch: 10, batch: 1200 // loss: 0.037 epoch: 10, batch: 1300 // loss: 0.039 epoch: 10, batch: 1400 // loss: 0.036 epoch: 10, batch: 1500 // loss: 0.038 epoch: 10, batch: 1600 // loss: 0.044 epoch: 10, batch: 1700 // loss: 0.035 epoch: 10, batch: 1800 // loss: 0.043 epoch: 10, batch: 1900 // loss: 0.035 epoch: 10, batch: 2000 // loss: 0.039 epoch: 10, batch: 2100 // loss: 0.038 epoch: 10, batch: 2200 // loss: 0.039 epoch: 10, batch: 2300 // loss: 0.040 epoch: 10, batch: 2400 // loss: 0.034 epoch: 10, batch: 2500 // loss: 0.035 epoch: 10, batch: 2600 // loss: 0.038 epoch: 10, batch: 2700 // loss: 0.036 epoch: 10, batch: 2800 // loss: 0.034 epoch: 10, batch: 2900 // loss: 0.037 epoch: 10, batch: 3000 // loss: 0.040 epoch: 10, batch: 3100 // loss: 0.037 epoch: 10, batch: 3200 // loss: 0.031 epoch: 10, batch: 3300 // loss: 0.031 epoch: 10, batch: 3400 // loss: 0.036 epoch: 10, batch: 3500 // loss: 0.027 epoch: 10, batch: 3600 // loss: 0.034 epoch: 10, batch: 3700 // loss: 0.033 epoch: 11, batch: 0 // loss: 0.044 epoch: 11, batch: 100 // loss: 0.037 epoch: 11, batch: 200 // loss: 0.034 epoch: 11, batch: 300 // loss: 0.041 epoch: 11, batch: 400 // loss: 0.037 epoch: 11, batch: 500 // loss: 0.033 epoch: 11, batch: 600 // loss: 0.033 epoch: 11, batch: 700 // loss: 0.035 epoch: 11, batch: 800 // loss: 0.032 epoch: 11, batch: 900 // loss: 0.040 epoch: 11, batch: 1000 // loss: 0.038 epoch: 11, batch: 1100 // loss: 0.036 epoch: 11, batch: 1200 // loss: 0.036 epoch: 11, batch: 1300 // loss: 0.039 epoch: 11, batch: 1400 // loss: 0.035 epoch: 11, batch: 1500 // loss: 0.037 epoch: 11, batch: 1600 // loss: 0.042 epoch: 11, batch: 1700 // loss: 0.033 epoch: 11, 
batch: 1800 // loss: 0.041 epoch: 11, batch: 1900 // loss: 0.033 epoch: 11, batch: 2000 // loss: 0.038 epoch: 11, batch: 2100 // loss: 0.036 epoch: 11, batch: 2200 // loss: 0.038 epoch: 11, batch: 2300 // loss: 0.039 epoch: 11, batch: 2400 // loss: 0.033 epoch: 11, batch: 2500 // loss: 0.034 epoch: 11, batch: 2600 // loss: 0.037 epoch: 11, batch: 2700 // loss: 0.035 epoch: 11, batch: 2800 // loss: 0.033 epoch: 11, batch: 2900 // loss: 0.036 epoch: 11, batch: 3000 // loss: 0.039 epoch: 11, batch: 3100 // loss: 0.037 epoch: 11, batch: 3200 // loss: 0.030 epoch: 11, batch: 3300 // loss: 0.030 epoch: 11, batch: 3400 // loss: 0.035 epoch: 11, batch: 3500 // loss: 0.026 epoch: 11, batch: 3600 // loss: 0.033 epoch: 11, batch: 3700 // loss: 0.032 epoch: 12, batch: 0 // loss: 0.042 epoch: 12, batch: 100 // loss: 0.036 epoch: 12, batch: 200 // loss: 0.033 epoch: 12, batch: 300 // loss: 0.040 epoch: 12, batch: 400 // loss: 0.036 epoch: 12, batch: 500 // loss: 0.033 epoch: 12, batch: 600 // loss: 0.033 epoch: 12, batch: 700 // loss: 0.035 epoch: 12, batch: 800 // loss: 0.031 epoch: 12, batch: 900 // loss: 0.039 epoch: 12, batch: 1000 // loss: 0.037 epoch: 12, batch: 1100 // loss: 0.036 epoch: 12, batch: 1200 // loss: 0.035 epoch: 12, batch: 1300 // loss: 0.037 epoch: 12, batch: 1400 // loss: 0.034 epoch: 12, batch: 1500 // loss: 0.035 epoch: 12, batch: 1600 // loss: 0.041 epoch: 12, batch: 1700 // loss: 0.033 epoch: 12, batch: 1800 // loss: 0.040 epoch: 12, batch: 1900 // loss: 0.032 epoch: 12, batch: 2000 // loss: 0.037 epoch: 12, batch: 2100 // loss: 0.035 epoch: 12, batch: 2200 // loss: 0.037 epoch: 12, batch: 2300 // loss: 0.038 epoch: 12, batch: 2400 // loss: 0.032 epoch: 12, batch: 2500 // loss: 0.034 epoch: 12, batch: 2600 // loss: 0.037 epoch: 12, batch: 2700 // loss: 0.034 epoch: 12, batch: 2800 // loss: 0.031 epoch: 12, batch: 2900 // loss: 0.035 epoch: 12, batch: 3000 // loss: 0.038 epoch: 12, batch: 3100 // loss: 0.036 epoch: 12, batch: 3200 // loss: 0.030 epoch: 
12, batch: 3300 // loss: 0.030 epoch: 12, batch: 3400 // loss: 0.034 epoch: 12, batch: 3500 // loss: 0.025 epoch: 12, batch: 3600 // loss: 0.032 epoch: 12, batch: 3700 // loss: 0.032 epoch: 13, batch: 0 // loss: 0.042 epoch: 13, batch: 100 // loss: 0.035 epoch: 13, batch: 200 // loss: 0.032 epoch: 13, batch: 300 // loss: 0.039 epoch: 13, batch: 400 // loss: 0.035 epoch: 13, batch: 500 // loss: 0.032 epoch: 13, batch: 600 // loss: 0.032 epoch: 13, batch: 700 // loss: 0.034 epoch: 13, batch: 800 // loss: 0.031 epoch: 13, batch: 900 // loss: 0.039 epoch: 13, batch: 1000 // loss: 0.036 epoch: 13, batch: 1100 // loss: 0.035 epoch: 13, batch: 1200 // loss: 0.034 epoch: 13, batch: 1300 // loss: 0.037 epoch: 13, batch: 1400 // loss: 0.033 epoch: 13, batch: 1500 // loss: 0.035 epoch: 13, batch: 1600 // loss: 0.040 epoch: 13, batch: 1700 // loss: 0.032 epoch: 13, batch: 1800 // loss: 0.039 epoch: 13, batch: 1900 // loss: 0.031 epoch: 13, batch: 2000 // loss: 0.037 epoch: 13, batch: 2100 // loss: 0.034 epoch: 13, batch: 2200 // loss: 0.037 epoch: 13, batch: 2300 // loss: 0.037 epoch: 13, batch: 2400 // loss: 0.032 epoch: 13, batch: 2500 // loss: 0.033 epoch: 13, batch: 2600 // loss: 0.036 epoch: 13, batch: 2700 // loss: 0.034 epoch: 13, batch: 2800 // loss: 0.030 epoch: 13, batch: 2900 // loss: 0.035 epoch: 13, batch: 3000 // loss: 0.037 epoch: 13, batch: 3100 // loss: 0.036 epoch: 13, batch: 3200 // loss: 0.029 epoch: 13, batch: 3300 // loss: 0.029 epoch: 13, batch: 3400 // loss: 0.034 epoch: 13, batch: 3500 // loss: 0.025 epoch: 13, batch: 3600 // loss: 0.031 epoch: 13, batch: 3700 // loss: 0.031 epoch: 14, batch: 0 // loss: 0.041 epoch: 14, batch: 100 // loss: 0.035 epoch: 14, batch: 200 // loss: 0.031 epoch: 14, batch: 300 // loss: 0.038 epoch: 14, batch: 400 // loss: 0.034 epoch: 14, batch: 500 // loss: 0.032 epoch: 14, batch: 600 // loss: 0.032 epoch: 14, batch: 700 // loss: 0.033 epoch: 14, batch: 800 // loss: 0.030 epoch: 14, batch: 900 // loss: 0.038 epoch: 14, 
batch: 1000 // loss: 0.036 epoch: 14, batch: 1100 // loss: 0.034 epoch: 14, batch: 1200 // loss: 0.033 epoch: 14, batch: 1300 // loss: 0.036 epoch: 14, batch: 1400 // loss: 0.033 epoch: 14, batch: 1500 // loss: 0.034 epoch: 14, batch: 1600 // loss: 0.039 epoch: 14, batch: 1700 // loss: 0.031 epoch: 14, batch: 1800 // loss: 0.038 epoch: 14, batch: 1900 // loss: 0.030 epoch: 14, batch: 2000 // loss: 0.036 epoch: 14, batch: 2100 // loss: 0.034 epoch: 14, batch: 2200 // loss: 0.036 epoch: 14, batch: 2300 // loss: 0.036 epoch: 14, batch: 2400 // loss: 0.031 epoch: 14, batch: 2500 // loss: 0.033 epoch: 14, batch: 2600 // loss: 0.035 epoch: 14, batch: 2700 // loss: 0.033 epoch: 14, batch: 2800 // loss: 0.030 epoch: 14, batch: 2900 // loss: 0.034 epoch: 14, batch: 3000 // loss: 0.037 epoch: 14, batch: 3100 // loss: 0.035 epoch: 14, batch: 3200 // loss: 0.029 epoch: 14, batch: 3300 // loss: 0.029 epoch: 14, batch: 3400 // loss: 0.033 epoch: 14, batch: 3500 // loss: 0.025 epoch: 14, batch: 3600 // loss: 0.031 epoch: 14, batch: 3700 // loss: 0.030 epoch: 15, batch: 0 // loss: 0.040 epoch: 15, batch: 100 // loss: 0.034 epoch: 15, batch: 200 // loss: 0.031 epoch: 15, batch: 300 // loss: 0.038 epoch: 15, batch: 400 // loss: 0.034 epoch: 15, batch: 500 // loss: 0.032 epoch: 15, batch: 600 // loss: 0.031 epoch: 15, batch: 700 // loss: 0.033 epoch: 15, batch: 800 // loss: 0.029 epoch: 15, batch: 900 // loss: 0.038 epoch: 15, batch: 1000 // loss: 0.036 epoch: 15, batch: 1100 // loss: 0.034 epoch: 15, batch: 1200 // loss: 0.033 epoch: 15, batch: 1300 // loss: 0.036 epoch: 15, batch: 1400 // loss: 0.033 epoch: 15, batch: 1500 // loss: 0.034 epoch: 15, batch: 1600 // loss: 0.039 epoch: 15, batch: 1700 // loss: 0.031 epoch: 15, batch: 1800 // loss: 0.037 epoch: 15, batch: 1900 // loss: 0.030 epoch: 15, batch: 2000 // loss: 0.035 epoch: 15, batch: 2100 // loss: 0.033 epoch: 15, batch: 2200 // loss: 0.036 epoch: 15, batch: 2300 // loss: 0.036 epoch: 15, batch: 2400 // loss: 0.031 epoch: 
15, batch: 2500 // loss: 0.032 epoch: 15, batch: 2600 // loss: 0.035 epoch: 15, batch: 2700 // loss: 0.033 epoch: 15, batch: 2800 // loss: 0.029 epoch: 15, batch: 2900 // loss: 0.034 epoch: 15, batch: 3000 // loss: 0.036 epoch: 15, batch: 3100 // loss: 0.035 epoch: 15, batch: 3200 // loss: 0.028 epoch: 15, batch: 3300 // loss: 0.028 epoch: 15, batch: 3400 // loss: 0.033 epoch: 15, batch: 3500 // loss: 0.024 epoch: 15, batch: 3600 // loss: 0.030 epoch: 15, batch: 3700 // loss: 0.030 epoch: 16, batch: 0 // loss: 0.040 epoch: 16, batch: 100 // loss: 0.033 epoch: 16, batch: 200 // loss: 0.030 epoch: 16, batch: 300 // loss: 0.037 epoch: 16, batch: 400 // loss: 0.033 epoch: 16, batch: 500 // loss: 0.031 epoch: 16, batch: 600 // loss: 0.031 epoch: 16, batch: 700 // loss: 0.032 epoch: 16, batch: 800 // loss: 0.029 epoch: 16, batch: 900 // loss: 0.038 epoch: 16, batch: 1000 // loss: 0.035 epoch: 16, batch: 1100 // loss: 0.033 epoch: 16, batch: 1200 // loss: 0.032 epoch: 16, batch: 1300 // loss: 0.036 epoch: 16, batch: 1400 // loss: 0.032 epoch: 16, batch: 1500 // loss: 0.034 epoch: 16, batch: 1600 // loss: 0.038 epoch: 16, batch: 1700 // loss: 0.030 epoch: 16, batch: 1800 // loss: 0.036 epoch: 16, batch: 1900 // loss: 0.029 epoch: 16, batch: 2000 // loss: 0.035 epoch: 16, batch: 2100 // loss: 0.033 epoch: 16, batch: 2200 // loss: 0.035 epoch: 16, batch: 2300 // loss: 0.035 epoch: 16, batch: 2400 // loss: 0.031 epoch: 16, batch: 2500 // loss: 0.031 epoch: 16, batch: 2600 // loss: 0.035 epoch: 16, batch: 2700 // loss: 0.032 epoch: 16, batch: 2800 // loss: 0.029 epoch: 16, batch: 2900 // loss: 0.033 epoch: 16, batch: 3000 // loss: 0.036 epoch: 16, batch: 3100 // loss: 0.034 epoch: 16, batch: 3200 // loss: 0.027 epoch: 16, batch: 3300 // loss: 0.028 epoch: 16, batch: 3400 // loss: 0.032 epoch: 16, batch: 3500 // loss: 0.024 epoch: 16, batch: 3600 // loss: 0.029 epoch: 16, batch: 3700 // loss: 0.029 epoch: 17, batch: 0 // loss: 0.039 epoch: 17, batch: 100 // loss: 0.032 epoch: 
17, batch: 200 // loss: 0.029 epoch: 17, batch: 300 // loss: 0.037 epoch: 17, batch: 400 // loss: 0.033 epoch: 17, batch: 500 // loss: 0.031 epoch: 17, batch: 600 // loss: 0.030 epoch: 17, batch: 700 // loss: 0.032 epoch: 17, batch: 800 // loss: 0.028 epoch: 17, batch: 900 // loss: 0.037 epoch: 17, batch: 1000 // loss: 0.035 epoch: 17, batch: 1100 // loss: 0.032 epoch: 17, batch: 1200 // loss: 0.031 epoch: 17, batch: 1300 // loss: 0.035 epoch: 17, batch: 1400 // loss: 0.031 epoch: 17, batch: 1500 // loss: 0.033 epoch: 17, batch: 1600 // loss: 0.037 epoch: 17, batch: 1700 // loss: 0.030 epoch: 17, batch: 1800 // loss: 0.035 epoch: 17, batch: 1900 // loss: 0.029 epoch: 17, batch: 2000 // loss: 0.034 epoch: 17, batch: 2100 // loss: 0.032 epoch: 17, batch: 2200 // loss: 0.034 epoch: 17, batch: 2300 // loss: 0.033 epoch: 17, batch: 2400 // loss: 0.030 epoch: 17, batch: 2500 // loss: 0.030 epoch: 17, batch: 2600 // loss: 0.034 epoch: 17, batch: 2700 // loss: 0.031 epoch: 17, batch: 2800 // loss: 0.028 epoch: 17, batch: 2900 // loss: 0.032 epoch: 17, batch: 3000 // loss: 0.035 epoch: 17, batch: 3100 // loss: 0.034 epoch: 17, batch: 3200 // loss: 0.027 epoch: 17, batch: 3300 // loss: 0.027 epoch: 17, batch: 3400 // loss: 0.031 epoch: 17, batch: 3500 // loss: 0.024 epoch: 17, batch: 3600 // loss: 0.029 epoch: 17, batch: 3700 // loss: 0.028 epoch: 18, batch: 0 // loss: 0.039 epoch: 18, batch: 100 // loss: 0.031 epoch: 18, batch: 200 // loss: 0.028 epoch: 18, batch: 300 // loss: 0.036 epoch: 18, batch: 400 // loss: 0.032 epoch: 18, batch: 500 // loss: 0.031 epoch: 18, batch: 600 // loss: 0.030 epoch: 18, batch: 700 // loss: 0.031 epoch: 18, batch: 800 // loss: 0.027 epoch: 18, batch: 900 // loss: 0.036 epoch: 18, batch: 1000 // loss: 0.034 epoch: 18, batch: 1100 // loss: 0.031 epoch: 18, batch: 1200 // loss: 0.031 epoch: 18, batch: 1300 // loss: 0.034 epoch: 18, batch: 1400 // loss: 0.031 epoch: 18, batch: 1500 // loss: 0.033 epoch: 18, batch: 1600 // loss: 0.036 epoch: 18, 
batch: 1700 // loss: 0.030 epoch: 18, batch: 1800 // loss: 0.033 epoch: 18, batch: 1900 // loss: 0.028 epoch: 18, batch: 2000 // loss: 0.033 epoch: 18, batch: 2100 // loss: 0.031 epoch: 18, batch: 2200 // loss: 0.033 epoch: 18, batch: 2300 // loss: 0.032 epoch: 18, batch: 2400 // loss: 0.029 epoch: 18, batch: 2500 // loss: 0.029 epoch: 18, batch: 2600 // loss: 0.033 epoch: 18, batch: 2700 // loss: 0.030 epoch: 18, batch: 2800 // loss: 0.027 epoch: 18, batch: 2900 // loss: 0.031 epoch: 18, batch: 3000 // loss: 0.034 epoch: 18, batch: 3100 // loss: 0.033 epoch: 18, batch: 3200 // loss: 0.026 epoch: 18, batch: 3300 // loss: 0.026 epoch: 18, batch: 3400 // loss: 0.030 epoch: 18, batch: 3500 // loss: 0.023 epoch: 18, batch: 3600 // loss: 0.028 epoch: 18, batch: 3700 // loss: 0.027 epoch: 19, batch: 0 // loss: 0.038 epoch: 19, batch: 100 // loss: 0.030 epoch: 19, batch: 200 // loss: 0.027 epoch: 19, batch: 300 // loss: 0.035 epoch: 19, batch: 400 // loss: 0.031 epoch: 19, batch: 500 // loss: 0.030 epoch: 19, batch: 600 // loss: 0.029 epoch: 19, batch: 700 // loss: 0.030 epoch: 19, batch: 800 // loss: 0.026 epoch: 19, batch: 900 // loss: 0.036 epoch: 19, batch: 1000 // loss: 0.033 epoch: 19, batch: 1100 // loss: 0.031 epoch: 19, batch: 1200 // loss: 0.030 epoch: 19, batch: 1300 // loss: 0.034 epoch: 19, batch: 1400 // loss: 0.030 epoch: 19, batch: 1500 // loss: 0.033 epoch: 19, batch: 1600 // loss: 0.035 epoch: 19, batch: 1700 // loss: 0.029 epoch: 19, batch: 1800 // loss: 0.032 epoch: 19, batch: 1900 // loss: 0.027 epoch: 19, batch: 2000 // loss: 0.033 epoch: 19, batch: 2100 // loss: 0.030 epoch: 19, batch: 2200 // loss: 0.031 epoch: 19, batch: 2300 // loss: 0.031 epoch: 19, batch: 2400 // loss: 0.029 epoch: 19, batch: 2500 // loss: 0.028 epoch: 19, batch: 2600 // loss: 0.033 epoch: 19, batch: 2700 // loss: 0.029 epoch: 19, batch: 2800 // loss: 0.026 epoch: 19, batch: 2900 // loss: 0.030 epoch: 19, batch: 3000 // loss: 0.033 epoch: 19, batch: 3100 // loss: 0.032 epoch: 
19, batch: 3200 // loss: 0.025 epoch: 19, batch: 3300 // loss: 0.025 epoch: 19, batch: 3400 // loss: 0.029 epoch: 19, batch: 3500 // loss: 0.023 epoch: 19, batch: 3600 // loss: 0.027 epoch: 19, batch: 3700 // loss: 0.026
# Run the model `m` five separate times on the same input (row 2 of X, cast
# to float) and keep element [0] of each output, detached from the autograd
# graph and converted to a NumPy array.
# NOTE(review): presumably `m`'s forward pass is stochastic and returns a
# tuple whose first item is the reconstruction, so the five entries differ
# -- confirm against the model definition.
samples = [m(X[2,:].float())[0].detach().numpy() for _ in range(5)]
/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:39: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
# Display the fifth (last) collected output, reshaped from a flat vector
# into a 28x28 image, in grayscale.
imshow(np.asarray(samples[4]).reshape(28,28), cmap='gray')
<matplotlib.image.AxesImage at 0x7f8f309202b0>
# Sanity check: shape of a single row of X.
X[0,:].shape
torch.Size([784])
# Sanity check: shape of the full data tensor X.
X.shape
torch.Size([60000, 784])