DeepInvention

Install

  sudo apt install musescore timidity lilypond
  pip install music21 matplotlib scipy tensorflow
In [1]:
from music21 import *
In [11]:
us = environment.UserSettings()
us.getSettingsPath()
Out[11]:
'/home/tsu-nera/.music21rc'
In [12]:
#us["musicxmlPath"] = "/usr/bin/gedit"
us["musicxmlPath"] = "/usr/bin/musescore"
us["midiPath"] = "/usr/bin/timidity"
us["showFormat"] = "lilypond"
us["writeFormat"] = "lilypond"
us["musescoreDirectPNGPath"] = "/usr/bin/musescore"

Prepare Data

In [ ]:
!mkdir composer
In [ ]:
import glob
REP="@\n"
def trim_metadata(output_path, glob_path):
    comp_txt = open(output_path,"w")
    ll = glob.glob(glob_path)
    for song in ll:
        lines = open(song,"r").readlines()
        out = []
        found_first = False
        for l in lines:
            if l.startswith("="):
                ## new measure, replace the measure with the @ sign, not part of humdrum
                out.append(REP)
                found_first = True
                continue
            if not found_first:
                ## keep going until we find the end of the header and metadata
                continue
            if l.startswith("!"):
                ## ignore comments
                continue
            out.append(l)
        comp_txt.writelines(out)
    comp_txt.close()

Get Data from KernScore

In [ ]:
%mkdir kernscore
%mkdir kernscore/bach
In [ ]:
from urllib.request import urlopen
for i in range(1,15+1):
    filename = "inven{0:02d}.krn".format(i)
    file = urlopen("http://kern.humdrum.org/cgi-bin/ksdata?l=osu/classical/bach/inventions&file=%s&f=kern"%filename)
    with open("kernscore/bach/"+filename,'wb') as output:
        output.write(file.read())
In [ ]:
output_path = "composer/bach.txt"
glob_path = "kernscore/bach/*.krn"
trim_metadata(output_path, glob_path)

Setup

In [3]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf
In [4]:
filename = 'composer/bach.txt'
with open(filename, 'r') as f:
    text=f.read()
vocab = set(text)
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
In [5]:
text[:50]
Out[5]:
'@\n4.r\t16dL\n.\t16e\n.\t16f\n.\t16g\n.\t16a\n.\t16b-J\[email protected]\n4.r\t1'
In [6]:
encoded[:100]
Out[6]:
array([11,  4,  8, 31, 15, 35,  7,  3, 27,  6,  4, 31, 35,  7,  3, 12,  4,
       31, 35,  7,  3, 39,  4, 31, 35,  7,  3, 24,  4, 31, 35,  7,  3,  2,
        4, 31, 35,  7,  3,  9, 13,  1,  4, 11,  4,  8, 31, 15, 35,  7,  3,
       22, 32,  6,  4, 31, 35,  7,  3,  9, 13,  4, 31, 35,  7,  3,  2,  4,
       31, 35,  7,  3, 24,  4, 31, 35,  7,  3, 39,  4, 31, 35,  7,  3, 12,
        1,  4, 11,  4,  7,  3, 41,  6, 35, 21, 39,  6,  4,  7,  3], dtype=int32)
In [7]:
vocab_size = len(vocab)
vocab_size
Out[7]:
43

Preprocess

In [8]:
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    # Get the batch size and number of batches we can make
    batch_size = n_seqs * n_steps
    n_batches = len(arr)//batch_size
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size]
    
    # Reshape into n_seqs rows
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0, arr.shape[1], n_steps):
        # The features
        x = arr[:, n:n+n_steps]
        # The targets, shifted by one
        y = np.zeros_like(x)
        y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
        yield x, y
In [9]:
batches = get_batches(encoded, 10, 50)
x, y = next(batches)
In [10]:
print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])
x
 [[11  4  8 31 15 35  7  3 27  6]
 [35  7  3 24 24 42  5  4  7  3]
 [ 7  3 22 22 32  4 31 35  7  3]
 [ 9  9  4  7  3 38 35 31  4 11]
 [ 3 24  4  7  3 33 35  7  3 39]
 [ 2  2  4  7  3 27 35 31  4  7]
 [16 18 41 32 35  7  3 39 32  1]
 [35 31  4  7  3 38 35  8  9 13]
 [27  1  4 11  4  7  3 33  6 35]
 [ 7  3 24  4 11  4  7  3  0 35]]

y
 [[ 4  8 31 15 35  7  3 27  6  4]
 [ 7  3 24 24 42  5  4  7  3 36]
 [ 3 22 22 32  4 31 35  7  3  2]
 [ 9  4  7  3 38 35 31  4 11  4]
 [24  4  7  3 33 35  7  3 39 32]
 [ 2  4  7  3 27 35 31  4  7  3]
 [18 41 32 35  7  3 39 32  1  4]
 [31  4  7  3 38 35  8  9 13 19]
 [ 1  4 11  4  7  3 33  6 35 21]
 [ 3 24  4 11  4  7  3  0 35 21]]

Build Model

In [11]:
def build_inputs(batch_size, num_steps):
    ''' Define placeholders for inputs, targets, and dropout 
    
        Arguments
        ---------
        batch_size: Batch size, number of sequences per batch
        num_steps: Number of sequence steps in a batch
        
    '''
    # Declare placeholders we'll feed into the graph
    inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
    targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')
    
    # Keep probability placeholder for drop out layers
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    return inputs, targets, keep_prob
In [12]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build LSTM cell.
    
        Arguments
        ---------
        keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability
        lstm_size: Size of the hidden layers in the LSTM cells
        num_layers: Number of LSTM layers
        batch_size: Batch size

    '''
    ### Build the LSTM Cell
    # Use a basic LSTM cell
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    
    # Add dropout to the cell
    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)
    
    # Stack up multiple LSTM layers, for deep learning
    cell = tf.contrib.rnn.MultiRNNCell([drop] * num_layers)
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    return cell, initial_state
In [13]:
def build_output(lstm_output, in_size, out_size):
    ''' Build a softmax layer, return the softmax output and logits.
    
        Arguments
        ---------
        
        x: Input tensor
        in_size: Size of the input tensor, for example, size of the LSTM cells
        out_size: Size of this softmax layer
    
    '''

    # Reshape output so it's a bunch of rows, one row for each step for each sequence.
    # That is, the shape should be batch_size*num_steps rows by lstm_size columns
    seq_output = tf.concat(lstm_output, axis=1)
    x = tf.reshape(seq_output, [-1, in_size])
    
    # Connect the RNN outputs to a softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))
    
    # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
    # of rows of logit outputs, one for each step and sequence
    logits = tf.matmul(x, softmax_w) + softmax_b
    
    # Use softmax to get the probabilities for predicted characters
    out = tf.nn.softmax(logits, name='predictions')
    
    return out, logits
In [14]:
def build_loss(logits, targets, lstm_size, num_classes):
    ''' Calculate the loss from the logits and the targets.
    
        Arguments
        ---------
        logits: Logits from final fully connected layer
        targets: Targets for supervised learning
        lstm_size: Number of LSTM hidden units
        num_classes: Number of classes in targets
        
    '''
    
    # One-hot encode targets and reshape to match logits, one row per batch_size per step
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
    
    # Softmax cross entropy loss
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    return loss
In [15]:
def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build optmizer for training, using gradient clipping.
    
        Arguments:
        loss: Network loss
        learning_rate: Learning rate for optimizer
    
    '''
    
    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer
In [16]:
class CharRNN:
    
    def __init__(self, num_classes, batch_size=64, num_steps=50, 
                       lstm_size=128, num_layers=2, learning_rate=0.001, 
                       grad_clip=5, sampling=False):
    
        # When we're using this network for sampling later, we'll be passing in
        # one character at a time, so providing an option for that
        if sampling == True:
            batch_size, num_steps = 1, 1
        else:
            batch_size, num_steps = batch_size, num_steps

        tf.reset_default_graph()
        
        # Build the input placeholder tensors
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Build the LSTM cell
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        ### Run the data through the RNN layers
        # First, one-hot encode the input tokens
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        # Run each sequence step through the RNN and collect the outputs
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state
        
        # Get softmax predictions and logits
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
        
        # Loss and optimizer (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

Training

In [23]:
batch_size = 10
num_steps = 10 
lstm_size = 512
num_layers = 2
learning_rate = 0.001
keep_prob = 0.5
In [24]:
epochs = 20
# Save every N iterations
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Use the line below to load a checkpoint and resume training
    #saver.restore(sess, 'checkpoints/______.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
Epoch: 1/20...  Training Step: 1...  Training loss: 3.7620...  0.1642 sec/batch
Epoch: 1/20...  Training Step: 2...  Training loss: 3.7256...  0.1233 sec/batch
Epoch: 1/20...  Training Step: 3...  Training loss: 3.6567...  0.1201 sec/batch
Epoch: 1/20...  Training Step: 4...  Training loss: 3.4860...  0.1188 sec/batch
Epoch: 1/20...  Training Step: 5...  Training loss: 3.2803...  0.1250 sec/batch
Epoch: 1/20...  Training Step: 6...  Training loss: 3.6022...  0.1220 sec/batch
Epoch: 1/20...  Training Step: 7...  Training loss: 4.0865...  0.1183 sec/batch
Epoch: 1/20...  Training Step: 8...  Training loss: 3.1127...  0.1227 sec/batch
Epoch: 1/20...  Training Step: 9...  Training loss: 3.3907...  0.1215 sec/batch
Epoch: 1/20...  Training Step: 10...  Training loss: 3.4856...  0.1189 sec/batch
Epoch: 1/20...  Training Step: 11...  Training loss: 3.4883...  0.1214 sec/batch
Epoch: 1/20...  Training Step: 12...  Training loss: 3.3462...  0.1254 sec/batch
Epoch: 1/20...  Training Step: 13...  Training loss: 3.3203...  0.1224 sec/batch
Epoch: 1/20...  Training Step: 14...  Training loss: 3.1857...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 15...  Training loss: 3.1654...  0.1260 sec/batch
Epoch: 1/20...  Training Step: 16...  Training loss: 3.3089...  0.1259 sec/batch
Epoch: 1/20...  Training Step: 17...  Training loss: 3.0935...  0.1218 sec/batch
Epoch: 1/20...  Training Step: 18...  Training loss: 3.0318...  0.1229 sec/batch
Epoch: 1/20...  Training Step: 19...  Training loss: 3.1196...  0.1216 sec/batch
Epoch: 1/20...  Training Step: 20...  Training loss: 2.9132...  0.1198 sec/batch
Epoch: 1/20...  Training Step: 21...  Training loss: 3.1271...  0.1283 sec/batch
Epoch: 1/20...  Training Step: 22...  Training loss: 3.1013...  0.1219 sec/batch
Epoch: 1/20...  Training Step: 23...  Training loss: 3.1496...  0.1247 sec/batch
Epoch: 1/20...  Training Step: 24...  Training loss: 3.0565...  0.1232 sec/batch
Epoch: 1/20...  Training Step: 25...  Training loss: 2.9929...  0.1239 sec/batch
Epoch: 1/20...  Training Step: 26...  Training loss: 3.0869...  0.1269 sec/batch
Epoch: 1/20...  Training Step: 27...  Training loss: 3.2220...  0.1225 sec/batch
Epoch: 1/20...  Training Step: 28...  Training loss: 3.1725...  0.1248 sec/batch
Epoch: 1/20...  Training Step: 29...  Training loss: 3.0635...  0.1241 sec/batch
Epoch: 1/20...  Training Step: 30...  Training loss: 3.1275...  0.1208 sec/batch
Epoch: 1/20...  Training Step: 31...  Training loss: 2.8966...  0.1237 sec/batch
Epoch: 1/20...  Training Step: 32...  Training loss: 2.9878...  0.1236 sec/batch
Epoch: 1/20...  Training Step: 33...  Training loss: 3.0631...  0.1196 sec/batch
Epoch: 1/20...  Training Step: 34...  Training loss: 3.0332...  0.1258 sec/batch
Epoch: 1/20...  Training Step: 35...  Training loss: 3.0228...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 36...  Training loss: 2.9531...  0.1221 sec/batch
Epoch: 1/20...  Training Step: 37...  Training loss: 3.0487...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 38...  Training loss: 2.9803...  0.1217 sec/batch
Epoch: 1/20...  Training Step: 39...  Training loss: 2.9113...  0.1199 sec/batch
Epoch: 1/20...  Training Step: 40...  Training loss: 3.1494...  0.1242 sec/batch
Epoch: 1/20...  Training Step: 41...  Training loss: 2.8697...  0.1210 sec/batch
Epoch: 1/20...  Training Step: 42...  Training loss: 2.9242...  0.1231 sec/batch
Epoch: 1/20...  Training Step: 43...  Training loss: 3.0483...  0.1218 sec/batch
Epoch: 1/20...  Training Step: 44...  Training loss: 2.8306...  0.1232 sec/batch
Epoch: 1/20...  Training Step: 45...  Training loss: 2.8007...  0.1192 sec/batch
Epoch: 1/20...  Training Step: 46...  Training loss: 2.9705...  0.1237 sec/batch
Epoch: 1/20...  Training Step: 47...  Training loss: 2.8790...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 48...  Training loss: 2.9677...  0.1245 sec/batch
Epoch: 1/20...  Training Step: 49...  Training loss: 2.8278...  0.1245 sec/batch
Epoch: 1/20...  Training Step: 50...  Training loss: 3.0830...  0.1210 sec/batch
Epoch: 1/20...  Training Step: 51...  Training loss: 2.8571...  0.1262 sec/batch
Epoch: 1/20...  Training Step: 52...  Training loss: 2.9284...  0.1210 sec/batch
Epoch: 1/20...  Training Step: 53...  Training loss: 3.1083...  0.1261 sec/batch
Epoch: 1/20...  Training Step: 54...  Training loss: 3.0061...  0.1195 sec/batch
Epoch: 1/20...  Training Step: 55...  Training loss: 2.9092...  0.1215 sec/batch
Epoch: 1/20...  Training Step: 56...  Training loss: 2.9404...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 57...  Training loss: 2.9676...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 58...  Training loss: 2.9035...  0.1225 sec/batch
Epoch: 1/20...  Training Step: 59...  Training loss: 2.8589...  0.1242 sec/batch
Epoch: 1/20...  Training Step: 60...  Training loss: 2.8798...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 61...  Training loss: 2.9282...  0.1190 sec/batch
Epoch: 1/20...  Training Step: 62...  Training loss: 2.9433...  0.1257 sec/batch
Epoch: 1/20...  Training Step: 63...  Training loss: 2.8242...  0.1214 sec/batch
Epoch: 1/20...  Training Step: 64...  Training loss: 2.8128...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 65...  Training loss: 2.8223...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 66...  Training loss: 2.9849...  0.1269 sec/batch
Epoch: 1/20...  Training Step: 67...  Training loss: 2.9266...  0.1241 sec/batch
Epoch: 1/20...  Training Step: 68...  Training loss: 2.9161...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 69...  Training loss: 2.8079...  0.1184 sec/batch
Epoch: 1/20...  Training Step: 70...  Training loss: 2.8682...  0.1167 sec/batch
Epoch: 1/20...  Training Step: 71...  Training loss: 2.8148...  0.1274 sec/batch
Epoch: 1/20...  Training Step: 72...  Training loss: 2.9319...  0.1277 sec/batch
Epoch: 1/20...  Training Step: 73...  Training loss: 2.8397...  0.1169 sec/batch
Epoch: 1/20...  Training Step: 74...  Training loss: 2.8626...  0.1186 sec/batch
Epoch: 1/20...  Training Step: 75...  Training loss: 3.0676...  0.1178 sec/batch
Epoch: 1/20...  Training Step: 76...  Training loss: 2.9233...  0.1202 sec/batch
Epoch: 1/20...  Training Step: 77...  Training loss: 2.7613...  0.1244 sec/batch
Epoch: 1/20...  Training Step: 78...  Training loss: 2.9925...  0.1241 sec/batch
Epoch: 1/20...  Training Step: 79...  Training loss: 2.7884...  0.1273 sec/batch
Epoch: 1/20...  Training Step: 80...  Training loss: 2.7235...  0.1301 sec/batch
Epoch: 1/20...  Training Step: 81...  Training loss: 2.9118...  0.1195 sec/batch
Epoch: 1/20...  Training Step: 82...  Training loss: 2.9589...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 83...  Training loss: 2.7112...  0.1238 sec/batch
Epoch: 1/20...  Training Step: 84...  Training loss: 2.8291...  0.1217 sec/batch
Epoch: 1/20...  Training Step: 85...  Training loss: 2.7706...  0.1315 sec/batch
Epoch: 1/20...  Training Step: 86...  Training loss: 2.6703...  0.1237 sec/batch
Epoch: 1/20...  Training Step: 87...  Training loss: 2.7684...  0.1229 sec/batch
Epoch: 1/20...  Training Step: 88...  Training loss: 2.8455...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 89...  Training loss: 2.9805...  0.1220 sec/batch
Epoch: 1/20...  Training Step: 90...  Training loss: 2.6880...  0.1194 sec/batch
Epoch: 1/20...  Training Step: 91...  Training loss: 2.8288...  0.1221 sec/batch
Epoch: 1/20...  Training Step: 92...  Training loss: 2.8261...  0.1208 sec/batch
Epoch: 1/20...  Training Step: 93...  Training loss: 2.6363...  0.1211 sec/batch
Epoch: 1/20...  Training Step: 94...  Training loss: 2.8438...  0.1238 sec/batch
Epoch: 1/20...  Training Step: 95...  Training loss: 2.5802...  0.1219 sec/batch
Epoch: 1/20...  Training Step: 96...  Training loss: 2.7704...  0.1238 sec/batch
Epoch: 1/20...  Training Step: 97...  Training loss: 2.9868...  0.1239 sec/batch
Epoch: 1/20...  Training Step: 98...  Training loss: 2.8030...  0.1260 sec/batch
Epoch: 1/20...  Training Step: 99...  Training loss: 2.6999...  0.1219 sec/batch
Epoch: 1/20...  Training Step: 100...  Training loss: 2.7008...  0.1196 sec/batch
Epoch: 1/20...  Training Step: 101...  Training loss: 2.7749...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 102...  Training loss: 2.8638...  0.1252 sec/batch
Epoch: 1/20...  Training Step: 103...  Training loss: 2.7618...  0.1239 sec/batch
Epoch: 1/20...  Training Step: 104...  Training loss: 2.6063...  0.1229 sec/batch
Epoch: 1/20...  Training Step: 105...  Training loss: 2.6149...  0.1242 sec/batch
Epoch: 1/20...  Training Step: 106...  Training loss: 2.9139...  0.1234 sec/batch
Epoch: 1/20...  Training Step: 107...  Training loss: 2.9257...  0.1232 sec/batch
Epoch: 1/20...  Training Step: 108...  Training loss: 2.9118...  0.1231 sec/batch
Epoch: 1/20...  Training Step: 109...  Training loss: 2.7397...  0.1236 sec/batch
Epoch: 1/20...  Training Step: 110...  Training loss: 2.7949...  0.1237 sec/batch
Epoch: 1/20...  Training Step: 111...  Training loss: 2.7819...  0.1240 sec/batch
Epoch: 1/20...  Training Step: 112...  Training loss: 2.7028...  0.1237 sec/batch
Epoch: 1/20...  Training Step: 113...  Training loss: 2.6550...  0.1264 sec/batch
Epoch: 1/20...  Training Step: 114...  Training loss: 2.8067...  0.1234 sec/batch
Epoch: 1/20...  Training Step: 115...  Training loss: 2.6431...  0.1203 sec/batch
Epoch: 1/20...  Training Step: 116...  Training loss: 2.5301...  0.1255 sec/batch
Epoch: 1/20...  Training Step: 117...  Training loss: 2.6822...  0.1195 sec/batch
Epoch: 1/20...  Training Step: 118...  Training loss: 2.6422...  0.1225 sec/batch
Epoch: 1/20...  Training Step: 119...  Training loss: 2.5233...  0.1210 sec/batch
Epoch: 1/20...  Training Step: 120...  Training loss: 2.3074...  0.1192 sec/batch
Epoch: 1/20...  Training Step: 121...  Training loss: 2.4839...  0.1240 sec/batch
Epoch: 1/20...  Training Step: 122...  Training loss: 2.5947...  0.1242 sec/batch
Epoch: 1/20...  Training Step: 123...  Training loss: 2.6883...  0.1250 sec/batch
Epoch: 1/20...  Training Step: 124...  Training loss: 2.4409...  0.1222 sec/batch
Epoch: 1/20...  Training Step: 125...  Training loss: 2.5898...  0.1205 sec/batch
Epoch: 1/20...  Training Step: 126...  Training loss: 2.3660...  0.1183 sec/batch
Epoch: 1/20...  Training Step: 127...  Training loss: 2.3348...  0.1216 sec/batch
Epoch: 1/20...  Training Step: 128...  Training loss: 2.5245...  0.1231 sec/batch
Epoch: 1/20...  Training Step: 129...  Training loss: 2.5923...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 130...  Training loss: 2.4242...  0.1249 sec/batch
Epoch: 1/20...  Training Step: 131...  Training loss: 2.8288...  0.1198 sec/batch
Epoch: 1/20...  Training Step: 132...  Training loss: 2.3149...  0.1204 sec/batch
Epoch: 1/20...  Training Step: 133...  Training loss: 2.3525...  0.1230 sec/batch
Epoch: 1/20...  Training Step: 134...  Training loss: 2.3290...  0.1236 sec/batch
Epoch: 1/20...  Training Step: 135...  Training loss: 2.1879...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 136...  Training loss: 2.1351...  0.1202 sec/batch
Epoch: 1/20...  Training Step: 137...  Training loss: 2.4319...  0.1197 sec/batch
Epoch: 1/20...  Training Step: 138...  Training loss: 2.4167...  0.1169 sec/batch
Epoch: 1/20...  Training Step: 139...  Training loss: 2.2640...  0.1190 sec/batch
Epoch: 1/20...  Training Step: 140...  Training loss: 2.3276...  0.1220 sec/batch
Epoch: 1/20...  Training Step: 141...  Training loss: 2.4944...  0.1236 sec/batch
Epoch: 1/20...  Training Step: 142...  Training loss: 2.3255...  0.1240 sec/batch
Epoch: 1/20...  Training Step: 143...  Training loss: 2.3086...  0.1197 sec/batch
Epoch: 1/20...  Training Step: 144...  Training loss: 2.2411...  0.1262 sec/batch
Epoch: 1/20...  Training Step: 145...  Training loss: 2.3505...  0.1224 sec/batch
Epoch: 1/20...  Training Step: 146...  Training loss: 2.2917...  0.1205 sec/batch
Epoch: 1/20...  Training Step: 147...  Training loss: 2.4639...  0.1219 sec/batch
Epoch: 1/20...  Training Step: 148...  Training loss: 2.0835...  0.1224 sec/batch
Epoch: 1/20...  Training Step: 149...  Training loss: 2.2542...  0.1188 sec/batch
Epoch: 1/20...  Training Step: 150...  Training loss: 2.3682...  0.1219 sec/batch
Epoch: 1/20...  Training Step: 151...  Training loss: 2.2879...  0.1237 sec/batch
Epoch: 1/20...  Training Step: 152...  Training loss: 2.2459...  0.1166 sec/batch
Epoch: 1/20...  Training Step: 153...  Training loss: 2.4501...  0.1250 sec/batch
Epoch: 1/20...  Training Step: 154...  Training loss: 2.3604...  0.1238 sec/batch
Epoch: 1/20...  Training Step: 155...  Training loss: 2.2573...  0.1251 sec/batch
Epoch: 1/20...  Training Step: 156...  Training loss: 2.1554...  0.1222 sec/batch
Epoch: 1/20...  Training Step: 157...  Training loss: 1.9763...  0.1264 sec/batch
Epoch: 1/20...  Training Step: 158...  Training loss: 2.0656...  0.1255 sec/batch
Epoch: 1/20...  Training Step: 159...  Training loss: 2.0456...  0.1246 sec/batch
Epoch: 1/20...  Training Step: 160...  Training loss: 2.1463...  0.1206 sec/batch
Epoch: 1/20...  Training Step: 161...  Training loss: 2.4674...  0.1221 sec/batch
Epoch: 1/20...  Training Step: 162...  Training loss: 2.1990...  0.1213 sec/batch
Epoch: 1/20...  Training Step: 163...  Training loss: 2.1825...  0.1241 sec/batch
Epoch: 1/20...  Training Step: 164...  Training loss: 2.0409...  0.1204 sec/batch
Epoch: 1/20...  Training Step: 165...  Training loss: 2.1511...  0.1212 sec/batch
Epoch: 1/20...  Training Step: 166...  Training loss: 2.0999...  0.1215 sec/batch
Epoch: 1/20...  Training Step: 167...  Training loss: 2.0156...  0.1248 sec/batch
Epoch: 1/20...  Training Step: 168...  Training loss: 2.2786...  0.1198 sec/batch
Epoch: 1/20...  Training Step: 169...  Training loss: 2.2583...  0.1214 sec/batch
Epoch: 1/20...  Training Step: 170...  Training loss: 2.1629...  0.1261 sec/batch
Epoch: 1/20...  Training Step: 171...  Training loss: 2.1873...  0.1271 sec/batch
Epoch: 1/20...  Training Step: 172...  Training loss: 2.1109...  0.1211 sec/batch
Epoch: 1/20...  Training Step: 173...  Training loss: 2.2487...  0.1229 sec/batch
Epoch: 1/20...  Training Step: 174...  Training loss: 2.1224...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 175...  Training loss: 2.1910...  0.1261 sec/batch
Epoch: 1/20...  Training Step: 176...  Training loss: 1.9140...  0.1263 sec/batch
Epoch: 1/20...  Training Step: 177...  Training loss: 1.9269...  0.1381 sec/batch
Epoch: 1/20...  Training Step: 178...  Training loss: 2.2543...  0.1342 sec/batch
Epoch: 1/20...  Training Step: 179...  Training loss: 2.0510...  0.1197 sec/batch
Epoch: 1/20...  Training Step: 180...  Training loss: 2.2251...  0.1231 sec/batch
Epoch: 1/20...  Training Step: 181...  Training loss: 2.0182...  0.1236 sec/batch
Epoch: 1/20...  Training Step: 182...  Training loss: 2.1679...  0.1253 sec/batch
Epoch: 1/20...  Training Step: 183...  Training loss: 2.1451...  0.1212 sec/batch
Epoch: 1/20...  Training Step: 184...  Training loss: 2.1111...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 185...  Training loss: 2.2130...  0.1185 sec/batch
Epoch: 1/20...  Training Step: 186...  Training loss: 1.8731...  0.1240 sec/batch
Epoch: 1/20...  Training Step: 187...  Training loss: 2.0875...  0.1259 sec/batch
Epoch: 1/20...  Training Step: 188...  Training loss: 1.7481...  0.1211 sec/batch
Epoch: 1/20...  Training Step: 189...  Training loss: 2.1269...  0.1285 sec/batch
Epoch: 1/20...  Training Step: 190...  Training loss: 1.8619...  0.1321 sec/batch
Epoch: 1/20...  Training Step: 191...  Training loss: 1.9233...  0.1252 sec/batch
Epoch: 1/20...  Training Step: 192...  Training loss: 2.0531...  0.1201 sec/batch
Epoch: 1/20...  Training Step: 193...  Training loss: 1.9795...  0.1364 sec/batch
Epoch: 1/20...  Training Step: 194...  Training loss: 1.8791...  0.1308 sec/batch
Epoch: 1/20...  Training Step: 195...  Training loss: 2.2336...  0.1297 sec/batch
Epoch: 1/20...  Training Step: 196...  Training loss: 2.0534...  0.1352 sec/batch
Epoch: 1/20...  Training Step: 197...  Training loss: 1.8241...  0.1365 sec/batch
Epoch: 1/20...  Training Step: 198...  Training loss: 2.0393...  0.1306 sec/batch
Epoch: 1/20...  Training Step: 199...  Training loss: 1.7450...  0.1350 sec/batch
Epoch: 1/20...  Training Step: 200...  Training loss: 1.7357...  0.1320 sec/batch
Epoch: 1/20...  Training Step: 201...  Training loss: 1.9492...  0.1283 sec/batch
Epoch: 1/20...  Training Step: 202...  Training loss: 1.9008...  0.1301 sec/batch
Epoch: 1/20...  Training Step: 203...  Training loss: 1.8058...  0.1433 sec/batch
Epoch: 1/20...  Training Step: 204...  Training loss: 2.0252...  0.1273 sec/batch
Epoch: 1/20...  Training Step: 205...  Training loss: 1.7999...  0.1380 sec/batch
Epoch: 1/20...  Training Step: 206...  Training loss: 1.7063...  0.1327 sec/batch
Epoch: 1/20...  Training Step: 207...  Training loss: 1.8999...  0.1324 sec/batch
Epoch: 1/20...  Training Step: 208...  Training loss: 1.8881...  0.1335 sec/batch
Epoch: 1/20...  Training Step: 209...  Training loss: 1.8977...  0.1328 sec/batch
Epoch: 1/20...  Training Step: 210...  Training loss: 2.0043...  0.1271 sec/batch
Epoch: 1/20...  Training Step: 211...  Training loss: 1.5164...  0.1328 sec/batch
Epoch: 1/20...  Training Step: 212...  Training loss: 2.0064...  0.1357 sec/batch
Epoch: 1/20...  Training Step: 213...  Training loss: 1.9792...  0.1264 sec/batch
Epoch: 1/20...  Training Step: 214...  Training loss: 1.7379...  0.1256 sec/batch
Epoch: 1/20...  Training Step: 215...  Training loss: 2.1222...  0.1225 sec/batch
Epoch: 1/20...  Training Step: 216...  Training loss: 1.7652...  0.1208 sec/batch
Epoch: 1/20...  Training Step: 217...  Training loss: 2.0598...  0.1341 sec/batch
Epoch: 1/20...  Training Step: 218...  Training loss: 1.9019...  0.1372 sec/batch
Epoch: 1/20...  Training Step: 219...  Training loss: 2.0017...  0.1505 sec/batch
Epoch: 1/20...  Training Step: 220...  Training loss: 1.9209...  0.1393 sec/batch
Epoch: 1/20...  Training Step: 221...  Training loss: 1.8711...  0.1373 sec/batch
Epoch: 1/20...  Training Step: 222...  Training loss: 2.1657...  0.1415 sec/batch
Epoch: 1/20...  Training Step: 223...  Training loss: 2.0872...  0.1364 sec/batch
Epoch: 1/20...  Training Step: 224...  Training loss: 2.0484...  0.1354 sec/batch
Epoch: 1/20...  Training Step: 225...  Training loss: 1.9232...  0.1396 sec/batch
Epoch: 1/20...  Training Step: 226...  Training loss: 2.0963...  0.1369 sec/batch
Epoch: 1/20...  Training Step: 227...  Training loss: 2.0884...  0.1375 sec/batch
Epoch: 1/20...  Training Step: 228...  Training loss: 1.9220...  0.1246 sec/batch
Epoch: 1/20...  Training Step: 229...  Training loss: 1.9084...  0.1239 sec/batch
Epoch: 1/20...  Training Step: 230...  Training loss: 2.1362...  0.1292 sec/batch
Epoch: 1/20...  Training Step: 231...  Training loss: 1.8550...  0.1276 sec/batch
Epoch: 1/20...  Training Step: 232...  Training loss: 1.8892...  0.1224 sec/batch
Epoch: 1/20...  Training Step: 233...  Training loss: 2.2566...  0.1180 sec/batch
Epoch: 1/20...  Training Step: 234...  Training loss: 1.9981...  0.1170 sec/batch
Epoch: 1/20...  Training Step: 235...  Training loss: 2.0485...  0.1258 sec/batch
Epoch: 1/20...  Training Step: 236...  Training loss: 1.8156...  0.1232 sec/batch
Epoch: 1/20...  Training Step: 237...  Training loss: 2.1858...  0.1201 sec/batch
Epoch: 1/20...  Training Step: 238...  Training loss: 1.8726...  0.1269 sec/batch
Epoch: 1/20...  Training Step: 239...  Training loss: 1.9511...  0.1295 sec/batch
Epoch: 1/20...  Training Step: 240...  Training loss: 2.1758...  0.1268 sec/batch
Epoch: 1/20...  Training Step: 241...  Training loss: 1.9088...  0.1327 sec/batch
Epoch: 1/20...  Training Step: 242...  Training loss: 2.0303...  0.1412 sec/batch
Epoch: 1/20...  Training Step: 243...  Training loss: 2.1431...  0.1331 sec/batch
Epoch: 1/20...  Training Step: 244...  Training loss: 1.8868...  0.1342 sec/batch
Epoch: 1/20...  Training Step: 245...  Training loss: 1.8554...  0.1274 sec/batch
Epoch: 1/20...  Training Step: 246...  Training loss: 1.6133...  0.1356 sec/batch
Epoch: 1/20...  Training Step: 247...  Training loss: 1.8370...  0.1277 sec/batch
Epoch: 1/20...  Training Step: 248...  Training loss: 1.9527...  0.1214 sec/batch
Epoch: 1/20...  Training Step: 249...  Training loss: 1.8062...  0.1156 sec/batch
Epoch: 1/20...  Training Step: 250...  Training loss: 1.7428...  0.1226 sec/batch
Epoch: 1/20...  Training Step: 251...  Training loss: 1.9264...  0.1285 sec/batch
Epoch: 1/20...  Training Step: 252...  Training loss: 1.8250...  0.1279 sec/batch
Epoch: 1/20...  Training Step: 253...  Training loss: 1.7278...  0.1298 sec/batch
Epoch: 1/20...  Training Step: 254...  Training loss: 2.1920...  0.1351 sec/batch
Epoch: 1/20...  Training Step: 255...  Training loss: 1.8406...  0.1345 sec/batch
Epoch: 1/20...  Training Step: 256...  Training loss: 1.7293...  0.1367 sec/batch
Epoch: 1/20...  Training Step: 257...  Training loss: 1.9573...  0.1311 sec/batch
Epoch: 1/20...  Training Step: 258...  Training loss: 1.9288...  0.1309 sec/batch
Epoch: 1/20...  Training Step: 259...  Training loss: 1.9979...  0.1295 sec/batch
Epoch: 1/20...  Training Step: 260...  Training loss: 1.9598...  0.1355 sec/batch
Epoch: 1/20...  Training Step: 261...  Training loss: 1.9618...  0.1301 sec/batch
Epoch: 1/20...  Training Step: 262...  Training loss: 1.9427...  0.1299 sec/batch
Epoch: 1/20...  Training Step: 263...  Training loss: 1.9407...  0.1284 sec/batch
Epoch: 1/20...  Training Step: 264...  Training loss: 1.9523...  0.1208 sec/batch
Epoch: 1/20...  Training Step: 265...  Training loss: 1.9571...  0.1206 sec/batch
Epoch: 1/20...  Training Step: 266...  Training loss: 1.8152...  0.1291 sec/batch
Epoch: 1/20...  Training Step: 267...  Training loss: 2.0836...  0.1350 sec/batch
Epoch: 1/20...  Training Step: 268...  Training loss: 2.0145...  0.1366 sec/batch
Epoch: 1/20...  Training Step: 269...  Training loss: 1.9578...  0.1317 sec/batch
Epoch: 1/20...  Training Step: 270...  Training loss: 2.1346...  0.1203 sec/batch
Epoch: 1/20...  Training Step: 271...  Training loss: 1.7738...  0.1267 sec/batch
Epoch: 1/20...  Training Step: 272...  Training loss: 2.0176...  0.1273 sec/batch
Epoch: 1/20...  Training Step: 273...  Training loss: 1.9272...  0.1310 sec/batch
Epoch: 1/20...  Training Step: 274...  Training loss: 1.8668...  0.1284 sec/batch
Epoch: 1/20...  Training Step: 275...  Training loss: 1.9420...  0.1279 sec/batch
Epoch: 1/20...  Training Step: 276...  Training loss: 1.9878...  0.1259 sec/batch
Epoch: 1/20...  Training Step: 277...  Training loss: 2.0064...  0.1336 sec/batch
Epoch: 1/20...  Training Step: 278...  Training loss: 2.0244...  0.1296 sec/batch
Epoch: 1/20...  Training Step: 279...  Training loss: 1.7891...  0.1286 sec/batch
Epoch: 1/20...  Training Step: 280...  Training loss: 1.7947...  0.1274 sec/batch
Epoch: 1/20...  Training Step: 281...  Training loss: 1.6625...  0.1232 sec/batch
Epoch: 1/20...  Training Step: 282...  Training loss: 1.7442...  0.1244 sec/batch
Epoch: 1/20...  Training Step: 283...  Training loss: 1.6750...  0.1179 sec/batch
Epoch: 1/20...  Training Step: 284...  Training loss: 1.8013...  0.1205 sec/batch
Epoch: 1/20...  Training Step: 285...  Training loss: 1.6533...  0.1234 sec/batch
Epoch: 1/20...  Training Step: 286...  Training loss: 1.8193...  0.1230 sec/batch
Epoch: 1/20...  Training Step: 287...  Training loss: 1.6664...  0.1203 sec/batch
Epoch: 1/20...  Training Step: 288...  Training loss: 1.8409...  0.1194 sec/batch
Epoch: 1/20...  Training Step: 289...  Training loss: 2.0103...  0.1231 sec/batch
Epoch: 1/20...  Training Step: 290...  Training loss: 1.8769...  0.1256 sec/batch
Epoch: 1/20...  Training Step: 291...  Training loss: 1.9272...  0.1230 sec/batch
Epoch: 1/20...  Training Step: 292...  Training loss: 1.7780...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 293...  Training loss: 1.7624...  0.1267 sec/batch
Epoch: 1/20...  Training Step: 294...  Training loss: 1.9197...  0.1201 sec/batch
Epoch: 1/20...  Training Step: 295...  Training loss: 1.8272...  0.1339 sec/batch
Epoch: 1/20...  Training Step: 296...  Training loss: 2.0636...  0.1272 sec/batch
Epoch: 1/20...  Training Step: 297...  Training loss: 1.8698...  0.1327 sec/batch
Epoch: 1/20...  Training Step: 298...  Training loss: 1.8798...  0.1313 sec/batch
Epoch: 1/20...  Training Step: 299...  Training loss: 1.8173...  0.1287 sec/batch
Epoch: 1/20...  Training Step: 300...  Training loss: 1.8802...  0.1297 sec/batch
Epoch: 1/20...  Training Step: 301...  Training loss: 1.8425...  0.1284 sec/batch
Epoch: 1/20...  Training Step: 302...  Training loss: 1.7539...  0.1234 sec/batch
Epoch: 1/20...  Training Step: 303...  Training loss: 1.6652...  0.1222 sec/batch
Epoch: 1/20...  Training Step: 304...  Training loss: 1.9984...  0.1201 sec/batch
Epoch: 1/20...  Training Step: 305...  Training loss: 1.6832...  0.1223 sec/batch
Epoch: 1/20...  Training Step: 306...  Training loss: 2.0259...  0.1216 sec/batch
Epoch: 1/20...  Training Step: 307...  Training loss: 1.7023...  0.1201 sec/batch
Epoch: 1/20...  Training Step: 308...  Training loss: 2.2779...  0.1259 sec/batch
Epoch: 1/20...  Training Step: 309...  Training loss: 1.7804...  0.1248 sec/batch
Epoch: 1/20...  Training Step: 310...  Training loss: 2.0357...  0.1211 sec/batch
Epoch: 1/20...  Training Step: 311...  Training loss: 1.9977...  0.1221 sec/batch
Epoch: 1/20...  Training Step: 312...  Training loss: 1.8353...  0.1237 sec/batch
Epoch: 1/20...  Training Step: 313...  Training loss: 1.9002...  0.1264 sec/batch
Epoch: 1/20...  Training Step: 314...  Training loss: 1.8166...  0.1167 sec/batch
Epoch: 1/20...  Training Step: 315...  Training loss: 1.5815...  0.1188 sec/batch
Epoch: 1/20...  Training Step: 316...  Training loss: 1.8202...  0.1207 sec/batch
Epoch: 1/20...  Training Step: 317...  Training loss: 1.9972...  0.1240 sec/batch
Epoch: 1/20...  Training Step: 318...  Training loss: 1.6601...  0.1254 sec/batch
Epoch: 1/20...  Training Step: 319...  Training loss: 1.8317...  0.1218 sec/batch
Epoch: 1/20...  Training Step: 320...  Training loss: 1.7362...  0.1200 sec/batch
Epoch: 1/20...  Training Step: 321...  Training loss: 1.7212...  0.1241 sec/batch
Epoch: 1/20...  Training Step: 322...  Training loss: 1.9143...  0.1230 sec/batch
Epoch: 1/20...  Training Step: 323...  Training loss: 1.5693...  0.1203 sec/batch
Epoch: 1/20...  Training Step: 324...  Training loss: 1.5907...  0.1221 sec/batch
Epoch: 1/20...  Training Step: 325...  Training loss: 1.5188...  0.1176 sec/batch
Epoch: 1/20...  Training Step: 326...  Training loss: 1.7385...  0.1246 sec/batch
Epoch: 1/20...  Training Step: 327...  Training loss: 1.7668...  0.1233 sec/batch
Epoch: 1/20...  Training Step: 328...  Training loss: 1.7833...  0.1234 sec/batch
Epoch: 1/20...  Training Step: 329...  Training loss: 1.8986...  0.1213 sec/batch
Epoch: 1/20...  Training Step: 330...  Training loss: 1.7910...  0.1223 sec/batch
Epoch: 1/20...  Training Step: 331...  Training loss: 1.7676...  0.1231 sec/batch
Epoch: 1/20...  Training Step: 332...  Training loss: 1.9112...  0.1220 sec/batch
Epoch: 1/20...  Training Step: 333...  Training loss: 1.8434...  0.1257 sec/batch
Epoch: 1/20...  Training Step: 334...  Training loss: 1.7872...  0.1177 sec/batch
Epoch: 1/20...  Training Step: 335...  Training loss: 1.9730...  0.1222 sec/batch
Epoch: 1/20...  Training Step: 336...  Training loss: 1.9150...  0.1211 sec/batch
Epoch: 1/20...  Training Step: 337...  Training loss: 1.8544...  0.1211 sec/batch
Epoch: 1/20...  Training Step: 338...  Training loss: 1.9315...  0.1236 sec/batch
Epoch: 1/20...  Training Step: 339...  Training loss: 1.8512...  0.1244 sec/batch
Epoch: 1/20...  Training Step: 340...  Training loss: 1.7988...  0.1212 sec/batch
Epoch: 1/20...  Training Step: 341...  Training loss: 1.6037...  0.1270 sec/batch
Epoch: 1/20...  Training Step: 342...  Training loss: 1.8011...  0.1232 sec/batch
Epoch: 1/20...  Training Step: 343...  Training loss: 1.5731...  0.1230 sec/batch
Epoch: 1/20...  Training Step: 344...  Training loss: 1.9590...  0.1202 sec/batch
Epoch: 1/20...  Training Step: 345...  Training loss: 1.8489...  0.1183 sec/batch
Epoch: 1/20...  Training Step: 346...  Training loss: 1.6802...  0.1263 sec/batch
Epoch: 1/20...  Training Step: 347...  Training loss: 1.5646...  0.1212 sec/batch
Epoch: 1/20...  Training Step: 348...  Training loss: 1.9530...  0.1227 sec/batch
Epoch: 1/20...  Training Step: 349...  Training loss: 1.7007...  0.1255 sec/batch
Epoch: 1/20...  Training Step: 350...  Training loss: 1.8760...  0.1199 sec/batch
Epoch: 1/20...  Training Step: 351...  Training loss: 1.7809...  0.1218 sec/batch
Epoch: 1/20...  Training Step: 352...  Training loss: 1.8105...  0.1218 sec/batch
Epoch: 1/20...  Training Step: 353...  Training loss: 1.6809...  0.1221 sec/batch
Epoch: 1/20...  Training Step: 354...  Training loss: 1.4776...  0.1270 sec/batch
Epoch: 1/20...  Training Step: 355...  Training loss: 2.0394...  0.1206 sec/batch
Epoch: 1/20...  Training Step: 356...  Training loss: 1.7149...  0.1252 sec/batch
Epoch: 1/20...  Training Step: 357...  Training loss: 1.5545...  0.1213 sec/batch
Epoch: 1/20...  Training Step: 358...  Training loss: 1.8659...  0.1208 sec/batch
Epoch: 1/20...  Training Step: 359...  Training loss: 1.7484...  0.1211 sec/batch
Epoch: 1/20...  Training Step: 360...  Training loss: 1.6083...  0.1229 sec/batch
Epoch: 1/20...  Training Step: 361...  Training loss: 1.8397...  0.1193 sec/batch
Epoch: 1/20...  Training Step: 362...  Training loss: 1.7802...  0.1207 sec/batch
Epoch: 1/20...  Training Step: 363...  Training loss: 1.8073...  0.1211 sec/batch
Epoch: 1/20...  Training Step: 364...  Training loss: 1.7364...  0.1175 sec/batch
Epoch: 1/20...  Training Step: 365...  Training loss: 1.7202...  0.1268 sec/batch
Epoch: 1/20...  Training Step: 366...  Training loss: 1.8243...  0.1281 sec/batch
Epoch: 1/20...  Training Step: 367...  Training loss: 1.6731...  0.1288 sec/batch
Epoch: 1/20...  Training Step: 368...  Training loss: 1.7976...  0.1260 sec/batch
Epoch: 1/20...  Training Step: 369...  Training loss: 1.7247...  0.1251 sec/batch
Epoch: 1/20...  Training Step: 370...  Training loss: 1.7413...  0.1244 sec/batch
Epoch: 1/20...  Training Step: 371...  Training loss: 1.8750...  0.1243 sec/batch
Epoch: 1/20...  Training Step: 372...  Training loss: 1.8337...  0.1234 sec/batch
Epoch: 1/20...  Training Step: 373...  Training loss: 1.8835...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 374...  Training loss: 2.0701...  0.1277 sec/batch
Epoch: 1/20...  Training Step: 375...  Training loss: 1.7518...  0.1290 sec/batch
Epoch: 1/20...  Training Step: 376...  Training loss: 1.8389...  0.1459 sec/batch
Epoch: 1/20...  Training Step: 377...  Training loss: 1.7932...  0.1210 sec/batch
Epoch: 1/20...  Training Step: 378...  Training loss: 1.8923...  0.1196 sec/batch
Epoch: 1/20...  Training Step: 379...  Training loss: 1.6982...  0.1236 sec/batch
Epoch: 1/20...  Training Step: 380...  Training loss: 1.8352...  0.1249 sec/batch
Epoch: 1/20...  Training Step: 381...  Training loss: 1.7485...  0.1255 sec/batch
Epoch: 1/20...  Training Step: 382...  Training loss: 1.7305...  0.1200 sec/batch
Epoch: 1/20...  Training Step: 383...  Training loss: 1.7531...  0.1224 sec/batch
Epoch: 1/20...  Training Step: 384...  Training loss: 1.7135...  0.1216 sec/batch
Epoch: 1/20...  Training Step: 385...  Training loss: 1.5366...  0.1196 sec/batch
Epoch: 1/20...  Training Step: 386...  Training loss: 1.8873...  0.1195 sec/batch
Epoch: 1/20...  Training Step: 387...  Training loss: 1.8128...  0.1238 sec/batch
Epoch: 1/20...  Training Step: 388...  Training loss: 1.7778...  0.1235 sec/batch
Epoch: 1/20...  Training Step: 389...  Training loss: 1.7313...  0.1230 sec/batch
Epoch: 1/20...  Training Step: 390...  Training loss: 1.7203...  0.1227 sec/batch
Epoch: 1/20...  Training Step: 391...  Training loss: 1.6142...  0.1233 sec/batch
Epoch: 1/20...  Training Step: 392...  Training loss: 1.6548...  0.1216 sec/batch
Epoch: 1/20...  Training Step: 393...  Training loss: 1.6818...  0.1181 sec/batch
Epoch: 1/20...  Training Step: 394...  Training loss: 1.8411...  0.1219 sec/batch
Epoch: 1/20...  Training Step: 395...  Training loss: 1.5136...  0.1249 sec/batch
Epoch: 1/20...  Training Step: 396...  Training loss: 1.8199...  0.1205 sec/batch
Epoch: 1/20...  Training Step: 397...  Training loss: 1.5027...  0.1245 sec/batch
Epoch: 1/20...  Training Step: 398...  Training loss: 1.4402...  0.1178 sec/batch
Epoch: 1/20...  Training Step: 399...  Training loss: 1.6574...  0.1253 sec/batch
Epoch: 1/20...  Training Step: 400...  Training loss: 1.5007...  0.1219 sec/batch
Epoch: 1/20...  Training Step: 401...  Training loss: 1.8357...  0.1199 sec/batch
Epoch: 1/20...  Training Step: 402...  Training loss: 1.7633...  0.1200 sec/batch
Epoch: 1/20...  Training Step: 403...  Training loss: 1.6854...  0.1191 sec/batch
Epoch: 1/20...  Training Step: 404...  Training loss: 1.8063...  0.1216 sec/batch
Epoch: 1/20...  Training Step: 405...  Training loss: 1.6896...  0.1205 sec/batch
Epoch: 1/20...  Training Step: 406...  Training loss: 1.7406...  0.1259 sec/batch
Epoch: 1/20...  Training Step: 407...  Training loss: 1.8450...  0.1270 sec/batch
Epoch: 1/20...  Training Step: 408...  Training loss: 1.9894...  0.1220 sec/batch
Epoch: 1/20...  Training Step: 409...  Training loss: 1.7480...  0.1221 sec/batch
Epoch: 1/20...  Training Step: 410...  Training loss: 1.7030...  0.1210 sec/batch
Epoch: 1/20...  Training Step: 411...  Training loss: 1.7146...  0.1219 sec/batch
Epoch: 1/20...  Training Step: 412...  Training loss: 1.7491...  0.1209 sec/batch
Epoch: 1/20...  Training Step: 413...  Training loss: 1.6690...  0.1186 sec/batch
Epoch: 1/20...  Training Step: 414...  Training loss: 1.6483...  0.1254 sec/batch
Epoch: 1/20...  Training Step: 415...  Training loss: 2.0953...  0.1253 sec/batch
Epoch: 1/20...  Training Step: 416...  Training loss: 2.0327...  0.1216 sec/batch
Epoch: 1/20...  Training Step: 417...  Training loss: 1.9521...  0.1294 sec/batch
Epoch: 1/20...  Training Step: 418...  Training loss: 1.6823...  0.1218 sec/batch
Epoch: 1/20...  Training Step: 419...  Training loss: 1.8449...  0.1245 sec/batch
Epoch: 1/20...  Training Step: 420...  Training loss: 1.6649...  0.1266 sec/batch
Epoch: 1/20...  Training Step: 421...  Training loss: 1.8106...  0.1248 sec/batch
Epoch: 1/20...  Training Step: 422...  Training loss: 1.6933...  0.1229 sec/batch
Epoch: 1/20...  Training Step: 423...  Training loss: 1.9252...  0.1264 sec/batch
Epoch: 1/20...  Training Step: 424...  Training loss: 1.9893...  0.1250 sec/batch
Epoch: 1/20...  Training Step: 425...  Training loss: 1.7732...  0.1247 sec/batch
Epoch: 1/20...  Training Step: 426...  Training loss: 1.7674...  0.1239 sec/batch
Epoch: 1/20...  Training Step: 427...  Training loss: 1.6535...  0.1268 sec/batch
Epoch: 1/20...  Training Step: 428...  Training loss: 2.0166...  0.1194 sec/batch
Epoch: 1/20...  Training Step: 429...  Training loss: 1.6403...  0.1226 sec/batch
Epoch: 1/20...  Training Step: 430...  Training loss: 1.7100...  0.1224 sec/batch
Epoch: 1/20...  Training Step: 431...  Training loss: 1.9968...  0.1201 sec/batch
Epoch: 1/20...  Training Step: 432...  Training loss: 1.7512...  0.1216 sec/batch
Epoch: 1/20...  Training Step: 433...  Training loss: 1.9491...  0.1228 sec/batch
Epoch: 1/20...  Training Step: 434...  Training loss: 1.9198...  0.1382 sec/batch
Epoch: 1/20...  Training Step: 435...  Training loss: 1.6259...  0.1230 sec/batch
Epoch: 1/20...  Training Step: 436...  Training loss: 1.8238...  0.1213 sec/batch
Epoch: 1/20...  Training Step: 437...  Training loss: 1.7599...  0.1218 sec/batch
Epoch: 1/20...  Training Step: 438...  Training loss: 1.8576...  0.1255 sec/batch
Epoch: 1/20...  Training Step: 439...  Training loss: 1.9367...  0.1207 sec/batch
Epoch: 1/20...  Training Step: 440...  Training loss: 2.1496...  0.1303 sec/batch
Epoch: 1/20...  Training Step: 441...  Training loss: 1.9533...  0.1361 sec/batch
Epoch: 1/20...  Training Step: 442...  Training loss: 1.7783...  0.1323 sec/batch
Epoch: 1/20...  Training Step: 443...  Training loss: 1.8428...  0.1197 sec/batch
Epoch: 1/20...  Training Step: 444...  Training loss: 1.7003...  0.1290 sec/batch
Epoch: 1/20...  Training Step: 445...  Training loss: 1.7882...  0.1272 sec/batch
Epoch: 1/20...  Training Step: 446...  Training loss: 1.7898...  0.1245 sec/batch
Epoch: 1/20...  Training Step: 447...  Training loss: 1.7436...  0.1284 sec/batch
Epoch: 1/20...  Training Step: 448...  Training loss: 1.6811...  0.1222 sec/batch
Epoch: 1/20...  Training Step: 449...  Training loss: 1.7512...  0.1249 sec/batch
Epoch: 1/20...  Training Step: 450...  Training loss: 1.7195...  0.1251 sec/batch
Epoch: 1/20...  Training Step: 451...  Training loss: 1.8251...  0.1198 sec/batch
Epoch: 1/20...  Training Step: 452...  Training loss: 1.7388...  0.1209 sec/batch
Epoch: 1/20...  Training Step: 453...  Training loss: 1.6635...  0.1208 sec/batch
Epoch: 1/20...  Training Step: 454...  Training loss: 2.0937...  0.1227 sec/batch
Epoch: 1/20...  Training Step: 455...  Training loss: 1.8394...  0.1225 sec/batch
Epoch: 1/20...  Training Step: 456...  Training loss: 1.6080...  0.1203 sec/batch
Epoch: 1/20...  Training Step: 457...  Training loss: 1.7142...  0.1253 sec/batch
Epoch: 1/20...  Training Step: 458...  Training loss: 1.7019...  0.1227 sec/batch
Epoch: 1/20...  Training Step: 459...  Training loss: 1.5507...  0.1222 sec/batch
Epoch: 1/20...  Training Step: 460...  Training loss: 1.7918...  0.1226 sec/batch
Epoch: 1/20...  Training Step: 461...  Training loss: 1.9374...  0.1258 sec/batch
Epoch: 1/20...  Training Step: 462...  Training loss: 1.7728...  0.1187 sec/batch
Epoch: 1/20...  Training Step: 463...  Training loss: 1.7676...  0.1206 sec/batch
Epoch: 1/20...  Training Step: 464...  Training loss: 1.5364...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 465...  Training loss: 2.1169...  0.1199 sec/batch
Epoch: 2/20...  Training Step: 466...  Training loss: 1.8043...  0.1209 sec/batch
Epoch: 2/20...  Training Step: 467...  Training loss: 1.7437...  0.1196 sec/batch
Epoch: 2/20...  Training Step: 468...  Training loss: 1.6801...  0.1214 sec/batch
Epoch: 2/20...  Training Step: 469...  Training loss: 2.0278...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 470...  Training loss: 1.5746...  0.1217 sec/batch
Epoch: 2/20...  Training Step: 471...  Training loss: 1.8345...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 472...  Training loss: 1.6416...  0.1232 sec/batch
Epoch: 2/20...  Training Step: 473...  Training loss: 1.7075...  0.1241 sec/batch
Epoch: 2/20...  Training Step: 474...  Training loss: 1.9914...  0.1202 sec/batch
Epoch: 2/20...  Training Step: 475...  Training loss: 1.5417...  0.1212 sec/batch
Epoch: 2/20...  Training Step: 476...  Training loss: 1.5214...  0.1212 sec/batch
Epoch: 2/20...  Training Step: 477...  Training loss: 1.8553...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 478...  Training loss: 1.5335...  0.1228 sec/batch
Epoch: 2/20...  Training Step: 479...  Training loss: 1.8862...  0.1184 sec/batch
Epoch: 2/20...  Training Step: 480...  Training loss: 1.7181...  0.1239 sec/batch
Epoch: 2/20...  Training Step: 481...  Training loss: 1.4988...  0.1216 sec/batch
Epoch: 2/20...  Training Step: 482...  Training loss: 1.5411...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 483...  Training loss: 1.6608...  0.1201 sec/batch
Epoch: 2/20...  Training Step: 484...  Training loss: 1.4726...  0.1181 sec/batch
Epoch: 2/20...  Training Step: 485...  Training loss: 1.6968...  0.1271 sec/batch
Epoch: 2/20...  Training Step: 486...  Training loss: 1.5778...  0.1228 sec/batch
Epoch: 2/20...  Training Step: 487...  Training loss: 1.9038...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 488...  Training loss: 1.5238...  0.1251 sec/batch
Epoch: 2/20...  Training Step: 489...  Training loss: 1.6305...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 490...  Training loss: 1.6736...  0.1207 sec/batch
Epoch: 2/20...  Training Step: 491...  Training loss: 1.7360...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 492...  Training loss: 1.5628...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 493...  Training loss: 1.6163...  0.1211 sec/batch
Epoch: 2/20...  Training Step: 494...  Training loss: 1.6658...  0.1230 sec/batch
Epoch: 2/20...  Training Step: 495...  Training loss: 1.4685...  0.1184 sec/batch
Epoch: 2/20...  Training Step: 496...  Training loss: 1.6188...  0.1200 sec/batch
Epoch: 2/20...  Training Step: 497...  Training loss: 1.4422...  0.1180 sec/batch
Epoch: 2/20...  Training Step: 498...  Training loss: 1.5989...  0.1241 sec/batch
Epoch: 2/20...  Training Step: 499...  Training loss: 1.6538...  0.1219 sec/batch
Epoch: 2/20...  Training Step: 500...  Training loss: 1.5559...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 501...  Training loss: 1.6995...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 502...  Training loss: 1.5303...  0.1238 sec/batch
Epoch: 2/20...  Training Step: 503...  Training loss: 1.5401...  0.1172 sec/batch
Epoch: 2/20...  Training Step: 504...  Training loss: 1.8869...  0.1249 sec/batch
Epoch: 2/20...  Training Step: 505...  Training loss: 1.5657...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 506...  Training loss: 1.5236...  0.1210 sec/batch
Epoch: 2/20...  Training Step: 507...  Training loss: 1.8203...  0.1213 sec/batch
Epoch: 2/20...  Training Step: 508...  Training loss: 1.4910...  0.1218 sec/batch
Epoch: 2/20...  Training Step: 509...  Training loss: 1.6385...  0.1188 sec/batch
Epoch: 2/20...  Training Step: 510...  Training loss: 1.5416...  0.1244 sec/batch
Epoch: 2/20...  Training Step: 511...  Training loss: 1.4825...  0.1223 sec/batch
Epoch: 2/20...  Training Step: 512...  Training loss: 1.8408...  0.1237 sec/batch
Epoch: 2/20...  Training Step: 513...  Training loss: 1.5825...  0.1248 sec/batch
Epoch: 2/20...  Training Step: 514...  Training loss: 1.8381...  0.1233 sec/batch
Epoch: 2/20...  Training Step: 515...  Training loss: 1.6678...  0.1225 sec/batch
Epoch: 2/20...  Training Step: 516...  Training loss: 1.8468...  0.1225 sec/batch
Epoch: 2/20...  Training Step: 517...  Training loss: 1.7648...  0.1205 sec/batch
Epoch: 2/20...  Training Step: 518...  Training loss: 1.6612...  0.1214 sec/batch
Epoch: 2/20...  Training Step: 519...  Training loss: 1.5334...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 520...  Training loss: 1.6670...  0.1264 sec/batch
Epoch: 2/20...  Training Step: 521...  Training loss: 1.8123...  0.1248 sec/batch
Epoch: 2/20...  Training Step: 522...  Training loss: 1.8160...  0.1246 sec/batch
Epoch: 2/20...  Training Step: 523...  Training loss: 1.6110...  0.1191 sec/batch
Epoch: 2/20...  Training Step: 524...  Training loss: 1.6792...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 525...  Training loss: 1.6498...  0.1249 sec/batch
Epoch: 2/20...  Training Step: 526...  Training loss: 1.8336...  0.1202 sec/batch
Epoch: 2/20...  Training Step: 527...  Training loss: 1.5916...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 528...  Training loss: 1.7026...  0.1227 sec/batch
Epoch: 2/20...  Training Step: 529...  Training loss: 1.4911...  0.1241 sec/batch
Epoch: 2/20...  Training Step: 530...  Training loss: 1.6888...  0.1217 sec/batch
Epoch: 2/20...  Training Step: 531...  Training loss: 1.6631...  0.1197 sec/batch
Epoch: 2/20...  Training Step: 532...  Training loss: 1.7223...  0.1233 sec/batch
Epoch: 2/20...  Training Step: 533...  Training loss: 1.6294...  0.1231 sec/batch
Epoch: 2/20...  Training Step: 534...  Training loss: 1.6289...  0.1279 sec/batch
Epoch: 2/20...  Training Step: 535...  Training loss: 1.7408...  0.1231 sec/batch
Epoch: 2/20...  Training Step: 536...  Training loss: 1.8541...  0.1194 sec/batch
Epoch: 2/20...  Training Step: 537...  Training loss: 1.6901...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 538...  Training loss: 1.5052...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 539...  Training loss: 1.9474...  0.1161 sec/batch
Epoch: 2/20...  Training Step: 540...  Training loss: 1.5172...  0.1202 sec/batch
Epoch: 2/20...  Training Step: 541...  Training loss: 1.4782...  0.1203 sec/batch
Epoch: 2/20...  Training Step: 542...  Training loss: 1.7421...  0.1222 sec/batch
Epoch: 2/20...  Training Step: 543...  Training loss: 1.7581...  0.1230 sec/batch
Epoch: 2/20...  Training Step: 544...  Training loss: 1.5988...  0.1178 sec/batch
Epoch: 2/20...  Training Step: 545...  Training loss: 1.6857...  0.1171 sec/batch
Epoch: 2/20...  Training Step: 546...  Training loss: 1.7556...  0.1237 sec/batch
Epoch: 2/20...  Training Step: 547...  Training loss: 1.5255...  0.1232 sec/batch
Epoch: 2/20...  Training Step: 548...  Training loss: 1.7376...  0.1200 sec/batch
Epoch: 2/20...  Training Step: 549...  Training loss: 1.6665...  0.1200 sec/batch
Epoch: 2/20...  Training Step: 550...  Training loss: 1.5863...  0.1202 sec/batch
Epoch: 2/20...  Training Step: 551...  Training loss: 1.5182...  0.1184 sec/batch
Epoch: 2/20...  Training Step: 552...  Training loss: 1.7099...  0.1178 sec/batch
Epoch: 2/20...  Training Step: 553...  Training loss: 1.9698...  0.1189 sec/batch
Epoch: 2/20...  Training Step: 554...  Training loss: 1.5779...  0.1222 sec/batch
Epoch: 2/20...  Training Step: 555...  Training loss: 1.7203...  0.1233 sec/batch
Epoch: 2/20...  Training Step: 556...  Training loss: 1.8608...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 557...  Training loss: 1.4067...  0.1226 sec/batch
Epoch: 2/20...  Training Step: 558...  Training loss: 1.8412...  0.1214 sec/batch
Epoch: 2/20...  Training Step: 559...  Training loss: 1.4484...  0.1245 sec/batch
Epoch: 2/20...  Training Step: 560...  Training loss: 1.7325...  0.1258 sec/batch
Epoch: 2/20...  Training Step: 561...  Training loss: 2.0178...  0.1231 sec/batch
Epoch: 2/20...  Training Step: 562...  Training loss: 1.7388...  0.1219 sec/batch
Epoch: 2/20...  Training Step: 563...  Training loss: 1.7662...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 564...  Training loss: 1.5795...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 565...  Training loss: 1.6619...  0.1283 sec/batch
Epoch: 2/20...  Training Step: 566...  Training loss: 1.8696...  0.1230 sec/batch
Epoch: 2/20...  Training Step: 567...  Training loss: 1.9462...  0.1194 sec/batch
Epoch: 2/20...  Training Step: 568...  Training loss: 1.6107...  0.1226 sec/batch
Epoch: 2/20...  Training Step: 569...  Training loss: 1.8654...  0.1231 sec/batch
Epoch: 2/20...  Training Step: 570...  Training loss: 2.0711...  0.1209 sec/batch
Epoch: 2/20...  Training Step: 571...  Training loss: 1.9632...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 572...  Training loss: 2.0685...  0.1212 sec/batch
Epoch: 2/20...  Training Step: 573...  Training loss: 1.9684...  0.1233 sec/batch
Epoch: 2/20...  Training Step: 574...  Training loss: 1.8868...  0.1275 sec/batch
Epoch: 2/20...  Training Step: 575...  Training loss: 1.8931...  0.1193 sec/batch
Epoch: 2/20...  Training Step: 576...  Training loss: 1.8381...  0.1197 sec/batch
Epoch: 2/20...  Training Step: 577...  Training loss: 1.8021...  0.1183 sec/batch
Epoch: 2/20...  Training Step: 578...  Training loss: 1.9651...  0.1182 sec/batch
Epoch: 2/20...  Training Step: 579...  Training loss: 1.8136...  0.1184 sec/batch
Epoch: 2/20...  Training Step: 580...  Training loss: 1.6910...  0.1174 sec/batch
Epoch: 2/20...  Training Step: 581...  Training loss: 1.9931...  0.1183 sec/batch
Epoch: 2/20...  Training Step: 582...  Training loss: 2.0034...  0.1180 sec/batch
Epoch: 2/20...  Training Step: 583...  Training loss: 1.7529...  0.1211 sec/batch
Epoch: 2/20...  Training Step: 584...  Training loss: 1.6017...  0.1224 sec/batch
Epoch: 2/20...  Training Step: 585...  Training loss: 1.7148...  0.1195 sec/batch
Epoch: 2/20...  Training Step: 586...  Training loss: 1.7608...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 587...  Training loss: 1.9276...  0.1203 sec/batch
Epoch: 2/20...  Training Step: 588...  Training loss: 1.9657...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 589...  Training loss: 1.9765...  0.1337 sec/batch
Epoch: 2/20...  Training Step: 590...  Training loss: 1.6106...  0.1296 sec/batch
Epoch: 2/20...  Training Step: 591...  Training loss: 1.6972...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 592...  Training loss: 1.6956...  0.1212 sec/batch
Epoch: 2/20...  Training Step: 593...  Training loss: 1.8264...  0.1205 sec/batch
Epoch: 2/20...  Training Step: 594...  Training loss: 1.7785...  0.1188 sec/batch
Epoch: 2/20...  Training Step: 595...  Training loss: 2.0840...  0.1218 sec/batch
Epoch: 2/20...  Training Step: 596...  Training loss: 1.8626...  0.1222 sec/batch
Epoch: 2/20...  Training Step: 597...  Training loss: 1.6291...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 598...  Training loss: 1.7861...  0.1194 sec/batch
Epoch: 2/20...  Training Step: 599...  Training loss: 1.6117...  0.1248 sec/batch
Epoch: 2/20...  Training Step: 600...  Training loss: 1.5718...  0.1232 sec/batch
Epoch: 2/20...  Training Step: 601...  Training loss: 1.6199...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 602...  Training loss: 1.7399...  0.1192 sec/batch
Epoch: 2/20...  Training Step: 603...  Training loss: 1.5251...  0.1201 sec/batch
Epoch: 2/20...  Training Step: 604...  Training loss: 1.6950...  0.1233 sec/batch
Epoch: 2/20...  Training Step: 605...  Training loss: 1.7904...  0.1260 sec/batch
Epoch: 2/20...  Training Step: 606...  Training loss: 1.7237...  0.1219 sec/batch
Epoch: 2/20...  Training Step: 607...  Training loss: 1.5295...  0.1212 sec/batch
Epoch: 2/20...  Training Step: 608...  Training loss: 1.6737...  0.1216 sec/batch
Epoch: 2/20...  Training Step: 609...  Training loss: 1.7418...  0.1224 sec/batch
Epoch: 2/20...  Training Step: 610...  Training loss: 1.6588...  0.1210 sec/batch
Epoch: 2/20...  Training Step: 611...  Training loss: 1.9643...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 612...  Training loss: 1.6860...  0.1183 sec/batch
Epoch: 2/20...  Training Step: 613...  Training loss: 1.7044...  0.1217 sec/batch
Epoch: 2/20...  Training Step: 614...  Training loss: 1.7717...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 615...  Training loss: 1.9744...  0.1204 sec/batch
Epoch: 2/20...  Training Step: 616...  Training loss: 1.8483...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 617...  Training loss: 1.8924...  0.1230 sec/batch
Epoch: 2/20...  Training Step: 618...  Training loss: 1.8024...  0.1223 sec/batch
Epoch: 2/20...  Training Step: 619...  Training loss: 1.8520...  0.1198 sec/batch
Epoch: 2/20...  Training Step: 620...  Training loss: 1.5388...  0.1195 sec/batch
Epoch: 2/20...  Training Step: 621...  Training loss: 1.5904...  0.1208 sec/batch
Epoch: 2/20...  Training Step: 622...  Training loss: 1.7615...  0.1214 sec/batch
Epoch: 2/20...  Training Step: 623...  Training loss: 1.6085...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 624...  Training loss: 1.6241...  0.1237 sec/batch
Epoch: 2/20...  Training Step: 625...  Training loss: 1.8298...  0.1250 sec/batch
Epoch: 2/20...  Training Step: 626...  Training loss: 1.6570...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 627...  Training loss: 1.7788...  0.1267 sec/batch
Epoch: 2/20...  Training Step: 628...  Training loss: 1.4951...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 629...  Training loss: 1.6211...  0.1262 sec/batch
Epoch: 2/20...  Training Step: 630...  Training loss: 1.5436...  0.1209 sec/batch
Epoch: 2/20...  Training Step: 631...  Training loss: 1.7008...  0.1217 sec/batch
Epoch: 2/20...  Training Step: 632...  Training loss: 1.8135...  0.1212 sec/batch
Epoch: 2/20...  Training Step: 633...  Training loss: 1.8089...  0.1251 sec/batch
Epoch: 2/20...  Training Step: 634...  Training loss: 1.8158...  0.1202 sec/batch
Epoch: 2/20...  Training Step: 635...  Training loss: 1.7606...  0.1213 sec/batch
Epoch: 2/20...  Training Step: 636...  Training loss: 1.6653...  0.1216 sec/batch
Epoch: 2/20...  Training Step: 637...  Training loss: 1.8007...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 638...  Training loss: 1.7540...  0.1243 sec/batch
Epoch: 2/20...  Training Step: 639...  Training loss: 1.7894...  0.1247 sec/batch
Epoch: 2/20...  Training Step: 640...  Training loss: 1.4989...  0.1209 sec/batch
Epoch: 2/20...  Training Step: 641...  Training loss: 1.5423...  0.1193 sec/batch
Epoch: 2/20...  Training Step: 642...  Training loss: 1.8979...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 643...  Training loss: 1.6102...  0.1244 sec/batch
Epoch: 2/20...  Training Step: 644...  Training loss: 1.8565...  0.1245 sec/batch
Epoch: 2/20...  Training Step: 645...  Training loss: 1.5797...  0.1293 sec/batch
Epoch: 2/20...  Training Step: 646...  Training loss: 1.8037...  0.1279 sec/batch
Epoch: 2/20...  Training Step: 647...  Training loss: 1.7730...  0.1312 sec/batch
Epoch: 2/20...  Training Step: 648...  Training loss: 1.7959...  0.1239 sec/batch
Epoch: 2/20...  Training Step: 649...  Training loss: 1.8847...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 650...  Training loss: 1.6873...  0.1232 sec/batch
Epoch: 2/20...  Training Step: 651...  Training loss: 1.8867...  0.1251 sec/batch
Epoch: 2/20...  Training Step: 652...  Training loss: 1.5452...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 653...  Training loss: 1.7488...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 654...  Training loss: 1.6784...  0.1245 sec/batch
Epoch: 2/20...  Training Step: 655...  Training loss: 1.4720...  0.1264 sec/batch
Epoch: 2/20...  Training Step: 656...  Training loss: 1.8478...  0.1243 sec/batch
Epoch: 2/20...  Training Step: 657...  Training loss: 1.7053...  0.1222 sec/batch
Epoch: 2/20...  Training Step: 658...  Training loss: 1.7101...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 659...  Training loss: 1.8143...  0.1210 sec/batch
Epoch: 2/20...  Training Step: 660...  Training loss: 1.7087...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 661...  Training loss: 1.4732...  0.1224 sec/batch
Epoch: 2/20...  Training Step: 662...  Training loss: 1.7075...  0.1230 sec/batch
Epoch: 2/20...  Training Step: 663...  Training loss: 1.5147...  0.1248 sec/batch
Epoch: 2/20...  Training Step: 664...  Training loss: 1.5947...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 665...  Training loss: 1.7045...  0.1227 sec/batch
Epoch: 2/20...  Training Step: 666...  Training loss: 1.6102...  0.1226 sec/batch
Epoch: 2/20...  Training Step: 667...  Training loss: 1.5347...  0.1256 sec/batch
Epoch: 2/20...  Training Step: 668...  Training loss: 1.7791...  0.1275 sec/batch
Epoch: 2/20...  Training Step: 669...  Training loss: 1.6174...  0.1349 sec/batch
Epoch: 2/20...  Training Step: 670...  Training loss: 1.5101...  0.1356 sec/batch
Epoch: 2/20...  Training Step: 671...  Training loss: 1.5001...  0.1285 sec/batch
Epoch: 2/20...  Training Step: 672...  Training loss: 1.7598...  0.1323 sec/batch
Epoch: 2/20...  Training Step: 673...  Training loss: 1.5729...  0.1286 sec/batch
Epoch: 2/20...  Training Step: 674...  Training loss: 1.5274...  0.1186 sec/batch
Epoch: 2/20...  Training Step: 675...  Training loss: 1.3030...  0.1212 sec/batch
Epoch: 2/20...  Training Step: 676...  Training loss: 1.7295...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 677...  Training loss: 1.7557...  0.1276 sec/batch
Epoch: 2/20...  Training Step: 678...  Training loss: 1.5915...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 679...  Training loss: 1.7897...  0.1793 sec/batch
Epoch: 2/20...  Training Step: 680...  Training loss: 1.4899...  0.1643 sec/batch
Epoch: 2/20...  Training Step: 681...  Training loss: 1.6719...  0.1357 sec/batch
Epoch: 2/20...  Training Step: 682...  Training loss: 1.6153...  0.1455 sec/batch
Epoch: 2/20...  Training Step: 683...  Training loss: 1.7885...  0.1401 sec/batch
Epoch: 2/20...  Training Step: 684...  Training loss: 1.6302...  0.1201 sec/batch
Epoch: 2/20...  Training Step: 685...  Training loss: 1.5734...  0.1197 sec/batch
Epoch: 2/20...  Training Step: 686...  Training loss: 1.8202...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 687...  Training loss: 1.8504...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 688...  Training loss: 1.9010...  0.1262 sec/batch
Epoch: 2/20...  Training Step: 689...  Training loss: 1.6481...  0.1198 sec/batch
Epoch: 2/20...  Training Step: 690...  Training loss: 1.8181...  0.1224 sec/batch
Epoch: 2/20...  Training Step: 691...  Training loss: 1.8177...  0.1186 sec/batch
Epoch: 2/20...  Training Step: 692...  Training loss: 1.5322...  0.1246 sec/batch
Epoch: 2/20...  Training Step: 693...  Training loss: 1.5682...  0.1203 sec/batch
Epoch: 2/20...  Training Step: 694...  Training loss: 1.6734...  0.1195 sec/batch
Epoch: 2/20...  Training Step: 695...  Training loss: 1.5798...  0.1223 sec/batch
Epoch: 2/20...  Training Step: 696...  Training loss: 1.5021...  0.1162 sec/batch
Epoch: 2/20...  Training Step: 697...  Training loss: 1.9915...  0.1208 sec/batch
Epoch: 2/20...  Training Step: 698...  Training loss: 1.6299...  0.1180 sec/batch
Epoch: 2/20...  Training Step: 699...  Training loss: 1.8016...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 700...  Training loss: 1.5611...  0.1258 sec/batch
Epoch: 2/20...  Training Step: 701...  Training loss: 2.0087...  0.1231 sec/batch
Epoch: 2/20...  Training Step: 702...  Training loss: 1.5452...  0.1183 sec/batch
Epoch: 2/20...  Training Step: 703...  Training loss: 1.6698...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 704...  Training loss: 1.8368...  0.1209 sec/batch
Epoch: 2/20...  Training Step: 705...  Training loss: 1.6080...  0.1225 sec/batch
Epoch: 2/20...  Training Step: 706...  Training loss: 1.6187...  0.1137 sec/batch
Epoch: 2/20...  Training Step: 707...  Training loss: 1.9185...  0.1197 sec/batch
Epoch: 2/20...  Training Step: 708...  Training loss: 1.6918...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 709...  Training loss: 1.6527...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 710...  Training loss: 1.3810...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 711...  Training loss: 1.5839...  0.1228 sec/batch
Epoch: 2/20...  Training Step: 712...  Training loss: 1.7290...  0.1217 sec/batch
Epoch: 2/20...  Training Step: 713...  Training loss: 1.6177...  0.1224 sec/batch
Epoch: 2/20...  Training Step: 714...  Training loss: 1.5815...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 715...  Training loss: 1.7203...  0.1201 sec/batch
Epoch: 2/20...  Training Step: 716...  Training loss: 1.6022...  0.1257 sec/batch
Epoch: 2/20...  Training Step: 717...  Training loss: 1.4405...  0.1269 sec/batch
Epoch: 2/20...  Training Step: 718...  Training loss: 1.7779...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 719...  Training loss: 1.6135...  0.1228 sec/batch
Epoch: 2/20...  Training Step: 720...  Training loss: 1.6352...  0.1187 sec/batch
Epoch: 2/20...  Training Step: 721...  Training loss: 1.7968...  0.1210 sec/batch
Epoch: 2/20...  Training Step: 722...  Training loss: 1.5118...  0.1202 sec/batch
Epoch: 2/20...  Training Step: 723...  Training loss: 1.6732...  0.1296 sec/batch
Epoch: 2/20...  Training Step: 724...  Training loss: 1.6489...  0.1237 sec/batch
Epoch: 2/20...  Training Step: 725...  Training loss: 1.8273...  0.1233 sec/batch
Epoch: 2/20...  Training Step: 726...  Training loss: 1.6901...  0.1188 sec/batch
Epoch: 2/20...  Training Step: 727...  Training loss: 1.6478...  0.1249 sec/batch
Epoch: 2/20...  Training Step: 728...  Training loss: 1.8091...  0.1242 sec/batch
Epoch: 2/20...  Training Step: 729...  Training loss: 1.7358...  0.1222 sec/batch
Epoch: 2/20...  Training Step: 730...  Training loss: 1.6705...  0.1199 sec/batch
Epoch: 2/20...  Training Step: 731...  Training loss: 1.7879...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 732...  Training loss: 1.8142...  0.1214 sec/batch
Epoch: 2/20...  Training Step: 733...  Training loss: 1.7720...  0.1208 sec/batch
Epoch: 2/20...  Training Step: 734...  Training loss: 1.8787...  0.1196 sec/batch
Epoch: 2/20...  Training Step: 735...  Training loss: 1.6196...  0.1184 sec/batch
Epoch: 2/20...  Training Step: 736...  Training loss: 1.7935...  0.1209 sec/batch
Epoch: 2/20...  Training Step: 737...  Training loss: 1.7010...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 738...  Training loss: 1.5914...  0.1216 sec/batch
Epoch: 2/20...  Training Step: 739...  Training loss: 1.7384...  0.1179 sec/batch
Epoch: 2/20...  Training Step: 740...  Training loss: 1.6520...  0.1200 sec/batch
Epoch: 2/20...  Training Step: 741...  Training loss: 1.6665...  0.1246 sec/batch
Epoch: 2/20...  Training Step: 742...  Training loss: 1.9144...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 743...  Training loss: 1.4846...  0.1192 sec/batch
Epoch: 2/20...  Training Step: 744...  Training loss: 1.5296...  0.1244 sec/batch
Epoch: 2/20...  Training Step: 745...  Training loss: 1.5161...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 746...  Training loss: 1.5085...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 747...  Training loss: 1.5750...  0.1233 sec/batch
Epoch: 2/20...  Training Step: 748...  Training loss: 1.5892...  0.1219 sec/batch
Epoch: 2/20...  Training Step: 749...  Training loss: 1.4467...  0.1194 sec/batch
Epoch: 2/20...  Training Step: 750...  Training loss: 1.5899...  0.1163 sec/batch
Epoch: 2/20...  Training Step: 751...  Training loss: 1.6438...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 752...  Training loss: 1.7068...  0.1321 sec/batch
Epoch: 2/20...  Training Step: 753...  Training loss: 1.8248...  0.1280 sec/batch
Epoch: 2/20...  Training Step: 754...  Training loss: 1.7047...  0.1251 sec/batch
Epoch: 2/20...  Training Step: 755...  Training loss: 1.6354...  0.1208 sec/batch
Epoch: 2/20...  Training Step: 756...  Training loss: 1.5990...  0.1218 sec/batch
Epoch: 2/20...  Training Step: 757...  Training loss: 1.5000...  0.1219 sec/batch
Epoch: 2/20...  Training Step: 758...  Training loss: 1.5278...  0.1247 sec/batch
Epoch: 2/20...  Training Step: 759...  Training loss: 1.5278...  0.1251 sec/batch
Epoch: 2/20...  Training Step: 760...  Training loss: 1.8009...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 761...  Training loss: 1.5023...  0.1196 sec/batch
Epoch: 2/20...  Training Step: 762...  Training loss: 1.6568...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 763...  Training loss: 1.6426...  0.1202 sec/batch
Epoch: 2/20...  Training Step: 764...  Training loss: 1.6477...  0.1237 sec/batch
Epoch: 2/20...  Training Step: 765...  Training loss: 1.5885...  0.1231 sec/batch
Epoch: 2/20...  Training Step: 766...  Training loss: 1.6078...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 767...  Training loss: 1.4139...  0.1251 sec/batch
Epoch: 2/20...  Training Step: 768...  Training loss: 1.8775...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 769...  Training loss: 1.5152...  0.1250 sec/batch
Epoch: 2/20...  Training Step: 770...  Training loss: 1.8197...  0.1175 sec/batch
Epoch: 2/20...  Training Step: 771...  Training loss: 1.6091...  0.1251 sec/batch
Epoch: 2/20...  Training Step: 772...  Training loss: 1.9504...  0.1265 sec/batch
Epoch: 2/20...  Training Step: 773...  Training loss: 1.6483...  0.1250 sec/batch
Epoch: 2/20...  Training Step: 774...  Training loss: 1.6440...  0.1169 sec/batch
Epoch: 2/20...  Training Step: 775...  Training loss: 1.6952...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 776...  Training loss: 1.6463...  0.1295 sec/batch
Epoch: 2/20...  Training Step: 777...  Training loss: 1.6828...  0.1209 sec/batch
Epoch: 2/20...  Training Step: 778...  Training loss: 1.6365...  0.1232 sec/batch
Epoch: 2/20...  Training Step: 779...  Training loss: 1.4231...  0.1188 sec/batch
Epoch: 2/20...  Training Step: 780...  Training loss: 1.5930...  0.1212 sec/batch
Epoch: 2/20...  Training Step: 781...  Training loss: 1.6856...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 782...  Training loss: 1.4957...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 783...  Training loss: 1.6320...  0.1189 sec/batch
Epoch: 2/20...  Training Step: 784...  Training loss: 1.5432...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 785...  Training loss: 1.4257...  0.1232 sec/batch
Epoch: 2/20...  Training Step: 786...  Training loss: 1.7358...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 787...  Training loss: 1.4737...  0.1219 sec/batch
Epoch: 2/20...  Training Step: 788...  Training loss: 1.4110...  0.1243 sec/batch
Epoch: 2/20...  Training Step: 789...  Training loss: 1.4146...  0.1178 sec/batch
Epoch: 2/20...  Training Step: 790...  Training loss: 1.5268...  0.1191 sec/batch
Epoch: 2/20...  Training Step: 791...  Training loss: 1.6573...  0.1208 sec/batch
Epoch: 2/20...  Training Step: 792...  Training loss: 1.4660...  0.1230 sec/batch
Epoch: 2/20...  Training Step: 793...  Training loss: 1.6335...  0.1257 sec/batch
Epoch: 2/20...  Training Step: 794...  Training loss: 1.4822...  0.1194 sec/batch
Epoch: 2/20...  Training Step: 795...  Training loss: 1.5909...  0.1238 sec/batch
Epoch: 2/20...  Training Step: 796...  Training loss: 1.5352...  0.1192 sec/batch
Epoch: 2/20...  Training Step: 797...  Training loss: 1.4827...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 798...  Training loss: 1.5295...  0.1216 sec/batch
Epoch: 2/20...  Training Step: 799...  Training loss: 1.8338...  0.1261 sec/batch
Epoch: 2/20...  Training Step: 800...  Training loss: 1.6561...  0.1217 sec/batch
Epoch: 2/20...  Training Step: 801...  Training loss: 1.6718...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 802...  Training loss: 1.6799...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 803...  Training loss: 1.6305...  0.1195 sec/batch
Epoch: 2/20...  Training Step: 804...  Training loss: 1.6059...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 805...  Training loss: 1.5072...  0.1228 sec/batch
Epoch: 2/20...  Training Step: 806...  Training loss: 1.5934...  0.1245 sec/batch
Epoch: 2/20...  Training Step: 807...  Training loss: 1.4723...  0.1217 sec/batch
Epoch: 2/20...  Training Step: 808...  Training loss: 1.8214...  0.1237 sec/batch
Epoch: 2/20...  Training Step: 809...  Training loss: 1.5360...  0.1225 sec/batch
Epoch: 2/20...  Training Step: 810...  Training loss: 1.4941...  0.1201 sec/batch
Epoch: 2/20...  Training Step: 811...  Training loss: 1.5319...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 812...  Training loss: 1.8912...  0.1190 sec/batch
Epoch: 2/20...  Training Step: 813...  Training loss: 1.5442...  0.1204 sec/batch
Epoch: 2/20...  Training Step: 814...  Training loss: 1.5836...  0.1218 sec/batch
Epoch: 2/20...  Training Step: 815...  Training loss: 1.4504...  0.1245 sec/batch
Epoch: 2/20...  Training Step: 816...  Training loss: 1.4963...  0.1211 sec/batch
Epoch: 2/20...  Training Step: 817...  Training loss: 1.3978...  0.1253 sec/batch
Epoch: 2/20...  Training Step: 818...  Training loss: 1.2765...  0.1211 sec/batch
Epoch: 2/20...  Training Step: 819...  Training loss: 1.8025...  0.1184 sec/batch
Epoch: 2/20...  Training Step: 820...  Training loss: 1.4530...  0.1242 sec/batch
Epoch: 2/20...  Training Step: 821...  Training loss: 1.4771...  0.1208 sec/batch
Epoch: 2/20...  Training Step: 822...  Training loss: 1.6415...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 823...  Training loss: 1.5389...  0.1283 sec/batch
Epoch: 2/20...  Training Step: 824...  Training loss: 1.3361...  0.1234 sec/batch
Epoch: 2/20...  Training Step: 825...  Training loss: 1.7247...  0.1187 sec/batch
Epoch: 2/20...  Training Step: 826...  Training loss: 1.5560...  0.1225 sec/batch
Epoch: 2/20...  Training Step: 827...  Training loss: 1.4898...  0.1238 sec/batch
Epoch: 2/20...  Training Step: 828...  Training loss: 1.5243...  0.1196 sec/batch
Epoch: 2/20...  Training Step: 829...  Training loss: 1.4751...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 830...  Training loss: 1.5885...  0.1209 sec/batch
Epoch: 2/20...  Training Step: 831...  Training loss: 1.3582...  0.1225 sec/batch
Epoch: 2/20...  Training Step: 832...  Training loss: 1.6574...  0.1239 sec/batch
Epoch: 2/20...  Training Step: 833...  Training loss: 1.6347...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 834...  Training loss: 1.6155...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 835...  Training loss: 1.5048...  0.1202 sec/batch
Epoch: 2/20...  Training Step: 836...  Training loss: 1.5085...  0.1191 sec/batch
Epoch: 2/20...  Training Step: 837...  Training loss: 1.6117...  0.1245 sec/batch
Epoch: 2/20...  Training Step: 838...  Training loss: 1.7322...  0.1242 sec/batch
Epoch: 2/20...  Training Step: 839...  Training loss: 1.5658...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 840...  Training loss: 1.5773...  0.1226 sec/batch
Epoch: 2/20...  Training Step: 841...  Training loss: 1.6765...  0.1220 sec/batch
Epoch: 2/20...  Training Step: 842...  Training loss: 1.6058...  0.1201 sec/batch
Epoch: 2/20...  Training Step: 843...  Training loss: 1.4991...  0.1241 sec/batch
Epoch: 2/20...  Training Step: 844...  Training loss: 1.8051...  0.1201 sec/batch
Epoch: 2/20...  Training Step: 845...  Training loss: 1.5494...  0.1167 sec/batch
Epoch: 2/20...  Training Step: 846...  Training loss: 1.4711...  0.1233 sec/batch
Epoch: 2/20...  Training Step: 847...  Training loss: 1.4854...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 848...  Training loss: 1.7007...  0.1216 sec/batch
Epoch: 2/20...  Training Step: 849...  Training loss: 1.4295...  0.1249 sec/batch
Epoch: 2/20...  Training Step: 850...  Training loss: 1.6863...  0.1221 sec/batch
Epoch: 2/20...  Training Step: 851...  Training loss: 1.6460...  0.1252 sec/batch
Epoch: 2/20...  Training Step: 852...  Training loss: 1.4551...  0.1169 sec/batch
Epoch: 2/20...  Training Step: 853...  Training loss: 1.6202...  0.1222 sec/batch
Epoch: 2/20...  Training Step: 854...  Training loss: 1.6456...  0.1245 sec/batch
Epoch: 2/20...  Training Step: 855...  Training loss: 1.4674...  0.1238 sec/batch
Epoch: 2/20...  Training Step: 856...  Training loss: 1.4540...  0.1181 sec/batch
Epoch: 2/20...  Training Step: 857...  Training loss: 1.4891...  0.1229 sec/batch
Epoch: 2/20...  Training Step: 858...  Training loss: 1.6085...  0.1213 sec/batch
Epoch: 2/20...  Training Step: 859...  Training loss: 1.3830...  0.1222 sec/batch
Epoch: 2/20...  Training Step: 860...  Training loss: 1.6827...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 861...  Training loss: 1.3171...  0.1231 sec/batch
Epoch: 2/20...  Training Step: 862...  Training loss: 1.3113...  0.1263 sec/batch
Epoch: 2/20...  Training Step: 863...  Training loss: 1.5239...  0.1317 sec/batch
Epoch: 2/20...  Training Step: 864...  Training loss: 1.3605...  0.1394 sec/batch
Epoch: 2/20...  Training Step: 865...  Training loss: 1.6535...  0.1397 sec/batch
Epoch: 2/20...  Training Step: 866...  Training loss: 1.5430...  0.1356 sec/batch
Epoch: 2/20...  Training Step: 867...  Training loss: 1.4408...  0.1318 sec/batch
Epoch: 2/20...  Training Step: 868...  Training loss: 1.5662...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 869...  Training loss: 1.4922...  0.1241 sec/batch
Epoch: 2/20...  Training Step: 870...  Training loss: 1.7384...  0.1287 sec/batch
Epoch: 2/20...  Training Step: 871...  Training loss: 1.5672...  0.1232 sec/batch
Epoch: 2/20...  Training Step: 872...  Training loss: 1.8103...  0.1210 sec/batch
Epoch: 2/20...  Training Step: 873...  Training loss: 1.5248...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 874...  Training loss: 1.5134...  0.1195 sec/batch
Epoch: 2/20...  Training Step: 875...  Training loss: 1.4664...  0.1216 sec/batch
Epoch: 2/20...  Training Step: 876...  Training loss: 1.4864...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 877...  Training loss: 1.6520...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 878...  Training loss: 1.5669...  0.1227 sec/batch
Epoch: 2/20...  Training Step: 879...  Training loss: 1.8312...  0.1279 sec/batch
Epoch: 2/20...  Training Step: 880...  Training loss: 1.7645...  0.1224 sec/batch
Epoch: 2/20...  Training Step: 881...  Training loss: 1.7802...  0.1231 sec/batch
Epoch: 2/20...  Training Step: 882...  Training loss: 1.4438...  0.1211 sec/batch
Epoch: 2/20...  Training Step: 883...  Training loss: 1.5683...  0.1246 sec/batch
Epoch: 2/20...  Training Step: 884...  Training loss: 1.3227...  0.1179 sec/batch
Epoch: 2/20...  Training Step: 885...  Training loss: 1.6070...  0.1201 sec/batch
Epoch: 2/20...  Training Step: 886...  Training loss: 1.5553...  0.1225 sec/batch
Epoch: 2/20...  Training Step: 887...  Training loss: 1.6264...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 888...  Training loss: 1.7644...  0.1215 sec/batch
Epoch: 2/20...  Training Step: 889...  Training loss: 1.4650...  0.1268 sec/batch
Epoch: 2/20...  Training Step: 890...  Training loss: 1.6550...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 891...  Training loss: 1.5634...  0.1265 sec/batch
Epoch: 2/20...  Training Step: 892...  Training loss: 1.8245...  0.1254 sec/batch
Epoch: 2/20...  Training Step: 893...  Training loss: 1.4900...  0.1210 sec/batch
Epoch: 2/20...  Training Step: 894...  Training loss: 1.5772...  0.1218 sec/batch
Epoch: 2/20...  Training Step: 895...  Training loss: 1.8296...  0.1242 sec/batch
Epoch: 2/20...  Training Step: 896...  Training loss: 1.5620...  0.1300 sec/batch
Epoch: 2/20...  Training Step: 897...  Training loss: 1.8698...  0.1343 sec/batch
Epoch: 2/20...  Training Step: 898...  Training loss: 1.6986...  0.1357 sec/batch
Epoch: 2/20...  Training Step: 899...  Training loss: 1.4239...  0.1191 sec/batch
Epoch: 2/20...  Training Step: 900...  Training loss: 1.6140...  0.1219 sec/batch
Epoch: 2/20...  Training Step: 901...  Training loss: 1.5297...  0.1268 sec/batch
Epoch: 2/20...  Training Step: 902...  Training loss: 1.7523...  0.1198 sec/batch
Epoch: 2/20...  Training Step: 903...  Training loss: 1.8285...  0.1243 sec/batch
Epoch: 2/20...  Training Step: 904...  Training loss: 1.7777...  0.1244 sec/batch
Epoch: 2/20...  Training Step: 905...  Training loss: 1.6053...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 906...  Training loss: 1.4899...  0.1242 sec/batch
Epoch: 2/20...  Training Step: 907...  Training loss: 1.7333...  0.1223 sec/batch
Epoch: 2/20...  Training Step: 908...  Training loss: 1.5857...  0.1244 sec/batch
Epoch: 2/20...  Training Step: 909...  Training loss: 1.6375...  0.1235 sec/batch
Epoch: 2/20...  Training Step: 910...  Training loss: 1.5512...  0.1222 sec/batch
Epoch: 2/20...  Training Step: 911...  Training loss: 1.6102...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 912...  Training loss: 1.5020...  0.1223 sec/batch
Epoch: 2/20...  Training Step: 913...  Training loss: 1.4942...  0.1268 sec/batch
Epoch: 2/20...  Training Step: 914...  Training loss: 1.5327...  0.1223 sec/batch
Epoch: 2/20...  Training Step: 915...  Training loss: 1.5921...  0.1206 sec/batch
Epoch: 2/20...  Training Step: 916...  Training loss: 1.6016...  0.1236 sec/batch
Epoch: 2/20...  Training Step: 917...  Training loss: 1.5069...  0.1227 sec/batch
Epoch: 2/20...  Training Step: 918...  Training loss: 1.8236...  0.1219 sec/batch
Epoch: 2/20...  Training Step: 919...  Training loss: 1.7293...  0.1249 sec/batch
Epoch: 2/20...  Training Step: 920...  Training loss: 1.5183...  0.1228 sec/batch
Epoch: 2/20...  Training Step: 921...  Training loss: 1.4369...  0.1184 sec/batch
Epoch: 2/20...  Training Step: 922...  Training loss: 1.4567...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 923...  Training loss: 1.4554...  0.1246 sec/batch
Epoch: 2/20...  Training Step: 924...  Training loss: 1.5544...  0.1297 sec/batch
Epoch: 2/20...  Training Step: 925...  Training loss: 1.7085...  0.1255 sec/batch
Epoch: 2/20...  Training Step: 926...  Training loss: 1.5437...  0.1240 sec/batch
Epoch: 2/20...  Training Step: 927...  Training loss: 1.6608...  0.1226 sec/batch
Epoch: 2/20...  Training Step: 928...  Training loss: 1.3968...  0.1187 sec/batch
Epoch: 3/20...  Training Step: 929...  Training loss: 1.9449...  0.1186 sec/batch
Epoch: 3/20...  Training Step: 930...  Training loss: 1.5780...  0.1178 sec/batch
Epoch: 3/20...  Training Step: 931...  Training loss: 1.4143...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 932...  Training loss: 1.4950...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 933...  Training loss: 1.6531...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 934...  Training loss: 1.3630...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 935...  Training loss: 1.7588...  0.1272 sec/batch
Epoch: 3/20...  Training Step: 936...  Training loss: 1.4835...  0.1206 sec/batch
Epoch: 3/20...  Training Step: 937...  Training loss: 1.4910...  0.1202 sec/batch
Epoch: 3/20...  Training Step: 938...  Training loss: 1.7330...  0.1177 sec/batch
Epoch: 3/20...  Training Step: 939...  Training loss: 1.4915...  0.1176 sec/batch
Epoch: 3/20...  Training Step: 940...  Training loss: 1.2877...  0.1227 sec/batch
Epoch: 3/20...  Training Step: 941...  Training loss: 1.7049...  0.1183 sec/batch
Epoch: 3/20...  Training Step: 942...  Training loss: 1.3454...  0.1214 sec/batch
Epoch: 3/20...  Training Step: 943...  Training loss: 1.5023...  0.1248 sec/batch
Epoch: 3/20...  Training Step: 944...  Training loss: 1.5896...  0.1223 sec/batch
Epoch: 3/20...  Training Step: 945...  Training loss: 1.3567...  0.1321 sec/batch
Epoch: 3/20...  Training Step: 946...  Training loss: 1.3792...  0.1288 sec/batch
Epoch: 3/20...  Training Step: 947...  Training loss: 1.4189...  0.1255 sec/batch
Epoch: 3/20...  Training Step: 948...  Training loss: 1.3969...  0.1185 sec/batch
Epoch: 3/20...  Training Step: 949...  Training loss: 1.6710...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 950...  Training loss: 1.4185...  0.1224 sec/batch
Epoch: 3/20...  Training Step: 951...  Training loss: 1.6852...  0.1223 sec/batch
Epoch: 3/20...  Training Step: 952...  Training loss: 1.3690...  0.1263 sec/batch
Epoch: 3/20...  Training Step: 953...  Training loss: 1.4494...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 954...  Training loss: 1.5974...  0.1224 sec/batch
Epoch: 3/20...  Training Step: 955...  Training loss: 1.5788...  0.1268 sec/batch
Epoch: 3/20...  Training Step: 956...  Training loss: 1.3920...  0.1221 sec/batch
Epoch: 3/20...  Training Step: 957...  Training loss: 1.5513...  0.1200 sec/batch
Epoch: 3/20...  Training Step: 958...  Training loss: 1.4653...  0.1218 sec/batch
Epoch: 3/20...  Training Step: 959...  Training loss: 1.3200...  0.1240 sec/batch
Epoch: 3/20...  Training Step: 960...  Training loss: 1.4287...  0.1249 sec/batch
Epoch: 3/20...  Training Step: 961...  Training loss: 1.3820...  0.1249 sec/batch
Epoch: 3/20...  Training Step: 962...  Training loss: 1.3546...  0.1176 sec/batch
Epoch: 3/20...  Training Step: 963...  Training loss: 1.3718...  0.1220 sec/batch
Epoch: 3/20...  Training Step: 964...  Training loss: 1.4093...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 965...  Training loss: 1.5570...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 966...  Training loss: 1.3754...  0.1236 sec/batch
Epoch: 3/20...  Training Step: 967...  Training loss: 1.3773...  0.1185 sec/batch
Epoch: 3/20...  Training Step: 968...  Training loss: 1.6443...  0.1251 sec/batch
Epoch: 3/20...  Training Step: 969...  Training loss: 1.3945...  0.1199 sec/batch
Epoch: 3/20...  Training Step: 970...  Training loss: 1.3672...  0.1200 sec/batch
Epoch: 3/20...  Training Step: 971...  Training loss: 1.5994...  0.1226 sec/batch
Epoch: 3/20...  Training Step: 972...  Training loss: 1.3363...  0.1240 sec/batch
Epoch: 3/20...  Training Step: 973...  Training loss: 1.4538...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 974...  Training loss: 1.3994...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 975...  Training loss: 1.3655...  0.1199 sec/batch
Epoch: 3/20...  Training Step: 976...  Training loss: 1.5634...  0.1207 sec/batch
Epoch: 3/20...  Training Step: 977...  Training loss: 1.4231...  0.1213 sec/batch
Epoch: 3/20...  Training Step: 978...  Training loss: 1.7701...  0.1199 sec/batch
Epoch: 3/20...  Training Step: 979...  Training loss: 1.4485...  0.1221 sec/batch
Epoch: 3/20...  Training Step: 980...  Training loss: 1.5609...  0.1254 sec/batch
Epoch: 3/20...  Training Step: 981...  Training loss: 1.7103...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 982...  Training loss: 1.5043...  0.1146 sec/batch
Epoch: 3/20...  Training Step: 983...  Training loss: 1.2968...  0.1188 sec/batch
Epoch: 3/20...  Training Step: 984...  Training loss: 1.4487...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 985...  Training loss: 1.7074...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 986...  Training loss: 1.6142...  0.1224 sec/batch
Epoch: 3/20...  Training Step: 987...  Training loss: 1.3614...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 988...  Training loss: 1.4105...  0.1208 sec/batch
Epoch: 3/20...  Training Step: 989...  Training loss: 1.5193...  0.1241 sec/batch
Epoch: 3/20...  Training Step: 990...  Training loss: 1.5993...  0.1178 sec/batch
Epoch: 3/20...  Training Step: 991...  Training loss: 1.3827...  0.1199 sec/batch
Epoch: 3/20...  Training Step: 992...  Training loss: 1.4665...  0.1265 sec/batch
Epoch: 3/20...  Training Step: 993...  Training loss: 1.3230...  0.1242 sec/batch
Epoch: 3/20...  Training Step: 994...  Training loss: 1.6459...  0.1214 sec/batch
Epoch: 3/20...  Training Step: 995...  Training loss: 1.4463...  0.1243 sec/batch
Epoch: 3/20...  Training Step: 996...  Training loss: 1.6212...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 997...  Training loss: 1.3989...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 998...  Training loss: 1.4242...  0.1238 sec/batch
Epoch: 3/20...  Training Step: 999...  Training loss: 1.5636...  0.1213 sec/batch
Epoch: 3/20...  Training Step: 1000...  Training loss: 1.5529...  0.1234 sec/batch
Epoch: 3/20...  Training Step: 1001...  Training loss: 1.5257...  0.1177 sec/batch
Epoch: 3/20...  Training Step: 1002...  Training loss: 1.4005...  0.1252 sec/batch
Epoch: 3/20...  Training Step: 1003...  Training loss: 1.7731...  0.1245 sec/batch
Epoch: 3/20...  Training Step: 1004...  Training loss: 1.4202...  0.1186 sec/batch
Epoch: 3/20...  Training Step: 1005...  Training loss: 1.3353...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1006...  Training loss: 1.4132...  0.1213 sec/batch
Epoch: 3/20...  Training Step: 1007...  Training loss: 1.4927...  0.1216 sec/batch
Epoch: 3/20...  Training Step: 1008...  Training loss: 1.3253...  0.1216 sec/batch
Epoch: 3/20...  Training Step: 1009...  Training loss: 1.5526...  0.1250 sec/batch
Epoch: 3/20...  Training Step: 1010...  Training loss: 1.4232...  0.1196 sec/batch
Epoch: 3/20...  Training Step: 1011...  Training loss: 1.3292...  0.1196 sec/batch
Epoch: 3/20...  Training Step: 1012...  Training loss: 1.5740...  0.1256 sec/batch
Epoch: 3/20...  Training Step: 1013...  Training loss: 1.4884...  0.1178 sec/batch
Epoch: 3/20...  Training Step: 1014...  Training loss: 1.5495...  0.1208 sec/batch
Epoch: 3/20...  Training Step: 1015...  Training loss: 1.3761...  0.1253 sec/batch
Epoch: 3/20...  Training Step: 1016...  Training loss: 1.5496...  0.1255 sec/batch
Epoch: 3/20...  Training Step: 1017...  Training loss: 1.7168...  0.1249 sec/batch
Epoch: 3/20...  Training Step: 1018...  Training loss: 1.4293...  0.1175 sec/batch
Epoch: 3/20...  Training Step: 1019...  Training loss: 1.5856...  0.1164 sec/batch
Epoch: 3/20...  Training Step: 1020...  Training loss: 1.8311...  0.1222 sec/batch
Epoch: 3/20...  Training Step: 1021...  Training loss: 1.3409...  0.1226 sec/batch
Epoch: 3/20...  Training Step: 1022...  Training loss: 1.8408...  0.1253 sec/batch
Epoch: 3/20...  Training Step: 1023...  Training loss: 1.4086...  0.1221 sec/batch
Epoch: 3/20...  Training Step: 1024...  Training loss: 1.5127...  0.1212 sec/batch
Epoch: 3/20...  Training Step: 1025...  Training loss: 1.9485...  0.1273 sec/batch
Epoch: 3/20...  Training Step: 1026...  Training loss: 1.6082...  0.1271 sec/batch
Epoch: 3/20...  Training Step: 1027...  Training loss: 1.5680...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1028...  Training loss: 1.4651...  0.1236 sec/batch
Epoch: 3/20...  Training Step: 1029...  Training loss: 1.4863...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1030...  Training loss: 1.8156...  0.1183 sec/batch
Epoch: 3/20...  Training Step: 1031...  Training loss: 1.8167...  0.1229 sec/batch
Epoch: 3/20...  Training Step: 1032...  Training loss: 1.5805...  0.1231 sec/batch
Epoch: 3/20...  Training Step: 1033...  Training loss: 1.6752...  0.1198 sec/batch
Epoch: 3/20...  Training Step: 1034...  Training loss: 1.8935...  0.1246 sec/batch
Epoch: 3/20...  Training Step: 1035...  Training loss: 1.7469...  0.1196 sec/batch
Epoch: 3/20...  Training Step: 1036...  Training loss: 1.8368...  0.1240 sec/batch
Epoch: 3/20...  Training Step: 1037...  Training loss: 1.8881...  0.1188 sec/batch
Epoch: 3/20...  Training Step: 1038...  Training loss: 1.5683...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1039...  Training loss: 1.6711...  0.1254 sec/batch
Epoch: 3/20...  Training Step: 1040...  Training loss: 1.6341...  0.1196 sec/batch
Epoch: 3/20...  Training Step: 1041...  Training loss: 1.5969...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1042...  Training loss: 1.8003...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 1043...  Training loss: 1.6066...  0.1214 sec/batch
Epoch: 3/20...  Training Step: 1044...  Training loss: 1.4393...  0.1198 sec/batch
Epoch: 3/20...  Training Step: 1045...  Training loss: 1.7296...  0.1231 sec/batch
Epoch: 3/20...  Training Step: 1046...  Training loss: 1.7490...  0.1211 sec/batch
Epoch: 3/20...  Training Step: 1047...  Training loss: 1.6345...  0.1210 sec/batch
Epoch: 3/20...  Training Step: 1048...  Training loss: 1.4011...  0.1202 sec/batch
Epoch: 3/20...  Training Step: 1049...  Training loss: 1.5176...  0.1219 sec/batch
Epoch: 3/20...  Training Step: 1050...  Training loss: 1.6017...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1051...  Training loss: 1.7129...  0.1179 sec/batch
Epoch: 3/20...  Training Step: 1052...  Training loss: 1.6136...  0.1223 sec/batch
Epoch: 3/20...  Training Step: 1053...  Training loss: 1.6677...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1054...  Training loss: 1.3784...  0.1184 sec/batch
Epoch: 3/20...  Training Step: 1055...  Training loss: 1.4385...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 1056...  Training loss: 1.5424...  0.1260 sec/batch
Epoch: 3/20...  Training Step: 1057...  Training loss: 1.6811...  0.1172 sec/batch
Epoch: 3/20...  Training Step: 1058...  Training loss: 1.4956...  0.1194 sec/batch
Epoch: 3/20...  Training Step: 1059...  Training loss: 1.8544...  0.1348 sec/batch
Epoch: 3/20...  Training Step: 1060...  Training loss: 1.6321...  0.1359 sec/batch
Epoch: 3/20...  Training Step: 1061...  Training loss: 1.5454...  0.1209 sec/batch
Epoch: 3/20...  Training Step: 1062...  Training loss: 1.6454...  0.1221 sec/batch
Epoch: 3/20...  Training Step: 1063...  Training loss: 1.4355...  0.1192 sec/batch
Epoch: 3/20...  Training Step: 1064...  Training loss: 1.4099...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1065...  Training loss: 1.3926...  0.1248 sec/batch
Epoch: 3/20...  Training Step: 1066...  Training loss: 1.4718...  0.1207 sec/batch
Epoch: 3/20...  Training Step: 1067...  Training loss: 1.4711...  0.1203 sec/batch
Epoch: 3/20...  Training Step: 1068...  Training loss: 1.5006...  0.1210 sec/batch
Epoch: 3/20...  Training Step: 1069...  Training loss: 1.4499...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1070...  Training loss: 1.4404...  0.1260 sec/batch
Epoch: 3/20...  Training Step: 1071...  Training loss: 1.3352...  0.1286 sec/batch
Epoch: 3/20...  Training Step: 1072...  Training loss: 1.4516...  0.1287 sec/batch
Epoch: 3/20...  Training Step: 1073...  Training loss: 1.5586...  0.1205 sec/batch
Epoch: 3/20...  Training Step: 1074...  Training loss: 1.4360...  0.1222 sec/batch
Epoch: 3/20...  Training Step: 1075...  Training loss: 1.6583...  0.1214 sec/batch
Epoch: 3/20...  Training Step: 1076...  Training loss: 1.4968...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 1077...  Training loss: 1.5031...  0.1245 sec/batch
Epoch: 3/20...  Training Step: 1078...  Training loss: 1.6054...  0.1214 sec/batch
Epoch: 3/20...  Training Step: 1079...  Training loss: 1.6738...  0.1265 sec/batch
Epoch: 3/20...  Training Step: 1080...  Training loss: 1.6745...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1081...  Training loss: 1.7202...  0.1313 sec/batch
Epoch: 3/20...  Training Step: 1082...  Training loss: 1.6778...  0.1264 sec/batch
Epoch: 3/20...  Training Step: 1083...  Training loss: 1.6715...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1084...  Training loss: 1.4047...  0.1207 sec/batch
Epoch: 3/20...  Training Step: 1085...  Training loss: 1.4174...  0.1226 sec/batch
Epoch: 3/20...  Training Step: 1086...  Training loss: 1.4059...  0.1210 sec/batch
Epoch: 3/20...  Training Step: 1087...  Training loss: 1.4300...  0.1238 sec/batch
Epoch: 3/20...  Training Step: 1088...  Training loss: 1.4531...  0.1197 sec/batch
Epoch: 3/20...  Training Step: 1089...  Training loss: 1.6735...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1090...  Training loss: 1.5071...  0.1212 sec/batch
Epoch: 3/20...  Training Step: 1091...  Training loss: 1.6246...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 1092...  Training loss: 1.2634...  0.1224 sec/batch
Epoch: 3/20...  Training Step: 1093...  Training loss: 1.6045...  0.1222 sec/batch
Epoch: 3/20...  Training Step: 1094...  Training loss: 1.5277...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1095...  Training loss: 1.4848...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 1096...  Training loss: 1.6212...  0.1231 sec/batch
Epoch: 3/20...  Training Step: 1097...  Training loss: 1.6334...  0.1212 sec/batch
Epoch: 3/20...  Training Step: 1098...  Training loss: 1.6819...  0.1189 sec/batch
Epoch: 3/20...  Training Step: 1099...  Training loss: 1.5043...  0.1238 sec/batch
Epoch: 3/20...  Training Step: 1100...  Training loss: 1.5513...  0.1239 sec/batch
Epoch: 3/20...  Training Step: 1101...  Training loss: 1.5325...  0.1245 sec/batch
Epoch: 3/20...  Training Step: 1102...  Training loss: 1.5957...  0.1265 sec/batch
Epoch: 3/20...  Training Step: 1103...  Training loss: 1.6104...  0.1219 sec/batch
Epoch: 3/20...  Training Step: 1104...  Training loss: 1.3920...  0.1203 sec/batch
Epoch: 3/20...  Training Step: 1105...  Training loss: 1.2441...  0.1195 sec/batch
Epoch: 3/20...  Training Step: 1106...  Training loss: 1.6908...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1107...  Training loss: 1.3905...  0.1199 sec/batch
Epoch: 3/20...  Training Step: 1108...  Training loss: 1.7121...  0.1222 sec/batch
Epoch: 3/20...  Training Step: 1109...  Training loss: 1.4404...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 1110...  Training loss: 1.7088...  0.1201 sec/batch
Epoch: 3/20...  Training Step: 1111...  Training loss: 1.6019...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 1112...  Training loss: 1.5274...  0.1190 sec/batch
Epoch: 3/20...  Training Step: 1113...  Training loss: 1.8034...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1114...  Training loss: 1.5510...  0.1219 sec/batch
Epoch: 3/20...  Training Step: 1115...  Training loss: 1.7064...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 1116...  Training loss: 1.4531...  0.1190 sec/batch
Epoch: 3/20...  Training Step: 1117...  Training loss: 1.5547...  0.1258 sec/batch
Epoch: 3/20...  Training Step: 1118...  Training loss: 1.6288...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 1119...  Training loss: 1.3383...  0.1216 sec/batch
Epoch: 3/20...  Training Step: 1120...  Training loss: 1.6835...  0.1210 sec/batch
Epoch: 3/20...  Training Step: 1121...  Training loss: 1.5582...  0.1240 sec/batch
Epoch: 3/20...  Training Step: 1122...  Training loss: 1.4711...  0.1240 sec/batch
Epoch: 3/20...  Training Step: 1123...  Training loss: 1.5970...  0.1265 sec/batch
Epoch: 3/20...  Training Step: 1124...  Training loss: 1.5486...  0.1213 sec/batch
Epoch: 3/20...  Training Step: 1125...  Training loss: 1.3892...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1126...  Training loss: 1.5995...  0.1194 sec/batch
Epoch: 3/20...  Training Step: 1127...  Training loss: 1.3688...  0.1312 sec/batch
Epoch: 3/20...  Training Step: 1128...  Training loss: 1.4986...  0.1248 sec/batch
Epoch: 3/20...  Training Step: 1129...  Training loss: 1.3921...  0.1203 sec/batch
Epoch: 3/20...  Training Step: 1130...  Training loss: 1.4580...  0.1205 sec/batch
Epoch: 3/20...  Training Step: 1131...  Training loss: 1.5017...  0.1312 sec/batch
Epoch: 3/20...  Training Step: 1132...  Training loss: 1.6963...  0.1323 sec/batch
Epoch: 3/20...  Training Step: 1133...  Training loss: 1.3546...  0.1297 sec/batch
Epoch: 3/20...  Training Step: 1134...  Training loss: 1.3314...  0.1327 sec/batch
Epoch: 3/20...  Training Step: 1135...  Training loss: 1.5422...  0.1353 sec/batch
Epoch: 3/20...  Training Step: 1136...  Training loss: 1.5293...  0.1274 sec/batch
Epoch: 3/20...  Training Step: 1137...  Training loss: 1.4793...  0.1262 sec/batch
Epoch: 3/20...  Training Step: 1138...  Training loss: 1.4585...  0.1305 sec/batch
Epoch: 3/20...  Training Step: 1139...  Training loss: 1.1157...  0.1414 sec/batch
Epoch: 3/20...  Training Step: 1140...  Training loss: 1.5859...  0.1278 sec/batch
Epoch: 3/20...  Training Step: 1141...  Training loss: 1.5781...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1142...  Training loss: 1.4823...  0.1214 sec/batch
Epoch: 3/20...  Training Step: 1143...  Training loss: 1.6744...  0.1209 sec/batch
Epoch: 3/20...  Training Step: 1144...  Training loss: 1.4642...  0.1265 sec/batch
Epoch: 3/20...  Training Step: 1145...  Training loss: 1.5313...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1146...  Training loss: 1.3649...  0.1450 sec/batch
Epoch: 3/20...  Training Step: 1147...  Training loss: 1.6506...  0.1408 sec/batch
Epoch: 3/20...  Training Step: 1148...  Training loss: 1.4020...  0.1336 sec/batch
Epoch: 3/20...  Training Step: 1149...  Training loss: 1.4229...  0.1321 sec/batch
Epoch: 3/20...  Training Step: 1150...  Training loss: 1.6742...  0.1291 sec/batch
Epoch: 3/20...  Training Step: 1151...  Training loss: 1.6882...  0.1212 sec/batch
Epoch: 3/20...  Training Step: 1152...  Training loss: 1.6954...  0.1226 sec/batch
Epoch: 3/20...  Training Step: 1153...  Training loss: 1.5207...  0.1207 sec/batch
Epoch: 3/20...  Training Step: 1154...  Training loss: 1.6923...  0.1200 sec/batch
Epoch: 3/20...  Training Step: 1155...  Training loss: 1.6228...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1156...  Training loss: 1.3515...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1157...  Training loss: 1.4203...  0.1255 sec/batch
Epoch: 3/20...  Training Step: 1158...  Training loss: 1.4987...  0.1249 sec/batch
Epoch: 3/20...  Training Step: 1159...  Training loss: 1.4366...  0.1234 sec/batch
Epoch: 3/20...  Training Step: 1160...  Training loss: 1.3728...  0.1248 sec/batch
Epoch: 3/20...  Training Step: 1161...  Training loss: 1.7861...  0.1224 sec/batch
Epoch: 3/20...  Training Step: 1162...  Training loss: 1.3730...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1163...  Training loss: 1.6585...  0.1241 sec/batch
Epoch: 3/20...  Training Step: 1164...  Training loss: 1.3996...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1165...  Training loss: 1.8117...  0.1224 sec/batch
Epoch: 3/20...  Training Step: 1166...  Training loss: 1.3453...  0.1259 sec/batch
Epoch: 3/20...  Training Step: 1167...  Training loss: 1.5191...  0.1248 sec/batch
Epoch: 3/20...  Training Step: 1168...  Training loss: 1.5896...  0.1200 sec/batch
Epoch: 3/20...  Training Step: 1169...  Training loss: 1.4248...  0.1253 sec/batch
Epoch: 3/20...  Training Step: 1170...  Training loss: 1.4025...  0.1236 sec/batch
Epoch: 3/20...  Training Step: 1171...  Training loss: 1.6216...  0.1211 sec/batch
Epoch: 3/20...  Training Step: 1172...  Training loss: 1.5249...  0.1234 sec/batch
Epoch: 3/20...  Training Step: 1173...  Training loss: 1.5229...  0.1256 sec/batch
Epoch: 3/20...  Training Step: 1174...  Training loss: 1.3503...  0.1235 sec/batch
Epoch: 3/20...  Training Step: 1175...  Training loss: 1.4559...  0.1283 sec/batch
Epoch: 3/20...  Training Step: 1176...  Training loss: 1.5606...  0.1195 sec/batch
Epoch: 3/20...  Training Step: 1177...  Training loss: 1.3951...  0.1220 sec/batch
Epoch: 3/20...  Training Step: 1178...  Training loss: 1.4849...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1179...  Training loss: 1.5060...  0.1229 sec/batch
Epoch: 3/20...  Training Step: 1180...  Training loss: 1.4280...  0.1242 sec/batch
Epoch: 3/20...  Training Step: 1181...  Training loss: 1.3011...  0.1213 sec/batch
Epoch: 3/20...  Training Step: 1182...  Training loss: 1.5110...  0.1192 sec/batch
Epoch: 3/20...  Training Step: 1183...  Training loss: 1.4393...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 1184...  Training loss: 1.4552...  0.1194 sec/batch
Epoch: 3/20...  Training Step: 1185...  Training loss: 1.7136...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1186...  Training loss: 1.4040...  0.1202 sec/batch
Epoch: 3/20...  Training Step: 1187...  Training loss: 1.4646...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1188...  Training loss: 1.4631...  0.1198 sec/batch
Epoch: 3/20...  Training Step: 1189...  Training loss: 1.4965...  0.1248 sec/batch
Epoch: 3/20...  Training Step: 1190...  Training loss: 1.5032...  0.1190 sec/batch
Epoch: 3/20...  Training Step: 1191...  Training loss: 1.5201...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1192...  Training loss: 1.6394...  0.1240 sec/batch
Epoch: 3/20...  Training Step: 1193...  Training loss: 1.5528...  0.1200 sec/batch
Epoch: 3/20...  Training Step: 1194...  Training loss: 1.5468...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1195...  Training loss: 1.8005...  0.1265 sec/batch
Epoch: 3/20...  Training Step: 1196...  Training loss: 1.5517...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1197...  Training loss: 1.5621...  0.1256 sec/batch
Epoch: 3/20...  Training Step: 1198...  Training loss: 1.7631...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1199...  Training loss: 1.5714...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1200...  Training loss: 1.5347...  0.1218 sec/batch
Epoch: 3/20...  Training Step: 1201...  Training loss: 1.6003...  0.1256 sec/batch
Epoch: 3/20...  Training Step: 1202...  Training loss: 1.4169...  0.1171 sec/batch
Epoch: 3/20...  Training Step: 1203...  Training loss: 1.6148...  0.1161 sec/batch
Epoch: 3/20...  Training Step: 1204...  Training loss: 1.5746...  0.1197 sec/batch
Epoch: 3/20...  Training Step: 1205...  Training loss: 1.6521...  0.1223 sec/batch
Epoch: 3/20...  Training Step: 1206...  Training loss: 1.7894...  0.1206 sec/batch
Epoch: 3/20...  Training Step: 1207...  Training loss: 1.5322...  0.1197 sec/batch
Epoch: 3/20...  Training Step: 1208...  Training loss: 1.4790...  0.1191 sec/batch
Epoch: 3/20...  Training Step: 1209...  Training loss: 1.3678...  0.1205 sec/batch
Epoch: 3/20...  Training Step: 1210...  Training loss: 1.3871...  0.1266 sec/batch
Epoch: 3/20...  Training Step: 1211...  Training loss: 1.4056...  0.1211 sec/batch
Epoch: 3/20...  Training Step: 1212...  Training loss: 1.5430...  0.1221 sec/batch
Epoch: 3/20...  Training Step: 1213...  Training loss: 1.4124...  0.1227 sec/batch
Epoch: 3/20...  Training Step: 1214...  Training loss: 1.5950...  0.1227 sec/batch
Epoch: 3/20...  Training Step: 1215...  Training loss: 1.5371...  0.1209 sec/batch
Epoch: 3/20...  Training Step: 1216...  Training loss: 1.5451...  0.1199 sec/batch
Epoch: 3/20...  Training Step: 1217...  Training loss: 1.6628...  0.1254 sec/batch
Epoch: 3/20...  Training Step: 1218...  Training loss: 1.4933...  0.1308 sec/batch
Epoch: 3/20...  Training Step: 1219...  Training loss: 1.4938...  0.1243 sec/batch
Epoch: 3/20...  Training Step: 1220...  Training loss: 1.4262...  0.1183 sec/batch
Epoch: 3/20...  Training Step: 1221...  Training loss: 1.3884...  0.1186 sec/batch
Epoch: 3/20...  Training Step: 1222...  Training loss: 1.4537...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 1223...  Training loss: 1.5068...  0.1255 sec/batch
Epoch: 3/20...  Training Step: 1224...  Training loss: 1.6773...  0.1205 sec/batch
Epoch: 3/20...  Training Step: 1225...  Training loss: 1.3959...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1226...  Training loss: 1.4378...  0.1213 sec/batch
Epoch: 3/20...  Training Step: 1227...  Training loss: 1.4300...  0.1211 sec/batch
Epoch: 3/20...  Training Step: 1228...  Training loss: 1.5322...  0.1208 sec/batch
Epoch: 3/20...  Training Step: 1229...  Training loss: 1.4586...  0.1243 sec/batch
Epoch: 3/20...  Training Step: 1230...  Training loss: 1.5052...  0.1236 sec/batch
Epoch: 3/20...  Training Step: 1231...  Training loss: 1.2914...  0.1227 sec/batch
Epoch: 3/20...  Training Step: 1232...  Training loss: 1.7427...  0.1197 sec/batch
Epoch: 3/20...  Training Step: 1233...  Training loss: 1.3229...  0.1235 sec/batch
Epoch: 3/20...  Training Step: 1234...  Training loss: 1.5045...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 1235...  Training loss: 1.4541...  0.1210 sec/batch
Epoch: 3/20...  Training Step: 1236...  Training loss: 1.8124...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1237...  Training loss: 1.5099...  0.1206 sec/batch
Epoch: 3/20...  Training Step: 1238...  Training loss: 1.6020...  0.1218 sec/batch
Epoch: 3/20...  Training Step: 1239...  Training loss: 1.5292...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1240...  Training loss: 1.5018...  0.1231 sec/batch
Epoch: 3/20...  Training Step: 1241...  Training loss: 1.6346...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1242...  Training loss: 1.4792...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 1243...  Training loss: 1.2944...  0.1229 sec/batch
Epoch: 3/20...  Training Step: 1244...  Training loss: 1.4494...  0.1199 sec/batch
Epoch: 3/20...  Training Step: 1245...  Training loss: 1.4722...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1246...  Training loss: 1.4190...  0.1211 sec/batch
Epoch: 3/20...  Training Step: 1247...  Training loss: 1.4602...  0.1234 sec/batch
Epoch: 3/20...  Training Step: 1248...  Training loss: 1.3172...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 1249...  Training loss: 1.2813...  0.1188 sec/batch
Epoch: 3/20...  Training Step: 1250...  Training loss: 1.4847...  0.1252 sec/batch
Epoch: 3/20...  Training Step: 1251...  Training loss: 1.3949...  0.1244 sec/batch
Epoch: 3/20...  Training Step: 1252...  Training loss: 1.3037...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1253...  Training loss: 1.2033...  0.1338 sec/batch
Epoch: 3/20...  Training Step: 1254...  Training loss: 1.3316...  0.1363 sec/batch
Epoch: 3/20...  Training Step: 1255...  Training loss: 1.5470...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 1256...  Training loss: 1.4848...  0.1190 sec/batch
Epoch: 3/20...  Training Step: 1257...  Training loss: 1.4451...  0.1195 sec/batch
Epoch: 3/20...  Training Step: 1258...  Training loss: 1.5039...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1259...  Training loss: 1.5019...  0.1223 sec/batch
Epoch: 3/20...  Training Step: 1260...  Training loss: 1.4659...  0.1216 sec/batch
Epoch: 3/20...  Training Step: 1261...  Training loss: 1.2788...  0.1192 sec/batch
Epoch: 3/20...  Training Step: 1262...  Training loss: 1.3967...  0.1267 sec/batch
Epoch: 3/20...  Training Step: 1263...  Training loss: 1.6080...  0.1298 sec/batch
Epoch: 3/20...  Training Step: 1264...  Training loss: 1.4442...  0.1268 sec/batch
Epoch: 3/20...  Training Step: 1265...  Training loss: 1.4486...  0.1297 sec/batch
Epoch: 3/20...  Training Step: 1266...  Training loss: 1.4580...  0.1258 sec/batch
Epoch: 3/20...  Training Step: 1267...  Training loss: 1.5623...  0.1245 sec/batch
Epoch: 3/20...  Training Step: 1268...  Training loss: 1.3603...  0.1206 sec/batch
Epoch: 3/20...  Training Step: 1269...  Training loss: 1.3881...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 1270...  Training loss: 1.5881...  0.1202 sec/batch
Epoch: 3/20...  Training Step: 1271...  Training loss: 1.1880...  0.1177 sec/batch
Epoch: 3/20...  Training Step: 1272...  Training loss: 1.6003...  0.1224 sec/batch
Epoch: 3/20...  Training Step: 1273...  Training loss: 1.3740...  0.1229 sec/batch
Epoch: 3/20...  Training Step: 1274...  Training loss: 1.3288...  0.1190 sec/batch
Epoch: 3/20...  Training Step: 1275...  Training loss: 1.2824...  0.1275 sec/batch
Epoch: 3/20...  Training Step: 1276...  Training loss: 1.7658...  0.1248 sec/batch
Epoch: 3/20...  Training Step: 1277...  Training loss: 1.2926...  0.1230 sec/batch
Epoch: 3/20...  Training Step: 1278...  Training loss: 1.4131...  0.1203 sec/batch
Epoch: 3/20...  Training Step: 1279...  Training loss: 1.4284...  0.1236 sec/batch
Epoch: 3/20...  Training Step: 1280...  Training loss: 1.4369...  0.1240 sec/batch
Epoch: 3/20...  Training Step: 1281...  Training loss: 1.2484...  0.1205 sec/batch
Epoch: 3/20...  Training Step: 1282...  Training loss: 1.1394...  0.1212 sec/batch
Epoch: 3/20...  Training Step: 1283...  Training loss: 1.5956...  0.1231 sec/batch
Epoch: 3/20...  Training Step: 1284...  Training loss: 1.3623...  0.1231 sec/batch
Epoch: 3/20...  Training Step: 1285...  Training loss: 1.3414...  0.1168 sec/batch
Epoch: 3/20...  Training Step: 1286...  Training loss: 1.4809...  0.1236 sec/batch
Epoch: 3/20...  Training Step: 1287...  Training loss: 1.6673...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 1288...  Training loss: 1.1649...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 1289...  Training loss: 1.4957...  0.1265 sec/batch
Epoch: 3/20...  Training Step: 1290...  Training loss: 1.4759...  0.1211 sec/batch
Epoch: 3/20...  Training Step: 1291...  Training loss: 1.4942...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 1292...  Training loss: 1.5007...  0.1234 sec/batch
Epoch: 3/20...  Training Step: 1293...  Training loss: 1.3933...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1294...  Training loss: 1.4123...  0.1174 sec/batch
Epoch: 3/20...  Training Step: 1295...  Training loss: 1.2318...  0.1189 sec/batch
Epoch: 3/20...  Training Step: 1296...  Training loss: 1.5183...  0.1207 sec/batch
Epoch: 3/20...  Training Step: 1297...  Training loss: 1.4462...  0.1256 sec/batch
Epoch: 3/20...  Training Step: 1298...  Training loss: 1.3265...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 1299...  Training loss: 1.2633...  0.1228 sec/batch
Epoch: 3/20...  Training Step: 1300...  Training loss: 1.5061...  0.1218 sec/batch
Epoch: 3/20...  Training Step: 1301...  Training loss: 1.4581...  0.1223 sec/batch
Epoch: 3/20...  Training Step: 1302...  Training loss: 1.5485...  0.1216 sec/batch
Epoch: 3/20...  Training Step: 1303...  Training loss: 1.4608...  0.1212 sec/batch
Epoch: 3/20...  Training Step: 1304...  Training loss: 1.3954...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 1305...  Training loss: 1.5705...  0.1261 sec/batch
Epoch: 3/20...  Training Step: 1306...  Training loss: 1.4479...  0.1320 sec/batch
Epoch: 3/20...  Training Step: 1307...  Training loss: 1.4015...  0.1288 sec/batch
Epoch: 3/20...  Training Step: 1308...  Training loss: 1.6474...  0.1200 sec/batch
Epoch: 3/20...  Training Step: 1309...  Training loss: 1.2750...  0.1179 sec/batch
Epoch: 3/20...  Training Step: 1310...  Training loss: 1.3717...  0.1206 sec/batch
Epoch: 3/20...  Training Step: 1311...  Training loss: 1.3306...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 1312...  Training loss: 1.4124...  0.1149 sec/batch
Epoch: 3/20...  Training Step: 1313...  Training loss: 1.3034...  0.1221 sec/batch
Epoch: 3/20...  Training Step: 1314...  Training loss: 1.4703...  0.1244 sec/batch
Epoch: 3/20...  Training Step: 1315...  Training loss: 1.4667...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1316...  Training loss: 1.2582...  0.1288 sec/batch
Epoch: 3/20...  Training Step: 1317...  Training loss: 1.3367...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 1318...  Training loss: 1.3607...  0.1178 sec/batch
Epoch: 3/20...  Training Step: 1319...  Training loss: 1.2614...  0.1231 sec/batch
Epoch: 3/20...  Training Step: 1320...  Training loss: 1.2499...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1321...  Training loss: 1.3843...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 1322...  Training loss: 1.4731...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1323...  Training loss: 1.3160...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 1324...  Training loss: 1.5521...  0.1229 sec/batch
Epoch: 3/20...  Training Step: 1325...  Training loss: 1.2794...  0.1243 sec/batch
Epoch: 3/20...  Training Step: 1326...  Training loss: 1.2844...  0.1232 sec/batch
Epoch: 3/20...  Training Step: 1327...  Training loss: 1.3642...  0.1236 sec/batch
Epoch: 3/20...  Training Step: 1328...  Training loss: 1.3242...  0.1219 sec/batch
Epoch: 3/20...  Training Step: 1329...  Training loss: 1.4141...  0.1227 sec/batch
Epoch: 3/20...  Training Step: 1330...  Training loss: 1.3989...  0.1258 sec/batch
Epoch: 3/20...  Training Step: 1331...  Training loss: 1.3728...  0.1221 sec/batch
Epoch: 3/20...  Training Step: 1332...  Training loss: 1.4453...  0.1225 sec/batch
Epoch: 3/20...  Training Step: 1333...  Training loss: 1.4572...  0.1202 sec/batch
Epoch: 3/20...  Training Step: 1334...  Training loss: 1.6580...  0.1209 sec/batch
Epoch: 3/20...  Training Step: 1335...  Training loss: 1.4955...  0.1209 sec/batch
Epoch: 3/20...  Training Step: 1336...  Training loss: 1.7907...  0.1191 sec/batch
Epoch: 3/20...  Training Step: 1337...  Training loss: 1.4268...  0.1239 sec/batch
Epoch: 3/20...  Training Step: 1338...  Training loss: 1.3740...  0.1220 sec/batch
Epoch: 3/20...  Training Step: 1339...  Training loss: 1.2539...  0.1243 sec/batch
Epoch: 3/20...  Training Step: 1340...  Training loss: 1.4921...  0.1291 sec/batch
Epoch: 3/20...  Training Step: 1341...  Training loss: 1.4860...  0.1239 sec/batch
Epoch: 3/20...  Training Step: 1342...  Training loss: 1.4720...  0.1208 sec/batch
Epoch: 3/20...  Training Step: 1343...  Training loss: 1.6715...  0.1269 sec/batch
Epoch: 3/20...  Training Step: 1344...  Training loss: 1.7732...  0.1307 sec/batch
Epoch: 3/20...  Training Step: 1345...  Training loss: 1.5820...  0.1327 sec/batch
Epoch: 3/20...  Training Step: 1346...  Training loss: 1.3372...  0.1170 sec/batch
Epoch: 3/20...  Training Step: 1347...  Training loss: 1.6558...  0.1314 sec/batch
Epoch: 3/20...  Training Step: 1348...  Training loss: 1.3329...  0.1218 sec/batch
Epoch: 3/20...  Training Step: 1349...  Training loss: 1.5555...  0.1188 sec/batch
Epoch: 3/20...  Training Step: 1350...  Training loss: 1.5288...  0.1314 sec/batch
Epoch: 3/20...  Training Step: 1351...  Training loss: 1.5522...  0.1235 sec/batch
Epoch: 3/20...  Training Step: 1352...  Training loss: 1.5946...  0.1249 sec/batch
Epoch: 3/20...  Training Step: 1353...  Training loss: 1.3923...  0.1265 sec/batch
Epoch: 3/20...  Training Step: 1354...  Training loss: 1.5529...  0.1299 sec/batch
Epoch: 3/20...  Training Step: 1355...  Training loss: 1.5271...  0.1359 sec/batch
Epoch: 3/20...  Training Step: 1356...  Training loss: 1.5991...  0.1324 sec/batch
Epoch: 3/20...  Training Step: 1357...  Training loss: 1.4104...  0.1308 sec/batch
Epoch: 3/20...  Training Step: 1358...  Training loss: 1.4555...  0.1285 sec/batch
Epoch: 3/20...  Training Step: 1359...  Training loss: 1.6443...  0.1335 sec/batch
Epoch: 3/20...  Training Step: 1360...  Training loss: 1.3999...  0.1206 sec/batch
Epoch: 3/20...  Training Step: 1361...  Training loss: 1.7414...  0.1229 sec/batch
Epoch: 3/20...  Training Step: 1362...  Training loss: 1.4795...  0.1209 sec/batch
Epoch: 3/20...  Training Step: 1363...  Training loss: 1.3346...  0.1194 sec/batch
Epoch: 3/20...  Training Step: 1364...  Training loss: 1.5552...  0.1215 sec/batch
Epoch: 3/20...  Training Step: 1365...  Training loss: 1.3470...  0.1204 sec/batch
Epoch: 3/20...  Training Step: 1366...  Training loss: 1.6726...  0.1196 sec/batch
Epoch: 3/20...  Training Step: 1367...  Training loss: 1.7311...  0.1206 sec/batch
Epoch: 3/20...  Training Step: 1368...  Training loss: 1.7849...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1369...  Training loss: 1.5782...  0.1246 sec/batch
Epoch: 3/20...  Training Step: 1370...  Training loss: 1.4696...  0.1254 sec/batch
Epoch: 3/20...  Training Step: 1371...  Training loss: 1.5873...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1372...  Training loss: 1.5114...  0.1223 sec/batch
Epoch: 3/20...  Training Step: 1373...  Training loss: 1.4033...  0.1171 sec/batch
Epoch: 3/20...  Training Step: 1374...  Training loss: 1.3788...  0.1273 sec/batch
Epoch: 3/20...  Training Step: 1375...  Training loss: 1.5542...  0.1250 sec/batch
Epoch: 3/20...  Training Step: 1376...  Training loss: 1.2138...  0.1211 sec/batch
Epoch: 3/20...  Training Step: 1377...  Training loss: 1.3873...  0.1206 sec/batch
Epoch: 3/20...  Training Step: 1378...  Training loss: 1.4237...  0.1236 sec/batch
Epoch: 3/20...  Training Step: 1379...  Training loss: 1.4807...  0.1255 sec/batch
Epoch: 3/20...  Training Step: 1380...  Training loss: 1.4350...  0.1270 sec/batch
Epoch: 3/20...  Training Step: 1381...  Training loss: 1.3911...  0.1233 sec/batch
Epoch: 3/20...  Training Step: 1382...  Training loss: 1.5712...  0.1264 sec/batch
Epoch: 3/20...  Training Step: 1383...  Training loss: 1.5879...  0.1213 sec/batch
Epoch: 3/20...  Training Step: 1384...  Training loss: 1.3152...  0.1237 sec/batch
Epoch: 3/20...  Training Step: 1385...  Training loss: 1.3861...  0.1255 sec/batch
Epoch: 3/20...  Training Step: 1386...  Training loss: 1.2745...  0.1213 sec/batch
Epoch: 3/20...  Training Step: 1387...  Training loss: 1.1810...  0.1248 sec/batch
Epoch: 3/20...  Training Step: 1388...  Training loss: 1.3882...  0.1251 sec/batch
Epoch: 3/20...  Training Step: 1389...  Training loss: 1.5194...  0.1234 sec/batch
Epoch: 3/20...  Training Step: 1390...  Training loss: 1.4200...  0.1217 sec/batch
Epoch: 3/20...  Training Step: 1391...  Training loss: 1.4400...  0.1234 sec/batch
Epoch: 3/20...  Training Step: 1392...  Training loss: 1.3359...  0.1225 sec/batch
Epoch: 4/20...  Training Step: 1393...  Training loss: 1.9313...  0.1260 sec/batch
Epoch: 4/20...  Training Step: 1394...  Training loss: 1.4922...  0.1188 sec/batch
Epoch: 4/20...  Training Step: 1395...  Training loss: 1.4301...  0.1268 sec/batch
Epoch: 4/20...  Training Step: 1396...  Training loss: 1.3722...  0.1196 sec/batch
Epoch: 4/20...  Training Step: 1397...  Training loss: 1.4674...  0.1252 sec/batch
Epoch: 4/20...  Training Step: 1398...  Training loss: 1.2833...  0.1244 sec/batch
Epoch: 4/20...  Training Step: 1399...  Training loss: 1.4789...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1400...  Training loss: 1.3207...  0.1296 sec/batch
Epoch: 4/20...  Training Step: 1401...  Training loss: 1.2971...  0.1233 sec/batch
Epoch: 4/20...  Training Step: 1402...  Training loss: 1.6051...  0.1176 sec/batch
Epoch: 4/20...  Training Step: 1403...  Training loss: 1.3909...  0.1229 sec/batch
Epoch: 4/20...  Training Step: 1404...  Training loss: 1.1811...  0.1225 sec/batch
Epoch: 4/20...  Training Step: 1405...  Training loss: 1.6804...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1406...  Training loss: 1.2042...  0.1191 sec/batch
Epoch: 4/20...  Training Step: 1407...  Training loss: 1.3924...  0.1253 sec/batch
Epoch: 4/20...  Training Step: 1408...  Training loss: 1.4801...  0.1210 sec/batch
Epoch: 4/20...  Training Step: 1409...  Training loss: 1.3012...  0.1255 sec/batch
Epoch: 4/20...  Training Step: 1410...  Training loss: 1.2288...  0.1194 sec/batch
Epoch: 4/20...  Training Step: 1411...  Training loss: 1.3916...  0.1194 sec/batch
Epoch: 4/20...  Training Step: 1412...  Training loss: 1.2628...  0.1217 sec/batch
Epoch: 4/20...  Training Step: 1413...  Training loss: 1.3941...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1414...  Training loss: 1.3971...  0.1188 sec/batch
Epoch: 4/20...  Training Step: 1415...  Training loss: 1.6025...  0.1210 sec/batch
Epoch: 4/20...  Training Step: 1416...  Training loss: 1.2473...  0.1230 sec/batch
Epoch: 4/20...  Training Step: 1417...  Training loss: 1.3737...  0.1197 sec/batch
Epoch: 4/20...  Training Step: 1418...  Training loss: 1.3562...  0.1182 sec/batch
Epoch: 4/20...  Training Step: 1419...  Training loss: 1.4493...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1420...  Training loss: 1.2747...  0.1222 sec/batch
Epoch: 4/20...  Training Step: 1421...  Training loss: 1.3746...  0.1236 sec/batch
Epoch: 4/20...  Training Step: 1422...  Training loss: 1.3678...  0.1167 sec/batch
Epoch: 4/20...  Training Step: 1423...  Training loss: 1.2374...  0.1255 sec/batch
Epoch: 4/20...  Training Step: 1424...  Training loss: 1.3995...  0.1181 sec/batch
Epoch: 4/20...  Training Step: 1425...  Training loss: 1.2729...  0.1219 sec/batch
Epoch: 4/20...  Training Step: 1426...  Training loss: 1.2481...  0.1241 sec/batch
Epoch: 4/20...  Training Step: 1427...  Training loss: 1.3059...  0.1253 sec/batch
Epoch: 4/20...  Training Step: 1428...  Training loss: 1.2463...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1429...  Training loss: 1.4052...  0.1252 sec/batch
Epoch: 4/20...  Training Step: 1430...  Training loss: 1.1703...  0.1200 sec/batch
Epoch: 4/20...  Training Step: 1431...  Training loss: 1.2630...  0.1275 sec/batch
Epoch: 4/20...  Training Step: 1432...  Training loss: 1.5672...  0.1254 sec/batch
Epoch: 4/20...  Training Step: 1433...  Training loss: 1.3659...  0.1217 sec/batch
Epoch: 4/20...  Training Step: 1434...  Training loss: 1.2535...  0.1230 sec/batch
Epoch: 4/20...  Training Step: 1435...  Training loss: 1.5014...  0.1286 sec/batch
Epoch: 4/20...  Training Step: 1436...  Training loss: 1.0834...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1437...  Training loss: 1.3498...  0.1297 sec/batch
Epoch: 4/20...  Training Step: 1438...  Training loss: 1.1832...  0.1233 sec/batch
Epoch: 4/20...  Training Step: 1439...  Training loss: 1.3530...  0.1220 sec/batch
Epoch: 4/20...  Training Step: 1440...  Training loss: 1.3427...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1441...  Training loss: 1.2966...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1442...  Training loss: 1.4366...  0.1193 sec/batch
Epoch: 4/20...  Training Step: 1443...  Training loss: 1.3389...  0.1235 sec/batch
Epoch: 4/20...  Training Step: 1444...  Training loss: 1.4836...  0.1181 sec/batch
Epoch: 4/20...  Training Step: 1445...  Training loss: 1.4737...  0.1209 sec/batch
Epoch: 4/20...  Training Step: 1446...  Training loss: 1.3881...  0.1197 sec/batch
Epoch: 4/20...  Training Step: 1447...  Training loss: 1.2272...  0.1262 sec/batch
Epoch: 4/20...  Training Step: 1448...  Training loss: 1.4611...  0.1356 sec/batch
Epoch: 4/20...  Training Step: 1449...  Training loss: 1.4331...  0.1266 sec/batch
Epoch: 4/20...  Training Step: 1450...  Training loss: 1.3789...  0.1190 sec/batch
Epoch: 4/20...  Training Step: 1451...  Training loss: 1.2747...  0.1268 sec/batch
Epoch: 4/20...  Training Step: 1452...  Training loss: 1.2960...  0.1232 sec/batch
Epoch: 4/20...  Training Step: 1453...  Training loss: 1.4640...  0.1207 sec/batch
Epoch: 4/20...  Training Step: 1454...  Training loss: 1.4305...  0.1203 sec/batch
Epoch: 4/20...  Training Step: 1455...  Training loss: 1.3445...  0.1195 sec/batch
Epoch: 4/20...  Training Step: 1456...  Training loss: 1.3732...  0.1236 sec/batch
Epoch: 4/20...  Training Step: 1457...  Training loss: 1.1927...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1458...  Training loss: 1.5063...  0.1196 sec/batch
Epoch: 4/20...  Training Step: 1459...  Training loss: 1.4179...  0.1225 sec/batch
Epoch: 4/20...  Training Step: 1460...  Training loss: 1.4725...  0.1216 sec/batch
Epoch: 4/20...  Training Step: 1461...  Training loss: 1.2833...  0.1218 sec/batch
Epoch: 4/20...  Training Step: 1462...  Training loss: 1.3418...  0.1200 sec/batch
Epoch: 4/20...  Training Step: 1463...  Training loss: 1.3958...  0.1219 sec/batch
Epoch: 4/20...  Training Step: 1464...  Training loss: 1.3670...  0.1246 sec/batch
Epoch: 4/20...  Training Step: 1465...  Training loss: 1.4485...  0.1194 sec/batch
Epoch: 4/20...  Training Step: 1466...  Training loss: 1.3058...  0.1264 sec/batch
Epoch: 4/20...  Training Step: 1467...  Training loss: 1.6422...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1468...  Training loss: 1.3307...  0.1223 sec/batch
Epoch: 4/20...  Training Step: 1469...  Training loss: 1.2994...  0.1323 sec/batch
Epoch: 4/20...  Training Step: 1470...  Training loss: 1.4384...  0.1281 sec/batch
Epoch: 4/20...  Training Step: 1471...  Training loss: 1.3853...  0.1251 sec/batch
Epoch: 4/20...  Training Step: 1472...  Training loss: 1.2819...  0.1274 sec/batch
Epoch: 4/20...  Training Step: 1473...  Training loss: 1.5100...  0.1250 sec/batch
Epoch: 4/20...  Training Step: 1474...  Training loss: 1.3332...  0.1283 sec/batch
Epoch: 4/20...  Training Step: 1475...  Training loss: 1.2866...  0.1289 sec/batch
Epoch: 4/20...  Training Step: 1476...  Training loss: 1.4128...  0.1186 sec/batch
Epoch: 4/20...  Training Step: 1477...  Training loss: 1.4052...  0.1275 sec/batch
Epoch: 4/20...  Training Step: 1478...  Training loss: 1.4796...  0.1229 sec/batch
Epoch: 4/20...  Training Step: 1479...  Training loss: 1.2473...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1480...  Training loss: 1.5011...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1481...  Training loss: 1.5194...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1482...  Training loss: 1.3242...  0.1188 sec/batch
Epoch: 4/20...  Training Step: 1483...  Training loss: 1.5245...  0.1207 sec/batch
Epoch: 4/20...  Training Step: 1484...  Training loss: 1.6861...  0.1270 sec/batch
Epoch: 4/20...  Training Step: 1485...  Training loss: 1.2433...  0.1218 sec/batch
Epoch: 4/20...  Training Step: 1486...  Training loss: 1.5423...  0.1188 sec/batch
Epoch: 4/20...  Training Step: 1487...  Training loss: 1.3283...  0.1232 sec/batch
Epoch: 4/20...  Training Step: 1488...  Training loss: 1.3628...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1489...  Training loss: 1.7787...  0.1226 sec/batch
Epoch: 4/20...  Training Step: 1490...  Training loss: 1.5023...  0.1201 sec/batch
Epoch: 4/20...  Training Step: 1491...  Training loss: 1.5314...  0.1214 sec/batch
Epoch: 4/20...  Training Step: 1492...  Training loss: 1.3412...  0.1200 sec/batch
Epoch: 4/20...  Training Step: 1493...  Training loss: 1.4585...  0.1210 sec/batch
Epoch: 4/20...  Training Step: 1494...  Training loss: 1.6980...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1495...  Training loss: 1.6606...  0.1256 sec/batch
Epoch: 4/20...  Training Step: 1496...  Training loss: 1.5003...  0.1248 sec/batch
Epoch: 4/20...  Training Step: 1497...  Training loss: 1.6203...  0.1218 sec/batch
Epoch: 4/20...  Training Step: 1498...  Training loss: 1.7408...  0.1183 sec/batch
Epoch: 4/20...  Training Step: 1499...  Training loss: 1.6151...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1500...  Training loss: 1.9297...  0.1235 sec/batch
Epoch: 4/20...  Training Step: 1501...  Training loss: 1.6583...  0.1245 sec/batch
Epoch: 4/20...  Training Step: 1502...  Training loss: 1.3600...  0.1203 sec/batch
Epoch: 4/20...  Training Step: 1503...  Training loss: 1.5712...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1504...  Training loss: 1.4967...  0.1227 sec/batch
Epoch: 4/20...  Training Step: 1505...  Training loss: 1.6027...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1506...  Training loss: 1.6185...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1507...  Training loss: 1.5903...  0.1201 sec/batch
Epoch: 4/20...  Training Step: 1508...  Training loss: 1.3800...  0.1210 sec/batch
Epoch: 4/20...  Training Step: 1509...  Training loss: 1.5354...  0.1228 sec/batch
Epoch: 4/20...  Training Step: 1510...  Training loss: 1.6983...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1511...  Training loss: 1.4242...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1512...  Training loss: 1.2859...  0.1217 sec/batch
Epoch: 4/20...  Training Step: 1513...  Training loss: 1.5107...  0.1242 sec/batch
Epoch: 4/20...  Training Step: 1514...  Training loss: 1.4637...  0.1193 sec/batch
Epoch: 4/20...  Training Step: 1515...  Training loss: 1.6800...  0.1193 sec/batch
Epoch: 4/20...  Training Step: 1516...  Training loss: 1.5276...  0.1198 sec/batch
Epoch: 4/20...  Training Step: 1517...  Training loss: 1.5649...  0.1181 sec/batch
Epoch: 4/20...  Training Step: 1518...  Training loss: 1.2967...  0.1233 sec/batch
Epoch: 4/20...  Training Step: 1519...  Training loss: 1.3368...  0.1250 sec/batch
Epoch: 4/20...  Training Step: 1520...  Training loss: 1.4736...  0.1232 sec/batch
Epoch: 4/20...  Training Step: 1521...  Training loss: 1.6530...  0.1236 sec/batch
Epoch: 4/20...  Training Step: 1522...  Training loss: 1.4966...  0.1230 sec/batch
Epoch: 4/20...  Training Step: 1523...  Training loss: 1.7429...  0.1173 sec/batch
Epoch: 4/20...  Training Step: 1524...  Training loss: 1.5686...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1525...  Training loss: 1.3538...  0.1220 sec/batch
Epoch: 4/20...  Training Step: 1526...  Training loss: 1.5236...  0.1212 sec/batch
Epoch: 4/20...  Training Step: 1527...  Training loss: 1.2845...  0.1264 sec/batch
Epoch: 4/20...  Training Step: 1528...  Training loss: 1.2576...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1529...  Training loss: 1.2262...  0.1259 sec/batch
Epoch: 4/20...  Training Step: 1530...  Training loss: 1.3750...  0.1197 sec/batch
Epoch: 4/20...  Training Step: 1531...  Training loss: 1.2956...  0.1232 sec/batch
Epoch: 4/20...  Training Step: 1532...  Training loss: 1.3355...  0.1191 sec/batch
Epoch: 4/20...  Training Step: 1533...  Training loss: 1.3361...  0.1202 sec/batch
Epoch: 4/20...  Training Step: 1534...  Training loss: 1.3512...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1535...  Training loss: 1.1822...  0.1271 sec/batch
Epoch: 4/20...  Training Step: 1536...  Training loss: 1.4463...  0.1209 sec/batch
Epoch: 4/20...  Training Step: 1537...  Training loss: 1.4729...  0.1215 sec/batch
Epoch: 4/20...  Training Step: 1538...  Training loss: 1.3268...  0.1181 sec/batch
Epoch: 4/20...  Training Step: 1539...  Training loss: 1.5076...  0.1230 sec/batch
Epoch: 4/20...  Training Step: 1540...  Training loss: 1.2768...  0.1227 sec/batch
Epoch: 4/20...  Training Step: 1541...  Training loss: 1.3413...  0.1303 sec/batch
Epoch: 4/20...  Training Step: 1542...  Training loss: 1.6151...  0.1289 sec/batch
Epoch: 4/20...  Training Step: 1543...  Training loss: 1.4276...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1544...  Training loss: 1.5594...  0.1185 sec/batch
Epoch: 4/20...  Training Step: 1545...  Training loss: 1.6299...  0.1234 sec/batch
Epoch: 4/20...  Training Step: 1546...  Training loss: 1.4598...  0.1215 sec/batch
Epoch: 4/20...  Training Step: 1547...  Training loss: 1.5796...  0.1202 sec/batch
Epoch: 4/20...  Training Step: 1548...  Training loss: 1.2760...  0.1241 sec/batch
Epoch: 4/20...  Training Step: 1549...  Training loss: 1.3051...  0.1261 sec/batch
Epoch: 4/20...  Training Step: 1550...  Training loss: 1.3215...  0.1242 sec/batch
Epoch: 4/20...  Training Step: 1551...  Training loss: 1.3592...  0.1184 sec/batch
Epoch: 4/20...  Training Step: 1552...  Training loss: 1.3590...  0.1255 sec/batch
Epoch: 4/20...  Training Step: 1553...  Training loss: 1.4959...  0.1265 sec/batch
Epoch: 4/20...  Training Step: 1554...  Training loss: 1.4831...  0.1190 sec/batch
Epoch: 4/20...  Training Step: 1555...  Training loss: 1.5494...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1556...  Training loss: 1.2590...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1557...  Training loss: 1.4489...  0.1272 sec/batch
Epoch: 4/20...  Training Step: 1558...  Training loss: 1.3497...  0.1266 sec/batch
Epoch: 4/20...  Training Step: 1559...  Training loss: 1.3712...  0.1186 sec/batch
Epoch: 4/20...  Training Step: 1560...  Training loss: 1.5263...  0.1200 sec/batch
Epoch: 4/20...  Training Step: 1561...  Training loss: 1.4972...  0.1242 sec/batch
Epoch: 4/20...  Training Step: 1562...  Training loss: 1.5029...  0.1242 sec/batch
Epoch: 4/20...  Training Step: 1563...  Training loss: 1.3780...  0.1257 sec/batch
Epoch: 4/20...  Training Step: 1564...  Training loss: 1.5499...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1565...  Training loss: 1.3015...  0.1164 sec/batch
Epoch: 4/20...  Training Step: 1566...  Training loss: 1.4424...  0.1207 sec/batch
Epoch: 4/20...  Training Step: 1567...  Training loss: 1.4742...  0.1179 sec/batch
Epoch: 4/20...  Training Step: 1568...  Training loss: 1.2731...  0.1262 sec/batch
Epoch: 4/20...  Training Step: 1569...  Training loss: 1.2498...  0.1218 sec/batch
Epoch: 4/20...  Training Step: 1570...  Training loss: 1.6188...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1571...  Training loss: 1.3378...  0.1189 sec/batch
Epoch: 4/20...  Training Step: 1572...  Training loss: 1.5607...  0.1239 sec/batch
Epoch: 4/20...  Training Step: 1573...  Training loss: 1.3539...  0.1270 sec/batch
Epoch: 4/20...  Training Step: 1574...  Training loss: 1.6304...  0.1174 sec/batch
Epoch: 4/20...  Training Step: 1575...  Training loss: 1.4477...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1576...  Training loss: 1.4303...  0.1214 sec/batch
Epoch: 4/20...  Training Step: 1577...  Training loss: 1.6332...  0.1256 sec/batch
Epoch: 4/20...  Training Step: 1578...  Training loss: 1.4556...  0.1245 sec/batch
Epoch: 4/20...  Training Step: 1579...  Training loss: 1.5490...  0.1233 sec/batch
Epoch: 4/20...  Training Step: 1580...  Training loss: 1.2421...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1581...  Training loss: 1.4559...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1582...  Training loss: 1.2966...  0.1234 sec/batch
Epoch: 4/20...  Training Step: 1583...  Training loss: 1.2735...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1584...  Training loss: 1.5259...  0.1202 sec/batch
Epoch: 4/20...  Training Step: 1585...  Training loss: 1.4117...  0.1178 sec/batch
Epoch: 4/20...  Training Step: 1586...  Training loss: 1.4221...  0.1234 sec/batch
Epoch: 4/20...  Training Step: 1587...  Training loss: 1.5599...  0.1219 sec/batch
Epoch: 4/20...  Training Step: 1588...  Training loss: 1.3522...  0.1262 sec/batch
Epoch: 4/20...  Training Step: 1589...  Training loss: 1.2736...  0.1309 sec/batch
Epoch: 4/20...  Training Step: 1590...  Training loss: 1.4886...  0.1279 sec/batch
Epoch: 4/20...  Training Step: 1591...  Training loss: 1.2435...  0.1349 sec/batch
Epoch: 4/20...  Training Step: 1592...  Training loss: 1.2611...  0.1300 sec/batch
Epoch: 4/20...  Training Step: 1593...  Training loss: 1.3498...  0.1319 sec/batch
Epoch: 4/20...  Training Step: 1594...  Training loss: 1.3829...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1595...  Training loss: 1.3758...  0.1255 sec/batch
Epoch: 4/20...  Training Step: 1596...  Training loss: 1.5544...  0.1204 sec/batch
Epoch: 4/20...  Training Step: 1597...  Training loss: 1.3693...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1598...  Training loss: 1.2057...  0.1349 sec/batch
Epoch: 4/20...  Training Step: 1599...  Training loss: 1.2627...  0.1315 sec/batch
Epoch: 4/20...  Training Step: 1600...  Training loss: 1.4666...  0.1432 sec/batch
Epoch: 4/20...  Training Step: 1601...  Training loss: 1.4690...  0.1338 sec/batch
Epoch: 4/20...  Training Step: 1602...  Training loss: 1.3161...  0.1553 sec/batch
Epoch: 4/20...  Training Step: 1603...  Training loss: 1.0884...  0.1282 sec/batch
Epoch: 4/20...  Training Step: 1604...  Training loss: 1.4143...  0.1227 sec/batch
Epoch: 4/20...  Training Step: 1605...  Training loss: 1.5037...  0.1251 sec/batch
Epoch: 4/20...  Training Step: 1606...  Training loss: 1.4227...  0.1244 sec/batch
Epoch: 4/20...  Training Step: 1607...  Training loss: 1.5015...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1608...  Training loss: 1.3333...  0.1235 sec/batch
Epoch: 4/20...  Training Step: 1609...  Training loss: 1.5212...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1610...  Training loss: 1.2993...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1611...  Training loss: 1.5958...  0.1299 sec/batch
Epoch: 4/20...  Training Step: 1612...  Training loss: 1.3819...  0.1300 sec/batch
Epoch: 4/20...  Training Step: 1613...  Training loss: 1.2495...  0.1296 sec/batch
Epoch: 4/20...  Training Step: 1614...  Training loss: 1.6065...  0.1306 sec/batch
Epoch: 4/20...  Training Step: 1615...  Training loss: 1.6064...  0.1313 sec/batch
Epoch: 4/20...  Training Step: 1616...  Training loss: 1.5914...  0.1289 sec/batch
Epoch: 4/20...  Training Step: 1617...  Training loss: 1.3934...  0.1289 sec/batch
Epoch: 4/20...  Training Step: 1618...  Training loss: 1.6287...  0.1289 sec/batch
Epoch: 4/20...  Training Step: 1619...  Training loss: 1.5575...  0.1284 sec/batch
Epoch: 4/20...  Training Step: 1620...  Training loss: 1.2240...  0.1257 sec/batch
Epoch: 4/20...  Training Step: 1621...  Training loss: 1.4031...  0.1268 sec/batch
Epoch: 4/20...  Training Step: 1622...  Training loss: 1.3496...  0.1323 sec/batch
Epoch: 4/20...  Training Step: 1623...  Training loss: 1.3593...  0.1295 sec/batch
Epoch: 4/20...  Training Step: 1624...  Training loss: 1.3055...  0.1310 sec/batch
Epoch: 4/20...  Training Step: 1625...  Training loss: 1.6255...  0.1297 sec/batch
Epoch: 4/20...  Training Step: 1626...  Training loss: 1.3199...  0.1245 sec/batch
Epoch: 4/20...  Training Step: 1627...  Training loss: 1.6438...  0.1193 sec/batch
Epoch: 4/20...  Training Step: 1628...  Training loss: 1.3492...  0.1222 sec/batch
Epoch: 4/20...  Training Step: 1629...  Training loss: 1.7020...  0.1184 sec/batch
Epoch: 4/20...  Training Step: 1630...  Training loss: 1.3203...  0.1252 sec/batch
Epoch: 4/20...  Training Step: 1631...  Training loss: 1.4881...  0.1253 sec/batch
Epoch: 4/20...  Training Step: 1632...  Training loss: 1.5097...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1633...  Training loss: 1.4173...  0.1257 sec/batch
Epoch: 4/20...  Training Step: 1634...  Training loss: 1.3100...  0.1241 sec/batch
Epoch: 4/20...  Training Step: 1635...  Training loss: 1.6259...  0.1226 sec/batch
Epoch: 4/20...  Training Step: 1636...  Training loss: 1.5558...  0.1209 sec/batch
Epoch: 4/20...  Training Step: 1637...  Training loss: 1.5569...  0.1200 sec/batch
Epoch: 4/20...  Training Step: 1638...  Training loss: 1.3143...  0.1193 sec/batch
Epoch: 4/20...  Training Step: 1639...  Training loss: 1.2947...  0.1234 sec/batch
Epoch: 4/20...  Training Step: 1640...  Training loss: 1.4623...  0.1248 sec/batch
Epoch: 4/20...  Training Step: 1641...  Training loss: 1.3586...  0.1394 sec/batch
Epoch: 4/20...  Training Step: 1642...  Training loss: 1.3476...  0.1464 sec/batch
Epoch: 4/20...  Training Step: 1643...  Training loss: 1.4967...  0.1290 sec/batch
Epoch: 4/20...  Training Step: 1644...  Training loss: 1.3591...  0.1230 sec/batch
Epoch: 4/20...  Training Step: 1645...  Training loss: 1.2847...  0.1208 sec/batch
Epoch: 4/20...  Training Step: 1646...  Training loss: 1.5421...  0.1176 sec/batch
Epoch: 4/20...  Training Step: 1647...  Training loss: 1.3966...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1648...  Training loss: 1.3962...  0.1202 sec/batch
Epoch: 4/20...  Training Step: 1649...  Training loss: 1.4429...  0.1268 sec/batch
Epoch: 4/20...  Training Step: 1650...  Training loss: 1.3419...  0.1183 sec/batch
Epoch: 4/20...  Training Step: 1651...  Training loss: 1.3109...  0.1250 sec/batch
Epoch: 4/20...  Training Step: 1652...  Training loss: 1.4374...  0.1219 sec/batch
Epoch: 4/20...  Training Step: 1653...  Training loss: 1.3746...  0.1232 sec/batch
Epoch: 4/20...  Training Step: 1654...  Training loss: 1.5243...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1655...  Training loss: 1.4850...  0.1235 sec/batch
Epoch: 4/20...  Training Step: 1656...  Training loss: 1.5935...  0.1210 sec/batch
Epoch: 4/20...  Training Step: 1657...  Training loss: 1.5334...  0.1215 sec/batch
Epoch: 4/20...  Training Step: 1658...  Training loss: 1.4143...  0.1209 sec/batch
Epoch: 4/20...  Training Step: 1659...  Training loss: 1.5740...  0.1208 sec/batch
Epoch: 4/20...  Training Step: 1660...  Training loss: 1.5156...  0.1212 sec/batch
Epoch: 4/20...  Training Step: 1661...  Training loss: 1.4966...  0.1257 sec/batch
Epoch: 4/20...  Training Step: 1662...  Training loss: 1.5426...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1663...  Training loss: 1.4888...  0.1230 sec/batch
Epoch: 4/20...  Training Step: 1664...  Training loss: 1.5367...  0.1220 sec/batch
Epoch: 4/20...  Training Step: 1665...  Training loss: 1.5524...  0.1247 sec/batch
Epoch: 4/20...  Training Step: 1666...  Training loss: 1.4987...  0.1335 sec/batch
Epoch: 4/20...  Training Step: 1667...  Training loss: 1.6245...  0.1233 sec/batch
Epoch: 4/20...  Training Step: 1668...  Training loss: 1.4191...  0.1230 sec/batch
Epoch: 4/20...  Training Step: 1669...  Training loss: 1.4755...  0.1256 sec/batch
Epoch: 4/20...  Training Step: 1670...  Training loss: 1.7585...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1671...  Training loss: 1.4467...  0.1227 sec/batch
Epoch: 4/20...  Training Step: 1672...  Training loss: 1.3123...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1673...  Training loss: 1.3756...  0.1251 sec/batch
Epoch: 4/20...  Training Step: 1674...  Training loss: 1.2536...  0.1206 sec/batch
Epoch: 4/20...  Training Step: 1675...  Training loss: 1.3763...  0.1223 sec/batch
Epoch: 4/20...  Training Step: 1676...  Training loss: 1.4851...  0.1184 sec/batch
Epoch: 4/20...  Training Step: 1677...  Training loss: 1.2984...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1678...  Training loss: 1.4871...  0.1206 sec/batch
Epoch: 4/20...  Training Step: 1679...  Training loss: 1.3846...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1680...  Training loss: 1.5179...  0.1239 sec/batch
Epoch: 4/20...  Training Step: 1681...  Training loss: 1.4800...  0.1216 sec/batch
Epoch: 4/20...  Training Step: 1682...  Training loss: 1.4224...  0.1206 sec/batch
Epoch: 4/20...  Training Step: 1683...  Training loss: 1.3142...  0.1271 sec/batch
Epoch: 4/20...  Training Step: 1684...  Training loss: 1.2903...  0.1236 sec/batch
Epoch: 4/20...  Training Step: 1685...  Training loss: 1.2910...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1686...  Training loss: 1.4233...  0.1226 sec/batch
Epoch: 4/20...  Training Step: 1687...  Training loss: 1.3648...  0.1250 sec/batch
Epoch: 4/20...  Training Step: 1688...  Training loss: 1.6586...  0.1287 sec/batch
Epoch: 4/20...  Training Step: 1689...  Training loss: 1.3122...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1690...  Training loss: 1.3081...  0.1304 sec/batch
Epoch: 4/20...  Training Step: 1691...  Training loss: 1.3743...  0.1287 sec/batch
Epoch: 4/20...  Training Step: 1692...  Training loss: 1.3375...  0.1274 sec/batch
Epoch: 4/20...  Training Step: 1693...  Training loss: 1.3612...  0.1261 sec/batch
Epoch: 4/20...  Training Step: 1694...  Training loss: 1.2892...  0.1288 sec/batch
Epoch: 4/20...  Training Step: 1695...  Training loss: 1.1671...  0.1340 sec/batch
Epoch: 4/20...  Training Step: 1696...  Training loss: 1.5735...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1697...  Training loss: 1.3152...  0.1234 sec/batch
Epoch: 4/20...  Training Step: 1698...  Training loss: 1.4739...  0.1179 sec/batch
Epoch: 4/20...  Training Step: 1699...  Training loss: 1.3768...  0.1204 sec/batch
Epoch: 4/20...  Training Step: 1700...  Training loss: 1.8189...  0.1197 sec/batch
Epoch: 4/20...  Training Step: 1701...  Training loss: 1.4422...  0.1188 sec/batch
Epoch: 4/20...  Training Step: 1702...  Training loss: 1.4908...  0.1234 sec/batch
Epoch: 4/20...  Training Step: 1703...  Training loss: 1.3663...  0.1165 sec/batch
Epoch: 4/20...  Training Step: 1704...  Training loss: 1.2265...  0.1173 sec/batch
Epoch: 4/20...  Training Step: 1705...  Training loss: 1.3273...  0.1172 sec/batch
Epoch: 4/20...  Training Step: 1706...  Training loss: 1.2965...  0.1192 sec/batch
Epoch: 4/20...  Training Step: 1707...  Training loss: 1.1678...  0.1235 sec/batch
Epoch: 4/20...  Training Step: 1708...  Training loss: 1.3140...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1709...  Training loss: 1.4159...  0.1265 sec/batch
Epoch: 4/20...  Training Step: 1710...  Training loss: 1.2664...  0.1191 sec/batch
Epoch: 4/20...  Training Step: 1711...  Training loss: 1.3747...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1712...  Training loss: 1.2646...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1713...  Training loss: 1.2504...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1714...  Training loss: 1.5354...  0.1215 sec/batch
Epoch: 4/20...  Training Step: 1715...  Training loss: 1.3490...  0.1257 sec/batch
Epoch: 4/20...  Training Step: 1716...  Training loss: 1.2395...  0.1245 sec/batch
Epoch: 4/20...  Training Step: 1717...  Training loss: 1.2260...  0.1177 sec/batch
Epoch: 4/20...  Training Step: 1718...  Training loss: 1.1965...  0.1230 sec/batch
Epoch: 4/20...  Training Step: 1719...  Training loss: 1.4599...  0.1239 sec/batch
Epoch: 4/20...  Training Step: 1720...  Training loss: 1.3538...  0.1203 sec/batch
Epoch: 4/20...  Training Step: 1721...  Training loss: 1.4356...  0.1214 sec/batch
Epoch: 4/20...  Training Step: 1722...  Training loss: 1.2716...  0.1216 sec/batch
Epoch: 4/20...  Training Step: 1723...  Training loss: 1.3971...  0.1220 sec/batch
Epoch: 4/20...  Training Step: 1724...  Training loss: 1.3930...  0.1184 sec/batch
Epoch: 4/20...  Training Step: 1725...  Training loss: 1.3373...  0.1242 sec/batch
Epoch: 4/20...  Training Step: 1726...  Training loss: 1.3501...  0.1200 sec/batch
Epoch: 4/20...  Training Step: 1727...  Training loss: 1.5114...  0.1196 sec/batch
Epoch: 4/20...  Training Step: 1728...  Training loss: 1.3357...  0.1212 sec/batch
Epoch: 4/20...  Training Step: 1729...  Training loss: 1.4122...  0.1229 sec/batch
Epoch: 4/20...  Training Step: 1730...  Training loss: 1.3890...  0.1264 sec/batch
Epoch: 4/20...  Training Step: 1731...  Training loss: 1.4253...  0.1228 sec/batch
Epoch: 4/20...  Training Step: 1732...  Training loss: 1.2306...  0.1203 sec/batch
Epoch: 4/20...  Training Step: 1733...  Training loss: 1.2428...  0.1333 sec/batch
Epoch: 4/20...  Training Step: 1734...  Training loss: 1.4867...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1735...  Training loss: 1.1870...  0.1257 sec/batch
Epoch: 4/20...  Training Step: 1736...  Training loss: 1.4857...  0.1272 sec/batch
Epoch: 4/20...  Training Step: 1737...  Training loss: 1.2644...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1738...  Training loss: 1.2092...  0.1258 sec/batch
Epoch: 4/20...  Training Step: 1739...  Training loss: 1.0920...  0.1264 sec/batch
Epoch: 4/20...  Training Step: 1740...  Training loss: 1.6339...  0.1245 sec/batch
Epoch: 4/20...  Training Step: 1741...  Training loss: 1.2019...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1742...  Training loss: 1.3660...  0.1203 sec/batch
Epoch: 4/20...  Training Step: 1743...  Training loss: 1.2780...  0.1227 sec/batch
Epoch: 4/20...  Training Step: 1744...  Training loss: 1.1986...  0.1247 sec/batch
Epoch: 4/20...  Training Step: 1745...  Training loss: 1.2250...  0.1214 sec/batch
Epoch: 4/20...  Training Step: 1746...  Training loss: 1.0924...  0.1207 sec/batch
Epoch: 4/20...  Training Step: 1747...  Training loss: 1.4161...  0.1241 sec/batch
Epoch: 4/20...  Training Step: 1748...  Training loss: 1.2304...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1749...  Training loss: 1.2339...  0.1246 sec/batch
Epoch: 4/20...  Training Step: 1750...  Training loss: 1.3953...  0.1239 sec/batch
Epoch: 4/20...  Training Step: 1751...  Training loss: 1.5165...  0.1247 sec/batch
Epoch: 4/20...  Training Step: 1752...  Training loss: 1.0447...  0.1209 sec/batch
Epoch: 4/20...  Training Step: 1753...  Training loss: 1.5157...  0.1246 sec/batch
Epoch: 4/20...  Training Step: 1754...  Training loss: 1.4152...  0.1217 sec/batch
Epoch: 4/20...  Training Step: 1755...  Training loss: 1.2556...  0.1244 sec/batch
Epoch: 4/20...  Training Step: 1756...  Training loss: 1.3216...  0.1244 sec/batch
Epoch: 4/20...  Training Step: 1757...  Training loss: 1.2850...  0.1195 sec/batch
Epoch: 4/20...  Training Step: 1758...  Training loss: 1.3210...  0.1210 sec/batch
Epoch: 4/20...  Training Step: 1759...  Training loss: 1.2160...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1760...  Training loss: 1.4533...  0.1241 sec/batch
Epoch: 4/20...  Training Step: 1761...  Training loss: 1.4620...  0.1244 sec/batch
Epoch: 4/20...  Training Step: 1762...  Training loss: 1.3681...  0.1226 sec/batch
Epoch: 4/20...  Training Step: 1763...  Training loss: 1.3665...  0.1221 sec/batch
Epoch: 4/20...  Training Step: 1764...  Training loss: 1.5026...  0.1198 sec/batch
Epoch: 4/20...  Training Step: 1765...  Training loss: 1.4163...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1766...  Training loss: 1.5014...  0.1233 sec/batch
Epoch: 4/20...  Training Step: 1767...  Training loss: 1.4156...  0.1220 sec/batch
Epoch: 4/20...  Training Step: 1768...  Training loss: 1.3675...  0.1246 sec/batch
Epoch: 4/20...  Training Step: 1769...  Training loss: 1.4033...  0.1246 sec/batch
Epoch: 4/20...  Training Step: 1770...  Training loss: 1.4267...  0.1186 sec/batch
Epoch: 4/20...  Training Step: 1771...  Training loss: 1.3637...  0.1161 sec/batch
Epoch: 4/20...  Training Step: 1772...  Training loss: 1.5148...  0.1200 sec/batch
Epoch: 4/20...  Training Step: 1773...  Training loss: 1.2203...  0.1197 sec/batch
Epoch: 4/20...  Training Step: 1774...  Training loss: 1.2130...  0.1205 sec/batch
Epoch: 4/20...  Training Step: 1775...  Training loss: 1.3131...  0.1195 sec/batch
Epoch: 4/20...  Training Step: 1776...  Training loss: 1.3942...  0.1196 sec/batch
Epoch: 4/20...  Training Step: 1777...  Training loss: 1.2957...  0.1184 sec/batch
Epoch: 4/20...  Training Step: 1778...  Training loss: 1.3659...  0.1243 sec/batch
Epoch: 4/20...  Training Step: 1779...  Training loss: 1.3520...  0.1225 sec/batch
Epoch: 4/20...  Training Step: 1780...  Training loss: 1.2150...  0.1190 sec/batch
Epoch: 4/20...  Training Step: 1781...  Training loss: 1.2827...  0.1175 sec/batch
Epoch: 4/20...  Training Step: 1782...  Training loss: 1.3544...  0.1189 sec/batch
Epoch: 4/20...  Training Step: 1783...  Training loss: 1.2194...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1784...  Training loss: 1.2046...  0.1239 sec/batch
Epoch: 4/20...  Training Step: 1785...  Training loss: 1.2055...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1786...  Training loss: 1.2770...  0.1193 sec/batch
Epoch: 4/20...  Training Step: 1787...  Training loss: 1.2163...  0.1212 sec/batch
Epoch: 4/20...  Training Step: 1788...  Training loss: 1.5733...  0.1216 sec/batch
Epoch: 4/20...  Training Step: 1789...  Training loss: 1.2982...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1790...  Training loss: 1.2508...  0.1250 sec/batch
Epoch: 4/20...  Training Step: 1791...  Training loss: 1.3930...  0.1236 sec/batch
Epoch: 4/20...  Training Step: 1792...  Training loss: 1.2744...  0.1216 sec/batch
Epoch: 4/20...  Training Step: 1793...  Training loss: 1.2828...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1794...  Training loss: 1.2882...  0.1256 sec/batch
Epoch: 4/20...  Training Step: 1795...  Training loss: 1.1593...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1796...  Training loss: 1.3708...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1797...  Training loss: 1.2979...  0.1220 sec/batch
Epoch: 4/20...  Training Step: 1798...  Training loss: 1.6349...  0.1223 sec/batch
Epoch: 4/20...  Training Step: 1799...  Training loss: 1.3745...  0.1240 sec/batch
Epoch: 4/20...  Training Step: 1800...  Training loss: 1.6144...  0.1219 sec/batch
Epoch: 4/20...  Training Step: 1801...  Training loss: 1.4490...  0.1205 sec/batch
Epoch: 4/20...  Training Step: 1802...  Training loss: 1.3823...  0.1222 sec/batch
Epoch: 4/20...  Training Step: 1803...  Training loss: 1.2400...  0.1209 sec/batch
Epoch: 4/20...  Training Step: 1804...  Training loss: 1.4599...  0.1212 sec/batch
Epoch: 4/20...  Training Step: 1805...  Training loss: 1.5113...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1806...  Training loss: 1.4261...  0.1172 sec/batch
Epoch: 4/20...  Training Step: 1807...  Training loss: 1.6043...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1808...  Training loss: 1.6254...  0.1239 sec/batch
Epoch: 4/20...  Training Step: 1809...  Training loss: 1.6262...  0.1234 sec/batch
Epoch: 4/20...  Training Step: 1810...  Training loss: 1.2339...  0.1252 sec/batch
Epoch: 4/20...  Training Step: 1811...  Training loss: 1.4666...  0.1193 sec/batch
Epoch: 4/20...  Training Step: 1812...  Training loss: 1.1788...  0.1275 sec/batch
Epoch: 4/20...  Training Step: 1813...  Training loss: 1.5304...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1814...  Training loss: 1.4188...  0.1226 sec/batch
Epoch: 4/20...  Training Step: 1815...  Training loss: 1.5319...  0.1204 sec/batch
Epoch: 4/20...  Training Step: 1816...  Training loss: 1.5240...  0.1202 sec/batch
Epoch: 4/20...  Training Step: 1817...  Training loss: 1.3966...  0.1207 sec/batch
Epoch: 4/20...  Training Step: 1818...  Training loss: 1.3632...  0.1177 sec/batch
Epoch: 4/20...  Training Step: 1819...  Training loss: 1.4918...  0.1213 sec/batch
Epoch: 4/20...  Training Step: 1820...  Training loss: 1.4537...  0.1198 sec/batch
Epoch: 4/20...  Training Step: 1821...  Training loss: 1.2203...  0.1245 sec/batch
Epoch: 4/20...  Training Step: 1822...  Training loss: 1.4192...  0.1237 sec/batch
Epoch: 4/20...  Training Step: 1823...  Training loss: 1.6875...  0.1224 sec/batch
Epoch: 4/20...  Training Step: 1824...  Training loss: 1.3888...  0.1219 sec/batch
Epoch: 4/20...  Training Step: 1825...  Training loss: 1.7070...  0.1238 sec/batch
Epoch: 4/20...  Training Step: 1826...  Training loss: 1.5605...  0.1244 sec/batch
Epoch: 4/20...  Training Step: 1827...  Training loss: 1.2419...  0.1290 sec/batch
Epoch: 4/20...  Training Step: 1828...  Training loss: 1.3892...  0.1277 sec/batch
Epoch: 4/20...  Training Step: 1829...  Training loss: 1.3549...  0.1251 sec/batch
Epoch: 4/20...  Training Step: 1830...  Training loss: 1.6378...  0.1176 sec/batch
Epoch: 4/20...  Training Step: 1831...  Training loss: 1.6030...  0.1207 sec/batch
Epoch: 4/20...  Training Step: 1832...  Training loss: 1.6696...  0.1241 sec/batch
Epoch: 4/20...  Training Step: 1833...  Training loss: 1.4242...  0.1178 sec/batch
Epoch: 4/20...  Training Step: 1834...  Training loss: 1.3844...  0.1257 sec/batch
Epoch: 4/20...  Training Step: 1835...  Training loss: 1.5733...  0.1199 sec/batch
Epoch: 4/20...  Training Step: 1836...  Training loss: 1.3559...  0.1257 sec/batch
Epoch: 4/20...  Training Step: 1837...  Training loss: 1.4308...  0.1295 sec/batch
Epoch: 4/20...  Training Step: 1838...  Training loss: 1.2822...  0.1288 sec/batch
Epoch: 4/20...  Training Step: 1839...  Training loss: 1.3567...  0.1241 sec/batch
Epoch: 4/20...  Training Step: 1840...  Training loss: 1.1859...  0.1205 sec/batch
Epoch: 4/20...  Training Step: 1841...  Training loss: 1.3348...  0.1231 sec/batch
Epoch: 4/20...  Training Step: 1842...  Training loss: 1.4622...  0.1269 sec/batch
Epoch: 4/20...  Training Step: 1843...  Training loss: 1.3571...  0.1211 sec/batch
Epoch: 4/20...  Training Step: 1844...  Training loss: 1.4487...  0.1246 sec/batch
Epoch: 4/20...  Training Step: 1845...  Training loss: 1.2888...  0.1223 sec/batch
Epoch: 4/20...  Training Step: 1846...  Training loss: 1.6262...  0.1220 sec/batch
Epoch: 4/20...  Training Step: 1847...  Training loss: 1.4500...  0.1233 sec/batch
Epoch: 4/20...  Training Step: 1848...  Training loss: 1.2393...  0.1183 sec/batch
Epoch: 4/20...  Training Step: 1849...  Training loss: 1.2943...  0.1254 sec/batch
Epoch: 4/20...  Training Step: 1850...  Training loss: 1.2009...  0.1254 sec/batch
Epoch: 4/20...  Training Step: 1851...  Training loss: 1.2055...  0.1217 sec/batch
Epoch: 4/20...  Training Step: 1852...  Training loss: 1.3902...  0.1260 sec/batch
Epoch: 4/20...  Training Step: 1853...  Training loss: 1.4751...  0.1182 sec/batch
Epoch: 4/20...  Training Step: 1854...  Training loss: 1.4420...  0.1227 sec/batch
Epoch: 4/20...  Training Step: 1855...  Training loss: 1.4047...  0.1219 sec/batch
Epoch: 4/20...  Training Step: 1856...  Training loss: 1.2511...  0.1212 sec/batch
Epoch: 5/20...  Training Step: 1857...  Training loss: 1.7105...  0.1221 sec/batch
Epoch: 5/20...  Training Step: 1858...  Training loss: 1.4392...  0.1208 sec/batch
Epoch: 5/20...  Training Step: 1859...  Training loss: 1.3687...  0.1208 sec/batch
Epoch: 5/20...  Training Step: 1860...  Training loss: 1.3123...  0.1208 sec/batch
Epoch: 5/20...  Training Step: 1861...  Training loss: 1.4527...  0.1202 sec/batch
Epoch: 5/20...  Training Step: 1862...  Training loss: 1.1734...  0.1185 sec/batch
Epoch: 5/20...  Training Step: 1863...  Training loss: 1.4351...  0.1180 sec/batch
Epoch: 5/20...  Training Step: 1864...  Training loss: 1.2495...  0.1229 sec/batch
Epoch: 5/20...  Training Step: 1865...  Training loss: 1.2998...  0.1207 sec/batch
Epoch: 5/20...  Training Step: 1866...  Training loss: 1.4686...  0.1215 sec/batch
Epoch: 5/20...  Training Step: 1867...  Training loss: 1.3448...  0.1185 sec/batch
Epoch: 5/20...  Training Step: 1868...  Training loss: 1.1075...  0.1214 sec/batch
Epoch: 5/20...  Training Step: 1869...  Training loss: 1.5307...  0.1251 sec/batch
Epoch: 5/20...  Training Step: 1870...  Training loss: 1.0672...  0.1218 sec/batch
Epoch: 5/20...  Training Step: 1871...  Training loss: 1.3289...  0.1217 sec/batch
Epoch: 5/20...  Training Step: 1872...  Training loss: 1.4295...  0.1224 sec/batch
Epoch: 5/20...  Training Step: 1873...  Training loss: 1.2210...  0.1206 sec/batch
Epoch: 5/20...  Training Step: 1874...  Training loss: 1.1445...  0.1275 sec/batch
Epoch: 5/20...  Training Step: 1875...  Training loss: 1.3625...  0.1205 sec/batch
Epoch: 5/20...  Training Step: 1876...  Training loss: 1.1365...  0.1285 sec/batch
Epoch: 5/20...  Training Step: 1877...  Training loss: 1.4231...  0.1338 sec/batch
Epoch: 5/20...  Training Step: 1878...  Training loss: 1.2400...  0.1332 sec/batch
Epoch: 5/20...  Training Step: 1879...  Training loss: 1.4825...  0.1294 sec/batch
Epoch: 5/20...  Training Step: 1880...  Training loss: 1.2371...  0.1276 sec/batch
Epoch: 5/20...  Training Step: 1881...  Training loss: 1.3419...  0.1256 sec/batch
Epoch: 5/20...  Training Step: 1882...  Training loss: 1.2379...  0.1311 sec/batch
Epoch: 5/20...  Training Step: 1883...  Training loss: 1.4356...  0.1354 sec/batch
Epoch: 5/20...  Training Step: 1884...  Training loss: 1.1254...  0.1272 sec/batch
Epoch: 5/20...  Training Step: 1885...  Training loss: 1.2998...  0.1333 sec/batch
Epoch: 5/20...  Training Step: 1886...  Training loss: 1.3118...  0.1359 sec/batch
Epoch: 5/20...  Training Step: 1887...  Training loss: 1.1696...  0.1302 sec/batch
Epoch: 5/20...  Training Step: 1888...  Training loss: 1.2502...  0.1343 sec/batch
Epoch: 5/20...  Training Step: 1889...  Training loss: 1.1884...  0.1181 sec/batch
Epoch: 5/20...  Training Step: 1890...  Training loss: 1.1185...  0.1325 sec/batch
Epoch: 5/20...  Training Step: 1891...  Training loss: 1.1801...  0.1278 sec/batch
Epoch: 5/20...  Training Step: 1892...  Training loss: 1.2366...  0.1285 sec/batch
Epoch: 5/20...  Training Step: 1893...  Training loss: 1.3793...  0.1291 sec/batch
Epoch: 5/20...  Training Step: 1894...  Training loss: 1.1703...  0.1350 sec/batch
Epoch: 5/20...  Training Step: 1895...  Training loss: 1.2572...  0.1330 sec/batch
Epoch: 5/20...  Training Step: 1896...  Training loss: 1.5438...  0.1270 sec/batch
Epoch: 5/20...  Training Step: 1897...  Training loss: 1.2769...  0.1335 sec/batch
Epoch: 5/20...  Training Step: 1898...  Training loss: 1.1648...  0.1309 sec/batch
Epoch: 5/20...  Training Step: 1899...  Training loss: 1.4280...  0.1296 sec/batch
Epoch: 5/20...  Training Step: 1900...  Training loss: 1.1000...  0.1308 sec/batch
Epoch: 5/20...  Training Step: 1901...  Training loss: 1.2958...  0.1389 sec/batch
Epoch: 5/20...  Training Step: 1902...  Training loss: 1.2596...  0.1345 sec/batch
Epoch: 5/20...  Training Step: 1903...  Training loss: 1.2586...  0.1307 sec/batch
Epoch: 5/20...  Training Step: 1904...  Training loss: 1.2538...  0.1231 sec/batch
Epoch: 5/20...  Training Step: 1905...  Training loss: 1.2464...  0.1273 sec/batch
Epoch: 5/20...  Training Step: 1906...  Training loss: 1.3838...  0.1286 sec/batch
Epoch: 5/20...  Training Step: 1907...  Training loss: 1.2342...  0.1372 sec/batch
Epoch: 5/20...  Training Step: 1908...  Training loss: 1.3099...  0.1282 sec/batch
Epoch: 5/20...  Training Step: 1909...  Training loss: 1.4159...  0.1301 sec/batch
Epoch: 5/20...  Training Step: 1910...  Training loss: 1.4648...  0.1310 sec/batch
Epoch: 5/20...  Training Step: 1911...  Training loss: 1.1761...  0.1243 sec/batch
Epoch: 5/20...  Training Step: 1912...  Training loss: 1.2998...  0.1310 sec/batch
Epoch: 5/20...  Training Step: 1913...  Training loss: 1.4938...  0.1315 sec/batch
Epoch: 5/20...  Training Step: 1914...  Training loss: 1.3429...  0.1309 sec/batch
Epoch: 5/20...  Training Step: 1915...  Training loss: 1.1736...  0.1322 sec/batch
Epoch: 5/20...  Training Step: 1916...  Training loss: 1.1663...  0.1367 sec/batch
Epoch: 5/20...  Training Step: 1917...  Training loss: 1.3314...  0.1303 sec/batch
Epoch: 5/20...  Training Step: 1918...  Training loss: 1.4389...  0.1276 sec/batch
Epoch: 5/20...  Training Step: 1919...  Training loss: 1.1892...  0.1304 sec/batch
Epoch: 5/20...  Training Step: 1920...  Training loss: 1.2446...  0.1375 sec/batch
Epoch: 5/20...  Training Step: 1921...  Training loss: 1.2025...  0.1246 sec/batch
Epoch: 5/20...  Training Step: 1922...  Training loss: 1.4916...  0.1248 sec/batch
Epoch: 5/20...  Training Step: 1923...  Training loss: 1.2647...  0.1320 sec/batch
Epoch: 5/20...  Training Step: 1924...  Training loss: 1.3336...  0.1256 sec/batch
Epoch: 5/20...  Training Step: 1925...  Training loss: 1.2618...  0.1258 sec/batch
Epoch: 5/20...  Training Step: 1926...  Training loss: 1.2409...  0.1357 sec/batch
Epoch: 5/20...  Training Step: 1927...  Training loss: 1.3869...  0.1303 sec/batch
Epoch: 5/20...  Training Step: 1928...  Training loss: 1.3183...  0.1320 sec/batch
Epoch: 5/20...  Training Step: 1929...  Training loss: 1.3104...  0.1383 sec/batch
Epoch: 5/20...  Training Step: 1930...  Training loss: 1.1808...  0.1385 sec/batch
Epoch: 5/20...  Training Step: 1931...  Training loss: 1.4363...  0.1305 sec/batch
Epoch: 5/20...  Training Step: 1932...  Training loss: 1.2102...  0.1289 sec/batch
Epoch: 5/20...  Training Step: 1933...  Training loss: 1.2449...  0.1369 sec/batch
Epoch: 5/20...  Training Step: 1934...  Training loss: 1.2348...  0.1138 sec/batch
Epoch: 5/20...  Training Step: 1935...  Training loss: 1.2913...  0.1205 sec/batch
Epoch: 5/20...  Training Step: 1936...  Training loss: 1.2973...  0.1301 sec/batch
Epoch: 5/20...  Training Step: 1937...  Training loss: 1.4525...  0.1316 sec/batch
Epoch: 5/20...  Training Step: 1938...  Training loss: 1.3099...  0.1242 sec/batch
Epoch: 5/20...  Training Step: 1939...  Training loss: 1.3122...  0.1203 sec/batch
Epoch: 5/20...  Training Step: 1940...  Training loss: 1.4561...  0.1139 sec/batch
Epoch: 5/20...  Training Step: 1941...  Training loss: 1.4068...  0.1303 sec/batch
Epoch: 5/20...  Training Step: 1942...  Training loss: 1.3605...  0.1296 sec/batch
Epoch: 5/20...  Training Step: 1943...  Training loss: 1.2415...  0.1241 sec/batch
Epoch: 5/20...  Training Step: 1944...  Training loss: 1.4320...  0.1227 sec/batch
Epoch: 5/20...  Training Step: 1945...  Training loss: 1.4569...  0.1209 sec/batch
Epoch: 5/20...  Training Step: 1946...  Training loss: 1.2464...  0.1224 sec/batch
Epoch: 5/20...  Training Step: 1947...  Training loss: 1.4470...  0.1210 sec/batch
Epoch: 5/20...  Training Step: 1948...  Training loss: 1.4155...  0.1351 sec/batch
Epoch: 5/20...  Training Step: 1949...  Training loss: 1.2021...  0.1289 sec/batch
Epoch: 5/20...  Training Step: 1950...  Training loss: 1.4768...  0.1244 sec/batch
Epoch: 5/20...  Training Step: 1951...  Training loss: 1.2854...  0.1321 sec/batch
Epoch: 5/20...  Training Step: 1952...  Training loss: 1.3493...  0.1369 sec/batch
Epoch: 5/20...  Training Step: 1953...  Training loss: 1.6357...  0.1289 sec/batch
Epoch: 5/20...  Training Step: 1954...  Training loss: 1.4641...  0.1247 sec/batch
Epoch: 5/20...  Training Step: 1955...  Training loss: 1.3350...  0.1318 sec/batch
Epoch: 5/20...  Training Step: 1956...  Training loss: 1.2323...  0.1292 sec/batch
Epoch: 5/20...  Training Step: 1957...  Training loss: 1.4544...  0.1296 sec/batch
Epoch: 5/20...  Training Step: 1958...  Training loss: 1.6270...  0.1229 sec/batch
Epoch: 5/20...  Training Step: 1959...  Training loss: 1.5493...  0.1312 sec/batch
Epoch: 5/20...  Training Step: 1960...  Training loss: 1.3591...  0.1268 sec/batch
Epoch: 5/20...  Training Step: 1961...  Training loss: 1.5031...  0.1207 sec/batch
Epoch: 5/20...  Training Step: 1962...  Training loss: 1.6247...  0.1215 sec/batch
Epoch: 5/20...  Training Step: 1963...  Training loss: 1.4599...  0.1295 sec/batch
Epoch: 5/20...  Training Step: 1964...  Training loss: 1.5750...  0.1308 sec/batch
Epoch: 5/20...  Training Step: 1965...  Training loss: 1.3838...  0.1406 sec/batch
Epoch: 5/20...  Training Step: 1966...  Training loss: 1.2777...  0.1342 sec/batch
Epoch: 5/20...  Training Step: 1967...  Training loss: 1.4589...  0.1398 sec/batch
Epoch: 5/20...  Training Step: 1968...  Training loss: 1.4192...  0.1329 sec/batch
Epoch: 5/20...  Training Step: 1969...  Training loss: 1.4203...  0.1309 sec/batch
Epoch: 5/20...  Training Step: 1970...  Training loss: 1.5021...  0.1325 sec/batch
Epoch: 5/20...  Training Step: 1971...  Training loss: 1.4147...  0.1296 sec/batch
Epoch: 5/20...  Training Step: 1972...  Training loss: 1.3449...  0.1320 sec/batch
Epoch: 5/20...  Training Step: 1973...  Training loss: 1.4838...  0.1299 sec/batch
Epoch: 5/20...  Training Step: 1974...  Training loss: 1.3985...  0.1306 sec/batch
Epoch: 5/20...  Training Step: 1975...  Training loss: 1.3387...  0.1373 sec/batch
Epoch: 5/20...  Training Step: 1976...  Training loss: 1.1368...  0.1269 sec/batch
Epoch: 5/20...  Training Step: 1977...  Training loss: 1.3112...  0.1307 sec/batch
Epoch: 5/20...  Training Step: 1978...  Training loss: 1.4505...  0.1320 sec/batch
Epoch: 5/20...  Training Step: 1979...  Training loss: 1.3829...  0.1240 sec/batch
Epoch: 5/20...  Training Step: 1980...  Training loss: 1.5161...  0.1268 sec/batch
Epoch: 5/20...  Training Step: 1981...  Training loss: 1.3980...  0.1318 sec/batch
Epoch: 5/20...  Training Step: 1982...  Training loss: 1.1892...  0.1297 sec/batch
Epoch: 5/20...  Training Step: 1983...  Training loss: 1.4018...  0.1304 sec/batch
Epoch: 5/20...  Training Step: 1984...  Training loss: 1.3985...  0.1267 sec/batch
Epoch: 5/20...  Training Step: 1985...  Training loss: 1.5136...  0.1328 sec/batch
Epoch: 5/20...  Training Step: 1986...  Training loss: 1.2822...  0.1307 sec/batch
Epoch: 5/20...  Training Step: 1987...  Training loss: 1.6553...  0.1299 sec/batch
Epoch: 5/20...  Training Step: 1988...  Training loss: 1.3044...  0.1330 sec/batch
Epoch: 5/20...  Training Step: 1989...  Training loss: 1.3977...  0.1297 sec/batch
Epoch: 5/20...  Training Step: 1990...  Training loss: 1.4979...  0.1301 sec/batch
Epoch: 5/20...  Training Step: 1991...  Training loss: 1.2436...  0.1321 sec/batch
Epoch: 5/20...  Training Step: 1992...  Training loss: 1.1109...  0.1300 sec/batch
Epoch: 5/20...  Training Step: 1993...  Training loss: 1.1794...  0.1300 sec/batch
Epoch: 5/20...  Training Step: 1994...  Training loss: 1.4049...  0.1309 sec/batch
Epoch: 5/20...  Training Step: 1995...  Training loss: 1.2522...  0.1308 sec/batch
Epoch: 5/20...  Training Step: 1996...  Training loss: 1.4161...  0.1297 sec/batch
Epoch: 5/20...  Training Step: 1997...  Training loss: 1.2564...  0.1243 sec/batch
Epoch: 5/20...  Training Step: 1998...  Training loss: 1.2638...  0.1195 sec/batch
Epoch: 5/20...  Training Step: 1999...  Training loss: 1.1311...  0.1188 sec/batch
Epoch: 5/20...  Training Step: 2000...  Training loss: 1.4742...  0.1220 sec/batch
Epoch: 5/20...  Training Step: 2001...  Training loss: 1.3373...  0.1201 sec/batch
Epoch: 5/20...  Training Step: 2002...  Training loss: 1.2330...  0.1250 sec/batch
Epoch: 5/20...  Training Step: 2003...  Training loss: 1.3269...  0.1252 sec/batch
Epoch: 5/20...  Training Step: 2004...  Training loss: 1.3388...  0.1229 sec/batch
Epoch: 5/20...  Training Step: 2005...  Training loss: 1.3626...  0.1262 sec/batch
Epoch: 5/20...  Training Step: 2006...  Training loss: 1.5422...  0.1200 sec/batch
Epoch: 5/20...  Training Step: 2007...  Training loss: 1.4364...  0.1206 sec/batch
Epoch: 5/20...  Training Step: 2008...  Training loss: 1.4181...  0.1219 sec/batch
Epoch: 5/20...  Training Step: 2009...  Training loss: 1.5090...  0.1242 sec/batch
Epoch: 5/20...  Training Step: 2010...  Training loss: 1.3017...  0.1242 sec/batch
Epoch: 5/20...  Training Step: 2011...  Training loss: 1.4210...  0.1232 sec/batch
Epoch: 5/20...  Training Step: 2012...  Training loss: 1.2559...  0.1219 sec/batch
Epoch: 5/20...  Training Step: 2013...  Training loss: 1.2238...  0.1236 sec/batch
Epoch: 5/20...  Training Step: 2014...  Training loss: 1.2536...  0.1266 sec/batch
Epoch: 5/20...  Training Step: 2015...  Training loss: 1.1755...  0.1243 sec/batch
Epoch: 5/20...  Training Step: 2016...  Training loss: 1.2535...  0.1226 sec/batch
Epoch: 5/20...  Training Step: 2017...  Training loss: 1.4850...  0.1190 sec/batch
Epoch: 5/20...  Training Step: 2018...  Training loss: 1.3355...  0.1211 sec/batch
Epoch: 5/20...  Training Step: 2019...  Training loss: 1.4649...  0.1212 sec/batch
Epoch: 5/20...  Training Step: 2020...  Training loss: 1.2030...  0.1236 sec/batch
Epoch: 5/20...  Training Step: 2021...  Training loss: 1.3589...  0.1196 sec/batch
Epoch: 5/20...  Training Step: 2022...  Training loss: 1.2917...  0.1188 sec/batch
Epoch: 5/20...  Training Step: 2023...  Training loss: 1.2187...  0.1251 sec/batch
Epoch: 5/20...  Training Step: 2024...  Training loss: 1.5688...  0.1228 sec/batch
Epoch: 5/20...  Training Step: 2025...  Training loss: 1.3659...  0.1200 sec/batch
Epoch: 5/20...  Training Step: 2026...  Training loss: 1.4193...  0.1325 sec/batch
Epoch: 5/20...  Training Step: 2027...  Training loss: 1.3733...  0.1341 sec/batch
Epoch: 5/20...  Training Step: 2028...  Training loss: 1.4903...  0.1251 sec/batch
Epoch: 5/20...  Training Step: 2029...  Training loss: 1.3682...  0.1219 sec/batch
Epoch: 5/20...  Training Step: 2030...  Training loss: 1.4203...  0.1237 sec/batch
Epoch: 5/20...  Training Step: 2031...  Training loss: 1.4826...  0.1158 sec/batch
Epoch: 5/20...  Training Step: 2032...  Training loss: 1.1715...  0.1249 sec/batch
Epoch: 5/20...  Training Step: 2033...  Training loss: 1.1239...  0.1208 sec/batch
Epoch: 5/20...  Training Step: 2034...  Training loss: 1.4076...  0.1162 sec/batch
Epoch: 5/20...  Training Step: 2035...  Training loss: 1.2335...  0.1242 sec/batch
Epoch: 5/20...  Training Step: 2036...  Training loss: 1.4004...  0.1203 sec/batch
Epoch: 5/20...  Training Step: 2037...  Training loss: 1.2360...  0.1220 sec/batch
Epoch: 5/20...  Training Step: 2038...  Training loss: 1.4961...  0.1233 sec/batch
Epoch: 5/20...  Training Step: 2039...  Training loss: 1.4774...  0.1221 sec/batch
Epoch: 5/20...  Training Step: 2040...  Training loss: 1.3138...  0.1237 sec/batch
Epoch: 5/20...  Training Step: 2041...  Training loss: 1.4879...  0.1273 sec/batch
Epoch: 5/20...  Training Step: 2042...  Training loss: 1.4395...  0.1312 sec/batch
Epoch: 5/20...  Training Step: 2043...  Training loss: 1.4202...  0.1251 sec/batch
Epoch: 5/20...  Training Step: 2044...  Training loss: 1.1512...  0.1229 sec/batch
Epoch: 5/20...  Training Step: 2045...  Training loss: 1.4685...  0.1202 sec/batch
Epoch: 5/20...  Training Step: 2046...  Training loss: 1.2811...  0.1248 sec/batch
Epoch: 5/20...  Training Step: 2047...  Training loss: 1.2285...  0.1291 sec/batch
Epoch: 5/20...  Training Step: 2048...  Training loss: 1.5609...  0.1284 sec/batch
Epoch: 5/20...  Training Step: 2049...  Training loss: 1.4412...  0.1253 sec/batch
Epoch: 5/20...  Training Step: 2050...  Training loss: 1.2376...  0.1265 sec/batch
Epoch: 5/20...  Training Step: 2051...  Training loss: 1.4113...  0.1338 sec/batch
Epoch: 5/20...  Training Step: 2052...  Training loss: 1.3083...  0.1321 sec/batch
Epoch: 5/20...  Training Step: 2053...  Training loss: 1.2489...  0.1240 sec/batch
Epoch: 5/20...  Training Step: 2054...  Training loss: 1.4137...  0.1216 sec/batch
Epoch: 5/20...  Training Step: 2055...  Training loss: 1.0055...  0.1286 sec/batch
Epoch: 5/20...  Training Step: 2056...  Training loss: 1.2158...  0.1273 sec/batch
Epoch: 5/20...  Training Step: 2057...  Training loss: 1.2625...  0.1221 sec/batch
Epoch: 5/20...  Training Step: 2058...  Training loss: 1.2687...  0.1235 sec/batch
Epoch: 5/20...  Training Step: 2059...  Training loss: 1.3201...  0.1393 sec/batch
Epoch: 5/20...  Training Step: 2060...  Training loss: 1.5002...  0.1273 sec/batch
Epoch: 5/20...  Training Step: 2061...  Training loss: 1.2850...  0.1207 sec/batch
Epoch: 5/20...  Training Step: 2062...  Training loss: 1.1946...  0.1206 sec/batch
Epoch: 5/20...  Training Step: 2063...  Training loss: 1.2273...  0.1231 sec/batch
Epoch: 5/20...  Training Step: 2064...  Training loss: 1.3940...  0.1257 sec/batch
Epoch: 5/20...  Training Step: 2065...  Training loss: 1.3493...  0.1207 sec/batch
Epoch: 5/20...  Training Step: 2066...  Training loss: 1.1373...  0.1229 sec/batch
Epoch: 5/20...  Training Step: 2067...  Training loss: 1.1226...  0.1202 sec/batch
Epoch: 5/20...  Training Step: 2068...  Training loss: 1.4506...  0.1214 sec/batch
Epoch: 5/20...  Training Step: 2069...  Training loss: 1.5134...  0.1205 sec/batch
Epoch: 5/20...  Training Step: 2070...  Training loss: 1.3288...  0.1207 sec/batch
Epoch: 5/20...  Training Step: 2071...  Training loss: 1.4938...  0.1267 sec/batch
Epoch: 5/20...  Training Step: 2072...  Training loss: 1.2917...  0.1212 sec/batch
Epoch: 5/20...  Training Step: 2073...  Training loss: 1.3838...  0.1245 sec/batch
Epoch: 5/20...  Training Step: 2074...  Training loss: 1.2705...  0.1221 sec/batch
Epoch: 5/20...  Training Step: 2075...  Training loss: 1.5055...  0.1240 sec/batch
Epoch: 5/20...  Training Step: 2076...  Training loss: 1.2751...  0.1212 sec/batch
Epoch: 5/20...  Training Step: 2077...  Training loss: 1.2524...  0.1229 sec/batch
Epoch: 5/20...  Training Step: 2078...  Training loss: 1.4998...  0.1214 sec/batch
Epoch: 5/20...  Training Step: 2079...  Training loss: 1.5108...  0.1255 sec/batch
Epoch: 5/20...  Training Step: 2080...  Training loss: 1.5976...  0.1239 sec/batch
Epoch: 5/20...  Training Step: 2081...  Training loss: 1.4327...  0.1278 sec/batch
Epoch: 5/20...  Training Step: 2082...  Training loss: 1.5730...  0.1260 sec/batch
Epoch: 5/20...  Training Step: 2083...  Training loss: 1.5314...  0.1323 sec/batch
Epoch: 5/20...  Training Step: 2084...  Training loss: 1.1941...  0.1263 sec/batch
Epoch: 5/20...  Training Step: 2085...  Training loss: 1.3786...  0.1315 sec/batch
Epoch: 5/20...  Training Step: 2086...  Training loss: 1.3234...  0.1336 sec/batch
Epoch: 5/20...  Training Step: 2087...  Training loss: 1.3384...  0.1253 sec/batch
Epoch: 5/20...  Training Step: 2088...  Training loss: 1.2511...  0.1285 sec/batch
Epoch: 5/20...  Training Step: 2089...  Training loss: 1.6410...  0.1235 sec/batch
Epoch: 5/20...  Training Step: 2090...  Training loss: 1.3386...  0.1193 sec/batch
Epoch: 5/20...  Training Step: 2091...  Training loss: 1.5113...  0.1273 sec/batch
Epoch: 5/20...  Training Step: 2092...  Training loss: 1.3396...  0.1211 sec/batch
Epoch: 5/20...  Training Step: 2093...  Training loss: 1.6290...  0.1292 sec/batch
Epoch: 5/20...  Training Step: 2094...  Training loss: 1.2716...  0.1308 sec/batch
Epoch: 5/20...  Training Step: 2095...  Training loss: 1.3236...  0.1286 sec/batch
Epoch: 5/20...  Training Step: 2096...  Training loss: 1.4320...  0.1312 sec/batch
Epoch: 5/20...  Training Step: 2097...  Training loss: 1.3221...  0.1297 sec/batch
Epoch: 5/20...  Training Step: 2098...  Training loss: 1.2143...  0.1261 sec/batch
Epoch: 5/20...  Training Step: 2099...  Training loss: 1.4509...  0.1210 sec/batch
Epoch: 5/20...  Training Step: 2100...  Training loss: 1.4199...  0.1252 sec/batch
Epoch: 5/20...  Training Step: 2101...  Training loss: 1.3452...  0.1240 sec/batch
Epoch: 5/20...  Training Step: 2102...  Training loss: 1.1982...  0.1189 sec/batch
Epoch: 5/20...  Training Step: 2103...  Training loss: 1.3105...  0.1236 sec/batch
Epoch: 5/20...  Training Step: 2104...  Training loss: 1.4242...  0.1205 sec/batch
Epoch: 5/20...  Training Step: 2105...  Training loss: 1.3309...  0.1208 sec/batch
Epoch: 5/20...  Training Step: 2106...  Training loss: 1.2867...  0.1221 sec/batch
Epoch: 5/20...  Training Step: 2107...  Training loss: 1.5059...  0.1287 sec/batch
Epoch: 5/20...  Training Step: 2108...  Training loss: 1.2699...  0.1256 sec/batch
Epoch: 5/20...  Training Step: 2109...  Training loss: 1.2599...  0.1243 sec/batch
Epoch: 5/20...  Training Step: 2110...  Training loss: 1.3264...  0.1209 sec/batch
Epoch: 5/20...  Training Step: 2111...  Training loss: 1.3123...  0.1191 sec/batch
Epoch: 5/20...  Training Step: 2112...  Training loss: 1.4189...  0.1174 sec/batch
Epoch: 5/20...  Training Step: 2113...  Training loss: 1.4893...  0.1279 sec/batch
Epoch: 5/20...  Training Step: 2114...  Training loss: 1.1976...  0.1254 sec/batch
Epoch: 5/20...  Training Step: 2115...  Training loss: 1.1844...  0.1213 sec/batch
Epoch: 5/20...  Training Step: 2116...  Training loss: 1.2370...  0.1286 sec/batch
Epoch: 5/20...  Training Step: 2117...  Training loss: 1.2447...  0.1308 sec/batch
Epoch: 5/20...  Training Step: 2118...  Training loss: 1.4092...  0.1261 sec/batch
Epoch: 5/20...  Training Step: 2119...  Training loss: 1.3564...  0.1290 sec/batch
Epoch: 5/20...  Training Step: 2120...  Training loss: 1.4175...  0.1282 sec/batch
Epoch: 5/20...  Training Step: 2121...  Training loss: 1.5402...  0.1307 sec/batch
Epoch: 5/20...  Training Step: 2122...  Training loss: 1.3617...  0.1244 sec/batch
Epoch: 5/20...  Training Step: 2123...  Training loss: 1.6011...  0.1165 sec/batch
Epoch: 5/20...  Training Step: 2124...  Training loss: 1.4850...  0.1316 sec/batch
Epoch: 5/20...  Training Step: 2125...  Training loss: 1.3993...  0.1218 sec/batch
Epoch: 5/20...  Training Step: 2126...  Training loss: 1.4731...  0.1224 sec/batch
Epoch: 5/20...  Training Step: 2127...  Training loss: 1.4577...  0.1325 sec/batch
Epoch: 5/20...  Training Step: 2128...  Training loss: 1.4428...  0.1301 sec/batch
Epoch: 5/20...  Training Step: 2129...  Training loss: 1.5331...  0.1209 sec/batch
Epoch: 5/20...  Training Step: 2130...  Training loss: 1.3790...  0.1200 sec/batch
Epoch: 5/20...  Training Step: 2131...  Training loss: 1.4644...  0.1229 sec/batch
Epoch: 5/20...  Training Step: 2132...  Training loss: 1.3375...  0.1204 sec/batch
Epoch: 5/20...  Training Step: 2133...  Training loss: 1.4570...  0.1232 sec/batch
Epoch: 5/20...  Training Step: 2134...  Training loss: 1.6120...  0.1202 sec/batch
Epoch: 5/20...  Training Step: 2135...  Training loss: 1.3700...  0.1219 sec/batch
Epoch: 5/20...  Training Step: 2136...  Training loss: 1.3754...  0.1206 sec/batch
Epoch: 5/20...  Training Step: 2137...  Training loss: 1.2274...  0.1173 sec/batch
Epoch: 5/20...  Training Step: 2138...  Training loss: 1.2678...  0.1255 sec/batch
Epoch: 5/20...  Training Step: 2139...  Training loss: 1.2651...  0.1268 sec/batch
Epoch: 5/20...  Training Step: 2140...  Training loss: 1.4352...  0.1201 sec/batch
Epoch: 5/20...  Training Step: 2141...  Training loss: 1.1677...  0.1194 sec/batch
Epoch: 5/20...  Training Step: 2142...  Training loss: 1.3226...  0.1230 sec/batch
Epoch: 5/20...  Training Step: 2143...  Training loss: 1.3909...  0.1217 sec/batch
Epoch: 5/20...  Training Step: 2144...  Training loss: 1.4291...  0.1218 sec/batch
Epoch: 5/20...  Training Step: 2145...  Training loss: 1.4847...  0.1200 sec/batch
Epoch: 5/20...  Training Step: 2146...  Training loss: 1.4530...  0.1212 sec/batch
Epoch: 5/20...  Training Step: 2147...  Training loss: 1.4686...  0.1188 sec/batch
Epoch: 5/20...  Training Step: 2148...  Training loss: 1.2717...  0.1255 sec/batch
Epoch: 5/20...  Training Step: 2149...  Training loss: 1.2172...  0.1174 sec/batch
Epoch: 5/20...  Training Step: 2150...  Training loss: 1.3201...  0.1261 sec/batch
Epoch: 5/20...  Training Step: 2151...  Training loss: 1.3613...  0.1223 sec/batch
Epoch: 5/20...  Training Step: 2152...  Training loss: 1.5339...  0.1200 sec/batch
Epoch: 5/20...  Training Step: 2153...  Training loss: 1.2476...  0.1256 sec/batch
Epoch: 5/20...  Training Step: 2154...  Training loss: 1.3315...  0.1247 sec/batch
Epoch: 5/20...  Training Step: 2155...  Training loss: 1.3830...  0.1207 sec/batch
Epoch: 5/20...  Training Step: 2156...  Training loss: 1.2626...  0.1240 sec/batch
Epoch: 5/20...  Training Step: 2157...  Training loss: 1.4222...  0.1264 sec/batch
Epoch: 5/20...  Training Step: 2158...  Training loss: 1.3713...  0.1262 sec/batch
Epoch: 5/20...  Training Step: 2159...  Training loss: 1.1779...  0.1219 sec/batch
Epoch: 5/20...  Training Step: 2160...  Training loss: 1.5389...  0.1224 sec/batch
Epoch: 5/20...  Training Step: 2161...  Training loss: 1.1701...  0.1180 sec/batch
Epoch: 5/20...  Training Step: 2162...  Training loss: 1.4128...  0.1177 sec/batch
Epoch: 5/20...  Training Step: 2163...  Training loss: 1.3217...  0.1235 sec/batch
Epoch: 5/20...  Training Step: 2164...  Training loss: 1.7374...  0.1249 sec/batch
Epoch: 5/20...  Training Step: 2165...  Training loss: 1.3241...  0.1240 sec/batch
Epoch: 5/20...  Training Step: 2166...  Training loss: 1.4510...  0.1205 sec/batch
Epoch: 5/20...  Training Step: 2167...  Training loss: 1.3563...  0.1223 sec/batch
Epoch: 5/20...  Training Step: 2168...  Training loss: 1.1241...  0.1207 sec/batch
Epoch: 5/20...  Training Step: 2169...  Training loss: 1.3653...  0.1251 sec/batch
Epoch: 5/20...  Training Step: 2170...  Training loss: 1.3491...  0.1238 sec/batch
Epoch: 5/20...  Training Step: 2171...  Training loss: 1.0264...  0.1245 sec/batch
Epoch: 5/20...  Training Step: 2172...  Training loss: 1.2446...  0.1219 sec/batch
Epoch: 5/20...  Training Step: 2173...  Training loss: 1.3225...  0.1177 sec/batch
Epoch: 5/20...  Training Step: 2174...  Training loss: 1.2322...  0.1171 sec/batch
Epoch: 5/20...  Training Step: 2175...  Training loss: 1.3576...  0.1215 sec/batch
Epoch: 5/20...  Training Step: 2176...  Training loss: 1.2236...  0.1224 sec/batch
Epoch: 5/20...  Training Step: 2177...  Training loss: 1.1469...  0.1203 sec/batch
Epoch: 5/20...  Training Step: 2178...  Training loss: 1.4571...  0.1256 sec/batch
Epoch: 5/20...  Training Step: 2179...  Training loss: 1.2698...  0.1237 sec/batch
Epoch: 5/20...  Training Step: 2180...  Training loss: 1.1421...  0.1253 sec/batch
Epoch: 5/20...  Training Step: 2181...  Training loss: 1.1433...  0.1213 sec/batch
Epoch: 5/20...  Training Step: 2182...  Training loss: 1.2757...  0.1231 sec/batch
Epoch: 5/20...  Training Step: 2183...  Training loss: 1.3471...  0.1218 sec/batch
Epoch: 5/20...  Training Step: 2184...  Training loss: 1.3731...  0.1208 sec/batch
Epoch: 5/20...  Training Step: 2185...  Training loss: 1.3316...  0.1237 sec/batch
Epoch: 5/20...  Training Step: 2186...  Training loss: 1.1254...  0.1246 sec/batch
Epoch: 5/20...  Training Step: 2187...  Training loss: 1.4053...  0.1191 sec/batch
Epoch: 5/20...  Training Step: 2188...  Training loss: 1.3396...  0.1237 sec/batch
Epoch: 5/20...  Training Step: 2189...  Training loss: 1.2039...  0.1249 sec/batch
Epoch: 5/20...  Training Step: 2190...  Training loss: 1.3069...  0.1183 sec/batch
Epoch: 5/20...  Training Step: 2191...  Training loss: 1.4495...  0.1211 sec/batch
Epoch: 5/20...  Training Step: 2192...  Training loss: 1.1505...  0.1235 sec/batch
Epoch: 5/20...  Training Step: 2193...  Training loss: 1.3098...  0.1222 sec/batch
Epoch: 5/20...  Training Step: 2194...  Training loss: 1.1587...  0.1242 sec/batch
Epoch: 5/20...  Training Step: 2195...  Training loss: 1.4081...  0.1266 sec/batch
Epoch: 5/20...  Training Step: 2196...  Training loss: 1.2032...  0.1248 sec/batch
Epoch: 5/20...  Training Step: 2197...  Training loss: 1.1867...  0.1177 sec/batch
Epoch: 5/20...  Training Step: 2198...  Training loss: 1.4075...  0.1234 sec/batch
Epoch: 5/20...  Training Step: 2199...  Training loss: 1.0782...  0.1249 sec/batch
Epoch: 5/20...  Training Step: 2200...  Training loss: 1.4022...  0.1191 sec/batch
Epoch: 5/20...  Training Step: 2201...  Training loss: 1.2112...  0.1221 sec/batch
Epoch: 5/20...  Training Step: 2202...  Training loss: 1.0665...  0.1220 sec/batch
Epoch: 5/20...  Training Step: 2203...  Training loss: 1.1189...  0.1227 sec/batch
Epoch: 5/20...  Training Step: 2204...  Training loss: 1.4799...  0.1209 sec/batch
Epoch: 5/20...  Training Step: 2205...  Training loss: 1.2105...  0.1260 sec/batch
Epoch: 5/20...  Training Step: 2206...  Training loss: 1.3033...  0.1197 sec/batch
Epoch: 5/20...  Training Step: 2207...  Training loss: 1.2723...  0.1227 sec/batch
Epoch: 5/20...  Training Step: 2208...  Training loss: 1.1299...  0.1211 sec/batch
Epoch: 5/20...  Training Step: 2209...  Training loss: 1.0649...  0.1209 sec/batch
Epoch: 5/20...  Training Step: 2210...  Training loss: 1.0279...  0.1237 sec/batch
Epoch: 5/20...  Training Step: 2211...  Training loss: 1.3164...  0.1186 sec/batch
Epoch: 5/20...  Training Step: 2212...  Training loss: 1.2554...  0.1235 sec/batch
Epoch: 5/20...  Training Step: 2213...  Training loss: 1.2331...  0.1203 sec/batch
Epoch: 5/20...  Training Step: 2214...  Training loss: 1.3963...  0.1192 sec/batch
Epoch: 5/20...  Training Step: 2215...  Training loss: 1.4784...  0.1168 sec/batch
Epoch: 5/20...  Training Step: 2216...  Training loss: 1.1013...  0.1231 sec/batch
Epoch: 5/20...  Training Step: 2217...  Training loss: 1.4188...  0.1231 sec/batch
Epoch: 5/20...  Training Step: 2218...  Training loss: 1.3214...  0.1239 sec/batch
Epoch: 5/20...  Training Step: 2219...  Training loss: 1.3651...  0.1288 sec/batch
Epoch: 5/20...  Training Step: 2220...  Training loss: 1.2781...  0.1258 sec/batch
Epoch: 5/20...  Training Step: 2221...  Training loss: 1.2946...  0.1267 sec/batch
Epoch: 5/20...  Training Step: 2222...  Training loss: 1.3607...  0.1199 sec/batch
Epoch: 5/20...  Training Step: 2223...  Training loss: 1.2041...  0.1219 sec/batch
Epoch: 5/20...  Training Step: 2224...  Training loss: 1.4460...  0.1196 sec/batch
Epoch: 5/20...  Training Step: 2225...  Training loss: 1.4566...  0.1222 sec/batch
Epoch: 5/20...  Training Step: 2226...  Training loss: 1.2434...  0.1268 sec/batch
Epoch: 5/20...  Training Step: 2227...  Training loss: 1.1295...  0.1265 sec/batch
Epoch: 5/20...  Training Step: 2228...  Training loss: 1.3996...  0.1202 sec/batch
Epoch: 5/20...  Training Step: 2229...  Training loss: 1.2414...  0.1186 sec/batch
Epoch: 5/20...  Training Step: 2230...  Training loss: 1.4211...  0.1176 sec/batch
Epoch: 5/20...  Training Step: 2231...  Training loss: 1.2947...  0.1223 sec/batch
Epoch: 5/20...  Training Step: 2232...  Training loss: 1.2941...  0.1208 sec/batch
Epoch: 5/20...  Training Step: 2233...  Training loss: 1.4468...  0.1201 sec/batch
Epoch: 5/20...  Training Step: 2234...  Training loss: 1.4240...  0.1227 sec/batch
Epoch: 5/20...  Training Step: 2235...  Training loss: 1.2270...  0.1216 sec/batch
Epoch: 5/20...  Training Step: 2236...  Training loss: 1.4344...  0.1219 sec/batch
Epoch: 5/20...  Training Step: 2237...  Training loss: 1.1316...  0.1211 sec/batch
Epoch: 5/20...  Training Step: 2238...  Training loss: 1.1930...  0.1236 sec/batch
Epoch: 5/20...  Training Step: 2239...  Training loss: 1.2547...  0.1173 sec/batch
Epoch: 5/20...  Training Step: 2240...  Training loss: 1.3189...  0.1208 sec/batch
Epoch: 5/20...  Training Step: 2241...  Training loss: 1.1847...  0.1217 sec/batch
Epoch: 5/20...  Training Step: 2242...  Training loss: 1.2743...  0.1204 sec/batch
Epoch: 5/20...  Training Step: 2243...  Training loss: 1.3364...  0.1240 sec/batch
Epoch: 5/20...  Training Step: 2244...  Training loss: 1.1097...  0.1209 sec/batch
Epoch: 5/20...  Training Step: 2245...  Training loss: 1.2344...  0.1229 sec/batch
Epoch: 5/20...  Training Step: 2246...  Training loss: 1.3282...  0.1238 sec/batch
Epoch: 5/20...  Training Step: 2247...  Training loss: 1.1812...  0.1285 sec/batch
Epoch: 5/20...  Training Step: 2248...  Training loss: 1.2016...  0.1242 sec/batch
Epoch: 5/20...  Training Step: 2249...  Training loss: 1.2162...  0.1287 sec/batch
Epoch: 5/20...  Training Step: 2250...  Training loss: 1.2379...  0.1281 sec/batch
Epoch: 5/20...  Training Step: 2251...  Training loss: 1.1340...  0.1266 sec/batch
Epoch: 5/20...  Training Step: 2252...  Training loss: 1.4828...  0.1338 sec/batch
Epoch: 5/20...  Training Step: 2253...  Training loss: 1.2306...  0.1288 sec/batch
Epoch: 5/20...  Training Step: 2254...  Training loss: 1.1628...  0.1235 sec/batch
Epoch: 5/20...  Training Step: 2255...  Training loss: 1.2777...  0.1235 sec/batch
Epoch: 5/20...  Training Step: 2256...  Training loss: 1.2684...  0.1269 sec/batch
Epoch: 5/20...  Training Step: 2257...  Training loss: 1.2504...  0.1259 sec/batch
Epoch: 5/20...  Training Step: 2258...  Training loss: 1.2345...  0.1280 sec/batch
Epoch: 5/20...  Training Step: 2259...  Training loss: 1.1592...  0.1308 sec/batch
Epoch: 5/20...  Training Step: 2260...  Training loss: 1.3156...  0.1366 sec/batch
Epoch: 5/20...  Training Step: 2261...  Training loss: 1.2982...  0.1433 sec/batch
Epoch: 5/20...  Training Step: 2262...  Training loss: 1.5661...  0.1304 sec/batch
Epoch: 5/20...  Training Step: 2263...  Training loss: 1.2208...  0.1253 sec/batch
Epoch: 5/20...  Training Step: 2264...  Training loss: 1.4968...  0.1316 sec/batch
Epoch: 5/20...  Training Step: 2265...  Training loss: 1.4126...  0.1250 sec/batch
Epoch: 5/20...  Training Step: 2266...  Training loss: 1.2998...  0.1282 sec/batch
Epoch: 5/20...  Training Step: 2267...  Training loss: 1.1956...  0.1308 sec/batch
Epoch: 5/20...  Training Step: 2268...  Training loss: 1.4148...  0.1302 sec/batch
Epoch: 5/20...  Training Step: 2269...  Training loss: 1.3953...  0.1282 sec/batch
Epoch: 5/20...  Training Step: 2270...  Training loss: 1.3235...  0.1295 sec/batch
Epoch: 5/20...  Training Step: 2271...  Training loss: 1.5398...  0.1322 sec/batch
Epoch: 5/20...  Training Step: 2272...  Training loss: 1.4907...  0.1231 sec/batch
Epoch: 5/20...  Training Step: 2273...  Training loss: 1.4090...  0.1257 sec/batch
Epoch: 5/20...  Training Step: 2274...  Training loss: 1.1847...  0.1181 sec/batch
Epoch: 5/20...  Training Step: 2275...  Training loss: 1.3121...  0.1379 sec/batch
Epoch: 5/20...  Training Step: 2276...  Training loss: 1.0537...  0.1335 sec/batch
Epoch: 5/20...  Training Step: 2277...  Training loss: 1.3853...  0.1488 sec/batch
Epoch: 5/20...  Training Step: 2278...  Training loss: 1.3854...  0.1264 sec/batch
Epoch: 5/20...  Training Step: 2279...  Training loss: 1.4398...  0.1309 sec/batch
Epoch: 5/20...  Training Step: 2280...  Training loss: 1.5588...  0.1327 sec/batch
Epoch: 5/20...  Training Step: 2281...  Training loss: 1.2669...  0.1276 sec/batch
Epoch: 5/20...  Training Step: 2282...  Training loss: 1.3803...  0.1310 sec/batch
Epoch: 5/20...  Training Step: 2283...  Training loss: 1.4367...  0.1296 sec/batch
Epoch: 5/20...  Training Step: 2284...  Training loss: 1.4695...  0.1346 sec/batch
Epoch: 5/20...  Training Step: 2285...  Training loss: 1.1804...  0.1366 sec/batch
Epoch: 5/20...  Training Step: 2286...  Training loss: 1.3090...  0.1370 sec/batch
Epoch: 5/20...  Training Step: 2287...  Training loss: 1.4822...  0.1354 sec/batch
Epoch: 5/20...  Training Step: 2288...  Training loss: 1.3859...  0.1343 sec/batch
Epoch: 5/20...  Training Step: 2289...  Training loss: 1.5533...  0.1243 sec/batch
Epoch: 5/20...  Training Step: 2290...  Training loss: 1.4981...  0.1305 sec/batch
Epoch: 5/20...  Training Step: 2291...  Training loss: 1.3120...  0.1345 sec/batch
Epoch: 5/20...  Training Step: 2292...  Training loss: 1.3983...  0.1309 sec/batch
Epoch: 5/20...  Training Step: 2293...  Training loss: 1.1974...  0.1382 sec/batch
Epoch: 5/20...  Training Step: 2294...  Training loss: 1.5368...  0.1251 sec/batch
Epoch: 5/20...  Training Step: 2295...  Training loss: 1.5380...  0.1457 sec/batch
Epoch: 5/20...  Training Step: 2296...  Training loss: 1.6047...  0.1546 sec/batch
Epoch: 5/20...  Training Step: 2297...  Training loss: 1.3679...  0.1530 sec/batch
Epoch: 5/20...  Training Step: 2298...  Training loss: 1.2772...  0.1483 sec/batch
Epoch: 5/20...  Training Step: 2299...  Training loss: 1.4528...  0.1311 sec/batch
Epoch: 5/20...  Training Step: 2300...  Training loss: 1.2750...  0.1419 sec/batch
Epoch: 5/20...  Training Step: 2301...  Training loss: 1.2816...  0.1385 sec/batch
Epoch: 5/20...  Training Step: 2302...  Training loss: 1.3153...  0.1384 sec/batch
Epoch: 5/20...  Training Step: 2303...  Training loss: 1.3220...  0.1355 sec/batch
Epoch: 5/20...  Training Step: 2304...  Training loss: 1.2907...  0.1351 sec/batch
Epoch: 5/20...  Training Step: 2305...  Training loss: 1.3137...  0.1494 sec/batch
Epoch: 5/20...  Training Step: 2306...  Training loss: 1.3615...  0.1339 sec/batch
Epoch: 5/20...  Training Step: 2307...  Training loss: 1.2708...  0.1317 sec/batch
Epoch: 5/20...  Training Step: 2308...  Training loss: 1.3530...  0.1351 sec/batch
Epoch: 5/20...  Training Step: 2309...  Training loss: 1.2334...  0.1302 sec/batch
Epoch: 5/20...  Training Step: 2310...  Training loss: 1.6088...  0.1386 sec/batch
Epoch: 5/20...  Training Step: 2311...  Training loss: 1.3722...  0.1317 sec/batch
Epoch: 5/20...  Training Step: 2312...  Training loss: 1.1236...  0.1342 sec/batch
Epoch: 5/20...  Training Step: 2313...  Training loss: 1.2260...  0.1341 sec/batch
Epoch: 5/20...  Training Step: 2314...  Training loss: 1.1246...  0.1322 sec/batch
Epoch: 5/20...  Training Step: 2315...  Training loss: 1.1872...  0.1337 sec/batch
Epoch: 5/20...  Training Step: 2316...  Training loss: 1.2634...  0.1294 sec/batch
Epoch: 5/20...  Training Step: 2317...  Training loss: 1.4518...  0.1356 sec/batch
Epoch: 5/20...  Training Step: 2318...  Training loss: 1.2658...  0.1281 sec/batch
Epoch: 5/20...  Training Step: 2319...  Training loss: 1.4098...  0.1212 sec/batch
Epoch: 5/20...  Training Step: 2320...  Training loss: 1.2009...  0.1197 sec/batch
Epoch: 6/20...  Training Step: 2321...  Training loss: 1.6311...  0.1237 sec/batch
Epoch: 6/20...  Training Step: 2322...  Training loss: 1.3930...  0.1246 sec/batch
Epoch: 6/20...  Training Step: 2323...  Training loss: 1.3058...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2324...  Training loss: 1.2827...  0.1264 sec/batch
Epoch: 6/20...  Training Step: 2325...  Training loss: 1.3382...  0.1179 sec/batch
Epoch: 6/20...  Training Step: 2326...  Training loss: 1.1043...  0.1229 sec/batch
Epoch: 6/20...  Training Step: 2327...  Training loss: 1.4317...  0.1271 sec/batch
Epoch: 6/20...  Training Step: 2328...  Training loss: 1.1889...  0.1190 sec/batch
Epoch: 6/20...  Training Step: 2329...  Training loss: 1.2613...  0.1216 sec/batch
Epoch: 6/20...  Training Step: 2330...  Training loss: 1.3218...  0.1193 sec/batch
Epoch: 6/20...  Training Step: 2331...  Training loss: 1.2366...  0.1186 sec/batch
Epoch: 6/20...  Training Step: 2332...  Training loss: 1.0599...  0.1200 sec/batch
Epoch: 6/20...  Training Step: 2333...  Training loss: 1.4858...  0.1217 sec/batch
Epoch: 6/20...  Training Step: 2334...  Training loss: 1.0154...  0.1261 sec/batch
Epoch: 6/20...  Training Step: 2335...  Training loss: 1.3003...  0.1286 sec/batch
Epoch: 6/20...  Training Step: 2336...  Training loss: 1.3698...  0.1231 sec/batch
Epoch: 6/20...  Training Step: 2337...  Training loss: 1.1545...  0.1205 sec/batch
Epoch: 6/20...  Training Step: 2338...  Training loss: 1.1700...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2339...  Training loss: 1.2990...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2340...  Training loss: 1.1251...  0.1248 sec/batch
Epoch: 6/20...  Training Step: 2341...  Training loss: 1.3312...  0.1210 sec/batch
Epoch: 6/20...  Training Step: 2342...  Training loss: 1.2236...  0.1220 sec/batch
Epoch: 6/20...  Training Step: 2343...  Training loss: 1.4102...  0.1248 sec/batch
Epoch: 6/20...  Training Step: 2344...  Training loss: 1.1989...  0.1256 sec/batch
Epoch: 6/20...  Training Step: 2345...  Training loss: 1.2348...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2346...  Training loss: 1.3191...  0.1217 sec/batch
Epoch: 6/20...  Training Step: 2347...  Training loss: 1.3903...  0.1200 sec/batch
Epoch: 6/20...  Training Step: 2348...  Training loss: 1.0599...  0.1242 sec/batch
Epoch: 6/20...  Training Step: 2349...  Training loss: 1.2737...  0.1183 sec/batch
Epoch: 6/20...  Training Step: 2350...  Training loss: 1.2724...  0.1209 sec/batch
Epoch: 6/20...  Training Step: 2351...  Training loss: 1.0888...  0.1266 sec/batch
Epoch: 6/20...  Training Step: 2352...  Training loss: 1.2256...  0.1196 sec/batch
Epoch: 6/20...  Training Step: 2353...  Training loss: 1.0456...  0.1243 sec/batch
Epoch: 6/20...  Training Step: 2354...  Training loss: 1.0945...  0.1174 sec/batch
Epoch: 6/20...  Training Step: 2355...  Training loss: 1.1539...  0.1204 sec/batch
Epoch: 6/20...  Training Step: 2356...  Training loss: 1.2595...  0.1191 sec/batch
Epoch: 6/20...  Training Step: 2357...  Training loss: 1.2769...  0.1265 sec/batch
Epoch: 6/20...  Training Step: 2358...  Training loss: 1.0836...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2359...  Training loss: 1.1771...  0.1263 sec/batch
Epoch: 6/20...  Training Step: 2360...  Training loss: 1.4154...  0.1202 sec/batch
Epoch: 6/20...  Training Step: 2361...  Training loss: 1.3142...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2362...  Training loss: 1.1953...  0.1244 sec/batch
Epoch: 6/20...  Training Step: 2363...  Training loss: 1.3591...  0.1226 sec/batch
Epoch: 6/20...  Training Step: 2364...  Training loss: 1.0060...  0.1268 sec/batch
Epoch: 6/20...  Training Step: 2365...  Training loss: 1.2036...  0.1237 sec/batch
Epoch: 6/20...  Training Step: 2366...  Training loss: 1.1471...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2367...  Training loss: 1.2212...  0.1229 sec/batch
Epoch: 6/20...  Training Step: 2368...  Training loss: 1.2131...  0.1190 sec/batch
Epoch: 6/20...  Training Step: 2369...  Training loss: 1.2261...  0.1263 sec/batch
Epoch: 6/20...  Training Step: 2370...  Training loss: 1.3443...  0.1243 sec/batch
Epoch: 6/20...  Training Step: 2371...  Training loss: 1.2670...  0.1235 sec/batch
Epoch: 6/20...  Training Step: 2372...  Training loss: 1.2596...  0.1217 sec/batch
Epoch: 6/20...  Training Step: 2373...  Training loss: 1.4464...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2374...  Training loss: 1.4029...  0.1227 sec/batch
Epoch: 6/20...  Training Step: 2375...  Training loss: 1.0272...  0.1220 sec/batch
Epoch: 6/20...  Training Step: 2376...  Training loss: 1.1753...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2377...  Training loss: 1.3865...  0.1190 sec/batch
Epoch: 6/20...  Training Step: 2378...  Training loss: 1.3641...  0.1202 sec/batch
Epoch: 6/20...  Training Step: 2379...  Training loss: 1.0820...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2380...  Training loss: 1.1591...  0.1218 sec/batch
Epoch: 6/20...  Training Step: 2381...  Training loss: 1.1741...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2382...  Training loss: 1.4185...  0.1227 sec/batch
Epoch: 6/20...  Training Step: 2383...  Training loss: 1.2276...  0.1212 sec/batch
Epoch: 6/20...  Training Step: 2384...  Training loss: 1.2727...  0.1269 sec/batch
Epoch: 6/20...  Training Step: 2385...  Training loss: 1.0896...  0.1365 sec/batch
Epoch: 6/20...  Training Step: 2386...  Training loss: 1.4431...  0.1241 sec/batch
Epoch: 6/20...  Training Step: 2387...  Training loss: 1.2559...  0.1264 sec/batch
Epoch: 6/20...  Training Step: 2388...  Training loss: 1.3154...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2389...  Training loss: 1.1405...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2390...  Training loss: 1.2699...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2391...  Training loss: 1.4147...  0.1253 sec/batch
Epoch: 6/20...  Training Step: 2392...  Training loss: 1.2288...  0.1194 sec/batch
Epoch: 6/20...  Training Step: 2393...  Training loss: 1.3025...  0.1268 sec/batch
Epoch: 6/20...  Training Step: 2394...  Training loss: 1.1623...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2395...  Training loss: 1.4626...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2396...  Training loss: 1.1778...  0.1247 sec/batch
Epoch: 6/20...  Training Step: 2397...  Training loss: 1.1353...  0.1226 sec/batch
Epoch: 6/20...  Training Step: 2398...  Training loss: 1.2984...  0.1285 sec/batch
Epoch: 6/20...  Training Step: 2399...  Training loss: 1.2978...  0.1203 sec/batch
Epoch: 6/20...  Training Step: 2400...  Training loss: 1.1298...  0.1199 sec/batch
Epoch: 6/20...  Training Step: 2401...  Training loss: 1.4595...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2402...  Training loss: 1.2626...  0.1208 sec/batch
Epoch: 6/20...  Training Step: 2403...  Training loss: 1.2375...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2404...  Training loss: 1.4104...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2405...  Training loss: 1.2687...  0.1203 sec/batch
Epoch: 6/20...  Training Step: 2406...  Training loss: 1.2620...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2407...  Training loss: 1.1840...  0.1194 sec/batch
Epoch: 6/20...  Training Step: 2408...  Training loss: 1.3223...  0.1195 sec/batch
Epoch: 6/20...  Training Step: 2409...  Training loss: 1.4587...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2410...  Training loss: 1.2219...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2411...  Training loss: 1.3596...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2412...  Training loss: 1.5247...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2413...  Training loss: 1.0828...  0.1264 sec/batch
Epoch: 6/20...  Training Step: 2414...  Training loss: 1.2971...  0.1184 sec/batch
Epoch: 6/20...  Training Step: 2415...  Training loss: 1.3302...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2416...  Training loss: 1.2316...  0.1236 sec/batch
Epoch: 6/20...  Training Step: 2417...  Training loss: 1.5149...  0.1278 sec/batch
Epoch: 6/20...  Training Step: 2418...  Training loss: 1.3349...  0.1216 sec/batch
Epoch: 6/20...  Training Step: 2419...  Training loss: 1.3305...  0.1198 sec/batch
Epoch: 6/20...  Training Step: 2420...  Training loss: 1.1834...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2421...  Training loss: 1.3429...  0.1213 sec/batch
Epoch: 6/20...  Training Step: 2422...  Training loss: 1.5014...  0.1210 sec/batch
Epoch: 6/20...  Training Step: 2423...  Training loss: 1.4785...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2424...  Training loss: 1.2383...  0.1231 sec/batch
Epoch: 6/20...  Training Step: 2425...  Training loss: 1.4800...  0.1201 sec/batch
Epoch: 6/20...  Training Step: 2426...  Training loss: 1.5512...  0.1184 sec/batch
Epoch: 6/20...  Training Step: 2427...  Training loss: 1.4062...  0.1216 sec/batch
Epoch: 6/20...  Training Step: 2428...  Training loss: 1.4362...  0.1213 sec/batch
Epoch: 6/20...  Training Step: 2429...  Training loss: 1.4424...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2430...  Training loss: 1.2157...  0.1208 sec/batch
Epoch: 6/20...  Training Step: 2431...  Training loss: 1.3395...  0.1193 sec/batch
Epoch: 6/20...  Training Step: 2432...  Training loss: 1.2810...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2433...  Training loss: 1.3109...  0.1221 sec/batch
Epoch: 6/20...  Training Step: 2434...  Training loss: 1.5148...  0.1247 sec/batch
Epoch: 6/20...  Training Step: 2435...  Training loss: 1.3909...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2436...  Training loss: 1.1984...  0.1239 sec/batch
Epoch: 6/20...  Training Step: 2437...  Training loss: 1.4642...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2438...  Training loss: 1.4342...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2439...  Training loss: 1.3017...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2440...  Training loss: 1.1203...  0.1204 sec/batch
Epoch: 6/20...  Training Step: 2441...  Training loss: 1.3539...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2442...  Training loss: 1.3851...  0.1246 sec/batch
Epoch: 6/20...  Training Step: 2443...  Training loss: 1.3193...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2444...  Training loss: 1.5250...  0.1198 sec/batch
Epoch: 6/20...  Training Step: 2445...  Training loss: 1.3380...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2446...  Training loss: 1.1748...  0.1196 sec/batch
Epoch: 6/20...  Training Step: 2447...  Training loss: 1.2455...  0.1241 sec/batch
Epoch: 6/20...  Training Step: 2448...  Training loss: 1.3589...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2449...  Training loss: 1.3846...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2450...  Training loss: 1.2957...  0.1212 sec/batch
Epoch: 6/20...  Training Step: 2451...  Training loss: 1.4721...  0.1262 sec/batch
Epoch: 6/20...  Training Step: 2452...  Training loss: 1.3400...  0.1339 sec/batch
Epoch: 6/20...  Training Step: 2453...  Training loss: 1.3076...  0.1217 sec/batch
Epoch: 6/20...  Training Step: 2454...  Training loss: 1.4066...  0.1233 sec/batch
Epoch: 6/20...  Training Step: 2455...  Training loss: 1.2339...  0.1223 sec/batch
Epoch: 6/20...  Training Step: 2456...  Training loss: 1.0549...  0.1212 sec/batch
Epoch: 6/20...  Training Step: 2457...  Training loss: 1.1928...  0.1267 sec/batch
Epoch: 6/20...  Training Step: 2458...  Training loss: 1.3019...  0.1396 sec/batch
Epoch: 6/20...  Training Step: 2459...  Training loss: 1.2046...  0.1308 sec/batch
Epoch: 6/20...  Training Step: 2460...  Training loss: 1.2998...  0.1185 sec/batch
Epoch: 6/20...  Training Step: 2461...  Training loss: 1.2088...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2462...  Training loss: 1.2659...  0.1196 sec/batch
Epoch: 6/20...  Training Step: 2463...  Training loss: 1.1465...  0.1244 sec/batch
Epoch: 6/20...  Training Step: 2464...  Training loss: 1.3121...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2465...  Training loss: 1.2393...  0.1201 sec/batch
Epoch: 6/20...  Training Step: 2466...  Training loss: 1.2197...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2467...  Training loss: 1.3598...  0.1209 sec/batch
Epoch: 6/20...  Training Step: 2468...  Training loss: 1.1963...  0.1239 sec/batch
Epoch: 6/20...  Training Step: 2469...  Training loss: 1.2691...  0.1231 sec/batch
Epoch: 6/20...  Training Step: 2470...  Training loss: 1.3611...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2471...  Training loss: 1.3896...  0.1249 sec/batch
Epoch: 6/20...  Training Step: 2472...  Training loss: 1.3983...  0.1246 sec/batch
Epoch: 6/20...  Training Step: 2473...  Training loss: 1.4176...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2474...  Training loss: 1.3656...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2475...  Training loss: 1.2919...  0.1249 sec/batch
Epoch: 6/20...  Training Step: 2476...  Training loss: 1.2340...  0.1223 sec/batch
Epoch: 6/20...  Training Step: 2477...  Training loss: 1.2620...  0.1194 sec/batch
Epoch: 6/20...  Training Step: 2478...  Training loss: 1.1818...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2479...  Training loss: 1.2103...  0.1232 sec/batch
Epoch: 6/20...  Training Step: 2480...  Training loss: 1.2561...  0.1249 sec/batch
Epoch: 6/20...  Training Step: 2481...  Training loss: 1.4544...  0.1183 sec/batch
Epoch: 6/20...  Training Step: 2482...  Training loss: 1.3303...  0.1190 sec/batch
Epoch: 6/20...  Training Step: 2483...  Training loss: 1.3413...  0.1236 sec/batch
Epoch: 6/20...  Training Step: 2484...  Training loss: 1.2096...  0.1217 sec/batch
Epoch: 6/20...  Training Step: 2485...  Training loss: 1.3481...  0.1235 sec/batch
Epoch: 6/20...  Training Step: 2486...  Training loss: 1.2442...  0.1239 sec/batch
Epoch: 6/20...  Training Step: 2487...  Training loss: 1.2041...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2488...  Training loss: 1.5734...  0.1204 sec/batch
Epoch: 6/20...  Training Step: 2489...  Training loss: 1.2203...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2490...  Training loss: 1.3087...  0.1199 sec/batch
Epoch: 6/20...  Training Step: 2491...  Training loss: 1.4068...  0.1152 sec/batch
Epoch: 6/20...  Training Step: 2492...  Training loss: 1.4106...  0.1196 sec/batch
Epoch: 6/20...  Training Step: 2493...  Training loss: 1.1642...  0.1206 sec/batch
Epoch: 6/20...  Training Step: 2494...  Training loss: 1.2704...  0.1223 sec/batch
Epoch: 6/20...  Training Step: 2495...  Training loss: 1.3372...  0.1202 sec/batch
Epoch: 6/20...  Training Step: 2496...  Training loss: 1.1189...  0.1197 sec/batch
Epoch: 6/20...  Training Step: 2497...  Training loss: 1.0800...  0.1204 sec/batch
Epoch: 6/20...  Training Step: 2498...  Training loss: 1.4059...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2499...  Training loss: 1.1671...  0.1212 sec/batch
Epoch: 6/20...  Training Step: 2500...  Training loss: 1.4284...  0.1183 sec/batch
Epoch: 6/20...  Training Step: 2501...  Training loss: 1.1956...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2502...  Training loss: 1.3876...  0.1196 sec/batch
Epoch: 6/20...  Training Step: 2503...  Training loss: 1.2569...  0.1173 sec/batch
Epoch: 6/20...  Training Step: 2504...  Training loss: 1.2678...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2505...  Training loss: 1.4282...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2506...  Training loss: 1.3326...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2507...  Training loss: 1.4063...  0.1292 sec/batch
Epoch: 6/20...  Training Step: 2508...  Training loss: 1.1530...  0.1300 sec/batch
Epoch: 6/20...  Training Step: 2509...  Training loss: 1.4763...  0.1298 sec/batch
Epoch: 6/20...  Training Step: 2510...  Training loss: 1.1846...  0.1266 sec/batch
Epoch: 6/20...  Training Step: 2511...  Training loss: 1.2322...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2512...  Training loss: 1.2969...  0.1256 sec/batch
Epoch: 6/20...  Training Step: 2513...  Training loss: 1.2889...  0.1277 sec/batch
Epoch: 6/20...  Training Step: 2514...  Training loss: 1.3047...  0.1170 sec/batch
Epoch: 6/20...  Training Step: 2515...  Training loss: 1.4125...  0.1248 sec/batch
Epoch: 6/20...  Training Step: 2516...  Training loss: 1.2930...  0.1305 sec/batch
Epoch: 6/20...  Training Step: 2517...  Training loss: 1.2578...  0.1283 sec/batch
Epoch: 6/20...  Training Step: 2518...  Training loss: 1.4139...  0.1359 sec/batch
Epoch: 6/20...  Training Step: 2519...  Training loss: 1.0733...  0.1243 sec/batch
Epoch: 6/20...  Training Step: 2520...  Training loss: 1.2980...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2521...  Training loss: 1.2366...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2522...  Training loss: 1.3186...  0.1210 sec/batch
Epoch: 6/20...  Training Step: 2523...  Training loss: 1.2487...  0.1212 sec/batch
Epoch: 6/20...  Training Step: 2524...  Training loss: 1.4179...  0.1236 sec/batch
Epoch: 6/20...  Training Step: 2525...  Training loss: 1.2025...  0.1221 sec/batch
Epoch: 6/20...  Training Step: 2526...  Training loss: 1.2384...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2527...  Training loss: 1.1733...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2528...  Training loss: 1.4089...  0.1582 sec/batch
Epoch: 6/20...  Training Step: 2529...  Training loss: 1.3204...  0.1262 sec/batch
Epoch: 6/20...  Training Step: 2530...  Training loss: 1.0509...  0.1227 sec/batch
Epoch: 6/20...  Training Step: 2531...  Training loss: 1.0744...  0.1190 sec/batch
Epoch: 6/20...  Training Step: 2532...  Training loss: 1.3947...  0.1246 sec/batch
Epoch: 6/20...  Training Step: 2533...  Training loss: 1.4841...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2534...  Training loss: 1.2203...  0.1203 sec/batch
Epoch: 6/20...  Training Step: 2535...  Training loss: 1.5004...  0.1235 sec/batch
Epoch: 6/20...  Training Step: 2536...  Training loss: 1.2421...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2537...  Training loss: 1.3332...  0.1194 sec/batch
Epoch: 6/20...  Training Step: 2538...  Training loss: 1.3127...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2539...  Training loss: 1.4510...  0.1210 sec/batch
Epoch: 6/20...  Training Step: 2540...  Training loss: 1.3265...  0.1195 sec/batch
Epoch: 6/20...  Training Step: 2541...  Training loss: 1.2189...  0.1242 sec/batch
Epoch: 6/20...  Training Step: 2542...  Training loss: 1.4283...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2543...  Training loss: 1.4004...  0.1185 sec/batch
Epoch: 6/20...  Training Step: 2544...  Training loss: 1.4495...  0.1283 sec/batch
Epoch: 6/20...  Training Step: 2545...  Training loss: 1.3090...  0.1344 sec/batch
Epoch: 6/20...  Training Step: 2546...  Training loss: 1.4558...  0.1292 sec/batch
Epoch: 6/20...  Training Step: 2547...  Training loss: 1.5184...  0.1283 sec/batch
Epoch: 6/20...  Training Step: 2548...  Training loss: 1.1978...  0.1278 sec/batch
Epoch: 6/20...  Training Step: 2549...  Training loss: 1.3501...  0.1223 sec/batch
Epoch: 6/20...  Training Step: 2550...  Training loss: 1.3034...  0.1223 sec/batch
Epoch: 6/20...  Training Step: 2551...  Training loss: 1.2627...  0.1209 sec/batch
Epoch: 6/20...  Training Step: 2552...  Training loss: 1.2194...  0.1185 sec/batch
Epoch: 6/20...  Training Step: 2553...  Training loss: 1.5413...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2554...  Training loss: 1.2447...  0.1181 sec/batch
Epoch: 6/20...  Training Step: 2555...  Training loss: 1.5604...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2556...  Training loss: 1.2957...  0.1220 sec/batch
Epoch: 6/20...  Training Step: 2557...  Training loss: 1.5167...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2558...  Training loss: 1.1945...  0.1204 sec/batch
Epoch: 6/20...  Training Step: 2559...  Training loss: 1.4043...  0.1209 sec/batch
Epoch: 6/20...  Training Step: 2560...  Training loss: 1.5053...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2561...  Training loss: 1.2642...  0.1262 sec/batch
Epoch: 6/20...  Training Step: 2562...  Training loss: 1.2124...  0.1272 sec/batch
Epoch: 6/20...  Training Step: 2563...  Training loss: 1.3084...  0.1257 sec/batch
Epoch: 6/20...  Training Step: 2564...  Training loss: 1.3336...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2565...  Training loss: 1.3394...  0.1226 sec/batch
Epoch: 6/20...  Training Step: 2566...  Training loss: 1.1771...  0.1193 sec/batch
Epoch: 6/20...  Training Step: 2567...  Training loss: 1.2187...  0.1249 sec/batch
Epoch: 6/20...  Training Step: 2568...  Training loss: 1.4689...  0.1232 sec/batch
Epoch: 6/20...  Training Step: 2569...  Training loss: 1.2879...  0.1247 sec/batch
Epoch: 6/20...  Training Step: 2570...  Training loss: 1.2866...  0.1201 sec/batch
Epoch: 6/20...  Training Step: 2571...  Training loss: 1.4767...  0.1183 sec/batch
Epoch: 6/20...  Training Step: 2572...  Training loss: 1.3109...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2573...  Training loss: 1.2209...  0.1252 sec/batch
Epoch: 6/20...  Training Step: 2574...  Training loss: 1.2947...  0.1252 sec/batch
Epoch: 6/20...  Training Step: 2575...  Training loss: 1.2359...  0.1206 sec/batch
Epoch: 6/20...  Training Step: 2576...  Training loss: 1.2758...  0.1251 sec/batch
Epoch: 6/20...  Training Step: 2577...  Training loss: 1.3215...  0.1220 sec/batch
Epoch: 6/20...  Training Step: 2578...  Training loss: 1.2118...  0.1233 sec/batch
Epoch: 6/20...  Training Step: 2579...  Training loss: 1.1892...  0.1229 sec/batch
Epoch: 6/20...  Training Step: 2580...  Training loss: 1.2476...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2581...  Training loss: 1.3397...  0.1201 sec/batch
Epoch: 6/20...  Training Step: 2582...  Training loss: 1.3370...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2583...  Training loss: 1.2363...  0.1184 sec/batch
Epoch: 6/20...  Training Step: 2584...  Training loss: 1.4695...  0.1232 sec/batch
Epoch: 6/20...  Training Step: 2585...  Training loss: 1.4374...  0.1187 sec/batch
Epoch: 6/20...  Training Step: 2586...  Training loss: 1.3395...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2587...  Training loss: 1.5199...  0.1237 sec/batch
Epoch: 6/20...  Training Step: 2588...  Training loss: 1.4206...  0.1168 sec/batch
Epoch: 6/20...  Training Step: 2589...  Training loss: 1.3456...  0.1227 sec/batch
Epoch: 6/20...  Training Step: 2590...  Training loss: 1.5636...  0.1204 sec/batch
Epoch: 6/20...  Training Step: 2591...  Training loss: 1.4367...  0.1188 sec/batch
Epoch: 6/20...  Training Step: 2592...  Training loss: 1.5237...  0.1191 sec/batch
Epoch: 6/20...  Training Step: 2593...  Training loss: 1.4519...  0.1200 sec/batch
Epoch: 6/20...  Training Step: 2594...  Training loss: 1.3393...  0.1347 sec/batch
Epoch: 6/20...  Training Step: 2595...  Training loss: 1.3891...  0.1256 sec/batch
Epoch: 6/20...  Training Step: 2596...  Training loss: 1.3778...  0.1285 sec/batch
Epoch: 6/20...  Training Step: 2597...  Training loss: 1.3558...  0.1312 sec/batch
Epoch: 6/20...  Training Step: 2598...  Training loss: 1.5473...  0.1301 sec/batch
Epoch: 6/20...  Training Step: 2599...  Training loss: 1.2889...  0.1273 sec/batch
Epoch: 6/20...  Training Step: 2600...  Training loss: 1.2634...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2601...  Training loss: 1.2807...  0.1258 sec/batch
Epoch: 6/20...  Training Step: 2602...  Training loss: 1.2044...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2603...  Training loss: 1.2936...  0.1293 sec/batch
Epoch: 6/20...  Training Step: 2604...  Training loss: 1.4126...  0.1272 sec/batch
Epoch: 6/20...  Training Step: 2605...  Training loss: 1.2088...  0.1256 sec/batch
Epoch: 6/20...  Training Step: 2606...  Training loss: 1.3395...  0.1251 sec/batch
Epoch: 6/20...  Training Step: 2607...  Training loss: 1.3479...  0.1249 sec/batch
Epoch: 6/20...  Training Step: 2608...  Training loss: 1.3363...  0.1241 sec/batch
Epoch: 6/20...  Training Step: 2609...  Training loss: 1.3678...  0.1269 sec/batch
Epoch: 6/20...  Training Step: 2610...  Training loss: 1.2880...  0.1178 sec/batch
Epoch: 6/20...  Training Step: 2611...  Training loss: 1.2258...  0.1240 sec/batch
Epoch: 6/20...  Training Step: 2612...  Training loss: 1.2708...  0.1263 sec/batch
Epoch: 6/20...  Training Step: 2613...  Training loss: 1.1810...  0.1246 sec/batch
Epoch: 6/20...  Training Step: 2614...  Training loss: 1.3215...  0.1242 sec/batch
Epoch: 6/20...  Training Step: 2615...  Training loss: 1.2815...  0.1235 sec/batch
Epoch: 6/20...  Training Step: 2616...  Training loss: 1.6054...  0.1163 sec/batch
Epoch: 6/20...  Training Step: 2617...  Training loss: 1.2212...  0.1251 sec/batch
Epoch: 6/20...  Training Step: 2618...  Training loss: 1.2007...  0.1281 sec/batch
Epoch: 6/20...  Training Step: 2619...  Training loss: 1.2158...  0.1251 sec/batch
Epoch: 6/20...  Training Step: 2620...  Training loss: 1.1755...  0.1238 sec/batch
Epoch: 6/20...  Training Step: 2621...  Training loss: 1.3614...  0.1213 sec/batch
Epoch: 6/20...  Training Step: 2622...  Training loss: 1.2741...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2623...  Training loss: 1.0992...  0.1220 sec/batch
Epoch: 6/20...  Training Step: 2624...  Training loss: 1.4738...  0.1251 sec/batch
Epoch: 6/20...  Training Step: 2625...  Training loss: 1.2304...  0.1324 sec/batch
Epoch: 6/20...  Training Step: 2626...  Training loss: 1.3359...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2627...  Training loss: 1.3527...  0.1223 sec/batch
Epoch: 6/20...  Training Step: 2628...  Training loss: 1.5696...  0.1153 sec/batch
Epoch: 6/20...  Training Step: 2629...  Training loss: 1.3618...  0.1155 sec/batch
Epoch: 6/20...  Training Step: 2630...  Training loss: 1.3211...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2631...  Training loss: 1.3071...  0.1198 sec/batch
Epoch: 6/20...  Training Step: 2632...  Training loss: 1.2305...  0.1288 sec/batch
Epoch: 6/20...  Training Step: 2633...  Training loss: 1.2516...  0.1230 sec/batch
Epoch: 6/20...  Training Step: 2634...  Training loss: 1.3357...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2635...  Training loss: 1.0482...  0.1205 sec/batch
Epoch: 6/20...  Training Step: 2636...  Training loss: 1.1613...  0.1237 sec/batch
Epoch: 6/20...  Training Step: 2637...  Training loss: 1.0563...  0.1333 sec/batch
Epoch: 6/20...  Training Step: 2638...  Training loss: 1.3082...  0.1238 sec/batch
Epoch: 6/20...  Training Step: 2639...  Training loss: 1.2320...  0.1343 sec/batch
Epoch: 6/20...  Training Step: 2640...  Training loss: 1.2189...  0.1269 sec/batch
Epoch: 6/20...  Training Step: 2641...  Training loss: 1.1423...  0.1289 sec/batch
Epoch: 6/20...  Training Step: 2642...  Training loss: 1.4718...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2643...  Training loss: 1.2052...  0.1275 sec/batch
Epoch: 6/20...  Training Step: 2644...  Training loss: 1.1737...  0.1191 sec/batch
Epoch: 6/20...  Training Step: 2645...  Training loss: 1.1290...  0.1218 sec/batch
Epoch: 6/20...  Training Step: 2646...  Training loss: 1.1563...  0.1336 sec/batch
Epoch: 6/20...  Training Step: 2647...  Training loss: 1.2487...  0.1340 sec/batch
Epoch: 6/20...  Training Step: 2648...  Training loss: 1.2252...  0.1251 sec/batch
Epoch: 6/20...  Training Step: 2649...  Training loss: 1.2746...  0.1229 sec/batch
Epoch: 6/20...  Training Step: 2650...  Training loss: 1.1701...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2651...  Training loss: 1.2769...  0.1206 sec/batch
Epoch: 6/20...  Training Step: 2652...  Training loss: 1.2191...  0.1215 sec/batch
Epoch: 6/20...  Training Step: 2653...  Training loss: 1.1704...  0.1262 sec/batch
Epoch: 6/20...  Training Step: 2654...  Training loss: 1.2015...  0.1232 sec/batch
Epoch: 6/20...  Training Step: 2655...  Training loss: 1.4035...  0.1229 sec/batch
Epoch: 6/20...  Training Step: 2656...  Training loss: 1.1808...  0.1194 sec/batch
Epoch: 6/20...  Training Step: 2657...  Training loss: 1.2434...  0.1213 sec/batch
Epoch: 6/20...  Training Step: 2658...  Training loss: 1.1943...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2659...  Training loss: 1.3294...  0.1212 sec/batch
Epoch: 6/20...  Training Step: 2660...  Training loss: 1.1892...  0.1229 sec/batch
Epoch: 6/20...  Training Step: 2661...  Training loss: 1.1274...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2662...  Training loss: 1.2657...  0.1130 sec/batch
Epoch: 6/20...  Training Step: 2663...  Training loss: 1.0259...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2664...  Training loss: 1.3320...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2665...  Training loss: 1.1805...  0.1227 sec/batch
Epoch: 6/20...  Training Step: 2666...  Training loss: 1.0993...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2667...  Training loss: 1.0682...  0.1194 sec/batch
Epoch: 6/20...  Training Step: 2668...  Training loss: 1.4938...  0.1202 sec/batch
Epoch: 6/20...  Training Step: 2669...  Training loss: 1.2091...  0.1202 sec/batch
Epoch: 6/20...  Training Step: 2670...  Training loss: 1.2793...  0.1198 sec/batch
Epoch: 6/20...  Training Step: 2671...  Training loss: 1.2122...  0.1252 sec/batch
Epoch: 6/20...  Training Step: 2672...  Training loss: 1.1276...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2673...  Training loss: 1.0146...  0.1189 sec/batch
Epoch: 6/20...  Training Step: 2674...  Training loss: 0.9861...  0.1251 sec/batch
Epoch: 6/20...  Training Step: 2675...  Training loss: 1.2772...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2676...  Training loss: 1.1507...  0.1239 sec/batch
Epoch: 6/20...  Training Step: 2677...  Training loss: 1.1612...  0.1197 sec/batch
Epoch: 6/20...  Training Step: 2678...  Training loss: 1.3257...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2679...  Training loss: 1.5169...  0.1244 sec/batch
Epoch: 6/20...  Training Step: 2680...  Training loss: 1.0488...  0.1287 sec/batch
Epoch: 6/20...  Training Step: 2681...  Training loss: 1.5007...  0.1309 sec/batch
Epoch: 6/20...  Training Step: 2682...  Training loss: 1.1452...  0.1189 sec/batch
Epoch: 6/20...  Training Step: 2683...  Training loss: 1.1118...  0.1197 sec/batch
Epoch: 6/20...  Training Step: 2684...  Training loss: 1.1954...  0.1199 sec/batch
Epoch: 6/20...  Training Step: 2685...  Training loss: 1.1546...  0.1231 sec/batch
Epoch: 6/20...  Training Step: 2686...  Training loss: 1.3192...  0.1176 sec/batch
Epoch: 6/20...  Training Step: 2687...  Training loss: 1.2011...  0.1250 sec/batch
Epoch: 6/20...  Training Step: 2688...  Training loss: 1.4733...  0.1239 sec/batch
Epoch: 6/20...  Training Step: 2689...  Training loss: 1.2262...  0.1239 sec/batch
Epoch: 6/20...  Training Step: 2690...  Training loss: 1.1838...  0.1271 sec/batch
Epoch: 6/20...  Training Step: 2691...  Training loss: 1.1633...  0.1272 sec/batch
Epoch: 6/20...  Training Step: 2692...  Training loss: 1.2878...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2693...  Training loss: 1.2619...  0.1254 sec/batch
Epoch: 6/20...  Training Step: 2694...  Training loss: 1.3523...  0.1234 sec/batch
Epoch: 6/20...  Training Step: 2695...  Training loss: 1.2359...  0.1233 sec/batch
Epoch: 6/20...  Training Step: 2696...  Training loss: 1.2706...  0.1244 sec/batch
Epoch: 6/20...  Training Step: 2697...  Training loss: 1.3521...  0.1233 sec/batch
Epoch: 6/20...  Training Step: 2698...  Training loss: 1.3451...  0.1238 sec/batch
Epoch: 6/20...  Training Step: 2699...  Training loss: 1.1686...  0.1206 sec/batch
Epoch: 6/20...  Training Step: 2700...  Training loss: 1.3626...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2701...  Training loss: 1.1392...  0.1194 sec/batch
Epoch: 6/20...  Training Step: 2702...  Training loss: 1.1876...  0.1217 sec/batch
Epoch: 6/20...  Training Step: 2703...  Training loss: 1.2004...  0.1248 sec/batch
Epoch: 6/20...  Training Step: 2704...  Training loss: 1.2247...  0.1201 sec/batch
Epoch: 6/20...  Training Step: 2705...  Training loss: 1.1215...  0.1227 sec/batch
Epoch: 6/20...  Training Step: 2706...  Training loss: 1.2892...  0.1206 sec/batch
Epoch: 6/20...  Training Step: 2707...  Training loss: 1.2517...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2708...  Training loss: 1.0285...  0.1183 sec/batch
Epoch: 6/20...  Training Step: 2709...  Training loss: 1.0774...  0.1257 sec/batch
Epoch: 6/20...  Training Step: 2710...  Training loss: 1.2098...  0.1220 sec/batch
Epoch: 6/20...  Training Step: 2711...  Training loss: 1.0020...  0.1178 sec/batch
Epoch: 6/20...  Training Step: 2712...  Training loss: 1.0795...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2713...  Training loss: 1.0765...  0.1208 sec/batch
Epoch: 6/20...  Training Step: 2714...  Training loss: 1.2854...  0.1189 sec/batch
Epoch: 6/20...  Training Step: 2715...  Training loss: 1.1075...  0.1223 sec/batch
Epoch: 6/20...  Training Step: 2716...  Training loss: 1.3624...  0.1243 sec/batch
Epoch: 6/20...  Training Step: 2717...  Training loss: 1.1206...  0.1196 sec/batch
Epoch: 6/20...  Training Step: 2718...  Training loss: 1.1915...  0.1262 sec/batch
Epoch: 6/20...  Training Step: 2719...  Training loss: 1.1577...  0.1227 sec/batch
Epoch: 6/20...  Training Step: 2720...  Training loss: 1.1904...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2721...  Training loss: 1.1397...  0.1194 sec/batch
Epoch: 6/20...  Training Step: 2722...  Training loss: 1.1919...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2723...  Training loss: 1.1319...  0.1285 sec/batch
Epoch: 6/20...  Training Step: 2724...  Training loss: 1.3126...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2725...  Training loss: 1.2256...  0.1232 sec/batch
Epoch: 6/20...  Training Step: 2726...  Training loss: 1.4587...  0.1203 sec/batch
Epoch: 6/20...  Training Step: 2727...  Training loss: 1.2153...  0.1263 sec/batch
Epoch: 6/20...  Training Step: 2728...  Training loss: 1.4818...  0.1203 sec/batch
Epoch: 6/20...  Training Step: 2729...  Training loss: 1.3395...  0.1196 sec/batch
Epoch: 6/20...  Training Step: 2730...  Training loss: 1.2651...  0.1245 sec/batch
Epoch: 6/20...  Training Step: 2731...  Training loss: 1.1330...  0.1297 sec/batch
Epoch: 6/20...  Training Step: 2732...  Training loss: 1.3653...  0.1308 sec/batch
Epoch: 6/20...  Training Step: 2733...  Training loss: 1.3198...  0.1308 sec/batch
Epoch: 6/20...  Training Step: 2734...  Training loss: 1.3021...  0.1338 sec/batch
Epoch: 6/20...  Training Step: 2735...  Training loss: 1.5001...  0.1254 sec/batch
Epoch: 6/20...  Training Step: 2736...  Training loss: 1.4025...  0.1309 sec/batch
Epoch: 6/20...  Training Step: 2737...  Training loss: 1.3685...  0.1307 sec/batch
Epoch: 6/20...  Training Step: 2738...  Training loss: 1.2107...  0.1322 sec/batch
Epoch: 6/20...  Training Step: 2739...  Training loss: 1.2744...  0.1286 sec/batch
Epoch: 6/20...  Training Step: 2740...  Training loss: 1.0829...  0.1244 sec/batch
Epoch: 6/20...  Training Step: 2741...  Training loss: 1.4387...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2742...  Training loss: 1.2008...  0.1229 sec/batch
Epoch: 6/20...  Training Step: 2743...  Training loss: 1.3727...  0.1290 sec/batch
Epoch: 6/20...  Training Step: 2744...  Training loss: 1.4844...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2745...  Training loss: 1.2374...  0.1259 sec/batch
Epoch: 6/20...  Training Step: 2746...  Training loss: 1.2671...  0.1257 sec/batch
Epoch: 6/20...  Training Step: 2747...  Training loss: 1.3834...  0.1239 sec/batch
Epoch: 6/20...  Training Step: 2748...  Training loss: 1.3830...  0.1199 sec/batch
Epoch: 6/20...  Training Step: 2749...  Training loss: 1.1991...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2750...  Training loss: 1.2974...  0.1214 sec/batch
Epoch: 6/20...  Training Step: 2751...  Training loss: 1.5204...  0.1175 sec/batch
Epoch: 6/20...  Training Step: 2752...  Training loss: 1.2199...  0.1257 sec/batch
Epoch: 6/20...  Training Step: 2753...  Training loss: 1.6371...  0.1263 sec/batch
Epoch: 6/20...  Training Step: 2754...  Training loss: 1.4243...  0.1258 sec/batch
Epoch: 6/20...  Training Step: 2755...  Training loss: 1.2148...  0.1185 sec/batch
Epoch: 6/20...  Training Step: 2756...  Training loss: 1.3230...  0.1256 sec/batch
Epoch: 6/20...  Training Step: 2757...  Training loss: 1.2326...  0.1266 sec/batch
Epoch: 6/20...  Training Step: 2758...  Training loss: 1.4747...  0.1204 sec/batch
Epoch: 6/20...  Training Step: 2759...  Training loss: 1.4895...  0.1261 sec/batch
Epoch: 6/20...  Training Step: 2760...  Training loss: 1.6230...  0.1221 sec/batch
Epoch: 6/20...  Training Step: 2761...  Training loss: 1.2875...  0.1209 sec/batch
Epoch: 6/20...  Training Step: 2762...  Training loss: 1.2346...  0.1221 sec/batch
Epoch: 6/20...  Training Step: 2763...  Training loss: 1.3268...  0.1191 sec/batch
Epoch: 6/20...  Training Step: 2764...  Training loss: 1.2308...  0.1207 sec/batch
Epoch: 6/20...  Training Step: 2765...  Training loss: 1.3149...  0.1254 sec/batch
Epoch: 6/20...  Training Step: 2766...  Training loss: 1.1606...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2767...  Training loss: 1.2550...  0.1219 sec/batch
Epoch: 6/20...  Training Step: 2768...  Training loss: 1.1248...  0.1224 sec/batch
Epoch: 6/20...  Training Step: 2769...  Training loss: 1.2580...  0.1248 sec/batch
Epoch: 6/20...  Training Step: 2770...  Training loss: 1.3312...  0.1228 sec/batch
Epoch: 6/20...  Training Step: 2771...  Training loss: 1.3274...  0.1232 sec/batch
Epoch: 6/20...  Training Step: 2772...  Training loss: 1.2951...  0.1216 sec/batch
Epoch: 6/20...  Training Step: 2773...  Training loss: 1.1720...  0.1265 sec/batch
Epoch: 6/20...  Training Step: 2774...  Training loss: 1.5893...  0.1210 sec/batch
Epoch: 6/20...  Training Step: 2775...  Training loss: 1.4110...  0.1226 sec/batch
Epoch: 6/20...  Training Step: 2776...  Training loss: 1.1963...  0.1227 sec/batch
Epoch: 6/20...  Training Step: 2777...  Training loss: 1.1328...  0.1239 sec/batch
Epoch: 6/20...  Training Step: 2778...  Training loss: 1.0506...  0.1225 sec/batch
Epoch: 6/20...  Training Step: 2779...  Training loss: 1.0907...  0.1211 sec/batch
Epoch: 6/20...  Training Step: 2780...  Training loss: 1.2546...  0.1198 sec/batch
Epoch: 6/20...  Training Step: 2781...  Training loss: 1.4092...  0.1238 sec/batch
Epoch: 6/20...  Training Step: 2782...  Training loss: 1.1441...  0.1231 sec/batch
Epoch: 6/20...  Training Step: 2783...  Training loss: 1.2670...  0.1222 sec/batch
Epoch: 6/20...  Training Step: 2784...  Training loss: 1.2103...  0.1212 sec/batch
Epoch: 7/20...  Training Step: 2785...  Training loss: 1.6022...  0.1177 sec/batch
Epoch: 7/20...  Training Step: 2786...  Training loss: 1.2978...  0.1223 sec/batch
Epoch: 7/20...  Training Step: 2787...  Training loss: 1.2757...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 2788...  Training loss: 1.2762...  0.1234 sec/batch
Epoch: 7/20...  Training Step: 2789...  Training loss: 1.3186...  0.1196 sec/batch
Epoch: 7/20...  Training Step: 2790...  Training loss: 1.1030...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2791...  Training loss: 1.4062...  0.1198 sec/batch
Epoch: 7/20...  Training Step: 2792...  Training loss: 1.2127...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 2793...  Training loss: 1.0612...  0.1180 sec/batch
Epoch: 7/20...  Training Step: 2794...  Training loss: 1.3288...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 2795...  Training loss: 1.1410...  0.1246 sec/batch
Epoch: 7/20...  Training Step: 2796...  Training loss: 1.0711...  0.1205 sec/batch
Epoch: 7/20...  Training Step: 2797...  Training loss: 1.4178...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 2798...  Training loss: 1.0990...  0.1226 sec/batch
Epoch: 7/20...  Training Step: 2799...  Training loss: 1.2992...  0.1213 sec/batch
Epoch: 7/20...  Training Step: 2800...  Training loss: 1.3827...  0.1229 sec/batch
Epoch: 7/20...  Training Step: 2801...  Training loss: 1.2046...  0.1240 sec/batch
Epoch: 7/20...  Training Step: 2802...  Training loss: 1.0667...  0.1220 sec/batch
Epoch: 7/20...  Training Step: 2803...  Training loss: 1.2391...  0.1273 sec/batch
Epoch: 7/20...  Training Step: 2804...  Training loss: 1.0837...  0.1251 sec/batch
Epoch: 7/20...  Training Step: 2805...  Training loss: 1.3235...  0.1246 sec/batch
Epoch: 7/20...  Training Step: 2806...  Training loss: 1.1864...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2807...  Training loss: 1.4851...  0.1232 sec/batch
Epoch: 7/20...  Training Step: 2808...  Training loss: 1.1897...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 2809...  Training loss: 1.1682...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 2810...  Training loss: 1.2310...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 2811...  Training loss: 1.3416...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 2812...  Training loss: 1.0823...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 2813...  Training loss: 1.1574...  0.1211 sec/batch
Epoch: 7/20...  Training Step: 2814...  Training loss: 1.2997...  0.1232 sec/batch
Epoch: 7/20...  Training Step: 2815...  Training loss: 1.0314...  0.1267 sec/batch
Epoch: 7/20...  Training Step: 2816...  Training loss: 1.1368...  0.1215 sec/batch
Epoch: 7/20...  Training Step: 2817...  Training loss: 1.0489...  0.1185 sec/batch
Epoch: 7/20...  Training Step: 2818...  Training loss: 1.1406...  0.1197 sec/batch
Epoch: 7/20...  Training Step: 2819...  Training loss: 1.1123...  0.1178 sec/batch
Epoch: 7/20...  Training Step: 2820...  Training loss: 1.1562...  0.1204 sec/batch
Epoch: 7/20...  Training Step: 2821...  Training loss: 1.3419...  0.1194 sec/batch
Epoch: 7/20...  Training Step: 2822...  Training loss: 1.1514...  0.1193 sec/batch
Epoch: 7/20...  Training Step: 2823...  Training loss: 1.1650...  0.1216 sec/batch
Epoch: 7/20...  Training Step: 2824...  Training loss: 1.4537...  0.1186 sec/batch
Epoch: 7/20...  Training Step: 2825...  Training loss: 1.2162...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 2826...  Training loss: 1.0869...  0.1190 sec/batch
Epoch: 7/20...  Training Step: 2827...  Training loss: 1.3064...  0.1227 sec/batch
Epoch: 7/20...  Training Step: 2828...  Training loss: 1.0138...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2829...  Training loss: 1.2151...  0.1217 sec/batch
Epoch: 7/20...  Training Step: 2830...  Training loss: 1.1558...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 2831...  Training loss: 1.2438...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 2832...  Training loss: 1.2028...  0.1242 sec/batch
Epoch: 7/20...  Training Step: 2833...  Training loss: 1.1428...  0.1235 sec/batch
Epoch: 7/20...  Training Step: 2834...  Training loss: 1.2648...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2835...  Training loss: 1.2467...  0.1212 sec/batch
Epoch: 7/20...  Training Step: 2836...  Training loss: 1.2900...  0.1243 sec/batch
Epoch: 7/20...  Training Step: 2837...  Training loss: 1.2872...  0.1210 sec/batch
Epoch: 7/20...  Training Step: 2838...  Training loss: 1.1544...  0.1192 sec/batch
Epoch: 7/20...  Training Step: 2839...  Training loss: 1.1249...  0.1200 sec/batch
Epoch: 7/20...  Training Step: 2840...  Training loss: 1.0838...  0.1202 sec/batch
Epoch: 7/20...  Training Step: 2841...  Training loss: 1.2728...  0.1338 sec/batch
Epoch: 7/20...  Training Step: 2842...  Training loss: 1.3078...  0.1325 sec/batch
Epoch: 7/20...  Training Step: 2843...  Training loss: 1.0610...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 2844...  Training loss: 1.0928...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 2845...  Training loss: 1.1602...  0.1226 sec/batch
Epoch: 7/20...  Training Step: 2846...  Training loss: 1.3309...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 2847...  Training loss: 1.0787...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2848...  Training loss: 1.1858...  0.1213 sec/batch
Epoch: 7/20...  Training Step: 2849...  Training loss: 1.0879...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 2850...  Training loss: 1.3595...  0.1214 sec/batch
Epoch: 7/20...  Training Step: 2851...  Training loss: 1.1286...  0.1198 sec/batch
Epoch: 7/20...  Training Step: 2852...  Training loss: 1.2659...  0.1208 sec/batch
Epoch: 7/20...  Training Step: 2853...  Training loss: 1.2338...  0.1182 sec/batch
Epoch: 7/20...  Training Step: 2854...  Training loss: 1.2420...  0.1199 sec/batch
Epoch: 7/20...  Training Step: 2855...  Training loss: 1.3005...  0.1250 sec/batch
Epoch: 7/20...  Training Step: 2856...  Training loss: 1.1923...  0.1210 sec/batch
Epoch: 7/20...  Training Step: 2857...  Training loss: 1.1712...  0.1237 sec/batch
Epoch: 7/20...  Training Step: 2858...  Training loss: 1.0615...  0.1242 sec/batch
Epoch: 7/20...  Training Step: 2859...  Training loss: 1.3661...  0.1226 sec/batch
Epoch: 7/20...  Training Step: 2860...  Training loss: 1.0779...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 2861...  Training loss: 1.1600...  0.1213 sec/batch
Epoch: 7/20...  Training Step: 2862...  Training loss: 1.2276...  0.1154 sec/batch
Epoch: 7/20...  Training Step: 2863...  Training loss: 1.2054...  0.1197 sec/batch
Epoch: 7/20...  Training Step: 2864...  Training loss: 1.1183...  0.1257 sec/batch
Epoch: 7/20...  Training Step: 2865...  Training loss: 1.2551...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 2866...  Training loss: 1.2596...  0.1187 sec/batch
Epoch: 7/20...  Training Step: 2867...  Training loss: 1.0930...  0.1226 sec/batch
Epoch: 7/20...  Training Step: 2868...  Training loss: 1.3685...  0.1224 sec/batch
Epoch: 7/20...  Training Step: 2869...  Training loss: 1.2715...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2870...  Training loss: 1.3676...  0.1234 sec/batch
Epoch: 7/20...  Training Step: 2871...  Training loss: 1.1656...  0.1248 sec/batch
Epoch: 7/20...  Training Step: 2872...  Training loss: 1.3329...  0.1228 sec/batch
Epoch: 7/20...  Training Step: 2873...  Training loss: 1.4455...  0.1200 sec/batch
Epoch: 7/20...  Training Step: 2874...  Training loss: 1.1862...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 2875...  Training loss: 1.3555...  0.1234 sec/batch
Epoch: 7/20...  Training Step: 2876...  Training loss: 1.3545...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2877...  Training loss: 1.0862...  0.1242 sec/batch
Epoch: 7/20...  Training Step: 2878...  Training loss: 1.4092...  0.1167 sec/batch
Epoch: 7/20...  Training Step: 2879...  Training loss: 1.2123...  0.1235 sec/batch
Epoch: 7/20...  Training Step: 2880...  Training loss: 1.1842...  0.1177 sec/batch
Epoch: 7/20...  Training Step: 2881...  Training loss: 1.4710...  0.1194 sec/batch
Epoch: 7/20...  Training Step: 2882...  Training loss: 1.2854...  0.1227 sec/batch
Epoch: 7/20...  Training Step: 2883...  Training loss: 1.2302...  0.1188 sec/batch
Epoch: 7/20...  Training Step: 2884...  Training loss: 1.1533...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 2885...  Training loss: 1.3681...  0.1234 sec/batch
Epoch: 7/20...  Training Step: 2886...  Training loss: 1.3707...  0.1215 sec/batch
Epoch: 7/20...  Training Step: 2887...  Training loss: 1.5041...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 2888...  Training loss: 1.2210...  0.1269 sec/batch
Epoch: 7/20...  Training Step: 2889...  Training loss: 1.3641...  0.1275 sec/batch
Epoch: 7/20...  Training Step: 2890...  Training loss: 1.4395...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 2891...  Training loss: 1.2698...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 2892...  Training loss: 1.4226...  0.1224 sec/batch
Epoch: 7/20...  Training Step: 2893...  Training loss: 1.2878...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 2894...  Training loss: 1.2959...  0.1200 sec/batch
Epoch: 7/20...  Training Step: 2895...  Training loss: 1.3491...  0.1204 sec/batch
Epoch: 7/20...  Training Step: 2896...  Training loss: 1.1730...  0.1245 sec/batch
Epoch: 7/20...  Training Step: 2897...  Training loss: 1.3237...  0.1280 sec/batch
Epoch: 7/20...  Training Step: 2898...  Training loss: 1.3830...  0.1232 sec/batch
Epoch: 7/20...  Training Step: 2899...  Training loss: 1.3956...  0.1281 sec/batch
Epoch: 7/20...  Training Step: 2900...  Training loss: 1.2348...  0.1208 sec/batch
Epoch: 7/20...  Training Step: 2901...  Training loss: 1.3487...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 2902...  Training loss: 1.4461...  0.1190 sec/batch
Epoch: 7/20...  Training Step: 2903...  Training loss: 1.2144...  0.1230 sec/batch
Epoch: 7/20...  Training Step: 2904...  Training loss: 1.1711...  0.1243 sec/batch
Epoch: 7/20...  Training Step: 2905...  Training loss: 1.2793...  0.1243 sec/batch
Epoch: 7/20...  Training Step: 2906...  Training loss: 1.3035...  0.1234 sec/batch
Epoch: 7/20...  Training Step: 2907...  Training loss: 1.3720...  0.1221 sec/batch
Epoch: 7/20...  Training Step: 2908...  Training loss: 1.2514...  0.1269 sec/batch
Epoch: 7/20...  Training Step: 2909...  Training loss: 1.3289...  0.1195 sec/batch
Epoch: 7/20...  Training Step: 2910...  Training loss: 1.0861...  0.1220 sec/batch
Epoch: 7/20...  Training Step: 2911...  Training loss: 1.2257...  0.1227 sec/batch
Epoch: 7/20...  Training Step: 2912...  Training loss: 1.2460...  0.1220 sec/batch
Epoch: 7/20...  Training Step: 2913...  Training loss: 1.3374...  0.1176 sec/batch
Epoch: 7/20...  Training Step: 2914...  Training loss: 1.2274...  0.1227 sec/batch
Epoch: 7/20...  Training Step: 2915...  Training loss: 1.5028...  0.1224 sec/batch
Epoch: 7/20...  Training Step: 2916...  Training loss: 1.2695...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 2917...  Training loss: 1.2075...  0.1258 sec/batch
Epoch: 7/20...  Training Step: 2918...  Training loss: 1.4489...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 2919...  Training loss: 1.2187...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2920...  Training loss: 1.1162...  0.1191 sec/batch
Epoch: 7/20...  Training Step: 2921...  Training loss: 1.1236...  0.1229 sec/batch
Epoch: 7/20...  Training Step: 2922...  Training loss: 1.2346...  0.1249 sec/batch
Epoch: 7/20...  Training Step: 2923...  Training loss: 1.1896...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 2924...  Training loss: 1.3334...  0.1208 sec/batch
Epoch: 7/20...  Training Step: 2925...  Training loss: 1.1305...  0.1225 sec/batch
Epoch: 7/20...  Training Step: 2926...  Training loss: 1.2262...  0.1196 sec/batch
Epoch: 7/20...  Training Step: 2927...  Training loss: 1.1787...  0.1202 sec/batch
Epoch: 7/20...  Training Step: 2928...  Training loss: 1.2920...  0.1202 sec/batch
Epoch: 7/20...  Training Step: 2929...  Training loss: 1.2056...  0.1204 sec/batch
Epoch: 7/20...  Training Step: 2930...  Training loss: 1.1178...  0.1186 sec/batch
Epoch: 7/20...  Training Step: 2931...  Training loss: 1.2777...  0.1210 sec/batch
Epoch: 7/20...  Training Step: 2932...  Training loss: 1.1138...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2933...  Training loss: 1.2942...  0.1212 sec/batch
Epoch: 7/20...  Training Step: 2934...  Training loss: 1.4486...  0.1211 sec/batch
Epoch: 7/20...  Training Step: 2935...  Training loss: 1.3656...  0.1198 sec/batch
Epoch: 7/20...  Training Step: 2936...  Training loss: 1.4583...  0.1221 sec/batch
Epoch: 7/20...  Training Step: 2937...  Training loss: 1.5172...  0.1205 sec/batch
Epoch: 7/20...  Training Step: 2938...  Training loss: 1.3577...  0.1245 sec/batch
Epoch: 7/20...  Training Step: 2939...  Training loss: 1.2891...  0.1251 sec/batch
Epoch: 7/20...  Training Step: 2940...  Training loss: 1.1759...  0.1182 sec/batch
Epoch: 7/20...  Training Step: 2941...  Training loss: 1.2062...  0.1200 sec/batch
Epoch: 7/20...  Training Step: 2942...  Training loss: 1.1917...  0.1182 sec/batch
Epoch: 7/20...  Training Step: 2943...  Training loss: 1.1627...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 2944...  Training loss: 1.1959...  0.1187 sec/batch
Epoch: 7/20...  Training Step: 2945...  Training loss: 1.3872...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 2946...  Training loss: 1.2075...  0.1233 sec/batch
Epoch: 7/20...  Training Step: 2947...  Training loss: 1.4168...  0.1262 sec/batch
Epoch: 7/20...  Training Step: 2948...  Training loss: 1.0882...  0.1221 sec/batch
Epoch: 7/20...  Training Step: 2949...  Training loss: 1.2932...  0.1227 sec/batch
Epoch: 7/20...  Training Step: 2950...  Training loss: 1.2506...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 2951...  Training loss: 1.1376...  0.1212 sec/batch
Epoch: 7/20...  Training Step: 2952...  Training loss: 1.3625...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 2953...  Training loss: 1.1834...  0.1280 sec/batch
Epoch: 7/20...  Training Step: 2954...  Training loss: 1.2536...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 2955...  Training loss: 1.3487...  0.1226 sec/batch
Epoch: 7/20...  Training Step: 2956...  Training loss: 1.3959...  0.1199 sec/batch
Epoch: 7/20...  Training Step: 2957...  Training loss: 1.1147...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 2958...  Training loss: 1.3603...  0.1252 sec/batch
Epoch: 7/20...  Training Step: 2959...  Training loss: 1.2933...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 2960...  Training loss: 1.1402...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 2961...  Training loss: 1.1359...  0.1216 sec/batch
Epoch: 7/20...  Training Step: 2962...  Training loss: 1.4074...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 2963...  Training loss: 1.0790...  0.1220 sec/batch
Epoch: 7/20...  Training Step: 2964...  Training loss: 1.3171...  0.1174 sec/batch
Epoch: 7/20...  Training Step: 2965...  Training loss: 1.0411...  0.1194 sec/batch
Epoch: 7/20...  Training Step: 2966...  Training loss: 1.3761...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 2967...  Training loss: 1.3227...  0.1253 sec/batch
Epoch: 7/20...  Training Step: 2968...  Training loss: 1.1409...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 2969...  Training loss: 1.4530...  0.1249 sec/batch
Epoch: 7/20...  Training Step: 2970...  Training loss: 1.2685...  0.1342 sec/batch
Epoch: 7/20...  Training Step: 2971...  Training loss: 1.3722...  0.1291 sec/batch
Epoch: 7/20...  Training Step: 2972...  Training loss: 1.0738...  0.1183 sec/batch
Epoch: 7/20...  Training Step: 2973...  Training loss: 1.3365...  0.1143 sec/batch
Epoch: 7/20...  Training Step: 2974...  Training loss: 1.2478...  0.1185 sec/batch
Epoch: 7/20...  Training Step: 2975...  Training loss: 1.2580...  0.1272 sec/batch
Epoch: 7/20...  Training Step: 2976...  Training loss: 1.4199...  0.1286 sec/batch
Epoch: 7/20...  Training Step: 2977...  Training loss: 1.2238...  0.1252 sec/batch
Epoch: 7/20...  Training Step: 2978...  Training loss: 1.1797...  0.1318 sec/batch
Epoch: 7/20...  Training Step: 2979...  Training loss: 1.2900...  0.1397 sec/batch
Epoch: 7/20...  Training Step: 2980...  Training loss: 1.3102...  0.1459 sec/batch
Epoch: 7/20...  Training Step: 2981...  Training loss: 1.1616...  0.1313 sec/batch
Epoch: 7/20...  Training Step: 2982...  Training loss: 1.3561...  0.1290 sec/batch
Epoch: 7/20...  Training Step: 2983...  Training loss: 0.9945...  0.1355 sec/batch
Epoch: 7/20...  Training Step: 2984...  Training loss: 1.1149...  0.1252 sec/batch
Epoch: 7/20...  Training Step: 2985...  Training loss: 1.1706...  0.1185 sec/batch
Epoch: 7/20...  Training Step: 2986...  Training loss: 1.2888...  0.1164 sec/batch
Epoch: 7/20...  Training Step: 2987...  Training loss: 1.2587...  0.1146 sec/batch
Epoch: 7/20...  Training Step: 2988...  Training loss: 1.3948...  0.1234 sec/batch
Epoch: 7/20...  Training Step: 2989...  Training loss: 1.0784...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 2990...  Training loss: 1.1056...  0.1214 sec/batch
Epoch: 7/20...  Training Step: 2991...  Training loss: 1.1471...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 2992...  Training loss: 1.3841...  0.1199 sec/batch
Epoch: 7/20...  Training Step: 2993...  Training loss: 1.2974...  0.1326 sec/batch
Epoch: 7/20...  Training Step: 2994...  Training loss: 1.1223...  0.1776 sec/batch
Epoch: 7/20...  Training Step: 2995...  Training loss: 1.1056...  0.1571 sec/batch
Epoch: 7/20...  Training Step: 2996...  Training loss: 1.3590...  0.1420 sec/batch
Epoch: 7/20...  Training Step: 2997...  Training loss: 1.3812...  0.1396 sec/batch
Epoch: 7/20...  Training Step: 2998...  Training loss: 1.2572...  0.1149 sec/batch
Epoch: 7/20...  Training Step: 2999...  Training loss: 1.3649...  0.1176 sec/batch
Epoch: 7/20...  Training Step: 3000...  Training loss: 1.1410...  0.1182 sec/batch
Epoch: 7/20...  Training Step: 3001...  Training loss: 1.3187...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 3002...  Training loss: 1.2409...  0.1212 sec/batch
Epoch: 7/20...  Training Step: 3003...  Training loss: 1.3818...  0.1233 sec/batch
Epoch: 7/20...  Training Step: 3004...  Training loss: 1.2768...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 3005...  Training loss: 1.1818...  0.1230 sec/batch
Epoch: 7/20...  Training Step: 3006...  Training loss: 1.5467...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 3007...  Training loss: 1.3526...  0.1227 sec/batch
Epoch: 7/20...  Training Step: 3008...  Training loss: 1.5588...  0.1249 sec/batch
Epoch: 7/20...  Training Step: 3009...  Training loss: 1.2423...  0.1187 sec/batch
Epoch: 7/20...  Training Step: 3010...  Training loss: 1.3806...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 3011...  Training loss: 1.3839...  0.1304 sec/batch
Epoch: 7/20...  Training Step: 3012...  Training loss: 1.2262...  0.1309 sec/batch
Epoch: 7/20...  Training Step: 3013...  Training loss: 1.2710...  0.1349 sec/batch
Epoch: 7/20...  Training Step: 3014...  Training loss: 1.2469...  0.1354 sec/batch
Epoch: 7/20...  Training Step: 3015...  Training loss: 1.2859...  0.1282 sec/batch
Epoch: 7/20...  Training Step: 3016...  Training loss: 1.2387...  0.1272 sec/batch
Epoch: 7/20...  Training Step: 3017...  Training loss: 1.5192...  0.1197 sec/batch
Epoch: 7/20...  Training Step: 3018...  Training loss: 1.2299...  0.1232 sec/batch
Epoch: 7/20...  Training Step: 3019...  Training loss: 1.4920...  0.1184 sec/batch
Epoch: 7/20...  Training Step: 3020...  Training loss: 1.2855...  0.1184 sec/batch
Epoch: 7/20...  Training Step: 3021...  Training loss: 1.5329...  0.1233 sec/batch
Epoch: 7/20...  Training Step: 3022...  Training loss: 1.0556...  0.1228 sec/batch
Epoch: 7/20...  Training Step: 3023...  Training loss: 1.2808...  0.1174 sec/batch
Epoch: 7/20...  Training Step: 3024...  Training loss: 1.3961...  0.1226 sec/batch
Epoch: 7/20...  Training Step: 3025...  Training loss: 1.1147...  0.1225 sec/batch
Epoch: 7/20...  Training Step: 3026...  Training loss: 1.1854...  0.1183 sec/batch
Epoch: 7/20...  Training Step: 3027...  Training loss: 1.3955...  0.1187 sec/batch
Epoch: 7/20...  Training Step: 3028...  Training loss: 1.3307...  0.1233 sec/batch
Epoch: 7/20...  Training Step: 3029...  Training loss: 1.3542...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 3030...  Training loss: 1.1325...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 3031...  Training loss: 1.1863...  0.1202 sec/batch
Epoch: 7/20...  Training Step: 3032...  Training loss: 1.4447...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 3033...  Training loss: 1.2066...  0.1218 sec/batch
Epoch: 7/20...  Training Step: 3034...  Training loss: 1.3334...  0.1214 sec/batch
Epoch: 7/20...  Training Step: 3035...  Training loss: 1.4566...  0.1270 sec/batch
Epoch: 7/20...  Training Step: 3036...  Training loss: 1.3020...  0.1329 sec/batch
Epoch: 7/20...  Training Step: 3037...  Training loss: 1.1827...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 3038...  Training loss: 1.2752...  0.1216 sec/batch
Epoch: 7/20...  Training Step: 3039...  Training loss: 1.2711...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 3040...  Training loss: 1.3104...  0.1230 sec/batch
Epoch: 7/20...  Training Step: 3041...  Training loss: 1.3659...  0.1229 sec/batch
Epoch: 7/20...  Training Step: 3042...  Training loss: 1.1547...  0.1210 sec/batch
Epoch: 7/20...  Training Step: 3043...  Training loss: 1.1271...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 3044...  Training loss: 1.2393...  0.1237 sec/batch
Epoch: 7/20...  Training Step: 3045...  Training loss: 1.3038...  0.1250 sec/batch
Epoch: 7/20...  Training Step: 3046...  Training loss: 1.4131...  0.1187 sec/batch
Epoch: 7/20...  Training Step: 3047...  Training loss: 1.1953...  0.1241 sec/batch
Epoch: 7/20...  Training Step: 3048...  Training loss: 1.3392...  0.1192 sec/batch
Epoch: 7/20...  Training Step: 3049...  Training loss: 1.3827...  0.1146 sec/batch
Epoch: 7/20...  Training Step: 3050...  Training loss: 1.3188...  0.1237 sec/batch
Epoch: 7/20...  Training Step: 3051...  Training loss: 1.3933...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 3052...  Training loss: 1.4441...  0.1195 sec/batch
Epoch: 7/20...  Training Step: 3053...  Training loss: 1.2725...  0.1225 sec/batch
Epoch: 7/20...  Training Step: 3054...  Training loss: 1.4141...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 3055...  Training loss: 1.2908...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 3056...  Training loss: 1.4566...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 3057...  Training loss: 1.3350...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 3058...  Training loss: 1.2420...  0.1215 sec/batch
Epoch: 7/20...  Training Step: 3059...  Training loss: 1.3743...  0.1237 sec/batch
Epoch: 7/20...  Training Step: 3060...  Training loss: 1.2568...  0.1223 sec/batch
Epoch: 7/20...  Training Step: 3061...  Training loss: 1.2991...  0.1196 sec/batch
Epoch: 7/20...  Training Step: 3062...  Training loss: 1.4898...  0.1215 sec/batch
Epoch: 7/20...  Training Step: 3063...  Training loss: 1.2120...  0.1263 sec/batch
Epoch: 7/20...  Training Step: 3064...  Training loss: 1.2244...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 3065...  Training loss: 1.2063...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 3066...  Training loss: 1.1842...  0.1177 sec/batch
Epoch: 7/20...  Training Step: 3067...  Training loss: 1.2801...  0.1202 sec/batch
Epoch: 7/20...  Training Step: 3068...  Training loss: 1.2744...  0.1237 sec/batch
Epoch: 7/20...  Training Step: 3069...  Training loss: 1.1272...  0.1256 sec/batch
Epoch: 7/20...  Training Step: 3070...  Training loss: 1.2795...  0.1220 sec/batch
Epoch: 7/20...  Training Step: 3071...  Training loss: 1.3246...  0.1204 sec/batch
Epoch: 7/20...  Training Step: 3072...  Training loss: 1.2986...  0.1253 sec/batch
Epoch: 7/20...  Training Step: 3073...  Training loss: 1.3096...  0.1177 sec/batch
Epoch: 7/20...  Training Step: 3074...  Training loss: 1.2854...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 3075...  Training loss: 1.3599...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 3076...  Training loss: 1.1808...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 3077...  Training loss: 1.1529...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 3078...  Training loss: 1.2824...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 3079...  Training loss: 1.2650...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 3080...  Training loss: 1.4909...  0.1239 sec/batch
Epoch: 7/20...  Training Step: 3081...  Training loss: 1.2227...  0.1255 sec/batch
Epoch: 7/20...  Training Step: 3082...  Training loss: 1.2138...  0.1177 sec/batch
Epoch: 7/20...  Training Step: 3083...  Training loss: 1.2459...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 3084...  Training loss: 1.1699...  0.1243 sec/batch
Epoch: 7/20...  Training Step: 3085...  Training loss: 1.2717...  0.1200 sec/batch
Epoch: 7/20...  Training Step: 3086...  Training loss: 1.2582...  0.1228 sec/batch
Epoch: 7/20...  Training Step: 3087...  Training loss: 1.1604...  0.1242 sec/batch
Epoch: 7/20...  Training Step: 3088...  Training loss: 1.4051...  0.1232 sec/batch
Epoch: 7/20...  Training Step: 3089...  Training loss: 1.1313...  0.1190 sec/batch
Epoch: 7/20...  Training Step: 3090...  Training loss: 1.2458...  0.1222 sec/batch
Epoch: 7/20...  Training Step: 3091...  Training loss: 1.2898...  0.1258 sec/batch
Epoch: 7/20...  Training Step: 3092...  Training loss: 1.6273...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 3093...  Training loss: 1.3822...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 3094...  Training loss: 1.2632...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 3095...  Training loss: 1.2501...  0.1200 sec/batch
Epoch: 7/20...  Training Step: 3096...  Training loss: 1.1641...  0.1200 sec/batch
Epoch: 7/20...  Training Step: 3097...  Training loss: 1.2217...  0.1224 sec/batch
Epoch: 7/20...  Training Step: 3098...  Training loss: 1.1319...  0.1251 sec/batch
Epoch: 7/20...  Training Step: 3099...  Training loss: 0.9536...  0.1206 sec/batch
Epoch: 7/20...  Training Step: 3100...  Training loss: 1.0892...  0.1232 sec/batch
Epoch: 7/20...  Training Step: 3101...  Training loss: 1.1958...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 3102...  Training loss: 1.2396...  0.1185 sec/batch
Epoch: 7/20...  Training Step: 3103...  Training loss: 1.2405...  0.1199 sec/batch
Epoch: 7/20...  Training Step: 3104...  Training loss: 1.0964...  0.1159 sec/batch
Epoch: 7/20...  Training Step: 3105...  Training loss: 1.1538...  0.1190 sec/batch
Epoch: 7/20...  Training Step: 3106...  Training loss: 1.3817...  0.1235 sec/batch
Epoch: 7/20...  Training Step: 3107...  Training loss: 1.0997...  0.1244 sec/batch
Epoch: 7/20...  Training Step: 3108...  Training loss: 1.0852...  0.1218 sec/batch
Epoch: 7/20...  Training Step: 3109...  Training loss: 1.1240...  0.1254 sec/batch
Epoch: 7/20...  Training Step: 3110...  Training loss: 0.9437...  0.1216 sec/batch
Epoch: 7/20...  Training Step: 3111...  Training loss: 1.2613...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 3112...  Training loss: 1.1648...  0.1221 sec/batch
Epoch: 7/20...  Training Step: 3113...  Training loss: 1.3288...  0.1311 sec/batch
Epoch: 7/20...  Training Step: 3114...  Training loss: 1.1001...  0.1253 sec/batch
Epoch: 7/20...  Training Step: 3115...  Training loss: 1.2499...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 3116...  Training loss: 1.2215...  0.1233 sec/batch
Epoch: 7/20...  Training Step: 3117...  Training loss: 1.0433...  0.1164 sec/batch
Epoch: 7/20...  Training Step: 3118...  Training loss: 1.2437...  0.1248 sec/batch
Epoch: 7/20...  Training Step: 3119...  Training loss: 1.2448...  0.1195 sec/batch
Epoch: 7/20...  Training Step: 3120...  Training loss: 1.1497...  0.1224 sec/batch
Epoch: 7/20...  Training Step: 3121...  Training loss: 1.2094...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 3122...  Training loss: 1.0038...  0.1168 sec/batch
Epoch: 7/20...  Training Step: 3123...  Training loss: 1.2565...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 3124...  Training loss: 1.1723...  0.1196 sec/batch
Epoch: 7/20...  Training Step: 3125...  Training loss: 1.1583...  0.1232 sec/batch
Epoch: 7/20...  Training Step: 3126...  Training loss: 1.3479...  0.1240 sec/batch
Epoch: 7/20...  Training Step: 3127...  Training loss: 1.0287...  0.1221 sec/batch
Epoch: 7/20...  Training Step: 3128...  Training loss: 1.4796...  0.1188 sec/batch
Epoch: 7/20...  Training Step: 3129...  Training loss: 1.1575...  0.1238 sec/batch
Epoch: 7/20...  Training Step: 3130...  Training loss: 1.0189...  0.1274 sec/batch
Epoch: 7/20...  Training Step: 3131...  Training loss: 1.0379...  0.1246 sec/batch
Epoch: 7/20...  Training Step: 3132...  Training loss: 1.5389...  0.1242 sec/batch
Epoch: 7/20...  Training Step: 3133...  Training loss: 1.1431...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 3134...  Training loss: 1.3206...  0.1172 sec/batch
Epoch: 7/20...  Training Step: 3135...  Training loss: 1.1721...  0.1214 sec/batch
Epoch: 7/20...  Training Step: 3136...  Training loss: 1.1851...  0.1199 sec/batch
Epoch: 7/20...  Training Step: 3137...  Training loss: 1.0153...  0.1205 sec/batch
Epoch: 7/20...  Training Step: 3138...  Training loss: 0.9928...  0.1217 sec/batch
Epoch: 7/20...  Training Step: 3139...  Training loss: 1.2778...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 3140...  Training loss: 1.2535...  0.1228 sec/batch
Epoch: 7/20...  Training Step: 3141...  Training loss: 1.0508...  0.1195 sec/batch
Epoch: 7/20...  Training Step: 3142...  Training loss: 1.2995...  0.1248 sec/batch
Epoch: 7/20...  Training Step: 3143...  Training loss: 1.4199...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 3144...  Training loss: 1.0698...  0.1199 sec/batch
Epoch: 7/20...  Training Step: 3145...  Training loss: 1.3312...  0.1240 sec/batch
Epoch: 7/20...  Training Step: 3146...  Training loss: 1.1414...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 3147...  Training loss: 1.1476...  0.1207 sec/batch
Epoch: 7/20...  Training Step: 3148...  Training loss: 1.1760...  0.1192 sec/batch
Epoch: 7/20...  Training Step: 3149...  Training loss: 1.2606...  0.1268 sec/batch
Epoch: 7/20...  Training Step: 3150...  Training loss: 1.2919...  0.1197 sec/batch
Epoch: 7/20...  Training Step: 3151...  Training loss: 1.1448...  0.1192 sec/batch
Epoch: 7/20...  Training Step: 3152...  Training loss: 1.3525...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 3153...  Training loss: 1.3536...  0.1246 sec/batch
Epoch: 7/20...  Training Step: 3154...  Training loss: 1.2643...  0.1255 sec/batch
Epoch: 7/20...  Training Step: 3155...  Training loss: 1.1279...  0.1259 sec/batch
Epoch: 7/20...  Training Step: 3156...  Training loss: 1.1800...  0.1216 sec/batch
Epoch: 7/20...  Training Step: 3157...  Training loss: 1.0797...  0.1177 sec/batch
Epoch: 7/20...  Training Step: 3158...  Training loss: 1.3322...  0.1192 sec/batch
Epoch: 7/20...  Training Step: 3159...  Training loss: 1.2452...  0.1296 sec/batch
Epoch: 7/20...  Training Step: 3160...  Training loss: 1.2592...  0.1154 sec/batch
Epoch: 7/20...  Training Step: 3161...  Training loss: 1.3449...  0.1227 sec/batch
Epoch: 7/20...  Training Step: 3162...  Training loss: 1.3945...  0.1267 sec/batch
Epoch: 7/20...  Training Step: 3163...  Training loss: 1.1524...  0.1232 sec/batch
Epoch: 7/20...  Training Step: 3164...  Training loss: 1.2772...  0.1217 sec/batch
Epoch: 7/20...  Training Step: 3165...  Training loss: 1.1202...  0.1247 sec/batch
Epoch: 7/20...  Training Step: 3166...  Training loss: 1.1862...  0.1196 sec/batch
Epoch: 7/20...  Training Step: 3167...  Training loss: 1.1262...  0.1253 sec/batch
Epoch: 7/20...  Training Step: 3168...  Training loss: 1.2611...  0.1214 sec/batch
Epoch: 7/20...  Training Step: 3169...  Training loss: 1.1657...  0.1249 sec/batch
Epoch: 7/20...  Training Step: 3170...  Training loss: 1.2976...  0.1234 sec/batch
Epoch: 7/20...  Training Step: 3171...  Training loss: 1.2226...  0.1234 sec/batch
Epoch: 7/20...  Training Step: 3172...  Training loss: 1.0814...  0.1241 sec/batch
Epoch: 7/20...  Training Step: 3173...  Training loss: 1.1090...  0.1260 sec/batch
Epoch: 7/20...  Training Step: 3174...  Training loss: 1.2359...  0.1218 sec/batch
Epoch: 7/20...  Training Step: 3175...  Training loss: 1.1152...  0.1235 sec/batch
Epoch: 7/20...  Training Step: 3176...  Training loss: 1.0353...  0.1213 sec/batch
Epoch: 7/20...  Training Step: 3177...  Training loss: 1.1522...  0.1173 sec/batch
Epoch: 7/20...  Training Step: 3178...  Training loss: 1.1130...  0.1174 sec/batch
Epoch: 7/20...  Training Step: 3179...  Training loss: 1.1318...  0.1260 sec/batch
Epoch: 7/20...  Training Step: 3180...  Training loss: 1.4845...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 3181...  Training loss: 1.2188...  0.1237 sec/batch
Epoch: 7/20...  Training Step: 3182...  Training loss: 1.1568...  0.1211 sec/batch
Epoch: 7/20...  Training Step: 3183...  Training loss: 1.2438...  0.1191 sec/batch
Epoch: 7/20...  Training Step: 3184...  Training loss: 1.0755...  0.1173 sec/batch
Epoch: 7/20...  Training Step: 3185...  Training loss: 1.1461...  0.1219 sec/batch
Epoch: 7/20...  Training Step: 3186...  Training loss: 1.1495...  0.1245 sec/batch
Epoch: 7/20...  Training Step: 3187...  Training loss: 1.0971...  0.1208 sec/batch
Epoch: 7/20...  Training Step: 3188...  Training loss: 1.2161...  0.1212 sec/batch
Epoch: 7/20...  Training Step: 3189...  Training loss: 1.3190...  0.1212 sec/batch
Epoch: 7/20...  Training Step: 3190...  Training loss: 1.4433...  0.1270 sec/batch
Epoch: 7/20...  Training Step: 3191...  Training loss: 1.1874...  0.1199 sec/batch
Epoch: 7/20...  Training Step: 3192...  Training loss: 1.5070...  0.1215 sec/batch
Epoch: 7/20...  Training Step: 3193...  Training loss: 1.3069...  0.1187 sec/batch
Epoch: 7/20...  Training Step: 3194...  Training loss: 1.2269...  0.1218 sec/batch
Epoch: 7/20...  Training Step: 3195...  Training loss: 1.1359...  0.1214 sec/batch
Epoch: 7/20...  Training Step: 3196...  Training loss: 1.1190...  0.1262 sec/batch
Epoch: 7/20...  Training Step: 3197...  Training loss: 1.3502...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 3198...  Training loss: 1.2827...  0.1229 sec/batch
Epoch: 7/20...  Training Step: 3199...  Training loss: 1.3563...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 3200...  Training loss: 1.4287...  0.1217 sec/batch
Epoch: 7/20...  Training Step: 3201...  Training loss: 1.3330...  0.1253 sec/batch
Epoch: 7/20...  Training Step: 3202...  Training loss: 1.1027...  0.1203 sec/batch
Epoch: 7/20...  Training Step: 3203...  Training loss: 1.1840...  0.1211 sec/batch
Epoch: 7/20...  Training Step: 3204...  Training loss: 1.0667...  0.1236 sec/batch
Epoch: 7/20...  Training Step: 3205...  Training loss: 1.2822...  0.1183 sec/batch
Epoch: 7/20...  Training Step: 3206...  Training loss: 1.2431...  0.1190 sec/batch
Epoch: 7/20...  Training Step: 3207...  Training loss: 1.2613...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 3208...  Training loss: 1.4000...  0.1229 sec/batch
Epoch: 7/20...  Training Step: 3209...  Training loss: 1.1610...  0.1239 sec/batch
Epoch: 7/20...  Training Step: 3210...  Training loss: 1.2771...  0.1214 sec/batch
Epoch: 7/20...  Training Step: 3211...  Training loss: 1.2774...  0.1231 sec/batch
Epoch: 7/20...  Training Step: 3212...  Training loss: 1.3404...  0.1235 sec/batch
Epoch: 7/20...  Training Step: 3213...  Training loss: 1.2024...  0.1273 sec/batch
Epoch: 7/20...  Training Step: 3214...  Training loss: 1.2484...  0.1248 sec/batch
Epoch: 7/20...  Training Step: 3215...  Training loss: 1.5099...  0.1187 sec/batch
Epoch: 7/20...  Training Step: 3216...  Training loss: 1.2710...  0.1131 sec/batch
Epoch: 7/20...  Training Step: 3217...  Training loss: 1.3640...  0.1195 sec/batch
Epoch: 7/20...  Training Step: 3218...  Training loss: 1.2738...  0.1110 sec/batch
Epoch: 7/20...  Training Step: 3219...  Training loss: 1.1394...  0.1197 sec/batch
Epoch: 7/20...  Training Step: 3220...  Training loss: 1.3137...  0.1170 sec/batch
Epoch: 7/20...  Training Step: 3221...  Training loss: 1.1610...  0.1235 sec/batch
Epoch: 7/20...  Training Step: 3222...  Training loss: 1.3274...  0.1193 sec/batch
Epoch: 7/20...  Training Step: 3223...  Training loss: 1.4552...  0.1162 sec/batch
Epoch: 7/20...  Training Step: 3224...  Training loss: 1.5764...  0.1195 sec/batch
Epoch: 7/20...  Training Step: 3225...  Training loss: 1.2216...  0.1169 sec/batch
Epoch: 7/20...  Training Step: 3226...  Training loss: 1.2733...  0.1151 sec/batch
Epoch: 7/20...  Training Step: 3227...  Training loss: 1.3239...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 3228...  Training loss: 1.2744...  0.1209 sec/batch
Epoch: 7/20...  Training Step: 3229...  Training loss: 1.2496...  0.1150 sec/batch
Epoch: 7/20...  Training Step: 3230...  Training loss: 1.1760...  0.1186 sec/batch
Epoch: 7/20...  Training Step: 3231...  Training loss: 1.2811...  0.1225 sec/batch
Epoch: 7/20...  Training Step: 3232...  Training loss: 1.1643...  0.1162 sec/batch
Epoch: 7/20...  Training Step: 3233...  Training loss: 1.3005...  0.1253 sec/batch
Epoch: 7/20...  Training Step: 3234...  Training loss: 1.2522...  0.1355 sec/batch
Epoch: 7/20...  Training Step: 3235...  Training loss: 1.1556...  0.1162 sec/batch
Epoch: 7/20...  Training Step: 3236...  Training loss: 1.3389...  0.1157 sec/batch
Epoch: 7/20...  Training Step: 3237...  Training loss: 1.0780...  0.1189 sec/batch
Epoch: 7/20...  Training Step: 3238...  Training loss: 1.6944...  0.1183 sec/batch
Epoch: 7/20...  Training Step: 3239...  Training loss: 1.3899...  0.1157 sec/batch
Epoch: 7/20...  Training Step: 3240...  Training loss: 1.0673...  0.1183 sec/batch
Epoch: 7/20...  Training Step: 3241...  Training loss: 1.1741...  0.1136 sec/batch
Epoch: 7/20...  Training Step: 3242...  Training loss: 0.9654...  0.1160 sec/batch
Epoch: 7/20...  Training Step: 3243...  Training loss: 1.1513...  0.1128 sec/batch
Epoch: 7/20...  Training Step: 3244...  Training loss: 1.3113...  0.1196 sec/batch
Epoch: 7/20...  Training Step: 3245...  Training loss: 1.2757...  0.1146 sec/batch
Epoch: 7/20...  Training Step: 3246...  Training loss: 1.1444...  0.1208 sec/batch
Epoch: 7/20...  Training Step: 3247...  Training loss: 1.3500...  0.1201 sec/batch
Epoch: 7/20...  Training Step: 3248...  Training loss: 1.1983...  0.1170 sec/batch
Epoch: 8/20...  Training Step: 3249...  Training loss: 1.5048...  0.1224 sec/batch
Epoch: 8/20...  Training Step: 3250...  Training loss: 1.3285...  0.1195 sec/batch
Epoch: 8/20...  Training Step: 3251...  Training loss: 1.2784...  0.1151 sec/batch
Epoch: 8/20...  Training Step: 3252...  Training loss: 1.1725...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3253...  Training loss: 1.2325...  0.1186 sec/batch
Epoch: 8/20...  Training Step: 3254...  Training loss: 1.0584...  0.1219 sec/batch
Epoch: 8/20...  Training Step: 3255...  Training loss: 1.3989...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3256...  Training loss: 1.0949...  0.1180 sec/batch
Epoch: 8/20...  Training Step: 3257...  Training loss: 1.0393...  0.1195 sec/batch
Epoch: 8/20...  Training Step: 3258...  Training loss: 1.2853...  0.1170 sec/batch
Epoch: 8/20...  Training Step: 3259...  Training loss: 1.1439...  0.1178 sec/batch
Epoch: 8/20...  Training Step: 3260...  Training loss: 1.0540...  0.1162 sec/batch
Epoch: 8/20...  Training Step: 3261...  Training loss: 1.4176...  0.1275 sec/batch
Epoch: 8/20...  Training Step: 3262...  Training loss: 1.0036...  0.1250 sec/batch
Epoch: 8/20...  Training Step: 3263...  Training loss: 1.2504...  0.1215 sec/batch
Epoch: 8/20...  Training Step: 3264...  Training loss: 1.4186...  0.1147 sec/batch
Epoch: 8/20...  Training Step: 3265...  Training loss: 1.1110...  0.1210 sec/batch
Epoch: 8/20...  Training Step: 3266...  Training loss: 1.1113...  0.1197 sec/batch
Epoch: 8/20...  Training Step: 3267...  Training loss: 1.2669...  0.1169 sec/batch
Epoch: 8/20...  Training Step: 3268...  Training loss: 1.0783...  0.1187 sec/batch
Epoch: 8/20...  Training Step: 3269...  Training loss: 1.3684...  0.1147 sec/batch
Epoch: 8/20...  Training Step: 3270...  Training loss: 1.1295...  0.1194 sec/batch
Epoch: 8/20...  Training Step: 3271...  Training loss: 1.3864...  0.1181 sec/batch
Epoch: 8/20...  Training Step: 3272...  Training loss: 1.2161...  0.1219 sec/batch
Epoch: 8/20...  Training Step: 3273...  Training loss: 1.1874...  0.1178 sec/batch
Epoch: 8/20...  Training Step: 3274...  Training loss: 1.2300...  0.1273 sec/batch
Epoch: 8/20...  Training Step: 3275...  Training loss: 1.2638...  0.1222 sec/batch
Epoch: 8/20...  Training Step: 3276...  Training loss: 1.1008...  0.1295 sec/batch
Epoch: 8/20...  Training Step: 3277...  Training loss: 1.0863...  0.1225 sec/batch
Epoch: 8/20...  Training Step: 3278...  Training loss: 1.2468...  0.1249 sec/batch
Epoch: 8/20...  Training Step: 3279...  Training loss: 1.0180...  0.1236 sec/batch
Epoch: 8/20...  Training Step: 3280...  Training loss: 1.0786...  0.1234 sec/batch
Epoch: 8/20...  Training Step: 3281...  Training loss: 1.0152...  0.1214 sec/batch
Epoch: 8/20...  Training Step: 3282...  Training loss: 1.0477...  0.1289 sec/batch
Epoch: 8/20...  Training Step: 3283...  Training loss: 1.0578...  0.1253 sec/batch
Epoch: 8/20...  Training Step: 3284...  Training loss: 1.2052...  0.1162 sec/batch
Epoch: 8/20...  Training Step: 3285...  Training loss: 1.2091...  0.1221 sec/batch
Epoch: 8/20...  Training Step: 3286...  Training loss: 1.0305...  0.1235 sec/batch
Epoch: 8/20...  Training Step: 3287...  Training loss: 1.1228...  0.1342 sec/batch
Epoch: 8/20...  Training Step: 3288...  Training loss: 1.4156...  0.1279 sec/batch
Epoch: 8/20...  Training Step: 3289...  Training loss: 1.1858...  0.1318 sec/batch
Epoch: 8/20...  Training Step: 3290...  Training loss: 1.1119...  0.1210 sec/batch
Epoch: 8/20...  Training Step: 3291...  Training loss: 1.2720...  0.1224 sec/batch
Epoch: 8/20...  Training Step: 3292...  Training loss: 1.0342...  0.1176 sec/batch
Epoch: 8/20...  Training Step: 3293...  Training loss: 1.0997...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3294...  Training loss: 1.1066...  0.1188 sec/batch
Epoch: 8/20...  Training Step: 3295...  Training loss: 1.1851...  0.1209 sec/batch
Epoch: 8/20...  Training Step: 3296...  Training loss: 1.1577...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3297...  Training loss: 1.1152...  0.1170 sec/batch
Epoch: 8/20...  Training Step: 3298...  Training loss: 1.2337...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3299...  Training loss: 1.1009...  0.1216 sec/batch
Epoch: 8/20...  Training Step: 3300...  Training loss: 1.2324...  0.1157 sec/batch
Epoch: 8/20...  Training Step: 3301...  Training loss: 1.1964...  0.1212 sec/batch
Epoch: 8/20...  Training Step: 3302...  Training loss: 1.2295...  0.1203 sec/batch
Epoch: 8/20...  Training Step: 3303...  Training loss: 0.9775...  0.1203 sec/batch
Epoch: 8/20...  Training Step: 3304...  Training loss: 1.0413...  0.1185 sec/batch
Epoch: 8/20...  Training Step: 3305...  Training loss: 1.1191...  0.1181 sec/batch
Epoch: 8/20...  Training Step: 3306...  Training loss: 1.1647...  0.1211 sec/batch
Epoch: 8/20...  Training Step: 3307...  Training loss: 0.9222...  0.1186 sec/batch
Epoch: 8/20...  Training Step: 3308...  Training loss: 1.1429...  0.1202 sec/batch
Epoch: 8/20...  Training Step: 3309...  Training loss: 1.1077...  0.1197 sec/batch
Epoch: 8/20...  Training Step: 3310...  Training loss: 1.3185...  0.1227 sec/batch
Epoch: 8/20...  Training Step: 3311...  Training loss: 1.0749...  0.1178 sec/batch
Epoch: 8/20...  Training Step: 3312...  Training loss: 1.1811...  0.1203 sec/batch
Epoch: 8/20...  Training Step: 3313...  Training loss: 1.0664...  0.1171 sec/batch
Epoch: 8/20...  Training Step: 3314...  Training loss: 1.3363...  0.1219 sec/batch
Epoch: 8/20...  Training Step: 3315...  Training loss: 1.1278...  0.1188 sec/batch
Epoch: 8/20...  Training Step: 3316...  Training loss: 1.2457...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3317...  Training loss: 1.1585...  0.1195 sec/batch
Epoch: 8/20...  Training Step: 3318...  Training loss: 1.1996...  0.1180 sec/batch
Epoch: 8/20...  Training Step: 3319...  Training loss: 1.2517...  0.1208 sec/batch
Epoch: 8/20...  Training Step: 3320...  Training loss: 1.0945...  0.1189 sec/batch
Epoch: 8/20...  Training Step: 3321...  Training loss: 1.2217...  0.1149 sec/batch
Epoch: 8/20...  Training Step: 3322...  Training loss: 1.0205...  0.1185 sec/batch
Epoch: 8/20...  Training Step: 3323...  Training loss: 1.5043...  0.1173 sec/batch
Epoch: 8/20...  Training Step: 3324...  Training loss: 1.0886...  0.1203 sec/batch
Epoch: 8/20...  Training Step: 3325...  Training loss: 1.1459...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3326...  Training loss: 1.2019...  0.1134 sec/batch
Epoch: 8/20...  Training Step: 3327...  Training loss: 1.2495...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3328...  Training loss: 1.1023...  0.1201 sec/batch
Epoch: 8/20...  Training Step: 3329...  Training loss: 1.3687...  0.1214 sec/batch
Epoch: 8/20...  Training Step: 3330...  Training loss: 1.2268...  0.1159 sec/batch
Epoch: 8/20...  Training Step: 3331...  Training loss: 1.0678...  0.1138 sec/batch
Epoch: 8/20...  Training Step: 3332...  Training loss: 1.3630...  0.1199 sec/batch
Epoch: 8/20...  Training Step: 3333...  Training loss: 1.2492...  0.1207 sec/batch
Epoch: 8/20...  Training Step: 3334...  Training loss: 1.2818...  0.1146 sec/batch
Epoch: 8/20...  Training Step: 3335...  Training loss: 1.0959...  0.1158 sec/batch
Epoch: 8/20...  Training Step: 3336...  Training loss: 1.2039...  0.1217 sec/batch
Epoch: 8/20...  Training Step: 3337...  Training loss: 1.3188...  0.1202 sec/batch
Epoch: 8/20...  Training Step: 3338...  Training loss: 1.2181...  0.1146 sec/batch
Epoch: 8/20...  Training Step: 3339...  Training loss: 1.3140...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3340...  Training loss: 1.3705...  0.1164 sec/batch
Epoch: 8/20...  Training Step: 3341...  Training loss: 1.0456...  0.1144 sec/batch
Epoch: 8/20...  Training Step: 3342...  Training loss: 1.3037...  0.1155 sec/batch
Epoch: 8/20...  Training Step: 3343...  Training loss: 1.2118...  0.1194 sec/batch
Epoch: 8/20...  Training Step: 3344...  Training loss: 1.2380...  0.1107 sec/batch
Epoch: 8/20...  Training Step: 3345...  Training loss: 1.4454...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3346...  Training loss: 1.3446...  0.1160 sec/batch
Epoch: 8/20...  Training Step: 3347...  Training loss: 1.3090...  0.1182 sec/batch
Epoch: 8/20...  Training Step: 3348...  Training loss: 1.1442...  0.1187 sec/batch
Epoch: 8/20...  Training Step: 3349...  Training loss: 1.2476...  0.1221 sec/batch
Epoch: 8/20...  Training Step: 3350...  Training loss: 1.2957...  0.1176 sec/batch
Epoch: 8/20...  Training Step: 3351...  Training loss: 1.4406...  0.1182 sec/batch
Epoch: 8/20...  Training Step: 3352...  Training loss: 1.1937...  0.1184 sec/batch
Epoch: 8/20...  Training Step: 3353...  Training loss: 1.3844...  0.1179 sec/batch
Epoch: 8/20...  Training Step: 3354...  Training loss: 1.4356...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3355...  Training loss: 1.2225...  0.1209 sec/batch
Epoch: 8/20...  Training Step: 3356...  Training loss: 1.2872...  0.1239 sec/batch
Epoch: 8/20...  Training Step: 3357...  Training loss: 1.2375...  0.1123 sec/batch
Epoch: 8/20...  Training Step: 3358...  Training loss: 1.1250...  0.1192 sec/batch
Epoch: 8/20...  Training Step: 3359...  Training loss: 1.2729...  0.1184 sec/batch
Epoch: 8/20...  Training Step: 3360...  Training loss: 1.1384...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3361...  Training loss: 1.2029...  0.1215 sec/batch
Epoch: 8/20...  Training Step: 3362...  Training loss: 1.3879...  0.1156 sec/batch
Epoch: 8/20...  Training Step: 3363...  Training loss: 1.2279...  0.1147 sec/batch
Epoch: 8/20...  Training Step: 3364...  Training loss: 1.2931...  0.1166 sec/batch
Epoch: 8/20...  Training Step: 3365...  Training loss: 1.2731...  0.1220 sec/batch
Epoch: 8/20...  Training Step: 3366...  Training loss: 1.3373...  0.1222 sec/batch
Epoch: 8/20...  Training Step: 3367...  Training loss: 1.1943...  0.1142 sec/batch
Epoch: 8/20...  Training Step: 3368...  Training loss: 1.0630...  0.1221 sec/batch
Epoch: 8/20...  Training Step: 3369...  Training loss: 1.2677...  0.1182 sec/batch
Epoch: 8/20...  Training Step: 3370...  Training loss: 1.2003...  0.1140 sec/batch
Epoch: 8/20...  Training Step: 3371...  Training loss: 1.2572...  0.1197 sec/batch
Epoch: 8/20...  Training Step: 3372...  Training loss: 1.2582...  0.1141 sec/batch
Epoch: 8/20...  Training Step: 3373...  Training loss: 1.2401...  0.1205 sec/batch
Epoch: 8/20...  Training Step: 3374...  Training loss: 1.0791...  0.1166 sec/batch
Epoch: 8/20...  Training Step: 3375...  Training loss: 1.0402...  0.1176 sec/batch
Epoch: 8/20...  Training Step: 3376...  Training loss: 1.3166...  0.1194 sec/batch
Epoch: 8/20...  Training Step: 3377...  Training loss: 1.2331...  0.1151 sec/batch
Epoch: 8/20...  Training Step: 3378...  Training loss: 1.2786...  0.1227 sec/batch
Epoch: 8/20...  Training Step: 3379...  Training loss: 1.5033...  0.1166 sec/batch
Epoch: 8/20...  Training Step: 3380...  Training loss: 1.2407...  0.1162 sec/batch
Epoch: 8/20...  Training Step: 3381...  Training loss: 1.2283...  0.1203 sec/batch
Epoch: 8/20...  Training Step: 3382...  Training loss: 1.3507...  0.1160 sec/batch
Epoch: 8/20...  Training Step: 3383...  Training loss: 1.0793...  0.1169 sec/batch
Epoch: 8/20...  Training Step: 3384...  Training loss: 0.9898...  0.1251 sec/batch
Epoch: 8/20...  Training Step: 3385...  Training loss: 1.0367...  0.1183 sec/batch
Epoch: 8/20...  Training Step: 3386...  Training loss: 1.2980...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3387...  Training loss: 1.1272...  0.1252 sec/batch
Epoch: 8/20...  Training Step: 3388...  Training loss: 1.1748...  0.1325 sec/batch
Epoch: 8/20...  Training Step: 3389...  Training loss: 1.0833...  0.1297 sec/batch
Epoch: 8/20...  Training Step: 3390...  Training loss: 1.0836...  0.1156 sec/batch
Epoch: 8/20...  Training Step: 3391...  Training loss: 1.0377...  0.1194 sec/batch
Epoch: 8/20...  Training Step: 3392...  Training loss: 1.2196...  0.1162 sec/batch
Epoch: 8/20...  Training Step: 3393...  Training loss: 1.2394...  0.1141 sec/batch
Epoch: 8/20...  Training Step: 3394...  Training loss: 1.1393...  0.1160 sec/batch
Epoch: 8/20...  Training Step: 3395...  Training loss: 1.1310...  0.1150 sec/batch
Epoch: 8/20...  Training Step: 3396...  Training loss: 1.0669...  0.1172 sec/batch
Epoch: 8/20...  Training Step: 3397...  Training loss: 1.3027...  0.1176 sec/batch
Epoch: 8/20...  Training Step: 3398...  Training loss: 1.3562...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3399...  Training loss: 1.2296...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3400...  Training loss: 1.1442...  0.1155 sec/batch
Epoch: 8/20...  Training Step: 3401...  Training loss: 1.4094...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3402...  Training loss: 1.2341...  0.1195 sec/batch
Epoch: 8/20...  Training Step: 3403...  Training loss: 1.1850...  0.1129 sec/batch
Epoch: 8/20...  Training Step: 3404...  Training loss: 1.2147...  0.1159 sec/batch
Epoch: 8/20...  Training Step: 3405...  Training loss: 1.1028...  0.1141 sec/batch
Epoch: 8/20...  Training Step: 3406...  Training loss: 1.2325...  0.1124 sec/batch
Epoch: 8/20...  Training Step: 3407...  Training loss: 1.0698...  0.1157 sec/batch
Epoch: 8/20...  Training Step: 3408...  Training loss: 1.1489...  0.1192 sec/batch
Epoch: 8/20...  Training Step: 3409...  Training loss: 1.3284...  0.1172 sec/batch
Epoch: 8/20...  Training Step: 3410...  Training loss: 1.1731...  0.1156 sec/batch
Epoch: 8/20...  Training Step: 3411...  Training loss: 1.4068...  0.1136 sec/batch
Epoch: 8/20...  Training Step: 3412...  Training loss: 1.1216...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3413...  Training loss: 1.3087...  0.1169 sec/batch
Epoch: 8/20...  Training Step: 3414...  Training loss: 1.1461...  0.1145 sec/batch
Epoch: 8/20...  Training Step: 3415...  Training loss: 1.1127...  0.1203 sec/batch
Epoch: 8/20...  Training Step: 3416...  Training loss: 1.2975...  0.1223 sec/batch
Epoch: 8/20...  Training Step: 3417...  Training loss: 1.1204...  0.1156 sec/batch
Epoch: 8/20...  Training Step: 3418...  Training loss: 1.3186...  0.1180 sec/batch
Epoch: 8/20...  Training Step: 3419...  Training loss: 1.2813...  0.1183 sec/batch
Epoch: 8/20...  Training Step: 3420...  Training loss: 1.4530...  0.1157 sec/batch
Epoch: 8/20...  Training Step: 3421...  Training loss: 1.1071...  0.1128 sec/batch
Epoch: 8/20...  Training Step: 3422...  Training loss: 1.1599...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3423...  Training loss: 1.3531...  0.1203 sec/batch
Epoch: 8/20...  Training Step: 3424...  Training loss: 1.0668...  0.1200 sec/batch
Epoch: 8/20...  Training Step: 3425...  Training loss: 1.0775...  0.1166 sec/batch
Epoch: 8/20...  Training Step: 3426...  Training loss: 1.2885...  0.1202 sec/batch
Epoch: 8/20...  Training Step: 3427...  Training loss: 1.0786...  0.1200 sec/batch
Epoch: 8/20...  Training Step: 3428...  Training loss: 1.2705...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3429...  Training loss: 1.0824...  0.1166 sec/batch
Epoch: 8/20...  Training Step: 3430...  Training loss: 1.3983...  0.1146 sec/batch
Epoch: 8/20...  Training Step: 3431...  Training loss: 1.2709...  0.1192 sec/batch
Epoch: 8/20...  Training Step: 3432...  Training loss: 1.1623...  0.1233 sec/batch
Epoch: 8/20...  Training Step: 3433...  Training loss: 1.3321...  0.1226 sec/batch
Epoch: 8/20...  Training Step: 3434...  Training loss: 1.2467...  0.1234 sec/batch
Epoch: 8/20...  Training Step: 3435...  Training loss: 1.3658...  0.1319 sec/batch
Epoch: 8/20...  Training Step: 3436...  Training loss: 0.9808...  0.1213 sec/batch
Epoch: 8/20...  Training Step: 3437...  Training loss: 1.2422...  0.1171 sec/batch
Epoch: 8/20...  Training Step: 3438...  Training loss: 1.1700...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3439...  Training loss: 1.1619...  0.1185 sec/batch
Epoch: 8/20...  Training Step: 3440...  Training loss: 1.3323...  0.1220 sec/batch
Epoch: 8/20...  Training Step: 3441...  Training loss: 1.2559...  0.1192 sec/batch
Epoch: 8/20...  Training Step: 3442...  Training loss: 1.2831...  0.1157 sec/batch
Epoch: 8/20...  Training Step: 3443...  Training loss: 1.2767...  0.1179 sec/batch
Epoch: 8/20...  Training Step: 3444...  Training loss: 1.2116...  0.1246 sec/batch
Epoch: 8/20...  Training Step: 3445...  Training loss: 1.0955...  0.1359 sec/batch
Epoch: 8/20...  Training Step: 3446...  Training loss: 1.2556...  0.1365 sec/batch
Epoch: 8/20...  Training Step: 3447...  Training loss: 0.9638...  0.1266 sec/batch
Epoch: 8/20...  Training Step: 3448...  Training loss: 1.2205...  0.1248 sec/batch
Epoch: 8/20...  Training Step: 3449...  Training loss: 1.1820...  0.1213 sec/batch
Epoch: 8/20...  Training Step: 3450...  Training loss: 1.2127...  0.1149 sec/batch
Epoch: 8/20...  Training Step: 3451...  Training loss: 1.1697...  0.1189 sec/batch
Epoch: 8/20...  Training Step: 3452...  Training loss: 1.2831...  0.1224 sec/batch
Epoch: 8/20...  Training Step: 3453...  Training loss: 1.1755...  0.1345 sec/batch
Epoch: 8/20...  Training Step: 3454...  Training loss: 1.0929...  0.1398 sec/batch
Epoch: 8/20...  Training Step: 3455...  Training loss: 1.1790...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3456...  Training loss: 1.2462...  0.1178 sec/batch
Epoch: 8/20...  Training Step: 3457...  Training loss: 1.2278...  0.1186 sec/batch
Epoch: 8/20...  Training Step: 3458...  Training loss: 1.0611...  0.1182 sec/batch
Epoch: 8/20...  Training Step: 3459...  Training loss: 1.0867...  0.1194 sec/batch
Epoch: 8/20...  Training Step: 3460...  Training loss: 1.2865...  0.1179 sec/batch
Epoch: 8/20...  Training Step: 3461...  Training loss: 1.3636...  0.1198 sec/batch
Epoch: 8/20...  Training Step: 3462...  Training loss: 1.1519...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3463...  Training loss: 1.3023...  0.1180 sec/batch
Epoch: 8/20...  Training Step: 3464...  Training loss: 1.2019...  0.1164 sec/batch
Epoch: 8/20...  Training Step: 3465...  Training loss: 1.2683...  0.1208 sec/batch
Epoch: 8/20...  Training Step: 3466...  Training loss: 1.1807...  0.1189 sec/batch
Epoch: 8/20...  Training Step: 3467...  Training loss: 1.4081...  0.1180 sec/batch
Epoch: 8/20...  Training Step: 3468...  Training loss: 1.1620...  0.1172 sec/batch
Epoch: 8/20...  Training Step: 3469...  Training loss: 1.1256...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3470...  Training loss: 1.5012...  0.1190 sec/batch
Epoch: 8/20...  Training Step: 3471...  Training loss: 1.3165...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3472...  Training loss: 1.5198...  0.1152 sec/batch
Epoch: 8/20...  Training Step: 3473...  Training loss: 1.2766...  0.1203 sec/batch
Epoch: 8/20...  Training Step: 3474...  Training loss: 1.3748...  0.1187 sec/batch
Epoch: 8/20...  Training Step: 3475...  Training loss: 1.4035...  0.1136 sec/batch
Epoch: 8/20...  Training Step: 3476...  Training loss: 1.1263...  0.1214 sec/batch
Epoch: 8/20...  Training Step: 3477...  Training loss: 1.2199...  0.1226 sec/batch
Epoch: 8/20...  Training Step: 3478...  Training loss: 1.1208...  0.1265 sec/batch
Epoch: 8/20...  Training Step: 3479...  Training loss: 1.2814...  0.1194 sec/batch
Epoch: 8/20...  Training Step: 3480...  Training loss: 1.1796...  0.1273 sec/batch
Epoch: 8/20...  Training Step: 3481...  Training loss: 1.4161...  0.1184 sec/batch
Epoch: 8/20...  Training Step: 3482...  Training loss: 1.1744...  0.1181 sec/batch
Epoch: 8/20...  Training Step: 3483...  Training loss: 1.4632...  0.1153 sec/batch
Epoch: 8/20...  Training Step: 3484...  Training loss: 1.2591...  0.1173 sec/batch
Epoch: 8/20...  Training Step: 3485...  Training loss: 1.4350...  0.1200 sec/batch
Epoch: 8/20...  Training Step: 3486...  Training loss: 0.9963...  0.1181 sec/batch
Epoch: 8/20...  Training Step: 3487...  Training loss: 1.2543...  0.1195 sec/batch
Epoch: 8/20...  Training Step: 3488...  Training loss: 1.2731...  0.1210 sec/batch
Epoch: 8/20...  Training Step: 3489...  Training loss: 1.2142...  0.1184 sec/batch
Epoch: 8/20...  Training Step: 3490...  Training loss: 1.1756...  0.1190 sec/batch
Epoch: 8/20...  Training Step: 3491...  Training loss: 1.4179...  0.1163 sec/batch
Epoch: 8/20...  Training Step: 3492...  Training loss: 1.2849...  0.1125 sec/batch
Epoch: 8/20...  Training Step: 3493...  Training loss: 1.2010...  0.1098 sec/batch
Epoch: 8/20...  Training Step: 3494...  Training loss: 1.1039...  0.1154 sec/batch
Epoch: 8/20...  Training Step: 3495...  Training loss: 1.1747...  0.1117 sec/batch
Epoch: 8/20...  Training Step: 3496...  Training loss: 1.3154...  0.1163 sec/batch
Epoch: 8/20...  Training Step: 3497...  Training loss: 1.1856...  0.1119 sec/batch
Epoch: 8/20...  Training Step: 3498...  Training loss: 1.2362...  0.1163 sec/batch
Epoch: 8/20...  Training Step: 3499...  Training loss: 1.5139...  0.1151 sec/batch
Epoch: 8/20...  Training Step: 3500...  Training loss: 1.1847...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3501...  Training loss: 1.1698...  0.1172 sec/batch
Epoch: 8/20...  Training Step: 3502...  Training loss: 1.1883...  0.1144 sec/batch
Epoch: 8/20...  Training Step: 3503...  Training loss: 1.2426...  0.1173 sec/batch
Epoch: 8/20...  Training Step: 3504...  Training loss: 1.3160...  0.1121 sec/batch
Epoch: 8/20...  Training Step: 3505...  Training loss: 1.4383...  0.1169 sec/batch
Epoch: 8/20...  Training Step: 3506...  Training loss: 1.0693...  0.1270 sec/batch
Epoch: 8/20...  Training Step: 3507...  Training loss: 1.0964...  0.1257 sec/batch
Epoch: 8/20...  Training Step: 3508...  Training loss: 1.1918...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3509...  Training loss: 1.2756...  0.1229 sec/batch
Epoch: 8/20...  Training Step: 3510...  Training loss: 1.3913...  0.1305 sec/batch
Epoch: 8/20...  Training Step: 3511...  Training loss: 1.1669...  0.1171 sec/batch
Epoch: 8/20...  Training Step: 3512...  Training loss: 1.2816...  0.1140 sec/batch
Epoch: 8/20...  Training Step: 3513...  Training loss: 1.3139...  0.1111 sec/batch
Epoch: 8/20...  Training Step: 3514...  Training loss: 1.2217...  0.1222 sec/batch
Epoch: 8/20...  Training Step: 3515...  Training loss: 1.5079...  0.1138 sec/batch
Epoch: 8/20...  Training Step: 3516...  Training loss: 1.3340...  0.1185 sec/batch
Epoch: 8/20...  Training Step: 3517...  Training loss: 1.3063...  0.1154 sec/batch
Epoch: 8/20...  Training Step: 3518...  Training loss: 1.3483...  0.1195 sec/batch
Epoch: 8/20...  Training Step: 3519...  Training loss: 1.2842...  0.1171 sec/batch
Epoch: 8/20...  Training Step: 3520...  Training loss: 1.4193...  0.1161 sec/batch
Epoch: 8/20...  Training Step: 3521...  Training loss: 1.3003...  0.1192 sec/batch
Epoch: 8/20...  Training Step: 3522...  Training loss: 1.2223...  0.1152 sec/batch
Epoch: 8/20...  Training Step: 3523...  Training loss: 1.3015...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3524...  Training loss: 1.1168...  0.1178 sec/batch
Epoch: 8/20...  Training Step: 3525...  Training loss: 1.0948...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3526...  Training loss: 1.3710...  0.1122 sec/batch
Epoch: 8/20...  Training Step: 3527...  Training loss: 1.2253...  0.1171 sec/batch
Epoch: 8/20...  Training Step: 3528...  Training loss: 1.2259...  0.1178 sec/batch
Epoch: 8/20...  Training Step: 3529...  Training loss: 1.2264...  0.1164 sec/batch
Epoch: 8/20...  Training Step: 3530...  Training loss: 1.1534...  0.1144 sec/batch
Epoch: 8/20...  Training Step: 3531...  Training loss: 1.1764...  0.1188 sec/batch
Epoch: 8/20...  Training Step: 3532...  Training loss: 1.2178...  0.1173 sec/batch
Epoch: 8/20...  Training Step: 3533...  Training loss: 1.0923...  0.1189 sec/batch
Epoch: 8/20...  Training Step: 3534...  Training loss: 1.2170...  0.1181 sec/batch
Epoch: 8/20...  Training Step: 3535...  Training loss: 1.2378...  0.1183 sec/batch
Epoch: 8/20...  Training Step: 3536...  Training loss: 1.2721...  0.1173 sec/batch
Epoch: 8/20...  Training Step: 3537...  Training loss: 1.2870...  0.1180 sec/batch
Epoch: 8/20...  Training Step: 3538...  Training loss: 1.2874...  0.1183 sec/batch
Epoch: 8/20...  Training Step: 3539...  Training loss: 1.1355...  0.1198 sec/batch
Epoch: 8/20...  Training Step: 3540...  Training loss: 1.1787...  0.1180 sec/batch
Epoch: 8/20...  Training Step: 3541...  Training loss: 1.1525...  0.1142 sec/batch
Epoch: 8/20...  Training Step: 3542...  Training loss: 1.1899...  0.1112 sec/batch
Epoch: 8/20...  Training Step: 3543...  Training loss: 1.2645...  0.1140 sec/batch
Epoch: 8/20...  Training Step: 3544...  Training loss: 1.4336...  0.1206 sec/batch
Epoch: 8/20...  Training Step: 3545...  Training loss: 1.1304...  0.1188 sec/batch
Epoch: 8/20...  Training Step: 3546...  Training loss: 1.1376...  0.1143 sec/batch
Epoch: 8/20...  Training Step: 3547...  Training loss: 1.2392...  0.1174 sec/batch
Epoch: 8/20...  Training Step: 3548...  Training loss: 1.1892...  0.1174 sec/batch
Epoch: 8/20...  Training Step: 3549...  Training loss: 1.2131...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3550...  Training loss: 1.1614...  0.1202 sec/batch
Epoch: 8/20...  Training Step: 3551...  Training loss: 1.0127...  0.1153 sec/batch
Epoch: 8/20...  Training Step: 3552...  Training loss: 1.3428...  0.1146 sec/batch
Epoch: 8/20...  Training Step: 3553...  Training loss: 1.1150...  0.1158 sec/batch
Epoch: 8/20...  Training Step: 3554...  Training loss: 1.2468...  0.1163 sec/batch
Epoch: 8/20...  Training Step: 3555...  Training loss: 1.2675...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3556...  Training loss: 1.5856...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3557...  Training loss: 1.4071...  0.1190 sec/batch
Epoch: 8/20...  Training Step: 3558...  Training loss: 1.3550...  0.1233 sec/batch
Epoch: 8/20...  Training Step: 3559...  Training loss: 1.2834...  0.1153 sec/batch
Epoch: 8/20...  Training Step: 3560...  Training loss: 1.1250...  0.1198 sec/batch
Epoch: 8/20...  Training Step: 3561...  Training loss: 1.2427...  0.1160 sec/batch
Epoch: 8/20...  Training Step: 3562...  Training loss: 1.2434...  0.1154 sec/batch
Epoch: 8/20...  Training Step: 3563...  Training loss: 0.9807...  0.1163 sec/batch
Epoch: 8/20...  Training Step: 3564...  Training loss: 1.0360...  0.1152 sec/batch
Epoch: 8/20...  Training Step: 3565...  Training loss: 1.1987...  0.1189 sec/batch
Epoch: 8/20...  Training Step: 3566...  Training loss: 1.1793...  0.1219 sec/batch
Epoch: 8/20...  Training Step: 3567...  Training loss: 1.1244...  0.1164 sec/batch
Epoch: 8/20...  Training Step: 3568...  Training loss: 1.1163...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3569...  Training loss: 1.1717...  0.1178 sec/batch
Epoch: 8/20...  Training Step: 3570...  Training loss: 1.3050...  0.1201 sec/batch
Epoch: 8/20...  Training Step: 3571...  Training loss: 1.0987...  0.1217 sec/batch
Epoch: 8/20...  Training Step: 3572...  Training loss: 1.1090...  0.1172 sec/batch
Epoch: 8/20...  Training Step: 3573...  Training loss: 1.0227...  0.1201 sec/batch
Epoch: 8/20...  Training Step: 3574...  Training loss: 1.0322...  0.1216 sec/batch
Epoch: 8/20...  Training Step: 3575...  Training loss: 1.2717...  0.1207 sec/batch
Epoch: 8/20...  Training Step: 3576...  Training loss: 1.1401...  0.1138 sec/batch
Epoch: 8/20...  Training Step: 3577...  Training loss: 1.3145...  0.1148 sec/batch
Epoch: 8/20...  Training Step: 3578...  Training loss: 1.0755...  0.1157 sec/batch
Epoch: 8/20...  Training Step: 3579...  Training loss: 1.2717...  0.1190 sec/batch
Epoch: 8/20...  Training Step: 3580...  Training loss: 1.2385...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3581...  Training loss: 1.0678...  0.1204 sec/batch
Epoch: 8/20...  Training Step: 3582...  Training loss: 1.1512...  0.1179 sec/batch
Epoch: 8/20...  Training Step: 3583...  Training loss: 1.3370...  0.1214 sec/batch
Epoch: 8/20...  Training Step: 3584...  Training loss: 1.1858...  0.1171 sec/batch
Epoch: 8/20...  Training Step: 3585...  Training loss: 1.2930...  0.1167 sec/batch
Epoch: 8/20...  Training Step: 3586...  Training loss: 1.0456...  0.1160 sec/batch
Epoch: 8/20...  Training Step: 3587...  Training loss: 1.2738...  0.1191 sec/batch
Epoch: 8/20...  Training Step: 3588...  Training loss: 1.0772...  0.1214 sec/batch
Epoch: 8/20...  Training Step: 3589...  Training loss: 1.1081...  0.1137 sec/batch
Epoch: 8/20...  Training Step: 3590...  Training loss: 1.2896...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3591...  Training loss: 1.0592...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3592...  Training loss: 1.3552...  0.1204 sec/batch
Epoch: 8/20...  Training Step: 3593...  Training loss: 1.1708...  0.1133 sec/batch
Epoch: 8/20...  Training Step: 3594...  Training loss: 0.9883...  0.1209 sec/batch
Epoch: 8/20...  Training Step: 3595...  Training loss: 1.0935...  0.1181 sec/batch
Epoch: 8/20...  Training Step: 3596...  Training loss: 1.6098...  0.1176 sec/batch
Epoch: 8/20...  Training Step: 3597...  Training loss: 1.1201...  0.1186 sec/batch
Epoch: 8/20...  Training Step: 3598...  Training loss: 1.2033...  0.1153 sec/batch
Epoch: 8/20...  Training Step: 3599...  Training loss: 1.2012...  0.1186 sec/batch
Epoch: 8/20...  Training Step: 3600...  Training loss: 1.0117...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3601...  Training loss: 0.9879...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3602...  Training loss: 0.9650...  0.1158 sec/batch
Epoch: 8/20...  Training Step: 3603...  Training loss: 1.2634...  0.1162 sec/batch
Epoch: 8/20...  Training Step: 3604...  Training loss: 1.0917...  0.1174 sec/batch
Epoch: 8/20...  Training Step: 3605...  Training loss: 1.0822...  0.1157 sec/batch
Epoch: 8/20...  Training Step: 3606...  Training loss: 1.1990...  0.1204 sec/batch
Epoch: 8/20...  Training Step: 3607...  Training loss: 1.2827...  0.1208 sec/batch
Epoch: 8/20...  Training Step: 3608...  Training loss: 0.9170...  0.1204 sec/batch
Epoch: 8/20...  Training Step: 3609...  Training loss: 1.3381...  0.1163 sec/batch
Epoch: 8/20...  Training Step: 3610...  Training loss: 1.2600...  0.1138 sec/batch
Epoch: 8/20...  Training Step: 3611...  Training loss: 1.1378...  0.1143 sec/batch
Epoch: 8/20...  Training Step: 3612...  Training loss: 1.0712...  0.1182 sec/batch
Epoch: 8/20...  Training Step: 3613...  Training loss: 1.0983...  0.1206 sec/batch
Epoch: 8/20...  Training Step: 3614...  Training loss: 1.2849...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3615...  Training loss: 1.1254...  0.1195 sec/batch
Epoch: 8/20...  Training Step: 3616...  Training loss: 1.4476...  0.1210 sec/batch
Epoch: 8/20...  Training Step: 3617...  Training loss: 1.2753...  0.1178 sec/batch
Epoch: 8/20...  Training Step: 3618...  Training loss: 1.1892...  0.1174 sec/batch
Epoch: 8/20...  Training Step: 3619...  Training loss: 1.0789...  0.1158 sec/batch
Epoch: 8/20...  Training Step: 3620...  Training loss: 1.3881...  0.1132 sec/batch
Epoch: 8/20...  Training Step: 3621...  Training loss: 1.1967...  0.1170 sec/batch
Epoch: 8/20...  Training Step: 3622...  Training loss: 1.2696...  0.1197 sec/batch
Epoch: 8/20...  Training Step: 3623...  Training loss: 1.2152...  0.1213 sec/batch
Epoch: 8/20...  Training Step: 3624...  Training loss: 1.3095...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3625...  Training loss: 1.3215...  0.1202 sec/batch
Epoch: 8/20...  Training Step: 3626...  Training loss: 1.3250...  0.1208 sec/batch
Epoch: 8/20...  Training Step: 3627...  Training loss: 1.1718...  0.1196 sec/batch
Epoch: 8/20...  Training Step: 3628...  Training loss: 1.2460...  0.1194 sec/batch
Epoch: 8/20...  Training Step: 3629...  Training loss: 1.1353...  0.1238 sec/batch
Epoch: 8/20...  Training Step: 3630...  Training loss: 1.1156...  0.1199 sec/batch
Epoch: 8/20...  Training Step: 3631...  Training loss: 1.0353...  0.1179 sec/batch
Epoch: 8/20...  Training Step: 3632...  Training loss: 1.1766...  0.1181 sec/batch
Epoch: 8/20...  Training Step: 3633...  Training loss: 1.1169...  0.1146 sec/batch
Epoch: 8/20...  Training Step: 3634...  Training loss: 1.3121...  0.1113 sec/batch
Epoch: 8/20...  Training Step: 3635...  Training loss: 1.1549...  0.1145 sec/batch
Epoch: 8/20...  Training Step: 3636...  Training loss: 1.0469...  0.1205 sec/batch
Epoch: 8/20...  Training Step: 3637...  Training loss: 1.0398...  0.1293 sec/batch
Epoch: 8/20...  Training Step: 3638...  Training loss: 1.1428...  0.1231 sec/batch
Epoch: 8/20...  Training Step: 3639...  Training loss: 1.0695...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3640...  Training loss: 1.1162...  0.1140 sec/batch
Epoch: 8/20...  Training Step: 3641...  Training loss: 1.1384...  0.1146 sec/batch
Epoch: 8/20...  Training Step: 3642...  Training loss: 1.1871...  0.1163 sec/batch
Epoch: 8/20...  Training Step: 3643...  Training loss: 1.0643...  0.1192 sec/batch
Epoch: 8/20...  Training Step: 3644...  Training loss: 1.3138...  0.1176 sec/batch
Epoch: 8/20...  Training Step: 3645...  Training loss: 1.1776...  0.1201 sec/batch
Epoch: 8/20...  Training Step: 3646...  Training loss: 1.1316...  0.1171 sec/batch
Epoch: 8/20...  Training Step: 3647...  Training loss: 1.1926...  0.1089 sec/batch
Epoch: 8/20...  Training Step: 3648...  Training loss: 1.1154...  0.1156 sec/batch
Epoch: 8/20...  Training Step: 3649...  Training loss: 1.2040...  0.1131 sec/batch
Epoch: 8/20...  Training Step: 3650...  Training loss: 1.0210...  0.1150 sec/batch
Epoch: 8/20...  Training Step: 3651...  Training loss: 1.0906...  0.1159 sec/batch
Epoch: 8/20...  Training Step: 3652...  Training loss: 1.3195...  0.1149 sec/batch
Epoch: 8/20...  Training Step: 3653...  Training loss: 1.1210...  0.1115 sec/batch
Epoch: 8/20...  Training Step: 3654...  Training loss: 1.5398...  0.1153 sec/batch
Epoch: 8/20...  Training Step: 3655...  Training loss: 1.1332...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3656...  Training loss: 1.5540...  0.1148 sec/batch
Epoch: 8/20...  Training Step: 3657...  Training loss: 1.2730...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3658...  Training loss: 1.1283...  0.1135 sec/batch
Epoch: 8/20...  Training Step: 3659...  Training loss: 1.0646...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3660...  Training loss: 1.1104...  0.1187 sec/batch
Epoch: 8/20...  Training Step: 3661...  Training loss: 1.2510...  0.1231 sec/batch
Epoch: 8/20...  Training Step: 3662...  Training loss: 1.2064...  0.1154 sec/batch
Epoch: 8/20...  Training Step: 3663...  Training loss: 1.3209...  0.1128 sec/batch
Epoch: 8/20...  Training Step: 3664...  Training loss: 1.3347...  0.1207 sec/batch
Epoch: 8/20...  Training Step: 3665...  Training loss: 1.3407...  0.1186 sec/batch
Epoch: 8/20...  Training Step: 3666...  Training loss: 1.0755...  0.1167 sec/batch
Epoch: 8/20...  Training Step: 3667...  Training loss: 1.2830...  0.1142 sec/batch
Epoch: 8/20...  Training Step: 3668...  Training loss: 1.0141...  0.1163 sec/batch
Epoch: 8/20...  Training Step: 3669...  Training loss: 1.2766...  0.1169 sec/batch
Epoch: 8/20...  Training Step: 3670...  Training loss: 1.3100...  0.1188 sec/batch
Epoch: 8/20...  Training Step: 3671...  Training loss: 1.3143...  0.1161 sec/batch
Epoch: 8/20...  Training Step: 3672...  Training loss: 1.4252...  0.1194 sec/batch
Epoch: 8/20...  Training Step: 3673...  Training loss: 1.2260...  0.1214 sec/batch
Epoch: 8/20...  Training Step: 3674...  Training loss: 1.2051...  0.1179 sec/batch
Epoch: 8/20...  Training Step: 3675...  Training loss: 1.1938...  0.1156 sec/batch
Epoch: 8/20...  Training Step: 3676...  Training loss: 1.2538...  0.1196 sec/batch
Epoch: 8/20...  Training Step: 3677...  Training loss: 1.0626...  0.1137 sec/batch
Epoch: 8/20...  Training Step: 3678...  Training loss: 1.2141...  0.1169 sec/batch
Epoch: 8/20...  Training Step: 3679...  Training loss: 1.3506...  0.1172 sec/batch
Epoch: 8/20...  Training Step: 3680...  Training loss: 1.3112...  0.1135 sec/batch
Epoch: 8/20...  Training Step: 3681...  Training loss: 1.4658...  0.1160 sec/batch
Epoch: 8/20...  Training Step: 3682...  Training loss: 1.2654...  0.1189 sec/batch
Epoch: 8/20...  Training Step: 3683...  Training loss: 1.0896...  0.1183 sec/batch
Epoch: 8/20...  Training Step: 3684...  Training loss: 1.2297...  0.1165 sec/batch
Epoch: 8/20...  Training Step: 3685...  Training loss: 1.2225...  0.1174 sec/batch
Epoch: 8/20...  Training Step: 3686...  Training loss: 1.2966...  0.1227 sec/batch
Epoch: 8/20...  Training Step: 3687...  Training loss: 1.3977...  0.1146 sec/batch
Epoch: 8/20...  Training Step: 3688...  Training loss: 1.4859...  0.1101 sec/batch
Epoch: 8/20...  Training Step: 3689...  Training loss: 1.0861...  0.1148 sec/batch
Epoch: 8/20...  Training Step: 3690...  Training loss: 1.1856...  0.1181 sec/batch
Epoch: 8/20...  Training Step: 3691...  Training loss: 1.2494...  0.1175 sec/batch
Epoch: 8/20...  Training Step: 3692...  Training loss: 1.2289...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3693...  Training loss: 1.2114...  0.1172 sec/batch
Epoch: 8/20...  Training Step: 3694...  Training loss: 1.1489...  0.1221 sec/batch
Epoch: 8/20...  Training Step: 3695...  Training loss: 1.3059...  0.1174 sec/batch
Epoch: 8/20...  Training Step: 3696...  Training loss: 1.1364...  0.1172 sec/batch
Epoch: 8/20...  Training Step: 3697...  Training loss: 1.2428...  0.1161 sec/batch
Epoch: 8/20...  Training Step: 3698...  Training loss: 1.2286...  0.1239 sec/batch
Epoch: 8/20...  Training Step: 3699...  Training loss: 1.1761...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3700...  Training loss: 1.2923...  0.1169 sec/batch
Epoch: 8/20...  Training Step: 3701...  Training loss: 1.0435...  0.1196 sec/batch
Epoch: 8/20...  Training Step: 3702...  Training loss: 1.4672...  0.1187 sec/batch
Epoch: 8/20...  Training Step: 3703...  Training loss: 1.2033...  0.1142 sec/batch
Epoch: 8/20...  Training Step: 3704...  Training loss: 1.0732...  0.1128 sec/batch
Epoch: 8/20...  Training Step: 3705...  Training loss: 1.1276...  0.1180 sec/batch
Epoch: 8/20...  Training Step: 3706...  Training loss: 1.0397...  0.1153 sec/batch
Epoch: 8/20...  Training Step: 3707...  Training loss: 1.0802...  0.1177 sec/batch
Epoch: 8/20...  Training Step: 3708...  Training loss: 1.2413...  0.1199 sec/batch
Epoch: 8/20...  Training Step: 3709...  Training loss: 1.2985...  0.1155 sec/batch
Epoch: 8/20...  Training Step: 3710...  Training loss: 1.1363...  0.1168 sec/batch
Epoch: 8/20...  Training Step: 3711...  Training loss: 1.2168...  0.1142 sec/batch
Epoch: 8/20...  Training Step: 3712...  Training loss: 1.1327...  0.1150 sec/batch
Epoch: 9/20...  Training Step: 3713...  Training loss: 1.5320...  0.1194 sec/batch
Epoch: 9/20...  Training Step: 3714...  Training loss: 1.2747...  0.1192 sec/batch
Epoch: 9/20...  Training Step: 3715...  Training loss: 1.1972...  0.1171 sec/batch
Epoch: 9/20...  Training Step: 3716...  Training loss: 1.2015...  0.1151 sec/batch
Epoch: 9/20...  Training Step: 3717...  Training loss: 1.2670...  0.1165 sec/batch
Epoch: 9/20...  Training Step: 3718...  Training loss: 1.0613...  0.1185 sec/batch
Epoch: 9/20...  Training Step: 3719...  Training loss: 1.4628...  0.1173 sec/batch
Epoch: 9/20...  Training Step: 3720...  Training loss: 1.1439...  0.1180 sec/batch
Epoch: 9/20...  Training Step: 3721...  Training loss: 1.0168...  0.1175 sec/batch
Epoch: 9/20...  Training Step: 3722...  Training loss: 1.2820...  0.1141 sec/batch
Epoch: 9/20...  Training Step: 3723...  Training loss: 1.1704...  0.1134 sec/batch
Epoch: 9/20...  Training Step: 3724...  Training loss: 1.0800...  0.1145 sec/batch
Epoch: 9/20...  Training Step: 3725...  Training loss: 1.3321...  0.1200 sec/batch
Epoch: 9/20...  Training Step: 3726...  Training loss: 0.9963...  0.1175 sec/batch
Epoch: 9/20...  Training Step: 3727...  Training loss: 1.1766...  0.1175 sec/batch
Epoch: 9/20...  Training Step: 3728...  Training loss: 1.2556...  0.1185 sec/batch
Epoch: 9/20...  Training Step: 3729...  Training loss: 1.1061...  0.1148 sec/batch
Epoch: 9/20...  Training Step: 3730...  Training loss: 1.1364...  0.1182 sec/batch
Epoch: 9/20...  Training Step: 3731...  Training loss: 1.2040...  0.1182 sec/batch
Epoch: 9/20...  Training Step: 3732...  Training loss: 1.1218...  0.1174 sec/batch
Epoch: 9/20...  Training Step: 3733...  Training loss: 1.2983...  0.1210 sec/batch
Epoch: 9/20...  Training Step: 3734...  Training loss: 1.0552...  0.1157 sec/batch
Epoch: 9/20...  Training Step: 3735...  Training loss: 1.2856...  0.1163 sec/batch
Epoch: 9/20...  Training Step: 3736...  Training loss: 1.1001...  0.1184 sec/batch
Epoch: 9/20...  Training Step: 3737...  Training loss: 1.1027...  0.1204 sec/batch
Epoch: 9/20...  Training Step: 3738...  Training loss: 1.0988...  0.1155 sec/batch
Epoch: 9/20...  Training Step: 3739...  Training loss: 1.3067...  0.1169 sec/batch
Epoch: 9/20...  Training Step: 3740...  Training loss: 1.0097...  0.1214 sec/batch
Epoch: 9/20...  Training Step: 3741...  Training loss: 1.0685...  0.1160 sec/batch
Epoch: 9/20...  Training Step: 3742...  Training loss: 1.1552...  0.1122 sec/batch
Epoch: 9/20...  Training Step: 3743...  Training loss: 1.0153...  0.1182 sec/batch
Epoch: 9/20...  Training Step: 3744...  Training loss: 1.1073...  0.1149 sec/batch
Epoch: 9/20...  Training Step: 3745...  Training loss: 1.0013...  0.1156 sec/batch
Epoch: 9/20...  Training Step: 3746...  Training loss: 0.9218...  0.1199 sec/batch
Epoch: 9/20...  Training Step: 3747...  Training loss: 1.0162...  0.1220 sec/batch
Epoch: 9/20...  Training Step: 3748...  Training loss: 0.9840...  0.1172 sec/batch
Epoch: 9/20...  Training Step: 3749...  Training loss: 1.1616...  0.1185 sec/batch
Epoch: 9/20...  Training Step: 3750...  Training loss: 1.0463...  0.1174 sec/batch
Epoch: 9/20...  Training Step: 3751...  Training loss: 0.9711...  0.1265 sec/batch
Epoch: 9/20...  Training Step: 3752...  Training loss: 1.4550...  0.1207 sec/batch
Epoch: 9/20...  Training Step: 3753...  Training loss: 1.1485...  0.1209 sec/batch
Epoch: 9/20...  Training Step: 3754...  Training loss: 1.0232...  0.1188 sec/batch
Epoch: 9/20...  Training Step: 3755...  Training loss: 1.3335...  0.1177 sec/batch
Epoch: 9/20...  Training Step: 3756...  Training loss: 0.9102...  0.1187 sec/batch
Epoch: 9/20...  Training Step: 3757...  Training loss: 1.1285...  0.1183 sec/batch
Epoch: 9/20...  Training Step: 3758...  Training loss: 1.1115...  0.1141 sec/batch
Epoch: 9/20...  Training Step: 3759...  Training loss: 1.2281...  0.1206 sec/batch
Epoch: 9/20...  Training Step: 3760...  Training loss: 1.0665...  0.1166 sec/batch
Epoch: 9/20...  Training Step: 3761...  Training loss: 1.0426...  0.1190 sec/batch
Epoch: 9/20...  Training Step: 3762...  Training loss: 1.1176...  0.1146 sec/batch
Epoch: 9/20...  Training Step: 3763...  Training loss: 1.1277...  0.1133 sec/batch
Epoch: 9/20...  Training Step: 3764...  Training loss: 1.1510...  0.1164 sec/batch
Epoch: 9/20...  Training Step: 3765...  Training loss: 1.1701...  0.1211 sec/batch
Epoch: 9/20...  Training Step: 3766...  Training loss: 1.2572...  0.1157 sec/batch
Epoch: 9/20...  Training Step: 3767...  Training loss: 1.0497...  0.1177 sec/batch
Epoch: 9/20...  Training Step: 3768...  Training loss: 1.1780...  0.1191 sec/batch
Epoch: 9/20...  Training Step: 3769...  Training loss: 1.1573...  0.1236 sec/batch
Epoch: 9/20...  Training Step: 3770...  Training loss: 1.2053...  0.1181 sec/batch
Epoch: 9/20...  Training Step: 3771...  Training loss: 1.0686...  0.1172 sec/batch
Epoch: 9/20...  Training Step: 3772...  Training loss: 1.0564...  0.1165 sec/batch
Epoch: 9/20...  Training Step: 3773...  Training loss: 1.1102...  0.1228 sec/batch
Epoch: 9/20...  Training Step: 3774...  Training loss: 1.2381...  0.1123 sec/batch
Epoch: 9/20...  Training Step: 3775...  Training loss: 1.1156...  0.1145 sec/batch
Epoch: 9/20...  Training Step: 3776...  Training loss: 1.2140...  0.1143 sec/batch
Epoch: 9/20...  Training Step: 3777...  Training loss: 1.0763...  0.1214 sec/batch
Epoch: 9/20...  Training Step: 3778...  Training loss: 1.2878...  0.1166 sec/batch
Epoch: 9/20...  Training Step: 3779...  Training loss: 1.1538...  0.1178 sec/batch
Epoch: 9/20...  Training Step: 3780...  Training loss: 1.1438...  0.1218 sec/batch
Epoch: 9/20...  Training Step: 3781...  Training loss: 1.1072...  0.1203 sec/batch
Epoch: 9/20...  Training Step: 3782...  Training loss: 1.2298...  0.1172 sec/batch
Epoch: 9/20...  Training Step: 3783...  Training loss: 1.2939...  0.1150 sec/batch
Epoch: 9/20...  Training Step: 3784...  Training loss: 1.1040...  0.1198 sec/batch
Epoch: 9/20...  Training Step: 3785...  Training loss: 1.2440...  0.1222 sec/batch
Epoch: 9/20...  Training Step: 3786...  Training loss: 1.0282...  0.1161 sec/batch
Epoch: 9/20...  Training Step: 3787...  Training loss: 1.3070...  0.1165 sec/batch
Epoch: 9/20...  Training Step: 3788...  Training loss: 1.0301...  0.1167 sec/batch
Epoch: 9/20...  Training Step: 3789...  Training loss: 1.0376...  0.1187 sec/batch
Epoch: 9/20...  Training Step: 3790...  Training loss: 1.1478...  0.1178 sec/batch
Epoch: 9/20...  Training Step: 3791...  Training loss: 1.1900...  0.1158 sec/batch
Epoch: 9/20...  Training Step: 3792...  Training loss: 1.0514...  0.1167 sec/batch
Epoch: 9/20...  Training Step: 3793...  Training loss: 1.3060...  0.1198 sec/batch
Epoch: 9/20...  Training Step: 3794...  Training loss: 1.1302...  0.1146 sec/batch
Epoch: 9/20...  Training Step: 3795...  Training loss: 0.9719...  0.1194 sec/batch
Epoch: 9/20...  Training Step: 3796...  Training loss: 1.1993...  0.1189 sec/batch
Epoch: 9/20...  Training Step: 3797...  Training loss: 1.1943...  0.1199 sec/batch
Epoch: 9/20...  Training Step: 3798...  Training loss: 1.3317...  0.1213 sec/batch
Epoch: 9/20...  Training Step: 3799...  Training loss: 1.0516...  0.1131 sec/batch
Epoch: 9/20...  Training Step: 3800...  Training loss: 1.2661...  0.1191 sec/batch
Epoch: 9/20...  Training Step: 3801...  Training loss: 1.2984...  0.1341 sec/batch
Epoch: 9/20...  Training Step: 3802...  Training loss: 1.0981...  0.1316 sec/batch
Epoch: 9/20...  Training Step: 3803...  Training loss: 1.2408...  0.1398 sec/batch
Epoch: 9/20...  Training Step: 3804...  Training loss: 1.3233...  0.1239 sec/batch
Epoch: 9/20...  Training Step: 3805...  Training loss: 0.9964...  0.1304 sec/batch
Epoch: 9/20...  Training Step: 3806...  Training loss: 1.3029...  0.1260 sec/batch
Epoch: 9/20...  Training Step: 3807...  Training loss: 1.1359...  0.1312 sec/batch
Epoch: 9/20...  Training Step: 3808...  Training loss: 1.1413...  0.1275 sec/batch
Epoch: 9/20...  Training Step: 3809...  Training loss: 1.3998...  0.1333 sec/batch
Epoch: 9/20...  Training Step: 3810...  Training loss: 1.3632...  0.1234 sec/batch
Epoch: 9/20...  Training Step: 3811...  Training loss: 1.1958...  0.1310 sec/batch
Epoch: 9/20...  Training Step: 3812...  Training loss: 1.1398...  0.1314 sec/batch
Epoch: 9/20...  Training Step: 3813...  Training loss: 1.2688...  0.1347 sec/batch
Epoch: 9/20...  Training Step: 3814...  Training loss: 1.3363...  0.1176 sec/batch
Epoch: 9/20...  Training Step: 3815...  Training loss: 1.4669...  0.1337 sec/batch
Epoch: 9/20...  Training Step: 3816...  Training loss: 1.1994...  0.1268 sec/batch
Epoch: 9/20...  Training Step: 3817...  Training loss: 1.3930...  0.1323 sec/batch
Epoch: 9/20...  Training Step: 3818...  Training loss: 1.3335...  0.1322 sec/batch
Epoch: 9/20...  Training Step: 3819...  Training loss: 1.2424...  0.1179 sec/batch
Epoch: 9/20...  Training Step: 3820...  Training loss: 1.2670...  0.1203 sec/batch
Epoch: 9/20...  Training Step: 3821...  Training loss: 1.2071...  0.1310 sec/batch
Epoch: 9/20...  Training Step: 3822...  Training loss: 1.0459...  0.1316 sec/batch
Epoch: 9/20...  Training Step: 3823...  Training loss: 1.1453...  0.1358 sec/batch
Epoch: 9/20...  Training Step: 3824...  Training loss: 1.0989...  0.1421 sec/batch
Epoch: 9/20...  Training Step: 3825...  Training loss: 1.2646...  0.1347 sec/batch
Epoch: 9/20...  Training Step: 3826...  Training loss: 1.3884...  0.1342 sec/batch
Epoch: 9/20...  Training Step: 3827...  Training loss: 1.1303...  0.1341 sec/batch
Epoch: 9/20...  Training Step: 3828...  Training loss: 1.1183...  0.1322 sec/batch
Epoch: 9/20...  Training Step: 3829...  Training loss: 1.1555...  0.1309 sec/batch
Epoch: 9/20...  Training Step: 3830...  Training loss: 1.1988...  0.1426 sec/batch
Epoch: 9/20...  Training Step: 3831...  Training loss: 1.2079...  0.1288 sec/batch
Epoch: 9/20...  Training Step: 3832...  Training loss: 1.0413...  0.1258 sec/batch
Epoch: 9/20...  Training Step: 3833...  Training loss: 1.2583...  0.1195 sec/batch
Epoch: 9/20...  Training Step: 3834...  Training loss: 1.2215...  0.1312 sec/batch
Epoch: 9/20...  Training Step: 3835...  Training loss: 1.2448...  0.1259 sec/batch
Epoch: 9/20...  Training Step: 3836...  Training loss: 1.1691...  0.1213 sec/batch
Epoch: 9/20...  Training Step: 3837...  Training loss: 1.1772...  0.1385 sec/batch
Epoch: 9/20...  Training Step: 3838...  Training loss: 1.0182...  0.1391 sec/batch
Epoch: 9/20...  Training Step: 3839...  Training loss: 1.1551...  0.1302 sec/batch
Epoch: 9/20...  Training Step: 3840...  Training loss: 1.3720...  0.1270 sec/batch
Epoch: 9/20...  Training Step: 3841...  Training loss: 1.2571...  0.1287 sec/batch
Epoch: 9/20...  Training Step: 3842...  Training loss: 1.1206...  0.1283 sec/batch
Epoch: 9/20...  Training Step: 3843...  Training loss: 1.3158...  0.1292 sec/batch
Epoch: 9/20...  Training Step: 3844...  Training loss: 1.1912...  0.1277 sec/batch
Epoch: 9/20...  Training Step: 3845...  Training loss: 1.1410...  0.1296 sec/batch
Epoch: 9/20...  Training Step: 3846...  Training loss: 1.4156...  0.1312 sec/batch
Epoch: 9/20...  Training Step: 3847...  Training loss: 1.0270...  0.1299 sec/batch
Epoch: 9/20...  Training Step: 3848...  Training loss: 0.9582...  0.1195 sec/batch
Epoch: 9/20...  Training Step: 3849...  Training loss: 1.0672...  0.1227 sec/batch
Epoch: 9/20...  Training Step: 3850...  Training loss: 1.1840...  0.1200 sec/batch
Epoch: 9/20...  Training Step: 3851...  Training loss: 1.1181...  0.1338 sec/batch
Epoch: 9/20...  Training Step: 3852...  Training loss: 1.2428...  0.1211 sec/batch
Epoch: 9/20...  Training Step: 3853...  Training loss: 1.0695...  0.1171 sec/batch
Epoch: 9/20...  Training Step: 3854...  Training loss: 1.0753...  0.1223 sec/batch
Epoch: 9/20...  Training Step: 3855...  Training loss: 1.0133...  0.1280 sec/batch
Epoch: 9/20...  Training Step: 3856...  Training loss: 1.2034...  0.1215 sec/batch
Epoch: 9/20...  Training Step: 3857...  Training loss: 1.1841...  0.1135 sec/batch
Epoch: 9/20...  Training Step: 3858...  Training loss: 1.1464...  0.1210 sec/batch
Epoch: 9/20...  Training Step: 3859...  Training loss: 1.1337...  0.1314 sec/batch
Epoch: 9/20...  Training Step: 3860...  Training loss: 1.0549...  0.1195 sec/batch
Epoch: 9/20...  Training Step: 3861...  Training loss: 1.0795...  0.1243 sec/batch
Epoch: 9/20...  Training Step: 3862...  Training loss: 1.4009...  0.1232 sec/batch
Epoch: 9/20...  Training Step: 3863...  Training loss: 1.1578...  0.1417 sec/batch
Epoch: 9/20...  Training Step: 3864...  Training loss: 1.2086...  0.1265 sec/batch
Epoch: 9/20...  Training Step: 3865...  Training loss: 1.2938...  0.1335 sec/batch
Epoch: 9/20...  Training Step: 3866...  Training loss: 1.1621...  0.1250 sec/batch
Epoch: 9/20...  Training Step: 3867...  Training loss: 1.2238...  0.1235 sec/batch
Epoch: 9/20...  Training Step: 3868...  Training loss: 1.1627...  0.1143 sec/batch
Epoch: 9/20...  Training Step: 3869...  Training loss: 1.0434...  0.1181 sec/batch
Epoch: 9/20...  Training Step: 3870...  Training loss: 1.2588...  0.1169 sec/batch
Epoch: 9/20...  Training Step: 3871...  Training loss: 0.9961...  0.1212 sec/batch
Epoch: 9/20...  Training Step: 3872...  Training loss: 1.0530...  0.1159 sec/batch
Epoch: 9/20...  Training Step: 3873...  Training loss: 1.3061...  0.1309 sec/batch
Epoch: 9/20...  Training Step: 3874...  Training loss: 1.1153...  0.1298 sec/batch
Epoch: 9/20...  Training Step: 3875...  Training loss: 1.3081...  0.1313 sec/batch
Epoch: 9/20...  Training Step: 3876...  Training loss: 1.0099...  0.1303 sec/batch
Epoch: 9/20...  Training Step: 3877...  Training loss: 1.2241...  0.1204 sec/batch
Epoch: 9/20...  Training Step: 3878...  Training loss: 1.0746...  0.1218 sec/batch
Epoch: 9/20...  Training Step: 3879...  Training loss: 1.0854...  0.1235 sec/batch
Epoch: 9/20...  Training Step: 3880...  Training loss: 1.4502...  0.1279 sec/batch
Epoch: 9/20...  Training Step: 3881...  Training loss: 1.1510...  0.1259 sec/batch
Epoch: 9/20...  Training Step: 3882...  Training loss: 1.2109...  0.1257 sec/batch
Epoch: 9/20...  Training Step: 3883...  Training loss: 1.2336...  0.1221 sec/batch
Epoch: 9/20...  Training Step: 3884...  Training loss: 1.2690...  0.1159 sec/batch
Epoch: 9/20...  Training Step: 3885...  Training loss: 0.9936...  0.1190 sec/batch
Epoch: 9/20...  Training Step: 3886...  Training loss: 1.2161...  0.1295 sec/batch
Epoch: 9/20...  Training Step: 3887...  Training loss: 1.2669...  0.1153 sec/batch
Epoch: 9/20...  Training Step: 3888...  Training loss: 1.0813...  0.1314 sec/batch
Epoch: 9/20...  Training Step: 3889...  Training loss: 1.0671...  0.1246 sec/batch
Epoch: 9/20...  Training Step: 3890...  Training loss: 1.2827...  0.1267 sec/batch
Epoch: 9/20...  Training Step: 3891...  Training loss: 1.0078...  0.1320 sec/batch
Epoch: 9/20...  Training Step: 3892...  Training loss: 1.2362...  0.1163 sec/batch
Epoch: 9/20...  Training Step: 3893...  Training loss: 1.0349...  0.1275 sec/batch
Epoch: 9/20...  Training Step: 3894...  Training loss: 1.2117...  0.1288 sec/batch
Epoch: 9/20...  Training Step: 3895...  Training loss: 1.3236...  0.1272 sec/batch
Epoch: 9/20...  Training Step: 3896...  Training loss: 1.2329...  0.1274 sec/batch
Epoch: 9/20...  Training Step: 3897...  Training loss: 1.2642...  0.1353 sec/batch
Epoch: 9/20...  Training Step: 3898...  Training loss: 1.2242...  0.1250 sec/batch
Epoch: 9/20...  Training Step: 3899...  Training loss: 1.2828...  0.1202 sec/batch
Epoch: 9/20...  Training Step: 3900...  Training loss: 1.1515...  0.1188 sec/batch
Epoch: 9/20...  Training Step: 3901...  Training loss: 1.3210...  0.1207 sec/batch
Epoch: 9/20...  Training Step: 3902...  Training loss: 1.1601...  0.1314 sec/batch
Epoch: 9/20...  Training Step: 3903...  Training loss: 1.0494...  0.1289 sec/batch
Epoch: 9/20...  Training Step: 3904...  Training loss: 1.2767...  0.1201 sec/batch
Epoch: 9/20...  Training Step: 3905...  Training loss: 1.1415...  0.1268 sec/batch
Epoch: 9/20...  Training Step: 3906...  Training loss: 1.2722...  0.1244 sec/batch
Epoch: 9/20...  Training Step: 3907...  Training loss: 1.2561...  0.1289 sec/batch
Epoch: 9/20...  Training Step: 3908...  Training loss: 1.2410...  0.1237 sec/batch
Epoch: 9/20...  Training Step: 3909...  Training loss: 1.1165...  0.1302 sec/batch
Epoch: 9/20...  Training Step: 3910...  Training loss: 1.2457...  0.1215 sec/batch
Epoch: 9/20...  Training Step: 3911...  Training loss: 0.9192...  0.1265 sec/batch
Epoch: 9/20...  Training Step: 3912...  Training loss: 1.2001...  0.1278 sec/batch
Epoch: 9/20...  Training Step: 3913...  Training loss: 1.1458...  0.1390 sec/batch
Epoch: 9/20...  Training Step: 3914...  Training loss: 1.1605...  0.1344 sec/batch
Epoch: 9/20...  Training Step: 3915...  Training loss: 1.1611...  0.1279 sec/batch
Epoch: 9/20...  Training Step: 3916...  Training loss: 1.3208...  0.1329 sec/batch
Epoch: 9/20...  Training Step: 3917...  Training loss: 1.0593...  0.1259 sec/batch
Epoch: 9/20...  Training Step: 3918...  Training loss: 1.1644...  0.1257 sec/batch
Epoch: 9/20...  Training Step: 3919...  Training loss: 1.0960...  0.1139 sec/batch
Epoch: 9/20...  Training Step: 3920...  Training loss: 1.2383...  0.1248 sec/batch
Epoch: 9/20...  Training Step: 3921...  Training loss: 1.1366...  0.1199 sec/batch
Epoch: 9/20...  Training Step: 3922...  Training loss: 1.0142...  0.1248 sec/batch
Epoch: 9/20...  Training Step: 3923...  Training loss: 1.0302...  0.1314 sec/batch
Epoch: 9/20...  Training Step: 3924...  Training loss: 1.2199...  0.1280 sec/batch
Epoch: 9/20...  Training Step: 3925...  Training loss: 1.3143...  0.1184 sec/batch
Epoch: 9/20...  Training Step: 3926...  Training loss: 1.0695...  0.1178 sec/batch
Epoch: 9/20...  Training Step: 3927...  Training loss: 1.3217...  0.1234 sec/batch
Epoch: 9/20...  Training Step: 3928...  Training loss: 1.1344...  0.1255 sec/batch
Epoch: 9/20...  Training Step: 3929...  Training loss: 1.2234...  0.1192 sec/batch
Epoch: 9/20...  Training Step: 3930...  Training loss: 1.1301...  0.1193 sec/batch
Epoch: 9/20...  Training Step: 3931...  Training loss: 1.3356...  0.1171 sec/batch
Epoch: 9/20...  Training Step: 3932...  Training loss: 1.1714...  0.1158 sec/batch
Epoch: 9/20...  Training Step: 3933...  Training loss: 1.1798...  0.1183 sec/batch
Epoch: 9/20...  Training Step: 3934...  Training loss: 1.4757...  0.1187 sec/batch
Epoch: 9/20...  Training Step: 3935...  Training loss: 1.2272...  0.1156 sec/batch
Epoch: 9/20...  Training Step: 3936...  Training loss: 1.3206...  0.1194 sec/batch
Epoch: 9/20...  Training Step: 3937...  Training loss: 1.1877...  0.1483 sec/batch
Epoch: 9/20...  Training Step: 3938...  Training loss: 1.3636...  0.1239 sec/batch
Epoch: 9/20...  Training Step: 3939...  Training loss: 1.4350...  0.1269 sec/batch
Epoch: 9/20...  Training Step: 3940...  Training loss: 1.1233...  0.1291 sec/batch
Epoch: 9/20...  Training Step: 3941...  Training loss: 1.2520...  0.1284 sec/batch
Epoch: 9/20...  Training Step: 3942...  Training loss: 1.1451...  0.1252 sec/batch
Epoch: 9/20...  Training Step: 3943...  Training loss: 1.3654...  0.1242 sec/batch
Epoch: 9/20...  Training Step: 3944...  Training loss: 1.1550...  0.1200 sec/batch
Epoch: 9/20...  Training Step: 3945...  Training loss: 1.4274...  0.1149 sec/batch
Epoch: 9/20...  Training Step: 3946...  Training loss: 1.1491...  0.1196 sec/batch
Epoch: 9/20...  Training Step: 3947...  Training loss: 1.4354...  0.1180 sec/batch
Epoch: 9/20...  Training Step: 3948...  Training loss: 1.1841...  0.1209 sec/batch
Epoch: 9/20...  Training Step: 3949...  Training loss: 1.3754...  0.1175 sec/batch
Epoch: 9/20...  Training Step: 3950...  Training loss: 1.1041...  0.1124 sec/batch
Epoch: 9/20...  Training Step: 3951...  Training loss: 1.2725...  0.1197 sec/batch
Epoch: 9/20...  Training Step: 3952...  Training loss: 1.2727...  0.1170 sec/batch
Epoch: 9/20...  Training Step: 3953...  Training loss: 1.1806...  0.1172 sec/batch
Epoch: 9/20...  Training Step: 3954...  Training loss: 1.1604...  0.1166 sec/batch
Epoch: 9/20...  Training Step: 3955...  Training loss: 1.2014...  0.1191 sec/batch
Epoch: 9/20...  Training Step: 3956...  Training loss: 1.2062...  0.1165 sec/batch
Epoch: 9/20...  Training Step: 3957...  Training loss: 1.2627...  0.1201 sec/batch
Epoch: 9/20...  Training Step: 3958...  Training loss: 1.0833...  0.1244 sec/batch
Epoch: 9/20...  Training Step: 3959...  Training loss: 1.0972...  0.1273 sec/batch
Epoch: 9/20...  Training Step: 3960...  Training loss: 1.3095...  0.1316 sec/batch
Epoch: 9/20...  Training Step: 3961...  Training loss: 1.1242...  0.1246 sec/batch
Epoch: 9/20...  Training Step: 3962...  Training loss: 1.2725...  0.1283 sec/batch
Epoch: 9/20...  Training Step: 3963...  Training loss: 1.2592...  0.1301 sec/batch
Epoch: 9/20...  Training Step: 3964...  Training loss: 1.1886...  0.1351 sec/batch
Epoch: 9/20...  Training Step: 3965...  Training loss: 1.1089...  0.1375 sec/batch
Epoch: 9/20...  Training Step: 3966...  Training loss: 1.1956...  0.1314 sec/batch
Epoch: 9/20...  Training Step: 3967...  Training loss: 1.1517...  0.1309 sec/batch
Epoch: 9/20...  Training Step: 3968...  Training loss: 1.1358...  0.1332 sec/batch
Epoch: 9/20...  Training Step: 3969...  Training loss: 1.4692...  0.1351 sec/batch
Epoch: 9/20...  Training Step: 3970...  Training loss: 1.0327...  0.1335 sec/batch
Epoch: 9/20...  Training Step: 3971...  Training loss: 1.1087...  0.1218 sec/batch
Epoch: 9/20...  Training Step: 3972...  Training loss: 1.0598...  0.1253 sec/batch
Epoch: 9/20...  Training Step: 3973...  Training loss: 1.2052...  0.1303 sec/batch
Epoch: 9/20...  Training Step: 3974...  Training loss: 1.2612...  0.1245 sec/batch
Epoch: 9/20...  Training Step: 3975...  Training loss: 1.1524...  0.1229 sec/batch
Epoch: 9/20...  Training Step: 3976...  Training loss: 1.2045...  0.1274 sec/batch
Epoch: 9/20...  Training Step: 3977...  Training loss: 1.2814...  0.1277 sec/batch
Epoch: 9/20...  Training Step: 3978...  Training loss: 1.2965...  0.1314 sec/batch
Epoch: 9/20...  Training Step: 3979...  Training loss: 1.4474...  0.1273 sec/batch
Epoch: 9/20...  Training Step: 3980...  Training loss: 1.3096...  0.1202 sec/batch
Epoch: 9/20...  Training Step: 3981...  Training loss: 1.2294...  0.1135 sec/batch
Epoch: 9/20...  Training Step: 3982...  Training loss: 1.3974...  0.1149 sec/batch
Epoch: 9/20...  Training Step: 3983...  Training loss: 1.3126...  0.1180 sec/batch
Epoch: 9/20...  Training Step: 3984...  Training loss: 1.3374...  0.1260 sec/batch
Epoch: 9/20...  Training Step: 3985...  Training loss: 1.3046...  0.1347 sec/batch
Epoch: 9/20...  Training Step: 3986...  Training loss: 1.2181...  0.1285 sec/batch
Epoch: 9/20...  Training Step: 3987...  Training loss: 1.3277...  0.1259 sec/batch
Epoch: 9/20...  Training Step: 3988...  Training loss: 1.0947...  0.1309 sec/batch
Epoch: 9/20...  Training Step: 3989...  Training loss: 1.1564...  0.1239 sec/batch
Epoch: 9/20...  Training Step: 3990...  Training loss: 1.3555...  0.1228 sec/batch
Epoch: 9/20...  Training Step: 3991...  Training loss: 1.1499...  0.1308 sec/batch
Epoch: 9/20...  Training Step: 3992...  Training loss: 1.2126...  0.1283 sec/batch
Epoch: 9/20...  Training Step: 3993...  Training loss: 1.1097...  0.1471 sec/batch
Epoch: 9/20...  Training Step: 3994...  Training loss: 1.2027...  0.1399 sec/batch
Epoch: 9/20...  Training Step: 3995...  Training loss: 1.0800...  0.1305 sec/batch
Epoch: 9/20...  Training Step: 3996...  Training loss: 1.2544...  0.1428 sec/batch
Epoch: 9/20...  Training Step: 3997...  Training loss: 1.0967...  0.1328 sec/batch
Epoch: 9/20...  Training Step: 3998...  Training loss: 1.2055...  0.1287 sec/batch
Epoch: 9/20...  Training Step: 3999...  Training loss: 1.2410...  0.1389 sec/batch
Epoch: 9/20...  Training Step: 4000...  Training loss: 1.3239...  0.1429 sec/batch
Epoch: 9/20...  Training Step: 4001...  Training loss: 1.2892...  0.1188 sec/batch
Epoch: 9/20...  Training Step: 4002...  Training loss: 1.2924...  0.1138 sec/batch
Epoch: 9/20...  Training Step: 4003...  Training loss: 1.1903...  0.1208 sec/batch
Epoch: 9/20...  Training Step: 4004...  Training loss: 1.0960...  0.1248 sec/batch
Epoch: 9/20...  Training Step: 4005...  Training loss: 1.1214...  0.1221 sec/batch
Epoch: 9/20...  Training Step: 4006...  Training loss: 1.1866...  0.1257 sec/batch
Epoch: 9/20...  Training Step: 4007...  Training loss: 1.2709...  0.1147 sec/batch
Epoch: 9/20...  Training Step: 4008...  Training loss: 1.5377...  0.1157 sec/batch
Epoch: 9/20...  Training Step: 4009...  Training loss: 1.1470...  0.1174 sec/batch
Epoch: 9/20...  Training Step: 4010...  Training loss: 1.1955...  0.1251 sec/batch
Epoch: 9/20...  Training Step: 4011...  Training loss: 1.1117...  0.1261 sec/batch
Epoch: 9/20...  Training Step: 4012...  Training loss: 1.2144...  0.1316 sec/batch
Epoch: 9/20...  Training Step: 4013...  Training loss: 1.1689...  0.1246 sec/batch
Epoch: 9/20...  Training Step: 4014...  Training loss: 1.2033...  0.1256 sec/batch
Epoch: 9/20...  Training Step: 4015...  Training loss: 0.9384...  0.1258 sec/batch
Epoch: 9/20...  Training Step: 4016...  Training loss: 1.3535...  0.1288 sec/batch
Epoch: 9/20...  Training Step: 4017...  Training loss: 1.0835...  0.1284 sec/batch
Epoch: 9/20...  Training Step: 4018...  Training loss: 1.1843...  0.1267 sec/batch
Epoch: 9/20...  Training Step: 4019...  Training loss: 1.2152...  0.1306 sec/batch
Epoch: 9/20...  Training Step: 4020...  Training loss: 1.5008...  0.1229 sec/batch
Epoch: 9/20...  Training Step: 4021...  Training loss: 1.2543...  0.1341 sec/batch
Epoch: 9/20...  Training Step: 4022...  Training loss: 1.3007...  0.1277 sec/batch
Epoch: 9/20...  Training Step: 4023...  Training loss: 1.1771...  0.1170 sec/batch
Epoch: 9/20...  Training Step: 4024...  Training loss: 1.1595...  0.1199 sec/batch
Epoch: 9/20...  Training Step: 4025...  Training loss: 1.1253...  0.1258 sec/batch
Epoch: 9/20...  Training Step: 4026...  Training loss: 1.1427...  0.1306 sec/batch
Epoch: 9/20...  Training Step: 4027...  Training loss: 0.9541...  0.1280 sec/batch
Epoch: 9/20...  Training Step: 4028...  Training loss: 1.0082...  0.1393 sec/batch
Epoch: 9/20...  Training Step: 4029...  Training loss: 1.1587...  0.1402 sec/batch
Epoch: 9/20...  Training Step: 4030...  Training loss: 1.0303...  0.1262 sec/batch
Epoch: 9/20...  Training Step: 4031...  Training loss: 1.0602...  0.1254 sec/batch
Epoch: 9/20...  Training Step: 4032...  Training loss: 1.0453...  0.1129 sec/batch
Epoch: 9/20...  Training Step: 4033...  Training loss: 1.0932...  0.1146 sec/batch
Epoch: 9/20...  Training Step: 4034...  Training loss: 1.3645...  0.1263 sec/batch
Epoch: 9/20...  Training Step: 4035...  Training loss: 1.0722...  0.1265 sec/batch
Epoch: 9/20...  Training Step: 4036...  Training loss: 0.9849...  0.1279 sec/batch
Epoch: 9/20...  Training Step: 4037...  Training loss: 1.0758...  0.1222 sec/batch
Epoch: 9/20...  Training Step: 4038...  Training loss: 1.0089...  0.1216 sec/batch
Epoch: 9/20...  Training Step: 4039...  Training loss: 1.1203...  0.1196 sec/batch
Epoch: 9/20...  Training Step: 4040...  Training loss: 1.0915...  0.1174 sec/batch
Epoch: 9/20...  Training Step: 4041...  Training loss: 1.2433...  0.1194 sec/batch
Epoch: 9/20...  Training Step: 4042...  Training loss: 1.1655...  0.1175 sec/batch
Epoch: 9/20...  Training Step: 4043...  Training loss: 1.1939...  0.1162 sec/batch
Epoch: 9/20...  Training Step: 4044...  Training loss: 1.1152...  0.1207 sec/batch
Epoch: 9/20...  Training Step: 4045...  Training loss: 1.1523...  0.1275 sec/batch
Epoch: 9/20...  Training Step: 4046...  Training loss: 1.0753...  0.1216 sec/batch
Epoch: 9/20...  Training Step: 4047...  Training loss: 1.1885...  0.1239 sec/batch
Epoch: 9/20...  Training Step: 4048...  Training loss: 1.1437...  0.1265 sec/batch
Epoch: 9/20...  Training Step: 4049...  Training loss: 1.2248...  0.1298 sec/batch
Epoch: 9/20...  Training Step: 4050...  Training loss: 1.0015...  0.1242 sec/batch
Epoch: 9/20...  Training Step: 4051...  Training loss: 1.2726...  0.1254 sec/batch
Epoch: 9/20...  Training Step: 4052...  Training loss: 1.0893...  0.1185 sec/batch
Epoch: 9/20...  Training Step: 4053...  Training loss: 1.0452...  0.1299 sec/batch
Epoch: 9/20...  Training Step: 4054...  Training loss: 1.1615...  0.1172 sec/batch
Epoch: 9/20...  Training Step: 4055...  Training loss: 1.0424...  0.1181 sec/batch
Epoch: 9/20...  Training Step: 4056...  Training loss: 1.4045...  0.1177 sec/batch
Epoch: 9/20...  Training Step: 4057...  Training loss: 1.0259...  0.1143 sec/batch
Epoch: 9/20...  Training Step: 4058...  Training loss: 0.9632...  0.1202 sec/batch
Epoch: 9/20...  Training Step: 4059...  Training loss: 0.9937...  0.1216 sec/batch
Epoch: 9/20...  Training Step: 4060...  Training loss: 1.4693...  0.1215 sec/batch
Epoch: 9/20...  Training Step: 4061...  Training loss: 0.9853...  0.1124 sec/batch
Epoch: 9/20...  Training Step: 4062...  Training loss: 1.2074...  0.1253 sec/batch
Epoch: 9/20...  Training Step: 4063...  Training loss: 1.1137...  0.1240 sec/batch
Epoch: 9/20...  Training Step: 4064...  Training loss: 1.1248...  0.1257 sec/batch
Epoch: 9/20...  Training Step: 4065...  Training loss: 1.0584...  0.1261 sec/batch
Epoch: 9/20...  Training Step: 4066...  Training loss: 0.8470...  0.1239 sec/batch
Epoch: 9/20...  Training Step: 4067...  Training loss: 1.2456...  0.1284 sec/batch
Epoch: 9/20...  Training Step: 4068...  Training loss: 1.1321...  0.1239 sec/batch
Epoch: 9/20...  Training Step: 4069...  Training loss: 1.1332...  0.1297 sec/batch
Epoch: 9/20...  Training Step: 4070...  Training loss: 1.3777...  0.1235 sec/batch
Epoch: 9/20...  Training Step: 4071...  Training loss: 1.3316...  0.1225 sec/batch
Epoch: 9/20...  Training Step: 4072...  Training loss: 1.0148...  0.1251 sec/batch
Epoch: 9/20...  Training Step: 4073...  Training loss: 1.2889...  0.1264 sec/batch
Epoch: 9/20...  Training Step: 4074...  Training loss: 1.2710...  0.1227 sec/batch
Epoch: 9/20...  Training Step: 4075...  Training loss: 0.9636...  0.1207 sec/batch
Epoch: 9/20...  Training Step: 4076...  Training loss: 1.1778...  0.1205 sec/batch
Epoch: 9/20...  Training Step: 4077...  Training loss: 1.1487...  0.1274 sec/batch
Epoch: 9/20...  Training Step: 4078...  Training loss: 1.2552...  0.1306 sec/batch
Epoch: 9/20...  Training Step: 4079...  Training loss: 1.1047...  0.1289 sec/batch
Epoch: 9/20...  Training Step: 4080...  Training loss: 1.2616...  0.1309 sec/batch
Epoch: 9/20...  Training Step: 4081...  Training loss: 1.1982...  0.1198 sec/batch
Epoch: 9/20...  Training Step: 4082...  Training loss: 1.1430...  0.1231 sec/batch
Epoch: 9/20...  Training Step: 4083...  Training loss: 1.0058...  0.1249 sec/batch
Epoch: 9/20...  Training Step: 4084...  Training loss: 1.0760...  0.1290 sec/batch
Epoch: 9/20...  Training Step: 4085...  Training loss: 0.9818...  0.1256 sec/batch
Epoch: 9/20...  Training Step: 4086...  Training loss: 1.2802...  0.1239 sec/batch
Epoch: 9/20...  Training Step: 4087...  Training loss: 1.2483...  0.1280 sec/batch
Epoch: 9/20...  Training Step: 4088...  Training loss: 1.2787...  0.1259 sec/batch
Epoch: 9/20...  Training Step: 4089...  Training loss: 1.3062...  0.1217 sec/batch
Epoch: 9/20...  Training Step: 4090...  Training loss: 1.2343...  0.1239 sec/batch
Epoch: 9/20...  Training Step: 4091...  Training loss: 1.0578...  0.1263 sec/batch
Epoch: 9/20...  Training Step: 4092...  Training loss: 1.2267...  0.1229 sec/batch
Epoch: 9/20...  Training Step: 4093...  Training loss: 1.0127...  0.1171 sec/batch
Epoch: 9/20...  Training Step: 4094...  Training loss: 1.0636...  0.1263 sec/batch
Epoch: 9/20...  Training Step: 4095...  Training loss: 0.9899...  0.1276 sec/batch
Epoch: 9/20...  Training Step: 4096...  Training loss: 1.1551...  0.1282 sec/batch
Epoch: 9/20...  Training Step: 4097...  Training loss: 1.1092...  0.1253 sec/batch
Epoch: 9/20...  Training Step: 4098...  Training loss: 1.1581...  0.1242 sec/batch
Epoch: 9/20...  Training Step: 4099...  Training loss: 1.1301...  0.1316 sec/batch
Epoch: 9/20...  Training Step: 4100...  Training loss: 0.9596...  0.1241 sec/batch
Epoch: 9/20...  Training Step: 4101...  Training loss: 0.9633...  0.1344 sec/batch
Epoch: 9/20...  Training Step: 4102...  Training loss: 1.1678...  0.1302 sec/batch
Epoch: 9/20...  Training Step: 4103...  Training loss: 1.0343...  0.1274 sec/batch
Epoch: 9/20...  Training Step: 4104...  Training loss: 1.0770...  0.1214 sec/batch
Epoch: 9/20...  Training Step: 4105...  Training loss: 1.1124...  0.1271 sec/batch
Epoch: 9/20...  Training Step: 4106...  Training loss: 1.1495...  0.1298 sec/batch
Epoch: 9/20...  Training Step: 4107...  Training loss: 1.0969...  0.1384 sec/batch
Epoch: 9/20...  Training Step: 4108...  Training loss: 1.2966...  0.1331 sec/batch
Epoch: 9/20...  Training Step: 4109...  Training loss: 1.1019...  0.1315 sec/batch
Epoch: 9/20...  Training Step: 4110...  Training loss: 1.0561...  0.1270 sec/batch
Epoch: 9/20...  Training Step: 4111...  Training loss: 1.1790...  0.1321 sec/batch
Epoch: 9/20...  Training Step: 4112...  Training loss: 1.1200...  0.1266 sec/batch
Epoch: 9/20...  Training Step: 4113...  Training loss: 1.0765...  0.1349 sec/batch
Epoch: 9/20...  Training Step: 4114...  Training loss: 1.1980...  0.1308 sec/batch
Epoch: 9/20...  Training Step: 4115...  Training loss: 0.9648...  0.1306 sec/batch
Epoch: 9/20...  Training Step: 4116...  Training loss: 1.2153...  0.1256 sec/batch
Epoch: 9/20...  Training Step: 4117...  Training loss: 1.1668...  0.1291 sec/batch
Epoch: 9/20...  Training Step: 4118...  Training loss: 1.2990...  0.1274 sec/batch
Epoch: 9/20...  Training Step: 4119...  Training loss: 1.0973...  0.1289 sec/batch
Epoch: 9/20...  Training Step: 4120...  Training loss: 1.4914...  0.1265 sec/batch
Epoch: 9/20...  Training Step: 4121...  Training loss: 1.1640...  0.1263 sec/batch
Epoch: 9/20...  Training Step: 4122...  Training loss: 1.1291...  0.1288 sec/batch
Epoch: 9/20...  Training Step: 4123...  Training loss: 1.0696...  0.1305 sec/batch
Epoch: 9/20...  Training Step: 4124...  Training loss: 1.0987...  0.1240 sec/batch
Epoch: 9/20...  Training Step: 4125...  Training loss: 1.2570...  0.1285 sec/batch
Epoch: 9/20...  Training Step: 4126...  Training loss: 1.2944...  0.1233 sec/batch
Epoch: 9/20...  Training Step: 4127...  Training loss: 1.3613...  0.1226 sec/batch
Epoch: 9/20...  Training Step: 4128...  Training loss: 1.4389...  0.1282 sec/batch
Epoch: 9/20...  Training Step: 4129...  Training loss: 1.2639...  0.1182 sec/batch
Epoch: 9/20...  Training Step: 4130...  Training loss: 1.0235...  0.1256 sec/batch
Epoch: 9/20...  Training Step: 4131...  Training loss: 1.2217...  0.1287 sec/batch
Epoch: 9/20...  Training Step: 4132...  Training loss: 1.0099...  0.1279 sec/batch
Epoch: 9/20...  Training Step: 4133...  Training loss: 1.2878...  0.1200 sec/batch
Epoch: 9/20...  Training Step: 4134...  Training loss: 1.2617...  0.1277 sec/batch
Epoch: 9/20...  Training Step: 4135...  Training loss: 1.1982...  0.1236 sec/batch
Epoch: 9/20...  Training Step: 4136...  Training loss: 1.3560...  0.1255 sec/batch
Epoch: 9/20...  Training Step: 4137...  Training loss: 1.0892...  0.1286 sec/batch
Epoch: 9/20...  Training Step: 4138...  Training loss: 1.1437...  0.1234 sec/batch
Epoch: 9/20...  Training Step: 4139...  Training loss: 1.2856...  0.1152 sec/batch
Epoch: 9/20...  Training Step: 4140...  Training loss: 1.1652...  0.1137 sec/batch
Epoch: 9/20...  Training Step: 4141...  Training loss: 1.0497...  0.1234 sec/batch
Epoch: 9/20...  Training Step: 4142...  Training loss: 1.1332...  0.1409 sec/batch
Epoch: 9/20...  Training Step: 4143...  Training loss: 1.3548...  0.1249 sec/batch
Epoch: 9/20...  Training Step: 4144...  Training loss: 1.0759...  0.1264 sec/batch
Epoch: 9/20...  Training Step: 4145...  Training loss: 1.3749...  0.1290 sec/batch
Epoch: 9/20...  Training Step: 4146...  Training loss: 1.3487...  0.1269 sec/batch
Epoch: 9/20...  Training Step: 4147...  Training loss: 1.0991...  0.1261 sec/batch
Epoch: 9/20...  Training Step: 4148...  Training loss: 1.2306...  0.1252 sec/batch
Epoch: 9/20...  Training Step: 4149...  Training loss: 1.1169...  0.1240 sec/batch
Epoch: 9/20...  Training Step: 4150...  Training loss: 1.2973...  0.1283 sec/batch
Epoch: 9/20...  Training Step: 4151...  Training loss: 1.4494...  0.1304 sec/batch
Epoch: 9/20...  Training Step: 4152...  Training loss: 1.5044...  0.1265 sec/batch
Epoch: 9/20...  Training Step: 4153...  Training loss: 1.2041...  0.1281 sec/batch
Epoch: 9/20...  Training Step: 4154...  Training loss: 1.1673...  0.1269 sec/batch
Epoch: 9/20...  Training Step: 4155...  Training loss: 1.3387...  0.1257 sec/batch
Epoch: 9/20...  Training Step: 4156...  Training loss: 1.1266...  0.1258 sec/batch
Epoch: 9/20...  Training Step: 4157...  Training loss: 1.2123...  0.1238 sec/batch
Epoch: 9/20...  Training Step: 4158...  Training loss: 1.1009...  0.1260 sec/batch
Epoch: 9/20...  Training Step: 4159...  Training loss: 1.2821...  0.1286 sec/batch
Epoch: 9/20...  Training Step: 4160...  Training loss: 1.0411...  0.1303 sec/batch
Epoch: 9/20...  Training Step: 4161...  Training loss: 1.2089...  0.1272 sec/batch
Epoch: 9/20...  Training Step: 4162...  Training loss: 1.1728...  0.1201 sec/batch
Epoch: 9/20...  Training Step: 4163...  Training loss: 1.0922...  0.1321 sec/batch
Epoch: 9/20...  Training Step: 4164...  Training loss: 1.2159...  0.1192 sec/batch
Epoch: 9/20...  Training Step: 4165...  Training loss: 1.1454...  0.1219 sec/batch
Epoch: 9/20...  Training Step: 4166...  Training loss: 1.5105...  0.1274 sec/batch
Epoch: 9/20...  Training Step: 4167...  Training loss: 1.1704...  0.1215 sec/batch
Epoch: 9/20...  Training Step: 4168...  Training loss: 1.0189...  0.1283 sec/batch
Epoch: 9/20...  Training Step: 4169...  Training loss: 1.1923...  0.1291 sec/batch
Epoch: 9/20...  Training Step: 4170...  Training loss: 0.9818...  0.1288 sec/batch
Epoch: 9/20...  Training Step: 4171...  Training loss: 1.0383...  0.1242 sec/batch
Epoch: 9/20...  Training Step: 4172...  Training loss: 1.2189...  0.1352 sec/batch
Epoch: 9/20...  Training Step: 4173...  Training loss: 1.2283...  0.1277 sec/batch
Epoch: 9/20...  Training Step: 4174...  Training loss: 1.1110...  0.1258 sec/batch
Epoch: 9/20...  Training Step: 4175...  Training loss: 1.1709...  0.1247 sec/batch
Epoch: 9/20...  Training Step: 4176...  Training loss: 1.1041...  0.1259 sec/batch
Epoch: 10/20...  Training Step: 4177...  Training loss: 1.4897...  0.1271 sec/batch
Epoch: 10/20...  Training Step: 4178...  Training loss: 1.3029...  0.1230 sec/batch
Epoch: 10/20...  Training Step: 4179...  Training loss: 1.2326...  0.1225 sec/batch
Epoch: 10/20...  Training Step: 4180...  Training loss: 1.1466...  0.1249 sec/batch
Epoch: 10/20...  Training Step: 4181...  Training loss: 1.2613...  0.1247 sec/batch
Epoch: 10/20...  Training Step: 4182...  Training loss: 0.9899...  0.1215 sec/batch
Epoch: 10/20...  Training Step: 4183...  Training loss: 1.3281...  0.1228 sec/batch
Epoch: 10/20...  Training Step: 4184...  Training loss: 1.0899...  0.1228 sec/batch
Epoch: 10/20...  Training Step: 4185...  Training loss: 0.9513...  0.1291 sec/batch
Epoch: 10/20...  Training Step: 4186...  Training loss: 1.1952...  0.1224 sec/batch
Epoch: 10/20...  Training Step: 4187...  Training loss: 1.1751...  0.1340 sec/batch
Epoch: 10/20...  Training Step: 4188...  Training loss: 0.9337...  0.1248 sec/batch
Epoch: 10/20...  Training Step: 4189...  Training loss: 1.2610...  0.1250 sec/batch
Epoch: 10/20...  Training Step: 4190...  Training loss: 0.9347...  0.1313 sec/batch
Epoch: 10/20...  Training Step: 4191...  Training loss: 1.1948...  0.1248 sec/batch
Epoch: 10/20...  Training Step: 4192...  Training loss: 1.2187...  0.1253 sec/batch
Epoch: 10/20...  Training Step: 4193...  Training loss: 1.1291...  0.1322 sec/batch
Epoch: 10/20...  Training Step: 4194...  Training loss: 1.0373...  0.1194 sec/batch
Epoch: 10/20...  Training Step: 4195...  Training loss: 1.2021...  0.1244 sec/batch
Epoch: 10/20...  Training Step: 4196...  Training loss: 1.0496...  0.1258 sec/batch
Epoch: 10/20...  Training Step: 4197...  Training loss: 1.2756...  0.1135 sec/batch
Epoch: 10/20...  Training Step: 4198...  Training loss: 1.1040...  0.1172 sec/batch
Epoch: 10/20...  Training Step: 4199...  Training loss: 1.1884...  0.1255 sec/batch
Epoch: 10/20...  Training Step: 4200...  Training loss: 1.0920...  0.1208 sec/batch
Epoch: 10/20...  Training Step: 4201...  Training loss: 1.1449...  0.1260 sec/batch
Epoch: 10/20...  Training Step: 4202...  Training loss: 1.1218...  0.1211 sec/batch
Epoch: 10/20...  Training Step: 4203...  Training loss: 1.1524...  0.1202 sec/batch
Epoch: 10/20...  Training Step: 4204...  Training loss: 0.9931...  0.1313 sec/batch
Epoch: 10/20...  Training Step: 4205...  Training loss: 1.0479...  0.1202 sec/batch
Epoch: 10/20...  Training Step: 4206...  Training loss: 1.2005...  0.1263 sec/batch
Epoch: 10/20...  Training Step: 4207...  Training loss: 1.0116...  0.1325 sec/batch
Epoch: 10/20...  Training Step: 4208...  Training loss: 1.0173...  0.1291 sec/batch
Epoch: 10/20...  Training Step: 4209...  Training loss: 1.0208...  0.1301 sec/batch
Epoch: 10/20...  Training Step: 4210...  Training loss: 0.9153...  0.1299 sec/batch
Epoch: 10/20...  Training Step: 4211...  Training loss: 0.9842...  0.1251 sec/batch
Epoch: 10/20...  Training Step: 4212...  Training loss: 1.0295...  0.1231 sec/batch
Epoch: 10/20...  Training Step: 4213...  Training loss: 1.1670...  0.1288 sec/batch
Epoch: 10/20...  Training Step: 4214...  Training loss: 1.0439...  0.1253 sec/batch
Epoch: 10/20...  Training Step: 4215...  Training loss: 1.0147...  0.1246 sec/batch
Epoch: 10/20...  Training Step: 4216...  Training loss: 1.4644...  0.1261 sec/batch
Epoch: 10/20...  Training Step: 4217...  Training loss: 1.1416...  0.1275 sec/batch
Epoch: 10/20...  Training Step: 4218...  Training loss: 1.1423...  0.1278 sec/batch
Epoch: 10/20...  Training Step: 4219...  Training loss: 1.3705...  0.1302 sec/batch
Epoch: 10/20...  Training Step: 4220...  Training loss: 0.9318...  0.1247 sec/batch
Epoch: 10/20...  Training Step: 4221...  Training loss: 1.0997...  0.1217 sec/batch
Epoch: 10/20...  Training Step: 4222...  Training loss: 1.0647...  0.1252 sec/batch
Epoch: 10/20...  Training Step: 4223...  Training loss: 1.1137...  0.1231 sec/batch
Epoch: 10/20...  Training Step: 4224...  Training loss: 1.0588...  0.1270 sec/batch
Epoch: 10/20...  Training Step: 4225...  Training loss: 1.1267...  0.1254 sec/batch
Epoch: 10/20...  Training Step: 4226...  Training loss: 1.1558...  0.1162 sec/batch
Epoch: 10/20...  Training Step: 4227...  Training loss: 1.0758...  0.1149 sec/batch
Epoch: 10/20...  Training Step: 4228...  Training loss: 1.1836...  0.1137 sec/batch
Epoch: 10/20...  Training Step: 4229...  Training loss: 1.0702...  0.1143 sec/batch
Epoch: 10/20...  Training Step: 4230...  Training loss: 1.0644...  0.1209 sec/batch
Epoch: 10/20...  Training Step: 4231...  Training loss: 1.0265...  0.1222 sec/batch
Epoch: 10/20...  Training Step: 4232...  Training loss: 1.0832...  0.1168 sec/batch
Epoch: 10/20...  Training Step: 4233...  Training loss: 1.2093...  0.1128 sec/batch
Epoch: 10/20...  Training Step: 4234...  Training loss: 1.2224...  0.1286 sec/batch
Epoch: 10/20...  Training Step: 4235...  Training loss: 0.9000...  0.1242 sec/batch
Epoch: 10/20...  Training Step: 4236...  Training loss: 1.0365...  0.1236 sec/batch
Epoch: 10/20...  Training Step: 4237...  Training loss: 0.9875...  0.1198 sec/batch
Epoch: 10/20...  Training Step: 4238...  Training loss: 1.2142...  0.1200 sec/batch
Epoch: 10/20...  Training Step: 4239...  Training loss: 1.0867...  0.1296 sec/batch
Epoch: 10/20...  Training Step: 4240...  Training loss: 1.2296...  0.1279 sec/batch
Epoch: 10/20...  Training Step: 4241...  Training loss: 1.0848...  0.1213 sec/batch
Epoch: 10/20...  Training Step: 4242...  Training loss: 1.2648...  0.1225 sec/batch
Epoch: 10/20...  Training Step: 4243...  Training loss: 1.0764...  0.1338 sec/batch
Epoch: 10/20...  Training Step: 4244...  Training loss: 1.1088...  0.1301 sec/batch
Epoch: 10/20...  Training Step: 4245...  Training loss: 0.9594...  0.1322 sec/batch
Epoch: 10/20...  Training Step: 4246...  Training loss: 1.1748...  0.1285 sec/batch
Epoch: 10/20...  Training Step: 4247...  Training loss: 1.3319...  0.1325 sec/batch
Epoch: 10/20...  Training Step: 4248...  Training loss: 0.9734...  0.1305 sec/batch
Epoch: 10/20...  Training Step: 4249...  Training loss: 1.0947...  0.1309 sec/batch
Epoch: 10/20...  Training Step: 4250...  Training loss: 0.9825...  0.1289 sec/batch
Epoch: 10/20...  Training Step: 4251...  Training loss: 1.2376...  0.1262 sec/batch
Epoch: 10/20...  Training Step: 4252...  Training loss: 0.9625...  0.1315 sec/batch
Epoch: 10/20...  Training Step: 4253...  Training loss: 1.0855...  0.1282 sec/batch
Epoch: 10/20...  Training Step: 4254...  Training loss: 1.1098...  0.1215 sec/batch
Epoch: 10/20...  Training Step: 4255...  Training loss: 1.1574...  0.1167 sec/batch
Epoch: 10/20...  Training Step: 4256...  Training loss: 1.0148...  0.1257 sec/batch
Epoch: 10/20...  Training Step: 4257...  Training loss: 1.2213...  0.1250 sec/batch
Epoch: 10/20...  Training Step: 4258...  Training loss: 1.1717...  0.1141 sec/batch
Epoch: 10/20...  Training Step: 4259...  Training loss: 0.9735...  0.1138 sec/batch
Epoch: 10/20...  Training Step: 4260...  Training loss: 1.3001...  0.1151 sec/batch
Epoch: 10/20...  Training Step: 4261...  Training loss: 1.0866...  0.1203 sec/batch
Epoch: 10/20...  Training Step: 4262...  Training loss: 1.2688...  0.1206 sec/batch
Epoch: 10/20...  Training Step: 4263...  Training loss: 1.0210...  0.1297 sec/batch
Epoch: 10/20...  Training Step: 4264...  Training loss: 1.3269...  0.1274 sec/batch
Epoch: 10/20...  Training Step: 4265...  Training loss: 1.3788...  0.1263 sec/batch
Epoch: 10/20...  Training Step: 4266...  Training loss: 1.1015...  0.1268 sec/batch
Epoch: 10/20...  Training Step: 4267...  Training loss: 1.1486...  0.1246 sec/batch
Epoch: 10/20...  Training Step: 4268...  Training loss: 1.2981...  0.1246 sec/batch
Epoch: 10/20...  Training Step: 4269...  Training loss: 0.9786...  0.1242 sec/batch
Epoch: 10/20...  Training Step: 4270...  Training loss: 1.3597...  0.1296 sec/batch
Epoch: 10/20...  Training Step: 4271...  Training loss: 1.1045...  0.1284 sec/batch
Epoch: 10/20...  Training Step: 4272...  Training loss: 1.1656...  0.1158 sec/batch
Epoch: 10/20...  Training Step: 4273...  Training loss: 1.3334...  0.1295 sec/batch
Epoch: 10/20...  Training Step: 4274...  Training loss: 1.1610...  0.1208 sec/batch
Epoch: 10/20...  Training Step: 4275...  Training loss: 1.2580...  0.1288 sec/batch
Epoch: 10/20...  Training Step: 4276...  Training loss: 1.1842...  0.1222 sec/batch
Epoch: 10/20...  Training Step: 4277...  Training loss: 1.1798...  0.1227 sec/batch
Epoch: 10/20...  Training Step: 4278...  Training loss: 1.3106...  0.1227 sec/batch
Epoch: 10/20...  Training Step: 4279...  Training loss: 1.2596...  0.1286 sec/batch
Epoch: 10/20...  Training Step: 4280...  Training loss: 1.0992...  0.1283 sec/batch
Epoch: 10/20...  Training Step: 4281...  Training loss: 1.2577...  0.1271 sec/batch
Epoch: 10/20...  Training Step: 4282...  Training loss: 1.2658...  0.1233 sec/batch
Epoch: 10/20...  Training Step: 4283...  Training loss: 1.2340...  0.1272 sec/batch
Epoch: 10/20...  Training Step: 4284...  Training loss: 1.1958...  0.1293 sec/batch
Epoch: 10/20...  Training Step: 4285...  Training loss: 1.3253...  0.1268 sec/batch
Epoch: 10/20...  Training Step: 4286...  Training loss: 1.1065...  0.1288 sec/batch
Epoch: 10/20...  Training Step: 4287...  Training loss: 1.1880...  0.1249 sec/batch
Epoch: 10/20...  Training Step: 4288...  Training loss: 1.0821...  0.1213 sec/batch
Epoch: 10/20...  Training Step: 4289...  Training loss: 1.1503...  0.1309 sec/batch
Epoch: 10/20...  Training Step: 4290...  Training loss: 1.2567...  0.1269 sec/batch
Epoch: 10/20...  Training Step: 4291...  Training loss: 1.1389...  0.1217 sec/batch
Epoch: 10/20...  Training Step: 4292...  Training loss: 1.1063...  0.1250 sec/batch
Epoch: 10/20...  Training Step: 4293...  Training loss: 1.2297...  0.1275 sec/batch
Epoch: 10/20...  Training Step: 4294...  Training loss: 1.2314...  0.1230 sec/batch
Epoch: 10/20...  Training Step: 4295...  Training loss: 1.1724...  0.1316 sec/batch
Epoch: 10/20...  Training Step: 4296...  Training loss: 1.0651...  0.1264 sec/batch
Epoch: 10/20...  Training Step: 4297...  Training loss: 1.2658...  0.1291 sec/batch
Epoch: 10/20...  Training Step: 4298...  Training loss: 1.1955...  0.1276 sec/batch
Epoch: 10/20...  Training Step: 4299...  Training loss: 1.2097...  0.1231 sec/batch
Epoch: 10/20...  Training Step: 4300...  Training loss: 1.2730...  0.1253 sec/batch
Epoch: 10/20...  Training Step: 4301...  Training loss: 1.1441...  0.1301 sec/batch
Epoch: 10/20...  Training Step: 4302...  Training loss: 1.1319...  0.1239 sec/batch
Epoch: 10/20...  Training Step: 4303...  Training loss: 1.1309...  0.1341 sec/batch
Epoch: 10/20...  Training Step: 4304...  Training loss: 1.2407...  0.1225 sec/batch
Epoch: 10/20...  Training Step: 4305...  Training loss: 1.2400...  0.1304 sec/batch
Epoch: 10/20...  Training Step: 4306...  Training loss: 1.2216...  0.1274 sec/batch
Epoch: 10/20...  Training Step: 4307...  Training loss: 1.3206...  0.1330 sec/batch
Epoch: 10/20...  Training Step: 4308...  Training loss: 1.0707...  0.1217 sec/batch
Epoch: 10/20...  Training Step: 4309...  Training loss: 1.1349...  0.1300 sec/batch
Epoch: 10/20...  Training Step: 4310...  Training loss: 1.3210...  0.1250 sec/batch
Epoch: 10/20...  Training Step: 4311...  Training loss: 1.0921...  0.1145 sec/batch
Epoch: 10/20...  Training Step: 4312...  Training loss: 0.8720...  0.1180 sec/batch
Epoch: 10/20...  Training Step: 4313...  Training loss: 0.9262...  0.1242 sec/batch
Epoch: 10/20...  Training Step: 4314...  Training loss: 1.1777...  0.1267 sec/batch
Epoch: 10/20...  Training Step: 4315...  Training loss: 0.9608...  0.1284 sec/batch
Epoch: 10/20...  Training Step: 4316...  Training loss: 1.0851...  0.1204 sec/batch
Epoch: 10/20...  Training Step: 4317...  Training loss: 0.9827...  0.1241 sec/batch
Epoch: 10/20...  Training Step: 4318...  Training loss: 1.0255...  0.1271 sec/batch
Epoch: 10/20...  Training Step: 4319...  Training loss: 1.0028...  0.1302 sec/batch
Epoch: 10/20...  Training Step: 4320...  Training loss: 1.0996...  0.1272 sec/batch
Epoch: 10/20...  Training Step: 4321...  Training loss: 1.2163...  0.1225 sec/batch
Epoch: 10/20...  Training Step: 4322...  Training loss: 1.1064...  0.1301 sec/batch
Epoch: 10/20...  Training Step: 4323...  Training loss: 1.2679...  0.1273 sec/batch
Epoch: 10/20...  Training Step: 4324...  Training loss: 1.0351...  0.1187 sec/batch
Epoch: 10/20...  Training Step: 4325...  Training loss: 1.0708...  0.1242 sec/batch
Epoch: 10/20...  Training Step: 4326...  Training loss: 1.4002...  0.1195 sec/batch
Epoch: 10/20...  Training Step: 4327...  Training loss: 1.2838...  0.1214 sec/batch
Epoch: 10/20...  Training Step: 4328...  Training loss: 1.2432...  0.1172 sec/batch
Epoch: 10/20...  Training Step: 4329...  Training loss: 1.3235...  0.1148 sec/batch
Epoch: 10/20...  Training Step: 4330...  Training loss: 1.1642...  0.1125 sec/batch
Epoch: 10/20...  Training Step: 4331...  Training loss: 1.0724...  0.1151 sec/batch
Epoch: 10/20...  Training Step: 4332...  Training loss: 1.1600...  0.1123 sec/batch
Epoch: 10/20...  Training Step: 4333...  Training loss: 1.1040...  0.1124 sec/batch
Epoch: 10/20...  Training Step: 4334...  Training loss: 1.0896...  0.1098 sec/batch
Epoch: 10/20...  Training Step: 4335...  Training loss: 0.9887...  0.1123 sec/batch
Epoch: 10/20...  Training Step: 4336...  Training loss: 1.1405...  0.1194 sec/batch
Epoch: 10/20...  Training Step: 4337...  Training loss: 1.2233...  0.1215 sec/batch
Epoch: 10/20...  Training Step: 4338...  Training loss: 1.0593...  0.1391 sec/batch
Epoch: 10/20...  Training Step: 4339...  Training loss: 1.3096...  0.1254 sec/batch
Epoch: 10/20...  Training Step: 4340...  Training loss: 1.0341...  0.1259 sec/batch
Epoch: 10/20...  Training Step: 4341...  Training loss: 1.1713...  0.1252 sec/batch
Epoch: 10/20...  Training Step: 4342...  Training loss: 1.0758...  0.1237 sec/batch
Epoch: 10/20...  Training Step: 4343...  Training loss: 0.9925...  0.1243 sec/batch
Epoch: 10/20...  Training Step: 4344...  Training loss: 1.2749...  0.1308 sec/batch
Epoch: 10/20...  Training Step: 4345...  Training loss: 1.1590...  0.1251 sec/batch
Epoch: 10/20...  Training Step: 4346...  Training loss: 1.2234...  0.1244 sec/batch
Epoch: 10/20...  Training Step: 4347...  Training loss: 1.1414...  0.1237 sec/batch
Epoch: 10/20...  Training Step: 4348...  Training loss: 1.1851...  0.1341 sec/batch
Epoch: 10/20...  Training Step: 4349...  Training loss: 1.0287...  0.1261 sec/batch
Epoch: 10/20...  Training Step: 4350...  Training loss: 1.0734...  0.1205 sec/batch
Epoch: 10/20...  Training Step: 4351...  Training loss: 1.2144...  0.1226 sec/batch
Epoch: 10/20...  Training Step: 4352...  Training loss: 1.0433...  0.1244 sec/batch
Epoch: 10/20...  Training Step: 4353...  Training loss: 1.0484...  0.1323 sec/batch
Epoch: 10/20...  Training Step: 4354...  Training loss: 1.2620...  0.1285 sec/batch
Epoch: 10/20...  Training Step: 4355...  Training loss: 0.9245...  0.1266 sec/batch
Epoch: 10/20...  Training Step: 4356...  Training loss: 1.1664...  0.1232 sec/batch
Epoch: 10/20...  Training Step: 4357...  Training loss: 0.9381...  0.1257 sec/batch
Epoch: 10/20...  Training Step: 4358...  Training loss: 1.3875...  0.1250 sec/batch
Epoch: 10/20...  Training Step: 4359...  Training loss: 1.1439...  0.1240 sec/batch
Epoch: 10/20...  Training Step: 4360...  Training loss: 1.1823...  0.1252 sec/batch
Epoch: 10/20...  Training Step: 4361...  Training loss: 1.3091...  0.1271 sec/batch
Epoch: 10/20...  Training Step: 4362...  Training loss: 1.1521...  0.1258 sec/batch
Epoch: 10/20...  Training Step: 4363...  Training loss: 1.2272...  0.1230 sec/batch
Epoch: 10/20...  Training Step: 4364...  Training loss: 1.0251...  0.1272 sec/batch
Epoch: 10/20...  Training Step: 4365...  Training loss: 1.0933...  0.1143 sec/batch
Epoch: 10/20...  Training Step: 4366...  Training loss: 1.1254...  0.1266 sec/batch
Epoch: 10/20...  Training Step: 4367...  Training loss: 1.0964...  0.1263 sec/batch
Epoch: 10/20...  Training Step: 4368...  Training loss: 1.2000...  0.1222 sec/batch
Epoch: 10/20...  Training Step: 4369...  Training loss: 1.1610...  0.1242 sec/batch
Epoch: 10/20...  Training Step: 4370...  Training loss: 1.0537...  0.1199 sec/batch
Epoch: 10/20...  Training Step: 4371...  Training loss: 1.1871...  0.1140 sec/batch
Epoch: 10/20...  Training Step: 4372...  Training loss: 1.1271...  0.1151 sec/batch
Epoch: 10/20...  Training Step: 4373...  Training loss: 1.1223...  0.1179 sec/batch
Epoch: 10/20...  Training Step: 4374...  Training loss: 1.1731...  0.1112 sec/batch
Epoch: 10/20...  Training Step: 4375...  Training loss: 0.9271...  0.1143 sec/batch
Epoch: 10/20...  Training Step: 4376...  Training loss: 1.2973...  0.1319 sec/batch
Epoch: 10/20...  Training Step: 4377...  Training loss: 1.1086...  0.1283 sec/batch
Epoch: 10/20...  Training Step: 4378...  Training loss: 1.1284...  0.1297 sec/batch
Epoch: 10/20...  Training Step: 4379...  Training loss: 1.0454...  0.1301 sec/batch
Epoch: 10/20...  Training Step: 4380...  Training loss: 1.3970...  0.1333 sec/batch
Epoch: 10/20...  Training Step: 4381...  Training loss: 0.9962...  0.1295 sec/batch
Epoch: 10/20...  Training Step: 4382...  Training loss: 1.1138...  0.1370 sec/batch
Epoch: 10/20...  Training Step: 4383...  Training loss: 1.0737...  0.1362 sec/batch
Epoch: 10/20...  Training Step: 4384...  Training loss: 1.1975...  0.1147 sec/batch
Epoch: 10/20...  Training Step: 4385...  Training loss: 1.2926...  0.1251 sec/batch
Epoch: 10/20...  Training Step: 4386...  Training loss: 0.9819...  0.1269 sec/batch
Epoch: 10/20...  Training Step: 4387...  Training loss: 1.0702...  0.1279 sec/batch
Epoch: 10/20...  Training Step: 4388...  Training loss: 1.2559...  0.1254 sec/batch
Epoch: 10/20...  Training Step: 4389...  Training loss: 1.2826...  0.1299 sec/batch
Epoch: 10/20...  Training Step: 4390...  Training loss: 1.1355...  0.1319 sec/batch
Epoch: 10/20...  Training Step: 4391...  Training loss: 1.2754...  0.1469 sec/batch
Epoch: 10/20...  Training Step: 4392...  Training loss: 1.2225...  0.1558 sec/batch
Epoch: 10/20...  Training Step: 4393...  Training loss: 1.1693...  0.1311 sec/batch
Epoch: 10/20...  Training Step: 4394...  Training loss: 1.1344...  0.1245 sec/batch
Epoch: 10/20...  Training Step: 4395...  Training loss: 1.1934...  0.1165 sec/batch
Epoch: 10/20...  Training Step: 4396...  Training loss: 1.1168...  0.1199 sec/batch
Epoch: 10/20...  Training Step: 4397...  Training loss: 1.0950...  0.1218 sec/batch
Epoch: 10/20...  Training Step: 4398...  Training loss: 1.2786...  0.1203 sec/batch
Epoch: 10/20...  Training Step: 4399...  Training loss: 1.2243...  0.1139 sec/batch
Epoch: 10/20...  Training Step: 4400...  Training loss: 1.3285...  0.1273 sec/batch
Epoch: 10/20...  Training Step: 4401...  Training loss: 1.2273...  0.1207 sec/batch
Epoch: 10/20...  Training Step: 4402...  Training loss: 1.2156...  0.1163 sec/batch
Epoch: 10/20...  Training Step: 4403...  Training loss: 1.3044...  0.1295 sec/batch
Epoch: 10/20...  Training Step: 4404...  Training loss: 1.1220...  0.1304 sec/batch
Epoch: 10/20...  Training Step: 4405...  Training loss: 1.1665...  0.1266 sec/batch
Epoch: 10/20...  Training Step: 4406...  Training loss: 1.1443...  0.1259 sec/batch
Epoch: 10/20...  Training Step: 4407...  Training loss: 1.1990...  0.1161 sec/batch
Epoch: 10/20...  Training Step: 4408...  Training loss: 1.1773...  0.1143 sec/batch
Epoch: 10/20...  Training Step: 4409...  Training loss: 1.5552...  0.1288 sec/batch
Epoch: 10/20...  Training Step: 4410...  Training loss: 1.1643...  0.1225 sec/batch
Epoch: 10/20...  Training Step: 4411...  Training loss: 1.2691...  0.1212 sec/batch
Epoch: 10/20...  Training Step: 4412...  Training loss: 1.1812...  0.1230 sec/batch
Epoch: 10/20...  Training Step: 4413...  Training loss: 1.2593...  0.1209 sec/batch
Epoch: 10/20...  Training Step: 4414...  Training loss: 1.0751...  0.1175 sec/batch
Epoch: 10/20...  Training Step: 4415...  Training loss: 1.2163...  0.1215 sec/batch
Epoch: 10/20...  Training Step: 4416...  Training loss: 1.3049...  0.1153 sec/batch
Epoch: 10/20...  Training Step: 4417...  Training loss: 1.1065...  0.1221 sec/batch
Epoch: 10/20...  Training Step: 4418...  Training loss: 1.0960...  0.1295 sec/batch
Epoch: 10/20...  Training Step: 4419...  Training loss: 1.1576...  0.1255 sec/batch
Epoch: 10/20...  Training Step: 4420...  Training loss: 1.1537...  0.1231 sec/batch
Epoch: 10/20...  Training Step: 4421...  Training loss: 1.1535...  0.1284 sec/batch
Epoch: 10/20...  Training Step: 4422...  Training loss: 1.0833...  0.1234 sec/batch
Epoch: 10/20...  Training Step: 4423...  Training loss: 1.1872...  0.1312 sec/batch
Epoch: 10/20...  Training Step: 4424...  Training loss: 1.3153...  0.1357 sec/batch
Epoch: 10/20...  Training Step: 4425...  Training loss: 1.1688...  0.1372 sec/batch
Epoch: 10/20...  Training Step: 4426...  Training loss: 1.1577...  0.1385 sec/batch
Epoch: 10/20...  Training Step: 4427...  Training loss: 1.2140...  0.1339 sec/batch
Epoch: 10/20...  Training Step: 4428...  Training loss: 1.2322...  0.1403 sec/batch
Epoch: 10/20...  Training Step: 4429...  Training loss: 1.0904...  0.1304 sec/batch
Epoch: 10/20...  Training Step: 4430...  Training loss: 1.1745...  0.1270 sec/batch
Epoch: 10/20...  Training Step: 4431...  Training loss: 1.1599...  0.1208 sec/batch
Epoch: 10/20...  Training Step: 4432...  Training loss: 1.0859...  0.1125 sec/batch
Epoch: 10/20...  Training Step: 4433...  Training loss: 1.2790...  0.1152 sec/batch
Epoch: 10/20...  Training Step: 4434...  Training loss: 1.0800...  0.1207 sec/batch
Epoch: 10/20...  Training Step: 4435...  Training loss: 1.1067...  0.1300 sec/batch
Epoch: 10/20...  Training Step: 4436...  Training loss: 1.1353...  0.1255 sec/batch
Epoch: 10/20...  Training Step: 4437...  Training loss: 1.1365...  0.1274 sec/batch
Epoch: 10/20...  Training Step: 4438...  Training loss: 1.1661...  0.1156 sec/batch
Epoch: 10/20...  Training Step: 4439...  Training loss: 1.1877...  0.1297 sec/batch
Epoch: 10/20...  Training Step: 4440...  Training loss: 1.1833...  0.1274 sec/batch
Epoch: 10/20...  Training Step: 4441...  Training loss: 1.2668...  0.1241 sec/batch
Epoch: 10/20...  Training Step: 4442...  Training loss: 1.1587...  0.1218 sec/batch
Epoch: 10/20...  Training Step: 4443...  Training loss: 1.2971...  0.1245 sec/batch
Epoch: 10/20...  Training Step: 4444...  Training loss: 1.3498...  0.1242 sec/batch
Epoch: 10/20...  Training Step: 4445...  Training loss: 1.2474...  0.1250 sec/batch
Epoch: 10/20...  Training Step: 4446...  Training loss: 1.3338...  0.1268 sec/batch
Epoch: 10/20...  Training Step: 4447...  Training loss: 1.2505...  0.1300 sec/batch
Epoch: 10/20...  Training Step: 4448...  Training loss: 1.3320...  0.1191 sec/batch
Epoch: 10/20...  Training Step: 4449...  Training loss: 1.3439...  0.1233 sec/batch
Epoch: 10/20...  Training Step: 4450...  Training loss: 1.2009...  0.1234 sec/batch
Epoch: 10/20...  Training Step: 4451...  Training loss: 1.2329...  0.1283 sec/batch
Epoch: 10/20...  Training Step: 4452...  Training loss: 1.1888...  0.1180 sec/batch
Epoch: 10/20...  Training Step: 4453...  Training loss: 1.1294...  0.1290 sec/batch
Epoch: 10/20...  Training Step: 4454...  Training loss: 1.4821...  0.1228 sec/batch
Epoch: 10/20...  Training Step: 4455...  Training loss: 1.1302...  0.1146 sec/batch
Epoch: 10/20...  Training Step: 4456...  Training loss: 1.1801...  0.1197 sec/batch
Epoch: 10/20...  Training Step: 4457...  Training loss: 1.1166...  0.1253 sec/batch
Epoch: 10/20...  Training Step: 4458...  Training loss: 1.1651...  0.1258 sec/batch
Epoch: 10/20...  Training Step: 4459...  Training loss: 1.0581...  0.1248 sec/batch
Epoch: 10/20...  Training Step: 4460...  Training loss: 1.1582...  0.1231 sec/batch
Epoch: 10/20...  Training Step: 4461...  Training loss: 0.9890...  0.1224 sec/batch
Epoch: 10/20...  Training Step: 4462...  Training loss: 1.1723...  0.1177 sec/batch
Epoch: 10/20...  Training Step: 4463...  Training loss: 1.1728...  0.1238 sec/batch
Epoch: 10/20...  Training Step: 4464...  Training loss: 1.3096...  0.1273 sec/batch
Epoch: 10/20...  Training Step: 4465...  Training loss: 1.2258...  0.1166 sec/batch
Epoch: 10/20...  Training Step: 4466...  Training loss: 1.2037...  0.1208 sec/batch
Epoch: 10/20...  Training Step: 4467...  Training loss: 1.1569...  0.1206 sec/batch
Epoch: 10/20...  Training Step: 4468...  Training loss: 1.1468...  0.1305 sec/batch
Epoch: 10/20...  Training Step: 4469...  Training loss: 1.0369...  0.1243 sec/batch
Epoch: 10/20...  Training Step: 4470...  Training loss: 1.1994...  0.1239 sec/batch
Epoch: 10/20...  Training Step: 4471...  Training loss: 1.2354...  0.1197 sec/batch
Epoch: 10/20...  Training Step: 4472...  Training loss: 1.5016...  0.1252 sec/batch
Epoch: 10/20...  Training Step: 4473...  Training loss: 1.0471...  0.1172 sec/batch
Epoch: 10/20...  Training Step: 4474...  Training loss: 1.0868...  0.1239 sec/batch
Epoch: 10/20...  Training Step: 4475...  Training loss: 1.2115...  0.1340 sec/batch
Epoch: 10/20...  Training Step: 4476...  Training loss: 1.1275...  0.1198 sec/batch
Epoch: 10/20...  Training Step: 4477...  Training loss: 1.1468...  0.1197 sec/batch
Epoch: 10/20...  Training Step: 4478...  Training loss: 1.0887...  0.1194 sec/batch
Epoch: 10/20...  Training Step: 4479...  Training loss: 0.9418...  0.1179 sec/batch
Epoch: 10/20...  Training Step: 4480...  Training loss: 1.2917...  0.1269 sec/batch
Epoch: 10/20...  Training Step: 4481...  Training loss: 0.9377...  0.1306 sec/batch
Epoch: 10/20...  Training Step: 4482...  Training loss: 1.2369...  0.1231 sec/batch
Epoch: 10/20...  Training Step: 4483...  Training loss: 1.2461...  0.1225 sec/batch
Epoch: 10/20...  Training Step: 4484...  Training loss: 1.4924...  0.1250 sec/batch
Epoch: 10/20...  Training Step: 4485...  Training loss: 1.2933...  0.1255 sec/batch
Epoch: 10/20...  Training Step: 4486...  Training loss: 1.2726...  0.1235 sec/batch
Epoch: 10/20...  Training Step: 4487...  Training loss: 1.1304...  0.1151 sec/batch
Epoch: 10/20...  Training Step: 4488...  Training loss: 1.1263...  0.1171 sec/batch
Epoch: 10/20...  Training Step: 4489...  Training loss: 1.0737...  0.1173 sec/batch
Epoch: 10/20...  Training Step: 4490...  Training loss: 1.1072...  0.1154 sec/batch
Epoch: 10/20...  Training Step: 4491...  Training loss: 0.9944...  0.1206 sec/batch
Epoch: 10/20...  Training Step: 4492...  Training loss: 1.1145...  0.1198 sec/batch
Epoch: 10/20...  Training Step: 4493...  Training loss: 0.9994...  0.1198 sec/batch
Epoch: 10/20...  Training Step: 4494...  Training loss: 1.0649...  0.1147 sec/batch
Epoch: 10/20...  Training Step: 4495...  Training loss: 1.0713...  0.1200 sec/batch
Epoch: 10/20...  Training Step: 4496...  Training loss: 1.0945...  0.1192 sec/batch
Epoch: 10/20...  Training Step: 4497...  Training loss: 1.0212...  0.1200 sec/batch
Epoch: 10/20...  Training Step: 4498...  Training loss: 1.2230...  0.1197 sec/batch
Epoch: 10/20...  Training Step: 4499...  Training loss: 1.1378...  0.1165 sec/batch
Epoch: 10/20...  Training Step: 4500...  Training loss: 0.9861...  0.1162 sec/batch
Epoch: 10/20...  Training Step: 4501...  Training loss: 1.0085...  0.1190 sec/batch
Epoch: 10/20...  Training Step: 4502...  Training loss: 0.9308...  0.1192 sec/batch
Epoch: 10/20...  Training Step: 4503...  Training loss: 1.1057...  0.1144 sec/batch
Epoch: 10/20...  Training Step: 4504...  Training loss: 1.0586...  0.1177 sec/batch
Epoch: 10/20...  Training Step: 4505...  Training loss: 1.1945...  0.1206 sec/batch
Epoch: 10/20...  Training Step: 4506...  Training loss: 1.0820...  0.1199 sec/batch
Epoch: 10/20...  Training Step: 4507...  Training loss: 1.1202...  0.1169 sec/batch
Epoch: 10/20...  Training Step: 4508...  Training loss: 1.1445...  0.1169 sec/batch
Epoch: 10/20...  Training Step: 4509...  Training loss: 1.0296...  0.1202 sec/batch
Epoch: 10/20...  Training Step: 4510...  Training loss: 1.0141...  0.1168 sec/batch
Epoch: 10/20...  Training Step: 4511...  Training loss: 1.1108...  0.1178 sec/batch
Epoch: 10/20...  Training Step: 4512...  Training loss: 1.0957...  0.1168 sec/batch
Epoch: 10/20...  Training Step: 4513...  Training loss: 1.0818...  0.1172 sec/batch
Epoch: 10/20...  Training Step: 4514...  Training loss: 1.0928...  0.1179 sec/batch
Epoch: 10/20...  Training Step: 4515...  Training loss: 1.2028...  0.1150 sec/batch
Epoch: 10/20...  Training Step: 4516...  Training loss: 1.0266...  0.1197 sec/batch
Epoch: 10/20...  Training Step: 4517...  Training loss: 1.0507...  0.1184 sec/batch
Epoch: 10/20...  Training Step: 4518...  Training loss: 1.1874...  0.1169 sec/batch
Epoch: 10/20...  Training Step: 4519...  Training loss: 0.9585...  0.1194 sec/batch
Epoch: 10/20...  Training Step: 4520...  Training loss: 1.1214...  0.1168 sec/batch
Epoch: 10/20...  Training Step: 4521...  Training loss: 0.9954...  0.1201 sec/batch
Epoch: 10/20...  Training Step: 4522...  Training loss: 0.9271...  0.1194 sec/batch
Epoch: 10/20...  Training Step: 4523...  Training loss: 1.0071...  0.1180 sec/batch
Epoch: 10/20...  Training Step: 4524...  Training loss: 1.2725...  0.1180 sec/batch
Epoch: 10/20...  Training Step: 4525...  Training loss: 1.0510...  0.1200 sec/batch
Epoch: 10/20...  Training Step: 4526...  Training loss: 1.0897...  0.1184 sec/batch
Epoch: 10/20...  Training Step: 4527...  Training loss: 1.1484...  0.1235 sec/batch
Epoch: 10/20...  Training Step: 4528...  Training loss: 0.9878...  0.1188 sec/batch
Epoch: 10/20...  Training Step: 4529...  Training loss: 0.9875...  0.1239 sec/batch
Epoch: 10/20...  Training Step: 4530...  Training loss: 0.8642...  0.1252 sec/batch
Epoch: 10/20...  Training Step: 4531...  Training loss: 1.1479...  0.1246 sec/batch
Epoch: 10/20...  Training Step: 4532...  Training loss: 1.0864...  0.1302 sec/batch
Epoch: 10/20...  Training Step: 4533...  Training loss: 1.0855...  0.1257 sec/batch
Epoch: 10/20...  Training Step: 4534...  Training loss: 1.0987...  0.1288 sec/batch
Epoch: 10/20...  Training Step: 4535...  Training loss: 1.2618...  0.1253 sec/batch
Epoch: 10/20...  Training Step: 4536...  Training loss: 0.8980...  0.1277 sec/batch
Epoch: 10/20...  Training Step: 4537...  Training loss: 1.3095...  0.1407 sec/batch
Epoch: 10/20...  Training Step: 4538...  Training loss: 1.2216...  0.1280 sec/batch
Epoch: 10/20...  Training Step: 4539...  Training loss: 0.9911...  0.1265 sec/batch
Epoch: 10/20...  Training Step: 4540...  Training loss: 1.1801...  0.1215 sec/batch
Epoch: 10/20...  Training Step: 4541...  Training loss: 1.0355...  0.1281 sec/batch
Epoch: 10/20...  Training Step: 4542...  Training loss: 1.1698...  0.1282 sec/batch
Epoch: 10/20...  Training Step: 4543...  Training loss: 1.1100...  0.1263 sec/batch
Epoch: 10/20...  Training Step: 4544...  Training loss: 1.2247...  0.1322 sec/batch
Epoch: 10/20...  Training Step: 4545...  Training loss: 1.0733...  0.1172 sec/batch
Epoch: 10/20...  Training Step: 4546...  Training loss: 1.1531...  0.1336 sec/batch
Epoch: 10/20...  Training Step: 4547...  Training loss: 1.0669...  0.1252 sec/batch
Epoch: 10/20...  Training Step: 4548...  Training loss: 1.1865...  0.1177 sec/batch
Epoch: 10/20...  Training Step: 4549...  Training loss: 1.0054...  0.1170 sec/batch
Epoch: 10/20...  Training Step: 4550...  Training loss: 1.2985...  0.1311 sec/batch
Epoch: 10/20...  Training Step: 4551...  Training loss: 1.1856...  0.1204 sec/batch
Epoch: 10/20...  Training Step: 4552...  Training loss: 1.2340...  0.1270 sec/batch
Epoch: 10/20...  Training Step: 4553...  Training loss: 1.2563...  0.1210 sec/batch
Epoch: 10/20...  Training Step: 4554...  Training loss: 1.2695...  0.1263 sec/batch
Epoch: 10/20...  Training Step: 4555...  Training loss: 1.0730...  0.1205 sec/batch
Epoch: 10/20...  Training Step: 4556...  Training loss: 1.1440...  0.1252 sec/batch
Epoch: 10/20...  Training Step: 4557...  Training loss: 0.9799...  0.1297 sec/batch
Epoch: 10/20...  Training Step: 4558...  Training loss: 1.0193...  0.1175 sec/batch
Epoch: 10/20...  Training Step: 4559...  Training loss: 1.1328...  0.1203 sec/batch
Epoch: 10/20...  Training Step: 4560...  Training loss: 1.1403...  0.1213 sec/batch
Epoch: 10/20...  Training Step: 4561...  Training loss: 1.0056...  0.1238 sec/batch
Epoch: 10/20...  Training Step: 4562...  Training loss: 1.2066...  0.1288 sec/batch
Epoch: 10/20...  Training Step: 4563...  Training loss: 1.2016...  0.1253 sec/batch
Epoch: 10/20...  Training Step: 4564...  Training loss: 0.9988...  0.1284 sec/batch
Epoch: 10/20...  Training Step: 4565...  Training loss: 0.9943...  0.1246 sec/batch
Epoch: 10/20...  Training Step: 4566...  Training loss: 1.0810...  0.1304 sec/batch
Epoch: 10/20...  Training Step: 4567...  Training loss: 0.9547...  0.1391 sec/batch
Epoch: 10/20...  Training Step: 4568...  Training loss: 1.0027...  0.1273 sec/batch
Epoch: 10/20...  Training Step: 4569...  Training loss: 1.0545...  0.1247 sec/batch
Epoch: 10/20...  Training Step: 4570...  Training loss: 1.1210...  0.1379 sec/batch
Epoch: 10/20...  Training Step: 4571...  Training loss: 1.0725...  0.1288 sec/batch
Epoch: 10/20...  Training Step: 4572...  Training loss: 1.2257...  0.1273 sec/batch
Epoch: 10/20...  Training Step: 4573...  Training loss: 1.0468...  0.1264 sec/batch
Epoch: 10/20...  Training Step: 4574...  Training loss: 1.1249...  0.1246 sec/batch
Epoch: 10/20...  Training Step: 4575...  Training loss: 1.0677...  0.1221 sec/batch
Epoch: 10/20...  Training Step: 4576...  Training loss: 1.0749...  0.1219 sec/batch
Epoch: 10/20...  Training Step: 4577...  Training loss: 1.0591...  0.1293 sec/batch
Epoch: 10/20...  Training Step: 4578...  Training loss: 0.9274...  0.1282 sec/batch
Epoch: 10/20...  Training Step: 4579...  Training loss: 1.0867...  0.1275 sec/batch
Epoch: 10/20...  Training Step: 4580...  Training loss: 1.2035...  0.1283 sec/batch
Epoch: 10/20...  Training Step: 4581...  Training loss: 1.1601...  0.1315 sec/batch
Epoch: 10/20...  Training Step: 4582...  Training loss: 1.2037...  0.1343 sec/batch
Epoch: 10/20...  Training Step: 4583...  Training loss: 1.0257...  0.1341 sec/batch
Epoch: 10/20...  Training Step: 4584...  Training loss: 1.4482...  0.1222 sec/batch
Epoch: 10/20...  Training Step: 4585...  Training loss: 1.0753...  0.1174 sec/batch
Epoch: 10/20...  Training Step: 4586...  Training loss: 1.0926...  0.1246 sec/batch
Epoch: 10/20...  Training Step: 4587...  Training loss: 1.0699...  0.1184 sec/batch
Epoch: 10/20...  Training Step: 4588...  Training loss: 1.1414...  0.1176 sec/batch
Epoch: 10/20...  Training Step: 4589...  Training loss: 1.1855...  0.1237 sec/batch
Epoch: 10/20...  Training Step: 4590...  Training loss: 1.0433...  0.1248 sec/batch
Epoch: 10/20...  Training Step: 4591...  Training loss: 1.2090...  0.1231 sec/batch
Epoch: 10/20...  Training Step: 4592...  Training loss: 1.4427...  0.1291 sec/batch
Epoch: 10/20...  Training Step: 4593...  Training loss: 1.2945...  0.1301 sec/batch
Epoch: 10/20...  Training Step: 4594...  Training loss: 0.9396...  0.1180 sec/batch
Epoch: 10/20...  Training Step: 4595...  Training loss: 1.0788...  0.1161 sec/batch
Epoch: 10/20...  Training Step: 4596...  Training loss: 0.9580...  0.1257 sec/batch
Epoch: 10/20...  Training Step: 4597...  Training loss: 1.1633...  0.1194 sec/batch
Epoch: 10/20...  Training Step: 4598...  Training loss: 1.1444...  0.1165 sec/batch
Epoch: 10/20...  Training Step: 4599...  Training loss: 1.1302...  0.1147 sec/batch
Epoch: 10/20...  Training Step: 4600...  Training loss: 1.2508...  0.1204 sec/batch
Epoch: 10/20...  Training Step: 4601...  Training loss: 1.0822...  0.1165 sec/batch
Epoch: 10/20...  Training Step: 4602...  Training loss: 1.1038...  0.1170 sec/batch
Epoch: 10/20...  Training Step: 4603...  Training loss: 1.1726...  0.1276 sec/batch
Epoch: 10/20...  Training Step: 4604...  Training loss: 1.1037...  0.1211 sec/batch
Epoch: 10/20...  Training Step: 4605...  Training loss: 1.0177...  0.1192 sec/batch
Epoch: 10/20...  Training Step: 4606...  Training loss: 1.1115...  0.1289 sec/batch
Epoch: 10/20...  Training Step: 4607...  Training loss: 1.3377...  0.1181 sec/batch
Epoch: 10/20...  Training Step: 4608...  Training loss: 1.1337...  0.1225 sec/batch
Epoch: 10/20...  Training Step: 4609...  Training loss: 1.3820...  0.1271 sec/batch
Epoch: 10/20...  Training Step: 4610...  Training loss: 1.2910...  0.1222 sec/batch
Epoch: 10/20...  Training Step: 4611...  Training loss: 0.9744...  0.1175 sec/batch
Epoch: 10/20...  Training Step: 4612...  Training loss: 1.1844...  0.1256 sec/batch
Epoch: 10/20...  Training Step: 4613...  Training loss: 1.0170...  0.1194 sec/batch
Epoch: 10/20...  Training Step: 4614...  Training loss: 1.2857...  0.1103 sec/batch
Epoch: 10/20...  Training Step: 4615...  Training loss: 1.3345...  0.1289 sec/batch
Epoch: 10/20...  Training Step: 4616...  Training loss: 1.2986...  0.1235 sec/batch
Epoch: 10/20...  Training Step: 4617...  Training loss: 0.9613...  0.1251 sec/batch
Epoch: 10/20...  Training Step: 4618...  Training loss: 1.1451...  0.1227 sec/batch
Epoch: 10/20...  Training Step: 4619...  Training loss: 1.2347...  0.1267 sec/batch
Epoch: 10/20...  Training Step: 4620...  Training loss: 1.1812...  0.1261 sec/batch
Epoch: 10/20...  Training Step: 4621...  Training loss: 1.2636...  0.1266 sec/batch
Epoch: 10/20...  Training Step: 4622...  Training loss: 1.1318...  0.1233 sec/batch
Epoch: 10/20...  Training Step: 4623...  Training loss: 1.0897...  0.1202 sec/batch
Epoch: 10/20...  Training Step: 4624...  Training loss: 0.9945...  0.1241 sec/batch
Epoch: 10/20...  Training Step: 4625...  Training loss: 1.2001...  0.1261 sec/batch
Epoch: 10/20...  Training Step: 4626...  Training loss: 1.1133...  0.1224 sec/batch
Epoch: 10/20...  Training Step: 4627...  Training loss: 1.0142...  0.1242 sec/batch
Epoch: 10/20...  Training Step: 4628...  Training loss: 1.2936...  0.1266 sec/batch
Epoch: 10/20...  Training Step: 4629...  Training loss: 1.0897...  0.1272 sec/batch
Epoch: 10/20...  Training Step: 4630...  Training loss: 1.4363...  0.1220 sec/batch
Epoch: 10/20...  Training Step: 4631...  Training loss: 1.2970...  0.1225 sec/batch
Epoch: 10/20...  Training Step: 4632...  Training loss: 0.9405...  0.1284 sec/batch
Epoch: 10/20...  Training Step: 4633...  Training loss: 1.0258...  0.1305 sec/batch
Epoch: 10/20...  Training Step: 4634...  Training loss: 0.9095...  0.1160 sec/batch
Epoch: 10/20...  Training Step: 4635...  Training loss: 0.9603...  0.1237 sec/batch
Epoch: 10/20...  Training Step: 4636...  Training loss: 1.1425...  0.1233 sec/batch
Epoch: 10/20...  Training Step: 4637...  Training loss: 1.1764...  0.1144 sec/batch
Epoch: 10/20...  Training Step: 4638...  Training loss: 1.0053...  0.1136 sec/batch
Epoch: 10/20...  Training Step: 4639...  Training loss: 1.1530...  0.1142 sec/batch
Epoch: 10/20...  Training Step: 4640...  Training loss: 1.0521...  0.1248 sec/batch
Epoch: 11/20...  Training Step: 4641...  Training loss: 1.4921...  0.1404 sec/batch
Epoch: 11/20...  Training Step: 4642...  Training loss: 1.1083...  0.1331 sec/batch
Epoch: 11/20...  Training Step: 4643...  Training loss: 1.0618...  0.1265 sec/batch
Epoch: 11/20...  Training Step: 4644...  Training loss: 1.1195...  0.1216 sec/batch
Epoch: 11/20...  Training Step: 4645...  Training loss: 1.1696...  0.1255 sec/batch
Epoch: 11/20...  Training Step: 4646...  Training loss: 1.0381...  0.1247 sec/batch
Epoch: 11/20...  Training Step: 4647...  Training loss: 1.2149...  0.1256 sec/batch
Epoch: 11/20...  Training Step: 4648...  Training loss: 1.0056...  0.1261 sec/batch
Epoch: 11/20...  Training Step: 4649...  Training loss: 1.0171...  0.1266 sec/batch
Epoch: 11/20...  Training Step: 4650...  Training loss: 1.1183...  0.1316 sec/batch
Epoch: 11/20...  Training Step: 4651...  Training loss: 1.1784...  0.1261 sec/batch
Epoch: 11/20...  Training Step: 4652...  Training loss: 0.9166...  0.1324 sec/batch
Epoch: 11/20...  Training Step: 4653...  Training loss: 1.3499...  0.1275 sec/batch
Epoch: 11/20...  Training Step: 4654...  Training loss: 0.8840...  0.1201 sec/batch
Epoch: 11/20...  Training Step: 4655...  Training loss: 1.2156...  0.1242 sec/batch
Epoch: 11/20...  Training Step: 4656...  Training loss: 1.2437...  0.1250 sec/batch
Epoch: 11/20...  Training Step: 4657...  Training loss: 1.1115...  0.1195 sec/batch
Epoch: 11/20...  Training Step: 4658...  Training loss: 1.2028...  0.1223 sec/batch
Epoch: 11/20...  Training Step: 4659...  Training loss: 1.1225...  0.1240 sec/batch
Epoch: 11/20...  Training Step: 4660...  Training loss: 1.0719...  0.1304 sec/batch
Epoch: 11/20...  Training Step: 4661...  Training loss: 1.2356...  0.1283 sec/batch
Epoch: 11/20...  Training Step: 4662...  Training loss: 1.0856...  0.1282 sec/batch
Epoch: 11/20...  Training Step: 4663...  Training loss: 1.2345...  0.1260 sec/batch
Epoch: 11/20...  Training Step: 4664...  Training loss: 1.0212...  0.1320 sec/batch
Epoch: 11/20...  Training Step: 4665...  Training loss: 1.1290...  0.1410 sec/batch
Epoch: 11/20...  Training Step: 4666...  Training loss: 1.2081...  0.1309 sec/batch
Epoch: 11/20...  Training Step: 4667...  Training loss: 1.1725...  0.1348 sec/batch
Epoch: 11/20...  Training Step: 4668...  Training loss: 0.9423...  0.1353 sec/batch
Epoch: 11/20...  Training Step: 4669...  Training loss: 1.1628...  0.1345 sec/batch
Epoch: 11/20...  Training Step: 4670...  Training loss: 1.1500...  0.1296 sec/batch
Epoch: 11/20...  Training Step: 4671...  Training loss: 0.9715...  0.1242 sec/batch
Epoch: 11/20...  Training Step: 4672...  Training loss: 1.0834...  0.1317 sec/batch
Epoch: 11/20...  Training Step: 4673...  Training loss: 0.9153...  0.1307 sec/batch
Epoch: 11/20...  Training Step: 4674...  Training loss: 0.9855...  0.1238 sec/batch
Epoch: 11/20...  Training Step: 4675...  Training loss: 0.9815...  0.1328 sec/batch
Epoch: 11/20...  Training Step: 4676...  Training loss: 1.0538...  0.1295 sec/batch
Epoch: 11/20...  Training Step: 4677...  Training loss: 1.1860...  0.1275 sec/batch
Epoch: 11/20...  Training Step: 4678...  Training loss: 1.0233...  0.1294 sec/batch
Epoch: 11/20...  Training Step: 4679...  Training loss: 1.0704...  0.1239 sec/batch
Epoch: 11/20...  Training Step: 4680...  Training loss: 1.3717...  0.1304 sec/batch
Epoch: 11/20...  Training Step: 4681...  Training loss: 1.1149...  0.1337 sec/batch
Epoch: 11/20...  Training Step: 4682...  Training loss: 1.0458...  0.1337 sec/batch
Epoch: 11/20...  Training Step: 4683...  Training loss: 1.2264...  0.1287 sec/batch
Epoch: 11/20...  Training Step: 4684...  Training loss: 0.8999...  0.1347 sec/batch
Epoch: 11/20...  Training Step: 4685...  Training loss: 1.0690...  0.1288 sec/batch
Epoch: 11/20...  Training Step: 4686...  Training loss: 1.0925...  0.1311 sec/batch
Epoch: 11/20...  Training Step: 4687...  Training loss: 1.1917...  0.1299 sec/batch
Epoch: 11/20...  Training Step: 4688...  Training loss: 1.1074...  0.1171 sec/batch
Epoch: 11/20...  Training Step: 4689...  Training loss: 1.1166...  0.1326 sec/batch
Epoch: 11/20...  Training Step: 4690...  Training loss: 1.0760...  0.1327 sec/batch
Epoch: 11/20...  Training Step: 4691...  Training loss: 1.1052...  0.1282 sec/batch
Epoch: 11/20...  Training Step: 4692...  Training loss: 1.2709...  0.1327 sec/batch
Epoch: 11/20...  Training Step: 4693...  Training loss: 1.1516...  0.1324 sec/batch
Epoch: 11/20...  Training Step: 4694...  Training loss: 1.0441...  0.1301 sec/batch
Epoch: 11/20...  Training Step: 4695...  Training loss: 1.0230...  0.1282 sec/batch
Epoch: 11/20...  Training Step: 4696...  Training loss: 1.0265...  0.1307 sec/batch
Epoch: 11/20...  Training Step: 4697...  Training loss: 1.0687...  0.1218 sec/batch
Epoch: 11/20...  Training Step: 4698...  Training loss: 1.2254...  0.1244 sec/batch
Epoch: 11/20...  Training Step: 4699...  Training loss: 0.8824...  0.1268 sec/batch
Epoch: 11/20...  Training Step: 4700...  Training loss: 1.0476...  0.1315 sec/batch
Epoch: 11/20...  Training Step: 4701...  Training loss: 1.0092...  0.1349 sec/batch
Epoch: 11/20...  Training Step: 4702...  Training loss: 1.2437...  0.1364 sec/batch
Epoch: 11/20...  Training Step: 4703...  Training loss: 0.9482...  0.1277 sec/batch
Epoch: 11/20...  Training Step: 4704...  Training loss: 1.1660...  0.1343 sec/batch
Epoch: 11/20...  Training Step: 4705...  Training loss: 0.9995...  0.1366 sec/batch
Epoch: 11/20...  Training Step: 4706...  Training loss: 1.1502...  0.1349 sec/batch
Epoch: 11/20...  Training Step: 4707...  Training loss: 1.1054...  0.1373 sec/batch
Epoch: 11/20...  Training Step: 4708...  Training loss: 1.1832...  0.1276 sec/batch
Epoch: 11/20...  Training Step: 4709...  Training loss: 0.9893...  0.1375 sec/batch
Epoch: 11/20...  Training Step: 4710...  Training loss: 1.0998...  0.1383 sec/batch
Epoch: 11/20...  Training Step: 4711...  Training loss: 1.2514...  0.1275 sec/batch
Epoch: 11/20...  Training Step: 4712...  Training loss: 0.9839...  0.1307 sec/batch
Epoch: 11/20...  Training Step: 4713...  Training loss: 1.1467...  0.1353 sec/batch
Epoch: 11/20...  Training Step: 4714...  Training loss: 1.0221...  0.1285 sec/batch
Epoch: 11/20...  Training Step: 4715...  Training loss: 1.2358...  0.1355 sec/batch
Epoch: 11/20...  Training Step: 4716...  Training loss: 1.0757...  0.1316 sec/batch
Epoch: 11/20...  Training Step: 4717...  Training loss: 1.0302...  0.1318 sec/batch
Epoch: 11/20...  Training Step: 4718...  Training loss: 1.1453...  0.1320 sec/batch
Epoch: 11/20...  Training Step: 4719...  Training loss: 1.2225...  0.1351 sec/batch
Epoch: 11/20...  Training Step: 4720...  Training loss: 0.9376...  0.1336 sec/batch
Epoch: 11/20...  Training Step: 4721...  Training loss: 1.2626...  0.1297 sec/batch
Epoch: 11/20...  Training Step: 4722...  Training loss: 1.0687...  0.1204 sec/batch
Epoch: 11/20...  Training Step: 4723...  Training loss: 1.0852...  0.1272 sec/batch
Epoch: 11/20...  Training Step: 4724...  Training loss: 1.2302...  0.1272 sec/batch
Epoch: 11/20...  Training Step: 4725...  Training loss: 1.0821...  0.1205 sec/batch
Epoch: 11/20...  Training Step: 4726...  Training loss: 1.2828...  0.1286 sec/batch
Epoch: 11/20...  Training Step: 4727...  Training loss: 0.9707...  0.1257 sec/batch
Epoch: 11/20...  Training Step: 4728...  Training loss: 1.1511...  0.1257 sec/batch
Epoch: 11/20...  Training Step: 4729...  Training loss: 1.1880...  0.1204 sec/batch
Epoch: 11/20...  Training Step: 4730...  Training loss: 1.0725...  0.1249 sec/batch
Epoch: 11/20...  Training Step: 4731...  Training loss: 1.1922...  0.1298 sec/batch
Epoch: 11/20...  Training Step: 4732...  Training loss: 1.1713...  0.1228 sec/batch
Epoch: 11/20...  Training Step: 4733...  Training loss: 0.9878...  0.1242 sec/batch
Epoch: 11/20...  Training Step: 4734...  Training loss: 1.1699...  0.1230 sec/batch
Epoch: 11/20...  Training Step: 4735...  Training loss: 1.1313...  0.1278 sec/batch
Epoch: 11/20...  Training Step: 4736...  Training loss: 1.0617...  0.1284 sec/batch
Epoch: 11/20...  Training Step: 4737...  Training loss: 1.2983...  0.1285 sec/batch
Epoch: 11/20...  Training Step: 4738...  Training loss: 1.1603...  0.1184 sec/batch
Epoch: 11/20...  Training Step: 4739...  Training loss: 1.1196...  0.1170 sec/batch
Epoch: 11/20...  Training Step: 4740...  Training loss: 1.1457...  0.1281 sec/batch
Epoch: 11/20...  Training Step: 4741...  Training loss: 1.1547...  0.1291 sec/batch
Epoch: 11/20...  Training Step: 4742...  Training loss: 1.3098...  0.1180 sec/batch
Epoch: 11/20...  Training Step: 4743...  Training loss: 1.2495...  0.1270 sec/batch
Epoch: 11/20...  Training Step: 4744...  Training loss: 1.1795...  0.1278 sec/batch
Epoch: 11/20...  Training Step: 4745...  Training loss: 1.2423...  0.1138 sec/batch
Epoch: 11/20...  Training Step: 4746...  Training loss: 1.2896...  0.1226 sec/batch
Epoch: 11/20...  Training Step: 4747...  Training loss: 1.0368...  0.1258 sec/batch
Epoch: 11/20...  Training Step: 4748...  Training loss: 1.2004...  0.1190 sec/batch
Epoch: 11/20...  Training Step: 4749...  Training loss: 1.1259...  0.1195 sec/batch
Epoch: 11/20...  Training Step: 4750...  Training loss: 1.0013...  0.1258 sec/batch
Epoch: 11/20...  Training Step: 4751...  Training loss: 1.1755...  0.1277 sec/batch
Epoch: 11/20...  Training Step: 4752...  Training loss: 0.9733...  0.1249 sec/batch
Epoch: 11/20...  Training Step: 4753...  Training loss: 1.2272...  0.1228 sec/batch
Epoch: 11/20...  Training Step: 4754...  Training loss: 1.3725...  0.1227 sec/batch
Epoch: 11/20...  Training Step: 4755...  Training loss: 1.1601...  0.1187 sec/batch
Epoch: 11/20...  Training Step: 4756...  Training loss: 1.1013...  0.1307 sec/batch
Epoch: 11/20...  Training Step: 4757...  Training loss: 1.2000...  0.1242 sec/batch
Epoch: 11/20...  Training Step: 4758...  Training loss: 1.2297...  0.1234 sec/batch
Epoch: 11/20...  Training Step: 4759...  Training loss: 1.1158...  0.1196 sec/batch
Epoch: 11/20...  Training Step: 4760...  Training loss: 1.0306...  0.1190 sec/batch
Epoch: 11/20...  Training Step: 4761...  Training loss: 1.1655...  0.1194 sec/batch
Epoch: 11/20...  Training Step: 4762...  Training loss: 1.2437...  0.1229 sec/batch
Epoch: 11/20...  Training Step: 4763...  Training loss: 1.0416...  0.1205 sec/batch
Epoch: 11/20...  Training Step: 4764...  Training loss: 1.1350...  0.1180 sec/batch
Epoch: 11/20...  Training Step: 4765...  Training loss: 1.2315...  0.1232 sec/batch
Epoch: 11/20...  Training Step: 4766...  Training loss: 0.9244...  0.1179 sec/batch
Epoch: 11/20...  Training Step: 4767...  Training loss: 1.0917...  0.1162 sec/batch
Epoch: 11/20...  Training Step: 4768...  Training loss: 1.2004...  0.1220 sec/batch
Epoch: 11/20...  Training Step: 4769...  Training loss: 1.1902...  0.1283 sec/batch
Epoch: 11/20...  Training Step: 4770...  Training loss: 1.0264...  0.1217 sec/batch
Epoch: 11/20...  Training Step: 4771...  Training loss: 1.3077...  0.1200 sec/batch
Epoch: 11/20...  Training Step: 4772...  Training loss: 1.1000...  0.1209 sec/batch
Epoch: 11/20...  Training Step: 4773...  Training loss: 1.0438...  0.1192 sec/batch
Epoch: 11/20...  Training Step: 4774...  Training loss: 1.2187...  0.1253 sec/batch
Epoch: 11/20...  Training Step: 4775...  Training loss: 0.9529...  0.1157 sec/batch
Epoch: 11/20...  Training Step: 4776...  Training loss: 0.8896...  0.1170 sec/batch
Epoch: 11/20...  Training Step: 4777...  Training loss: 0.9323...  0.1278 sec/batch
Epoch: 11/20...  Training Step: 4778...  Training loss: 1.0310...  0.1322 sec/batch
Epoch: 11/20...  Training Step: 4779...  Training loss: 1.0382...  0.1279 sec/batch
Epoch: 11/20...  Training Step: 4780...  Training loss: 1.2112...  0.1289 sec/batch
Epoch: 11/20...  Training Step: 4781...  Training loss: 0.9467...  0.1230 sec/batch
Epoch: 11/20...  Training Step: 4782...  Training loss: 1.0605...  0.1251 sec/batch
Epoch: 11/20...  Training Step: 4783...  Training loss: 0.9958...  0.1199 sec/batch
Epoch: 11/20...  Training Step: 4784...  Training loss: 1.1139...  0.1181 sec/batch
Epoch: 11/20...  Training Step: 4785...  Training loss: 1.1536...  0.1255 sec/batch
Epoch: 11/20...  Training Step: 4786...  Training loss: 1.0271...  0.1270 sec/batch
Epoch: 11/20...  Training Step: 4787...  Training loss: 1.1275...  0.1238 sec/batch
Epoch: 11/20...  Training Step: 4788...  Training loss: 1.0454...  0.1307 sec/batch
Epoch: 11/20...  Training Step: 4789...  Training loss: 1.0988...  0.1279 sec/batch
Epoch: 11/20...  Training Step: 4790...  Training loss: 1.2477...  0.1334 sec/batch
Epoch: 11/20...  Training Step: 4791...  Training loss: 1.3062...  0.1250 sec/batch
Epoch: 11/20...  Training Step: 4792...  Training loss: 1.1342...  0.1270 sec/batch
Epoch: 11/20...  Training Step: 4793...  Training loss: 1.2682...  0.1228 sec/batch
Epoch: 11/20...  Training Step: 4794...  Training loss: 1.1047...  0.1213 sec/batch
Epoch: 11/20...  Training Step: 4795...  Training loss: 1.1321...  0.1207 sec/batch
Epoch: 11/20...  Training Step: 4796...  Training loss: 1.0380...  0.1226 sec/batch
Epoch: 11/20...  Training Step: 4797...  Training loss: 1.0676...  0.1265 sec/batch
Epoch: 11/20...  Training Step: 4798...  Training loss: 1.1071...  0.1245 sec/batch
Epoch: 11/20...  Training Step: 4799...  Training loss: 1.0611...  0.1301 sec/batch
Epoch: 11/20...  Training Step: 4800...  Training loss: 1.0948...  0.1302 sec/batch
Epoch: 11/20...  Training Step: 4801...  Training loss: 1.0266...  0.1236 sec/batch
Epoch: 11/20...  Training Step: 4802...  Training loss: 1.0968...  0.1261 sec/batch
Epoch: 11/20...  Training Step: 4803...  Training loss: 1.3681...  0.1304 sec/batch
Epoch: 11/20...  Training Step: 4804...  Training loss: 1.0007...  0.1185 sec/batch
Epoch: 11/20...  Training Step: 4805...  Training loss: 1.1843...  0.1248 sec/batch
Epoch: 11/20...  Training Step: 4806...  Training loss: 1.0318...  0.1206 sec/batch
Epoch: 11/20...  Training Step: 4807...  Training loss: 0.9896...  0.1269 sec/batch
Epoch: 11/20...  Training Step: 4808...  Training loss: 1.2661...  0.1266 sec/batch
Epoch: 11/20...  Training Step: 4809...  Training loss: 0.9427...  0.1264 sec/batch
Epoch: 11/20...  Training Step: 4810...  Training loss: 1.1186...  0.1323 sec/batch
Epoch: 11/20...  Training Step: 4811...  Training loss: 1.1462...  0.1232 sec/batch
Epoch: 11/20...  Training Step: 4812...  Training loss: 1.3441...  0.1241 sec/batch
Epoch: 11/20...  Training Step: 4813...  Training loss: 1.0161...  0.1240 sec/batch
Epoch: 11/20...  Training Step: 4814...  Training loss: 1.1153...  0.1221 sec/batch
Epoch: 11/20...  Training Step: 4815...  Training loss: 1.2902...  0.1254 sec/batch
Epoch: 11/20...  Training Step: 4816...  Training loss: 0.9128...  0.1263 sec/batch
Epoch: 11/20...  Training Step: 4817...  Training loss: 1.0423...  0.1315 sec/batch
Epoch: 11/20...  Training Step: 4818...  Training loss: 1.2092...  0.1275 sec/batch
Epoch: 11/20...  Training Step: 4819...  Training loss: 0.8835...  0.1276 sec/batch
Epoch: 11/20...  Training Step: 4820...  Training loss: 1.1247...  0.1240 sec/batch
Epoch: 11/20...  Training Step: 4821...  Training loss: 0.9866...  0.1320 sec/batch
Epoch: 11/20...  Training Step: 4822...  Training loss: 1.2689...  0.1262 sec/batch
Epoch: 11/20...  Training Step: 4823...  Training loss: 1.1629...  0.1333 sec/batch
Epoch: 11/20...  Training Step: 4824...  Training loss: 1.1107...  0.1232 sec/batch
Epoch: 11/20...  Training Step: 4825...  Training loss: 1.1944...  0.1270 sec/batch
Epoch: 11/20...  Training Step: 4826...  Training loss: 1.0856...  0.1250 sec/batch
Epoch: 11/20...  Training Step: 4827...  Training loss: 1.2338...  0.1293 sec/batch
Epoch: 11/20...  Training Step: 4828...  Training loss: 0.9642...  0.1335 sec/batch
Epoch: 11/20...  Training Step: 4829...  Training loss: 1.1981...  0.1326 sec/batch
Epoch: 11/20...  Training Step: 4830...  Training loss: 1.0794...  0.1316 sec/batch
Epoch: 11/20...  Training Step: 4831...  Training loss: 1.0407...  0.1341 sec/batch
Epoch: 11/20...  Training Step: 4832...  Training loss: 1.1374...  0.1289 sec/batch
Epoch: 11/20...  Training Step: 4833...  Training loss: 1.1122...  0.1229 sec/batch
Epoch: 11/20...  Training Step: 4834...  Training loss: 1.0800...  0.1253 sec/batch
Epoch: 11/20...  Training Step: 4835...  Training loss: 1.1373...  0.1355 sec/batch
Epoch: 11/20...  Training Step: 4836...  Training loss: 1.1398...  0.1286 sec/batch
Epoch: 11/20...  Training Step: 4837...  Training loss: 1.1156...  0.1211 sec/batch
Epoch: 11/20...  Training Step: 4838...  Training loss: 1.2383...  0.1221 sec/batch
Epoch: 11/20...  Training Step: 4839...  Training loss: 0.9259...  0.1417 sec/batch
Epoch: 11/20...  Training Step: 4840...  Training loss: 1.1507...  0.1457 sec/batch
Epoch: 11/20...  Training Step: 4841...  Training loss: 1.0600...  0.1292 sec/batch
Epoch: 11/20...  Training Step: 4842...  Training loss: 1.1656...  0.1279 sec/batch
Epoch: 11/20...  Training Step: 4843...  Training loss: 1.0673...  0.1206 sec/batch
Epoch: 11/20...  Training Step: 4844...  Training loss: 1.1729...  0.1153 sec/batch
Epoch: 11/20...  Training Step: 4845...  Training loss: 1.0391...  0.1160 sec/batch
Epoch: 11/20...  Training Step: 4846...  Training loss: 1.0933...  0.1131 sec/batch
Epoch: 11/20...  Training Step: 4847...  Training loss: 1.1472...  0.1207 sec/batch
Epoch: 11/20...  Training Step: 4848...  Training loss: 1.1318...  0.1339 sec/batch
Epoch: 11/20...  Training Step: 4849...  Training loss: 1.1397...  0.1247 sec/batch
Epoch: 11/20...  Training Step: 4850...  Training loss: 0.8784...  0.1283 sec/batch
Epoch: 11/20...  Training Step: 4851...  Training loss: 1.0675...  0.1284 sec/batch
Epoch: 11/20...  Training Step: 4852...  Training loss: 1.1329...  0.1405 sec/batch
Epoch: 11/20...  Training Step: 4853...  Training loss: 1.2140...  0.1228 sec/batch
Epoch: 11/20...  Training Step: 4854...  Training loss: 1.1356...  0.1260 sec/batch
Epoch: 11/20...  Training Step: 4855...  Training loss: 1.2288...  0.1262 sec/batch
Epoch: 11/20...  Training Step: 4856...  Training loss: 1.0714...  0.1262 sec/batch
Epoch: 11/20...  Training Step: 4857...  Training loss: 1.1303...  0.1291 sec/batch
Epoch: 11/20...  Training Step: 4858...  Training loss: 1.0694...  0.1220 sec/batch
Epoch: 11/20...  Training Step: 4859...  Training loss: 1.2721...  0.1183 sec/batch
Epoch: 11/20...  Training Step: 4860...  Training loss: 1.1571...  0.1213 sec/batch
Epoch: 11/20...  Training Step: 4861...  Training loss: 1.0495...  0.1200 sec/batch
Epoch: 11/20...  Training Step: 4862...  Training loss: 1.4137...  0.1238 sec/batch
Epoch: 11/20...  Training Step: 4863...  Training loss: 1.2239...  0.1278 sec/batch
Epoch: 11/20...  Training Step: 4864...  Training loss: 1.3537...  0.1234 sec/batch
Epoch: 11/20...  Training Step: 4865...  Training loss: 1.1408...  0.1143 sec/batch
Epoch: 11/20...  Training Step: 4866...  Training loss: 1.1576...  0.1237 sec/batch
Epoch: 11/20...  Training Step: 4867...  Training loss: 1.3145...  0.1311 sec/batch
Epoch: 11/20...  Training Step: 4868...  Training loss: 1.0490...  0.1179 sec/batch
Epoch: 11/20...  Training Step: 4869...  Training loss: 1.1972...  0.1247 sec/batch
Epoch: 11/20...  Training Step: 4870...  Training loss: 1.0759...  0.1237 sec/batch
Epoch: 11/20...  Training Step: 4871...  Training loss: 1.2380...  0.1209 sec/batch
Epoch: 11/20...  Training Step: 4872...  Training loss: 1.1119...  0.1148 sec/batch
Epoch: 11/20...  Training Step: 4873...  Training loss: 1.3718...  0.1215 sec/batch
Epoch: 11/20...  Training Step: 4874...  Training loss: 1.0930...  0.1157 sec/batch
Epoch: 11/20...  Training Step: 4875...  Training loss: 1.3889...  0.1182 sec/batch
Epoch: 11/20...  Training Step: 4876...  Training loss: 1.1328...  0.1272 sec/batch
Epoch: 11/20...  Training Step: 4877...  Training loss: 1.2214...  0.1303 sec/batch
Epoch: 11/20...  Training Step: 4878...  Training loss: 1.0544...  0.1287 sec/batch
Epoch: 11/20...  Training Step: 4879...  Training loss: 1.2240...  0.1281 sec/batch
Epoch: 11/20...  Training Step: 4880...  Training loss: 1.1975...  0.1341 sec/batch
Epoch: 11/20...  Training Step: 4881...  Training loss: 1.0868...  0.1267 sec/batch
Epoch: 11/20...  Training Step: 4882...  Training loss: 0.9920...  0.1178 sec/batch
Epoch: 11/20...  Training Step: 4883...  Training loss: 1.2159...  0.1250 sec/batch
Epoch: 11/20...  Training Step: 4884...  Training loss: 1.2499...  0.1354 sec/batch
Epoch: 11/20...  Training Step: 4885...  Training loss: 1.1721...  0.1196 sec/batch
Epoch: 11/20...  Training Step: 4886...  Training loss: 1.0664...  0.1257 sec/batch
Epoch: 11/20...  Training Step: 4887...  Training loss: 1.1064...  0.1249 sec/batch
Epoch: 11/20...  Training Step: 4888...  Training loss: 1.3557...  0.1325 sec/batch
Epoch: 11/20...  Training Step: 4889...  Training loss: 1.0984...  0.1239 sec/batch
Epoch: 11/20...  Training Step: 4890...  Training loss: 1.1912...  0.1188 sec/batch
Epoch: 11/20...  Training Step: 4891...  Training loss: 1.2272...  0.1246 sec/batch
Epoch: 11/20...  Training Step: 4892...  Training loss: 1.1222...  0.1286 sec/batch
Epoch: 11/20...  Training Step: 4893...  Training loss: 0.9849...  0.1315 sec/batch
Epoch: 11/20...  Training Step: 4894...  Training loss: 1.1243...  0.1241 sec/batch
Epoch: 11/20...  Training Step: 4895...  Training loss: 1.0579...  0.1216 sec/batch
Epoch: 11/20...  Training Step: 4896...  Training loss: 1.1446...  0.1239 sec/batch
Epoch: 11/20...  Training Step: 4897...  Training loss: 1.2571...  0.1255 sec/batch
Epoch: 11/20...  Training Step: 4898...  Training loss: 1.0187...  0.1315 sec/batch
Epoch: 11/20...  Training Step: 4899...  Training loss: 1.0706...  0.1278 sec/batch
Epoch: 11/20...  Training Step: 4900...  Training loss: 1.0472...  0.1288 sec/batch
Epoch: 11/20...  Training Step: 4901...  Training loss: 1.2050...  0.1305 sec/batch
Epoch: 11/20...  Training Step: 4902...  Training loss: 1.1247...  0.1357 sec/batch
Epoch: 11/20...  Training Step: 4903...  Training loss: 1.0568...  0.1241 sec/batch
Epoch: 11/20...  Training Step: 4904...  Training loss: 1.1952...  0.1213 sec/batch
Epoch: 11/20...  Training Step: 4905...  Training loss: 1.1611...  0.1257 sec/batch
Epoch: 11/20...  Training Step: 4906...  Training loss: 1.1831...  0.1283 sec/batch
Epoch: 11/20...  Training Step: 4907...  Training loss: 1.2288...  0.1233 sec/batch
Epoch: 11/20...  Training Step: 4908...  Training loss: 1.2805...  0.1247 sec/batch
Epoch: 11/20...  Training Step: 4909...  Training loss: 1.2931...  0.1287 sec/batch
Epoch: 11/20...  Training Step: 4910...  Training loss: 1.1108...  0.1284 sec/batch
Epoch: 11/20...  Training Step: 4911...  Training loss: 1.2995...  0.1222 sec/batch
Epoch: 11/20...  Training Step: 4912...  Training loss: 1.1967...  0.1199 sec/batch
Epoch: 11/20...  Training Step: 4913...  Training loss: 1.2149...  0.1194 sec/batch
Epoch: 11/20...  Training Step: 4914...  Training loss: 1.2176...  0.1184 sec/batch
Epoch: 11/20...  Training Step: 4915...  Training loss: 1.1471...  0.1116 sec/batch
Epoch: 11/20...  Training Step: 4916...  Training loss: 1.1075...  0.1133 sec/batch
Epoch: 11/20...  Training Step: 4917...  Training loss: 1.1894...  0.1258 sec/batch
Epoch: 11/20...  Training Step: 4918...  Training loss: 1.5159...  0.1258 sec/batch
Epoch: 11/20...  Training Step: 4919...  Training loss: 1.1788...  0.1279 sec/batch
Epoch: 11/20...  Training Step: 4920...  Training loss: 1.1268...  0.1347 sec/batch
Epoch: 11/20...  Training Step: 4921...  Training loss: 1.0580...  0.1261 sec/batch
Epoch: 11/20...  Training Step: 4922...  Training loss: 1.1290...  0.1275 sec/batch
Epoch: 11/20...  Training Step: 4923...  Training loss: 1.1377...  0.1224 sec/batch
Epoch: 11/20...  Training Step: 4924...  Training loss: 1.1104...  0.1230 sec/batch
Epoch: 11/20...  Training Step: 4925...  Training loss: 1.0183...  0.1223 sec/batch
Epoch: 11/20...  Training Step: 4926...  Training loss: 1.1025...  0.1458 sec/batch
Epoch: 11/20...  Training Step: 4927...  Training loss: 1.2140...  0.1243 sec/batch
Epoch: 11/20...  Training Step: 4928...  Training loss: 1.3478...  0.1227 sec/batch
Epoch: 11/20...  Training Step: 4929...  Training loss: 1.1146...  0.1332 sec/batch
Epoch: 11/20...  Training Step: 4930...  Training loss: 1.1414...  0.1236 sec/batch
Epoch: 11/20...  Training Step: 4931...  Training loss: 1.1390...  0.1322 sec/batch
Epoch: 11/20...  Training Step: 4932...  Training loss: 1.0771...  0.1259 sec/batch
Epoch: 11/20...  Training Step: 4933...  Training loss: 1.0427...  0.1221 sec/batch
Epoch: 11/20...  Training Step: 4934...  Training loss: 1.0906...  0.1114 sec/batch
Epoch: 11/20...  Training Step: 4935...  Training loss: 1.1365...  0.1123 sec/batch
Epoch: 11/20...  Training Step: 4936...  Training loss: 1.3540...  0.1230 sec/batch
Epoch: 11/20...  Training Step: 4937...  Training loss: 1.0467...  0.1256 sec/batch
Epoch: 11/20...  Training Step: 4938...  Training loss: 1.0572...  0.1255 sec/batch
Epoch: 11/20...  Training Step: 4939...  Training loss: 1.1151...  0.1278 sec/batch
Epoch: 11/20...  Training Step: 4940...  Training loss: 1.2470...  0.1215 sec/batch
Epoch: 11/20...  Training Step: 4941...  Training loss: 1.1244...  0.1216 sec/batch
Epoch: 11/20...  Training Step: 4942...  Training loss: 1.1057...  0.1217 sec/batch
Epoch: 11/20...  Training Step: 4943...  Training loss: 1.0041...  0.1241 sec/batch
Epoch: 11/20...  Training Step: 4944...  Training loss: 1.4346...  0.1253 sec/batch
Epoch: 11/20...  Training Step: 4945...  Training loss: 0.9307...  0.1245 sec/batch
Epoch: 11/20...  Training Step: 4946...  Training loss: 1.1272...  0.1120 sec/batch
Epoch: 11/20...  Training Step: 4947...  Training loss: 1.1968...  0.1230 sec/batch
Epoch: 11/20...  Training Step: 4948...  Training loss: 1.5039...  0.1279 sec/batch
Epoch: 11/20...  Training Step: 4949...  Training loss: 1.2444...  0.1275 sec/batch
Epoch: 11/20...  Training Step: 4950...  Training loss: 1.2528...  0.1169 sec/batch
Epoch: 11/20...  Training Step: 4951...  Training loss: 1.1908...  0.1244 sec/batch
Epoch: 11/20...  Training Step: 4952...  Training loss: 0.9701...  0.1214 sec/batch
Epoch: 11/20...  Training Step: 4953...  Training loss: 1.0428...  0.1176 sec/batch
Epoch: 11/20...  Training Step: 4954...  Training loss: 0.9969...  0.1236 sec/batch
Epoch: 11/20...  Training Step: 4955...  Training loss: 0.9004...  0.1194 sec/batch
Epoch: 11/20...  Training Step: 4956...  Training loss: 0.9995...  0.1226 sec/batch
Epoch: 11/20...  Training Step: 4957...  Training loss: 0.9840...  0.1205 sec/batch
Epoch: 11/20...  Training Step: 4958...  Training loss: 0.9400...  0.1232 sec/batch
Epoch: 11/20...  Training Step: 4959...  Training loss: 1.0455...  0.1254 sec/batch
Epoch: 11/20...  Training Step: 4960...  Training loss: 1.0490...  0.1195 sec/batch
Epoch: 11/20...  Training Step: 4961...  Training loss: 1.0894...  0.1136 sec/batch
Epoch: 11/20...  Training Step: 4962...  Training loss: 1.2477...  0.1141 sec/batch
Epoch: 11/20...  Training Step: 4963...  Training loss: 1.0198...  0.1121 sec/batch
Epoch: 11/20...  Training Step: 4964...  Training loss: 1.0049...  0.1231 sec/batch
Epoch: 11/20...  Training Step: 4965...  Training loss: 0.9652...  0.1273 sec/batch
Epoch: 11/20...  Training Step: 4966...  Training loss: 1.0048...  0.1239 sec/batch
Epoch: 11/20...  Training Step: 4967...  Training loss: 1.1590...  0.1257 sec/batch
Epoch: 11/20...  Training Step: 4968...  Training loss: 0.9918...  0.1236 sec/batch
Epoch: 11/20...  Training Step: 4969...  Training loss: 1.2386...  0.1283 sec/batch
Epoch: 11/20...  Training Step: 4970...  Training loss: 1.0326...  0.1255 sec/batch
Epoch: 11/20...  Training Step: 4971...  Training loss: 1.1362...  0.1257 sec/batch
Epoch: 11/20...  Training Step: 4972...  Training loss: 1.1222...  0.1251 sec/batch
Epoch: 11/20...  Training Step: 4973...  Training loss: 1.0147...  0.1252 sec/batch
Epoch: 11/20...  Training Step: 4974...  Training loss: 0.9779...  0.1244 sec/batch
Epoch: 11/20...  Training Step: 4975...  Training loss: 1.0798...  0.1229 sec/batch
Epoch: 11/20...  Training Step: 4976...  Training loss: 1.0467...  0.1245 sec/batch
Epoch: 11/20...  Training Step: 4977...  Training loss: 1.0238...  0.1211 sec/batch
Epoch: 11/20...  Training Step: 4978...  Training loss: 1.0102...  0.1324 sec/batch
Epoch: 11/20...  Training Step: 4979...  Training loss: 1.1478...  0.1273 sec/batch
Epoch: 11/20...  Training Step: 4980...  Training loss: 1.0061...  0.1305 sec/batch
Epoch: 11/20...  Training Step: 4981...  Training loss: 1.0986...  0.1289 sec/batch
Epoch: 11/20...  Training Step: 4982...  Training loss: 1.2405...  0.1251 sec/batch
Epoch: 11/20...  Training Step: 4983...  Training loss: 1.0040...  0.1235 sec/batch
Epoch: 11/20...  Training Step: 4984...  Training loss: 1.2078...  0.1207 sec/batch
Epoch: 11/20...  Training Step: 4985...  Training loss: 1.0444...  0.1225 sec/batch
Epoch: 11/20...  Training Step: 4986...  Training loss: 1.0842...  0.1252 sec/batch
Epoch: 11/20...  Training Step: 4987...  Training loss: 0.9174...  0.1249 sec/batch
Epoch: 11/20...  Training Step: 4988...  Training loss: 1.3370...  0.1324 sec/batch
Epoch: 11/20...  Training Step: 4989...  Training loss: 1.1495...  0.1221 sec/batch
Epoch: 11/20...  Training Step: 4990...  Training loss: 1.0358...  0.1304 sec/batch
Epoch: 11/20...  Training Step: 4991...  Training loss: 1.0640...  0.1270 sec/batch
Epoch: 11/20...  Training Step: 4992...  Training loss: 1.1091...  0.1233 sec/batch
Epoch: 11/20...  Training Step: 4993...  Training loss: 0.9694...  0.1298 sec/batch
Epoch: 11/20...  Training Step: 4994...  Training loss: 0.8153...  0.1288 sec/batch
Epoch: 11/20...  Training Step: 4995...  Training loss: 1.1708...  0.1253 sec/batch
Epoch: 11/20...  Training Step: 4996...  Training loss: 1.1145...  0.1256 sec/batch
Epoch: 11/20...  Training Step: 4997...  Training loss: 1.0493...  0.1348 sec/batch
Epoch: 11/20...  Training Step: 4998...  Training loss: 1.1900...  0.1279 sec/batch
Epoch: 11/20...  Training Step: 4999...  Training loss: 1.2238...  0.1262 sec/batch
Epoch: 11/20...  Training Step: 5000...  Training loss: 0.9184...  0.1237 sec/batch
Epoch: 11/20...  Training Step: 5001...  Training loss: 1.1696...  0.1173 sec/batch
Epoch: 11/20...  Training Step: 5002...  Training loss: 1.1307...  0.1148 sec/batch
Epoch: 11/20...  Training Step: 5003...  Training loss: 1.0353...  0.1166 sec/batch
Epoch: 11/20...  Training Step: 5004...  Training loss: 1.0680...  0.1172 sec/batch
Epoch: 11/20...  Training Step: 5005...  Training loss: 1.0882...  0.1158 sec/batch
Epoch: 11/20...  Training Step: 5006...  Training loss: 1.1675...  0.1219 sec/batch
Epoch: 11/20...  Training Step: 5007...  Training loss: 0.9894...  0.1195 sec/batch
Epoch: 11/20...  Training Step: 5008...  Training loss: 1.2022...  0.1173 sec/batch
Epoch: 11/20...  Training Step: 5009...  Training loss: 1.1112...  0.1218 sec/batch
Epoch: 11/20...  Training Step: 5010...  Training loss: 1.0014...  0.1184 sec/batch
Epoch: 11/20...  Training Step: 5011...  Training loss: 1.0114...  0.1204 sec/batch
Epoch: 11/20...  Training Step: 5012...  Training loss: 1.1007...  0.1161 sec/batch
Epoch: 11/20...  Training Step: 5013...  Training loss: 0.9593...  0.1160 sec/batch
Epoch: 11/20...  Training Step: 5014...  Training loss: 1.3197...  0.1186 sec/batch
Epoch: 11/20...  Training Step: 5015...  Training loss: 1.2819...  0.1129 sec/batch
Epoch: 11/20...  Training Step: 5016...  Training loss: 1.1930...  0.1148 sec/batch
Epoch: 11/20...  Training Step: 5017...  Training loss: 1.1820...  0.1178 sec/batch
Epoch: 11/20...  Training Step: 5018...  Training loss: 1.2356...  0.1185 sec/batch
Epoch: 11/20...  Training Step: 5019...  Training loss: 1.1693...  0.1130 sec/batch
Epoch: 11/20...  Training Step: 5020...  Training loss: 1.1366...  0.1176 sec/batch
Epoch: 11/20...  Training Step: 5021...  Training loss: 1.0466...  0.1143 sec/batch
Epoch: 11/20...  Training Step: 5022...  Training loss: 1.0704...  0.1165 sec/batch
Epoch: 11/20...  Training Step: 5023...  Training loss: 0.9570...  0.1175 sec/batch
Epoch: 11/20...  Training Step: 5024...  Training loss: 1.2216...  0.1216 sec/batch
Epoch: 11/20...  Training Step: 5025...  Training loss: 1.0085...  0.1175 sec/batch
Epoch: 11/20...  Training Step: 5026...  Training loss: 1.2038...  0.1207 sec/batch
Epoch: 11/20...  Training Step: 5027...  Training loss: 1.1117...  0.1197 sec/batch
Epoch: 11/20...  Training Step: 5028...  Training loss: 0.9754...  0.1164 sec/batch
Epoch: 11/20...  Training Step: 5029...  Training loss: 1.0205...  0.1147 sec/batch
Epoch: 11/20...  Training Step: 5030...  Training loss: 1.1224...  0.1155 sec/batch
Epoch: 11/20...  Training Step: 5031...  Training loss: 1.0367...  0.1184 sec/batch
Epoch: 11/20...  Training Step: 5032...  Training loss: 0.9474...  0.1269 sec/batch
Epoch: 11/20...  Training Step: 5033...  Training loss: 1.1800...  0.1241 sec/batch
Epoch: 11/20...  Training Step: 5034...  Training loss: 1.0170...  0.1214 sec/batch
Epoch: 11/20...  Training Step: 5035...  Training loss: 1.0606...  0.1258 sec/batch
Epoch: 11/20...  Training Step: 5036...  Training loss: 1.4241...  0.1249 sec/batch
Epoch: 11/20...  Training Step: 5037...  Training loss: 1.0868...  0.1291 sec/batch
Epoch: 11/20...  Training Step: 5038...  Training loss: 1.0705...  0.1230 sec/batch
Epoch: 11/20...  Training Step: 5039...  Training loss: 1.1233...  0.1226 sec/batch
Epoch: 11/20...  Training Step: 5040...  Training loss: 1.0278...  0.1310 sec/batch
Epoch: 11/20...  Training Step: 5041...  Training loss: 0.9417...  0.1253 sec/batch
Epoch: 11/20...  Training Step: 5042...  Training loss: 0.9718...  0.1261 sec/batch
Epoch: 11/20...  Training Step: 5043...  Training loss: 1.0202...  0.1270 sec/batch
Epoch: 11/20...  Training Step: 5044...  Training loss: 1.2210...  0.1249 sec/batch
Epoch: 11/20...  Training Step: 5045...  Training loss: 1.1368...  0.1174 sec/batch
Epoch: 11/20...  Training Step: 5046...  Training loss: 1.1775...  0.1181 sec/batch
Epoch: 11/20...  Training Step: 5047...  Training loss: 1.0307...  0.1283 sec/batch
Epoch: 11/20...  Training Step: 5048...  Training loss: 1.3948...  0.1295 sec/batch
Epoch: 11/20...  Training Step: 5049...  Training loss: 1.0848...  0.1269 sec/batch
Epoch: 11/20...  Training Step: 5050...  Training loss: 1.0328...  0.1424 sec/batch
Epoch: 11/20...  Training Step: 5051...  Training loss: 1.0617...  0.1316 sec/batch
Epoch: 11/20...  Training Step: 5052...  Training loss: 1.1103...  0.1366 sec/batch
Epoch: 11/20...  Training Step: 5053...  Training loss: 1.2257...  0.1282 sec/batch
Epoch: 11/20...  Training Step: 5054...  Training loss: 1.1041...  0.1292 sec/batch
Epoch: 11/20...  Training Step: 5055...  Training loss: 1.1635...  0.1258 sec/batch
Epoch: 11/20...  Training Step: 5056...  Training loss: 1.4214...  0.1432 sec/batch
Epoch: 11/20...  Training Step: 5057...  Training loss: 1.2003...  0.1333 sec/batch
Epoch: 11/20...  Training Step: 5058...  Training loss: 0.9734...  0.1285 sec/batch
Epoch: 11/20...  Training Step: 5059...  Training loss: 1.0361...  0.1191 sec/batch
Epoch: 11/20...  Training Step: 5060...  Training loss: 0.9972...  0.1200 sec/batch
Epoch: 11/20...  Training Step: 5061...  Training loss: 1.0632...  0.1340 sec/batch
Epoch: 11/20...  Training Step: 5062...  Training loss: 1.1002...  0.1298 sec/batch
Epoch: 11/20...  Training Step: 5063...  Training loss: 1.2750...  0.1322 sec/batch
Epoch: 11/20...  Training Step: 5064...  Training loss: 1.2022...  0.1275 sec/batch
Epoch: 11/20...  Training Step: 5065...  Training loss: 1.0809...  0.1269 sec/batch
Epoch: 11/20...  Training Step: 5066...  Training loss: 1.0754...  0.1181 sec/batch
Epoch: 11/20...  Training Step: 5067...  Training loss: 1.2190...  0.1236 sec/batch
Epoch: 11/20...  Training Step: 5068...  Training loss: 1.0759...  0.1184 sec/batch
Epoch: 11/20...  Training Step: 5069...  Training loss: 1.0237...  0.1200 sec/batch
Epoch: 11/20...  Training Step: 5070...  Training loss: 1.0702...  0.1205 sec/batch
Epoch: 11/20...  Training Step: 5071...  Training loss: 1.3340...  0.1307 sec/batch
Epoch: 11/20...  Training Step: 5072...  Training loss: 1.0133...  0.1273 sec/batch
Epoch: 11/20...  Training Step: 5073...  Training loss: 1.3138...  0.1238 sec/batch
Epoch: 11/20...  Training Step: 5074...  Training loss: 1.1220...  0.1293 sec/batch
Epoch: 11/20...  Training Step: 5075...  Training loss: 1.1279...  0.1217 sec/batch
Epoch: 11/20...  Training Step: 5076...  Training loss: 1.1375...  0.1245 sec/batch
Epoch: 11/20...  Training Step: 5077...  Training loss: 0.9701...  0.1204 sec/batch
Epoch: 11/20...  Training Step: 5078...  Training loss: 1.1495...  0.1228 sec/batch
Epoch: 11/20...  Training Step: 5079...  Training loss: 1.3327...  0.1343 sec/batch
Epoch: 11/20...  Training Step: 5080...  Training loss: 1.3571...  0.1233 sec/batch
Epoch: 11/20...  Training Step: 5081...  Training loss: 1.0410...  0.1242 sec/batch
Epoch: 11/20...  Training Step: 5082...  Training loss: 1.1000...  0.1171 sec/batch
Epoch: 11/20...  Training Step: 5083...  Training loss: 1.0846...  0.1179 sec/batch
Epoch: 11/20...  Training Step: 5084...  Training loss: 1.1892...  0.1182 sec/batch
Epoch: 11/20...  Training Step: 5085...  Training loss: 1.0498...  0.1205 sec/batch
Epoch: 11/20...  Training Step: 5086...  Training loss: 1.0820...  0.1208 sec/batch
Epoch: 11/20...  Training Step: 5087...  Training loss: 1.0856...  0.1223 sec/batch
Epoch: 11/20...  Training Step: 5088...  Training loss: 0.9303...  0.1168 sec/batch
Epoch: 11/20...  Training Step: 5089...  Training loss: 1.1110...  0.1132 sec/batch
Epoch: 11/20...  Training Step: 5090...  Training loss: 1.1511...  0.1177 sec/batch
Epoch: 11/20...  Training Step: 5091...  Training loss: 1.1148...  0.1166 sec/batch
Epoch: 11/20...  Training Step: 5092...  Training loss: 1.1982...  0.1184 sec/batch
Epoch: 11/20...  Training Step: 5093...  Training loss: 0.9776...  0.1186 sec/batch
Epoch: 11/20...  Training Step: 5094...  Training loss: 1.3615...  0.1158 sec/batch
Epoch: 11/20...  Training Step: 5095...  Training loss: 1.1975...  0.1182 sec/batch
Epoch: 11/20...  Training Step: 5096...  Training loss: 0.9654...  0.1131 sec/batch
Epoch: 11/20...  Training Step: 5097...  Training loss: 0.9458...  0.1175 sec/batch
Epoch: 11/20...  Training Step: 5098...  Training loss: 0.8823...  0.1153 sec/batch
Epoch: 11/20...  Training Step: 5099...  Training loss: 0.9578...  0.1170 sec/batch
Epoch: 11/20...  Training Step: 5100...  Training loss: 1.1142...  0.1096 sec/batch
Epoch: 11/20...  Training Step: 5101...  Training loss: 1.1936...  0.1156 sec/batch
Epoch: 11/20...  Training Step: 5102...  Training loss: 1.0961...  0.1151 sec/batch
Epoch: 11/20...  Training Step: 5103...  Training loss: 1.1698...  0.1222 sec/batch
Epoch: 11/20...  Training Step: 5104...  Training loss: 1.0350...  0.1252 sec/batch
Epoch: 12/20...  Training Step: 5105...  Training loss: 1.4142...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5106...  Training loss: 1.1721...  0.1151 sec/batch
Epoch: 12/20...  Training Step: 5107...  Training loss: 1.0289...  0.1172 sec/batch
Epoch: 12/20...  Training Step: 5108...  Training loss: 1.1744...  0.1161 sec/batch
Epoch: 12/20...  Training Step: 5109...  Training loss: 1.2136...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5110...  Training loss: 0.9706...  0.1127 sec/batch
Epoch: 12/20...  Training Step: 5111...  Training loss: 1.1469...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5112...  Training loss: 1.0269...  0.1200 sec/batch
Epoch: 12/20...  Training Step: 5113...  Training loss: 0.9189...  0.1158 sec/batch
Epoch: 12/20...  Training Step: 5114...  Training loss: 1.0982...  0.1151 sec/batch
Epoch: 12/20...  Training Step: 5115...  Training loss: 1.0628...  0.1134 sec/batch
Epoch: 12/20...  Training Step: 5116...  Training loss: 0.9487...  0.1161 sec/batch
Epoch: 12/20...  Training Step: 5117...  Training loss: 1.2776...  0.1203 sec/batch
Epoch: 12/20...  Training Step: 5118...  Training loss: 0.8961...  0.1111 sec/batch
Epoch: 12/20...  Training Step: 5119...  Training loss: 1.1663...  0.1220 sec/batch
Epoch: 12/20...  Training Step: 5120...  Training loss: 1.2996...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5121...  Training loss: 0.9345...  0.1133 sec/batch
Epoch: 12/20...  Training Step: 5122...  Training loss: 1.0150...  0.1156 sec/batch
Epoch: 12/20...  Training Step: 5123...  Training loss: 1.0259...  0.1188 sec/batch
Epoch: 12/20...  Training Step: 5124...  Training loss: 0.9745...  0.1162 sec/batch
Epoch: 12/20...  Training Step: 5125...  Training loss: 1.1571...  0.1213 sec/batch
Epoch: 12/20...  Training Step: 5126...  Training loss: 1.0914...  0.1194 sec/batch
Epoch: 12/20...  Training Step: 5127...  Training loss: 1.1719...  0.1215 sec/batch
Epoch: 12/20...  Training Step: 5128...  Training loss: 1.1430...  0.1209 sec/batch
Epoch: 12/20...  Training Step: 5129...  Training loss: 1.0628...  0.1261 sec/batch
Epoch: 12/20...  Training Step: 5130...  Training loss: 1.1231...  0.1153 sec/batch
Epoch: 12/20...  Training Step: 5131...  Training loss: 1.2585...  0.1209 sec/batch
Epoch: 12/20...  Training Step: 5132...  Training loss: 0.9396...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5133...  Training loss: 0.9057...  0.1191 sec/batch
Epoch: 12/20...  Training Step: 5134...  Training loss: 1.0739...  0.1143 sec/batch
Epoch: 12/20...  Training Step: 5135...  Training loss: 0.9512...  0.1117 sec/batch
Epoch: 12/20...  Training Step: 5136...  Training loss: 1.1012...  0.1171 sec/batch
Epoch: 12/20...  Training Step: 5137...  Training loss: 1.0350...  0.1178 sec/batch
Epoch: 12/20...  Training Step: 5138...  Training loss: 0.9233...  0.1234 sec/batch
Epoch: 12/20...  Training Step: 5139...  Training loss: 0.8927...  0.1169 sec/batch
Epoch: 12/20...  Training Step: 5140...  Training loss: 0.9756...  0.1196 sec/batch
Epoch: 12/20...  Training Step: 5141...  Training loss: 1.0965...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5142...  Training loss: 0.9747...  0.1152 sec/batch
Epoch: 12/20...  Training Step: 5143...  Training loss: 1.0009...  0.1143 sec/batch
Epoch: 12/20...  Training Step: 5144...  Training loss: 1.4048...  0.1173 sec/batch
Epoch: 12/20...  Training Step: 5145...  Training loss: 1.0809...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5146...  Training loss: 1.0230...  0.1214 sec/batch
Epoch: 12/20...  Training Step: 5147...  Training loss: 1.1283...  0.1202 sec/batch
Epoch: 12/20...  Training Step: 5148...  Training loss: 0.9043...  0.1196 sec/batch
Epoch: 12/20...  Training Step: 5149...  Training loss: 1.0910...  0.1166 sec/batch
Epoch: 12/20...  Training Step: 5150...  Training loss: 1.0659...  0.1150 sec/batch
Epoch: 12/20...  Training Step: 5151...  Training loss: 1.0197...  0.1193 sec/batch
Epoch: 12/20...  Training Step: 5152...  Training loss: 1.0000...  0.1198 sec/batch
Epoch: 12/20...  Training Step: 5153...  Training loss: 1.0132...  0.1153 sec/batch
Epoch: 12/20...  Training Step: 5154...  Training loss: 1.0408...  0.1155 sec/batch
Epoch: 12/20...  Training Step: 5155...  Training loss: 1.1277...  0.1204 sec/batch
Epoch: 12/20...  Training Step: 5156...  Training loss: 1.1061...  0.1152 sec/batch
Epoch: 12/20...  Training Step: 5157...  Training loss: 1.0119...  0.1151 sec/batch
Epoch: 12/20...  Training Step: 5158...  Training loss: 1.0702...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5159...  Training loss: 0.9235...  0.1202 sec/batch
Epoch: 12/20...  Training Step: 5160...  Training loss: 1.0441...  0.1190 sec/batch
Epoch: 12/20...  Training Step: 5161...  Training loss: 0.9708...  0.1214 sec/batch
Epoch: 12/20...  Training Step: 5162...  Training loss: 1.0648...  0.1180 sec/batch
Epoch: 12/20...  Training Step: 5163...  Training loss: 0.8983...  0.1172 sec/batch
Epoch: 12/20...  Training Step: 5164...  Training loss: 0.9960...  0.1147 sec/batch
Epoch: 12/20...  Training Step: 5165...  Training loss: 0.8723...  0.1210 sec/batch
Epoch: 12/20...  Training Step: 5166...  Training loss: 1.1072...  0.1189 sec/batch
Epoch: 12/20...  Training Step: 5167...  Training loss: 0.9797...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5168...  Training loss: 1.1227...  0.1222 sec/batch
Epoch: 12/20...  Training Step: 5169...  Training loss: 1.0150...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5170...  Training loss: 1.1242...  0.1164 sec/batch
Epoch: 12/20...  Training Step: 5171...  Training loss: 1.0613...  0.1193 sec/batch
Epoch: 12/20...  Training Step: 5172...  Training loss: 1.1191...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5173...  Training loss: 0.9786...  0.1185 sec/batch
Epoch: 12/20...  Training Step: 5174...  Training loss: 1.1233...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5175...  Training loss: 1.1962...  0.1128 sec/batch
Epoch: 12/20...  Training Step: 5176...  Training loss: 0.9027...  0.1171 sec/batch
Epoch: 12/20...  Training Step: 5177...  Training loss: 1.0632...  0.1164 sec/batch
Epoch: 12/20...  Training Step: 5178...  Training loss: 0.8529...  0.1178 sec/batch
Epoch: 12/20...  Training Step: 5179...  Training loss: 1.2325...  0.1185 sec/batch
Epoch: 12/20...  Training Step: 5180...  Training loss: 0.9724...  0.1193 sec/batch
Epoch: 12/20...  Training Step: 5181...  Training loss: 1.1633...  0.1214 sec/batch
Epoch: 12/20...  Training Step: 5182...  Training loss: 1.0489...  0.1151 sec/batch
Epoch: 12/20...  Training Step: 5183...  Training loss: 1.1474...  0.1147 sec/batch
Epoch: 12/20...  Training Step: 5184...  Training loss: 0.9846...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5185...  Training loss: 1.1437...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5186...  Training loss: 0.9682...  0.1201 sec/batch
Epoch: 12/20...  Training Step: 5187...  Training loss: 0.9544...  0.1203 sec/batch
Epoch: 12/20...  Training Step: 5188...  Training loss: 1.2107...  0.1146 sec/batch
Epoch: 12/20...  Training Step: 5189...  Training loss: 1.0264...  0.1180 sec/batch
Epoch: 12/20...  Training Step: 5190...  Training loss: 1.2433...  0.1152 sec/batch
Epoch: 12/20...  Training Step: 5191...  Training loss: 1.0571...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5192...  Training loss: 1.2446...  0.1156 sec/batch
Epoch: 12/20...  Training Step: 5193...  Training loss: 1.1871...  0.1134 sec/batch
Epoch: 12/20...  Training Step: 5194...  Training loss: 1.1907...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5195...  Training loss: 1.2205...  0.1214 sec/batch
Epoch: 12/20...  Training Step: 5196...  Training loss: 1.1104...  0.1199 sec/batch
Epoch: 12/20...  Training Step: 5197...  Training loss: 0.9431...  0.1169 sec/batch
Epoch: 12/20...  Training Step: 5198...  Training loss: 1.2025...  0.1147 sec/batch
Epoch: 12/20...  Training Step: 5199...  Training loss: 1.1489...  0.1172 sec/batch
Epoch: 12/20...  Training Step: 5200...  Training loss: 1.1071...  0.1127 sec/batch
Epoch: 12/20...  Training Step: 5201...  Training loss: 1.4176...  0.1174 sec/batch
Epoch: 12/20...  Training Step: 5202...  Training loss: 1.0926...  0.1192 sec/batch
Epoch: 12/20...  Training Step: 5203...  Training loss: 1.2379...  0.1168 sec/batch
Epoch: 12/20...  Training Step: 5204...  Training loss: 1.0813...  0.1200 sec/batch
Epoch: 12/20...  Training Step: 5205...  Training loss: 1.1806...  0.1142 sec/batch
Epoch: 12/20...  Training Step: 5206...  Training loss: 1.2245...  0.1136 sec/batch
Epoch: 12/20...  Training Step: 5207...  Training loss: 1.1935...  0.1191 sec/batch
Epoch: 12/20...  Training Step: 5208...  Training loss: 1.1219...  0.1172 sec/batch
Epoch: 12/20...  Training Step: 5209...  Training loss: 1.0549...  0.1209 sec/batch
Epoch: 12/20...  Training Step: 5210...  Training loss: 1.2464...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5211...  Training loss: 0.9978...  0.1169 sec/batch
Epoch: 12/20...  Training Step: 5212...  Training loss: 1.1931...  0.1154 sec/batch
Epoch: 12/20...  Training Step: 5213...  Training loss: 1.1989...  0.1134 sec/batch
Epoch: 12/20...  Training Step: 5214...  Training loss: 1.0488...  0.1180 sec/batch
Epoch: 12/20...  Training Step: 5215...  Training loss: 1.0849...  0.1226 sec/batch
Epoch: 12/20...  Training Step: 5216...  Training loss: 0.9717...  0.1206 sec/batch
Epoch: 12/20...  Training Step: 5217...  Training loss: 1.0430...  0.1125 sec/batch
Epoch: 12/20...  Training Step: 5218...  Training loss: 1.1636...  0.1180 sec/batch
Epoch: 12/20...  Training Step: 5219...  Training loss: 1.2120...  0.1115 sec/batch
Epoch: 12/20...  Training Step: 5220...  Training loss: 1.0625...  0.1173 sec/batch
Epoch: 12/20...  Training Step: 5221...  Training loss: 1.1903...  0.1168 sec/batch
Epoch: 12/20...  Training Step: 5222...  Training loss: 1.0665...  0.1186 sec/batch
Epoch: 12/20...  Training Step: 5223...  Training loss: 1.0429...  0.1126 sec/batch
Epoch: 12/20...  Training Step: 5224...  Training loss: 0.9700...  0.1167 sec/batch
Epoch: 12/20...  Training Step: 5225...  Training loss: 1.1898...  0.1177 sec/batch
Epoch: 12/20...  Training Step: 5226...  Training loss: 1.1240...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5227...  Training loss: 1.1615...  0.1179 sec/batch
Epoch: 12/20...  Training Step: 5228...  Training loss: 1.1693...  0.1176 sec/batch
Epoch: 12/20...  Training Step: 5229...  Training loss: 1.1489...  0.1164 sec/batch
Epoch: 12/20...  Training Step: 5230...  Training loss: 0.9618...  0.1172 sec/batch
Epoch: 12/20...  Training Step: 5231...  Training loss: 1.0733...  0.1236 sec/batch
Epoch: 12/20...  Training Step: 5232...  Training loss: 1.1840...  0.1203 sec/batch
Epoch: 12/20...  Training Step: 5233...  Training loss: 1.0688...  0.1188 sec/batch
Epoch: 12/20...  Training Step: 5234...  Training loss: 1.0007...  0.1178 sec/batch
Epoch: 12/20...  Training Step: 5235...  Training loss: 1.3750...  0.1159 sec/batch
Epoch: 12/20...  Training Step: 5236...  Training loss: 1.0796...  0.1198 sec/batch
Epoch: 12/20...  Training Step: 5237...  Training loss: 1.0814...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5238...  Training loss: 1.2044...  0.1197 sec/batch
Epoch: 12/20...  Training Step: 5239...  Training loss: 1.0451...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5240...  Training loss: 0.9268...  0.1218 sec/batch
Epoch: 12/20...  Training Step: 5241...  Training loss: 0.9755...  0.1183 sec/batch
Epoch: 12/20...  Training Step: 5242...  Training loss: 1.1974...  0.1199 sec/batch
Epoch: 12/20...  Training Step: 5243...  Training loss: 1.0494...  0.1193 sec/batch
Epoch: 12/20...  Training Step: 5244...  Training loss: 1.3648...  0.1192 sec/batch
Epoch: 12/20...  Training Step: 5245...  Training loss: 0.9793...  0.1220 sec/batch
Epoch: 12/20...  Training Step: 5246...  Training loss: 1.0033...  0.1176 sec/batch
Epoch: 12/20...  Training Step: 5247...  Training loss: 0.9796...  0.1155 sec/batch
Epoch: 12/20...  Training Step: 5248...  Training loss: 1.0842...  0.1201 sec/batch
Epoch: 12/20...  Training Step: 5249...  Training loss: 1.0875...  0.1113 sec/batch
Epoch: 12/20...  Training Step: 5250...  Training loss: 0.9786...  0.1207 sec/batch
Epoch: 12/20...  Training Step: 5251...  Training loss: 1.0157...  0.1193 sec/batch
Epoch: 12/20...  Training Step: 5252...  Training loss: 1.0133...  0.1186 sec/batch
Epoch: 12/20...  Training Step: 5253...  Training loss: 1.0435...  0.1186 sec/batch
Epoch: 12/20...  Training Step: 5254...  Training loss: 1.1882...  0.1183 sec/batch
Epoch: 12/20...  Training Step: 5255...  Training loss: 1.0183...  0.1163 sec/batch
Epoch: 12/20...  Training Step: 5256...  Training loss: 1.0762...  0.1217 sec/batch
Epoch: 12/20...  Training Step: 5257...  Training loss: 1.3494...  0.1219 sec/batch
Epoch: 12/20...  Training Step: 5258...  Training loss: 1.0157...  0.1142 sec/batch
Epoch: 12/20...  Training Step: 5259...  Training loss: 1.0496...  0.1193 sec/batch
Epoch: 12/20...  Training Step: 5260...  Training loss: 1.1749...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5261...  Training loss: 1.0177...  0.1153 sec/batch
Epoch: 12/20...  Training Step: 5262...  Training loss: 1.0962...  0.1129 sec/batch
Epoch: 12/20...  Training Step: 5263...  Training loss: 0.9087...  0.1199 sec/batch
Epoch: 12/20...  Training Step: 5264...  Training loss: 1.0270...  0.1216 sec/batch
Epoch: 12/20...  Training Step: 5265...  Training loss: 1.1752...  0.1228 sec/batch
Epoch: 12/20...  Training Step: 5266...  Training loss: 0.9844...  0.1194 sec/batch
Epoch: 12/20...  Training Step: 5267...  Training loss: 1.3654...  0.1198 sec/batch
Epoch: 12/20...  Training Step: 5268...  Training loss: 0.9639...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5269...  Training loss: 1.0557...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5270...  Training loss: 1.0558...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5271...  Training loss: 0.9249...  0.1163 sec/batch
Epoch: 12/20...  Training Step: 5272...  Training loss: 1.1500...  0.1174 sec/batch
Epoch: 12/20...  Training Step: 5273...  Training loss: 1.0205...  0.1188 sec/batch
Epoch: 12/20...  Training Step: 5274...  Training loss: 1.1874...  0.1159 sec/batch
Epoch: 12/20...  Training Step: 5275...  Training loss: 1.1283...  0.1152 sec/batch
Epoch: 12/20...  Training Step: 5276...  Training loss: 1.1485...  0.1124 sec/batch
Epoch: 12/20...  Training Step: 5277...  Training loss: 1.0996...  0.1163 sec/batch
Epoch: 12/20...  Training Step: 5278...  Training loss: 1.0358...  0.1157 sec/batch
Epoch: 12/20...  Training Step: 5279...  Training loss: 1.2695...  0.1127 sec/batch
Epoch: 12/20...  Training Step: 5280...  Training loss: 0.8548...  0.1154 sec/batch
Epoch: 12/20...  Training Step: 5281...  Training loss: 0.9665...  0.1211 sec/batch
Epoch: 12/20...  Training Step: 5282...  Training loss: 1.2284...  0.1194 sec/batch
Epoch: 12/20...  Training Step: 5283...  Training loss: 0.9621...  0.1160 sec/batch
Epoch: 12/20...  Training Step: 5284...  Training loss: 1.0711...  0.1186 sec/batch
Epoch: 12/20...  Training Step: 5285...  Training loss: 0.8494...  0.1229 sec/batch
Epoch: 12/20...  Training Step: 5286...  Training loss: 1.1662...  0.1148 sec/batch
Epoch: 12/20...  Training Step: 5287...  Training loss: 1.1658...  0.1178 sec/batch
Epoch: 12/20...  Training Step: 5288...  Training loss: 1.0719...  0.1200 sec/batch
Epoch: 12/20...  Training Step: 5289...  Training loss: 1.1881...  0.1176 sec/batch
Epoch: 12/20...  Training Step: 5290...  Training loss: 1.1373...  0.1230 sec/batch
Epoch: 12/20...  Training Step: 5291...  Training loss: 1.2057...  0.1171 sec/batch
Epoch: 12/20...  Training Step: 5292...  Training loss: 0.9799...  0.1183 sec/batch
Epoch: 12/20...  Training Step: 5293...  Training loss: 1.1890...  0.1127 sec/batch
Epoch: 12/20...  Training Step: 5294...  Training loss: 0.9925...  0.1158 sec/batch
Epoch: 12/20...  Training Step: 5295...  Training loss: 1.0576...  0.1189 sec/batch
Epoch: 12/20...  Training Step: 5296...  Training loss: 1.1865...  0.1133 sec/batch
Epoch: 12/20...  Training Step: 5297...  Training loss: 1.0959...  0.1162 sec/batch
Epoch: 12/20...  Training Step: 5298...  Training loss: 1.0166...  0.1209 sec/batch
Epoch: 12/20...  Training Step: 5299...  Training loss: 1.1671...  0.1147 sec/batch
Epoch: 12/20...  Training Step: 5300...  Training loss: 1.1471...  0.1147 sec/batch
Epoch: 12/20...  Training Step: 5301...  Training loss: 1.0791...  0.1192 sec/batch
Epoch: 12/20...  Training Step: 5302...  Training loss: 1.0793...  0.1174 sec/batch
Epoch: 12/20...  Training Step: 5303...  Training loss: 0.8736...  0.1243 sec/batch
Epoch: 12/20...  Training Step: 5304...  Training loss: 1.0324...  0.1248 sec/batch
Epoch: 12/20...  Training Step: 5305...  Training loss: 1.0305...  0.1223 sec/batch
Epoch: 12/20...  Training Step: 5306...  Training loss: 1.1022...  0.1118 sec/batch
Epoch: 12/20...  Training Step: 5307...  Training loss: 0.9932...  0.1239 sec/batch
Epoch: 12/20...  Training Step: 5308...  Training loss: 1.2179...  0.1157 sec/batch
Epoch: 12/20...  Training Step: 5309...  Training loss: 0.9815...  0.1178 sec/batch
Epoch: 12/20...  Training Step: 5310...  Training loss: 0.9831...  0.1215 sec/batch
Epoch: 12/20...  Training Step: 5311...  Training loss: 1.0817...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5312...  Training loss: 1.2052...  0.1115 sec/batch
Epoch: 12/20...  Training Step: 5313...  Training loss: 1.1035...  0.1155 sec/batch
Epoch: 12/20...  Training Step: 5314...  Training loss: 1.0144...  0.1211 sec/batch
Epoch: 12/20...  Training Step: 5315...  Training loss: 0.8852...  0.1217 sec/batch
Epoch: 12/20...  Training Step: 5316...  Training loss: 1.1755...  0.1203 sec/batch
Epoch: 12/20...  Training Step: 5317...  Training loss: 1.2227...  0.1202 sec/batch
Epoch: 12/20...  Training Step: 5318...  Training loss: 1.0826...  0.1397 sec/batch
Epoch: 12/20...  Training Step: 5319...  Training loss: 1.1640...  0.1708 sec/batch
Epoch: 12/20...  Training Step: 5320...  Training loss: 1.1022...  0.1607 sec/batch
Epoch: 12/20...  Training Step: 5321...  Training loss: 1.1221...  0.1442 sec/batch
Epoch: 12/20...  Training Step: 5322...  Training loss: 1.1403...  0.1339 sec/batch
Epoch: 12/20...  Training Step: 5323...  Training loss: 1.2717...  0.1267 sec/batch
Epoch: 12/20...  Training Step: 5324...  Training loss: 1.2186...  0.1289 sec/batch
Epoch: 12/20...  Training Step: 5325...  Training loss: 1.1091...  0.1265 sec/batch
Epoch: 12/20...  Training Step: 5326...  Training loss: 1.4322...  0.1233 sec/batch
Epoch: 12/20...  Training Step: 5327...  Training loss: 1.1888...  0.1211 sec/batch
Epoch: 12/20...  Training Step: 5328...  Training loss: 1.2495...  0.1288 sec/batch
Epoch: 12/20...  Training Step: 5329...  Training loss: 1.1285...  0.1269 sec/batch
Epoch: 12/20...  Training Step: 5330...  Training loss: 1.2364...  0.1254 sec/batch
Epoch: 12/20...  Training Step: 5331...  Training loss: 1.1506...  0.1232 sec/batch
Epoch: 12/20...  Training Step: 5332...  Training loss: 1.0002...  0.1168 sec/batch
Epoch: 12/20...  Training Step: 5333...  Training loss: 1.1027...  0.1169 sec/batch
Epoch: 12/20...  Training Step: 5334...  Training loss: 1.0917...  0.1146 sec/batch
Epoch: 12/20...  Training Step: 5335...  Training loss: 1.1348...  0.1140 sec/batch
Epoch: 12/20...  Training Step: 5336...  Training loss: 1.0260...  0.1188 sec/batch
Epoch: 12/20...  Training Step: 5337...  Training loss: 1.3320...  0.1173 sec/batch
Epoch: 12/20...  Training Step: 5338...  Training loss: 1.0997...  0.1205 sec/batch
Epoch: 12/20...  Training Step: 5339...  Training loss: 1.3327...  0.1185 sec/batch
Epoch: 12/20...  Training Step: 5340...  Training loss: 1.0975...  0.1226 sec/batch
Epoch: 12/20...  Training Step: 5341...  Training loss: 1.1783...  0.1210 sec/batch
Epoch: 12/20...  Training Step: 5342...  Training loss: 1.0085...  0.1161 sec/batch
Epoch: 12/20...  Training Step: 5343...  Training loss: 1.2126...  0.1148 sec/batch
Epoch: 12/20...  Training Step: 5344...  Training loss: 1.2089...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5345...  Training loss: 1.1329...  0.1190 sec/batch
Epoch: 12/20...  Training Step: 5346...  Training loss: 1.0562...  0.1147 sec/batch
Epoch: 12/20...  Training Step: 5347...  Training loss: 1.2914...  0.1198 sec/batch
Epoch: 12/20...  Training Step: 5348...  Training loss: 1.1848...  0.1235 sec/batch
Epoch: 12/20...  Training Step: 5349...  Training loss: 1.1234...  0.1209 sec/batch
Epoch: 12/20...  Training Step: 5350...  Training loss: 1.0085...  0.1300 sec/batch
Epoch: 12/20...  Training Step: 5351...  Training loss: 1.0727...  0.1244 sec/batch
Epoch: 12/20...  Training Step: 5352...  Training loss: 1.2099...  0.1318 sec/batch
Epoch: 12/20...  Training Step: 5353...  Training loss: 1.2048...  0.1301 sec/batch
Epoch: 12/20...  Training Step: 5354...  Training loss: 1.2328...  0.1207 sec/batch
Epoch: 12/20...  Training Step: 5355...  Training loss: 1.0792...  0.1198 sec/batch
Epoch: 12/20...  Training Step: 5356...  Training loss: 1.1655...  0.1199 sec/batch
Epoch: 12/20...  Training Step: 5357...  Training loss: 1.0726...  0.1180 sec/batch
Epoch: 12/20...  Training Step: 5358...  Training loss: 1.0632...  0.1172 sec/batch
Epoch: 12/20...  Training Step: 5359...  Training loss: 1.0885...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5360...  Training loss: 0.9936...  0.1213 sec/batch
Epoch: 12/20...  Training Step: 5361...  Training loss: 1.2502...  0.1134 sec/batch
Epoch: 12/20...  Training Step: 5362...  Training loss: 0.9723...  0.1120 sec/batch
Epoch: 12/20...  Training Step: 5363...  Training loss: 1.0517...  0.1185 sec/batch
Epoch: 12/20...  Training Step: 5364...  Training loss: 0.9542...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5365...  Training loss: 1.1759...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5366...  Training loss: 1.0715...  0.1171 sec/batch
Epoch: 12/20...  Training Step: 5367...  Training loss: 1.0393...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5368...  Training loss: 1.1450...  0.1211 sec/batch
Epoch: 12/20...  Training Step: 5369...  Training loss: 1.1453...  0.1195 sec/batch
Epoch: 12/20...  Training Step: 5370...  Training loss: 1.0778...  0.1169 sec/batch
Epoch: 12/20...  Training Step: 5371...  Training loss: 1.1707...  0.1198 sec/batch
Epoch: 12/20...  Training Step: 5372...  Training loss: 1.1699...  0.1183 sec/batch
Epoch: 12/20...  Training Step: 5373...  Training loss: 1.2210...  0.1186 sec/batch
Epoch: 12/20...  Training Step: 5374...  Training loss: 1.1800...  0.1149 sec/batch
Epoch: 12/20...  Training Step: 5375...  Training loss: 1.2940...  0.1171 sec/batch
Epoch: 12/20...  Training Step: 5376...  Training loss: 1.2030...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5377...  Training loss: 1.2182...  0.1155 sec/batch
Epoch: 12/20...  Training Step: 5378...  Training loss: 1.2161...  0.1157 sec/batch
Epoch: 12/20...  Training Step: 5379...  Training loss: 1.1148...  0.1157 sec/batch
Epoch: 12/20...  Training Step: 5380...  Training loss: 1.1100...  0.1149 sec/batch
Epoch: 12/20...  Training Step: 5381...  Training loss: 1.1688...  0.1176 sec/batch
Epoch: 12/20...  Training Step: 5382...  Training loss: 1.4487...  0.1203 sec/batch
Epoch: 12/20...  Training Step: 5383...  Training loss: 1.1221...  0.1267 sec/batch
Epoch: 12/20...  Training Step: 5384...  Training loss: 1.0969...  0.1270 sec/batch
Epoch: 12/20...  Training Step: 5385...  Training loss: 1.1091...  0.1278 sec/batch
Epoch: 12/20...  Training Step: 5386...  Training loss: 1.1765...  0.1227 sec/batch
Epoch: 12/20...  Training Step: 5387...  Training loss: 1.2145...  0.1260 sec/batch
Epoch: 12/20...  Training Step: 5388...  Training loss: 1.0692...  0.1333 sec/batch
Epoch: 12/20...  Training Step: 5389...  Training loss: 1.0011...  0.1339 sec/batch
Epoch: 12/20...  Training Step: 5390...  Training loss: 1.0422...  0.1250 sec/batch
Epoch: 12/20...  Training Step: 5391...  Training loss: 1.0962...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5392...  Training loss: 1.1401...  0.1159 sec/batch
Epoch: 12/20...  Training Step: 5393...  Training loss: 1.1092...  0.1189 sec/batch
Epoch: 12/20...  Training Step: 5394...  Training loss: 1.2086...  0.1156 sec/batch
Epoch: 12/20...  Training Step: 5395...  Training loss: 1.0422...  0.1190 sec/batch
Epoch: 12/20...  Training Step: 5396...  Training loss: 1.0748...  0.1189 sec/batch
Epoch: 12/20...  Training Step: 5397...  Training loss: 1.0397...  0.1167 sec/batch
Epoch: 12/20...  Training Step: 5398...  Training loss: 1.2013...  0.1192 sec/batch
Epoch: 12/20...  Training Step: 5399...  Training loss: 1.1654...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5400...  Training loss: 1.3651...  0.1143 sec/batch
Epoch: 12/20...  Training Step: 5401...  Training loss: 1.0557...  0.1216 sec/batch
Epoch: 12/20...  Training Step: 5402...  Training loss: 1.1651...  0.1202 sec/batch
Epoch: 12/20...  Training Step: 5403...  Training loss: 1.0372...  0.1188 sec/batch
Epoch: 12/20...  Training Step: 5404...  Training loss: 1.0920...  0.1193 sec/batch
Epoch: 12/20...  Training Step: 5405...  Training loss: 1.1163...  0.1180 sec/batch
Epoch: 12/20...  Training Step: 5406...  Training loss: 1.0369...  0.1145 sec/batch
Epoch: 12/20...  Training Step: 5407...  Training loss: 0.9360...  0.1166 sec/batch
Epoch: 12/20...  Training Step: 5408...  Training loss: 1.2003...  0.1177 sec/batch
Epoch: 12/20...  Training Step: 5409...  Training loss: 1.0891...  0.1165 sec/batch
Epoch: 12/20...  Training Step: 5410...  Training loss: 1.1936...  0.1157 sec/batch
Epoch: 12/20...  Training Step: 5411...  Training loss: 1.1581...  0.1185 sec/batch
Epoch: 12/20...  Training Step: 5412...  Training loss: 1.3471...  0.1139 sec/batch
Epoch: 12/20...  Training Step: 5413...  Training loss: 1.2748...  0.1130 sec/batch
Epoch: 12/20...  Training Step: 5414...  Training loss: 1.1956...  0.1165 sec/batch
Epoch: 12/20...  Training Step: 5415...  Training loss: 1.0855...  0.1184 sec/batch
Epoch: 12/20...  Training Step: 5416...  Training loss: 0.9911...  0.1170 sec/batch
Epoch: 12/20...  Training Step: 5417...  Training loss: 1.0482...  0.1201 sec/batch
Epoch: 12/20...  Training Step: 5418...  Training loss: 1.0498...  0.1200 sec/batch
Epoch: 12/20...  Training Step: 5419...  Training loss: 0.8816...  0.1194 sec/batch
Epoch: 12/20...  Training Step: 5420...  Training loss: 0.9670...  0.1163 sec/batch
Epoch: 12/20...  Training Step: 5421...  Training loss: 0.9866...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5422...  Training loss: 1.0369...  0.1207 sec/batch
Epoch: 12/20...  Training Step: 5423...  Training loss: 0.9199...  0.1179 sec/batch
Epoch: 12/20...  Training Step: 5424...  Training loss: 1.0007...  0.1184 sec/batch
Epoch: 12/20...  Training Step: 5425...  Training loss: 1.0885...  0.1226 sec/batch
Epoch: 12/20...  Training Step: 5426...  Training loss: 1.2334...  0.1237 sec/batch
Epoch: 12/20...  Training Step: 5427...  Training loss: 1.0422...  0.1159 sec/batch
Epoch: 12/20...  Training Step: 5428...  Training loss: 1.0035...  0.1184 sec/batch
Epoch: 12/20...  Training Step: 5429...  Training loss: 0.9049...  0.1168 sec/batch
Epoch: 12/20...  Training Step: 5430...  Training loss: 0.9246...  0.1154 sec/batch
Epoch: 12/20...  Training Step: 5431...  Training loss: 1.0600...  0.1171 sec/batch
Epoch: 12/20...  Training Step: 5432...  Training loss: 1.0225...  0.1209 sec/batch
Epoch: 12/20...  Training Step: 5433...  Training loss: 1.2918...  0.1299 sec/batch
Epoch: 12/20...  Training Step: 5434...  Training loss: 1.1308...  0.1208 sec/batch
Epoch: 12/20...  Training Step: 5435...  Training loss: 1.2680...  0.1177 sec/batch
Epoch: 12/20...  Training Step: 5436...  Training loss: 1.1368...  0.1207 sec/batch
Epoch: 12/20...  Training Step: 5437...  Training loss: 1.0148...  0.1134 sec/batch
Epoch: 12/20...  Training Step: 5438...  Training loss: 1.0274...  0.1127 sec/batch
Epoch: 12/20...  Training Step: 5439...  Training loss: 1.1550...  0.1174 sec/batch
Epoch: 12/20...  Training Step: 5440...  Training loss: 1.1138...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5441...  Training loss: 1.0068...  0.1189 sec/batch
Epoch: 12/20...  Training Step: 5442...  Training loss: 0.9072...  0.1137 sec/batch
Epoch: 12/20...  Training Step: 5443...  Training loss: 1.1344...  0.1262 sec/batch
Epoch: 12/20...  Training Step: 5444...  Training loss: 1.1066...  0.1259 sec/batch
Epoch: 12/20...  Training Step: 5445...  Training loss: 0.9642...  0.1186 sec/batch
Epoch: 12/20...  Training Step: 5446...  Training loss: 1.1181...  0.1170 sec/batch
Epoch: 12/20...  Training Step: 5447...  Training loss: 0.9039...  0.1162 sec/batch
Epoch: 12/20...  Training Step: 5448...  Training loss: 1.1667...  0.1219 sec/batch
Epoch: 12/20...  Training Step: 5449...  Training loss: 1.0114...  0.1160 sec/batch
Epoch: 12/20...  Training Step: 5450...  Training loss: 0.9648...  0.1314 sec/batch
Epoch: 12/20...  Training Step: 5451...  Training loss: 0.8987...  0.1295 sec/batch
Epoch: 12/20...  Training Step: 5452...  Training loss: 1.2681...  0.1242 sec/batch
Epoch: 12/20...  Training Step: 5453...  Training loss: 1.0488...  0.1213 sec/batch
Epoch: 12/20...  Training Step: 5454...  Training loss: 1.0638...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5455...  Training loss: 1.0791...  0.1191 sec/batch
Epoch: 12/20...  Training Step: 5456...  Training loss: 1.0221...  0.1201 sec/batch
Epoch: 12/20...  Training Step: 5457...  Training loss: 0.9847...  0.1192 sec/batch
Epoch: 12/20...  Training Step: 5458...  Training loss: 0.8453...  0.1258 sec/batch
Epoch: 12/20...  Training Step: 5459...  Training loss: 1.1506...  0.1172 sec/batch
Epoch: 12/20...  Training Step: 5460...  Training loss: 0.9920...  0.1149 sec/batch
Epoch: 12/20...  Training Step: 5461...  Training loss: 0.9764...  0.1165 sec/batch
Epoch: 12/20...  Training Step: 5462...  Training loss: 1.1520...  0.1183 sec/batch
Epoch: 12/20...  Training Step: 5463...  Training loss: 1.1799...  0.1160 sec/batch
Epoch: 12/20...  Training Step: 5464...  Training loss: 0.8891...  0.1185 sec/batch
Epoch: 12/20...  Training Step: 5465...  Training loss: 1.1656...  0.1180 sec/batch
Epoch: 12/20...  Training Step: 5466...  Training loss: 1.1706...  0.1170 sec/batch
Epoch: 12/20...  Training Step: 5467...  Training loss: 0.9097...  0.1201 sec/batch
Epoch: 12/20...  Training Step: 5468...  Training loss: 0.9815...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5469...  Training loss: 1.0729...  0.1207 sec/batch
Epoch: 12/20...  Training Step: 5470...  Training loss: 1.1732...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5471...  Training loss: 1.0378...  0.1176 sec/batch
Epoch: 12/20...  Training Step: 5472...  Training loss: 1.2122...  0.1182 sec/batch
Epoch: 12/20...  Training Step: 5473...  Training loss: 1.1046...  0.1163 sec/batch
Epoch: 12/20...  Training Step: 5474...  Training loss: 1.0272...  0.1142 sec/batch
Epoch: 12/20...  Training Step: 5475...  Training loss: 0.9494...  0.1152 sec/batch
Epoch: 12/20...  Training Step: 5476...  Training loss: 1.0452...  0.1183 sec/batch
Epoch: 12/20...  Training Step: 5477...  Training loss: 1.0181...  0.1211 sec/batch
Epoch: 12/20...  Training Step: 5478...  Training loss: 1.1632...  0.1168 sec/batch
Epoch: 12/20...  Training Step: 5479...  Training loss: 1.0928...  0.1216 sec/batch
Epoch: 12/20...  Training Step: 5480...  Training loss: 1.1298...  0.1174 sec/batch
Epoch: 12/20...  Training Step: 5481...  Training loss: 1.1823...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5482...  Training loss: 1.1460...  0.1226 sec/batch
Epoch: 12/20...  Training Step: 5483...  Training loss: 1.1489...  0.1173 sec/batch
Epoch: 12/20...  Training Step: 5484...  Training loss: 1.0619...  0.1163 sec/batch
Epoch: 12/20...  Training Step: 5485...  Training loss: 1.0710...  0.1155 sec/batch
Epoch: 12/20...  Training Step: 5486...  Training loss: 1.0093...  0.1197 sec/batch
Epoch: 12/20...  Training Step: 5487...  Training loss: 1.0150...  0.1200 sec/batch
Epoch: 12/20...  Training Step: 5488...  Training loss: 1.1927...  0.1162 sec/batch
Epoch: 12/20...  Training Step: 5489...  Training loss: 0.9960...  0.1151 sec/batch
Epoch: 12/20...  Training Step: 5490...  Training loss: 1.0273...  0.1155 sec/batch
Epoch: 12/20...  Training Step: 5491...  Training loss: 1.1085...  0.1162 sec/batch
Epoch: 12/20...  Training Step: 5492...  Training loss: 0.9275...  0.1149 sec/batch
Epoch: 12/20...  Training Step: 5493...  Training loss: 0.9343...  0.1198 sec/batch
Epoch: 12/20...  Training Step: 5494...  Training loss: 1.0043...  0.1176 sec/batch
Epoch: 12/20...  Training Step: 5495...  Training loss: 0.9880...  0.1160 sec/batch
Epoch: 12/20...  Training Step: 5496...  Training loss: 1.0750...  0.1166 sec/batch
Epoch: 12/20...  Training Step: 5497...  Training loss: 1.0498...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5498...  Training loss: 1.0232...  0.1191 sec/batch
Epoch: 12/20...  Training Step: 5499...  Training loss: 1.0628...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5500...  Training loss: 1.2021...  0.1171 sec/batch
Epoch: 12/20...  Training Step: 5501...  Training loss: 0.9822...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5502...  Training loss: 1.0198...  0.1210 sec/batch
Epoch: 12/20...  Training Step: 5503...  Training loss: 1.1256...  0.1141 sec/batch
Epoch: 12/20...  Training Step: 5504...  Training loss: 1.1327...  0.1188 sec/batch
Epoch: 12/20...  Training Step: 5505...  Training loss: 0.8461...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5506...  Training loss: 0.9670...  0.1149 sec/batch
Epoch: 12/20...  Training Step: 5507...  Training loss: 1.0041...  0.1168 sec/batch
Epoch: 12/20...  Training Step: 5508...  Training loss: 1.2398...  0.1209 sec/batch
Epoch: 12/20...  Training Step: 5509...  Training loss: 1.2107...  0.1271 sec/batch
Epoch: 12/20...  Training Step: 5510...  Training loss: 1.1720...  0.1331 sec/batch
Epoch: 12/20...  Training Step: 5511...  Training loss: 1.1550...  0.1289 sec/batch
Epoch: 12/20...  Training Step: 5512...  Training loss: 1.3129...  0.1255 sec/batch
Epoch: 12/20...  Training Step: 5513...  Training loss: 1.0926...  0.1231 sec/batch
Epoch: 12/20...  Training Step: 5514...  Training loss: 0.9667...  0.1119 sec/batch
Epoch: 12/20...  Training Step: 5515...  Training loss: 1.0049...  0.1130 sec/batch
Epoch: 12/20...  Training Step: 5516...  Training loss: 1.1309...  0.1175 sec/batch
Epoch: 12/20...  Training Step: 5517...  Training loss: 1.2509...  0.1183 sec/batch
Epoch: 12/20...  Training Step: 5518...  Training loss: 1.0779...  0.1191 sec/batch
Epoch: 12/20...  Training Step: 5519...  Training loss: 1.1168...  0.1200 sec/batch
Epoch: 12/20...  Training Step: 5520...  Training loss: 1.3585...  0.1222 sec/batch
Epoch: 12/20...  Training Step: 5521...  Training loss: 1.0795...  0.1190 sec/batch
Epoch: 12/20...  Training Step: 5522...  Training loss: 0.8915...  0.1202 sec/batch
Epoch: 12/20...  Training Step: 5523...  Training loss: 1.0037...  0.1173 sec/batch
Epoch: 12/20...  Training Step: 5524...  Training loss: 0.9645...  0.1189 sec/batch
Epoch: 12/20...  Training Step: 5525...  Training loss: 1.1401...  0.1173 sec/batch
Epoch: 12/20...  Training Step: 5526...  Training loss: 1.1361...  0.1145 sec/batch
Epoch: 12/20...  Training Step: 5527...  Training loss: 1.1051...  0.1235 sec/batch
Epoch: 12/20...  Training Step: 5528...  Training loss: 1.2129...  0.1188 sec/batch
Epoch: 12/20...  Training Step: 5529...  Training loss: 1.0280...  0.1157 sec/batch
Epoch: 12/20...  Training Step: 5530...  Training loss: 1.0719...  0.1200 sec/batch
Epoch: 12/20...  Training Step: 5531...  Training loss: 1.1050...  0.1163 sec/batch
Epoch: 12/20...  Training Step: 5532...  Training loss: 1.0545...  0.1191 sec/batch
Epoch: 12/20...  Training Step: 5533...  Training loss: 1.0259...  0.1194 sec/batch
Epoch: 12/20...  Training Step: 5534...  Training loss: 1.0757...  0.1168 sec/batch
Epoch: 12/20...  Training Step: 5535...  Training loss: 1.1904...  0.1178 sec/batch
Epoch: 12/20...  Training Step: 5536...  Training loss: 1.0580...  0.1241 sec/batch
Epoch: 12/20...  Training Step: 5537...  Training loss: 1.2774...  0.1145 sec/batch
Epoch: 12/20...  Training Step: 5538...  Training loss: 1.1851...  0.1144 sec/batch
Epoch: 12/20...  Training Step: 5539...  Training loss: 1.0188...  0.1173 sec/batch
Epoch: 12/20...  Training Step: 5540...  Training loss: 1.0965...  0.1208 sec/batch
Epoch: 12/20...  Training Step: 5541...  Training loss: 0.9629...  0.1191 sec/batch
Epoch: 12/20...  Training Step: 5542...  Training loss: 1.1808...  0.1142 sec/batch
Epoch: 12/20...  Training Step: 5543...  Training loss: 1.2846...  0.1243 sec/batch
Epoch: 12/20...  Training Step: 5544...  Training loss: 1.3076...  0.1185 sec/batch
Epoch: 12/20...  Training Step: 5545...  Training loss: 0.9763...  0.1231 sec/batch
Epoch: 12/20...  Training Step: 5546...  Training loss: 1.0871...  0.1154 sec/batch
Epoch: 12/20...  Training Step: 5547...  Training loss: 1.1039...  0.1202 sec/batch
Epoch: 12/20...  Training Step: 5548...  Training loss: 1.0214...  0.1185 sec/batch
Epoch: 12/20...  Training Step: 5549...  Training loss: 1.0235...  0.1157 sec/batch
Epoch: 12/20...  Training Step: 5550...  Training loss: 1.0577...  0.1181 sec/batch
Epoch: 12/20...  Training Step: 5551...  Training loss: 1.1562...  0.1196 sec/batch
Epoch: 12/20...  Training Step: 5552...  Training loss: 0.9709...  0.1196 sec/batch
Epoch: 12/20...  Training Step: 5553...  Training loss: 1.0205...  0.1190 sec/batch
Epoch: 12/20...  Training Step: 5554...  Training loss: 1.0785...  0.1208 sec/batch
Epoch: 12/20...  Training Step: 5555...  Training loss: 0.9845...  0.1221 sec/batch
Epoch: 12/20...  Training Step: 5556...  Training loss: 1.1813...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5557...  Training loss: 1.0523...  0.1171 sec/batch
Epoch: 12/20...  Training Step: 5558...  Training loss: 1.1619...  0.1211 sec/batch
Epoch: 12/20...  Training Step: 5559...  Training loss: 1.1088...  0.1165 sec/batch
Epoch: 12/20...  Training Step: 5560...  Training loss: 0.9128...  0.1230 sec/batch
Epoch: 12/20...  Training Step: 5561...  Training loss: 1.0081...  0.1217 sec/batch
Epoch: 12/20...  Training Step: 5562...  Training loss: 0.9231...  0.1285 sec/batch
Epoch: 12/20...  Training Step: 5563...  Training loss: 0.9855...  0.1194 sec/batch
Epoch: 12/20...  Training Step: 5564...  Training loss: 1.0339...  0.1187 sec/batch
Epoch: 12/20...  Training Step: 5565...  Training loss: 1.0894...  0.1292 sec/batch
Epoch: 12/20...  Training Step: 5566...  Training loss: 0.9788...  0.1190 sec/batch
Epoch: 12/20...  Training Step: 5567...  Training loss: 1.1114...  0.1258 sec/batch
Epoch: 12/20...  Training Step: 5568...  Training loss: 1.0465...  0.1245 sec/batch
Epoch: 13/20...  Training Step: 5569...  Training loss: 1.3773...  0.1265 sec/batch
Epoch: 13/20...  Training Step: 5570...  Training loss: 1.1320...  0.1238 sec/batch
Epoch: 13/20...  Training Step: 5571...  Training loss: 1.0771...  0.1168 sec/batch
Epoch: 13/20...  Training Step: 5572...  Training loss: 1.1517...  0.1285 sec/batch
Epoch: 13/20...  Training Step: 5573...  Training loss: 1.1432...  0.1247 sec/batch
Epoch: 13/20...  Training Step: 5574...  Training loss: 0.8844...  0.1227 sec/batch
Epoch: 13/20...  Training Step: 5575...  Training loss: 1.1936...  0.1192 sec/batch
Epoch: 13/20...  Training Step: 5576...  Training loss: 0.9261...  0.1207 sec/batch
Epoch: 13/20...  Training Step: 5577...  Training loss: 0.9836...  0.1277 sec/batch
Epoch: 13/20...  Training Step: 5578...  Training loss: 1.2503...  0.1231 sec/batch
Epoch: 13/20...  Training Step: 5579...  Training loss: 1.0426...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 5580...  Training loss: 0.9293...  0.1219 sec/batch
Epoch: 13/20...  Training Step: 5581...  Training loss: 1.2648...  0.1271 sec/batch
Epoch: 13/20...  Training Step: 5582...  Training loss: 0.8409...  0.1239 sec/batch
Epoch: 13/20...  Training Step: 5583...  Training loss: 1.1342...  0.1228 sec/batch
Epoch: 13/20...  Training Step: 5584...  Training loss: 1.1728...  0.1218 sec/batch
Epoch: 13/20...  Training Step: 5585...  Training loss: 1.0151...  0.1204 sec/batch
Epoch: 13/20...  Training Step: 5586...  Training loss: 0.9533...  0.1304 sec/batch
Epoch: 13/20...  Training Step: 5587...  Training loss: 0.9729...  0.1254 sec/batch
Epoch: 13/20...  Training Step: 5588...  Training loss: 1.0116...  0.1232 sec/batch
Epoch: 13/20...  Training Step: 5589...  Training loss: 1.2089...  0.1370 sec/batch
Epoch: 13/20...  Training Step: 5590...  Training loss: 1.0977...  0.1292 sec/batch
Epoch: 13/20...  Training Step: 5591...  Training loss: 1.0918...  0.1295 sec/batch
Epoch: 13/20...  Training Step: 5592...  Training loss: 1.1186...  0.1288 sec/batch
Epoch: 13/20...  Training Step: 5593...  Training loss: 1.0331...  0.1244 sec/batch
Epoch: 13/20...  Training Step: 5594...  Training loss: 1.0919...  0.1181 sec/batch
Epoch: 13/20...  Training Step: 5595...  Training loss: 1.1071...  0.1267 sec/batch
Epoch: 13/20...  Training Step: 5596...  Training loss: 1.0819...  0.1231 sec/batch
Epoch: 13/20...  Training Step: 5597...  Training loss: 1.1049...  0.1213 sec/batch
Epoch: 13/20...  Training Step: 5598...  Training loss: 0.9935...  0.1226 sec/batch
Epoch: 13/20...  Training Step: 5599...  Training loss: 0.9303...  0.1214 sec/batch
Epoch: 13/20...  Training Step: 5600...  Training loss: 1.0773...  0.1182 sec/batch
Epoch: 13/20...  Training Step: 5601...  Training loss: 0.9290...  0.1183 sec/batch
Epoch: 13/20...  Training Step: 5602...  Training loss: 1.0208...  0.1144 sec/batch
Epoch: 13/20...  Training Step: 5603...  Training loss: 0.9689...  0.1200 sec/batch
Epoch: 13/20...  Training Step: 5604...  Training loss: 0.9886...  0.1176 sec/batch
Epoch: 13/20...  Training Step: 5605...  Training loss: 1.1212...  0.1155 sec/batch
Epoch: 13/20...  Training Step: 5606...  Training loss: 1.0071...  0.1249 sec/batch
Epoch: 13/20...  Training Step: 5607...  Training loss: 0.9879...  0.1212 sec/batch
Epoch: 13/20...  Training Step: 5608...  Training loss: 1.3244...  0.1155 sec/batch
Epoch: 13/20...  Training Step: 5609...  Training loss: 1.0181...  0.1132 sec/batch
Epoch: 13/20...  Training Step: 5610...  Training loss: 1.0072...  0.1194 sec/batch
Epoch: 13/20...  Training Step: 5611...  Training loss: 1.2114...  0.1191 sec/batch
Epoch: 13/20...  Training Step: 5612...  Training loss: 0.8667...  0.1205 sec/batch
Epoch: 13/20...  Training Step: 5613...  Training loss: 1.0466...  0.1136 sec/batch
Epoch: 13/20...  Training Step: 5614...  Training loss: 0.9367...  0.1102 sec/batch
Epoch: 13/20...  Training Step: 5615...  Training loss: 1.1105...  0.1191 sec/batch
Epoch: 13/20...  Training Step: 5616...  Training loss: 1.0418...  0.1160 sec/batch
Epoch: 13/20...  Training Step: 5617...  Training loss: 1.0921...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 5618...  Training loss: 1.1215...  0.1181 sec/batch
Epoch: 13/20...  Training Step: 5619...  Training loss: 1.0523...  0.1162 sec/batch
Epoch: 13/20...  Training Step: 5620...  Training loss: 0.9645...  0.1130 sec/batch
Epoch: 13/20...  Training Step: 5621...  Training loss: 1.0764...  0.1166 sec/batch
Epoch: 13/20...  Training Step: 5622...  Training loss: 0.9832...  0.1268 sec/batch
Epoch: 13/20...  Training Step: 5623...  Training loss: 1.0096...  0.1163 sec/batch
Epoch: 13/20...  Training Step: 5624...  Training loss: 0.9549...  0.1162 sec/batch
Epoch: 13/20...  Training Step: 5625...  Training loss: 1.1362...  0.1195 sec/batch
Epoch: 13/20...  Training Step: 5626...  Training loss: 1.0608...  0.1197 sec/batch
Epoch: 13/20...  Training Step: 5627...  Training loss: 0.8064...  0.1144 sec/batch
Epoch: 13/20...  Training Step: 5628...  Training loss: 0.9704...  0.1188 sec/batch
Epoch: 13/20...  Training Step: 5629...  Training loss: 0.8764...  0.1166 sec/batch
Epoch: 13/20...  Training Step: 5630...  Training loss: 1.1256...  0.1198 sec/batch
Epoch: 13/20...  Training Step: 5631...  Training loss: 1.0087...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5632...  Training loss: 1.1505...  0.1158 sec/batch
Epoch: 13/20...  Training Step: 5633...  Training loss: 0.9525...  0.1267 sec/batch
Epoch: 13/20...  Training Step: 5634...  Training loss: 1.0417...  0.1164 sec/batch
Epoch: 13/20...  Training Step: 5635...  Training loss: 1.0441...  0.1266 sec/batch
Epoch: 13/20...  Training Step: 5636...  Training loss: 1.1168...  0.1180 sec/batch
Epoch: 13/20...  Training Step: 5637...  Training loss: 1.0455...  0.1172 sec/batch
Epoch: 13/20...  Training Step: 5638...  Training loss: 1.0731...  0.1129 sec/batch
Epoch: 13/20...  Training Step: 5639...  Training loss: 1.2368...  0.1277 sec/batch
Epoch: 13/20...  Training Step: 5640...  Training loss: 0.8461...  0.1203 sec/batch
Epoch: 13/20...  Training Step: 5641...  Training loss: 0.9797...  0.1198 sec/batch
Epoch: 13/20...  Training Step: 5642...  Training loss: 0.9000...  0.1147 sec/batch
Epoch: 13/20...  Training Step: 5643...  Training loss: 1.1810...  0.1136 sec/batch
Epoch: 13/20...  Training Step: 5644...  Training loss: 0.9090...  0.1193 sec/batch
Epoch: 13/20...  Training Step: 5645...  Training loss: 0.9550...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 5646...  Training loss: 0.9823...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 5647...  Training loss: 1.0535...  0.1124 sec/batch
Epoch: 13/20...  Training Step: 5648...  Training loss: 0.9529...  0.1155 sec/batch
Epoch: 13/20...  Training Step: 5649...  Training loss: 1.1554...  0.1158 sec/batch
Epoch: 13/20...  Training Step: 5650...  Training loss: 1.1047...  0.1120 sec/batch
Epoch: 13/20...  Training Step: 5651...  Training loss: 0.9081...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 5652...  Training loss: 1.1565...  0.1141 sec/batch
Epoch: 13/20...  Training Step: 5653...  Training loss: 1.1734...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 5654...  Training loss: 1.3048...  0.1134 sec/batch
Epoch: 13/20...  Training Step: 5655...  Training loss: 0.9735...  0.1155 sec/batch
Epoch: 13/20...  Training Step: 5656...  Training loss: 1.1335...  0.1118 sec/batch
Epoch: 13/20...  Training Step: 5657...  Training loss: 1.1569...  0.1139 sec/batch
Epoch: 13/20...  Training Step: 5658...  Training loss: 1.0987...  0.1210 sec/batch
Epoch: 13/20...  Training Step: 5659...  Training loss: 1.2668...  0.1206 sec/batch
Epoch: 13/20...  Training Step: 5660...  Training loss: 1.1898...  0.1174 sec/batch
Epoch: 13/20...  Training Step: 5661...  Training loss: 1.0015...  0.1205 sec/batch
Epoch: 13/20...  Training Step: 5662...  Training loss: 1.1512...  0.1162 sec/batch
Epoch: 13/20...  Training Step: 5663...  Training loss: 0.9458...  0.1192 sec/batch
Epoch: 13/20...  Training Step: 5664...  Training loss: 0.9985...  0.1198 sec/batch
Epoch: 13/20...  Training Step: 5665...  Training loss: 1.2202...  0.1157 sec/batch
Epoch: 13/20...  Training Step: 5666...  Training loss: 1.0659...  0.1163 sec/batch
Epoch: 13/20...  Training Step: 5667...  Training loss: 1.1439...  0.1206 sec/batch
Epoch: 13/20...  Training Step: 5668...  Training loss: 1.1210...  0.1144 sec/batch
Epoch: 13/20...  Training Step: 5669...  Training loss: 1.1505...  0.1201 sec/batch
Epoch: 13/20...  Training Step: 5670...  Training loss: 1.1517...  0.1150 sec/batch
Epoch: 13/20...  Training Step: 5671...  Training loss: 1.2223...  0.1149 sec/batch
Epoch: 13/20...  Training Step: 5672...  Training loss: 1.0279...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 5673...  Training loss: 1.2505...  0.1133 sec/batch
Epoch: 13/20...  Training Step: 5674...  Training loss: 1.1406...  0.1178 sec/batch
Epoch: 13/20...  Training Step: 5675...  Training loss: 1.0036...  0.1202 sec/batch
Epoch: 13/20...  Training Step: 5676...  Training loss: 1.1441...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5677...  Training loss: 1.1081...  0.1141 sec/batch
Epoch: 13/20...  Training Step: 5678...  Training loss: 0.9733...  0.1155 sec/batch
Epoch: 13/20...  Training Step: 5679...  Training loss: 1.0260...  0.1136 sec/batch
Epoch: 13/20...  Training Step: 5680...  Training loss: 0.9091...  0.1195 sec/batch
Epoch: 13/20...  Training Step: 5681...  Training loss: 1.1194...  0.1247 sec/batch
Epoch: 13/20...  Training Step: 5682...  Training loss: 1.2502...  0.1164 sec/batch
Epoch: 13/20...  Training Step: 5683...  Training loss: 1.0613...  0.1168 sec/batch
Epoch: 13/20...  Training Step: 5684...  Training loss: 0.9593...  0.1134 sec/batch
Epoch: 13/20...  Training Step: 5685...  Training loss: 1.1959...  0.1181 sec/batch
Epoch: 13/20...  Training Step: 5686...  Training loss: 1.0440...  0.1129 sec/batch
Epoch: 13/20...  Training Step: 5687...  Training loss: 1.1355...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5688...  Training loss: 0.8495...  0.1130 sec/batch
Epoch: 13/20...  Training Step: 5689...  Training loss: 1.1435...  0.1158 sec/batch
Epoch: 13/20...  Training Step: 5690...  Training loss: 1.1619...  0.1176 sec/batch
Epoch: 13/20...  Training Step: 5691...  Training loss: 1.0946...  0.1150 sec/batch
Epoch: 13/20...  Training Step: 5692...  Training loss: 1.1469...  0.1169 sec/batch
Epoch: 13/20...  Training Step: 5693...  Training loss: 1.1945...  0.1211 sec/batch
Epoch: 13/20...  Training Step: 5694...  Training loss: 1.0238...  0.1133 sec/batch
Epoch: 13/20...  Training Step: 5695...  Training loss: 1.0639...  0.1151 sec/batch
Epoch: 13/20...  Training Step: 5696...  Training loss: 1.1610...  0.1163 sec/batch
Epoch: 13/20...  Training Step: 5697...  Training loss: 1.0304...  0.1183 sec/batch
Epoch: 13/20...  Training Step: 5698...  Training loss: 1.0398...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5699...  Training loss: 1.2267...  0.1177 sec/batch
Epoch: 13/20...  Training Step: 5700...  Training loss: 1.0870...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5701...  Training loss: 1.0448...  0.1176 sec/batch
Epoch: 13/20...  Training Step: 5702...  Training loss: 1.1577...  0.1129 sec/batch
Epoch: 13/20...  Training Step: 5703...  Training loss: 1.0593...  0.1172 sec/batch
Epoch: 13/20...  Training Step: 5704...  Training loss: 0.9506...  0.1146 sec/batch
Epoch: 13/20...  Training Step: 5705...  Training loss: 0.9555...  0.1178 sec/batch
Epoch: 13/20...  Training Step: 5706...  Training loss: 1.1189...  0.1183 sec/batch
Epoch: 13/20...  Training Step: 5707...  Training loss: 0.9679...  0.1220 sec/batch
Epoch: 13/20...  Training Step: 5708...  Training loss: 1.1256...  0.1164 sec/batch
Epoch: 13/20...  Training Step: 5709...  Training loss: 0.8427...  0.1189 sec/batch
Epoch: 13/20...  Training Step: 5710...  Training loss: 0.9894...  0.1172 sec/batch
Epoch: 13/20...  Training Step: 5711...  Training loss: 0.9464...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 5712...  Training loss: 1.0726...  0.1176 sec/batch
Epoch: 13/20...  Training Step: 5713...  Training loss: 1.0870...  0.1170 sec/batch
Epoch: 13/20...  Training Step: 5714...  Training loss: 1.0116...  0.1140 sec/batch
Epoch: 13/20...  Training Step: 5715...  Training loss: 0.9717...  0.1217 sec/batch
Epoch: 13/20...  Training Step: 5716...  Training loss: 1.0249...  0.1143 sec/batch
Epoch: 13/20...  Training Step: 5717...  Training loss: 0.9965...  0.1189 sec/batch
Epoch: 13/20...  Training Step: 5718...  Training loss: 1.1242...  0.1160 sec/batch
Epoch: 13/20...  Training Step: 5719...  Training loss: 1.0635...  0.1199 sec/batch
Epoch: 13/20...  Training Step: 5720...  Training loss: 1.0863...  0.1170 sec/batch
Epoch: 13/20...  Training Step: 5721...  Training loss: 1.1673...  0.1201 sec/batch
Epoch: 13/20...  Training Step: 5722...  Training loss: 1.1029...  0.1187 sec/batch
Epoch: 13/20...  Training Step: 5723...  Training loss: 1.0336...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 5724...  Training loss: 0.9700...  0.1197 sec/batch
Epoch: 13/20...  Training Step: 5725...  Training loss: 1.0438...  0.1156 sec/batch
Epoch: 13/20...  Training Step: 5726...  Training loss: 1.1453...  0.1190 sec/batch
Epoch: 13/20...  Training Step: 5727...  Training loss: 0.9405...  0.1150 sec/batch
Epoch: 13/20...  Training Step: 5728...  Training loss: 1.1404...  0.1177 sec/batch
Epoch: 13/20...  Training Step: 5729...  Training loss: 1.1644...  0.1167 sec/batch
Epoch: 13/20...  Training Step: 5730...  Training loss: 1.1370...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5731...  Training loss: 1.2007...  0.1202 sec/batch
Epoch: 13/20...  Training Step: 5732...  Training loss: 1.0676...  0.1177 sec/batch
Epoch: 13/20...  Training Step: 5733...  Training loss: 1.0706...  0.1202 sec/batch
Epoch: 13/20...  Training Step: 5734...  Training loss: 0.9332...  0.1156 sec/batch
Epoch: 13/20...  Training Step: 5735...  Training loss: 0.9124...  0.1163 sec/batch
Epoch: 13/20...  Training Step: 5736...  Training loss: 1.1066...  0.1193 sec/batch
Epoch: 13/20...  Training Step: 5737...  Training loss: 0.9197...  0.1197 sec/batch
Epoch: 13/20...  Training Step: 5738...  Training loss: 1.0114...  0.1187 sec/batch
Epoch: 13/20...  Training Step: 5739...  Training loss: 1.1476...  0.1196 sec/batch
Epoch: 13/20...  Training Step: 5740...  Training loss: 1.2345...  0.1228 sec/batch
Epoch: 13/20...  Training Step: 5741...  Training loss: 0.9743...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 5742...  Training loss: 1.0378...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 5743...  Training loss: 1.3188...  0.1183 sec/batch
Epoch: 13/20...  Training Step: 5744...  Training loss: 0.8592...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 5745...  Training loss: 0.9050...  0.1166 sec/batch
Epoch: 13/20...  Training Step: 5746...  Training loss: 1.1900...  0.1183 sec/batch
Epoch: 13/20...  Training Step: 5747...  Training loss: 0.8705...  0.1174 sec/batch
Epoch: 13/20...  Training Step: 5748...  Training loss: 1.0660...  0.1134 sec/batch
Epoch: 13/20...  Training Step: 5749...  Training loss: 0.8465...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 5750...  Training loss: 1.2064...  0.1181 sec/batch
Epoch: 13/20...  Training Step: 5751...  Training loss: 1.1062...  0.1151 sec/batch
Epoch: 13/20...  Training Step: 5752...  Training loss: 1.0066...  0.1178 sec/batch
Epoch: 13/20...  Training Step: 5753...  Training loss: 1.2165...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 5754...  Training loss: 1.1694...  0.1176 sec/batch
Epoch: 13/20...  Training Step: 5755...  Training loss: 1.1626...  0.1209 sec/batch
Epoch: 13/20...  Training Step: 5756...  Training loss: 1.0335...  0.1207 sec/batch
Epoch: 13/20...  Training Step: 5757...  Training loss: 1.1198...  0.1136 sec/batch
Epoch: 13/20...  Training Step: 5758...  Training loss: 1.0774...  0.1147 sec/batch
Epoch: 13/20...  Training Step: 5759...  Training loss: 0.9965...  0.1141 sec/batch
Epoch: 13/20...  Training Step: 5760...  Training loss: 1.0717...  0.1158 sec/batch
Epoch: 13/20...  Training Step: 5761...  Training loss: 1.0366...  0.1210 sec/batch
Epoch: 13/20...  Training Step: 5762...  Training loss: 1.1383...  0.1221 sec/batch
Epoch: 13/20...  Training Step: 5763...  Training loss: 1.0045...  0.1217 sec/batch
Epoch: 13/20...  Training Step: 5764...  Training loss: 1.0870...  0.1227 sec/batch
Epoch: 13/20...  Training Step: 5765...  Training loss: 1.0689...  0.1222 sec/batch
Epoch: 13/20...  Training Step: 5766...  Training loss: 1.1760...  0.1259 sec/batch
Epoch: 13/20...  Training Step: 5767...  Training loss: 0.9432...  0.1260 sec/batch
Epoch: 13/20...  Training Step: 5768...  Training loss: 1.1234...  0.1282 sec/batch
Epoch: 13/20...  Training Step: 5769...  Training loss: 0.9797...  0.1267 sec/batch
Epoch: 13/20...  Training Step: 5770...  Training loss: 1.0814...  0.1250 sec/batch
Epoch: 13/20...  Training Step: 5771...  Training loss: 1.0289...  0.1191 sec/batch
Epoch: 13/20...  Training Step: 5772...  Training loss: 1.1869...  0.1233 sec/batch
Epoch: 13/20...  Training Step: 5773...  Training loss: 0.9786...  0.1218 sec/batch
Epoch: 13/20...  Training Step: 5774...  Training loss: 1.0491...  0.1152 sec/batch
Epoch: 13/20...  Training Step: 5775...  Training loss: 0.9800...  0.1134 sec/batch
Epoch: 13/20...  Training Step: 5776...  Training loss: 1.1676...  0.1119 sec/batch
Epoch: 13/20...  Training Step: 5777...  Training loss: 1.1212...  0.1163 sec/batch
Epoch: 13/20...  Training Step: 5778...  Training loss: 0.9392...  0.1241 sec/batch
Epoch: 13/20...  Training Step: 5779...  Training loss: 0.9613...  0.1224 sec/batch
Epoch: 13/20...  Training Step: 5780...  Training loss: 1.1864...  0.1304 sec/batch
Epoch: 13/20...  Training Step: 5781...  Training loss: 1.1274...  0.1262 sec/batch
Epoch: 13/20...  Training Step: 5782...  Training loss: 1.0919...  0.1238 sec/batch
Epoch: 13/20...  Training Step: 5783...  Training loss: 1.0729...  0.1322 sec/batch
Epoch: 13/20...  Training Step: 5784...  Training loss: 1.0474...  0.1377 sec/batch
Epoch: 13/20...  Training Step: 5785...  Training loss: 0.9907...  0.1387 sec/batch
Epoch: 13/20...  Training Step: 5786...  Training loss: 1.1713...  0.1359 sec/batch
Epoch: 13/20...  Training Step: 5787...  Training loss: 1.1332...  0.1315 sec/batch
Epoch: 13/20...  Training Step: 5788...  Training loss: 1.0928...  0.1266 sec/batch
Epoch: 13/20...  Training Step: 5789...  Training loss: 0.9800...  0.1223 sec/batch
Epoch: 13/20...  Training Step: 5790...  Training loss: 1.4158...  0.1313 sec/batch
Epoch: 13/20...  Training Step: 5791...  Training loss: 1.2106...  0.1277 sec/batch
Epoch: 13/20...  Training Step: 5792...  Training loss: 1.1993...  0.1263 sec/batch
Epoch: 13/20...  Training Step: 5793...  Training loss: 1.0229...  0.1237 sec/batch
Epoch: 13/20...  Training Step: 5794...  Training loss: 1.1773...  0.1283 sec/batch
Epoch: 13/20...  Training Step: 5795...  Training loss: 1.2700...  0.1223 sec/batch
Epoch: 13/20...  Training Step: 5796...  Training loss: 0.9858...  0.1220 sec/batch
Epoch: 13/20...  Training Step: 5797...  Training loss: 1.0560...  0.1491 sec/batch
Epoch: 13/20...  Training Step: 5798...  Training loss: 1.0637...  0.1421 sec/batch
Epoch: 13/20...  Training Step: 5799...  Training loss: 1.0386...  0.1328 sec/batch
Epoch: 13/20...  Training Step: 5800...  Training loss: 1.0529...  0.1331 sec/batch
Epoch: 13/20...  Training Step: 5801...  Training loss: 1.2854...  0.1236 sec/batch
Epoch: 13/20...  Training Step: 5802...  Training loss: 1.0100...  0.1249 sec/batch
Epoch: 13/20...  Training Step: 5803...  Training loss: 1.3172...  0.1214 sec/batch
Epoch: 13/20...  Training Step: 5804...  Training loss: 0.9567...  0.1222 sec/batch
Epoch: 13/20...  Training Step: 5805...  Training loss: 1.2188...  0.1214 sec/batch
Epoch: 13/20...  Training Step: 5806...  Training loss: 0.9628...  0.1243 sec/batch
Epoch: 13/20...  Training Step: 5807...  Training loss: 1.1559...  0.1254 sec/batch
Epoch: 13/20...  Training Step: 5808...  Training loss: 1.1193...  0.1193 sec/batch
Epoch: 13/20...  Training Step: 5809...  Training loss: 1.0281...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 5810...  Training loss: 0.9938...  0.1169 sec/batch
Epoch: 13/20...  Training Step: 5811...  Training loss: 1.1712...  0.1264 sec/batch
Epoch: 13/20...  Training Step: 5812...  Training loss: 1.1241...  0.1273 sec/batch
Epoch: 13/20...  Training Step: 5813...  Training loss: 1.0484...  0.1255 sec/batch
Epoch: 13/20...  Training Step: 5814...  Training loss: 0.9546...  0.1241 sec/batch
Epoch: 13/20...  Training Step: 5815...  Training loss: 1.0930...  0.1227 sec/batch
Epoch: 13/20...  Training Step: 5816...  Training loss: 1.3008...  0.1227 sec/batch
Epoch: 13/20...  Training Step: 5817...  Training loss: 1.1660...  0.1113 sec/batch
Epoch: 13/20...  Training Step: 5818...  Training loss: 1.0542...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 5819...  Training loss: 1.1609...  0.1150 sec/batch
Epoch: 13/20...  Training Step: 5820...  Training loss: 0.9762...  0.1314 sec/batch
Epoch: 13/20...  Training Step: 5821...  Training loss: 0.9722...  0.1342 sec/batch
Epoch: 13/20...  Training Step: 5822...  Training loss: 1.0023...  0.1311 sec/batch
Epoch: 13/20...  Training Step: 5823...  Training loss: 1.0188...  0.1335 sec/batch
Epoch: 13/20...  Training Step: 5824...  Training loss: 1.1014...  0.1228 sec/batch
Epoch: 13/20...  Training Step: 5825...  Training loss: 1.2401...  0.1255 sec/batch
Epoch: 13/20...  Training Step: 5826...  Training loss: 1.0450...  0.1337 sec/batch
Epoch: 13/20...  Training Step: 5827...  Training loss: 0.9192...  0.1320 sec/batch
Epoch: 13/20...  Training Step: 5828...  Training loss: 1.0921...  0.1322 sec/batch
Epoch: 13/20...  Training Step: 5829...  Training loss: 1.0379...  0.1334 sec/batch
Epoch: 13/20...  Training Step: 5830...  Training loss: 1.0958...  0.1291 sec/batch
Epoch: 13/20...  Training Step: 5831...  Training loss: 1.0439...  0.1264 sec/batch
Epoch: 13/20...  Training Step: 5832...  Training loss: 1.0391...  0.1423 sec/batch
Epoch: 13/20...  Training Step: 5833...  Training loss: 1.1474...  0.1314 sec/batch
Epoch: 13/20...  Training Step: 5834...  Training loss: 1.0413...  0.1279 sec/batch
Epoch: 13/20...  Training Step: 5835...  Training loss: 1.1615...  0.1270 sec/batch
Epoch: 13/20...  Training Step: 5836...  Training loss: 1.1515...  0.1269 sec/batch
Epoch: 13/20...  Training Step: 5837...  Training loss: 1.2108...  0.1251 sec/batch
Epoch: 13/20...  Training Step: 5838...  Training loss: 1.1510...  0.1269 sec/batch
Epoch: 13/20...  Training Step: 5839...  Training loss: 1.2229...  0.1211 sec/batch
Epoch: 13/20...  Training Step: 5840...  Training loss: 1.2778...  0.1260 sec/batch
Epoch: 13/20...  Training Step: 5841...  Training loss: 1.1301...  0.1306 sec/batch
Epoch: 13/20...  Training Step: 5842...  Training loss: 1.1939...  0.1302 sec/batch
Epoch: 13/20...  Training Step: 5843...  Training loss: 1.2127...  0.1162 sec/batch
Epoch: 13/20...  Training Step: 5844...  Training loss: 1.0126...  0.1276 sec/batch
Epoch: 13/20...  Training Step: 5845...  Training loss: 1.0920...  0.1253 sec/batch
Epoch: 13/20...  Training Step: 5846...  Training loss: 1.3305...  0.1247 sec/batch
Epoch: 13/20...  Training Step: 5847...  Training loss: 1.1012...  0.1188 sec/batch
Epoch: 13/20...  Training Step: 5848...  Training loss: 1.0337...  0.1143 sec/batch
Epoch: 13/20...  Training Step: 5849...  Training loss: 1.0578...  0.1271 sec/batch
Epoch: 13/20...  Training Step: 5850...  Training loss: 1.0482...  0.1221 sec/batch
Epoch: 13/20...  Training Step: 5851...  Training loss: 1.1506...  0.1202 sec/batch
Epoch: 13/20...  Training Step: 5852...  Training loss: 1.1729...  0.1272 sec/batch
Epoch: 13/20...  Training Step: 5853...  Training loss: 1.0237...  0.1306 sec/batch
Epoch: 13/20...  Training Step: 5854...  Training loss: 1.1389...  0.1344 sec/batch
Epoch: 13/20...  Training Step: 5855...  Training loss: 1.1731...  0.1285 sec/batch
Epoch: 13/20...  Training Step: 5856...  Training loss: 1.1811...  0.1221 sec/batch
Epoch: 13/20...  Training Step: 5857...  Training loss: 1.0998...  0.1229 sec/batch
Epoch: 13/20...  Training Step: 5858...  Training loss: 1.1203...  0.1176 sec/batch
Epoch: 13/20...  Training Step: 5859...  Training loss: 1.1398...  0.1301 sec/batch
Epoch: 13/20...  Training Step: 5860...  Training loss: 1.0682...  0.1255 sec/batch
Epoch: 13/20...  Training Step: 5861...  Training loss: 1.0534...  0.1209 sec/batch
Epoch: 13/20...  Training Step: 5862...  Training loss: 1.1779...  0.1275 sec/batch
Epoch: 13/20...  Training Step: 5863...  Training loss: 1.1096...  0.1307 sec/batch
Epoch: 13/20...  Training Step: 5864...  Training loss: 1.2879...  0.1263 sec/batch
Epoch: 13/20...  Training Step: 5865...  Training loss: 1.0792...  0.1272 sec/batch
Epoch: 13/20...  Training Step: 5866...  Training loss: 1.0546...  0.1322 sec/batch
Epoch: 13/20...  Training Step: 5867...  Training loss: 1.1716...  0.1247 sec/batch
Epoch: 13/20...  Training Step: 5868...  Training loss: 1.0897...  0.1338 sec/batch
Epoch: 13/20...  Training Step: 5869...  Training loss: 1.1111...  0.1253 sec/batch
Epoch: 13/20...  Training Step: 5870...  Training loss: 1.0674...  0.1309 sec/batch
Epoch: 13/20...  Training Step: 5871...  Training loss: 0.9127...  0.1295 sec/batch
Epoch: 13/20...  Training Step: 5872...  Training loss: 1.3002...  0.1223 sec/batch
Epoch: 13/20...  Training Step: 5873...  Training loss: 1.0138...  0.1321 sec/batch
Epoch: 13/20...  Training Step: 5874...  Training loss: 1.2084...  0.1301 sec/batch
Epoch: 13/20...  Training Step: 5875...  Training loss: 1.0819...  0.1282 sec/batch
Epoch: 13/20...  Training Step: 5876...  Training loss: 1.2980...  0.1270 sec/batch
Epoch: 13/20...  Training Step: 5877...  Training loss: 1.2670...  0.1294 sec/batch
Epoch: 13/20...  Training Step: 5878...  Training loss: 1.1470...  0.1283 sec/batch
Epoch: 13/20...  Training Step: 5879...  Training loss: 1.0598...  0.1198 sec/batch
Epoch: 13/20...  Training Step: 5880...  Training loss: 1.0111...  0.1175 sec/batch
Epoch: 13/20...  Training Step: 5881...  Training loss: 0.9267...  0.1134 sec/batch
Epoch: 13/20...  Training Step: 5882...  Training loss: 0.9689...  0.1123 sec/batch
Epoch: 13/20...  Training Step: 5883...  Training loss: 0.9120...  0.1188 sec/batch
Epoch: 13/20...  Training Step: 5884...  Training loss: 0.9019...  0.1172 sec/batch
Epoch: 13/20...  Training Step: 5885...  Training loss: 0.9019...  0.1169 sec/batch
Epoch: 13/20...  Training Step: 5886...  Training loss: 1.0401...  0.1190 sec/batch
Epoch: 13/20...  Training Step: 5887...  Training loss: 0.9876...  0.1155 sec/batch
Epoch: 13/20...  Training Step: 5888...  Training loss: 1.0192...  0.1190 sec/batch
Epoch: 13/20...  Training Step: 5889...  Training loss: 0.9558...  0.1157 sec/batch
Epoch: 13/20...  Training Step: 5890...  Training loss: 1.1911...  0.1148 sec/batch
Epoch: 13/20...  Training Step: 5891...  Training loss: 0.9931...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5892...  Training loss: 0.9508...  0.1224 sec/batch
Epoch: 13/20...  Training Step: 5893...  Training loss: 0.9365...  0.1277 sec/batch
Epoch: 13/20...  Training Step: 5894...  Training loss: 0.8580...  0.1258 sec/batch
Epoch: 13/20...  Training Step: 5895...  Training loss: 1.0147...  0.1209 sec/batch
Epoch: 13/20...  Training Step: 5896...  Training loss: 0.9630...  0.1125 sec/batch
Epoch: 13/20...  Training Step: 5897...  Training loss: 1.1806...  0.1192 sec/batch
Epoch: 13/20...  Training Step: 5898...  Training loss: 0.9471...  0.1172 sec/batch
Epoch: 13/20...  Training Step: 5899...  Training loss: 1.0372...  0.1161 sec/batch
Epoch: 13/20...  Training Step: 5900...  Training loss: 1.1011...  0.1183 sec/batch
Epoch: 13/20...  Training Step: 5901...  Training loss: 0.9619...  0.1195 sec/batch
Epoch: 13/20...  Training Step: 5902...  Training loss: 0.9586...  0.1159 sec/batch
Epoch: 13/20...  Training Step: 5903...  Training loss: 1.0569...  0.1157 sec/batch
Epoch: 13/20...  Training Step: 5904...  Training loss: 1.0157...  0.1218 sec/batch
Epoch: 13/20...  Training Step: 5905...  Training loss: 0.9543...  0.1187 sec/batch
Epoch: 13/20...  Training Step: 5906...  Training loss: 0.9973...  0.1189 sec/batch
Epoch: 13/20...  Training Step: 5907...  Training loss: 1.0254...  0.1147 sec/batch
Epoch: 13/20...  Training Step: 5908...  Training loss: 0.9831...  0.1157 sec/batch
Epoch: 13/20...  Training Step: 5909...  Training loss: 1.0192...  0.1204 sec/batch
Epoch: 13/20...  Training Step: 5910...  Training loss: 1.1074...  0.1189 sec/batch
Epoch: 13/20...  Training Step: 5911...  Training loss: 0.9342...  0.1136 sec/batch
Epoch: 13/20...  Training Step: 5912...  Training loss: 1.0559...  0.1146 sec/batch
Epoch: 13/20...  Training Step: 5913...  Training loss: 0.9799...  0.1161 sec/batch
Epoch: 13/20...  Training Step: 5914...  Training loss: 1.0144...  0.1179 sec/batch
Epoch: 13/20...  Training Step: 5915...  Training loss: 0.9269...  0.1177 sec/batch
Epoch: 13/20...  Training Step: 5916...  Training loss: 1.2286...  0.1159 sec/batch
Epoch: 13/20...  Training Step: 5917...  Training loss: 0.9672...  0.1175 sec/batch
Epoch: 13/20...  Training Step: 5918...  Training loss: 1.0843...  0.1218 sec/batch
Epoch: 13/20...  Training Step: 5919...  Training loss: 1.0767...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 5920...  Training loss: 1.0132...  0.1202 sec/batch
Epoch: 13/20...  Training Step: 5921...  Training loss: 0.8628...  0.1160 sec/batch
Epoch: 13/20...  Training Step: 5922...  Training loss: 0.7347...  0.1124 sec/batch
Epoch: 13/20...  Training Step: 5923...  Training loss: 1.0701...  0.1185 sec/batch
Epoch: 13/20...  Training Step: 5924...  Training loss: 1.0710...  0.1182 sec/batch
Epoch: 13/20...  Training Step: 5925...  Training loss: 0.9147...  0.1161 sec/batch
Epoch: 13/20...  Training Step: 5926...  Training loss: 1.1501...  0.1167 sec/batch
Epoch: 13/20...  Training Step: 5927...  Training loss: 1.1788...  0.1169 sec/batch
Epoch: 13/20...  Training Step: 5928...  Training loss: 0.8771...  0.1198 sec/batch
Epoch: 13/20...  Training Step: 5929...  Training loss: 1.0714...  0.1136 sec/batch
Epoch: 13/20...  Training Step: 5930...  Training loss: 1.1129...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 5931...  Training loss: 0.9651...  0.1183 sec/batch
Epoch: 13/20...  Training Step: 5932...  Training loss: 0.9424...  0.1179 sec/batch
Epoch: 13/20...  Training Step: 5933...  Training loss: 0.9477...  0.1180 sec/batch
Epoch: 13/20...  Training Step: 5934...  Training loss: 1.1097...  0.1151 sec/batch
Epoch: 13/20...  Training Step: 5935...  Training loss: 0.9688...  0.1147 sec/batch
Epoch: 13/20...  Training Step: 5936...  Training loss: 1.2317...  0.1178 sec/batch
Epoch: 13/20...  Training Step: 5937...  Training loss: 1.0855...  0.1152 sec/batch
Epoch: 13/20...  Training Step: 5938...  Training loss: 1.1371...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 5939...  Training loss: 0.9546...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 5940...  Training loss: 1.1839...  0.1192 sec/batch
Epoch: 13/20...  Training Step: 5941...  Training loss: 0.8850...  0.1167 sec/batch
Epoch: 13/20...  Training Step: 5942...  Training loss: 1.1405...  0.1197 sec/batch
Epoch: 13/20...  Training Step: 5943...  Training loss: 1.0854...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5944...  Training loss: 1.1355...  0.1174 sec/batch
Epoch: 13/20...  Training Step: 5945...  Training loss: 1.2111...  0.1100 sec/batch
Epoch: 13/20...  Training Step: 5946...  Training loss: 1.1404...  0.1176 sec/batch
Epoch: 13/20...  Training Step: 5947...  Training loss: 0.9584...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5948...  Training loss: 1.0545...  0.1143 sec/batch
Epoch: 13/20...  Training Step: 5949...  Training loss: 0.9521...  0.1215 sec/batch
Epoch: 13/20...  Training Step: 5950...  Training loss: 1.0845...  0.1172 sec/batch
Epoch: 13/20...  Training Step: 5951...  Training loss: 1.0280...  0.1159 sec/batch
Epoch: 13/20...  Training Step: 5952...  Training loss: 1.0984...  0.1151 sec/batch
Epoch: 13/20...  Training Step: 5953...  Training loss: 1.0157...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 5954...  Training loss: 1.0705...  0.1192 sec/batch
Epoch: 13/20...  Training Step: 5955...  Training loss: 1.0640...  0.1183 sec/batch
Epoch: 13/20...  Training Step: 5956...  Training loss: 0.9209...  0.1180 sec/batch
Epoch: 13/20...  Training Step: 5957...  Training loss: 0.9253...  0.1217 sec/batch
Epoch: 13/20...  Training Step: 5958...  Training loss: 1.0451...  0.1182 sec/batch
Epoch: 13/20...  Training Step: 5959...  Training loss: 0.9848...  0.1178 sec/batch
Epoch: 13/20...  Training Step: 5960...  Training loss: 0.9407...  0.1167 sec/batch
Epoch: 13/20...  Training Step: 5961...  Training loss: 1.0383...  0.1192 sec/batch
Epoch: 13/20...  Training Step: 5962...  Training loss: 0.9617...  0.1175 sec/batch
Epoch: 13/20...  Training Step: 5963...  Training loss: 0.8913...  0.1174 sec/batch
Epoch: 13/20...  Training Step: 5964...  Training loss: 1.1068...  0.1166 sec/batch
Epoch: 13/20...  Training Step: 5965...  Training loss: 1.0062...  0.1122 sec/batch
Epoch: 13/20...  Training Step: 5966...  Training loss: 0.9666...  0.1164 sec/batch
Epoch: 13/20...  Training Step: 5967...  Training loss: 1.0783...  0.1218 sec/batch
Epoch: 13/20...  Training Step: 5968...  Training loss: 1.1321...  0.1156 sec/batch
Epoch: 13/20...  Training Step: 5969...  Training loss: 0.9649...  0.1140 sec/batch
Epoch: 13/20...  Training Step: 5970...  Training loss: 0.9317...  0.1174 sec/batch
Epoch: 13/20...  Training Step: 5971...  Training loss: 0.8931...  0.1165 sec/batch
Epoch: 13/20...  Training Step: 5972...  Training loss: 1.1227...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 5973...  Training loss: 1.0442...  0.1177 sec/batch
Epoch: 13/20...  Training Step: 5974...  Training loss: 1.2312...  0.1174 sec/batch
Epoch: 13/20...  Training Step: 5975...  Training loss: 0.8789...  0.1175 sec/batch
Epoch: 13/20...  Training Step: 5976...  Training loss: 1.2985...  0.1224 sec/batch
Epoch: 13/20...  Training Step: 5977...  Training loss: 0.9790...  0.1163 sec/batch
Epoch: 13/20...  Training Step: 5978...  Training loss: 1.0406...  0.1161 sec/batch
Epoch: 13/20...  Training Step: 5979...  Training loss: 1.0173...  0.1206 sec/batch
Epoch: 13/20...  Training Step: 5980...  Training loss: 1.0306...  0.1240 sec/batch
Epoch: 13/20...  Training Step: 5981...  Training loss: 1.1039...  0.1189 sec/batch
Epoch: 13/20...  Training Step: 5982...  Training loss: 1.0494...  0.1208 sec/batch
Epoch: 13/20...  Training Step: 5983...  Training loss: 1.1112...  0.1223 sec/batch
Epoch: 13/20...  Training Step: 5984...  Training loss: 1.1670...  0.1199 sec/batch
Epoch: 13/20...  Training Step: 5985...  Training loss: 1.1274...  0.1205 sec/batch
Epoch: 13/20...  Training Step: 5986...  Training loss: 0.9250...  0.1167 sec/batch
Epoch: 13/20...  Training Step: 5987...  Training loss: 1.0573...  0.1177 sec/batch
Epoch: 13/20...  Training Step: 5988...  Training loss: 1.0105...  0.1152 sec/batch
Epoch: 13/20...  Training Step: 5989...  Training loss: 1.0946...  0.1193 sec/batch
Epoch: 13/20...  Training Step: 5990...  Training loss: 1.0256...  0.1187 sec/batch
Epoch: 13/20...  Training Step: 5991...  Training loss: 0.9351...  0.1171 sec/batch
Epoch: 13/20...  Training Step: 5992...  Training loss: 1.1021...  0.1180 sec/batch
Epoch: 13/20...  Training Step: 5993...  Training loss: 1.0737...  0.1163 sec/batch
Epoch: 13/20...  Training Step: 5994...  Training loss: 1.0272...  0.1166 sec/batch
Epoch: 13/20...  Training Step: 5995...  Training loss: 1.0123...  0.1195 sec/batch
Epoch: 13/20...  Training Step: 5996...  Training loss: 0.9407...  0.1186 sec/batch
Epoch: 13/20...  Training Step: 5997...  Training loss: 0.9677...  0.1187 sec/batch
Epoch: 13/20...  Training Step: 5998...  Training loss: 1.0411...  0.1166 sec/batch
Epoch: 13/20...  Training Step: 5999...  Training loss: 1.1877...  0.1220 sec/batch
Epoch: 13/20...  Training Step: 6000...  Training loss: 0.9704...  0.1181 sec/batch
Epoch: 13/20...  Training Step: 6001...  Training loss: 1.1624...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 6002...  Training loss: 1.1678...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 6003...  Training loss: 0.9040...  0.1154 sec/batch
Epoch: 13/20...  Training Step: 6004...  Training loss: 1.0860...  0.1188 sec/batch
Epoch: 13/20...  Training Step: 6005...  Training loss: 1.0141...  0.1192 sec/batch
Epoch: 13/20...  Training Step: 6006...  Training loss: 1.1756...  0.1143 sec/batch
Epoch: 13/20...  Training Step: 6007...  Training loss: 1.2744...  0.1184 sec/batch
Epoch: 13/20...  Training Step: 6008...  Training loss: 1.2727...  0.1164 sec/batch
Epoch: 13/20...  Training Step: 6009...  Training loss: 0.9475...  0.1152 sec/batch
Epoch: 13/20...  Training Step: 6010...  Training loss: 0.9778...  0.1131 sec/batch
Epoch: 13/20...  Training Step: 6011...  Training loss: 1.0596...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 6012...  Training loss: 1.0744...  0.1161 sec/batch
Epoch: 13/20...  Training Step: 6013...  Training loss: 1.0827...  0.1173 sec/batch
Epoch: 13/20...  Training Step: 6014...  Training loss: 1.0531...  0.1182 sec/batch
Epoch: 13/20...  Training Step: 6015...  Training loss: 1.1182...  0.1155 sec/batch
Epoch: 13/20...  Training Step: 6016...  Training loss: 0.9992...  0.1182 sec/batch
Epoch: 13/20...  Training Step: 6017...  Training loss: 1.1095...  0.1161 sec/batch
Epoch: 13/20...  Training Step: 6018...  Training loss: 0.9687...  0.1138 sec/batch
Epoch: 13/20...  Training Step: 6019...  Training loss: 1.0952...  0.1143 sec/batch
Epoch: 13/20...  Training Step: 6020...  Training loss: 1.0801...  0.1142 sec/batch
Epoch: 13/20...  Training Step: 6021...  Training loss: 1.0713...  0.1107 sec/batch
Epoch: 13/20...  Training Step: 6022...  Training loss: 1.2881...  0.1168 sec/batch
Epoch: 13/20...  Training Step: 6023...  Training loss: 1.1043...  0.1144 sec/batch
Epoch: 13/20...  Training Step: 6024...  Training loss: 0.9579...  0.1159 sec/batch
Epoch: 13/20...  Training Step: 6025...  Training loss: 1.0226...  0.1145 sec/batch
Epoch: 13/20...  Training Step: 6026...  Training loss: 0.8833...  0.1110 sec/batch
Epoch: 13/20...  Training Step: 6027...  Training loss: 0.9355...  0.1142 sec/batch
Epoch: 13/20...  Training Step: 6028...  Training loss: 0.9681...  0.1140 sec/batch
Epoch: 13/20...  Training Step: 6029...  Training loss: 1.0691...  0.1139 sec/batch
Epoch: 13/20...  Training Step: 6030...  Training loss: 0.9311...  0.1205 sec/batch
Epoch: 13/20...  Training Step: 6031...  Training loss: 1.0487...  0.1140 sec/batch
Epoch: 13/20...  Training Step: 6032...  Training loss: 1.0070...  0.1148 sec/batch
Epoch: 14/20...  Training Step: 6033...  Training loss: 1.2961...  0.1144 sec/batch
Epoch: 14/20...  Training Step: 6034...  Training loss: 1.1728...  0.1256 sec/batch
Epoch: 14/20...  Training Step: 6035...  Training loss: 1.1087...  0.1270 sec/batch
Epoch: 14/20...  Training Step: 6036...  Training loss: 1.1693...  0.1216 sec/batch
Epoch: 14/20...  Training Step: 6037...  Training loss: 1.2070...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6038...  Training loss: 0.9317...  0.1198 sec/batch
Epoch: 14/20...  Training Step: 6039...  Training loss: 1.1026...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6040...  Training loss: 0.9525...  0.1190 sec/batch
Epoch: 14/20...  Training Step: 6041...  Training loss: 0.9236...  0.1169 sec/batch
Epoch: 14/20...  Training Step: 6042...  Training loss: 1.1687...  0.1200 sec/batch
Epoch: 14/20...  Training Step: 6043...  Training loss: 0.9674...  0.1179 sec/batch
Epoch: 14/20...  Training Step: 6044...  Training loss: 0.9133...  0.1146 sec/batch
Epoch: 14/20...  Training Step: 6045...  Training loss: 1.3963...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6046...  Training loss: 0.8980...  0.1151 sec/batch
Epoch: 14/20...  Training Step: 6047...  Training loss: 1.2099...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6048...  Training loss: 1.0376...  0.1153 sec/batch
Epoch: 14/20...  Training Step: 6049...  Training loss: 1.0367...  0.1158 sec/batch
Epoch: 14/20...  Training Step: 6050...  Training loss: 1.1146...  0.1146 sec/batch
Epoch: 14/20...  Training Step: 6051...  Training loss: 1.0556...  0.1164 sec/batch
Epoch: 14/20...  Training Step: 6052...  Training loss: 0.9177...  0.1186 sec/batch
Epoch: 14/20...  Training Step: 6053...  Training loss: 1.0905...  0.1154 sec/batch
Epoch: 14/20...  Training Step: 6054...  Training loss: 0.9839...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6055...  Training loss: 1.1136...  0.1172 sec/batch
Epoch: 14/20...  Training Step: 6056...  Training loss: 0.9829...  0.1164 sec/batch
Epoch: 14/20...  Training Step: 6057...  Training loss: 1.0527...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6058...  Training loss: 1.1337...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6059...  Training loss: 1.0706...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6060...  Training loss: 1.0465...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6061...  Training loss: 0.9616...  0.1205 sec/batch
Epoch: 14/20...  Training Step: 6062...  Training loss: 1.0266...  0.1177 sec/batch
Epoch: 14/20...  Training Step: 6063...  Training loss: 0.9469...  0.1182 sec/batch
Epoch: 14/20...  Training Step: 6064...  Training loss: 0.9679...  0.1202 sec/batch
Epoch: 14/20...  Training Step: 6065...  Training loss: 0.8196...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6066...  Training loss: 0.8530...  0.1177 sec/batch
Epoch: 14/20...  Training Step: 6067...  Training loss: 0.9440...  0.1133 sec/batch
Epoch: 14/20...  Training Step: 6068...  Training loss: 0.9537...  0.1164 sec/batch
Epoch: 14/20...  Training Step: 6069...  Training loss: 1.0813...  0.1194 sec/batch
Epoch: 14/20...  Training Step: 6070...  Training loss: 0.9330...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6071...  Training loss: 1.0285...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6072...  Training loss: 1.2696...  0.1117 sec/batch
Epoch: 14/20...  Training Step: 6073...  Training loss: 0.9434...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6074...  Training loss: 1.0306...  0.1209 sec/batch
Epoch: 14/20...  Training Step: 6075...  Training loss: 1.0857...  0.1173 sec/batch
Epoch: 14/20...  Training Step: 6076...  Training loss: 0.9824...  0.1162 sec/batch
Epoch: 14/20...  Training Step: 6077...  Training loss: 1.0387...  0.1185 sec/batch
Epoch: 14/20...  Training Step: 6078...  Training loss: 0.9971...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6079...  Training loss: 1.0759...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6080...  Training loss: 0.9997...  0.1179 sec/batch
Epoch: 14/20...  Training Step: 6081...  Training loss: 1.0406...  0.1212 sec/batch
Epoch: 14/20...  Training Step: 6082...  Training loss: 1.0174...  0.1167 sec/batch
Epoch: 14/20...  Training Step: 6083...  Training loss: 0.9687...  0.1205 sec/batch
Epoch: 14/20...  Training Step: 6084...  Training loss: 0.9207...  0.1193 sec/batch
Epoch: 14/20...  Training Step: 6085...  Training loss: 0.9838...  0.1219 sec/batch
Epoch: 14/20...  Training Step: 6086...  Training loss: 0.8567...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6087...  Training loss: 0.9781...  0.1161 sec/batch
Epoch: 14/20...  Training Step: 6088...  Training loss: 0.9774...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6089...  Training loss: 1.0788...  0.1218 sec/batch
Epoch: 14/20...  Training Step: 6090...  Training loss: 1.1065...  0.1198 sec/batch
Epoch: 14/20...  Training Step: 6091...  Training loss: 0.8738...  0.1176 sec/batch
Epoch: 14/20...  Training Step: 6092...  Training loss: 0.9117...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6093...  Training loss: 0.9346...  0.1149 sec/batch
Epoch: 14/20...  Training Step: 6094...  Training loss: 1.1451...  0.1127 sec/batch
Epoch: 14/20...  Training Step: 6095...  Training loss: 0.9118...  0.1138 sec/batch
Epoch: 14/20...  Training Step: 6096...  Training loss: 1.1009...  0.1167 sec/batch
Epoch: 14/20...  Training Step: 6097...  Training loss: 0.7968...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6098...  Training loss: 1.0258...  0.1182 sec/batch
Epoch: 14/20...  Training Step: 6099...  Training loss: 0.9808...  0.1168 sec/batch
Epoch: 14/20...  Training Step: 6100...  Training loss: 1.1356...  0.1202 sec/batch
Epoch: 14/20...  Training Step: 6101...  Training loss: 0.9594...  0.1139 sec/batch
Epoch: 14/20...  Training Step: 6102...  Training loss: 1.0993...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6103...  Training loss: 1.1937...  0.1212 sec/batch
Epoch: 14/20...  Training Step: 6104...  Training loss: 0.8838...  0.1162 sec/batch
Epoch: 14/20...  Training Step: 6105...  Training loss: 1.0027...  0.1207 sec/batch
Epoch: 14/20...  Training Step: 6106...  Training loss: 0.8681...  0.1205 sec/batch
Epoch: 14/20...  Training Step: 6107...  Training loss: 1.1555...  0.1177 sec/batch
Epoch: 14/20...  Training Step: 6108...  Training loss: 0.9221...  0.1210 sec/batch
Epoch: 14/20...  Training Step: 6109...  Training loss: 0.9617...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6110...  Training loss: 1.0764...  0.1166 sec/batch
Epoch: 14/20...  Training Step: 6111...  Training loss: 1.1041...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6112...  Training loss: 0.8907...  0.1179 sec/batch
Epoch: 14/20...  Training Step: 6113...  Training loss: 1.1636...  0.1211 sec/batch
Epoch: 14/20...  Training Step: 6114...  Training loss: 1.0236...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6115...  Training loss: 0.9199...  0.1151 sec/batch
Epoch: 14/20...  Training Step: 6116...  Training loss: 1.0834...  0.1130 sec/batch
Epoch: 14/20...  Training Step: 6117...  Training loss: 0.9698...  0.1138 sec/batch
Epoch: 14/20...  Training Step: 6118...  Training loss: 1.2158...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6119...  Training loss: 0.8816...  0.1087 sec/batch
Epoch: 14/20...  Training Step: 6120...  Training loss: 1.2221...  0.1152 sec/batch
Epoch: 14/20...  Training Step: 6121...  Training loss: 1.1928...  0.1118 sec/batch
Epoch: 14/20...  Training Step: 6122...  Training loss: 1.0258...  0.1153 sec/batch
Epoch: 14/20...  Training Step: 6123...  Training loss: 1.1122...  0.1162 sec/batch
Epoch: 14/20...  Training Step: 6124...  Training loss: 1.1394...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6125...  Training loss: 0.8936...  0.1224 sec/batch
Epoch: 14/20...  Training Step: 6126...  Training loss: 1.2011...  0.1167 sec/batch
Epoch: 14/20...  Training Step: 6127...  Training loss: 1.0308...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6128...  Training loss: 1.0308...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6129...  Training loss: 1.0679...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6130...  Training loss: 1.0780...  0.1175 sec/batch
Epoch: 14/20...  Training Step: 6131...  Training loss: 1.0733...  0.1178 sec/batch
Epoch: 14/20...  Training Step: 6132...  Training loss: 1.0537...  0.1163 sec/batch
Epoch: 14/20...  Training Step: 6133...  Training loss: 1.1148...  0.1210 sec/batch
Epoch: 14/20...  Training Step: 6134...  Training loss: 1.1783...  0.1220 sec/batch
Epoch: 14/20...  Training Step: 6135...  Training loss: 1.1921...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6136...  Training loss: 1.1196...  0.1178 sec/batch
Epoch: 14/20...  Training Step: 6137...  Training loss: 1.2029...  0.1150 sec/batch
Epoch: 14/20...  Training Step: 6138...  Training loss: 1.0645...  0.1160 sec/batch
Epoch: 14/20...  Training Step: 6139...  Training loss: 0.9903...  0.1213 sec/batch
Epoch: 14/20...  Training Step: 6140...  Training loss: 1.1413...  0.1199 sec/batch
Epoch: 14/20...  Training Step: 6141...  Training loss: 1.0815...  0.1171 sec/batch
Epoch: 14/20...  Training Step: 6142...  Training loss: 0.9366...  0.1187 sec/batch
Epoch: 14/20...  Training Step: 6143...  Training loss: 0.9656...  0.1200 sec/batch
Epoch: 14/20...  Training Step: 6144...  Training loss: 0.9758...  0.1143 sec/batch
Epoch: 14/20...  Training Step: 6145...  Training loss: 1.0252...  0.1176 sec/batch
Epoch: 14/20...  Training Step: 6146...  Training loss: 1.2684...  0.1213 sec/batch
Epoch: 14/20...  Training Step: 6147...  Training loss: 1.1370...  0.1188 sec/batch
Epoch: 14/20...  Training Step: 6148...  Training loss: 0.9744...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6149...  Training loss: 1.0066...  0.1210 sec/batch
Epoch: 14/20...  Training Step: 6150...  Training loss: 0.9685...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6151...  Training loss: 1.0640...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6152...  Training loss: 0.9032...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6153...  Training loss: 1.1268...  0.1213 sec/batch
Epoch: 14/20...  Training Step: 6154...  Training loss: 1.1569...  0.1163 sec/batch
Epoch: 14/20...  Training Step: 6155...  Training loss: 1.0851...  0.1180 sec/batch
Epoch: 14/20...  Training Step: 6156...  Training loss: 1.0266...  0.1135 sec/batch
Epoch: 14/20...  Training Step: 6157...  Training loss: 1.0239...  0.1167 sec/batch
Epoch: 14/20...  Training Step: 6158...  Training loss: 0.9686...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6159...  Training loss: 1.0969...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6160...  Training loss: 1.1447...  0.1172 sec/batch
Epoch: 14/20...  Training Step: 6161...  Training loss: 1.0187...  0.1186 sec/batch
Epoch: 14/20...  Training Step: 6162...  Training loss: 1.0395...  0.1216 sec/batch
Epoch: 14/20...  Training Step: 6163...  Training loss: 1.0642...  0.1147 sec/batch
Epoch: 14/20...  Training Step: 6164...  Training loss: 1.0620...  0.1139 sec/batch
Epoch: 14/20...  Training Step: 6165...  Training loss: 0.9606...  0.1188 sec/batch
Epoch: 14/20...  Training Step: 6166...  Training loss: 1.1632...  0.1213 sec/batch
Epoch: 14/20...  Training Step: 6167...  Training loss: 1.0257...  0.1152 sec/batch
Epoch: 14/20...  Training Step: 6168...  Training loss: 1.0209...  0.1211 sec/batch
Epoch: 14/20...  Training Step: 6169...  Training loss: 0.9277...  0.1185 sec/batch
Epoch: 14/20...  Training Step: 6170...  Training loss: 1.0728...  0.1188 sec/batch
Epoch: 14/20...  Training Step: 6171...  Training loss: 1.0634...  0.1155 sec/batch
Epoch: 14/20...  Training Step: 6172...  Training loss: 0.9896...  0.1169 sec/batch
Epoch: 14/20...  Training Step: 6173...  Training loss: 0.8330...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6174...  Training loss: 0.9357...  0.1235 sec/batch
Epoch: 14/20...  Training Step: 6175...  Training loss: 1.0683...  0.1239 sec/batch
Epoch: 14/20...  Training Step: 6176...  Training loss: 1.1600...  0.1229 sec/batch
Epoch: 14/20...  Training Step: 6177...  Training loss: 0.9302...  0.1141 sec/batch
Epoch: 14/20...  Training Step: 6178...  Training loss: 0.9992...  0.1108 sec/batch
Epoch: 14/20...  Training Step: 6179...  Training loss: 1.0094...  0.1130 sec/batch
Epoch: 14/20...  Training Step: 6180...  Training loss: 0.9302...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6181...  Training loss: 1.0559...  0.1161 sec/batch
Epoch: 14/20...  Training Step: 6182...  Training loss: 1.0481...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6183...  Training loss: 0.9837...  0.1199 sec/batch
Epoch: 14/20...  Training Step: 6184...  Training loss: 1.0992...  0.1191 sec/batch
Epoch: 14/20...  Training Step: 6185...  Training loss: 1.1908...  0.1204 sec/batch
Epoch: 14/20...  Training Step: 6186...  Training loss: 1.0263...  0.1171 sec/batch
Epoch: 14/20...  Training Step: 6187...  Training loss: 1.0046...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6188...  Training loss: 0.9674...  0.1200 sec/batch
Epoch: 14/20...  Training Step: 6189...  Training loss: 1.0302...  0.1185 sec/batch
Epoch: 14/20...  Training Step: 6190...  Training loss: 0.9790...  0.1168 sec/batch
Epoch: 14/20...  Training Step: 6191...  Training loss: 0.9789...  0.1187 sec/batch
Epoch: 14/20...  Training Step: 6192...  Training loss: 1.0191...  0.1227 sec/batch
Epoch: 14/20...  Training Step: 6193...  Training loss: 1.0524...  0.1194 sec/batch
Epoch: 14/20...  Training Step: 6194...  Training loss: 0.9542...  0.1199 sec/batch
Epoch: 14/20...  Training Step: 6195...  Training loss: 1.2836...  0.1204 sec/batch
Epoch: 14/20...  Training Step: 6196...  Training loss: 0.9629...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6197...  Training loss: 0.9843...  0.1209 sec/batch
Epoch: 14/20...  Training Step: 6198...  Training loss: 1.0029...  0.1150 sec/batch
Epoch: 14/20...  Training Step: 6199...  Training loss: 1.0015...  0.1198 sec/batch
Epoch: 14/20...  Training Step: 6200...  Training loss: 1.2660...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6201...  Training loss: 1.0076...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6202...  Training loss: 1.1125...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6203...  Training loss: 1.1167...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6204...  Training loss: 1.0816...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6205...  Training loss: 0.8679...  0.1208 sec/batch
Epoch: 14/20...  Training Step: 6206...  Training loss: 1.0603...  0.1167 sec/batch
Epoch: 14/20...  Training Step: 6207...  Training loss: 1.0702...  0.1218 sec/batch
Epoch: 14/20...  Training Step: 6208...  Training loss: 0.9660...  0.1140 sec/batch
Epoch: 14/20...  Training Step: 6209...  Training loss: 0.9270...  0.1191 sec/batch
Epoch: 14/20...  Training Step: 6210...  Training loss: 1.2484...  0.1127 sec/batch
Epoch: 14/20...  Training Step: 6211...  Training loss: 0.9675...  0.1150 sec/batch
Epoch: 14/20...  Training Step: 6212...  Training loss: 1.0959...  0.1187 sec/batch
Epoch: 14/20...  Training Step: 6213...  Training loss: 0.8406...  0.1153 sec/batch
Epoch: 14/20...  Training Step: 6214...  Training loss: 1.2503...  0.1206 sec/batch
Epoch: 14/20...  Training Step: 6215...  Training loss: 1.1344...  0.1197 sec/batch
Epoch: 14/20...  Training Step: 6216...  Training loss: 0.9361...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6217...  Training loss: 1.0893...  0.1223 sec/batch
Epoch: 14/20...  Training Step: 6218...  Training loss: 1.0839...  0.1162 sec/batch
Epoch: 14/20...  Training Step: 6219...  Training loss: 1.1129...  0.1154 sec/batch
Epoch: 14/20...  Training Step: 6220...  Training loss: 0.9389...  0.1139 sec/batch
Epoch: 14/20...  Training Step: 6221...  Training loss: 1.1668...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6222...  Training loss: 0.9710...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6223...  Training loss: 0.9363...  0.1189 sec/batch
Epoch: 14/20...  Training Step: 6224...  Training loss: 1.0183...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6225...  Training loss: 1.0922...  0.1193 sec/batch
Epoch: 14/20...  Training Step: 6226...  Training loss: 1.0419...  0.1165 sec/batch
Epoch: 14/20...  Training Step: 6227...  Training loss: 1.0978...  0.1178 sec/batch
Epoch: 14/20...  Training Step: 6228...  Training loss: 0.9899...  0.1179 sec/batch
Epoch: 14/20...  Training Step: 6229...  Training loss: 1.0524...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6230...  Training loss: 1.1951...  0.1189 sec/batch
Epoch: 14/20...  Training Step: 6231...  Training loss: 0.7932...  0.1188 sec/batch
Epoch: 14/20...  Training Step: 6232...  Training loss: 1.0639...  0.1161 sec/batch
Epoch: 14/20...  Training Step: 6233...  Training loss: 1.0269...  0.1168 sec/batch
Epoch: 14/20...  Training Step: 6234...  Training loss: 1.2400...  0.1179 sec/batch
Epoch: 14/20...  Training Step: 6235...  Training loss: 1.0549...  0.1171 sec/batch
Epoch: 14/20...  Training Step: 6236...  Training loss: 1.2438...  0.1163 sec/batch
Epoch: 14/20...  Training Step: 6237...  Training loss: 1.0145...  0.1249 sec/batch
Epoch: 14/20...  Training Step: 6238...  Training loss: 0.9624...  0.1336 sec/batch
Epoch: 14/20...  Training Step: 6239...  Training loss: 1.0088...  0.1172 sec/batch
Epoch: 14/20...  Training Step: 6240...  Training loss: 1.0412...  0.1144 sec/batch
Epoch: 14/20...  Training Step: 6241...  Training loss: 1.1553...  0.1177 sec/batch
Epoch: 14/20...  Training Step: 6242...  Training loss: 0.8003...  0.1202 sec/batch
Epoch: 14/20...  Training Step: 6243...  Training loss: 1.0273...  0.1169 sec/batch
Epoch: 14/20...  Training Step: 6244...  Training loss: 1.0985...  0.1212 sec/batch
Epoch: 14/20...  Training Step: 6245...  Training loss: 1.0781...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6246...  Training loss: 1.0827...  0.1215 sec/batch
Epoch: 14/20...  Training Step: 6247...  Training loss: 1.1260...  0.1295 sec/batch
Epoch: 14/20...  Training Step: 6248...  Training loss: 0.9234...  0.1228 sec/batch
Epoch: 14/20...  Training Step: 6249...  Training loss: 1.1417...  0.1244 sec/batch
Epoch: 14/20...  Training Step: 6250...  Training loss: 1.1762...  0.1239 sec/batch
Epoch: 14/20...  Training Step: 6251...  Training loss: 1.2129...  0.1265 sec/batch
Epoch: 14/20...  Training Step: 6252...  Training loss: 1.1257...  0.1361 sec/batch
Epoch: 14/20...  Training Step: 6253...  Training loss: 0.8510...  0.1369 sec/batch
Epoch: 14/20...  Training Step: 6254...  Training loss: 1.3085...  0.1266 sec/batch
Epoch: 14/20...  Training Step: 6255...  Training loss: 1.0438...  0.1361 sec/batch
Epoch: 14/20...  Training Step: 6256...  Training loss: 1.2506...  0.1340 sec/batch
Epoch: 14/20...  Training Step: 6257...  Training loss: 1.1083...  0.1230 sec/batch
Epoch: 14/20...  Training Step: 6258...  Training loss: 1.1652...  0.1295 sec/batch
Epoch: 14/20...  Training Step: 6259...  Training loss: 1.3196...  0.1238 sec/batch
Epoch: 14/20...  Training Step: 6260...  Training loss: 1.0479...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6261...  Training loss: 1.1389...  0.1175 sec/batch
Epoch: 14/20...  Training Step: 6262...  Training loss: 0.9657...  0.1145 sec/batch
Epoch: 14/20...  Training Step: 6263...  Training loss: 1.1744...  0.1226 sec/batch
Epoch: 14/20...  Training Step: 6264...  Training loss: 1.1350...  0.1188 sec/batch
Epoch: 14/20...  Training Step: 6265...  Training loss: 1.2018...  0.1206 sec/batch
Epoch: 14/20...  Training Step: 6266...  Training loss: 1.1414...  0.1369 sec/batch
Epoch: 14/20...  Training Step: 6267...  Training loss: 1.2602...  0.1540 sec/batch
Epoch: 14/20...  Training Step: 6268...  Training loss: 1.0825...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6269...  Training loss: 1.1693...  0.1331 sec/batch
Epoch: 14/20...  Training Step: 6270...  Training loss: 0.9710...  0.1227 sec/batch
Epoch: 14/20...  Training Step: 6271...  Training loss: 1.0783...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6272...  Training loss: 1.2176...  0.1224 sec/batch
Epoch: 14/20...  Training Step: 6273...  Training loss: 1.1114...  0.1201 sec/batch
Epoch: 14/20...  Training Step: 6274...  Training loss: 1.0065...  0.1206 sec/batch
Epoch: 14/20...  Training Step: 6275...  Training loss: 1.1669...  0.1158 sec/batch
Epoch: 14/20...  Training Step: 6276...  Training loss: 1.0916...  0.1188 sec/batch
Epoch: 14/20...  Training Step: 6277...  Training loss: 1.0807...  0.1212 sec/batch
Epoch: 14/20...  Training Step: 6278...  Training loss: 0.9392...  0.1191 sec/batch
Epoch: 14/20...  Training Step: 6279...  Training loss: 1.1110...  0.1176 sec/batch
Epoch: 14/20...  Training Step: 6280...  Training loss: 1.1069...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6281...  Training loss: 1.0992...  0.1165 sec/batch
Epoch: 14/20...  Training Step: 6282...  Training loss: 1.1012...  0.1182 sec/batch
Epoch: 14/20...  Training Step: 6283...  Training loss: 1.0689...  0.1211 sec/batch
Epoch: 14/20...  Training Step: 6284...  Training loss: 1.0458...  0.1186 sec/batch
Epoch: 14/20...  Training Step: 6285...  Training loss: 1.0230...  0.1206 sec/batch
Epoch: 14/20...  Training Step: 6286...  Training loss: 1.0363...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6287...  Training loss: 1.0226...  0.1220 sec/batch
Epoch: 14/20...  Training Step: 6288...  Training loss: 1.1075...  0.1196 sec/batch
Epoch: 14/20...  Training Step: 6289...  Training loss: 1.1400...  0.1209 sec/batch
Epoch: 14/20...  Training Step: 6290...  Training loss: 0.9389...  0.1185 sec/batch
Epoch: 14/20...  Training Step: 6291...  Training loss: 0.9802...  0.1171 sec/batch
Epoch: 14/20...  Training Step: 6292...  Training loss: 1.0128...  0.1190 sec/batch
Epoch: 14/20...  Training Step: 6293...  Training loss: 1.0792...  0.1157 sec/batch
Epoch: 14/20...  Training Step: 6294...  Training loss: 1.1787...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6295...  Training loss: 0.9909...  0.1162 sec/batch
Epoch: 14/20...  Training Step: 6296...  Training loss: 1.1521...  0.1149 sec/batch
Epoch: 14/20...  Training Step: 6297...  Training loss: 1.0679...  0.1141 sec/batch
Epoch: 14/20...  Training Step: 6298...  Training loss: 0.9963...  0.1187 sec/batch
Epoch: 14/20...  Training Step: 6299...  Training loss: 1.2207...  0.1210 sec/batch
Epoch: 14/20...  Training Step: 6300...  Training loss: 1.1573...  0.1110 sec/batch
Epoch: 14/20...  Training Step: 6301...  Training loss: 1.1256...  0.1201 sec/batch
Epoch: 14/20...  Training Step: 6302...  Training loss: 1.0474...  0.1228 sec/batch
Epoch: 14/20...  Training Step: 6303...  Training loss: 1.0965...  0.1270 sec/batch
Epoch: 14/20...  Training Step: 6304...  Training loss: 1.1614...  0.1226 sec/batch
Epoch: 14/20...  Training Step: 6305...  Training loss: 1.2317...  0.1272 sec/batch
Epoch: 14/20...  Training Step: 6306...  Training loss: 1.1865...  0.1188 sec/batch
Epoch: 14/20...  Training Step: 6307...  Training loss: 1.0834...  0.1234 sec/batch
Epoch: 14/20...  Training Step: 6308...  Training loss: 1.0725...  0.1169 sec/batch
Epoch: 14/20...  Training Step: 6309...  Training loss: 0.9982...  0.1180 sec/batch
Epoch: 14/20...  Training Step: 6310...  Training loss: 1.3003...  0.1265 sec/batch
Epoch: 14/20...  Training Step: 6311...  Training loss: 1.1485...  0.1182 sec/batch
Epoch: 14/20...  Training Step: 6312...  Training loss: 1.0416...  0.1214 sec/batch
Epoch: 14/20...  Training Step: 6313...  Training loss: 1.0824...  0.1286 sec/batch
Epoch: 14/20...  Training Step: 6314...  Training loss: 1.0028...  0.1185 sec/batch
Epoch: 14/20...  Training Step: 6315...  Training loss: 0.9683...  0.1183 sec/batch
Epoch: 14/20...  Training Step: 6316...  Training loss: 1.0979...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6317...  Training loss: 0.9491...  0.1171 sec/batch
Epoch: 14/20...  Training Step: 6318...  Training loss: 0.9500...  0.1168 sec/batch
Epoch: 14/20...  Training Step: 6319...  Training loss: 1.0420...  0.1141 sec/batch
Epoch: 14/20...  Training Step: 6320...  Training loss: 1.0673...  0.1160 sec/batch
Epoch: 14/20...  Training Step: 6321...  Training loss: 1.1374...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6322...  Training loss: 1.1389...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6323...  Training loss: 1.0871...  0.1173 sec/batch
Epoch: 14/20...  Training Step: 6324...  Training loss: 1.0488...  0.1183 sec/batch
Epoch: 14/20...  Training Step: 6325...  Training loss: 0.9894...  0.1177 sec/batch
Epoch: 14/20...  Training Step: 6326...  Training loss: 1.1681...  0.1194 sec/batch
Epoch: 14/20...  Training Step: 6327...  Training loss: 1.1304...  0.1202 sec/batch
Epoch: 14/20...  Training Step: 6328...  Training loss: 1.3254...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6329...  Training loss: 0.9449...  0.1183 sec/batch
Epoch: 14/20...  Training Step: 6330...  Training loss: 1.0695...  0.1198 sec/batch
Epoch: 14/20...  Training Step: 6331...  Training loss: 0.9940...  0.1193 sec/batch
Epoch: 14/20...  Training Step: 6332...  Training loss: 1.1792...  0.1185 sec/batch
Epoch: 14/20...  Training Step: 6333...  Training loss: 1.1351...  0.1210 sec/batch
Epoch: 14/20...  Training Step: 6334...  Training loss: 0.9843...  0.1154 sec/batch
Epoch: 14/20...  Training Step: 6335...  Training loss: 0.8815...  0.1139 sec/batch
Epoch: 14/20...  Training Step: 6336...  Training loss: 1.3051...  0.1193 sec/batch
Epoch: 14/20...  Training Step: 6337...  Training loss: 1.0715...  0.1216 sec/batch
Epoch: 14/20...  Training Step: 6338...  Training loss: 1.1297...  0.1175 sec/batch
Epoch: 14/20...  Training Step: 6339...  Training loss: 1.1352...  0.1190 sec/batch
Epoch: 14/20...  Training Step: 6340...  Training loss: 1.3274...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6341...  Training loss: 0.9987...  0.1173 sec/batch
Epoch: 14/20...  Training Step: 6342...  Training loss: 1.1374...  0.1133 sec/batch
Epoch: 14/20...  Training Step: 6343...  Training loss: 1.0737...  0.1113 sec/batch
Epoch: 14/20...  Training Step: 6344...  Training loss: 0.9686...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6345...  Training loss: 0.9744...  0.1171 sec/batch
Epoch: 14/20...  Training Step: 6346...  Training loss: 0.9467...  0.1189 sec/batch
Epoch: 14/20...  Training Step: 6347...  Training loss: 0.9367...  0.1210 sec/batch
Epoch: 14/20...  Training Step: 6348...  Training loss: 0.9322...  0.1279 sec/batch
Epoch: 14/20...  Training Step: 6349...  Training loss: 0.9013...  0.1295 sec/batch
Epoch: 14/20...  Training Step: 6350...  Training loss: 0.8782...  0.1256 sec/batch
Epoch: 14/20...  Training Step: 6351...  Training loss: 1.1037...  0.1288 sec/batch
Epoch: 14/20...  Training Step: 6352...  Training loss: 0.9403...  0.1248 sec/batch
Epoch: 14/20...  Training Step: 6353...  Training loss: 0.9096...  0.1154 sec/batch
Epoch: 14/20...  Training Step: 6354...  Training loss: 1.2227...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6355...  Training loss: 0.9494...  0.1189 sec/batch
Epoch: 14/20...  Training Step: 6356...  Training loss: 1.0220...  0.1148 sec/batch
Epoch: 14/20...  Training Step: 6357...  Training loss: 0.8881...  0.1171 sec/batch
Epoch: 14/20...  Training Step: 6358...  Training loss: 0.7877...  0.1223 sec/batch
Epoch: 14/20...  Training Step: 6359...  Training loss: 0.9653...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6360...  Training loss: 0.8959...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6361...  Training loss: 1.0951...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6362...  Training loss: 1.1210...  0.1150 sec/batch
Epoch: 14/20...  Training Step: 6363...  Training loss: 1.0734...  0.1207 sec/batch
Epoch: 14/20...  Training Step: 6364...  Training loss: 1.0055...  0.1146 sec/batch
Epoch: 14/20...  Training Step: 6365...  Training loss: 0.8983...  0.1206 sec/batch
Epoch: 14/20...  Training Step: 6366...  Training loss: 1.0385...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6367...  Training loss: 1.0853...  0.1182 sec/batch
Epoch: 14/20...  Training Step: 6368...  Training loss: 0.9644...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6369...  Training loss: 0.9516...  0.1164 sec/batch
Epoch: 14/20...  Training Step: 6370...  Training loss: 0.8981...  0.1115 sec/batch
Epoch: 14/20...  Training Step: 6371...  Training loss: 1.0400...  0.1211 sec/batch
Epoch: 14/20...  Training Step: 6372...  Training loss: 0.9828...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6373...  Training loss: 0.9521...  0.1190 sec/batch
Epoch: 14/20...  Training Step: 6374...  Training loss: 1.0644...  0.1169 sec/batch
Epoch: 14/20...  Training Step: 6375...  Training loss: 0.8286...  0.1205 sec/batch
Epoch: 14/20...  Training Step: 6376...  Training loss: 1.1601...  0.1130 sec/batch
Epoch: 14/20...  Training Step: 6377...  Training loss: 0.9789...  0.1157 sec/batch
Epoch: 14/20...  Training Step: 6378...  Training loss: 0.8578...  0.1205 sec/batch
Epoch: 14/20...  Training Step: 6379...  Training loss: 0.9312...  0.1196 sec/batch
Epoch: 14/20...  Training Step: 6380...  Training loss: 1.2524...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6381...  Training loss: 0.9438...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6382...  Training loss: 1.0497...  0.1208 sec/batch
Epoch: 14/20...  Training Step: 6383...  Training loss: 0.9257...  0.1214 sec/batch
Epoch: 14/20...  Training Step: 6384...  Training loss: 1.0236...  0.1141 sec/batch
Epoch: 14/20...  Training Step: 6385...  Training loss: 0.8407...  0.1218 sec/batch
Epoch: 14/20...  Training Step: 6386...  Training loss: 0.8575...  0.1169 sec/batch
Epoch: 14/20...  Training Step: 6387...  Training loss: 1.0574...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6388...  Training loss: 1.0353...  0.1215 sec/batch
Epoch: 14/20...  Training Step: 6389...  Training loss: 0.8543...  0.1173 sec/batch
Epoch: 14/20...  Training Step: 6390...  Training loss: 1.0560...  0.1161 sec/batch
Epoch: 14/20...  Training Step: 6391...  Training loss: 1.2402...  0.1232 sec/batch
Epoch: 14/20...  Training Step: 6392...  Training loss: 0.8486...  0.1186 sec/batch
Epoch: 14/20...  Training Step: 6393...  Training loss: 1.0563...  0.1139 sec/batch
Epoch: 14/20...  Training Step: 6394...  Training loss: 1.1088...  0.1153 sec/batch
Epoch: 14/20...  Training Step: 6395...  Training loss: 0.9646...  0.1162 sec/batch
Epoch: 14/20...  Training Step: 6396...  Training loss: 0.9813...  0.1224 sec/batch
Epoch: 14/20...  Training Step: 6397...  Training loss: 0.9595...  0.1224 sec/batch
Epoch: 14/20...  Training Step: 6398...  Training loss: 1.0756...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6399...  Training loss: 0.9937...  0.1155 sec/batch
Epoch: 14/20...  Training Step: 6400...  Training loss: 1.0924...  0.1140 sec/batch
Epoch: 14/20...  Training Step: 6401...  Training loss: 1.0441...  0.1198 sec/batch
Epoch: 14/20...  Training Step: 6402...  Training loss: 1.1455...  0.1207 sec/batch
Epoch: 14/20...  Training Step: 6403...  Training loss: 0.9106...  0.1172 sec/batch
Epoch: 14/20...  Training Step: 6404...  Training loss: 1.0306...  0.1218 sec/batch
Epoch: 14/20...  Training Step: 6405...  Training loss: 0.9194...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6406...  Training loss: 1.0911...  0.1306 sec/batch
Epoch: 14/20...  Training Step: 6407...  Training loss: 1.0538...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6408...  Training loss: 1.0753...  0.1147 sec/batch
Epoch: 14/20...  Training Step: 6409...  Training loss: 1.1767...  0.1156 sec/batch
Epoch: 14/20...  Training Step: 6410...  Training loss: 1.0190...  0.1141 sec/batch
Epoch: 14/20...  Training Step: 6411...  Training loss: 1.1205...  0.1161 sec/batch
Epoch: 14/20...  Training Step: 6412...  Training loss: 1.0595...  0.1207 sec/batch
Epoch: 14/20...  Training Step: 6413...  Training loss: 0.9372...  0.1201 sec/batch
Epoch: 14/20...  Training Step: 6414...  Training loss: 0.9471...  0.1190 sec/batch
Epoch: 14/20...  Training Step: 6415...  Training loss: 0.9019...  0.1211 sec/batch
Epoch: 14/20...  Training Step: 6416...  Training loss: 0.9762...  0.1172 sec/batch
Epoch: 14/20...  Training Step: 6417...  Training loss: 0.9473...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6418...  Training loss: 0.9947...  0.1159 sec/batch
Epoch: 14/20...  Training Step: 6419...  Training loss: 1.0144...  0.1163 sec/batch
Epoch: 14/20...  Training Step: 6420...  Training loss: 1.0289...  0.1140 sec/batch
Epoch: 14/20...  Training Step: 6421...  Training loss: 0.8719...  0.1179 sec/batch
Epoch: 14/20...  Training Step: 6422...  Training loss: 0.9312...  0.1203 sec/batch
Epoch: 14/20...  Training Step: 6423...  Training loss: 1.0465...  0.1202 sec/batch
Epoch: 14/20...  Training Step: 6424...  Training loss: 0.9681...  0.1165 sec/batch
Epoch: 14/20...  Training Step: 6425...  Training loss: 1.1075...  0.1168 sec/batch
Epoch: 14/20...  Training Step: 6426...  Training loss: 1.0145...  0.1160 sec/batch
Epoch: 14/20...  Training Step: 6427...  Training loss: 0.9441...  0.1116 sec/batch
Epoch: 14/20...  Training Step: 6428...  Training loss: 1.1169...  0.1164 sec/batch
Epoch: 14/20...  Training Step: 6429...  Training loss: 0.9344...  0.1209 sec/batch
Epoch: 14/20...  Training Step: 6430...  Training loss: 0.9738...  0.1186 sec/batch
Epoch: 14/20...  Training Step: 6431...  Training loss: 1.0245...  0.1187 sec/batch
Epoch: 14/20...  Training Step: 6432...  Training loss: 1.1152...  0.1128 sec/batch
Epoch: 14/20...  Training Step: 6433...  Training loss: 0.9822...  0.1116 sec/batch
Epoch: 14/20...  Training Step: 6434...  Training loss: 1.0283...  0.1190 sec/batch
Epoch: 14/20...  Training Step: 6435...  Training loss: 0.9509...  0.1182 sec/batch
Epoch: 14/20...  Training Step: 6436...  Training loss: 1.0086...  0.1155 sec/batch
Epoch: 14/20...  Training Step: 6437...  Training loss: 1.0257...  0.1265 sec/batch
Epoch: 14/20...  Training Step: 6438...  Training loss: 1.2245...  0.1299 sec/batch
Epoch: 14/20...  Training Step: 6439...  Training loss: 0.9468...  0.1213 sec/batch
Epoch: 14/20...  Training Step: 6440...  Training loss: 1.3210...  0.1163 sec/batch
Epoch: 14/20...  Training Step: 6441...  Training loss: 0.8813...  0.1141 sec/batch
Epoch: 14/20...  Training Step: 6442...  Training loss: 0.9792...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6443...  Training loss: 0.9356...  0.1190 sec/batch
Epoch: 14/20...  Training Step: 6444...  Training loss: 1.0518...  0.1205 sec/batch
Epoch: 14/20...  Training Step: 6445...  Training loss: 1.0451...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6446...  Training loss: 1.0324...  0.1154 sec/batch
Epoch: 14/20...  Training Step: 6447...  Training loss: 1.0358...  0.1200 sec/batch
Epoch: 14/20...  Training Step: 6448...  Training loss: 1.1485...  0.1135 sec/batch
Epoch: 14/20...  Training Step: 6449...  Training loss: 1.0353...  0.1158 sec/batch
Epoch: 14/20...  Training Step: 6450...  Training loss: 0.9186...  0.1163 sec/batch
Epoch: 14/20...  Training Step: 6451...  Training loss: 0.8943...  0.1189 sec/batch
Epoch: 14/20...  Training Step: 6452...  Training loss: 0.8637...  0.1210 sec/batch
Epoch: 14/20...  Training Step: 6453...  Training loss: 1.0143...  0.1168 sec/batch
Epoch: 14/20...  Training Step: 6454...  Training loss: 1.0244...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6455...  Training loss: 0.9745...  0.1160 sec/batch
Epoch: 14/20...  Training Step: 6456...  Training loss: 1.1139...  0.1170 sec/batch
Epoch: 14/20...  Training Step: 6457...  Training loss: 0.9066...  0.1202 sec/batch
Epoch: 14/20...  Training Step: 6458...  Training loss: 1.0449...  0.1115 sec/batch
Epoch: 14/20...  Training Step: 6459...  Training loss: 1.0001...  0.1183 sec/batch
Epoch: 14/20...  Training Step: 6460...  Training loss: 0.9708...  0.1201 sec/batch
Epoch: 14/20...  Training Step: 6461...  Training loss: 0.8630...  0.1193 sec/batch
Epoch: 14/20...  Training Step: 6462...  Training loss: 0.9434...  0.1196 sec/batch
Epoch: 14/20...  Training Step: 6463...  Training loss: 1.1453...  0.1181 sec/batch
Epoch: 14/20...  Training Step: 6464...  Training loss: 1.0092...  0.1140 sec/batch
Epoch: 14/20...  Training Step: 6465...  Training loss: 1.2091...  0.1195 sec/batch
Epoch: 14/20...  Training Step: 6466...  Training loss: 1.1181...  0.1139 sec/batch
Epoch: 14/20...  Training Step: 6467...  Training loss: 0.9178...  0.1216 sec/batch
Epoch: 14/20...  Training Step: 6468...  Training loss: 1.0125...  0.1193 sec/batch
Epoch: 14/20...  Training Step: 6469...  Training loss: 0.9544...  0.1185 sec/batch
Epoch: 14/20...  Training Step: 6470...  Training loss: 1.0264...  0.1214 sec/batch
Epoch: 14/20...  Training Step: 6471...  Training loss: 1.2917...  0.1200 sec/batch
Epoch: 14/20...  Training Step: 6472...  Training loss: 1.1969...  0.1139 sec/batch
Epoch: 14/20...  Training Step: 6473...  Training loss: 0.8943...  0.1211 sec/batch
Epoch: 14/20...  Training Step: 6474...  Training loss: 1.0267...  0.1187 sec/batch
Epoch: 14/20...  Training Step: 6475...  Training loss: 1.0499...  0.1169 sec/batch
Epoch: 14/20...  Training Step: 6476...  Training loss: 1.0705...  0.1145 sec/batch
Epoch: 14/20...  Training Step: 6477...  Training loss: 1.0031...  0.1149 sec/batch
Epoch: 14/20...  Training Step: 6478...  Training loss: 1.0499...  0.1224 sec/batch
Epoch: 14/20...  Training Step: 6479...  Training loss: 1.0875...  0.1177 sec/batch
Epoch: 14/20...  Training Step: 6480...  Training loss: 0.8728...  0.1179 sec/batch
Epoch: 14/20...  Training Step: 6481...  Training loss: 1.0427...  0.1173 sec/batch
Epoch: 14/20...  Training Step: 6482...  Training loss: 0.9039...  0.1182 sec/batch
Epoch: 14/20...  Training Step: 6483...  Training loss: 0.9467...  0.1210 sec/batch
Epoch: 14/20...  Training Step: 6484...  Training loss: 0.9465...  0.1113 sec/batch
Epoch: 14/20...  Training Step: 6485...  Training loss: 0.9940...  0.1174 sec/batch
Epoch: 14/20...  Training Step: 6486...  Training loss: 1.2397...  0.1184 sec/batch
Epoch: 14/20...  Training Step: 6487...  Training loss: 1.1351...  0.1148 sec/batch
Epoch: 14/20...  Training Step: 6488...  Training loss: 0.9120...  0.1192 sec/batch
Epoch: 14/20...  Training Step: 6489...  Training loss: 0.9764...  0.1204 sec/batch
Epoch: 14/20...  Training Step: 6490...  Training loss: 0.8268...  0.1130 sec/batch
Epoch: 14/20...  Training Step: 6491...  Training loss: 0.8747...  0.1220 sec/batch
Epoch: 14/20...  Training Step: 6492...  Training loss: 1.0247...  0.1216 sec/batch
Epoch: 14/20...  Training Step: 6493...  Training loss: 1.1252...  0.1148 sec/batch
Epoch: 14/20...  Training Step: 6494...  Training loss: 0.9685...  0.1164 sec/batch
Epoch: 14/20...  Training Step: 6495...  Training loss: 1.0748...  0.1154 sec/batch
Epoch: 14/20...  Training Step: 6496...  Training loss: 1.0625...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6497...  Training loss: 1.2148...  0.1206 sec/batch
Epoch: 15/20...  Training Step: 6498...  Training loss: 1.0638...  0.1208 sec/batch
Epoch: 15/20...  Training Step: 6499...  Training loss: 1.0443...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6500...  Training loss: 1.1011...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6501...  Training loss: 1.1199...  0.1159 sec/batch
Epoch: 15/20...  Training Step: 6502...  Training loss: 0.9277...  0.1182 sec/batch
Epoch: 15/20...  Training Step: 6503...  Training loss: 1.0633...  0.1217 sec/batch
Epoch: 15/20...  Training Step: 6504...  Training loss: 0.9488...  0.1213 sec/batch
Epoch: 15/20...  Training Step: 6505...  Training loss: 0.9093...  0.1149 sec/batch
Epoch: 15/20...  Training Step: 6506...  Training loss: 1.0097...  0.1163 sec/batch
Epoch: 15/20...  Training Step: 6507...  Training loss: 0.9992...  0.1151 sec/batch
Epoch: 15/20...  Training Step: 6508...  Training loss: 0.9115...  0.1159 sec/batch
Epoch: 15/20...  Training Step: 6509...  Training loss: 1.2983...  0.1181 sec/batch
Epoch: 15/20...  Training Step: 6510...  Training loss: 0.7449...  0.1148 sec/batch
Epoch: 15/20...  Training Step: 6511...  Training loss: 1.1020...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6512...  Training loss: 1.0742...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6513...  Training loss: 0.9355...  0.1171 sec/batch
Epoch: 15/20...  Training Step: 6514...  Training loss: 0.9916...  0.1171 sec/batch
Epoch: 15/20...  Training Step: 6515...  Training loss: 1.0735...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6516...  Training loss: 0.8985...  0.1230 sec/batch
Epoch: 15/20...  Training Step: 6517...  Training loss: 1.1651...  0.1159 sec/batch
Epoch: 15/20...  Training Step: 6518...  Training loss: 0.8727...  0.1177 sec/batch
Epoch: 15/20...  Training Step: 6519...  Training loss: 0.9992...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6520...  Training loss: 0.9217...  0.1164 sec/batch
Epoch: 15/20...  Training Step: 6521...  Training loss: 0.9802...  0.1205 sec/batch
Epoch: 15/20...  Training Step: 6522...  Training loss: 1.0753...  0.1152 sec/batch
Epoch: 15/20...  Training Step: 6523...  Training loss: 1.0089...  0.1140 sec/batch
Epoch: 15/20...  Training Step: 6524...  Training loss: 0.9454...  0.1169 sec/batch
Epoch: 15/20...  Training Step: 6525...  Training loss: 0.9372...  0.1179 sec/batch
Epoch: 15/20...  Training Step: 6526...  Training loss: 1.0956...  0.1136 sec/batch
Epoch: 15/20...  Training Step: 6527...  Training loss: 0.9237...  0.1130 sec/batch
Epoch: 15/20...  Training Step: 6528...  Training loss: 0.9975...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6529...  Training loss: 0.8425...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6530...  Training loss: 0.9580...  0.1186 sec/batch
Epoch: 15/20...  Training Step: 6531...  Training loss: 0.9338...  0.1235 sec/batch
Epoch: 15/20...  Training Step: 6532...  Training loss: 1.0316...  0.1198 sec/batch
Epoch: 15/20...  Training Step: 6533...  Training loss: 0.9630...  0.1183 sec/batch
Epoch: 15/20...  Training Step: 6534...  Training loss: 0.9514...  0.1199 sec/batch
Epoch: 15/20...  Training Step: 6535...  Training loss: 0.9280...  0.1181 sec/batch
Epoch: 15/20...  Training Step: 6536...  Training loss: 1.1863...  0.1175 sec/batch
Epoch: 15/20...  Training Step: 6537...  Training loss: 0.9145...  0.1201 sec/batch
Epoch: 15/20...  Training Step: 6538...  Training loss: 0.9129...  0.1169 sec/batch
Epoch: 15/20...  Training Step: 6539...  Training loss: 1.0702...  0.1158 sec/batch
Epoch: 15/20...  Training Step: 6540...  Training loss: 0.8249...  0.1218 sec/batch
Epoch: 15/20...  Training Step: 6541...  Training loss: 0.8341...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6542...  Training loss: 0.9770...  0.1165 sec/batch
Epoch: 15/20...  Training Step: 6543...  Training loss: 1.0112...  0.1215 sec/batch
Epoch: 15/20...  Training Step: 6544...  Training loss: 1.0506...  0.1196 sec/batch
Epoch: 15/20...  Training Step: 6545...  Training loss: 0.9560...  0.1153 sec/batch
Epoch: 15/20...  Training Step: 6546...  Training loss: 0.9887...  0.1146 sec/batch
Epoch: 15/20...  Training Step: 6547...  Training loss: 0.9952...  0.1158 sec/batch
Epoch: 15/20...  Training Step: 6548...  Training loss: 0.9991...  0.1236 sec/batch
Epoch: 15/20...  Training Step: 6549...  Training loss: 0.9890...  0.1178 sec/batch
Epoch: 15/20...  Training Step: 6550...  Training loss: 1.0352...  0.1111 sec/batch
Epoch: 15/20...  Training Step: 6551...  Training loss: 0.9910...  0.1156 sec/batch
Epoch: 15/20...  Training Step: 6552...  Training loss: 0.9242...  0.1246 sec/batch
Epoch: 15/20...  Training Step: 6553...  Training loss: 0.9414...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6554...  Training loss: 0.9151...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6555...  Training loss: 0.8432...  0.1144 sec/batch
Epoch: 15/20...  Training Step: 6556...  Training loss: 0.9375...  0.1202 sec/batch
Epoch: 15/20...  Training Step: 6557...  Training loss: 0.8462...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6558...  Training loss: 0.9743...  0.1198 sec/batch
Epoch: 15/20...  Training Step: 6559...  Training loss: 0.9561...  0.1175 sec/batch
Epoch: 15/20...  Training Step: 6560...  Training loss: 1.0516...  0.1181 sec/batch
Epoch: 15/20...  Training Step: 6561...  Training loss: 0.9668...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6562...  Training loss: 1.0971...  0.1148 sec/batch
Epoch: 15/20...  Training Step: 6563...  Training loss: 1.0778...  0.1134 sec/batch
Epoch: 15/20...  Training Step: 6564...  Training loss: 1.0977...  0.1152 sec/batch
Epoch: 15/20...  Training Step: 6565...  Training loss: 1.0055...  0.1208 sec/batch
Epoch: 15/20...  Training Step: 6566...  Training loss: 0.9878...  0.1156 sec/batch
Epoch: 15/20...  Training Step: 6567...  Training loss: 1.1049...  0.1180 sec/batch
Epoch: 15/20...  Training Step: 6568...  Training loss: 0.9652...  0.1169 sec/batch
Epoch: 15/20...  Training Step: 6569...  Training loss: 0.9011...  0.1182 sec/batch
Epoch: 15/20...  Training Step: 6570...  Training loss: 0.8016...  0.1140 sec/batch
Epoch: 15/20...  Training Step: 6571...  Training loss: 1.1508...  0.1152 sec/batch
Epoch: 15/20...  Training Step: 6572...  Training loss: 0.9086...  0.1129 sec/batch
Epoch: 15/20...  Training Step: 6573...  Training loss: 1.0246...  0.1154 sec/batch
Epoch: 15/20...  Training Step: 6574...  Training loss: 1.0211...  0.1146 sec/batch
Epoch: 15/20...  Training Step: 6575...  Training loss: 1.0818...  0.1121 sec/batch
Epoch: 15/20...  Training Step: 6576...  Training loss: 0.9304...  0.1102 sec/batch
Epoch: 15/20...  Training Step: 6577...  Training loss: 1.1453...  0.1127 sec/batch
Epoch: 15/20...  Training Step: 6578...  Training loss: 0.9516...  0.1153 sec/batch
Epoch: 15/20...  Training Step: 6579...  Training loss: 0.8698...  0.1107 sec/batch
Epoch: 15/20...  Training Step: 6580...  Training loss: 1.0909...  0.1124 sec/batch
Epoch: 15/20...  Training Step: 6581...  Training loss: 1.0760...  0.1122 sec/batch
Epoch: 15/20...  Training Step: 6582...  Training loss: 1.1121...  0.1169 sec/batch
Epoch: 15/20...  Training Step: 6583...  Training loss: 0.9409...  0.1201 sec/batch
Epoch: 15/20...  Training Step: 6584...  Training loss: 1.0680...  0.1154 sec/batch
Epoch: 15/20...  Training Step: 6585...  Training loss: 1.1619...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6586...  Training loss: 1.0195...  0.1176 sec/batch
Epoch: 15/20...  Training Step: 6587...  Training loss: 1.2174...  0.1113 sec/batch
Epoch: 15/20...  Training Step: 6588...  Training loss: 1.1185...  0.1089 sec/batch
Epoch: 15/20...  Training Step: 6589...  Training loss: 0.8110...  0.1148 sec/batch
Epoch: 15/20...  Training Step: 6590...  Training loss: 1.1494...  0.1175 sec/batch
Epoch: 15/20...  Training Step: 6591...  Training loss: 1.0392...  0.1164 sec/batch
Epoch: 15/20...  Training Step: 6592...  Training loss: 1.0222...  0.1147 sec/batch
Epoch: 15/20...  Training Step: 6593...  Training loss: 1.1374...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6594...  Training loss: 1.0661...  0.1140 sec/batch
Epoch: 15/20...  Training Step: 6595...  Training loss: 1.1600...  0.1134 sec/batch
Epoch: 15/20...  Training Step: 6596...  Training loss: 1.0167...  0.1143 sec/batch
Epoch: 15/20...  Training Step: 6597...  Training loss: 1.1114...  0.1136 sec/batch
Epoch: 15/20...  Training Step: 6598...  Training loss: 1.3140...  0.1134 sec/batch
Epoch: 15/20...  Training Step: 6599...  Training loss: 1.0463...  0.1171 sec/batch
Epoch: 15/20...  Training Step: 6600...  Training loss: 1.0237...  0.1146 sec/batch
Epoch: 15/20...  Training Step: 6601...  Training loss: 1.0832...  0.1190 sec/batch
Epoch: 15/20...  Training Step: 6602...  Training loss: 1.0839...  0.1172 sec/batch
Epoch: 15/20...  Training Step: 6603...  Training loss: 0.9039...  0.1202 sec/batch
Epoch: 15/20...  Training Step: 6604...  Training loss: 1.0736...  0.1147 sec/batch
Epoch: 15/20...  Training Step: 6605...  Training loss: 1.0353...  0.1202 sec/batch
Epoch: 15/20...  Training Step: 6606...  Training loss: 1.0342...  0.1195 sec/batch
Epoch: 15/20...  Training Step: 6607...  Training loss: 1.1910...  0.1172 sec/batch
Epoch: 15/20...  Training Step: 6608...  Training loss: 0.8902...  0.1156 sec/batch
Epoch: 15/20...  Training Step: 6609...  Training loss: 1.0578...  0.1184 sec/batch
Epoch: 15/20...  Training Step: 6610...  Training loss: 1.1924...  0.1151 sec/batch
Epoch: 15/20...  Training Step: 6611...  Training loss: 1.0410...  0.1239 sec/batch
Epoch: 15/20...  Training Step: 6612...  Training loss: 0.9392...  0.1191 sec/batch
Epoch: 15/20...  Training Step: 6613...  Training loss: 0.9452...  0.1148 sec/batch
Epoch: 15/20...  Training Step: 6614...  Training loss: 1.0066...  0.1170 sec/batch
Epoch: 15/20...  Training Step: 6615...  Training loss: 0.9634...  0.1251 sec/batch
Epoch: 15/20...  Training Step: 6616...  Training loss: 0.8985...  0.1142 sec/batch
Epoch: 15/20...  Training Step: 6617...  Training loss: 0.9929...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6618...  Training loss: 1.0907...  0.1235 sec/batch
Epoch: 15/20...  Training Step: 6619...  Training loss: 1.0533...  0.1207 sec/batch
Epoch: 15/20...  Training Step: 6620...  Training loss: 0.9391...  0.1151 sec/batch
Epoch: 15/20...  Training Step: 6621...  Training loss: 1.0379...  0.1170 sec/batch
Epoch: 15/20...  Training Step: 6622...  Training loss: 0.9608...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6623...  Training loss: 0.9554...  0.1183 sec/batch
Epoch: 15/20...  Training Step: 6624...  Training loss: 1.0189...  0.1170 sec/batch
Epoch: 15/20...  Training Step: 6625...  Training loss: 1.0278...  0.1165 sec/batch
Epoch: 15/20...  Training Step: 6626...  Training loss: 1.0175...  0.1158 sec/batch
Epoch: 15/20...  Training Step: 6627...  Training loss: 1.2254...  0.1222 sec/batch
Epoch: 15/20...  Training Step: 6628...  Training loss: 0.9965...  0.1145 sec/batch
Epoch: 15/20...  Training Step: 6629...  Training loss: 1.0442...  0.1191 sec/batch
Epoch: 15/20...  Training Step: 6630...  Training loss: 1.1546...  0.1192 sec/batch
Epoch: 15/20...  Training Step: 6631...  Training loss: 1.0361...  0.1145 sec/batch
Epoch: 15/20...  Training Step: 6632...  Training loss: 0.9321...  0.1178 sec/batch
Epoch: 15/20...  Training Step: 6633...  Training loss: 0.8642...  0.1221 sec/batch
Epoch: 15/20...  Training Step: 6634...  Training loss: 0.9812...  0.1152 sec/batch
Epoch: 15/20...  Training Step: 6635...  Training loss: 0.9977...  0.1196 sec/batch
Epoch: 15/20...  Training Step: 6636...  Training loss: 1.0382...  0.1200 sec/batch
Epoch: 15/20...  Training Step: 6637...  Training loss: 0.8041...  0.1149 sec/batch
Epoch: 15/20...  Training Step: 6638...  Training loss: 0.8584...  0.1179 sec/batch
Epoch: 15/20...  Training Step: 6639...  Training loss: 0.9440...  0.1164 sec/batch
Epoch: 15/20...  Training Step: 6640...  Training loss: 0.9785...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6641...  Training loss: 0.8768...  0.1252 sec/batch
Epoch: 15/20...  Training Step: 6642...  Training loss: 0.9954...  0.1304 sec/batch
Epoch: 15/20...  Training Step: 6643...  Training loss: 0.9437...  0.1194 sec/batch
Epoch: 15/20...  Training Step: 6644...  Training loss: 0.8881...  0.1151 sec/batch
Epoch: 15/20...  Training Step: 6645...  Training loss: 0.9013...  0.1204 sec/batch
Epoch: 15/20...  Training Step: 6646...  Training loss: 1.0874...  0.1201 sec/batch
Epoch: 15/20...  Training Step: 6647...  Training loss: 1.0120...  0.1110 sec/batch
Epoch: 15/20...  Training Step: 6648...  Training loss: 1.0680...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6649...  Training loss: 1.1810...  0.1182 sec/batch
Epoch: 15/20...  Training Step: 6650...  Training loss: 1.0743...  0.1178 sec/batch
Epoch: 15/20...  Training Step: 6651...  Training loss: 0.9031...  0.1190 sec/batch
Epoch: 15/20...  Training Step: 6652...  Training loss: 0.9817...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6653...  Training loss: 0.9548...  0.1198 sec/batch
Epoch: 15/20...  Training Step: 6654...  Training loss: 0.8601...  0.1180 sec/batch
Epoch: 15/20...  Training Step: 6655...  Training loss: 0.8315...  0.1180 sec/batch
Epoch: 15/20...  Training Step: 6656...  Training loss: 0.9283...  0.1132 sec/batch
Epoch: 15/20...  Training Step: 6657...  Training loss: 1.0954...  0.1176 sec/batch
Epoch: 15/20...  Training Step: 6658...  Training loss: 1.0525...  0.1171 sec/batch
Epoch: 15/20...  Training Step: 6659...  Training loss: 1.1597...  0.1145 sec/batch
Epoch: 15/20...  Training Step: 6660...  Training loss: 0.9152...  0.1164 sec/batch
Epoch: 15/20...  Training Step: 6661...  Training loss: 1.0588...  0.1186 sec/batch
Epoch: 15/20...  Training Step: 6662...  Training loss: 0.8314...  0.1123 sec/batch
Epoch: 15/20...  Training Step: 6663...  Training loss: 0.8971...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6664...  Training loss: 1.2110...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6665...  Training loss: 0.9778...  0.1183 sec/batch
Epoch: 15/20...  Training Step: 6666...  Training loss: 1.0694...  0.1148 sec/batch
Epoch: 15/20...  Training Step: 6667...  Training loss: 1.0552...  0.1172 sec/batch
Epoch: 15/20...  Training Step: 6668...  Training loss: 1.0590...  0.1199 sec/batch
Epoch: 15/20...  Training Step: 6669...  Training loss: 1.0338...  0.1272 sec/batch
Epoch: 15/20...  Training Step: 6670...  Training loss: 0.9354...  0.1244 sec/batch
Epoch: 15/20...  Training Step: 6671...  Training loss: 1.1733...  0.1223 sec/batch
Epoch: 15/20...  Training Step: 6672...  Training loss: 0.8865...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6673...  Training loss: 0.8780...  0.1154 sec/batch
Epoch: 15/20...  Training Step: 6674...  Training loss: 1.0993...  0.1187 sec/batch
Epoch: 15/20...  Training Step: 6675...  Training loss: 0.9595...  0.1170 sec/batch
Epoch: 15/20...  Training Step: 6676...  Training loss: 1.0176...  0.1181 sec/batch
Epoch: 15/20...  Training Step: 6677...  Training loss: 0.8136...  0.1137 sec/batch
Epoch: 15/20...  Training Step: 6678...  Training loss: 1.2664...  0.1202 sec/batch
Epoch: 15/20...  Training Step: 6679...  Training loss: 1.0579...  0.1176 sec/batch
Epoch: 15/20...  Training Step: 6680...  Training loss: 1.0296...  0.1171 sec/batch
Epoch: 15/20...  Training Step: 6681...  Training loss: 1.0904...  0.1174 sec/batch
Epoch: 15/20...  Training Step: 6682...  Training loss: 1.0876...  0.1183 sec/batch
Epoch: 15/20...  Training Step: 6683...  Training loss: 1.0146...  0.1192 sec/batch
Epoch: 15/20...  Training Step: 6684...  Training loss: 0.8607...  0.1191 sec/batch
Epoch: 15/20...  Training Step: 6685...  Training loss: 1.0775...  0.1245 sec/batch
Epoch: 15/20...  Training Step: 6686...  Training loss: 0.8987...  0.1200 sec/batch
Epoch: 15/20...  Training Step: 6687...  Training loss: 0.9457...  0.1196 sec/batch
Epoch: 15/20...  Training Step: 6688...  Training loss: 1.0857...  0.1141 sec/batch
Epoch: 15/20...  Training Step: 6689...  Training loss: 1.1209...  0.1242 sec/batch
Epoch: 15/20...  Training Step: 6690...  Training loss: 1.1142...  0.1162 sec/batch
Epoch: 15/20...  Training Step: 6691...  Training loss: 0.9697...  0.1224 sec/batch
Epoch: 15/20...  Training Step: 6692...  Training loss: 1.0663...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6693...  Training loss: 0.8911...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6694...  Training loss: 1.1323...  0.1198 sec/batch
Epoch: 15/20...  Training Step: 6695...  Training loss: 0.7624...  0.1211 sec/batch
Epoch: 15/20...  Training Step: 6696...  Training loss: 0.9626...  0.1181 sec/batch
Epoch: 15/20...  Training Step: 6697...  Training loss: 0.9555...  0.1191 sec/batch
Epoch: 15/20...  Training Step: 6698...  Training loss: 1.1357...  0.1122 sec/batch
Epoch: 15/20...  Training Step: 6699...  Training loss: 1.0132...  0.1100 sec/batch
Epoch: 15/20...  Training Step: 6700...  Training loss: 1.0271...  0.1107 sec/batch
Epoch: 15/20...  Training Step: 6701...  Training loss: 0.9760...  0.1131 sec/batch
Epoch: 15/20...  Training Step: 6702...  Training loss: 0.9447...  0.1156 sec/batch
Epoch: 15/20...  Training Step: 6703...  Training loss: 0.9878...  0.1120 sec/batch
Epoch: 15/20...  Training Step: 6704...  Training loss: 0.9601...  0.1152 sec/batch
Epoch: 15/20...  Training Step: 6705...  Training loss: 1.1364...  0.1095 sec/batch
Epoch: 15/20...  Training Step: 6706...  Training loss: 0.8565...  0.1133 sec/batch
Epoch: 15/20...  Training Step: 6707...  Training loss: 0.9928...  0.1231 sec/batch
Epoch: 15/20...  Training Step: 6708...  Training loss: 1.1110...  0.1325 sec/batch
Epoch: 15/20...  Training Step: 6709...  Training loss: 1.0820...  0.1195 sec/batch
Epoch: 15/20...  Training Step: 6710...  Training loss: 0.9605...  0.1182 sec/batch
Epoch: 15/20...  Training Step: 6711...  Training loss: 1.0650...  0.1212 sec/batch
Epoch: 15/20...  Training Step: 6712...  Training loss: 1.0848...  0.1192 sec/batch
Epoch: 15/20...  Training Step: 6713...  Training loss: 1.0196...  0.1211 sec/batch
Epoch: 15/20...  Training Step: 6714...  Training loss: 1.0632...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6715...  Training loss: 1.1063...  0.1151 sec/batch
Epoch: 15/20...  Training Step: 6716...  Training loss: 1.0487...  0.1159 sec/batch
Epoch: 15/20...  Training Step: 6717...  Training loss: 0.9490...  0.1202 sec/batch
Epoch: 15/20...  Training Step: 6718...  Training loss: 1.2351...  0.1179 sec/batch
Epoch: 15/20...  Training Step: 6719...  Training loss: 1.0429...  0.1191 sec/batch
Epoch: 15/20...  Training Step: 6720...  Training loss: 1.2708...  0.1149 sec/batch
Epoch: 15/20...  Training Step: 6721...  Training loss: 1.0779...  0.1169 sec/batch
Epoch: 15/20...  Training Step: 6722...  Training loss: 1.1814...  0.1186 sec/batch
Epoch: 15/20...  Training Step: 6723...  Training loss: 1.1660...  0.1204 sec/batch
Epoch: 15/20...  Training Step: 6724...  Training loss: 0.9568...  0.1136 sec/batch
Epoch: 15/20...  Training Step: 6725...  Training loss: 1.1080...  0.1153 sec/batch
Epoch: 15/20...  Training Step: 6726...  Training loss: 0.9785...  0.1187 sec/batch
Epoch: 15/20...  Training Step: 6727...  Training loss: 1.0305...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6728...  Training loss: 0.9777...  0.1162 sec/batch
Epoch: 15/20...  Training Step: 6729...  Training loss: 1.2124...  0.1211 sec/batch
Epoch: 15/20...  Training Step: 6730...  Training loss: 1.0597...  0.1184 sec/batch
Epoch: 15/20...  Training Step: 6731...  Training loss: 1.0805...  0.1212 sec/batch
Epoch: 15/20...  Training Step: 6732...  Training loss: 1.0248...  0.1240 sec/batch
Epoch: 15/20...  Training Step: 6733...  Training loss: 1.1033...  0.1253 sec/batch
Epoch: 15/20...  Training Step: 6734...  Training loss: 0.9329...  0.1190 sec/batch
Epoch: 15/20...  Training Step: 6735...  Training loss: 1.1220...  0.1153 sec/batch
Epoch: 15/20...  Training Step: 6736...  Training loss: 1.0138...  0.1192 sec/batch
Epoch: 15/20...  Training Step: 6737...  Training loss: 1.0124...  0.1303 sec/batch
Epoch: 15/20...  Training Step: 6738...  Training loss: 0.9229...  0.1368 sec/batch
Epoch: 15/20...  Training Step: 6739...  Training loss: 0.9560...  0.1272 sec/batch
Epoch: 15/20...  Training Step: 6740...  Training loss: 1.0456...  0.1355 sec/batch
Epoch: 15/20...  Training Step: 6741...  Training loss: 1.1132...  0.1237 sec/batch
Epoch: 15/20...  Training Step: 6742...  Training loss: 0.9404...  0.1191 sec/batch
Epoch: 15/20...  Training Step: 6743...  Training loss: 0.9950...  0.1194 sec/batch
Epoch: 15/20...  Training Step: 6744...  Training loss: 1.1028...  0.1182 sec/batch
Epoch: 15/20...  Training Step: 6745...  Training loss: 1.0576...  0.1169 sec/batch
Epoch: 15/20...  Training Step: 6746...  Training loss: 1.0528...  0.1185 sec/batch
Epoch: 15/20...  Training Step: 6747...  Training loss: 0.9890...  0.1237 sec/batch
Epoch: 15/20...  Training Step: 6748...  Training loss: 1.0118...  0.1215 sec/batch
Epoch: 15/20...  Training Step: 6749...  Training loss: 0.9943...  0.1222 sec/batch
Epoch: 15/20...  Training Step: 6750...  Training loss: 0.9480...  0.1208 sec/batch
Epoch: 15/20...  Training Step: 6751...  Training loss: 0.9692...  0.1583 sec/batch
Epoch: 15/20...  Training Step: 6752...  Training loss: 1.1063...  0.1410 sec/batch
Epoch: 15/20...  Training Step: 6753...  Training loss: 1.1836...  0.1335 sec/batch
Epoch: 15/20...  Training Step: 6754...  Training loss: 0.9717...  0.1276 sec/batch
Epoch: 15/20...  Training Step: 6755...  Training loss: 0.9445...  0.1215 sec/batch
Epoch: 15/20...  Training Step: 6756...  Training loss: 0.9991...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6757...  Training loss: 1.0592...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6758...  Training loss: 1.0950...  0.1165 sec/batch
Epoch: 15/20...  Training Step: 6759...  Training loss: 1.0532...  0.1131 sec/batch
Epoch: 15/20...  Training Step: 6760...  Training loss: 1.0834...  0.1183 sec/batch
Epoch: 15/20...  Training Step: 6761...  Training loss: 1.1485...  0.1175 sec/batch
Epoch: 15/20...  Training Step: 6762...  Training loss: 0.9363...  0.1148 sec/batch
Epoch: 15/20...  Training Step: 6763...  Training loss: 1.2381...  0.1177 sec/batch
Epoch: 15/20...  Training Step: 6764...  Training loss: 1.1352...  0.1182 sec/batch
Epoch: 15/20...  Training Step: 6765...  Training loss: 1.1141...  0.1156 sec/batch
Epoch: 15/20...  Training Step: 6766...  Training loss: 1.1718...  0.1139 sec/batch
Epoch: 15/20...  Training Step: 6767...  Training loss: 1.0655...  0.1174 sec/batch
Epoch: 15/20...  Training Step: 6768...  Training loss: 1.1071...  0.1215 sec/batch
Epoch: 15/20...  Training Step: 6769...  Training loss: 1.0768...  0.1222 sec/batch
Epoch: 15/20...  Training Step: 6770...  Training loss: 1.1926...  0.1134 sec/batch
Epoch: 15/20...  Training Step: 6771...  Training loss: 0.9967...  0.1162 sec/batch
Epoch: 15/20...  Training Step: 6772...  Training loss: 1.0076...  0.1176 sec/batch
Epoch: 15/20...  Training Step: 6773...  Training loss: 0.9750...  0.1161 sec/batch
Epoch: 15/20...  Training Step: 6774...  Training loss: 1.2019...  0.1155 sec/batch
Epoch: 15/20...  Training Step: 6775...  Training loss: 1.0330...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6776...  Training loss: 0.9422...  0.1153 sec/batch
Epoch: 15/20...  Training Step: 6777...  Training loss: 1.0327...  0.1168 sec/batch
Epoch: 15/20...  Training Step: 6778...  Training loss: 1.0618...  0.1171 sec/batch
Epoch: 15/20...  Training Step: 6779...  Training loss: 1.0575...  0.1200 sec/batch
Epoch: 15/20...  Training Step: 6780...  Training loss: 1.0900...  0.1188 sec/batch
Epoch: 15/20...  Training Step: 6781...  Training loss: 0.9455...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6782...  Training loss: 0.9305...  0.1188 sec/batch
Epoch: 15/20...  Training Step: 6783...  Training loss: 1.0326...  0.1155 sec/batch
Epoch: 15/20...  Training Step: 6784...  Training loss: 1.1005...  0.1188 sec/batch
Epoch: 15/20...  Training Step: 6785...  Training loss: 1.0623...  0.1164 sec/batch
Epoch: 15/20...  Training Step: 6786...  Training loss: 1.0778...  0.1152 sec/batch
Epoch: 15/20...  Training Step: 6787...  Training loss: 1.0997...  0.1164 sec/batch
Epoch: 15/20...  Training Step: 6788...  Training loss: 0.9744...  0.1216 sec/batch
Epoch: 15/20...  Training Step: 6789...  Training loss: 0.9911...  0.1179 sec/batch
Epoch: 15/20...  Training Step: 6790...  Training loss: 1.0803...  0.1136 sec/batch
Epoch: 15/20...  Training Step: 6791...  Training loss: 1.1085...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6792...  Training loss: 1.1659...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6793...  Training loss: 1.0423...  0.1192 sec/batch
Epoch: 15/20...  Training Step: 6794...  Training loss: 0.8949...  0.1190 sec/batch
Epoch: 15/20...  Training Step: 6795...  Training loss: 1.0118...  0.1185 sec/batch
Epoch: 15/20...  Training Step: 6796...  Training loss: 1.0223...  0.1123 sec/batch
Epoch: 15/20...  Training Step: 6797...  Training loss: 1.0608...  0.1212 sec/batch
Epoch: 15/20...  Training Step: 6798...  Training loss: 0.9903...  0.1134 sec/batch
Epoch: 15/20...  Training Step: 6799...  Training loss: 0.9151...  0.1214 sec/batch
Epoch: 15/20...  Training Step: 6800...  Training loss: 1.2882...  0.1181 sec/batch
Epoch: 15/20...  Training Step: 6801...  Training loss: 1.0631...  0.1164 sec/batch
Epoch: 15/20...  Training Step: 6802...  Training loss: 1.1026...  0.1124 sec/batch
Epoch: 15/20...  Training Step: 6803...  Training loss: 1.0593...  0.1196 sec/batch
Epoch: 15/20...  Training Step: 6804...  Training loss: 1.1869...  0.1163 sec/batch
Epoch: 15/20...  Training Step: 6805...  Training loss: 1.2459...  0.1145 sec/batch
Epoch: 15/20...  Training Step: 6806...  Training loss: 1.0611...  0.1186 sec/batch
Epoch: 15/20...  Training Step: 6807...  Training loss: 0.9280...  0.1183 sec/batch
Epoch: 15/20...  Training Step: 6808...  Training loss: 0.9916...  0.1137 sec/batch
Epoch: 15/20...  Training Step: 6809...  Training loss: 0.9397...  0.1216 sec/batch
Epoch: 15/20...  Training Step: 6810...  Training loss: 0.9364...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6811...  Training loss: 0.9037...  0.1159 sec/batch
Epoch: 15/20...  Training Step: 6812...  Training loss: 0.8956...  0.1139 sec/batch
Epoch: 15/20...  Training Step: 6813...  Training loss: 0.9080...  0.1127 sec/batch
Epoch: 15/20...  Training Step: 6814...  Training loss: 1.1136...  0.1161 sec/batch
Epoch: 15/20...  Training Step: 6815...  Training loss: 0.9171...  0.1168 sec/batch
Epoch: 15/20...  Training Step: 6816...  Training loss: 0.9328...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6817...  Training loss: 0.8656...  0.1158 sec/batch
Epoch: 15/20...  Training Step: 6818...  Training loss: 1.1470...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6819...  Training loss: 0.9779...  0.1159 sec/batch
Epoch: 15/20...  Training Step: 6820...  Training loss: 0.8542...  0.1222 sec/batch
Epoch: 15/20...  Training Step: 6821...  Training loss: 0.8958...  0.1196 sec/batch
Epoch: 15/20...  Training Step: 6822...  Training loss: 0.8009...  0.1178 sec/batch
Epoch: 15/20...  Training Step: 6823...  Training loss: 0.9194...  0.1201 sec/batch
Epoch: 15/20...  Training Step: 6824...  Training loss: 1.0190...  0.1190 sec/batch
Epoch: 15/20...  Training Step: 6825...  Training loss: 1.1042...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6826...  Training loss: 0.9625...  0.1175 sec/batch
Epoch: 15/20...  Training Step: 6827...  Training loss: 1.0845...  0.1160 sec/batch
Epoch: 15/20...  Training Step: 6828...  Training loss: 1.1032...  0.1146 sec/batch
Epoch: 15/20...  Training Step: 6829...  Training loss: 0.8736...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6830...  Training loss: 1.1096...  0.1170 sec/batch
Epoch: 15/20...  Training Step: 6831...  Training loss: 1.0335...  0.1154 sec/batch
Epoch: 15/20...  Training Step: 6832...  Training loss: 0.9511...  0.1183 sec/batch
Epoch: 15/20...  Training Step: 6833...  Training loss: 0.9516...  0.1207 sec/batch
Epoch: 15/20...  Training Step: 6834...  Training loss: 0.9326...  0.1184 sec/batch
Epoch: 15/20...  Training Step: 6835...  Training loss: 0.9433...  0.1204 sec/batch
Epoch: 15/20...  Training Step: 6836...  Training loss: 0.9690...  0.1131 sec/batch
Epoch: 15/20...  Training Step: 6837...  Training loss: 1.0162...  0.1133 sec/batch
Epoch: 15/20...  Training Step: 6838...  Training loss: 1.1990...  0.1160 sec/batch
Epoch: 15/20...  Training Step: 6839...  Training loss: 0.9595...  0.1194 sec/batch
Epoch: 15/20...  Training Step: 6840...  Training loss: 1.1413...  0.1268 sec/batch
Epoch: 15/20...  Training Step: 6841...  Training loss: 0.9334...  0.1149 sec/batch
Epoch: 15/20...  Training Step: 6842...  Training loss: 0.9415...  0.1235 sec/batch
Epoch: 15/20...  Training Step: 6843...  Training loss: 0.8384...  0.1480 sec/batch
Epoch: 15/20...  Training Step: 6844...  Training loss: 1.2064...  0.1430 sec/batch
Epoch: 15/20...  Training Step: 6845...  Training loss: 0.9263...  0.1186 sec/batch
Epoch: 15/20...  Training Step: 6846...  Training loss: 0.9278...  0.1267 sec/batch
Epoch: 15/20...  Training Step: 6847...  Training loss: 1.0316...  0.1188 sec/batch
Epoch: 15/20...  Training Step: 6848...  Training loss: 0.9625...  0.1255 sec/batch
Epoch: 15/20...  Training Step: 6849...  Training loss: 0.8615...  0.1191 sec/batch
Epoch: 15/20...  Training Step: 6850...  Training loss: 0.7743...  0.1157 sec/batch
Epoch: 15/20...  Training Step: 6851...  Training loss: 1.0212...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6852...  Training loss: 0.9841...  0.1204 sec/batch
Epoch: 15/20...  Training Step: 6853...  Training loss: 0.9253...  0.1162 sec/batch
Epoch: 15/20...  Training Step: 6854...  Training loss: 1.1074...  0.1199 sec/batch
Epoch: 15/20...  Training Step: 6855...  Training loss: 1.2505...  0.1165 sec/batch
Epoch: 15/20...  Training Step: 6856...  Training loss: 0.8603...  0.1213 sec/batch
Epoch: 15/20...  Training Step: 6857...  Training loss: 1.0941...  0.1202 sec/batch
Epoch: 15/20...  Training Step: 6858...  Training loss: 1.0111...  0.1193 sec/batch
Epoch: 15/20...  Training Step: 6859...  Training loss: 0.9568...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6860...  Training loss: 1.0315...  0.1149 sec/batch
Epoch: 15/20...  Training Step: 6861...  Training loss: 1.0891...  0.1138 sec/batch
Epoch: 15/20...  Training Step: 6862...  Training loss: 1.0784...  0.1163 sec/batch
Epoch: 15/20...  Training Step: 6863...  Training loss: 0.9990...  0.1273 sec/batch
Epoch: 15/20...  Training Step: 6864...  Training loss: 1.1366...  0.1161 sec/batch
Epoch: 15/20...  Training Step: 6865...  Training loss: 1.0290...  0.1181 sec/batch
Epoch: 15/20...  Training Step: 6866...  Training loss: 0.9312...  0.1164 sec/batch
Epoch: 15/20...  Training Step: 6867...  Training loss: 0.7874...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6868...  Training loss: 1.0315...  0.1153 sec/batch
Epoch: 15/20...  Training Step: 6869...  Training loss: 0.9255...  0.1146 sec/batch
Epoch: 15/20...  Training Step: 6870...  Training loss: 1.2053...  0.1178 sec/batch
Epoch: 15/20...  Training Step: 6871...  Training loss: 0.9280...  0.1223 sec/batch
Epoch: 15/20...  Training Step: 6872...  Training loss: 1.0346...  0.1202 sec/batch
Epoch: 15/20...  Training Step: 6873...  Training loss: 0.9944...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6874...  Training loss: 1.1464...  0.1219 sec/batch
Epoch: 15/20...  Training Step: 6875...  Training loss: 1.0061...  0.1174 sec/batch
Epoch: 15/20...  Training Step: 6876...  Training loss: 1.0994...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6877...  Training loss: 0.9471...  0.1185 sec/batch
Epoch: 15/20...  Training Step: 6878...  Training loss: 0.9179...  0.1190 sec/batch
Epoch: 15/20...  Training Step: 6879...  Training loss: 0.9368...  0.1170 sec/batch
Epoch: 15/20...  Training Step: 6880...  Training loss: 1.1119...  0.1174 sec/batch
Epoch: 15/20...  Training Step: 6881...  Training loss: 0.8844...  0.1207 sec/batch
Epoch: 15/20...  Training Step: 6882...  Training loss: 0.9586...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6883...  Training loss: 1.1055...  0.1151 sec/batch
Epoch: 15/20...  Training Step: 6884...  Training loss: 0.8867...  0.1205 sec/batch
Epoch: 15/20...  Training Step: 6885...  Training loss: 0.8877...  0.1206 sec/batch
Epoch: 15/20...  Training Step: 6886...  Training loss: 1.0813...  0.1163 sec/batch
Epoch: 15/20...  Training Step: 6887...  Training loss: 0.9972...  0.1192 sec/batch
Epoch: 15/20...  Training Step: 6888...  Training loss: 0.9264...  0.1161 sec/batch
Epoch: 15/20...  Training Step: 6889...  Training loss: 1.0818...  0.1170 sec/batch
Epoch: 15/20...  Training Step: 6890...  Training loss: 0.9540...  0.1162 sec/batch
Epoch: 15/20...  Training Step: 6891...  Training loss: 1.0140...  0.1184 sec/batch
Epoch: 15/20...  Training Step: 6892...  Training loss: 1.0507...  0.1175 sec/batch
Epoch: 15/20...  Training Step: 6893...  Training loss: 0.9124...  0.1180 sec/batch
Epoch: 15/20...  Training Step: 6894...  Training loss: 1.0473...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6895...  Training loss: 1.0077...  0.1173 sec/batch
Epoch: 15/20...  Training Step: 6896...  Training loss: 1.1014...  0.1151 sec/batch
Epoch: 15/20...  Training Step: 6897...  Training loss: 0.9444...  0.1212 sec/batch
Epoch: 15/20...  Training Step: 6898...  Training loss: 0.9800...  0.1191 sec/batch
Epoch: 15/20...  Training Step: 6899...  Training loss: 0.8963...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6900...  Training loss: 1.0347...  0.1145 sec/batch
Epoch: 15/20...  Training Step: 6901...  Training loss: 1.0744...  0.1185 sec/batch
Epoch: 15/20...  Training Step: 6902...  Training loss: 1.0411...  0.1167 sec/batch
Epoch: 15/20...  Training Step: 6903...  Training loss: 1.0946...  0.1175 sec/batch
Epoch: 15/20...  Training Step: 6904...  Training loss: 1.2304...  0.1195 sec/batch
Epoch: 15/20...  Training Step: 6905...  Training loss: 0.8963...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6906...  Training loss: 0.9680...  0.1162 sec/batch
Epoch: 15/20...  Training Step: 6907...  Training loss: 0.9032...  0.1202 sec/batch
Epoch: 15/20...  Training Step: 6908...  Training loss: 0.9238...  0.1150 sec/batch
Epoch: 15/20...  Training Step: 6909...  Training loss: 1.0511...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6910...  Training loss: 0.8669...  0.1237 sec/batch
Epoch: 15/20...  Training Step: 6911...  Training loss: 1.0622...  0.1187 sec/batch
Epoch: 15/20...  Training Step: 6912...  Training loss: 1.0791...  0.1245 sec/batch
Epoch: 15/20...  Training Step: 6913...  Training loss: 1.0049...  0.1250 sec/batch
Epoch: 15/20...  Training Step: 6914...  Training loss: 0.9165...  0.1222 sec/batch
Epoch: 15/20...  Training Step: 6915...  Training loss: 0.8702...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6916...  Training loss: 0.8922...  0.1174 sec/batch
Epoch: 15/20...  Training Step: 6917...  Training loss: 1.0856...  0.1242 sec/batch
Epoch: 15/20...  Training Step: 6918...  Training loss: 0.9481...  0.1285 sec/batch
Epoch: 15/20...  Training Step: 6919...  Training loss: 1.0237...  0.1251 sec/batch
Epoch: 15/20...  Training Step: 6920...  Training loss: 1.0620...  0.1282 sec/batch
Epoch: 15/20...  Training Step: 6921...  Training loss: 0.9703...  0.1183 sec/batch
Epoch: 15/20...  Training Step: 6922...  Training loss: 1.0614...  0.1185 sec/batch
Epoch: 15/20...  Training Step: 6923...  Training loss: 0.8612...  0.1201 sec/batch
Epoch: 15/20...  Training Step: 6924...  Training loss: 0.8586...  0.1189 sec/batch
Epoch: 15/20...  Training Step: 6925...  Training loss: 0.8879...  0.1214 sec/batch
Epoch: 15/20...  Training Step: 6926...  Training loss: 0.9492...  0.1180 sec/batch
Epoch: 15/20...  Training Step: 6927...  Training loss: 1.1726...  0.1204 sec/batch
Epoch: 15/20...  Training Step: 6928...  Training loss: 0.9658...  0.1220 sec/batch
Epoch: 15/20...  Training Step: 6929...  Training loss: 1.1220...  0.1199 sec/batch
Epoch: 15/20...  Training Step: 6930...  Training loss: 1.0549...  0.1245 sec/batch
Epoch: 15/20...  Training Step: 6931...  Training loss: 0.9249...  0.1147 sec/batch
Epoch: 15/20...  Training Step: 6932...  Training loss: 1.0239...  0.1171 sec/batch
Epoch: 15/20...  Training Step: 6933...  Training loss: 0.8686...  0.1160 sec/batch
Epoch: 15/20...  Training Step: 6934...  Training loss: 1.1294...  0.1155 sec/batch
Epoch: 15/20...  Training Step: 6935...  Training loss: 1.1428...  0.1221 sec/batch
Epoch: 15/20...  Training Step: 6936...  Training loss: 1.1718...  0.1160 sec/batch
Epoch: 15/20...  Training Step: 6937...  Training loss: 0.8317...  0.1192 sec/batch
Epoch: 15/20...  Training Step: 6938...  Training loss: 1.0967...  0.1169 sec/batch
Epoch: 15/20...  Training Step: 6939...  Training loss: 0.9797...  0.1203 sec/batch
Epoch: 15/20...  Training Step: 6940...  Training loss: 0.9744...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6941...  Training loss: 1.0083...  0.1235 sec/batch
Epoch: 15/20...  Training Step: 6942...  Training loss: 1.0474...  0.1170 sec/batch
Epoch: 15/20...  Training Step: 6943...  Training loss: 0.9513...  0.1165 sec/batch
Epoch: 15/20...  Training Step: 6944...  Training loss: 0.8746...  0.1196 sec/batch
Epoch: 15/20...  Training Step: 6945...  Training loss: 0.9480...  0.1179 sec/batch
Epoch: 15/20...  Training Step: 6946...  Training loss: 1.0609...  0.1215 sec/batch
Epoch: 15/20...  Training Step: 6947...  Training loss: 0.8445...  0.1174 sec/batch
Epoch: 15/20...  Training Step: 6948...  Training loss: 0.9921...  0.1178 sec/batch
Epoch: 15/20...  Training Step: 6949...  Training loss: 0.9529...  0.1197 sec/batch
Epoch: 15/20...  Training Step: 6950...  Training loss: 1.2175...  0.1244 sec/batch
Epoch: 15/20...  Training Step: 6951...  Training loss: 1.0624...  0.1286 sec/batch
Epoch: 15/20...  Training Step: 6952...  Training loss: 0.8996...  0.1319 sec/batch
Epoch: 15/20...  Training Step: 6953...  Training loss: 0.8964...  0.1185 sec/batch
Epoch: 15/20...  Training Step: 6954...  Training loss: 0.8401...  0.1239 sec/batch
Epoch: 15/20...  Training Step: 6955...  Training loss: 0.8732...  0.1262 sec/batch
Epoch: 15/20...  Training Step: 6956...  Training loss: 0.9830...  0.1166 sec/batch
Epoch: 15/20...  Training Step: 6957...  Training loss: 0.9378...  0.1210 sec/batch
Epoch: 15/20...  Training Step: 6958...  Training loss: 1.0186...  0.1177 sec/batch
Epoch: 15/20...  Training Step: 6959...  Training loss: 0.9887...  0.1175 sec/batch
Epoch: 15/20...  Training Step: 6960...  Training loss: 1.0287...  0.1169 sec/batch
Epoch: 16/20...  Training Step: 6961...  Training loss: 1.1333...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 6962...  Training loss: 1.0790...  0.1171 sec/batch
Epoch: 16/20...  Training Step: 6963...  Training loss: 1.1068...  0.1228 sec/batch
Epoch: 16/20...  Training Step: 6964...  Training loss: 1.1257...  0.1194 sec/batch
Epoch: 16/20...  Training Step: 6965...  Training loss: 1.0779...  0.1212 sec/batch
Epoch: 16/20...  Training Step: 6966...  Training loss: 0.9662...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 6967...  Training loss: 1.0658...  0.1223 sec/batch
Epoch: 16/20...  Training Step: 6968...  Training loss: 0.9632...  0.1154 sec/batch
Epoch: 16/20...  Training Step: 6969...  Training loss: 0.7743...  0.1211 sec/batch
Epoch: 16/20...  Training Step: 6970...  Training loss: 1.0108...  0.1197 sec/batch
Epoch: 16/20...  Training Step: 6971...  Training loss: 0.9431...  0.1210 sec/batch
Epoch: 16/20...  Training Step: 6972...  Training loss: 0.9104...  0.1216 sec/batch
Epoch: 16/20...  Training Step: 6973...  Training loss: 1.1822...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 6974...  Training loss: 0.7536...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 6975...  Training loss: 1.0279...  0.1155 sec/batch
Epoch: 16/20...  Training Step: 6976...  Training loss: 1.0946...  0.1171 sec/batch
Epoch: 16/20...  Training Step: 6977...  Training loss: 0.9426...  0.1193 sec/batch
Epoch: 16/20...  Training Step: 6978...  Training loss: 1.0575...  0.1138 sec/batch
Epoch: 16/20...  Training Step: 6979...  Training loss: 1.0523...  0.1186 sec/batch
Epoch: 16/20...  Training Step: 6980...  Training loss: 0.9840...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 6981...  Training loss: 0.9742...  0.1186 sec/batch
Epoch: 16/20...  Training Step: 6982...  Training loss: 0.9730...  0.1229 sec/batch
Epoch: 16/20...  Training Step: 6983...  Training loss: 1.1550...  0.1198 sec/batch
Epoch: 16/20...  Training Step: 6984...  Training loss: 0.8731...  0.1166 sec/batch
Epoch: 16/20...  Training Step: 6985...  Training loss: 0.9552...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 6986...  Training loss: 0.9489...  0.1202 sec/batch
Epoch: 16/20...  Training Step: 6987...  Training loss: 1.1260...  0.1149 sec/batch
Epoch: 16/20...  Training Step: 6988...  Training loss: 0.9653...  0.1175 sec/batch
Epoch: 16/20...  Training Step: 6989...  Training loss: 0.9294...  0.1159 sec/batch
Epoch: 16/20...  Training Step: 6990...  Training loss: 1.0076...  0.1172 sec/batch
Epoch: 16/20...  Training Step: 6991...  Training loss: 0.9812...  0.1167 sec/batch
Epoch: 16/20...  Training Step: 6992...  Training loss: 0.8331...  0.1224 sec/batch
Epoch: 16/20...  Training Step: 6993...  Training loss: 0.9562...  0.1190 sec/batch
Epoch: 16/20...  Training Step: 6994...  Training loss: 0.9095...  0.1189 sec/batch
Epoch: 16/20...  Training Step: 6995...  Training loss: 0.8529...  0.1183 sec/batch
Epoch: 16/20...  Training Step: 6996...  Training loss: 0.8459...  0.1180 sec/batch
Epoch: 16/20...  Training Step: 6997...  Training loss: 0.9255...  0.1183 sec/batch
Epoch: 16/20...  Training Step: 6998...  Training loss: 0.8733...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 6999...  Training loss: 0.8411...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7000...  Training loss: 1.2394...  0.1137 sec/batch
Epoch: 16/20...  Training Step: 7001...  Training loss: 0.9696...  0.1195 sec/batch
Epoch: 16/20...  Training Step: 7002...  Training loss: 0.9215...  0.1175 sec/batch
Epoch: 16/20...  Training Step: 7003...  Training loss: 1.0692...  0.1204 sec/batch
Epoch: 16/20...  Training Step: 7004...  Training loss: 0.8447...  0.1126 sec/batch
Epoch: 16/20...  Training Step: 7005...  Training loss: 0.9486...  0.1207 sec/batch
Epoch: 16/20...  Training Step: 7006...  Training loss: 0.8775...  0.1159 sec/batch
Epoch: 16/20...  Training Step: 7007...  Training loss: 0.9330...  0.1152 sec/batch
Epoch: 16/20...  Training Step: 7008...  Training loss: 1.0170...  0.1156 sec/batch
Epoch: 16/20...  Training Step: 7009...  Training loss: 0.8771...  0.1181 sec/batch
Epoch: 16/20...  Training Step: 7010...  Training loss: 0.9803...  0.1192 sec/batch
Epoch: 16/20...  Training Step: 7011...  Training loss: 0.9992...  0.1208 sec/batch
Epoch: 16/20...  Training Step: 7012...  Training loss: 0.9405...  0.1144 sec/batch
Epoch: 16/20...  Training Step: 7013...  Training loss: 0.9195...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7014...  Training loss: 0.9362...  0.1134 sec/batch
Epoch: 16/20...  Training Step: 7015...  Training loss: 0.8977...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 7016...  Training loss: 0.9771...  0.1142 sec/batch
Epoch: 16/20...  Training Step: 7017...  Training loss: 0.9191...  0.1199 sec/batch
Epoch: 16/20...  Training Step: 7018...  Training loss: 1.0372...  0.1164 sec/batch
Epoch: 16/20...  Training Step: 7019...  Training loss: 0.8353...  0.1191 sec/batch
Epoch: 16/20...  Training Step: 7020...  Training loss: 0.8787...  0.1177 sec/batch
Epoch: 16/20...  Training Step: 7021...  Training loss: 0.9051...  0.1150 sec/batch
Epoch: 16/20...  Training Step: 7022...  Training loss: 1.0401...  0.1161 sec/batch
Epoch: 16/20...  Training Step: 7023...  Training loss: 0.9227...  0.1165 sec/batch
Epoch: 16/20...  Training Step: 7024...  Training loss: 0.9573...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 7025...  Training loss: 0.9478...  0.1159 sec/batch
Epoch: 16/20...  Training Step: 7026...  Training loss: 1.1726...  0.1155 sec/batch
Epoch: 16/20...  Training Step: 7027...  Training loss: 0.9931...  0.1180 sec/batch
Epoch: 16/20...  Training Step: 7028...  Training loss: 0.9751...  0.1182 sec/batch
Epoch: 16/20...  Training Step: 7029...  Training loss: 0.9094...  0.1135 sec/batch
Epoch: 16/20...  Training Step: 7030...  Training loss: 0.9701...  0.1223 sec/batch
Epoch: 16/20...  Training Step: 7031...  Training loss: 1.0832...  0.1175 sec/batch
Epoch: 16/20...  Training Step: 7032...  Training loss: 0.9279...  0.1173 sec/batch
Epoch: 16/20...  Training Step: 7033...  Training loss: 0.9121...  0.1177 sec/batch
Epoch: 16/20...  Training Step: 7034...  Training loss: 0.8121...  0.1197 sec/batch
Epoch: 16/20...  Training Step: 7035...  Training loss: 1.1808...  0.1157 sec/batch
Epoch: 16/20...  Training Step: 7036...  Training loss: 0.8829...  0.1135 sec/batch
Epoch: 16/20...  Training Step: 7037...  Training loss: 0.9465...  0.1123 sec/batch
Epoch: 16/20...  Training Step: 7038...  Training loss: 0.9284...  0.1222 sec/batch
Epoch: 16/20...  Training Step: 7039...  Training loss: 1.0453...  0.1182 sec/batch
Epoch: 16/20...  Training Step: 7040...  Training loss: 0.8917...  0.1209 sec/batch
Epoch: 16/20...  Training Step: 7041...  Training loss: 1.2090...  0.1170 sec/batch
Epoch: 16/20...  Training Step: 7042...  Training loss: 0.9012...  0.1149 sec/batch
Epoch: 16/20...  Training Step: 7043...  Training loss: 0.8297...  0.1192 sec/batch
Epoch: 16/20...  Training Step: 7044...  Training loss: 1.0218...  0.1236 sec/batch
Epoch: 16/20...  Training Step: 7045...  Training loss: 0.9748...  0.1294 sec/batch
Epoch: 16/20...  Training Step: 7046...  Training loss: 1.1056...  0.1172 sec/batch
Epoch: 16/20...  Training Step: 7047...  Training loss: 0.9351...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7048...  Training loss: 1.1500...  0.1165 sec/batch
Epoch: 16/20...  Training Step: 7049...  Training loss: 1.0241...  0.1148 sec/batch
Epoch: 16/20...  Training Step: 7050...  Training loss: 0.9663...  0.1124 sec/batch
Epoch: 16/20...  Training Step: 7051...  Training loss: 1.0666...  0.1171 sec/batch
Epoch: 16/20...  Training Step: 7052...  Training loss: 1.1449...  0.1119 sec/batch
Epoch: 16/20...  Training Step: 7053...  Training loss: 0.8723...  0.1130 sec/batch
Epoch: 16/20...  Training Step: 7054...  Training loss: 1.2180...  0.1191 sec/batch
Epoch: 16/20...  Training Step: 7055...  Training loss: 1.0186...  0.1196 sec/batch
Epoch: 16/20...  Training Step: 7056...  Training loss: 0.9323...  0.1184 sec/batch
Epoch: 16/20...  Training Step: 7057...  Training loss: 1.1748...  0.1187 sec/batch
Epoch: 16/20...  Training Step: 7058...  Training loss: 1.0486...  0.1222 sec/batch
Epoch: 16/20...  Training Step: 7059...  Training loss: 1.1499...  0.1130 sec/batch
Epoch: 16/20...  Training Step: 7060...  Training loss: 1.0585...  0.1169 sec/batch
Epoch: 16/20...  Training Step: 7061...  Training loss: 1.0631...  0.1194 sec/batch
Epoch: 16/20...  Training Step: 7062...  Training loss: 1.1187...  0.1189 sec/batch
Epoch: 16/20...  Training Step: 7063...  Training loss: 1.1889...  0.1177 sec/batch
Epoch: 16/20...  Training Step: 7064...  Training loss: 1.0728...  0.1149 sec/batch
Epoch: 16/20...  Training Step: 7065...  Training loss: 1.0737...  0.1190 sec/batch
Epoch: 16/20...  Training Step: 7066...  Training loss: 1.1936...  0.1165 sec/batch
Epoch: 16/20...  Training Step: 7067...  Training loss: 0.9580...  0.1197 sec/batch
Epoch: 16/20...  Training Step: 7068...  Training loss: 1.0927...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7069...  Training loss: 1.0151...  0.1181 sec/batch
Epoch: 16/20...  Training Step: 7070...  Training loss: 1.0413...  0.1151 sec/batch
Epoch: 16/20...  Training Step: 7071...  Training loss: 1.0231...  0.1181 sec/batch
Epoch: 16/20...  Training Step: 7072...  Training loss: 0.8874...  0.1139 sec/batch
Epoch: 16/20...  Training Step: 7073...  Training loss: 1.0379...  0.1177 sec/batch
Epoch: 16/20...  Training Step: 7074...  Training loss: 1.0661...  0.1184 sec/batch
Epoch: 16/20...  Training Step: 7075...  Training loss: 0.9258...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7076...  Training loss: 0.8496...  0.1199 sec/batch
Epoch: 16/20...  Training Step: 7077...  Training loss: 0.9543...  0.1189 sec/batch
Epoch: 16/20...  Training Step: 7078...  Training loss: 1.0415...  0.1176 sec/batch
Epoch: 16/20...  Training Step: 7079...  Training loss: 1.0289...  0.1179 sec/batch
Epoch: 16/20...  Training Step: 7080...  Training loss: 0.9020...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7081...  Training loss: 1.0283...  0.1190 sec/batch
Epoch: 16/20...  Training Step: 7082...  Training loss: 1.0413...  0.1182 sec/batch
Epoch: 16/20...  Training Step: 7083...  Training loss: 1.0489...  0.1181 sec/batch
Epoch: 16/20...  Training Step: 7084...  Training loss: 1.0675...  0.1193 sec/batch
Epoch: 16/20...  Training Step: 7085...  Training loss: 1.0815...  0.1158 sec/batch
Epoch: 16/20...  Training Step: 7086...  Training loss: 0.9366...  0.1155 sec/batch
Epoch: 16/20...  Training Step: 7087...  Training loss: 1.0213...  0.1197 sec/batch
Epoch: 16/20...  Training Step: 7088...  Training loss: 1.0737...  0.1172 sec/batch
Epoch: 16/20...  Training Step: 7089...  Training loss: 1.0044...  0.1200 sec/batch
Epoch: 16/20...  Training Step: 7090...  Training loss: 0.9933...  0.1159 sec/batch
Epoch: 16/20...  Training Step: 7091...  Training loss: 1.2071...  0.1170 sec/batch
Epoch: 16/20...  Training Step: 7092...  Training loss: 0.9217...  0.1159 sec/batch
Epoch: 16/20...  Training Step: 7093...  Training loss: 0.9370...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7094...  Training loss: 1.0313...  0.1228 sec/batch
Epoch: 16/20...  Training Step: 7095...  Training loss: 0.9422...  0.1143 sec/batch
Epoch: 16/20...  Training Step: 7096...  Training loss: 0.9349...  0.1196 sec/batch
Epoch: 16/20...  Training Step: 7097...  Training loss: 0.8812...  0.1171 sec/batch
Epoch: 16/20...  Training Step: 7098...  Training loss: 0.9308...  0.1225 sec/batch
Epoch: 16/20...  Training Step: 7099...  Training loss: 0.9520...  0.1340 sec/batch
Epoch: 16/20...  Training Step: 7100...  Training loss: 0.9668...  0.1261 sec/batch
Epoch: 16/20...  Training Step: 7101...  Training loss: 0.7827...  0.1275 sec/batch
Epoch: 16/20...  Training Step: 7102...  Training loss: 0.9841...  0.1224 sec/batch
Epoch: 16/20...  Training Step: 7103...  Training loss: 0.8243...  0.1286 sec/batch
Epoch: 16/20...  Training Step: 7104...  Training loss: 1.0372...  0.1219 sec/batch
Epoch: 16/20...  Training Step: 7105...  Training loss: 0.9583...  0.1272 sec/batch
Epoch: 16/20...  Training Step: 7106...  Training loss: 0.9429...  0.1237 sec/batch
Epoch: 16/20...  Training Step: 7107...  Training loss: 1.0338...  0.1239 sec/batch
Epoch: 16/20...  Training Step: 7108...  Training loss: 0.9952...  0.1244 sec/batch
Epoch: 16/20...  Training Step: 7109...  Training loss: 0.8094...  0.1233 sec/batch
Epoch: 16/20...  Training Step: 7110...  Training loss: 0.9154...  0.1228 sec/batch
Epoch: 16/20...  Training Step: 7111...  Training loss: 0.9617...  0.1239 sec/batch
Epoch: 16/20...  Training Step: 7112...  Training loss: 1.0545...  0.1246 sec/batch
Epoch: 16/20...  Training Step: 7113...  Training loss: 1.0915...  0.1236 sec/batch
Epoch: 16/20...  Training Step: 7114...  Training loss: 0.9728...  0.1235 sec/batch
Epoch: 16/20...  Training Step: 7115...  Training loss: 0.9007...  0.1263 sec/batch
Epoch: 16/20...  Training Step: 7116...  Training loss: 0.8959...  0.1202 sec/batch
Epoch: 16/20...  Training Step: 7117...  Training loss: 1.0533...  0.1254 sec/batch
Epoch: 16/20...  Training Step: 7118...  Training loss: 0.9299...  0.1194 sec/batch
Epoch: 16/20...  Training Step: 7119...  Training loss: 0.9442...  0.1229 sec/batch
Epoch: 16/20...  Training Step: 7120...  Training loss: 0.8549...  0.1228 sec/batch
Epoch: 16/20...  Training Step: 7121...  Training loss: 0.9460...  0.1273 sec/batch
Epoch: 16/20...  Training Step: 7122...  Training loss: 1.0103...  0.1211 sec/batch
Epoch: 16/20...  Training Step: 7123...  Training loss: 1.3024...  0.1205 sec/batch
Epoch: 16/20...  Training Step: 7124...  Training loss: 0.9533...  0.1197 sec/batch
Epoch: 16/20...  Training Step: 7125...  Training loss: 0.9239...  0.1252 sec/batch
Epoch: 16/20...  Training Step: 7126...  Training loss: 0.8062...  0.1191 sec/batch
Epoch: 16/20...  Training Step: 7127...  Training loss: 0.9346...  0.1290 sec/batch
Epoch: 16/20...  Training Step: 7128...  Training loss: 1.0651...  0.1245 sec/batch
Epoch: 16/20...  Training Step: 7129...  Training loss: 0.9690...  0.1180 sec/batch
Epoch: 16/20...  Training Step: 7130...  Training loss: 0.9410...  0.1169 sec/batch
Epoch: 16/20...  Training Step: 7131...  Training loss: 1.0013...  0.1214 sec/batch
Epoch: 16/20...  Training Step: 7132...  Training loss: 1.1654...  0.1216 sec/batch
Epoch: 16/20...  Training Step: 7133...  Training loss: 0.9002...  0.1200 sec/batch
Epoch: 16/20...  Training Step: 7134...  Training loss: 0.8454...  0.1288 sec/batch
Epoch: 16/20...  Training Step: 7135...  Training loss: 1.1436...  0.1191 sec/batch
Epoch: 16/20...  Training Step: 7136...  Training loss: 0.8330...  0.1236 sec/batch
Epoch: 16/20...  Training Step: 7137...  Training loss: 0.8549...  0.1247 sec/batch
Epoch: 16/20...  Training Step: 7138...  Training loss: 1.0686...  0.1226 sec/batch
Epoch: 16/20...  Training Step: 7139...  Training loss: 0.8185...  0.1226 sec/batch
Epoch: 16/20...  Training Step: 7140...  Training loss: 0.9772...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 7141...  Training loss: 0.8435...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 7142...  Training loss: 1.1695...  0.1162 sec/batch
Epoch: 16/20...  Training Step: 7143...  Training loss: 1.1011...  0.1248 sec/batch
Epoch: 16/20...  Training Step: 7144...  Training loss: 0.9327...  0.1221 sec/batch
Epoch: 16/20...  Training Step: 7145...  Training loss: 1.0942...  0.1189 sec/batch
Epoch: 16/20...  Training Step: 7146...  Training loss: 1.0482...  0.1267 sec/batch
Epoch: 16/20...  Training Step: 7147...  Training loss: 1.1672...  0.1255 sec/batch
Epoch: 16/20...  Training Step: 7148...  Training loss: 0.9443...  0.1216 sec/batch
Epoch: 16/20...  Training Step: 7149...  Training loss: 1.1316...  0.1198 sec/batch
Epoch: 16/20...  Training Step: 7150...  Training loss: 1.0162...  0.1199 sec/batch
Epoch: 16/20...  Training Step: 7151...  Training loss: 0.8867...  0.1246 sec/batch
Epoch: 16/20...  Training Step: 7152...  Training loss: 0.9985...  0.1201 sec/batch
Epoch: 16/20...  Training Step: 7153...  Training loss: 0.9455...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7154...  Training loss: 0.9223...  0.1189 sec/batch
Epoch: 16/20...  Training Step: 7155...  Training loss: 0.9996...  0.1212 sec/batch
Epoch: 16/20...  Training Step: 7156...  Training loss: 1.0858...  0.1293 sec/batch
Epoch: 16/20...  Training Step: 7157...  Training loss: 0.9614...  0.1246 sec/batch
Epoch: 16/20...  Training Step: 7158...  Training loss: 1.0139...  0.1215 sec/batch
Epoch: 16/20...  Training Step: 7159...  Training loss: 0.7818...  0.1306 sec/batch
Epoch: 16/20...  Training Step: 7160...  Training loss: 0.8248...  0.1190 sec/batch
Epoch: 16/20...  Training Step: 7161...  Training loss: 0.8844...  0.1212 sec/batch
Epoch: 16/20...  Training Step: 7162...  Training loss: 0.9583...  0.1274 sec/batch
Epoch: 16/20...  Training Step: 7163...  Training loss: 0.9800...  0.1221 sec/batch
Epoch: 16/20...  Training Step: 7164...  Training loss: 1.2071...  0.1216 sec/batch
Epoch: 16/20...  Training Step: 7165...  Training loss: 0.8434...  0.1235 sec/batch
Epoch: 16/20...  Training Step: 7166...  Training loss: 0.9767...  0.1222 sec/batch
Epoch: 16/20...  Training Step: 7167...  Training loss: 1.0140...  0.1266 sec/batch
Epoch: 16/20...  Training Step: 7168...  Training loss: 0.9341...  0.1307 sec/batch
Epoch: 16/20...  Training Step: 7169...  Training loss: 1.0838...  0.1258 sec/batch
Epoch: 16/20...  Training Step: 7170...  Training loss: 0.9163...  0.1237 sec/batch
Epoch: 16/20...  Training Step: 7171...  Training loss: 0.9695...  0.1241 sec/batch
Epoch: 16/20...  Training Step: 7172...  Training loss: 1.1038...  0.1277 sec/batch
Epoch: 16/20...  Training Step: 7173...  Training loss: 0.9695...  0.1253 sec/batch
Epoch: 16/20...  Training Step: 7174...  Training loss: 0.9735...  0.1232 sec/batch
Epoch: 16/20...  Training Step: 7175...  Training loss: 1.0980...  0.1265 sec/batch
Epoch: 16/20...  Training Step: 7176...  Training loss: 0.9503...  0.1235 sec/batch
Epoch: 16/20...  Training Step: 7177...  Training loss: 1.0635...  0.1182 sec/batch
Epoch: 16/20...  Training Step: 7178...  Training loss: 1.1084...  0.1182 sec/batch
Epoch: 16/20...  Training Step: 7179...  Training loss: 1.1657...  0.1194 sec/batch
Epoch: 16/20...  Training Step: 7180...  Training loss: 1.0264...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 7181...  Training loss: 0.8898...  0.1172 sec/batch
Epoch: 16/20...  Training Step: 7182...  Training loss: 1.2179...  0.1160 sec/batch
Epoch: 16/20...  Training Step: 7183...  Training loss: 0.9448...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 7184...  Training loss: 1.2049...  0.1175 sec/batch
Epoch: 16/20...  Training Step: 7185...  Training loss: 1.0032...  0.1196 sec/batch
Epoch: 16/20...  Training Step: 7186...  Training loss: 1.0960...  0.1190 sec/batch
Epoch: 16/20...  Training Step: 7187...  Training loss: 1.1483...  0.1336 sec/batch
Epoch: 16/20...  Training Step: 7188...  Training loss: 1.0206...  0.1303 sec/batch
Epoch: 16/20...  Training Step: 7189...  Training loss: 1.1042...  0.1249 sec/batch
Epoch: 16/20...  Training Step: 7190...  Training loss: 0.9747...  0.1164 sec/batch
Epoch: 16/20...  Training Step: 7191...  Training loss: 1.0922...  0.1162 sec/batch
Epoch: 16/20...  Training Step: 7192...  Training loss: 0.9913...  0.1140 sec/batch
Epoch: 16/20...  Training Step: 7193...  Training loss: 1.2074...  0.1168 sec/batch
Epoch: 16/20...  Training Step: 7194...  Training loss: 1.1012...  0.1170 sec/batch
Epoch: 16/20...  Training Step: 7195...  Training loss: 1.1730...  0.1150 sec/batch
Epoch: 16/20...  Training Step: 7196...  Training loss: 0.9284...  0.1156 sec/batch
Epoch: 16/20...  Training Step: 7197...  Training loss: 1.1750...  0.1141 sec/batch
Epoch: 16/20...  Training Step: 7198...  Training loss: 0.8527...  0.1202 sec/batch
Epoch: 16/20...  Training Step: 7199...  Training loss: 1.0429...  0.1179 sec/batch
Epoch: 16/20...  Training Step: 7200...  Training loss: 1.0536...  0.1156 sec/batch
Epoch: 16/20...  Training Step: 7201...  Training loss: 0.9911...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7202...  Training loss: 1.0055...  0.1168 sec/batch
Epoch: 16/20...  Training Step: 7203...  Training loss: 1.1098...  0.1146 sec/batch
Epoch: 16/20...  Training Step: 7204...  Training loss: 0.9554...  0.1138 sec/batch
Epoch: 16/20...  Training Step: 7205...  Training loss: 0.9940...  0.1171 sec/batch
Epoch: 16/20...  Training Step: 7206...  Training loss: 0.9573...  0.1244 sec/batch
Epoch: 16/20...  Training Step: 7207...  Training loss: 0.9461...  0.1246 sec/batch
Epoch: 16/20...  Training Step: 7208...  Training loss: 1.1863...  0.1207 sec/batch
Epoch: 16/20...  Training Step: 7209...  Training loss: 0.9815...  0.1241 sec/batch
Epoch: 16/20...  Training Step: 7210...  Training loss: 1.0252...  0.1136 sec/batch
Epoch: 16/20...  Training Step: 7211...  Training loss: 1.1695...  0.1219 sec/batch
Epoch: 16/20...  Training Step: 7212...  Training loss: 0.9241...  0.1195 sec/batch
Epoch: 16/20...  Training Step: 7213...  Training loss: 0.9655...  0.1180 sec/batch
Epoch: 16/20...  Training Step: 7214...  Training loss: 1.0326...  0.1193 sec/batch
Epoch: 16/20...  Training Step: 7215...  Training loss: 0.9465...  0.1136 sec/batch
Epoch: 16/20...  Training Step: 7216...  Training loss: 0.9759...  0.1158 sec/batch
Epoch: 16/20...  Training Step: 7217...  Training loss: 1.1148...  0.1198 sec/batch
Epoch: 16/20...  Training Step: 7218...  Training loss: 0.9243...  0.1431 sec/batch
Epoch: 16/20...  Training Step: 7219...  Training loss: 0.8535...  0.1384 sec/batch
Epoch: 16/20...  Training Step: 7220...  Training loss: 0.9929...  0.1255 sec/batch
Epoch: 16/20...  Training Step: 7221...  Training loss: 1.0345...  0.1350 sec/batch
Epoch: 16/20...  Training Step: 7222...  Training loss: 1.0050...  0.1346 sec/batch
Epoch: 16/20...  Training Step: 7223...  Training loss: 1.0223...  0.1266 sec/batch
Epoch: 16/20...  Training Step: 7224...  Training loss: 1.0488...  0.1184 sec/batch
Epoch: 16/20...  Training Step: 7225...  Training loss: 1.1503...  0.1234 sec/batch
Epoch: 16/20...  Training Step: 7226...  Training loss: 0.9873...  0.1238 sec/batch
Epoch: 16/20...  Training Step: 7227...  Training loss: 1.2493...  0.1235 sec/batch
Epoch: 16/20...  Training Step: 7228...  Training loss: 1.0929...  0.1216 sec/batch
Epoch: 16/20...  Training Step: 7229...  Training loss: 1.0527...  0.1214 sec/batch
Epoch: 16/20...  Training Step: 7230...  Training loss: 1.2109...  0.1150 sec/batch
Epoch: 16/20...  Training Step: 7231...  Training loss: 0.9655...  0.1213 sec/batch
Epoch: 16/20...  Training Step: 7232...  Training loss: 1.0385...  0.1204 sec/batch
Epoch: 16/20...  Training Step: 7233...  Training loss: 1.0869...  0.1164 sec/batch
Epoch: 16/20...  Training Step: 7234...  Training loss: 1.0442...  0.1170 sec/batch
Epoch: 16/20...  Training Step: 7235...  Training loss: 1.1196...  0.1183 sec/batch
Epoch: 16/20...  Training Step: 7236...  Training loss: 0.9849...  0.1207 sec/batch
Epoch: 16/20...  Training Step: 7237...  Training loss: 0.8516...  0.1195 sec/batch
Epoch: 16/20...  Training Step: 7238...  Training loss: 1.1631...  0.1162 sec/batch
Epoch: 16/20...  Training Step: 7239...  Training loss: 1.0645...  0.1198 sec/batch
Epoch: 16/20...  Training Step: 7240...  Training loss: 0.9901...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7241...  Training loss: 0.9352...  0.1184 sec/batch
Epoch: 16/20...  Training Step: 7242...  Training loss: 1.0522...  0.1174 sec/batch
Epoch: 16/20...  Training Step: 7243...  Training loss: 0.9754...  0.1189 sec/batch
Epoch: 16/20...  Training Step: 7244...  Training loss: 1.0874...  0.1175 sec/batch
Epoch: 16/20...  Training Step: 7245...  Training loss: 0.9472...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 7246...  Training loss: 0.9805...  0.1208 sec/batch
Epoch: 16/20...  Training Step: 7247...  Training loss: 1.0668...  0.1168 sec/batch
Epoch: 16/20...  Training Step: 7248...  Training loss: 1.0769...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7249...  Training loss: 1.1234...  0.1195 sec/batch
Epoch: 16/20...  Training Step: 7250...  Training loss: 1.1183...  0.1167 sec/batch
Epoch: 16/20...  Training Step: 7251...  Training loss: 1.0462...  0.1186 sec/batch
Epoch: 16/20...  Training Step: 7252...  Training loss: 0.9488...  0.1147 sec/batch
Epoch: 16/20...  Training Step: 7253...  Training loss: 0.9421...  0.1156 sec/batch
Epoch: 16/20...  Training Step: 7254...  Training loss: 1.0472...  0.1205 sec/batch
Epoch: 16/20...  Training Step: 7255...  Training loss: 1.0568...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7256...  Training loss: 1.2393...  0.1143 sec/batch
Epoch: 16/20...  Training Step: 7257...  Training loss: 1.0126...  0.1160 sec/batch
Epoch: 16/20...  Training Step: 7258...  Training loss: 0.9098...  0.1285 sec/batch
Epoch: 16/20...  Training Step: 7259...  Training loss: 1.0679...  0.1116 sec/batch
Epoch: 16/20...  Training Step: 7260...  Training loss: 1.0590...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7261...  Training loss: 1.0165...  0.1168 sec/batch
Epoch: 16/20...  Training Step: 7262...  Training loss: 1.0519...  0.1198 sec/batch
Epoch: 16/20...  Training Step: 7263...  Training loss: 0.8759...  0.1173 sec/batch
Epoch: 16/20...  Training Step: 7264...  Training loss: 1.2913...  0.1201 sec/batch
Epoch: 16/20...  Training Step: 7265...  Training loss: 1.0032...  0.1148 sec/batch
Epoch: 16/20...  Training Step: 7266...  Training loss: 1.2126...  0.1164 sec/batch
Epoch: 16/20...  Training Step: 7267...  Training loss: 0.9678...  0.1215 sec/batch
Epoch: 16/20...  Training Step: 7268...  Training loss: 1.2948...  0.1144 sec/batch
Epoch: 16/20...  Training Step: 7269...  Training loss: 1.1026...  0.1156 sec/batch
Epoch: 16/20...  Training Step: 7270...  Training loss: 0.9990...  0.1161 sec/batch
Epoch: 16/20...  Training Step: 7271...  Training loss: 1.0190...  0.1204 sec/batch
Epoch: 16/20...  Training Step: 7272...  Training loss: 0.8508...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7273...  Training loss: 0.7859...  0.1172 sec/batch
Epoch: 16/20...  Training Step: 7274...  Training loss: 0.8772...  0.1159 sec/batch
Epoch: 16/20...  Training Step: 7275...  Training loss: 0.9595...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 7276...  Training loss: 0.9077...  0.1224 sec/batch
Epoch: 16/20...  Training Step: 7277...  Training loss: 0.8664...  0.1187 sec/batch
Epoch: 16/20...  Training Step: 7278...  Training loss: 0.9955...  0.1236 sec/batch
Epoch: 16/20...  Training Step: 7279...  Training loss: 0.7814...  0.1257 sec/batch
Epoch: 16/20...  Training Step: 7280...  Training loss: 0.8483...  0.1331 sec/batch
Epoch: 16/20...  Training Step: 7281...  Training loss: 0.8619...  0.1257 sec/batch
Epoch: 16/20...  Training Step: 7282...  Training loss: 1.1923...  0.1252 sec/batch
Epoch: 16/20...  Training Step: 7283...  Training loss: 0.8776...  0.1277 sec/batch
Epoch: 16/20...  Training Step: 7284...  Training loss: 0.9697...  0.1180 sec/batch
Epoch: 16/20...  Training Step: 7285...  Training loss: 0.9111...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7286...  Training loss: 0.8335...  0.1148 sec/batch
Epoch: 16/20...  Training Step: 7287...  Training loss: 0.8900...  0.1181 sec/batch
Epoch: 16/20...  Training Step: 7288...  Training loss: 0.9816...  0.1202 sec/batch
Epoch: 16/20...  Training Step: 7289...  Training loss: 1.1606...  0.1162 sec/batch
Epoch: 16/20...  Training Step: 7290...  Training loss: 0.9877...  0.1129 sec/batch
Epoch: 16/20...  Training Step: 7291...  Training loss: 1.1471...  0.1214 sec/batch
Epoch: 16/20...  Training Step: 7292...  Training loss: 1.0699...  0.1196 sec/batch
Epoch: 16/20...  Training Step: 7293...  Training loss: 0.9670...  0.1168 sec/batch
Epoch: 16/20...  Training Step: 7294...  Training loss: 0.9260...  0.1187 sec/batch
Epoch: 16/20...  Training Step: 7295...  Training loss: 0.9859...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 7296...  Training loss: 0.9927...  0.1200 sec/batch
Epoch: 16/20...  Training Step: 7297...  Training loss: 0.8566...  0.1190 sec/batch
Epoch: 16/20...  Training Step: 7298...  Training loss: 0.8312...  0.1145 sec/batch
Epoch: 16/20...  Training Step: 7299...  Training loss: 1.0638...  0.1186 sec/batch
Epoch: 16/20...  Training Step: 7300...  Training loss: 0.9062...  0.1160 sec/batch
Epoch: 16/20...  Training Step: 7301...  Training loss: 0.9216...  0.1201 sec/batch
Epoch: 16/20...  Training Step: 7302...  Training loss: 1.0963...  0.1174 sec/batch
Epoch: 16/20...  Training Step: 7303...  Training loss: 0.8319...  0.1171 sec/batch
Epoch: 16/20...  Training Step: 7304...  Training loss: 1.0693...  0.1137 sec/batch
Epoch: 16/20...  Training Step: 7305...  Training loss: 0.9685...  0.1235 sec/batch
Epoch: 16/20...  Training Step: 7306...  Training loss: 0.8661...  0.1169 sec/batch
Epoch: 16/20...  Training Step: 7307...  Training loss: 0.8549...  0.1161 sec/batch
Epoch: 16/20...  Training Step: 7308...  Training loss: 1.0942...  0.1177 sec/batch
Epoch: 16/20...  Training Step: 7309...  Training loss: 1.0093...  0.1197 sec/batch
Epoch: 16/20...  Training Step: 7310...  Training loss: 0.9761...  0.1193 sec/batch
Epoch: 16/20...  Training Step: 7311...  Training loss: 0.8739...  0.1138 sec/batch
Epoch: 16/20...  Training Step: 7312...  Training loss: 0.9419...  0.1201 sec/batch
Epoch: 16/20...  Training Step: 7313...  Training loss: 0.8492...  0.1232 sec/batch
Epoch: 16/20...  Training Step: 7314...  Training loss: 0.7005...  0.1172 sec/batch
Epoch: 16/20...  Training Step: 7315...  Training loss: 1.0550...  0.1221 sec/batch
Epoch: 16/20...  Training Step: 7316...  Training loss: 0.9364...  0.1202 sec/batch
Epoch: 16/20...  Training Step: 7317...  Training loss: 0.8346...  0.1207 sec/batch
Epoch: 16/20...  Training Step: 7318...  Training loss: 1.0312...  0.1216 sec/batch
Epoch: 16/20...  Training Step: 7319...  Training loss: 1.1805...  0.1203 sec/batch
Epoch: 16/20...  Training Step: 7320...  Training loss: 0.7913...  0.1230 sec/batch
Epoch: 16/20...  Training Step: 7321...  Training loss: 1.0203...  0.1175 sec/batch
Epoch: 16/20...  Training Step: 7322...  Training loss: 1.0855...  0.1163 sec/batch
Epoch: 16/20...  Training Step: 7323...  Training loss: 0.8778...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 7324...  Training loss: 0.9792...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 7325...  Training loss: 0.9376...  0.1221 sec/batch
Epoch: 16/20...  Training Step: 7326...  Training loss: 0.9029...  0.1177 sec/batch
Epoch: 16/20...  Training Step: 7327...  Training loss: 1.0093...  0.1202 sec/batch
Epoch: 16/20...  Training Step: 7328...  Training loss: 1.0112...  0.1192 sec/batch
Epoch: 16/20...  Training Step: 7329...  Training loss: 1.0603...  0.1174 sec/batch
Epoch: 16/20...  Training Step: 7330...  Training loss: 1.0764...  0.1163 sec/batch
Epoch: 16/20...  Training Step: 7331...  Training loss: 0.7596...  0.1187 sec/batch
Epoch: 16/20...  Training Step: 7332...  Training loss: 0.9766...  0.1137 sec/batch
Epoch: 16/20...  Training Step: 7333...  Training loss: 0.9030...  0.1182 sec/batch
Epoch: 16/20...  Training Step: 7334...  Training loss: 0.9787...  0.1228 sec/batch
Epoch: 16/20...  Training Step: 7335...  Training loss: 0.9047...  0.1183 sec/batch
Epoch: 16/20...  Training Step: 7336...  Training loss: 0.9618...  0.1157 sec/batch
Epoch: 16/20...  Training Step: 7337...  Training loss: 1.0378...  0.1187 sec/batch
Epoch: 16/20...  Training Step: 7338...  Training loss: 1.0733...  0.1198 sec/batch
Epoch: 16/20...  Training Step: 7339...  Training loss: 0.9802...  0.1160 sec/batch
Epoch: 16/20...  Training Step: 7340...  Training loss: 1.0639...  0.1176 sec/batch
Epoch: 16/20...  Training Step: 7341...  Training loss: 0.9540...  0.1196 sec/batch
Epoch: 16/20...  Training Step: 7342...  Training loss: 0.9072...  0.1201 sec/batch
Epoch: 16/20...  Training Step: 7343...  Training loss: 0.9127...  0.1219 sec/batch
Epoch: 16/20...  Training Step: 7344...  Training loss: 1.0027...  0.1198 sec/batch
Epoch: 16/20...  Training Step: 7345...  Training loss: 0.8694...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7346...  Training loss: 1.0554...  0.1222 sec/batch
Epoch: 16/20...  Training Step: 7347...  Training loss: 1.0891...  0.1141 sec/batch
Epoch: 16/20...  Training Step: 7348...  Training loss: 0.8809...  0.1161 sec/batch
Epoch: 16/20...  Training Step: 7349...  Training loss: 0.8647...  0.1237 sec/batch
Epoch: 16/20...  Training Step: 7350...  Training loss: 0.9802...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 7351...  Training loss: 0.9163...  0.1159 sec/batch
Epoch: 16/20...  Training Step: 7352...  Training loss: 0.8882...  0.1183 sec/batch
Epoch: 16/20...  Training Step: 7353...  Training loss: 0.9980...  0.1202 sec/batch
Epoch: 16/20...  Training Step: 7354...  Training loss: 0.8858...  0.1231 sec/batch
Epoch: 16/20...  Training Step: 7355...  Training loss: 0.9462...  0.1160 sec/batch
Epoch: 16/20...  Training Step: 7356...  Training loss: 1.1218...  0.1218 sec/batch
Epoch: 16/20...  Training Step: 7357...  Training loss: 0.9369...  0.1194 sec/batch
Epoch: 16/20...  Training Step: 7358...  Training loss: 1.0149...  0.1193 sec/batch
Epoch: 16/20...  Training Step: 7359...  Training loss: 1.1552...  0.1158 sec/batch
Epoch: 16/20...  Training Step: 7360...  Training loss: 1.0324...  0.1197 sec/batch
Epoch: 16/20...  Training Step: 7361...  Training loss: 0.8413...  0.1164 sec/batch
Epoch: 16/20...  Training Step: 7362...  Training loss: 0.9688...  0.1191 sec/batch
Epoch: 16/20...  Training Step: 7363...  Training loss: 1.0105...  0.1179 sec/batch
Epoch: 16/20...  Training Step: 7364...  Training loss: 1.0749...  0.1178 sec/batch
Epoch: 16/20...  Training Step: 7365...  Training loss: 0.8610...  0.1163 sec/batch
Epoch: 16/20...  Training Step: 7366...  Training loss: 1.1139...  0.1218 sec/batch
Epoch: 16/20...  Training Step: 7367...  Training loss: 0.9920...  0.1181 sec/batch
Epoch: 16/20...  Training Step: 7368...  Training loss: 1.1791...  0.1149 sec/batch
Epoch: 16/20...  Training Step: 7369...  Training loss: 1.0172...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 7370...  Training loss: 0.9526...  0.1171 sec/batch
Epoch: 16/20...  Training Step: 7371...  Training loss: 0.8940...  0.1164 sec/batch
Epoch: 16/20...  Training Step: 7372...  Training loss: 0.9337...  0.1171 sec/batch
Epoch: 16/20...  Training Step: 7373...  Training loss: 1.0722...  0.1147 sec/batch
Epoch: 16/20...  Training Step: 7374...  Training loss: 0.8702...  0.1182 sec/batch
Epoch: 16/20...  Training Step: 7375...  Training loss: 1.0802...  0.1170 sec/batch
Epoch: 16/20...  Training Step: 7376...  Training loss: 1.1055...  0.1230 sec/batch
Epoch: 16/20...  Training Step: 7377...  Training loss: 1.0352...  0.1189 sec/batch
Epoch: 16/20...  Training Step: 7378...  Training loss: 0.8461...  0.1175 sec/batch
Epoch: 16/20...  Training Step: 7379...  Training loss: 0.8477...  0.1220 sec/batch
Epoch: 16/20...  Training Step: 7380...  Training loss: 0.8587...  0.1188 sec/batch
Epoch: 16/20...  Training Step: 7381...  Training loss: 1.0856...  0.1155 sec/batch
Epoch: 16/20...  Training Step: 7382...  Training loss: 0.9543...  0.1185 sec/batch
Epoch: 16/20...  Training Step: 7383...  Training loss: 0.9332...  0.1181 sec/batch
Epoch: 16/20...  Training Step: 7384...  Training loss: 0.9917...  0.1140 sec/batch
Epoch: 16/20...  Training Step: 7385...  Training loss: 0.8651...  0.1204 sec/batch
Epoch: 16/20...  Training Step: 7386...  Training loss: 0.9978...  0.1184 sec/batch
Epoch: 16/20...  Training Step: 7387...  Training loss: 0.9609...  0.1213 sec/batch
Epoch: 16/20...  Training Step: 7388...  Training loss: 0.9847...  0.1156 sec/batch
Epoch: 16/20...  Training Step: 7389...  Training loss: 0.8744...  0.1150 sec/batch
Epoch: 16/20...  Training Step: 7390...  Training loss: 0.9034...  0.1195 sec/batch
Epoch: 16/20...  Training Step: 7391...  Training loss: 1.0747...  0.1202 sec/batch
Epoch: 16/20...  Training Step: 7392...  Training loss: 0.9331...  0.1174 sec/batch
Epoch: 16/20...  Training Step: 7393...  Training loss: 1.1351...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 7394...  Training loss: 1.1514...  0.1191 sec/batch
Epoch: 16/20...  Training Step: 7395...  Training loss: 0.9472...  0.1164 sec/batch
Epoch: 16/20...  Training Step: 7396...  Training loss: 1.0009...  0.1190 sec/batch
Epoch: 16/20...  Training Step: 7397...  Training loss: 0.8792...  0.1190 sec/batch
Epoch: 16/20...  Training Step: 7398...  Training loss: 1.0517...  0.1180 sec/batch
Epoch: 16/20...  Training Step: 7399...  Training loss: 1.1396...  0.1165 sec/batch
Epoch: 16/20...  Training Step: 7400...  Training loss: 1.0700...  0.1141 sec/batch
Epoch: 16/20...  Training Step: 7401...  Training loss: 0.8898...  0.1220 sec/batch
Epoch: 16/20...  Training Step: 7402...  Training loss: 0.9373...  0.1180 sec/batch
Epoch: 16/20...  Training Step: 7403...  Training loss: 0.9424...  0.1145 sec/batch
Epoch: 16/20...  Training Step: 7404...  Training loss: 0.9334...  0.1184 sec/batch
Epoch: 16/20...  Training Step: 7405...  Training loss: 0.9794...  0.1311 sec/batch
Epoch: 16/20...  Training Step: 7406...  Training loss: 0.8790...  0.1259 sec/batch
Epoch: 16/20...  Training Step: 7407...  Training loss: 1.0081...  0.1206 sec/batch
Epoch: 16/20...  Training Step: 7408...  Training loss: 0.8851...  0.1200 sec/batch
Epoch: 16/20...  Training Step: 7409...  Training loss: 1.0115...  0.1204 sec/batch
Epoch: 16/20...  Training Step: 7410...  Training loss: 1.0029...  0.1269 sec/batch
Epoch: 16/20...  Training Step: 7411...  Training loss: 0.7710...  0.1273 sec/batch
Epoch: 16/20...  Training Step: 7412...  Training loss: 0.9725...  0.1207 sec/batch
Epoch: 16/20...  Training Step: 7413...  Training loss: 0.8879...  0.1260 sec/batch
Epoch: 16/20...  Training Step: 7414...  Training loss: 1.0535...  0.1238 sec/batch
Epoch: 16/20...  Training Step: 7415...  Training loss: 0.9410...  0.1201 sec/batch
Epoch: 16/20...  Training Step: 7416...  Training loss: 0.8912...  0.1203 sec/batch
Epoch: 16/20...  Training Step: 7417...  Training loss: 1.0432...  0.1255 sec/batch
Epoch: 16/20...  Training Step: 7418...  Training loss: 0.8465...  0.1327 sec/batch
Epoch: 16/20...  Training Step: 7419...  Training loss: 0.8933...  0.1288 sec/batch
Epoch: 16/20...  Training Step: 7420...  Training loss: 0.9438...  0.1233 sec/batch
Epoch: 16/20...  Training Step: 7421...  Training loss: 0.9202...  0.1296 sec/batch
Epoch: 16/20...  Training Step: 7422...  Training loss: 0.8877...  0.1273 sec/batch
Epoch: 16/20...  Training Step: 7423...  Training loss: 0.9089...  0.1278 sec/batch
Epoch: 16/20...  Training Step: 7424...  Training loss: 0.9856...  0.1247 sec/batch
Epoch: 17/20...  Training Step: 7425...  Training loss: 1.0333...  0.1220 sec/batch
Epoch: 17/20...  Training Step: 7426...  Training loss: 1.1775...  0.1244 sec/batch
Epoch: 17/20...  Training Step: 7427...  Training loss: 0.9914...  0.1266 sec/batch
Epoch: 17/20...  Training Step: 7428...  Training loss: 0.9352...  0.1262 sec/batch
Epoch: 17/20...  Training Step: 7429...  Training loss: 1.0677...  0.1172 sec/batch
Epoch: 17/20...  Training Step: 7430...  Training loss: 0.9514...  0.1308 sec/batch
Epoch: 17/20...  Training Step: 7431...  Training loss: 1.0281...  0.1258 sec/batch
Epoch: 17/20...  Training Step: 7432...  Training loss: 0.9520...  0.1268 sec/batch
Epoch: 17/20...  Training Step: 7433...  Training loss: 0.8031...  0.1223 sec/batch
Epoch: 17/20...  Training Step: 7434...  Training loss: 0.9197...  0.1221 sec/batch
Epoch: 17/20...  Training Step: 7435...  Training loss: 0.9625...  0.1261 sec/batch
Epoch: 17/20...  Training Step: 7436...  Training loss: 0.8654...  0.1219 sec/batch
Epoch: 17/20...  Training Step: 7437...  Training loss: 1.1076...  0.1271 sec/batch
Epoch: 17/20...  Training Step: 7438...  Training loss: 0.8309...  0.1173 sec/batch
Epoch: 17/20...  Training Step: 7439...  Training loss: 1.0039...  0.1215 sec/batch
Epoch: 17/20...  Training Step: 7440...  Training loss: 1.0778...  0.1243 sec/batch
Epoch: 17/20...  Training Step: 7441...  Training loss: 0.9140...  0.1210 sec/batch
Epoch: 17/20...  Training Step: 7442...  Training loss: 0.8599...  0.1175 sec/batch
Epoch: 17/20...  Training Step: 7443...  Training loss: 0.9571...  0.1181 sec/batch
Epoch: 17/20...  Training Step: 7444...  Training loss: 0.8579...  0.1194 sec/batch
Epoch: 17/20...  Training Step: 7445...  Training loss: 1.0841...  0.1180 sec/batch
Epoch: 17/20...  Training Step: 7446...  Training loss: 0.9871...  0.1148 sec/batch
Epoch: 17/20...  Training Step: 7447...  Training loss: 0.9422...  0.1233 sec/batch
Epoch: 17/20...  Training Step: 7448...  Training loss: 1.0039...  0.1142 sec/batch
Epoch: 17/20...  Training Step: 7449...  Training loss: 0.9928...  0.1170 sec/batch
Epoch: 17/20...  Training Step: 7450...  Training loss: 1.0169...  0.1189 sec/batch
Epoch: 17/20...  Training Step: 7451...  Training loss: 0.9827...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7452...  Training loss: 0.9841...  0.1193 sec/batch
Epoch: 17/20...  Training Step: 7453...  Training loss: 0.9129...  0.1175 sec/batch
Epoch: 17/20...  Training Step: 7454...  Training loss: 1.0231...  0.1215 sec/batch
Epoch: 17/20...  Training Step: 7455...  Training loss: 0.9552...  0.1184 sec/batch
Epoch: 17/20...  Training Step: 7456...  Training loss: 0.8649...  0.1169 sec/batch
Epoch: 17/20...  Training Step: 7457...  Training loss: 0.8092...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7458...  Training loss: 0.9689...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7459...  Training loss: 0.9043...  0.1220 sec/batch
Epoch: 17/20...  Training Step: 7460...  Training loss: 0.9120...  0.1142 sec/batch
Epoch: 17/20...  Training Step: 7461...  Training loss: 0.9656...  0.1165 sec/batch
Epoch: 17/20...  Training Step: 7462...  Training loss: 0.8794...  0.1206 sec/batch
Epoch: 17/20...  Training Step: 7463...  Training loss: 0.9485...  0.1180 sec/batch
Epoch: 17/20...  Training Step: 7464...  Training loss: 1.2767...  0.1182 sec/batch
Epoch: 17/20...  Training Step: 7465...  Training loss: 0.8201...  0.1178 sec/batch
Epoch: 17/20...  Training Step: 7466...  Training loss: 0.8500...  0.1212 sec/batch
Epoch: 17/20...  Training Step: 7467...  Training loss: 1.1227...  0.1184 sec/batch
Epoch: 17/20...  Training Step: 7468...  Training loss: 0.7817...  0.1165 sec/batch
Epoch: 17/20...  Training Step: 7469...  Training loss: 0.9291...  0.1159 sec/batch
Epoch: 17/20...  Training Step: 7470...  Training loss: 0.9822...  0.1169 sec/batch
Epoch: 17/20...  Training Step: 7471...  Training loss: 1.1247...  0.1154 sec/batch
Epoch: 17/20...  Training Step: 7472...  Training loss: 1.0248...  0.1177 sec/batch
Epoch: 17/20...  Training Step: 7473...  Training loss: 0.8430...  0.1199 sec/batch
Epoch: 17/20...  Training Step: 7474...  Training loss: 0.9110...  0.1227 sec/batch
Epoch: 17/20...  Training Step: 7475...  Training loss: 0.9535...  0.1184 sec/batch
Epoch: 17/20...  Training Step: 7476...  Training loss: 0.8898...  0.1205 sec/batch
Epoch: 17/20...  Training Step: 7477...  Training loss: 0.8979...  0.1208 sec/batch
Epoch: 17/20...  Training Step: 7478...  Training loss: 0.8340...  0.1216 sec/batch
Epoch: 17/20...  Training Step: 7479...  Training loss: 0.8538...  0.1209 sec/batch
Epoch: 17/20...  Training Step: 7480...  Training loss: 0.8215...  0.1163 sec/batch
Epoch: 17/20...  Training Step: 7481...  Training loss: 0.8283...  0.1212 sec/batch
Epoch: 17/20...  Training Step: 7482...  Training loss: 1.0017...  0.1181 sec/batch
Epoch: 17/20...  Training Step: 7483...  Training loss: 0.9329...  0.1187 sec/batch
Epoch: 17/20...  Training Step: 7484...  Training loss: 0.9490...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7485...  Training loss: 0.8781...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7486...  Training loss: 1.1409...  0.1158 sec/batch
Epoch: 17/20...  Training Step: 7487...  Training loss: 0.8490...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7488...  Training loss: 1.0230...  0.1165 sec/batch
Epoch: 17/20...  Training Step: 7489...  Training loss: 0.8984...  0.1154 sec/batch
Epoch: 17/20...  Training Step: 7490...  Training loss: 0.9909...  0.1194 sec/batch
Epoch: 17/20...  Training Step: 7491...  Training loss: 0.9871...  0.1216 sec/batch
Epoch: 17/20...  Training Step: 7492...  Training loss: 1.0095...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7493...  Training loss: 0.8722...  0.1160 sec/batch
Epoch: 17/20...  Training Step: 7494...  Training loss: 1.0246...  0.1218 sec/batch
Epoch: 17/20...  Training Step: 7495...  Training loss: 1.1678...  0.1153 sec/batch
Epoch: 17/20...  Training Step: 7496...  Training loss: 0.7873...  0.1183 sec/batch
Epoch: 17/20...  Training Step: 7497...  Training loss: 0.9370...  0.1182 sec/batch
Epoch: 17/20...  Training Step: 7498...  Training loss: 0.7969...  0.1179 sec/batch
Epoch: 17/20...  Training Step: 7499...  Training loss: 1.0537...  0.1205 sec/batch
Epoch: 17/20...  Training Step: 7500...  Training loss: 0.8932...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7501...  Training loss: 0.9333...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7502...  Training loss: 0.9720...  0.1200 sec/batch
Epoch: 17/20...  Training Step: 7503...  Training loss: 0.9630...  0.1212 sec/batch
Epoch: 17/20...  Training Step: 7504...  Training loss: 0.8621...  0.1142 sec/batch
Epoch: 17/20...  Training Step: 7505...  Training loss: 1.0115...  0.1167 sec/batch
Epoch: 17/20...  Training Step: 7506...  Training loss: 1.1031...  0.1188 sec/batch
Epoch: 17/20...  Training Step: 7507...  Training loss: 0.8903...  0.1154 sec/batch
Epoch: 17/20...  Training Step: 7508...  Training loss: 1.0580...  0.1232 sec/batch
Epoch: 17/20...  Training Step: 7509...  Training loss: 0.9510...  0.1159 sec/batch
Epoch: 17/20...  Training Step: 7510...  Training loss: 1.0550...  0.1162 sec/batch
Epoch: 17/20...  Training Step: 7511...  Training loss: 0.8593...  0.1167 sec/batch
Epoch: 17/20...  Training Step: 7512...  Training loss: 1.0617...  0.1196 sec/batch
Epoch: 17/20...  Training Step: 7513...  Training loss: 0.9661...  0.1198 sec/batch
Epoch: 17/20...  Training Step: 7514...  Training loss: 0.9131...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7515...  Training loss: 1.0762...  0.1183 sec/batch
Epoch: 17/20...  Training Step: 7516...  Training loss: 1.0090...  0.1205 sec/batch
Epoch: 17/20...  Training Step: 7517...  Training loss: 0.9239...  0.1211 sec/batch
Epoch: 17/20...  Training Step: 7518...  Training loss: 1.0661...  0.1188 sec/batch
Epoch: 17/20...  Training Step: 7519...  Training loss: 0.9472...  0.1161 sec/batch
Epoch: 17/20...  Training Step: 7520...  Training loss: 0.8851...  0.1160 sec/batch
Epoch: 17/20...  Training Step: 7521...  Training loss: 1.0625...  0.1194 sec/batch
Epoch: 17/20...  Training Step: 7522...  Training loss: 1.0531...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7523...  Training loss: 1.0132...  0.1201 sec/batch
Epoch: 17/20...  Training Step: 7524...  Training loss: 1.0897...  0.1235 sec/batch
Epoch: 17/20...  Training Step: 7525...  Training loss: 0.9534...  0.1158 sec/batch
Epoch: 17/20...  Training Step: 7526...  Training loss: 1.0157...  0.1130 sec/batch
Epoch: 17/20...  Training Step: 7527...  Training loss: 1.0468...  0.1164 sec/batch
Epoch: 17/20...  Training Step: 7528...  Training loss: 1.0618...  0.1160 sec/batch
Epoch: 17/20...  Training Step: 7529...  Training loss: 1.0175...  0.1183 sec/batch
Epoch: 17/20...  Training Step: 7530...  Training loss: 1.0706...  0.1178 sec/batch
Epoch: 17/20...  Training Step: 7531...  Training loss: 0.7300...  0.1151 sec/batch
Epoch: 17/20...  Training Step: 7532...  Training loss: 1.0010...  0.1157 sec/batch
Epoch: 17/20...  Training Step: 7533...  Training loss: 1.0156...  0.1206 sec/batch
Epoch: 17/20...  Training Step: 7534...  Training loss: 0.9551...  0.1145 sec/batch
Epoch: 17/20...  Training Step: 7535...  Training loss: 0.9636...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7536...  Training loss: 0.9775...  0.1210 sec/batch
Epoch: 17/20...  Training Step: 7537...  Training loss: 0.9876...  0.1163 sec/batch
Epoch: 17/20...  Training Step: 7538...  Training loss: 0.9509...  0.1184 sec/batch
Epoch: 17/20...  Training Step: 7539...  Training loss: 0.9307...  0.1219 sec/batch
Epoch: 17/20...  Training Step: 7540...  Training loss: 0.9075...  0.1214 sec/batch
Epoch: 17/20...  Training Step: 7541...  Training loss: 0.9341...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7542...  Training loss: 0.8804...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7543...  Training loss: 0.9352...  0.1235 sec/batch
Epoch: 17/20...  Training Step: 7544...  Training loss: 0.8452...  0.1187 sec/batch
Epoch: 17/20...  Training Step: 7545...  Training loss: 0.9517...  0.1157 sec/batch
Epoch: 17/20...  Training Step: 7546...  Training loss: 1.0174...  0.1205 sec/batch
Epoch: 17/20...  Training Step: 7547...  Training loss: 1.0147...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7548...  Training loss: 0.9275...  0.1180 sec/batch
Epoch: 17/20...  Training Step: 7549...  Training loss: 1.0263...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7550...  Training loss: 0.8176...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7551...  Training loss: 0.9501...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7552...  Training loss: 1.0168...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7553...  Training loss: 1.0768...  0.1165 sec/batch
Epoch: 17/20...  Training Step: 7554...  Training loss: 0.8834...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7555...  Training loss: 1.0707...  0.1167 sec/batch
Epoch: 17/20...  Training Step: 7556...  Training loss: 0.9973...  0.1155 sec/batch
Epoch: 17/20...  Training Step: 7557...  Training loss: 0.9818...  0.1221 sec/batch
Epoch: 17/20...  Training Step: 7558...  Training loss: 1.0164...  0.1209 sec/batch
Epoch: 17/20...  Training Step: 7559...  Training loss: 0.9536...  0.1183 sec/batch
Epoch: 17/20...  Training Step: 7560...  Training loss: 0.8337...  0.1149 sec/batch
Epoch: 17/20...  Training Step: 7561...  Training loss: 0.7724...  0.1192 sec/batch
Epoch: 17/20...  Training Step: 7562...  Training loss: 0.9340...  0.1217 sec/batch
Epoch: 17/20...  Training Step: 7563...  Training loss: 0.8227...  0.1179 sec/batch
Epoch: 17/20...  Training Step: 7564...  Training loss: 0.7714...  0.1182 sec/batch
Epoch: 17/20...  Training Step: 7565...  Training loss: 0.7822...  0.1154 sec/batch
Epoch: 17/20...  Training Step: 7566...  Training loss: 0.8526...  0.1178 sec/batch
Epoch: 17/20...  Training Step: 7567...  Training loss: 0.9067...  0.1173 sec/batch
Epoch: 17/20...  Training Step: 7568...  Training loss: 0.9889...  0.1184 sec/batch
Epoch: 17/20...  Training Step: 7569...  Training loss: 1.0468...  0.1177 sec/batch
Epoch: 17/20...  Training Step: 7570...  Training loss: 0.8339...  0.1187 sec/batch
Epoch: 17/20...  Training Step: 7571...  Training loss: 0.9876...  0.1184 sec/batch
Epoch: 17/20...  Training Step: 7572...  Training loss: 0.8769...  0.1224 sec/batch
Epoch: 17/20...  Training Step: 7573...  Training loss: 0.8354...  0.1141 sec/batch
Epoch: 17/20...  Training Step: 7574...  Training loss: 0.9154...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7575...  Training loss: 0.8165...  0.1219 sec/batch
Epoch: 17/20...  Training Step: 7576...  Training loss: 1.0832...  0.1199 sec/batch
Epoch: 17/20...  Training Step: 7577...  Training loss: 1.0626...  0.1166 sec/batch
Epoch: 17/20...  Training Step: 7578...  Training loss: 1.0009...  0.1181 sec/batch
Epoch: 17/20...  Training Step: 7579...  Training loss: 0.8916...  0.1199 sec/batch
Epoch: 17/20...  Training Step: 7580...  Training loss: 0.8597...  0.1212 sec/batch
Epoch: 17/20...  Training Step: 7581...  Training loss: 1.0149...  0.1188 sec/batch
Epoch: 17/20...  Training Step: 7582...  Training loss: 0.9623...  0.1242 sec/batch
Epoch: 17/20...  Training Step: 7583...  Training loss: 0.8451...  0.1219 sec/batch
Epoch: 17/20...  Training Step: 7584...  Training loss: 0.8855...  0.1187 sec/batch
Epoch: 17/20...  Training Step: 7585...  Training loss: 0.9494...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7586...  Training loss: 0.9350...  0.1203 sec/batch
Epoch: 17/20...  Training Step: 7587...  Training loss: 1.0929...  0.1216 sec/batch
Epoch: 17/20...  Training Step: 7588...  Training loss: 0.8620...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7589...  Training loss: 0.9157...  0.1166 sec/batch
Epoch: 17/20...  Training Step: 7590...  Training loss: 1.0346...  0.1166 sec/batch
Epoch: 17/20...  Training Step: 7591...  Training loss: 0.7260...  0.1230 sec/batch
Epoch: 17/20...  Training Step: 7592...  Training loss: 1.1059...  0.1165 sec/batch
Epoch: 17/20...  Training Step: 7593...  Training loss: 0.8651...  0.1192 sec/batch
Epoch: 17/20...  Training Step: 7594...  Training loss: 0.8713...  0.1210 sec/batch
Epoch: 17/20...  Training Step: 7595...  Training loss: 1.0930...  0.1181 sec/batch
Epoch: 17/20...  Training Step: 7596...  Training loss: 1.0123...  0.1137 sec/batch
Epoch: 17/20...  Training Step: 7597...  Training loss: 0.8744...  0.1164 sec/batch
Epoch: 17/20...  Training Step: 7598...  Training loss: 0.9491...  0.1181 sec/batch
Epoch: 17/20...  Training Step: 7599...  Training loss: 1.0495...  0.1183 sec/batch
Epoch: 17/20...  Training Step: 7600...  Training loss: 0.9545...  0.1193 sec/batch
Epoch: 17/20...  Training Step: 7601...  Training loss: 0.8928...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7602...  Training loss: 1.0662...  0.1163 sec/batch
Epoch: 17/20...  Training Step: 7603...  Training loss: 0.7664...  0.1240 sec/batch
Epoch: 17/20...  Training Step: 7604...  Training loss: 1.0224...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7605...  Training loss: 0.7197...  0.1161 sec/batch
Epoch: 17/20...  Training Step: 7606...  Training loss: 1.1342...  0.1157 sec/batch
Epoch: 17/20...  Training Step: 7607...  Training loss: 1.0307...  0.1183 sec/batch
Epoch: 17/20...  Training Step: 7608...  Training loss: 0.9343...  0.1167 sec/batch
Epoch: 17/20...  Training Step: 7609...  Training loss: 0.9865...  0.1215 sec/batch
Epoch: 17/20...  Training Step: 7610...  Training loss: 1.0059...  0.1162 sec/batch
Epoch: 17/20...  Training Step: 7611...  Training loss: 0.9068...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7612...  Training loss: 0.8240...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7613...  Training loss: 1.0441...  0.1164 sec/batch
Epoch: 17/20...  Training Step: 7614...  Training loss: 0.8881...  0.1155 sec/batch
Epoch: 17/20...  Training Step: 7615...  Training loss: 0.9123...  0.1178 sec/batch
Epoch: 17/20...  Training Step: 7616...  Training loss: 0.9527...  0.1125 sec/batch
Epoch: 17/20...  Training Step: 7617...  Training loss: 0.9239...  0.1192 sec/batch
Epoch: 17/20...  Training Step: 7618...  Training loss: 0.8655...  0.1207 sec/batch
Epoch: 17/20...  Training Step: 7619...  Training loss: 0.9059...  0.1188 sec/batch
Epoch: 17/20...  Training Step: 7620...  Training loss: 1.0119...  0.1210 sec/batch
Epoch: 17/20...  Training Step: 7621...  Training loss: 0.9645...  0.1203 sec/batch
Epoch: 17/20...  Training Step: 7622...  Training loss: 0.9696...  0.1148 sec/batch
Epoch: 17/20...  Training Step: 7623...  Training loss: 0.7758...  0.1192 sec/batch
Epoch: 17/20...  Training Step: 7624...  Training loss: 0.8919...  0.1176 sec/batch
Epoch: 17/20...  Training Step: 7625...  Training loss: 1.0184...  0.1181 sec/batch
Epoch: 17/20...  Training Step: 7626...  Training loss: 1.0663...  0.1162 sec/batch
Epoch: 17/20...  Training Step: 7627...  Training loss: 0.9039...  0.1150 sec/batch
Epoch: 17/20...  Training Step: 7628...  Training loss: 1.1074...  0.1201 sec/batch
Epoch: 17/20...  Training Step: 7629...  Training loss: 0.7958...  0.1154 sec/batch
Epoch: 17/20...  Training Step: 7630...  Training loss: 0.9493...  0.1201 sec/batch
Epoch: 17/20...  Training Step: 7631...  Training loss: 0.8707...  0.1219 sec/batch
Epoch: 17/20...  Training Step: 7632...  Training loss: 0.9724...  0.1155 sec/batch
Epoch: 17/20...  Training Step: 7633...  Training loss: 0.9428...  0.1165 sec/batch
Epoch: 17/20...  Training Step: 7634...  Training loss: 0.7927...  0.1145 sec/batch
Epoch: 17/20...  Training Step: 7635...  Training loss: 0.9360...  0.1224 sec/batch
Epoch: 17/20...  Training Step: 7636...  Training loss: 1.0147...  0.1120 sec/batch
Epoch: 17/20...  Training Step: 7637...  Training loss: 0.9434...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7638...  Training loss: 0.8344...  0.1212 sec/batch
Epoch: 17/20...  Training Step: 7639...  Training loss: 0.9031...  0.1204 sec/batch
Epoch: 17/20...  Training Step: 7640...  Training loss: 0.9628...  0.1165 sec/batch
Epoch: 17/20...  Training Step: 7641...  Training loss: 0.9721...  0.1218 sec/batch
Epoch: 17/20...  Training Step: 7642...  Training loss: 1.0787...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7643...  Training loss: 1.1125...  0.1138 sec/batch
Epoch: 17/20...  Training Step: 7644...  Training loss: 0.9989...  0.1145 sec/batch
Epoch: 17/20...  Training Step: 7645...  Training loss: 0.8999...  0.1239 sec/batch
Epoch: 17/20...  Training Step: 7646...  Training loss: 1.2745...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7647...  Training loss: 1.0430...  0.1196 sec/batch
Epoch: 17/20...  Training Step: 7648...  Training loss: 1.1943...  0.1157 sec/batch
Epoch: 17/20...  Training Step: 7649...  Training loss: 1.0093...  0.1144 sec/batch
Epoch: 17/20...  Training Step: 7650...  Training loss: 1.2891...  0.1198 sec/batch
Epoch: 17/20...  Training Step: 7651...  Training loss: 1.0854...  0.1125 sec/batch
Epoch: 17/20...  Training Step: 7652...  Training loss: 0.9130...  0.1216 sec/batch
Epoch: 17/20...  Training Step: 7653...  Training loss: 1.0676...  0.1166 sec/batch
Epoch: 17/20...  Training Step: 7654...  Training loss: 0.9209...  0.1145 sec/batch
Epoch: 17/20...  Training Step: 7655...  Training loss: 0.9432...  0.1258 sec/batch
Epoch: 17/20...  Training Step: 7656...  Training loss: 1.0736...  0.1207 sec/batch
Epoch: 17/20...  Training Step: 7657...  Training loss: 1.1605...  0.1197 sec/batch
Epoch: 17/20...  Training Step: 7658...  Training loss: 0.9957...  0.1172 sec/batch
Epoch: 17/20...  Training Step: 7659...  Training loss: 0.9871...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7660...  Training loss: 0.9200...  0.1134 sec/batch
Epoch: 17/20...  Training Step: 7661...  Training loss: 1.0561...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7662...  Training loss: 0.8203...  0.1156 sec/batch
Epoch: 17/20...  Training Step: 7663...  Training loss: 0.9567...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7664...  Training loss: 0.9583...  0.1212 sec/batch
Epoch: 17/20...  Training Step: 7665...  Training loss: 0.8857...  0.1217 sec/batch
Epoch: 17/20...  Training Step: 7666...  Training loss: 0.9042...  0.1216 sec/batch
Epoch: 17/20...  Training Step: 7667...  Training loss: 1.0988...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7668...  Training loss: 0.9539...  0.1149 sec/batch
Epoch: 17/20...  Training Step: 7669...  Training loss: 1.0620...  0.1153 sec/batch
Epoch: 17/20...  Training Step: 7670...  Training loss: 0.8340...  0.1207 sec/batch
Epoch: 17/20...  Training Step: 7671...  Training loss: 0.9603...  0.1194 sec/batch
Epoch: 17/20...  Training Step: 7672...  Training loss: 1.1377...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7673...  Training loss: 1.0218...  0.1155 sec/batch
Epoch: 17/20...  Training Step: 7674...  Training loss: 0.8680...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7675...  Training loss: 1.0277...  0.1187 sec/batch
Epoch: 17/20...  Training Step: 7676...  Training loss: 0.9630...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7677...  Training loss: 0.9364...  0.1162 sec/batch
Epoch: 17/20...  Training Step: 7678...  Training loss: 1.0068...  0.1147 sec/batch
Epoch: 17/20...  Training Step: 7679...  Training loss: 0.8872...  0.1169 sec/batch
Epoch: 17/20...  Training Step: 7680...  Training loss: 1.0560...  0.1180 sec/batch
Epoch: 17/20...  Training Step: 7681...  Training loss: 0.9233...  0.1174 sec/batch
Epoch: 17/20...  Training Step: 7682...  Training loss: 0.9237...  0.1167 sec/batch
Epoch: 17/20...  Training Step: 7683...  Training loss: 0.7914...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7684...  Training loss: 0.9460...  0.1170 sec/batch
Epoch: 17/20...  Training Step: 7685...  Training loss: 1.0508...  0.1182 sec/batch
Epoch: 17/20...  Training Step: 7686...  Training loss: 1.0089...  0.1246 sec/batch
Epoch: 17/20...  Training Step: 7687...  Training loss: 1.0545...  0.1272 sec/batch
Epoch: 17/20...  Training Step: 7688...  Training loss: 1.0533...  0.1428 sec/batch
Epoch: 17/20...  Training Step: 7689...  Training loss: 1.0566...  0.1436 sec/batch
Epoch: 17/20...  Training Step: 7690...  Training loss: 1.0239...  0.1250 sec/batch
Epoch: 17/20...  Training Step: 7691...  Training loss: 1.0952...  0.1226 sec/batch
Epoch: 17/20...  Training Step: 7692...  Training loss: 1.1392...  0.1285 sec/batch
Epoch: 17/20...  Training Step: 7693...  Training loss: 1.0355...  0.1224 sec/batch
Epoch: 17/20...  Training Step: 7694...  Training loss: 1.0973...  0.1127 sec/batch
Epoch: 17/20...  Training Step: 7695...  Training loss: 0.9231...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7696...  Training loss: 1.1138...  0.1139 sec/batch
Epoch: 17/20...  Training Step: 7697...  Training loss: 1.1840...  0.1154 sec/batch
Epoch: 17/20...  Training Step: 7698...  Training loss: 1.0642...  0.1183 sec/batch
Epoch: 17/20...  Training Step: 7699...  Training loss: 1.0121...  0.1218 sec/batch
Epoch: 17/20...  Training Step: 7700...  Training loss: 0.9120...  0.1706 sec/batch
Epoch: 17/20...  Training Step: 7701...  Training loss: 0.9362...  0.1575 sec/batch
Epoch: 17/20...  Training Step: 7702...  Training loss: 1.1631...  0.1387 sec/batch
Epoch: 17/20...  Training Step: 7703...  Training loss: 0.9457...  0.1363 sec/batch
Epoch: 17/20...  Training Step: 7704...  Training loss: 0.8650...  0.1383 sec/batch
Epoch: 17/20...  Training Step: 7705...  Training loss: 0.9386...  0.1288 sec/batch
Epoch: 17/20...  Training Step: 7706...  Training loss: 1.1486...  0.1293 sec/batch
Epoch: 17/20...  Training Step: 7707...  Training loss: 0.9780...  0.1361 sec/batch
Epoch: 17/20...  Training Step: 7708...  Training loss: 1.0553...  0.1321 sec/batch
Epoch: 17/20...  Training Step: 7709...  Training loss: 0.8693...  0.1198 sec/batch
Epoch: 17/20...  Training Step: 7710...  Training loss: 0.9172...  0.1239 sec/batch
Epoch: 17/20...  Training Step: 7711...  Training loss: 1.0286...  0.1208 sec/batch
Epoch: 17/20...  Training Step: 7712...  Training loss: 1.0101...  0.1175 sec/batch
Epoch: 17/20...  Training Step: 7713...  Training loss: 0.9967...  0.1129 sec/batch
Epoch: 17/20...  Training Step: 7714...  Training loss: 1.0746...  0.1134 sec/batch
Epoch: 17/20...  Training Step: 7715...  Training loss: 1.0987...  0.1136 sec/batch
Epoch: 17/20...  Training Step: 7716...  Training loss: 0.9198...  0.1126 sec/batch
Epoch: 17/20...  Training Step: 7717...  Training loss: 0.9158...  0.1145 sec/batch
Epoch: 17/20...  Training Step: 7718...  Training loss: 0.9746...  0.1147 sec/batch
Epoch: 17/20...  Training Step: 7719...  Training loss: 0.9864...  0.1162 sec/batch
Epoch: 17/20...  Training Step: 7720...  Training loss: 1.1914...  0.1141 sec/batch
Epoch: 17/20...  Training Step: 7721...  Training loss: 0.8432...  0.1139 sec/batch
Epoch: 17/20...  Training Step: 7722...  Training loss: 0.8635...  0.1166 sec/batch
Epoch: 17/20...  Training Step: 7723...  Training loss: 0.9918...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7724...  Training loss: 1.0455...  0.1153 sec/batch
Epoch: 17/20...  Training Step: 7725...  Training loss: 1.0136...  0.1135 sec/batch
Epoch: 17/20...  Training Step: 7726...  Training loss: 0.9810...  0.1159 sec/batch
Epoch: 17/20...  Training Step: 7727...  Training loss: 0.8493...  0.1177 sec/batch
Epoch: 17/20...  Training Step: 7728...  Training loss: 1.2489...  0.1187 sec/batch
Epoch: 17/20...  Training Step: 7729...  Training loss: 0.8705...  0.1209 sec/batch
Epoch: 17/20...  Training Step: 7730...  Training loss: 0.9788...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7731...  Training loss: 1.0159...  0.1163 sec/batch
Epoch: 17/20...  Training Step: 7732...  Training loss: 1.1089...  0.1208 sec/batch
Epoch: 17/20...  Training Step: 7733...  Training loss: 1.0211...  0.1265 sec/batch
Epoch: 17/20...  Training Step: 7734...  Training loss: 1.0765...  0.1226 sec/batch
Epoch: 17/20...  Training Step: 7735...  Training loss: 0.9183...  0.1194 sec/batch
Epoch: 17/20...  Training Step: 7736...  Training loss: 0.8194...  0.1155 sec/batch
Epoch: 17/20...  Training Step: 7737...  Training loss: 0.9145...  0.1215 sec/batch
Epoch: 17/20...  Training Step: 7738...  Training loss: 0.9098...  0.1172 sec/batch
Epoch: 17/20...  Training Step: 7739...  Training loss: 0.8095...  0.1169 sec/batch
Epoch: 17/20...  Training Step: 7740...  Training loss: 0.8436...  0.1200 sec/batch
Epoch: 17/20...  Training Step: 7741...  Training loss: 0.8461...  0.1174 sec/batch
Epoch: 17/20...  Training Step: 7742...  Training loss: 0.8953...  0.1204 sec/batch
Epoch: 17/20...  Training Step: 7743...  Training loss: 0.9596...  0.1222 sec/batch
Epoch: 17/20...  Training Step: 7744...  Training loss: 0.8824...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7745...  Training loss: 0.8691...  0.1195 sec/batch
Epoch: 17/20...  Training Step: 7746...  Training loss: 1.0833...  0.1189 sec/batch
Epoch: 17/20...  Training Step: 7747...  Training loss: 0.8606...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7748...  Training loss: 0.9145...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7749...  Training loss: 0.9241...  0.1160 sec/batch
Epoch: 17/20...  Training Step: 7750...  Training loss: 0.8388...  0.1202 sec/batch
Epoch: 17/20...  Training Step: 7751...  Training loss: 0.9148...  0.1189 sec/batch
Epoch: 17/20...  Training Step: 7752...  Training loss: 0.9581...  0.1130 sec/batch
Epoch: 17/20...  Training Step: 7753...  Training loss: 1.0082...  0.1157 sec/batch
Epoch: 17/20...  Training Step: 7754...  Training loss: 0.8972...  0.1152 sec/batch
Epoch: 17/20...  Training Step: 7755...  Training loss: 0.9282...  0.1137 sec/batch
Epoch: 17/20...  Training Step: 7756...  Training loss: 1.0269...  0.1169 sec/batch
Epoch: 17/20...  Training Step: 7757...  Training loss: 0.8683...  0.1178 sec/batch
Epoch: 17/20...  Training Step: 7758...  Training loss: 0.9311...  0.1192 sec/batch
Epoch: 17/20...  Training Step: 7759...  Training loss: 0.9769...  0.1162 sec/batch
Epoch: 17/20...  Training Step: 7760...  Training loss: 0.9495...  0.1195 sec/batch
Epoch: 17/20...  Training Step: 7761...  Training loss: 0.7861...  0.1204 sec/batch
Epoch: 17/20...  Training Step: 7762...  Training loss: 0.8909...  0.1193 sec/batch
Epoch: 17/20...  Training Step: 7763...  Training loss: 0.9373...  0.1208 sec/batch
Epoch: 17/20...  Training Step: 7764...  Training loss: 1.0478...  0.1162 sec/batch
Epoch: 17/20...  Training Step: 7765...  Training loss: 0.9689...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7766...  Training loss: 1.0228...  0.1180 sec/batch
Epoch: 17/20...  Training Step: 7767...  Training loss: 0.7975...  0.1189 sec/batch
Epoch: 17/20...  Training Step: 7768...  Training loss: 0.9392...  0.1176 sec/batch
Epoch: 17/20...  Training Step: 7769...  Training loss: 0.9383...  0.1215 sec/batch
Epoch: 17/20...  Training Step: 7770...  Training loss: 0.9312...  0.1169 sec/batch
Epoch: 17/20...  Training Step: 7771...  Training loss: 0.9745...  0.1146 sec/batch
Epoch: 17/20...  Training Step: 7772...  Training loss: 1.0195...  0.1198 sec/batch
Epoch: 17/20...  Training Step: 7773...  Training loss: 0.9749...  0.1172 sec/batch
Epoch: 17/20...  Training Step: 7774...  Training loss: 0.9201...  0.1195 sec/batch
Epoch: 17/20...  Training Step: 7775...  Training loss: 1.0011...  0.1173 sec/batch
Epoch: 17/20...  Training Step: 7776...  Training loss: 0.9899...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7777...  Training loss: 0.9441...  0.1151 sec/batch
Epoch: 17/20...  Training Step: 7778...  Training loss: 0.6978...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7779...  Training loss: 1.1155...  0.1148 sec/batch
Epoch: 17/20...  Training Step: 7780...  Training loss: 0.9921...  0.1220 sec/batch
Epoch: 17/20...  Training Step: 7781...  Training loss: 0.8366...  0.1205 sec/batch
Epoch: 17/20...  Training Step: 7782...  Training loss: 0.9573...  0.1223 sec/batch
Epoch: 17/20...  Training Step: 7783...  Training loss: 1.0737...  0.1164 sec/batch
Epoch: 17/20...  Training Step: 7784...  Training loss: 0.7820...  0.1155 sec/batch
Epoch: 17/20...  Training Step: 7785...  Training loss: 1.0242...  0.1161 sec/batch
Epoch: 17/20...  Training Step: 7786...  Training loss: 0.9245...  0.1199 sec/batch
Epoch: 17/20...  Training Step: 7787...  Training loss: 0.7734...  0.1200 sec/batch
Epoch: 17/20...  Training Step: 7788...  Training loss: 0.9467...  0.1205 sec/batch
Epoch: 17/20...  Training Step: 7789...  Training loss: 0.9479...  0.1202 sec/batch
Epoch: 17/20...  Training Step: 7790...  Training loss: 0.9764...  0.1163 sec/batch
Epoch: 17/20...  Training Step: 7791...  Training loss: 1.0488...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7792...  Training loss: 1.0427...  0.1204 sec/batch
Epoch: 17/20...  Training Step: 7793...  Training loss: 1.0280...  0.1125 sec/batch
Epoch: 17/20...  Training Step: 7794...  Training loss: 0.9535...  0.1208 sec/batch
Epoch: 17/20...  Training Step: 7795...  Training loss: 0.7960...  0.1200 sec/batch
Epoch: 17/20...  Training Step: 7796...  Training loss: 1.1333...  0.1203 sec/batch
Epoch: 17/20...  Training Step: 7797...  Training loss: 0.8023...  0.1166 sec/batch
Epoch: 17/20...  Training Step: 7798...  Training loss: 0.9380...  0.1126 sec/batch
Epoch: 17/20...  Training Step: 7799...  Training loss: 0.9718...  0.1179 sec/batch
Epoch: 17/20...  Training Step: 7800...  Training loss: 1.1013...  0.1158 sec/batch
Epoch: 17/20...  Training Step: 7801...  Training loss: 1.0058...  0.1218 sec/batch
Epoch: 17/20...  Training Step: 7802...  Training loss: 0.9654...  0.1194 sec/batch
Epoch: 17/20...  Training Step: 7803...  Training loss: 1.0426...  0.1166 sec/batch
Epoch: 17/20...  Training Step: 7804...  Training loss: 1.0371...  0.1205 sec/batch
Epoch: 17/20...  Training Step: 7805...  Training loss: 0.8512...  0.1136 sec/batch
Epoch: 17/20...  Training Step: 7806...  Training loss: 0.9506...  0.1199 sec/batch
Epoch: 17/20...  Training Step: 7807...  Training loss: 0.8110...  0.1200 sec/batch
Epoch: 17/20...  Training Step: 7808...  Training loss: 1.0672...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7809...  Training loss: 0.8601...  0.1158 sec/batch
Epoch: 17/20...  Training Step: 7810...  Training loss: 0.9556...  0.1154 sec/batch
Epoch: 17/20...  Training Step: 7811...  Training loss: 0.9254...  0.1162 sec/batch
Epoch: 17/20...  Training Step: 7812...  Training loss: 0.9630...  0.1200 sec/batch
Epoch: 17/20...  Training Step: 7813...  Training loss: 0.8986...  0.1219 sec/batch
Epoch: 17/20...  Training Step: 7814...  Training loss: 0.8760...  0.1173 sec/batch
Epoch: 17/20...  Training Step: 7815...  Training loss: 0.9856...  0.1175 sec/batch
Epoch: 17/20...  Training Step: 7816...  Training loss: 0.9426...  0.1181 sec/batch
Epoch: 17/20...  Training Step: 7817...  Training loss: 1.0794...  0.1173 sec/batch
Epoch: 17/20...  Training Step: 7818...  Training loss: 0.9432...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7819...  Training loss: 0.8393...  0.1183 sec/batch
Epoch: 17/20...  Training Step: 7820...  Training loss: 1.0847...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7821...  Training loss: 0.9931...  0.1146 sec/batch
Epoch: 17/20...  Training Step: 7822...  Training loss: 1.0252...  0.1157 sec/batch
Epoch: 17/20...  Training Step: 7823...  Training loss: 1.0269...  0.1188 sec/batch
Epoch: 17/20...  Training Step: 7824...  Training loss: 1.0074...  0.1141 sec/batch
Epoch: 17/20...  Training Step: 7825...  Training loss: 0.9165...  0.1132 sec/batch
Epoch: 17/20...  Training Step: 7826...  Training loss: 0.9436...  0.1163 sec/batch
Epoch: 17/20...  Training Step: 7827...  Training loss: 0.8004...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7828...  Training loss: 1.0844...  0.1195 sec/batch
Epoch: 17/20...  Training Step: 7829...  Training loss: 1.0157...  0.1155 sec/batch
Epoch: 17/20...  Training Step: 7830...  Training loss: 1.0270...  0.1174 sec/batch
Epoch: 17/20...  Training Step: 7831...  Training loss: 0.8478...  0.1205 sec/batch
Epoch: 17/20...  Training Step: 7832...  Training loss: 1.1326...  0.1189 sec/batch
Epoch: 17/20...  Training Step: 7833...  Training loss: 0.9257...  0.1189 sec/batch
Epoch: 17/20...  Training Step: 7834...  Training loss: 0.9394...  0.1175 sec/batch
Epoch: 17/20...  Training Step: 7835...  Training loss: 0.8721...  0.1190 sec/batch
Epoch: 17/20...  Training Step: 7836...  Training loss: 0.9763...  0.1155 sec/batch
Epoch: 17/20...  Training Step: 7837...  Training loss: 0.8704...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7838...  Training loss: 0.8241...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7839...  Training loss: 1.0608...  0.1123 sec/batch
Epoch: 17/20...  Training Step: 7840...  Training loss: 1.0637...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7841...  Training loss: 0.9631...  0.1159 sec/batch
Epoch: 17/20...  Training Step: 7842...  Training loss: 0.7367...  0.1229 sec/batch
Epoch: 17/20...  Training Step: 7843...  Training loss: 0.7610...  0.1188 sec/batch
Epoch: 17/20...  Training Step: 7844...  Training loss: 0.7639...  0.1208 sec/batch
Epoch: 17/20...  Training Step: 7845...  Training loss: 0.9957...  0.1195 sec/batch
Epoch: 17/20...  Training Step: 7846...  Training loss: 0.9063...  0.1193 sec/batch
Epoch: 17/20...  Training Step: 7847...  Training loss: 0.8249...  0.1193 sec/batch
Epoch: 17/20...  Training Step: 7848...  Training loss: 0.9927...  0.1167 sec/batch
Epoch: 17/20...  Training Step: 7849...  Training loss: 0.9300...  0.1185 sec/batch
Epoch: 17/20...  Training Step: 7850...  Training loss: 0.9739...  0.1159 sec/batch
Epoch: 17/20...  Training Step: 7851...  Training loss: 0.9649...  0.1143 sec/batch
Epoch: 17/20...  Training Step: 7852...  Training loss: 1.0460...  0.1169 sec/batch
Epoch: 17/20...  Training Step: 7853...  Training loss: 0.7819...  0.1179 sec/batch
Epoch: 17/20...  Training Step: 7854...  Training loss: 0.8943...  0.1203 sec/batch
Epoch: 17/20...  Training Step: 7855...  Training loss: 1.1191...  0.1172 sec/batch
Epoch: 17/20...  Training Step: 7856...  Training loss: 0.8638...  0.1167 sec/batch
Epoch: 17/20...  Training Step: 7857...  Training loss: 1.0553...  0.1152 sec/batch
Epoch: 17/20...  Training Step: 7858...  Training loss: 1.1494...  0.1158 sec/batch
Epoch: 17/20...  Training Step: 7859...  Training loss: 0.8994...  0.1226 sec/batch
Epoch: 17/20...  Training Step: 7860...  Training loss: 0.8861...  0.1164 sec/batch
Epoch: 17/20...  Training Step: 7861...  Training loss: 0.8727...  0.1160 sec/batch
Epoch: 17/20...  Training Step: 7862...  Training loss: 0.9328...  0.1189 sec/batch
Epoch: 17/20...  Training Step: 7863...  Training loss: 1.2164...  0.1236 sec/batch
Epoch: 17/20...  Training Step: 7864...  Training loss: 0.9871...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7865...  Training loss: 0.8112...  0.1173 sec/batch
Epoch: 17/20...  Training Step: 7866...  Training loss: 0.9822...  0.1107 sec/batch
Epoch: 17/20...  Training Step: 7867...  Training loss: 0.9252...  0.1127 sec/batch
Epoch: 17/20...  Training Step: 7868...  Training loss: 0.9748...  0.1201 sec/batch
Epoch: 17/20...  Training Step: 7869...  Training loss: 0.9338...  0.1140 sec/batch
Epoch: 17/20...  Training Step: 7870...  Training loss: 0.8434...  0.1197 sec/batch
Epoch: 17/20...  Training Step: 7871...  Training loss: 0.9744...  0.1204 sec/batch
Epoch: 17/20...  Training Step: 7872...  Training loss: 0.8666...  0.1198 sec/batch
Epoch: 17/20...  Training Step: 7873...  Training loss: 0.9455...  0.1120 sec/batch
Epoch: 17/20...  Training Step: 7874...  Training loss: 0.9204...  0.1191 sec/batch
Epoch: 17/20...  Training Step: 7875...  Training loss: 0.8797...  0.1174 sec/batch
Epoch: 17/20...  Training Step: 7876...  Training loss: 0.9268...  0.1192 sec/batch
Epoch: 17/20...  Training Step: 7877...  Training loss: 0.8885...  0.1204 sec/batch
Epoch: 17/20...  Training Step: 7878...  Training loss: 1.0372...  0.1215 sec/batch
Epoch: 17/20...  Training Step: 7879...  Training loss: 1.0256...  0.1163 sec/batch
Epoch: 17/20...  Training Step: 7880...  Training loss: 0.8117...  0.1168 sec/batch
Epoch: 17/20...  Training Step: 7881...  Training loss: 0.9232...  0.1167 sec/batch
Epoch: 17/20...  Training Step: 7882...  Training loss: 0.7020...  0.1194 sec/batch
Epoch: 17/20...  Training Step: 7883...  Training loss: 0.7614...  0.1193 sec/batch
Epoch: 17/20...  Training Step: 7884...  Training loss: 0.8893...  0.1198 sec/batch
Epoch: 17/20...  Training Step: 7885...  Training loss: 0.9904...  0.1186 sec/batch
Epoch: 17/20...  Training Step: 7886...  Training loss: 0.9654...  0.1153 sec/batch
Epoch: 17/20...  Training Step: 7887...  Training loss: 0.9167...  0.1171 sec/batch
Epoch: 17/20...  Training Step: 7888...  Training loss: 0.8447...  0.1171 sec/batch
Epoch: 18/20...  Training Step: 7889...  Training loss: 1.1995...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 7890...  Training loss: 1.0684...  0.1203 sec/batch
Epoch: 18/20...  Training Step: 7891...  Training loss: 0.8733...  0.1199 sec/batch
Epoch: 18/20...  Training Step: 7892...  Training loss: 0.9680...  0.1172 sec/batch
Epoch: 18/20...  Training Step: 7893...  Training loss: 1.0246...  0.1195 sec/batch
Epoch: 18/20...  Training Step: 7894...  Training loss: 0.9127...  0.1169 sec/batch
Epoch: 18/20...  Training Step: 7895...  Training loss: 1.0226...  0.1153 sec/batch
Epoch: 18/20...  Training Step: 7896...  Training loss: 0.9170...  0.1174 sec/batch
Epoch: 18/20...  Training Step: 7897...  Training loss: 0.8426...  0.1205 sec/batch
Epoch: 18/20...  Training Step: 7898...  Training loss: 0.9265...  0.1180 sec/batch
Epoch: 18/20...  Training Step: 7899...  Training loss: 0.8791...  0.1188 sec/batch
Epoch: 18/20...  Training Step: 7900...  Training loss: 0.7734...  0.1218 sec/batch
Epoch: 18/20...  Training Step: 7901...  Training loss: 1.2357...  0.1194 sec/batch
Epoch: 18/20...  Training Step: 7902...  Training loss: 0.8615...  0.1157 sec/batch
Epoch: 18/20...  Training Step: 7903...  Training loss: 1.0372...  0.1182 sec/batch
Epoch: 18/20...  Training Step: 7904...  Training loss: 1.0844...  0.1130 sec/batch
Epoch: 18/20...  Training Step: 7905...  Training loss: 0.8359...  0.1209 sec/batch
Epoch: 18/20...  Training Step: 7906...  Training loss: 0.9369...  0.1176 sec/batch
Epoch: 18/20...  Training Step: 7907...  Training loss: 0.9592...  0.1222 sec/batch
Epoch: 18/20...  Training Step: 7908...  Training loss: 0.8428...  0.1267 sec/batch
Epoch: 18/20...  Training Step: 7909...  Training loss: 0.9839...  0.1339 sec/batch
Epoch: 18/20...  Training Step: 7910...  Training loss: 0.9263...  0.1176 sec/batch
Epoch: 18/20...  Training Step: 7911...  Training loss: 0.9808...  0.1168 sec/batch
Epoch: 18/20...  Training Step: 7912...  Training loss: 0.8629...  0.1163 sec/batch
Epoch: 18/20...  Training Step: 7913...  Training loss: 0.8626...  0.1167 sec/batch
Epoch: 18/20...  Training Step: 7914...  Training loss: 0.9200...  0.1183 sec/batch
Epoch: 18/20...  Training Step: 7915...  Training loss: 0.9436...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 7916...  Training loss: 0.8317...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 7917...  Training loss: 0.9107...  0.1169 sec/batch
Epoch: 18/20...  Training Step: 7918...  Training loss: 0.9756...  0.1193 sec/batch
Epoch: 18/20...  Training Step: 7919...  Training loss: 0.8670...  0.1182 sec/batch
Epoch: 18/20...  Training Step: 7920...  Training loss: 0.8497...  0.1181 sec/batch
Epoch: 18/20...  Training Step: 7921...  Training loss: 0.8997...  0.1189 sec/batch
Epoch: 18/20...  Training Step: 7922...  Training loss: 0.8192...  0.1189 sec/batch
Epoch: 18/20...  Training Step: 7923...  Training loss: 0.6792...  0.1218 sec/batch
Epoch: 18/20...  Training Step: 7924...  Training loss: 0.8264...  0.1214 sec/batch
Epoch: 18/20...  Training Step: 7925...  Training loss: 0.8558...  0.1217 sec/batch
Epoch: 18/20...  Training Step: 7926...  Training loss: 0.9954...  0.1183 sec/batch
Epoch: 18/20...  Training Step: 7927...  Training loss: 0.8058...  0.1194 sec/batch
Epoch: 18/20...  Training Step: 7928...  Training loss: 1.1310...  0.1159 sec/batch
Epoch: 18/20...  Training Step: 7929...  Training loss: 0.8716...  0.1202 sec/batch
Epoch: 18/20...  Training Step: 7930...  Training loss: 0.7727...  0.1172 sec/batch
Epoch: 18/20...  Training Step: 7931...  Training loss: 1.0995...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 7932...  Training loss: 0.8041...  0.1169 sec/batch
Epoch: 18/20...  Training Step: 7933...  Training loss: 0.9710...  0.1166 sec/batch
Epoch: 18/20...  Training Step: 7934...  Training loss: 0.9638...  0.1168 sec/batch
Epoch: 18/20...  Training Step: 7935...  Training loss: 1.0008...  0.1169 sec/batch
Epoch: 18/20...  Training Step: 7936...  Training loss: 1.0090...  0.1210 sec/batch
Epoch: 18/20...  Training Step: 7937...  Training loss: 0.8667...  0.1159 sec/batch
Epoch: 18/20...  Training Step: 7938...  Training loss: 0.8430...  0.1201 sec/batch
Epoch: 18/20...  Training Step: 7939...  Training loss: 0.8686...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 7940...  Training loss: 0.9892...  0.1193 sec/batch
Epoch: 18/20...  Training Step: 7941...  Training loss: 1.0496...  0.1212 sec/batch
Epoch: 18/20...  Training Step: 7942...  Training loss: 0.8455...  0.1210 sec/batch
Epoch: 18/20...  Training Step: 7943...  Training loss: 0.9363...  0.1207 sec/batch
Epoch: 18/20...  Training Step: 7944...  Training loss: 0.9014...  0.1214 sec/batch
Epoch: 18/20...  Training Step: 7945...  Training loss: 0.9003...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 7946...  Training loss: 0.9270...  0.1160 sec/batch
Epoch: 18/20...  Training Step: 7947...  Training loss: 0.7539...  0.1199 sec/batch
Epoch: 18/20...  Training Step: 7948...  Training loss: 0.8953...  0.1186 sec/batch
Epoch: 18/20...  Training Step: 7949...  Training loss: 0.8480...  0.1136 sec/batch
Epoch: 18/20...  Training Step: 7950...  Training loss: 1.0207...  0.1183 sec/batch
Epoch: 18/20...  Training Step: 7951...  Training loss: 0.7833...  0.1202 sec/batch
Epoch: 18/20...  Training Step: 7952...  Training loss: 0.9327...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 7953...  Training loss: 0.8019...  0.1255 sec/batch
Epoch: 18/20...  Training Step: 7954...  Training loss: 0.9437...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 7955...  Training loss: 0.9194...  0.1215 sec/batch
Epoch: 18/20...  Training Step: 7956...  Training loss: 0.9374...  0.1151 sec/batch
Epoch: 18/20...  Training Step: 7957...  Training loss: 0.9134...  0.1169 sec/batch
Epoch: 18/20...  Training Step: 7958...  Training loss: 0.9952...  0.1154 sec/batch
Epoch: 18/20...  Training Step: 7959...  Training loss: 1.0159...  0.1186 sec/batch
Epoch: 18/20...  Training Step: 7960...  Training loss: 0.7809...  0.1135 sec/batch
Epoch: 18/20...  Training Step: 7961...  Training loss: 0.8349...  0.1204 sec/batch
Epoch: 18/20...  Training Step: 7962...  Training loss: 0.7444...  0.1170 sec/batch
Epoch: 18/20...  Training Step: 7963...  Training loss: 0.9804...  0.1161 sec/batch
Epoch: 18/20...  Training Step: 7964...  Training loss: 0.8149...  0.1181 sec/batch
Epoch: 18/20...  Training Step: 7965...  Training loss: 0.7889...  0.1210 sec/batch
Epoch: 18/20...  Training Step: 7966...  Training loss: 0.8883...  0.1211 sec/batch
Epoch: 18/20...  Training Step: 7967...  Training loss: 0.8855...  0.1188 sec/batch
Epoch: 18/20...  Training Step: 7968...  Training loss: 0.9566...  0.1240 sec/batch
Epoch: 18/20...  Training Step: 7969...  Training loss: 1.1487...  0.1272 sec/batch
Epoch: 18/20...  Training Step: 7970...  Training loss: 0.8824...  0.1241 sec/batch
Epoch: 18/20...  Training Step: 7971...  Training loss: 0.8013...  0.1163 sec/batch
Epoch: 18/20...  Training Step: 7972...  Training loss: 0.9481...  0.1264 sec/batch
Epoch: 18/20...  Training Step: 7973...  Training loss: 0.9345...  0.1178 sec/batch
Epoch: 18/20...  Training Step: 7974...  Training loss: 1.0905...  0.1176 sec/batch
Epoch: 18/20...  Training Step: 7975...  Training loss: 0.8692...  0.1110 sec/batch
Epoch: 18/20...  Training Step: 7976...  Training loss: 1.1108...  0.1171 sec/batch
Epoch: 18/20...  Training Step: 7977...  Training loss: 1.0913...  0.1193 sec/batch
Epoch: 18/20...  Training Step: 7978...  Training loss: 0.8404...  0.1153 sec/batch
Epoch: 18/20...  Training Step: 7979...  Training loss: 1.0650...  0.1161 sec/batch
Epoch: 18/20...  Training Step: 7980...  Training loss: 1.1844...  0.1158 sec/batch
Epoch: 18/20...  Training Step: 7981...  Training loss: 0.8515...  0.1180 sec/batch
Epoch: 18/20...  Training Step: 7982...  Training loss: 1.1220...  0.1168 sec/batch
Epoch: 18/20...  Training Step: 7983...  Training loss: 0.9597...  0.1211 sec/batch
Epoch: 18/20...  Training Step: 7984...  Training loss: 0.9243...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 7985...  Training loss: 1.1161...  0.1161 sec/batch
Epoch: 18/20...  Training Step: 7986...  Training loss: 1.1634...  0.1167 sec/batch
Epoch: 18/20...  Training Step: 7987...  Training loss: 0.9546...  0.1217 sec/batch
Epoch: 18/20...  Training Step: 7988...  Training loss: 0.9782...  0.1207 sec/batch
Epoch: 18/20...  Training Step: 7989...  Training loss: 1.0073...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 7990...  Training loss: 1.0732...  0.1195 sec/batch
Epoch: 18/20...  Training Step: 7991...  Training loss: 1.0252...  0.1204 sec/batch
Epoch: 18/20...  Training Step: 7992...  Training loss: 0.9697...  0.1182 sec/batch
Epoch: 18/20...  Training Step: 7993...  Training loss: 1.1179...  0.1156 sec/batch
Epoch: 18/20...  Training Step: 7994...  Training loss: 1.0784...  0.1167 sec/batch
Epoch: 18/20...  Training Step: 7995...  Training loss: 0.8086...  0.1202 sec/batch
Epoch: 18/20...  Training Step: 7996...  Training loss: 1.0706...  0.1162 sec/batch
Epoch: 18/20...  Training Step: 7997...  Training loss: 0.8760...  0.1195 sec/batch
Epoch: 18/20...  Training Step: 7998...  Training loss: 0.9013...  0.1172 sec/batch
Epoch: 18/20...  Training Step: 7999...  Training loss: 0.9819...  0.1173 sec/batch
Epoch: 18/20...  Training Step: 8000...  Training loss: 0.8261...  0.1156 sec/batch
Epoch: 18/20...  Training Step: 8001...  Training loss: 1.0421...  0.1286 sec/batch
Epoch: 18/20...  Training Step: 8002...  Training loss: 0.9984...  0.1220 sec/batch
Epoch: 18/20...  Training Step: 8003...  Training loss: 0.9274...  0.1222 sec/batch
Epoch: 18/20...  Training Step: 8004...  Training loss: 0.7786...  0.1184 sec/batch
Epoch: 18/20...  Training Step: 8005...  Training loss: 0.9699...  0.1242 sec/batch
Epoch: 18/20...  Training Step: 8006...  Training loss: 0.8397...  0.1165 sec/batch
Epoch: 18/20...  Training Step: 8007...  Training loss: 0.9524...  0.1263 sec/batch
Epoch: 18/20...  Training Step: 8008...  Training loss: 0.8017...  0.1329 sec/batch
Epoch: 18/20...  Training Step: 8009...  Training loss: 0.9855...  0.1382 sec/batch
Epoch: 18/20...  Training Step: 8010...  Training loss: 0.9705...  0.1201 sec/batch
Epoch: 18/20...  Training Step: 8011...  Training loss: 0.9152...  0.1277 sec/batch
Epoch: 18/20...  Training Step: 8012...  Training loss: 0.9492...  0.1275 sec/batch
Epoch: 18/20...  Training Step: 8013...  Training loss: 1.0271...  0.1276 sec/batch
Epoch: 18/20...  Training Step: 8014...  Training loss: 0.9085...  0.1257 sec/batch
Epoch: 18/20...  Training Step: 8015...  Training loss: 0.9085...  0.1275 sec/batch
Epoch: 18/20...  Training Step: 8016...  Training loss: 1.0138...  0.1252 sec/batch
Epoch: 18/20...  Training Step: 8017...  Training loss: 0.8808...  0.1231 sec/batch
Epoch: 18/20...  Training Step: 8018...  Training loss: 0.9200...  0.1210 sec/batch
Epoch: 18/20...  Training Step: 8019...  Training loss: 0.9594...  0.1178 sec/batch
Epoch: 18/20...  Training Step: 8020...  Training loss: 0.9125...  0.1142 sec/batch
Epoch: 18/20...  Training Step: 8021...  Training loss: 0.8698...  0.1255 sec/batch
Epoch: 18/20...  Training Step: 8022...  Training loss: 1.1484...  0.1260 sec/batch
Epoch: 18/20...  Training Step: 8023...  Training loss: 0.9393...  0.1245 sec/batch
Epoch: 18/20...  Training Step: 8024...  Training loss: 0.8751...  0.1236 sec/batch
Epoch: 18/20...  Training Step: 8025...  Training loss: 0.8713...  0.1197 sec/batch
Epoch: 18/20...  Training Step: 8026...  Training loss: 1.1181...  0.1244 sec/batch
Epoch: 18/20...  Training Step: 8027...  Training loss: 0.9432...  0.1203 sec/batch
Epoch: 18/20...  Training Step: 8028...  Training loss: 0.8842...  0.1161 sec/batch
Epoch: 18/20...  Training Step: 8029...  Training loss: 0.8274...  0.1273 sec/batch
Epoch: 18/20...  Training Step: 8030...  Training loss: 0.8489...  0.1278 sec/batch
Epoch: 18/20...  Training Step: 8031...  Training loss: 0.8770...  0.1328 sec/batch
Epoch: 18/20...  Training Step: 8032...  Training loss: 1.0657...  0.1236 sec/batch
Epoch: 18/20...  Training Step: 8033...  Training loss: 0.8777...  0.1286 sec/batch
Epoch: 18/20...  Training Step: 8034...  Training loss: 0.9082...  0.1343 sec/batch
Epoch: 18/20...  Training Step: 8035...  Training loss: 0.8404...  0.1273 sec/batch
Epoch: 18/20...  Training Step: 8036...  Training loss: 0.8007...  0.1344 sec/batch
Epoch: 18/20...  Training Step: 8037...  Training loss: 0.7997...  0.1369 sec/batch
Epoch: 18/20...  Training Step: 8038...  Training loss: 0.9636...  0.1354 sec/batch
Epoch: 18/20...  Training Step: 8039...  Training loss: 0.8807...  0.1296 sec/batch
Epoch: 18/20...  Training Step: 8040...  Training loss: 0.9865...  0.1283 sec/batch
Epoch: 18/20...  Training Step: 8041...  Training loss: 1.1032...  0.1255 sec/batch
Epoch: 18/20...  Training Step: 8042...  Training loss: 1.0164...  0.1228 sec/batch
Epoch: 18/20...  Training Step: 8043...  Training loss: 0.8208...  0.1244 sec/batch
Epoch: 18/20...  Training Step: 8044...  Training loss: 0.8725...  0.1214 sec/batch
Epoch: 18/20...  Training Step: 8045...  Training loss: 0.9739...  0.1248 sec/batch
Epoch: 18/20...  Training Step: 8046...  Training loss: 0.8841...  0.1254 sec/batch
Epoch: 18/20...  Training Step: 8047...  Training loss: 0.7506...  0.1231 sec/batch
Epoch: 18/20...  Training Step: 8048...  Training loss: 0.9009...  0.1174 sec/batch
Epoch: 18/20...  Training Step: 8049...  Training loss: 1.0109...  0.1143 sec/batch
Epoch: 18/20...  Training Step: 8050...  Training loss: 0.8988...  0.1162 sec/batch
Epoch: 18/20...  Training Step: 8051...  Training loss: 1.0202...  0.1162 sec/batch
Epoch: 18/20...  Training Step: 8052...  Training loss: 0.8769...  0.1193 sec/batch
Epoch: 18/20...  Training Step: 8053...  Training loss: 0.9770...  0.1174 sec/batch
Epoch: 18/20...  Training Step: 8054...  Training loss: 0.9409...  0.1143 sec/batch
Epoch: 18/20...  Training Step: 8055...  Training loss: 0.7155...  0.1210 sec/batch
Epoch: 18/20...  Training Step: 8056...  Training loss: 1.0102...  0.1190 sec/batch
Epoch: 18/20...  Training Step: 8057...  Training loss: 0.7779...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 8058...  Training loss: 0.8849...  0.1157 sec/batch
Epoch: 18/20...  Training Step: 8059...  Training loss: 1.0222...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 8060...  Training loss: 0.9792...  0.1152 sec/batch
Epoch: 18/20...  Training Step: 8061...  Training loss: 1.0303...  0.1288 sec/batch
Epoch: 18/20...  Training Step: 8062...  Training loss: 0.9550...  0.1234 sec/batch
Epoch: 18/20...  Training Step: 8063...  Training loss: 1.1485...  0.1168 sec/batch
Epoch: 18/20...  Training Step: 8064...  Training loss: 0.8554...  0.1162 sec/batch
Epoch: 18/20...  Training Step: 8065...  Training loss: 0.8532...  0.1183 sec/batch
Epoch: 18/20...  Training Step: 8066...  Training loss: 1.0216...  0.1193 sec/batch
Epoch: 18/20...  Training Step: 8067...  Training loss: 0.8422...  0.1214 sec/batch
Epoch: 18/20...  Training Step: 8068...  Training loss: 0.9236...  0.1156 sec/batch
Epoch: 18/20...  Training Step: 8069...  Training loss: 0.7626...  0.1207 sec/batch
Epoch: 18/20...  Training Step: 8070...  Training loss: 1.0368...  0.1165 sec/batch
Epoch: 18/20...  Training Step: 8071...  Training loss: 1.0480...  0.1140 sec/batch
Epoch: 18/20...  Training Step: 8072...  Training loss: 0.9790...  0.1197 sec/batch
Epoch: 18/20...  Training Step: 8073...  Training loss: 1.1193...  0.1159 sec/batch
Epoch: 18/20...  Training Step: 8074...  Training loss: 1.0596...  0.1234 sec/batch
Epoch: 18/20...  Training Step: 8075...  Training loss: 1.0123...  0.1168 sec/batch
Epoch: 18/20...  Training Step: 8076...  Training loss: 0.8581...  0.1193 sec/batch
Epoch: 18/20...  Training Step: 8077...  Training loss: 1.0114...  0.1220 sec/batch
Epoch: 18/20...  Training Step: 8078...  Training loss: 0.8254...  0.1176 sec/batch
Epoch: 18/20...  Training Step: 8079...  Training loss: 0.8717...  0.1170 sec/batch
Epoch: 18/20...  Training Step: 8080...  Training loss: 0.8136...  0.1144 sec/batch
Epoch: 18/20...  Training Step: 8081...  Training loss: 0.8587...  0.1256 sec/batch
Epoch: 18/20...  Training Step: 8082...  Training loss: 0.9035...  0.1208 sec/batch
Epoch: 18/20...  Training Step: 8083...  Training loss: 1.0676...  0.1183 sec/batch
Epoch: 18/20...  Training Step: 8084...  Training loss: 1.0160...  0.1162 sec/batch
Epoch: 18/20...  Training Step: 8085...  Training loss: 0.8906...  0.1157 sec/batch
Epoch: 18/20...  Training Step: 8086...  Training loss: 0.9471...  0.1129 sec/batch
Epoch: 18/20...  Training Step: 8087...  Training loss: 0.8009...  0.1164 sec/batch
Epoch: 18/20...  Training Step: 8088...  Training loss: 0.9353...  0.1190 sec/batch
Epoch: 18/20...  Training Step: 8089...  Training loss: 0.9493...  0.1217 sec/batch
Epoch: 18/20...  Training Step: 8090...  Training loss: 1.1290...  0.1135 sec/batch
Epoch: 18/20...  Training Step: 8091...  Training loss: 0.8424...  0.1187 sec/batch
Epoch: 18/20...  Training Step: 8092...  Training loss: 1.1078...  0.1188 sec/batch
Epoch: 18/20...  Training Step: 8093...  Training loss: 0.9812...  0.1204 sec/batch
Epoch: 18/20...  Training Step: 8094...  Training loss: 1.0555...  0.1181 sec/batch
Epoch: 18/20...  Training Step: 8095...  Training loss: 0.8589...  0.1164 sec/batch
Epoch: 18/20...  Training Step: 8096...  Training loss: 0.9411...  0.1181 sec/batch
Epoch: 18/20...  Training Step: 8097...  Training loss: 0.9387...  0.1205 sec/batch
Epoch: 18/20...  Training Step: 8098...  Training loss: 0.7482...  0.1185 sec/batch
Epoch: 18/20...  Training Step: 8099...  Training loss: 0.9148...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 8100...  Training loss: 1.0871...  0.1220 sec/batch
Epoch: 18/20...  Training Step: 8101...  Training loss: 0.9539...  0.1185 sec/batch
Epoch: 18/20...  Training Step: 8102...  Training loss: 0.8992...  0.1165 sec/batch
Epoch: 18/20...  Training Step: 8103...  Training loss: 1.0452...  0.1169 sec/batch
Epoch: 18/20...  Training Step: 8104...  Training loss: 0.8627...  0.1185 sec/batch
Epoch: 18/20...  Training Step: 8105...  Training loss: 0.9289...  0.1166 sec/batch
Epoch: 18/20...  Training Step: 8106...  Training loss: 1.0311...  0.1199 sec/batch
Epoch: 18/20...  Training Step: 8107...  Training loss: 1.0387...  0.1166 sec/batch
Epoch: 18/20...  Training Step: 8108...  Training loss: 0.8090...  0.1214 sec/batch
Epoch: 18/20...  Training Step: 8109...  Training loss: 0.9531...  0.1181 sec/batch
Epoch: 18/20...  Training Step: 8110...  Training loss: 1.2152...  0.1172 sec/batch
Epoch: 18/20...  Training Step: 8111...  Training loss: 1.0350...  0.1184 sec/batch
Epoch: 18/20...  Training Step: 8112...  Training loss: 1.1994...  0.1176 sec/batch
Epoch: 18/20...  Training Step: 8113...  Training loss: 0.9079...  0.1210 sec/batch
Epoch: 18/20...  Training Step: 8114...  Training loss: 1.1227...  0.1218 sec/batch
Epoch: 18/20...  Training Step: 8115...  Training loss: 1.0919...  0.1216 sec/batch
Epoch: 18/20...  Training Step: 8116...  Training loss: 0.9687...  0.1152 sec/batch
Epoch: 18/20...  Training Step: 8117...  Training loss: 1.0246...  0.1171 sec/batch
Epoch: 18/20...  Training Step: 8118...  Training loss: 0.8806...  0.1178 sec/batch
Epoch: 18/20...  Training Step: 8119...  Training loss: 0.9861...  0.1166 sec/batch
Epoch: 18/20...  Training Step: 8120...  Training loss: 1.0741...  0.1104 sec/batch
Epoch: 18/20...  Training Step: 8121...  Training loss: 1.1239...  0.1171 sec/batch
Epoch: 18/20...  Training Step: 8122...  Training loss: 0.9833...  0.1200 sec/batch
Epoch: 18/20...  Training Step: 8123...  Training loss: 1.1755...  0.1201 sec/batch
Epoch: 18/20...  Training Step: 8124...  Training loss: 1.0677...  0.1159 sec/batch
Epoch: 18/20...  Training Step: 8125...  Training loss: 0.9615...  0.1155 sec/batch
Epoch: 18/20...  Training Step: 8126...  Training loss: 0.9244...  0.1134 sec/batch
Epoch: 18/20...  Training Step: 8127...  Training loss: 0.9304...  0.1137 sec/batch
Epoch: 18/20...  Training Step: 8128...  Training loss: 1.0288...  0.1139 sec/batch
Epoch: 18/20...  Training Step: 8129...  Training loss: 0.8746...  0.1203 sec/batch
Epoch: 18/20...  Training Step: 8130...  Training loss: 0.8947...  0.1183 sec/batch
Epoch: 18/20...  Training Step: 8131...  Training loss: 1.1132...  0.1164 sec/batch
Epoch: 18/20...  Training Step: 8132...  Training loss: 0.8238...  0.1209 sec/batch
Epoch: 18/20...  Training Step: 8133...  Training loss: 0.9429...  0.1167 sec/batch
Epoch: 18/20...  Training Step: 8134...  Training loss: 0.8887...  0.1181 sec/batch
Epoch: 18/20...  Training Step: 8135...  Training loss: 0.8454...  0.1147 sec/batch
Epoch: 18/20...  Training Step: 8136...  Training loss: 1.0062...  0.1193 sec/batch
Epoch: 18/20...  Training Step: 8137...  Training loss: 0.8936...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 8138...  Training loss: 0.8471...  0.1188 sec/batch
Epoch: 18/20...  Training Step: 8139...  Training loss: 0.8942...  0.1201 sec/batch
Epoch: 18/20...  Training Step: 8140...  Training loss: 1.0764...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 8141...  Training loss: 0.9471...  0.1139 sec/batch
Epoch: 18/20...  Training Step: 8142...  Training loss: 0.8576...  0.1212 sec/batch
Epoch: 18/20...  Training Step: 8143...  Training loss: 0.8660...  0.1156 sec/batch
Epoch: 18/20...  Training Step: 8144...  Training loss: 0.9709...  0.1229 sec/batch
Epoch: 18/20...  Training Step: 8145...  Training loss: 1.1029...  0.1199 sec/batch
Epoch: 18/20...  Training Step: 8146...  Training loss: 0.9685...  0.1188 sec/batch
Epoch: 18/20...  Training Step: 8147...  Training loss: 0.7179...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 8148...  Training loss: 0.8535...  0.1224 sec/batch
Epoch: 18/20...  Training Step: 8149...  Training loss: 0.9524...  0.1176 sec/batch
Epoch: 18/20...  Training Step: 8150...  Training loss: 1.0426...  0.1134 sec/batch
Epoch: 18/20...  Training Step: 8151...  Training loss: 0.9237...  0.1166 sec/batch
Epoch: 18/20...  Training Step: 8152...  Training loss: 1.0667...  0.1184 sec/batch
Epoch: 18/20...  Training Step: 8153...  Training loss: 1.0212...  0.1316 sec/batch
Epoch: 18/20...  Training Step: 8154...  Training loss: 1.0083...  0.1202 sec/batch
Epoch: 18/20...  Training Step: 8155...  Training loss: 1.0856...  0.1129 sec/batch
Epoch: 18/20...  Training Step: 8156...  Training loss: 1.1464...  0.1154 sec/batch
Epoch: 18/20...  Training Step: 8157...  Training loss: 0.9247...  0.1188 sec/batch
Epoch: 18/20...  Training Step: 8158...  Training loss: 0.9509...  0.1174 sec/batch
Epoch: 18/20...  Training Step: 8159...  Training loss: 0.9575...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 8160...  Training loss: 1.0754...  0.1189 sec/batch
Epoch: 18/20...  Training Step: 8161...  Training loss: 1.1622...  0.1166 sec/batch
Epoch: 18/20...  Training Step: 8162...  Training loss: 1.0672...  0.1236 sec/batch
Epoch: 18/20...  Training Step: 8163...  Training loss: 0.9871...  0.1174 sec/batch
Epoch: 18/20...  Training Step: 8164...  Training loss: 0.8056...  0.1185 sec/batch
Epoch: 18/20...  Training Step: 8165...  Training loss: 0.8612...  0.1228 sec/batch
Epoch: 18/20...  Training Step: 8166...  Training loss: 1.1043...  0.1214 sec/batch
Epoch: 18/20...  Training Step: 8167...  Training loss: 1.0337...  0.1284 sec/batch
Epoch: 18/20...  Training Step: 8168...  Training loss: 1.0249...  0.1330 sec/batch
Epoch: 18/20...  Training Step: 8169...  Training loss: 0.9251...  0.1286 sec/batch
Epoch: 18/20...  Training Step: 8170...  Training loss: 0.9611...  0.1203 sec/batch
Epoch: 18/20...  Training Step: 8171...  Training loss: 1.0260...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 8172...  Training loss: 0.9884...  0.1249 sec/batch
Epoch: 18/20...  Training Step: 8173...  Training loss: 0.9052...  0.1246 sec/batch
Epoch: 18/20...  Training Step: 8174...  Training loss: 0.9370...  0.1199 sec/batch
Epoch: 18/20...  Training Step: 8175...  Training loss: 1.0528...  0.1198 sec/batch
Epoch: 18/20...  Training Step: 8176...  Training loss: 0.9738...  0.1207 sec/batch
Epoch: 18/20...  Training Step: 8177...  Training loss: 0.9602...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 8178...  Training loss: 0.9295...  0.1170 sec/batch
Epoch: 18/20...  Training Step: 8179...  Training loss: 0.9293...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 8180...  Training loss: 0.8706...  0.1196 sec/batch
Epoch: 18/20...  Training Step: 8181...  Training loss: 0.8723...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 8182...  Training loss: 1.1081...  0.1460 sec/batch
Epoch: 18/20...  Training Step: 8183...  Training loss: 0.9854...  0.1202 sec/batch
Epoch: 18/20...  Training Step: 8184...  Training loss: 1.1135...  0.1171 sec/batch
Epoch: 18/20...  Training Step: 8185...  Training loss: 0.8220...  0.1162 sec/batch
Epoch: 18/20...  Training Step: 8186...  Training loss: 0.8876...  0.1209 sec/batch
Epoch: 18/20...  Training Step: 8187...  Training loss: 0.9418...  0.1154 sec/batch
Epoch: 18/20...  Training Step: 8188...  Training loss: 0.9915...  0.1153 sec/batch
Epoch: 18/20...  Training Step: 8189...  Training loss: 1.0160...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 8190...  Training loss: 0.9817...  0.1191 sec/batch
Epoch: 18/20...  Training Step: 8191...  Training loss: 0.9962...  0.1229 sec/batch
Epoch: 18/20...  Training Step: 8192...  Training loss: 1.2310...  0.1313 sec/batch
Epoch: 18/20...  Training Step: 8193...  Training loss: 0.9498...  0.1287 sec/batch
Epoch: 18/20...  Training Step: 8194...  Training loss: 1.0884...  0.1335 sec/batch
Epoch: 18/20...  Training Step: 8195...  Training loss: 1.0174...  0.1225 sec/batch
Epoch: 18/20...  Training Step: 8196...  Training loss: 1.2694...  0.1305 sec/batch
Epoch: 18/20...  Training Step: 8197...  Training loss: 1.0784...  0.1250 sec/batch
Epoch: 18/20...  Training Step: 8198...  Training loss: 1.0894...  0.1146 sec/batch
Epoch: 18/20...  Training Step: 8199...  Training loss: 1.0071...  0.1240 sec/batch
Epoch: 18/20...  Training Step: 8200...  Training loss: 0.7382...  0.1158 sec/batch
Epoch: 18/20...  Training Step: 8201...  Training loss: 0.8460...  0.1170 sec/batch
Epoch: 18/20...  Training Step: 8202...  Training loss: 0.9063...  0.1251 sec/batch
Epoch: 18/20...  Training Step: 8203...  Training loss: 0.8354...  0.1271 sec/batch
Epoch: 18/20...  Training Step: 8204...  Training loss: 0.7939...  0.1262 sec/batch
Epoch: 18/20...  Training Step: 8205...  Training loss: 0.7599...  0.1243 sec/batch
Epoch: 18/20...  Training Step: 8206...  Training loss: 0.9393...  0.1318 sec/batch
Epoch: 18/20...  Training Step: 8207...  Training loss: 0.9621...  0.1306 sec/batch
Epoch: 18/20...  Training Step: 8208...  Training loss: 0.9248...  0.1178 sec/batch
Epoch: 18/20...  Training Step: 8209...  Training loss: 0.8865...  0.1153 sec/batch
Epoch: 18/20...  Training Step: 8210...  Training loss: 1.1410...  0.1196 sec/batch
Epoch: 18/20...  Training Step: 8211...  Training loss: 0.9348...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 8212...  Training loss: 1.0744...  0.1181 sec/batch
Epoch: 18/20...  Training Step: 8213...  Training loss: 0.8063...  0.1173 sec/batch
Epoch: 18/20...  Training Step: 8214...  Training loss: 0.7476...  0.1127 sec/batch
Epoch: 18/20...  Training Step: 8215...  Training loss: 0.9746...  0.1206 sec/batch
Epoch: 18/20...  Training Step: 8216...  Training loss: 0.8606...  0.1176 sec/batch
Epoch: 18/20...  Training Step: 8217...  Training loss: 0.8909...  0.1195 sec/batch
Epoch: 18/20...  Training Step: 8218...  Training loss: 0.8404...  0.1193 sec/batch
Epoch: 18/20...  Training Step: 8219...  Training loss: 1.0188...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 8220...  Training loss: 0.9484...  0.1185 sec/batch
Epoch: 18/20...  Training Step: 8221...  Training loss: 0.8516...  0.1215 sec/batch
Epoch: 18/20...  Training Step: 8222...  Training loss: 0.9533...  0.1159 sec/batch
Epoch: 18/20...  Training Step: 8223...  Training loss: 0.9483...  0.1218 sec/batch
Epoch: 18/20...  Training Step: 8224...  Training loss: 0.9496...  0.1174 sec/batch
Epoch: 18/20...  Training Step: 8225...  Training loss: 0.7561...  0.1186 sec/batch
Epoch: 18/20...  Training Step: 8226...  Training loss: 0.8701...  0.1182 sec/batch
Epoch: 18/20...  Training Step: 8227...  Training loss: 0.9415...  0.1168 sec/batch
Epoch: 18/20...  Training Step: 8228...  Training loss: 0.8039...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 8229...  Training loss: 0.8570...  0.1218 sec/batch
Epoch: 18/20...  Training Step: 8230...  Training loss: 0.9960...  0.1165 sec/batch
Epoch: 18/20...  Training Step: 8231...  Training loss: 0.8151...  0.1149 sec/batch
Epoch: 18/20...  Training Step: 8232...  Training loss: 0.9258...  0.1205 sec/batch
Epoch: 18/20...  Training Step: 8233...  Training loss: 0.8916...  0.1256 sec/batch
Epoch: 18/20...  Training Step: 8234...  Training loss: 0.9030...  0.1265 sec/batch
Epoch: 18/20...  Training Step: 8235...  Training loss: 0.8692...  0.1291 sec/batch
Epoch: 18/20...  Training Step: 8236...  Training loss: 1.1076...  0.1228 sec/batch
Epoch: 18/20...  Training Step: 8237...  Training loss: 0.9123...  0.1229 sec/batch
Epoch: 18/20...  Training Step: 8238...  Training loss: 0.9279...  0.1329 sec/batch
Epoch: 18/20...  Training Step: 8239...  Training loss: 0.9361...  0.1231 sec/batch
Epoch: 18/20...  Training Step: 8240...  Training loss: 0.8841...  0.1274 sec/batch
Epoch: 18/20...  Training Step: 8241...  Training loss: 0.8461...  0.1373 sec/batch
Epoch: 18/20...  Training Step: 8242...  Training loss: 0.6147...  0.1312 sec/batch
Epoch: 18/20...  Training Step: 8243...  Training loss: 0.8658...  0.1299 sec/batch
Epoch: 18/20...  Training Step: 8244...  Training loss: 0.8986...  0.1278 sec/batch
Epoch: 18/20...  Training Step: 8245...  Training loss: 0.8657...  0.1251 sec/batch
Epoch: 18/20...  Training Step: 8246...  Training loss: 1.0087...  0.1306 sec/batch
Epoch: 18/20...  Training Step: 8247...  Training loss: 1.1485...  0.1229 sec/batch
Epoch: 18/20...  Training Step: 8248...  Training loss: 0.8012...  0.1225 sec/batch
Epoch: 18/20...  Training Step: 8249...  Training loss: 0.9908...  0.1217 sec/batch
Epoch: 18/20...  Training Step: 8250...  Training loss: 0.8224...  0.1212 sec/batch
Epoch: 18/20...  Training Step: 8251...  Training loss: 0.7865...  0.1271 sec/batch
Epoch: 18/20...  Training Step: 8252...  Training loss: 1.0453...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 8253...  Training loss: 0.9370...  0.1296 sec/batch
Epoch: 18/20...  Training Step: 8254...  Training loss: 0.9823...  0.1238 sec/batch
Epoch: 18/20...  Training Step: 8255...  Training loss: 0.9448...  0.1307 sec/batch
Epoch: 18/20...  Training Step: 8256...  Training loss: 1.0134...  0.1250 sec/batch
Epoch: 18/20...  Training Step: 8257...  Training loss: 1.1106...  0.1300 sec/batch
Epoch: 18/20...  Training Step: 8258...  Training loss: 1.0409...  0.1275 sec/batch
Epoch: 18/20...  Training Step: 8259...  Training loss: 0.8549...  0.1173 sec/batch
Epoch: 18/20...  Training Step: 8260...  Training loss: 0.9710...  0.1241 sec/batch
Epoch: 18/20...  Training Step: 8261...  Training loss: 0.7809...  0.1309 sec/batch
Epoch: 18/20...  Training Step: 8262...  Training loss: 0.8501...  0.1220 sec/batch
Epoch: 18/20...  Training Step: 8263...  Training loss: 0.8758...  0.1296 sec/batch
Epoch: 18/20...  Training Step: 8264...  Training loss: 0.9564...  0.1180 sec/batch
Epoch: 18/20...  Training Step: 8265...  Training loss: 0.9971...  0.1228 sec/batch
Epoch: 18/20...  Training Step: 8266...  Training loss: 1.0631...  0.1323 sec/batch
Epoch: 18/20...  Training Step: 8267...  Training loss: 0.9565...  0.1234 sec/batch
Epoch: 18/20...  Training Step: 8268...  Training loss: 0.8936...  0.1190 sec/batch
Epoch: 18/20...  Training Step: 8269...  Training loss: 0.8845...  0.1210 sec/batch
Epoch: 18/20...  Training Step: 8270...  Training loss: 0.9486...  0.1250 sec/batch
Epoch: 18/20...  Training Step: 8271...  Training loss: 0.9039...  0.1321 sec/batch
Epoch: 18/20...  Training Step: 8272...  Training loss: 0.9601...  0.1383 sec/batch
Epoch: 18/20...  Training Step: 8273...  Training loss: 0.9400...  0.1337 sec/batch
Epoch: 18/20...  Training Step: 8274...  Training loss: 0.7964...  0.1159 sec/batch
Epoch: 18/20...  Training Step: 8275...  Training loss: 0.9777...  0.1472 sec/batch
Epoch: 18/20...  Training Step: 8276...  Training loss: 0.8723...  0.1259 sec/batch
Epoch: 18/20...  Training Step: 8277...  Training loss: 0.8566...  0.1253 sec/batch
Epoch: 18/20...  Training Step: 8278...  Training loss: 0.9050...  0.1234 sec/batch
Epoch: 18/20...  Training Step: 8279...  Training loss: 0.7644...  0.1182 sec/batch
Epoch: 18/20...  Training Step: 8280...  Training loss: 0.8686...  0.1250 sec/batch
Epoch: 18/20...  Training Step: 8281...  Training loss: 0.9575...  0.1222 sec/batch
Epoch: 18/20...  Training Step: 8282...  Training loss: 0.8188...  0.1210 sec/batch
Epoch: 18/20...  Training Step: 8283...  Training loss: 0.9057...  0.1293 sec/batch
Epoch: 18/20...  Training Step: 8284...  Training loss: 0.9609...  0.1260 sec/batch
Epoch: 18/20...  Training Step: 8285...  Training loss: 0.9091...  0.1128 sec/batch
Epoch: 18/20...  Training Step: 8286...  Training loss: 1.0026...  0.1135 sec/batch
Epoch: 18/20...  Training Step: 8287...  Training loss: 1.0357...  0.1245 sec/batch
Epoch: 18/20...  Training Step: 8288...  Training loss: 1.0003...  0.1289 sec/batch
Epoch: 18/20...  Training Step: 8289...  Training loss: 0.8817...  0.1226 sec/batch
Epoch: 18/20...  Training Step: 8290...  Training loss: 0.9983...  0.1162 sec/batch
Epoch: 18/20...  Training Step: 8291...  Training loss: 0.8399...  0.1175 sec/batch
Epoch: 18/20...  Training Step: 8292...  Training loss: 0.9412...  0.1177 sec/batch
Epoch: 18/20...  Training Step: 8293...  Training loss: 1.1047...  0.1290 sec/batch
Epoch: 18/20...  Training Step: 8294...  Training loss: 1.0386...  0.1183 sec/batch
Epoch: 18/20...  Training Step: 8295...  Training loss: 0.8983...  0.1179 sec/batch
Epoch: 18/20...  Training Step: 8296...  Training loss: 1.1281...  0.1164 sec/batch
Epoch: 18/20...  Training Step: 8297...  Training loss: 0.9074...  0.1155 sec/batch
Epoch: 18/20...  Training Step: 8298...  Training loss: 0.8986...  0.1204 sec/batch
Epoch: 18/20...  Training Step: 8299...  Training loss: 0.8398...  0.1152 sec/batch
Epoch: 18/20...  Training Step: 8300...  Training loss: 0.8613...  0.1245 sec/batch
Epoch: 18/20...  Training Step: 8301...  Training loss: 0.9387...  0.1234 sec/batch
Epoch: 18/20...  Training Step: 8302...  Training loss: 0.9664...  0.1222 sec/batch
Epoch: 18/20...  Training Step: 8303...  Training loss: 1.0031...  0.1224 sec/batch
Epoch: 18/20...  Training Step: 8304...  Training loss: 0.9805...  0.1257 sec/batch
Epoch: 18/20...  Training Step: 8305...  Training loss: 0.8408...  0.1226 sec/batch
Epoch: 18/20...  Training Step: 8306...  Training loss: 0.7967...  0.1292 sec/batch
Epoch: 18/20...  Training Step: 8307...  Training loss: 0.7992...  0.1304 sec/batch
Epoch: 18/20...  Training Step: 8308...  Training loss: 0.8665...  0.1325 sec/batch
Epoch: 18/20...  Training Step: 8309...  Training loss: 1.0707...  0.1248 sec/batch
Epoch: 18/20...  Training Step: 8310...  Training loss: 0.8937...  0.1171 sec/batch
Epoch: 18/20...  Training Step: 8311...  Training loss: 0.8534...  0.1227 sec/batch
Epoch: 18/20...  Training Step: 8312...  Training loss: 0.7684...  0.1243 sec/batch
Epoch: 18/20...  Training Step: 8313...  Training loss: 0.8224...  0.1245 sec/batch
Epoch: 18/20...  Training Step: 8314...  Training loss: 0.8700...  0.1200 sec/batch
Epoch: 18/20...  Training Step: 8315...  Training loss: 0.8424...  0.1270 sec/batch
Epoch: 18/20...  Training Step: 8316...  Training loss: 0.9440...  0.1265 sec/batch
Epoch: 18/20...  Training Step: 8317...  Training loss: 0.8613...  0.1299 sec/batch
Epoch: 18/20...  Training Step: 8318...  Training loss: 0.8361...  0.1295 sec/batch
Epoch: 18/20...  Training Step: 8319...  Training loss: 1.1101...  0.1287 sec/batch
Epoch: 18/20...  Training Step: 8320...  Training loss: 0.8756...  0.1187 sec/batch
Epoch: 18/20...  Training Step: 8321...  Training loss: 1.0962...  0.1199 sec/batch
Epoch: 18/20...  Training Step: 8322...  Training loss: 1.0412...  0.1233 sec/batch
Epoch: 18/20...  Training Step: 8323...  Training loss: 0.8265...  0.1280 sec/batch
Epoch: 18/20...  Training Step: 8324...  Training loss: 0.9549...  0.1273 sec/batch
Epoch: 18/20...  Training Step: 8325...  Training loss: 0.9535...  0.1380 sec/batch
Epoch: 18/20...  Training Step: 8326...  Training loss: 1.0093...  0.1380 sec/batch
Epoch: 18/20...  Training Step: 8327...  Training loss: 1.0489...  0.1301 sec/batch
Epoch: 18/20...  Training Step: 8328...  Training loss: 1.0168...  0.1279 sec/batch
Epoch: 18/20...  Training Step: 8329...  Training loss: 0.8217...  0.1333 sec/batch
Epoch: 18/20...  Training Step: 8330...  Training loss: 0.8971...  0.1275 sec/batch
Epoch: 18/20...  Training Step: 8331...  Training loss: 0.8663...  0.1273 sec/batch
Epoch: 18/20...  Training Step: 8332...  Training loss: 0.9541...  0.1206 sec/batch
Epoch: 18/20...  Training Step: 8333...  Training loss: 0.8503...  0.1332 sec/batch
Epoch: 18/20...  Training Step: 8334...  Training loss: 0.9915...  0.1265 sec/batch
Epoch: 18/20...  Training Step: 8335...  Training loss: 1.0506...  0.1230 sec/batch
Epoch: 18/20...  Training Step: 8336...  Training loss: 0.8883...  0.1306 sec/batch
Epoch: 18/20...  Training Step: 8337...  Training loss: 0.8749...  0.1255 sec/batch
Epoch: 18/20...  Training Step: 8338...  Training loss: 0.9057...  0.1265 sec/batch
Epoch: 18/20...  Training Step: 8339...  Training loss: 0.9189...  0.1322 sec/batch
Epoch: 18/20...  Training Step: 8340...  Training loss: 0.9050...  0.1283 sec/batch
Epoch: 18/20...  Training Step: 8341...  Training loss: 0.8074...  0.1338 sec/batch
Epoch: 18/20...  Training Step: 8342...  Training loss: 1.0813...  0.1305 sec/batch
Epoch: 18/20...  Training Step: 8343...  Training loss: 0.9666...  0.1312 sec/batch
Epoch: 18/20...  Training Step: 8344...  Training loss: 0.7942...  0.1281 sec/batch
Epoch: 18/20...  Training Step: 8345...  Training loss: 0.8730...  0.1282 sec/batch
Epoch: 18/20...  Training Step: 8346...  Training loss: 0.8492...  0.1381 sec/batch
Epoch: 18/20...  Training Step: 8347...  Training loss: 0.7918...  0.1263 sec/batch
Epoch: 18/20...  Training Step: 8348...  Training loss: 0.8757...  0.1147 sec/batch
Epoch: 18/20...  Training Step: 8349...  Training loss: 0.8952...  0.1152 sec/batch
Epoch: 18/20...  Training Step: 8350...  Training loss: 0.8989...  0.1155 sec/batch
Epoch: 18/20...  Training Step: 8351...  Training loss: 0.8481...  0.1227 sec/batch
Epoch: 18/20...  Training Step: 8352...  Training loss: 1.0280...  0.1206 sec/batch
Epoch: 19/20...  Training Step: 8353...  Training loss: 1.0587...  0.1220 sec/batch
Epoch: 19/20...  Training Step: 8354...  Training loss: 1.0556...  0.1198 sec/batch
Epoch: 19/20...  Training Step: 8355...  Training loss: 1.0438...  0.1250 sec/batch
Epoch: 19/20...  Training Step: 8356...  Training loss: 0.9614...  0.1175 sec/batch
Epoch: 19/20...  Training Step: 8357...  Training loss: 0.9439...  0.1158 sec/batch
Epoch: 19/20...  Training Step: 8358...  Training loss: 0.9039...  0.1157 sec/batch
Epoch: 19/20...  Training Step: 8359...  Training loss: 1.0787...  0.1212 sec/batch
Epoch: 19/20...  Training Step: 8360...  Training loss: 0.8546...  0.1199 sec/batch
Epoch: 19/20...  Training Step: 8361...  Training loss: 0.8039...  0.1147 sec/batch
Epoch: 19/20...  Training Step: 8362...  Training loss: 0.9296...  0.1197 sec/batch
Epoch: 19/20...  Training Step: 8363...  Training loss: 0.8431...  0.1162 sec/batch
Epoch: 19/20...  Training Step: 8364...  Training loss: 0.8165...  0.1147 sec/batch
Epoch: 19/20...  Training Step: 8365...  Training loss: 1.1344...  0.1172 sec/batch
Epoch: 19/20...  Training Step: 8366...  Training loss: 0.8621...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8367...  Training loss: 1.0189...  0.1140 sec/batch
Epoch: 19/20...  Training Step: 8368...  Training loss: 1.0598...  0.1173 sec/batch
Epoch: 19/20...  Training Step: 8369...  Training loss: 0.8167...  0.1173 sec/batch
Epoch: 19/20...  Training Step: 8370...  Training loss: 0.9923...  0.1135 sec/batch
Epoch: 19/20...  Training Step: 8371...  Training loss: 0.9444...  0.1170 sec/batch
Epoch: 19/20...  Training Step: 8372...  Training loss: 0.9540...  0.1179 sec/batch
Epoch: 19/20...  Training Step: 8373...  Training loss: 1.0692...  0.1165 sec/batch
Epoch: 19/20...  Training Step: 8374...  Training loss: 0.8254...  0.1213 sec/batch
Epoch: 19/20...  Training Step: 8375...  Training loss: 0.8837...  0.1226 sec/batch
Epoch: 19/20...  Training Step: 8376...  Training loss: 0.8781...  0.1257 sec/batch
Epoch: 19/20...  Training Step: 8377...  Training loss: 0.8781...  0.1273 sec/batch
Epoch: 19/20...  Training Step: 8378...  Training loss: 0.9567...  0.1235 sec/batch
Epoch: 19/20...  Training Step: 8379...  Training loss: 0.9714...  0.1223 sec/batch
Epoch: 19/20...  Training Step: 8380...  Training loss: 0.9400...  0.1168 sec/batch
Epoch: 19/20...  Training Step: 8381...  Training loss: 0.8993...  0.1221 sec/batch
Epoch: 19/20...  Training Step: 8382...  Training loss: 0.9313...  0.1207 sec/batch
Epoch: 19/20...  Training Step: 8383...  Training loss: 0.8792...  0.1210 sec/batch
Epoch: 19/20...  Training Step: 8384...  Training loss: 0.9148...  0.1190 sec/batch
Epoch: 19/20...  Training Step: 8385...  Training loss: 0.8202...  0.1182 sec/batch
Epoch: 19/20...  Training Step: 8386...  Training loss: 0.7683...  0.1195 sec/batch
Epoch: 19/20...  Training Step: 8387...  Training loss: 0.8810...  0.1163 sec/batch
Epoch: 19/20...  Training Step: 8388...  Training loss: 0.8269...  0.1198 sec/batch
Epoch: 19/20...  Training Step: 8389...  Training loss: 0.8952...  0.1222 sec/batch
Epoch: 19/20...  Training Step: 8390...  Training loss: 0.9436...  0.1198 sec/batch
Epoch: 19/20...  Training Step: 8391...  Training loss: 0.8302...  0.1229 sec/batch
Epoch: 19/20...  Training Step: 8392...  Training loss: 1.1219...  0.1226 sec/batch
Epoch: 19/20...  Training Step: 8393...  Training loss: 0.9437...  0.1233 sec/batch
Epoch: 19/20...  Training Step: 8394...  Training loss: 0.7915...  0.1166 sec/batch
Epoch: 19/20...  Training Step: 8395...  Training loss: 0.9694...  0.1148 sec/batch
Epoch: 19/20...  Training Step: 8396...  Training loss: 0.8930...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8397...  Training loss: 1.0146...  0.1205 sec/batch
Epoch: 19/20...  Training Step: 8398...  Training loss: 1.0760...  0.1140 sec/batch
Epoch: 19/20...  Training Step: 8399...  Training loss: 0.9584...  0.1209 sec/batch
Epoch: 19/20...  Training Step: 8400...  Training loss: 0.9542...  0.1158 sec/batch
Epoch: 19/20...  Training Step: 8401...  Training loss: 0.9357...  0.1244 sec/batch
Epoch: 19/20...  Training Step: 8402...  Training loss: 0.7961...  0.1183 sec/batch
Epoch: 19/20...  Training Step: 8403...  Training loss: 0.8254...  0.1199 sec/batch
Epoch: 19/20...  Training Step: 8404...  Training loss: 0.9462...  0.1189 sec/batch
Epoch: 19/20...  Training Step: 8405...  Training loss: 0.8446...  0.1151 sec/batch
Epoch: 19/20...  Training Step: 8406...  Training loss: 0.7961...  0.1223 sec/batch
Epoch: 19/20...  Training Step: 8407...  Training loss: 0.7677...  0.1262 sec/batch
Epoch: 19/20...  Training Step: 8408...  Training loss: 0.7660...  0.1204 sec/batch
Epoch: 19/20...  Training Step: 8409...  Training loss: 1.0374...  0.1166 sec/batch
Epoch: 19/20...  Training Step: 8410...  Training loss: 0.9679...  0.1210 sec/batch
Epoch: 19/20...  Training Step: 8411...  Training loss: 0.7982...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8412...  Training loss: 0.8126...  0.1145 sec/batch
Epoch: 19/20...  Training Step: 8413...  Training loss: 0.7423...  0.1170 sec/batch
Epoch: 19/20...  Training Step: 8414...  Training loss: 0.9773...  0.1184 sec/batch
Epoch: 19/20...  Training Step: 8415...  Training loss: 0.7734...  0.1165 sec/batch
Epoch: 19/20...  Training Step: 8416...  Training loss: 0.9459...  0.1204 sec/batch
Epoch: 19/20...  Training Step: 8417...  Training loss: 0.9017...  0.1189 sec/batch
Epoch: 19/20...  Training Step: 8418...  Training loss: 0.8908...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8419...  Training loss: 0.9738...  0.1134 sec/batch
Epoch: 19/20...  Training Step: 8420...  Training loss: 0.9495...  0.1178 sec/batch
Epoch: 19/20...  Training Step: 8421...  Training loss: 0.8455...  0.1154 sec/batch
Epoch: 19/20...  Training Step: 8422...  Training loss: 0.9506...  0.1233 sec/batch
Epoch: 19/20...  Training Step: 8423...  Training loss: 0.9734...  0.1200 sec/batch
Epoch: 19/20...  Training Step: 8424...  Training loss: 0.7675...  0.1202 sec/batch
Epoch: 19/20...  Training Step: 8425...  Training loss: 0.7218...  0.1160 sec/batch
Epoch: 19/20...  Training Step: 8426...  Training loss: 0.7669...  0.1198 sec/batch
Epoch: 19/20...  Training Step: 8427...  Training loss: 1.0333...  0.1179 sec/batch
Epoch: 19/20...  Training Step: 8428...  Training loss: 0.7807...  0.1222 sec/batch
Epoch: 19/20...  Training Step: 8429...  Training loss: 0.8915...  0.1133 sec/batch
Epoch: 19/20...  Training Step: 8430...  Training loss: 0.8830...  0.1193 sec/batch
Epoch: 19/20...  Training Step: 8431...  Training loss: 0.9661...  0.1157 sec/batch
Epoch: 19/20...  Training Step: 8432...  Training loss: 0.8350...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8433...  Training loss: 0.9652...  0.1219 sec/batch
Epoch: 19/20...  Training Step: 8434...  Training loss: 0.8430...  0.1146 sec/batch
Epoch: 19/20...  Training Step: 8435...  Training loss: 0.7620...  0.1208 sec/batch
Epoch: 19/20...  Training Step: 8436...  Training loss: 0.9202...  0.1159 sec/batch
Epoch: 19/20...  Training Step: 8437...  Training loss: 0.8618...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8438...  Training loss: 1.0756...  0.1145 sec/batch
Epoch: 19/20...  Training Step: 8439...  Training loss: 0.8286...  0.1186 sec/batch
Epoch: 19/20...  Training Step: 8440...  Training loss: 1.0239...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8441...  Training loss: 0.9587...  0.1199 sec/batch
Epoch: 19/20...  Training Step: 8442...  Training loss: 0.8869...  0.1184 sec/batch
Epoch: 19/20...  Training Step: 8443...  Training loss: 1.1216...  0.1185 sec/batch
Epoch: 19/20...  Training Step: 8444...  Training loss: 0.9900...  0.1176 sec/batch
Epoch: 19/20...  Training Step: 8445...  Training loss: 0.7984...  0.1139 sec/batch
Epoch: 19/20...  Training Step: 8446...  Training loss: 1.0059...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8447...  Training loss: 0.8419...  0.1172 sec/batch
Epoch: 19/20...  Training Step: 8448...  Training loss: 0.8825...  0.1179 sec/batch
Epoch: 19/20...  Training Step: 8449...  Training loss: 1.0125...  0.1224 sec/batch
Epoch: 19/20...  Training Step: 8450...  Training loss: 0.9263...  0.1176 sec/batch
Epoch: 19/20...  Training Step: 8451...  Training loss: 0.9444...  0.1203 sec/batch
Epoch: 19/20...  Training Step: 8452...  Training loss: 0.8991...  0.1198 sec/batch
Epoch: 19/20...  Training Step: 8453...  Training loss: 0.8962...  0.1179 sec/batch
Epoch: 19/20...  Training Step: 8454...  Training loss: 0.9280...  0.1147 sec/batch
Epoch: 19/20...  Training Step: 8455...  Training loss: 1.0750...  0.1187 sec/batch
Epoch: 19/20...  Training Step: 8456...  Training loss: 0.9232...  0.1197 sec/batch
Epoch: 19/20...  Training Step: 8457...  Training loss: 0.9161...  0.1170 sec/batch
Epoch: 19/20...  Training Step: 8458...  Training loss: 0.9629...  0.1212 sec/batch
Epoch: 19/20...  Training Step: 8459...  Training loss: 0.7476...  0.1118 sec/batch
Epoch: 19/20...  Training Step: 8460...  Training loss: 1.0200...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8461...  Training loss: 0.8787...  0.1152 sec/batch
Epoch: 19/20...  Training Step: 8462...  Training loss: 0.8643...  0.1204 sec/batch
Epoch: 19/20...  Training Step: 8463...  Training loss: 1.0533...  0.1215 sec/batch
Epoch: 19/20...  Training Step: 8464...  Training loss: 0.8282...  0.1159 sec/batch
Epoch: 19/20...  Training Step: 8465...  Training loss: 1.0126...  0.1193 sec/batch
Epoch: 19/20...  Training Step: 8466...  Training loss: 1.0185...  0.1131 sec/batch
Epoch: 19/20...  Training Step: 8467...  Training loss: 0.9487...  0.1217 sec/batch
Epoch: 19/20...  Training Step: 8468...  Training loss: 0.8747...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8469...  Training loss: 0.9117...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8470...  Training loss: 0.8894...  0.1213 sec/batch
Epoch: 19/20...  Training Step: 8471...  Training loss: 0.8600...  0.1166 sec/batch
Epoch: 19/20...  Training Step: 8472...  Training loss: 0.7770...  0.1186 sec/batch
Epoch: 19/20...  Training Step: 8473...  Training loss: 0.8908...  0.1200 sec/batch
Epoch: 19/20...  Training Step: 8474...  Training loss: 0.9971...  0.1163 sec/batch
Epoch: 19/20...  Training Step: 8475...  Training loss: 1.0990...  0.1155 sec/batch
Epoch: 19/20...  Training Step: 8476...  Training loss: 0.8303...  0.1198 sec/batch
Epoch: 19/20...  Training Step: 8477...  Training loss: 0.9536...  0.1181 sec/batch
Epoch: 19/20...  Training Step: 8478...  Training loss: 0.7994...  0.1185 sec/batch
Epoch: 19/20...  Training Step: 8479...  Training loss: 0.8484...  0.1181 sec/batch
Epoch: 19/20...  Training Step: 8480...  Training loss: 0.9479...  0.1187 sec/batch
Epoch: 19/20...  Training Step: 8481...  Training loss: 0.7923...  0.1172 sec/batch
Epoch: 19/20...  Training Step: 8482...  Training loss: 0.9580...  0.1206 sec/batch
Epoch: 19/20...  Training Step: 8483...  Training loss: 0.8753...  0.1155 sec/batch
Epoch: 19/20...  Training Step: 8484...  Training loss: 0.9433...  0.1182 sec/batch
Epoch: 19/20...  Training Step: 8485...  Training loss: 0.8936...  0.1173 sec/batch
Epoch: 19/20...  Training Step: 8486...  Training loss: 1.0282...  0.1162 sec/batch
Epoch: 19/20...  Training Step: 8487...  Training loss: 0.9032...  0.1119 sec/batch
Epoch: 19/20...  Training Step: 8488...  Training loss: 0.8713...  0.1213 sec/batch
Epoch: 19/20...  Training Step: 8489...  Training loss: 0.8299...  0.1222 sec/batch
Epoch: 19/20...  Training Step: 8490...  Training loss: 1.0685...  0.1249 sec/batch
Epoch: 19/20...  Training Step: 8491...  Training loss: 0.9670...  0.1142 sec/batch
Epoch: 19/20...  Training Step: 8492...  Training loss: 0.7985...  0.1147 sec/batch
Epoch: 19/20...  Training Step: 8493...  Training loss: 0.8016...  0.1153 sec/batch
Epoch: 19/20...  Training Step: 8494...  Training loss: 0.8926...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8495...  Training loss: 0.8007...  0.1189 sec/batch
Epoch: 19/20...  Training Step: 8496...  Training loss: 0.9549...  0.1201 sec/batch
Epoch: 19/20...  Training Step: 8497...  Training loss: 1.0459...  0.1232 sec/batch
Epoch: 19/20...  Training Step: 8498...  Training loss: 0.8770...  0.1228 sec/batch
Epoch: 19/20...  Training Step: 8499...  Training loss: 0.9543...  0.1149 sec/batch
Epoch: 19/20...  Training Step: 8500...  Training loss: 0.7901...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8501...  Training loss: 0.7204...  0.1175 sec/batch
Epoch: 19/20...  Training Step: 8502...  Training loss: 0.9400...  0.1163 sec/batch
Epoch: 19/20...  Training Step: 8503...  Training loss: 0.7681...  0.1215 sec/batch
Epoch: 19/20...  Training Step: 8504...  Training loss: 0.8676...  0.1165 sec/batch
Epoch: 19/20...  Training Step: 8505...  Training loss: 1.0977...  0.1168 sec/batch
Epoch: 19/20...  Training Step: 8506...  Training loss: 0.8863...  0.1170 sec/batch
Epoch: 19/20...  Training Step: 8507...  Training loss: 0.8087...  0.1157 sec/batch
Epoch: 19/20...  Training Step: 8508...  Training loss: 0.9008...  0.1166 sec/batch
Epoch: 19/20...  Training Step: 8509...  Training loss: 0.8643...  0.1184 sec/batch
Epoch: 19/20...  Training Step: 8510...  Training loss: 1.0224...  0.1149 sec/batch
Epoch: 19/20...  Training Step: 8511...  Training loss: 0.8687...  0.1121 sec/batch
Epoch: 19/20...  Training Step: 8512...  Training loss: 0.9445...  0.1146 sec/batch
Epoch: 19/20...  Training Step: 8513...  Training loss: 0.9442...  0.1182 sec/batch
Epoch: 19/20...  Training Step: 8514...  Training loss: 0.8869...  0.1192 sec/batch
Epoch: 19/20...  Training Step: 8515...  Training loss: 1.1530...  0.1189 sec/batch
Epoch: 19/20...  Training Step: 8516...  Training loss: 0.8034...  0.1155 sec/batch
Epoch: 19/20...  Training Step: 8517...  Training loss: 0.8737...  0.1161 sec/batch
Epoch: 19/20...  Training Step: 8518...  Training loss: 0.7627...  0.1159 sec/batch
Epoch: 19/20...  Training Step: 8519...  Training loss: 0.7290...  0.1218 sec/batch
Epoch: 19/20...  Training Step: 8520...  Training loss: 0.9330...  0.1160 sec/batch
Epoch: 19/20...  Training Step: 8521...  Training loss: 0.8120...  0.1152 sec/batch
Epoch: 19/20...  Training Step: 8522...  Training loss: 0.8742...  0.1194 sec/batch
Epoch: 19/20...  Training Step: 8523...  Training loss: 1.0302...  0.1207 sec/batch
Epoch: 19/20...  Training Step: 8524...  Training loss: 0.9148...  0.1150 sec/batch
Epoch: 19/20...  Training Step: 8525...  Training loss: 0.8373...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8526...  Training loss: 0.7907...  0.1207 sec/batch
Epoch: 19/20...  Training Step: 8527...  Training loss: 0.9564...  0.1143 sec/batch
Epoch: 19/20...  Training Step: 8528...  Training loss: 0.8016...  0.1201 sec/batch
Epoch: 19/20...  Training Step: 8529...  Training loss: 0.8069...  0.1173 sec/batch
Epoch: 19/20...  Training Step: 8530...  Training loss: 1.0213...  0.1168 sec/batch
Epoch: 19/20...  Training Step: 8531...  Training loss: 0.7455...  0.1182 sec/batch
Epoch: 19/20...  Training Step: 8532...  Training loss: 0.9283...  0.1161 sec/batch
Epoch: 19/20...  Training Step: 8533...  Training loss: 0.6879...  0.1167 sec/batch
Epoch: 19/20...  Training Step: 8534...  Training loss: 0.9287...  0.1208 sec/batch
Epoch: 19/20...  Training Step: 8535...  Training loss: 0.9933...  0.1138 sec/batch
Epoch: 19/20...  Training Step: 8536...  Training loss: 0.7984...  0.1146 sec/batch
Epoch: 19/20...  Training Step: 8537...  Training loss: 0.9559...  0.1219 sec/batch
Epoch: 19/20...  Training Step: 8538...  Training loss: 0.9550...  0.1232 sec/batch
Epoch: 19/20...  Training Step: 8539...  Training loss: 0.9578...  0.1142 sec/batch
Epoch: 19/20...  Training Step: 8540...  Training loss: 0.7711...  0.1207 sec/batch
Epoch: 19/20...  Training Step: 8541...  Training loss: 0.9588...  0.1180 sec/batch
Epoch: 19/20...  Training Step: 8542...  Training loss: 0.8390...  0.1141 sec/batch
Epoch: 19/20...  Training Step: 8543...  Training loss: 0.7988...  0.1225 sec/batch
Epoch: 19/20...  Training Step: 8544...  Training loss: 1.0284...  0.1288 sec/batch
Epoch: 19/20...  Training Step: 8545...  Training loss: 0.8730...  0.1292 sec/batch
Epoch: 19/20...  Training Step: 8546...  Training loss: 0.9307...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8547...  Training loss: 0.8652...  0.1191 sec/batch
Epoch: 19/20...  Training Step: 8548...  Training loss: 0.9512...  0.1167 sec/batch
Epoch: 19/20...  Training Step: 8549...  Training loss: 0.8991...  0.1168 sec/batch
Epoch: 19/20...  Training Step: 8550...  Training loss: 1.0117...  0.1154 sec/batch
Epoch: 19/20...  Training Step: 8551...  Training loss: 0.8501...  0.1162 sec/batch
Epoch: 19/20...  Training Step: 8552...  Training loss: 0.8786...  0.1168 sec/batch
Epoch: 19/20...  Training Step: 8553...  Training loss: 1.0003...  0.1189 sec/batch
Epoch: 19/20...  Training Step: 8554...  Training loss: 0.9783...  0.1186 sec/batch
Epoch: 19/20...  Training Step: 8555...  Training loss: 0.9469...  0.1162 sec/batch
Epoch: 19/20...  Training Step: 8556...  Training loss: 1.0828...  0.1193 sec/batch
Epoch: 19/20...  Training Step: 8557...  Training loss: 0.6757...  0.1149 sec/batch
Epoch: 19/20...  Training Step: 8558...  Training loss: 0.8746...  0.1123 sec/batch
Epoch: 19/20...  Training Step: 8559...  Training loss: 0.8508...  0.1191 sec/batch
Epoch: 19/20...  Training Step: 8560...  Training loss: 0.9709...  0.1139 sec/batch
Epoch: 19/20...  Training Step: 8561...  Training loss: 0.8308...  0.1182 sec/batch
Epoch: 19/20...  Training Step: 8562...  Training loss: 0.6846...  0.1168 sec/batch
Epoch: 19/20...  Training Step: 8563...  Training loss: 0.9316...  0.1197 sec/batch
Epoch: 19/20...  Training Step: 8564...  Training loss: 0.9513...  0.1175 sec/batch
Epoch: 19/20...  Training Step: 8565...  Training loss: 1.0825...  0.1231 sec/batch
Epoch: 19/20...  Training Step: 8566...  Training loss: 0.8784...  0.1151 sec/batch
Epoch: 19/20...  Training Step: 8567...  Training loss: 0.9835...  0.1220 sec/batch
Epoch: 19/20...  Training Step: 8568...  Training loss: 0.8494...  0.1170 sec/batch
Epoch: 19/20...  Training Step: 8569...  Training loss: 0.8551...  0.1178 sec/batch
Epoch: 19/20...  Training Step: 8570...  Training loss: 1.0499...  0.1148 sec/batch
Epoch: 19/20...  Training Step: 8571...  Training loss: 0.9505...  0.1172 sec/batch
Epoch: 19/20...  Training Step: 8572...  Training loss: 0.8673...  0.1144 sec/batch
Epoch: 19/20...  Training Step: 8573...  Training loss: 0.8359...  0.1179 sec/batch
Epoch: 19/20...  Training Step: 8574...  Training loss: 1.2105...  0.1184 sec/batch
Epoch: 19/20...  Training Step: 8575...  Training loss: 1.0522...  0.1144 sec/batch
Epoch: 19/20...  Training Step: 8576...  Training loss: 1.0579...  0.1202 sec/batch
Epoch: 19/20...  Training Step: 8577...  Training loss: 1.0010...  0.1121 sec/batch
Epoch: 19/20...  Training Step: 8578...  Training loss: 1.0513...  0.1114 sec/batch
Epoch: 19/20...  Training Step: 8579...  Training loss: 1.0687...  0.1185 sec/batch
Epoch: 19/20...  Training Step: 8580...  Training loss: 0.9669...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8581...  Training loss: 1.1172...  0.1152 sec/batch
Epoch: 19/20...  Training Step: 8582...  Training loss: 0.8063...  0.1178 sec/batch
Epoch: 19/20...  Training Step: 8583...  Training loss: 1.0568...  0.1143 sec/batch
Epoch: 19/20...  Training Step: 8584...  Training loss: 0.9305...  0.1190 sec/batch
Epoch: 19/20...  Training Step: 8585...  Training loss: 1.1201...  0.1191 sec/batch
Epoch: 19/20...  Training Step: 8586...  Training loss: 0.9428...  0.1192 sec/batch
Epoch: 19/20...  Training Step: 8587...  Training loss: 1.0178...  0.1187 sec/batch
Epoch: 19/20...  Training Step: 8588...  Training loss: 0.9685...  0.1165 sec/batch
Epoch: 19/20...  Training Step: 8589...  Training loss: 0.9347...  0.1205 sec/batch
Epoch: 19/20...  Training Step: 8590...  Training loss: 0.7622...  0.1183 sec/batch
Epoch: 19/20...  Training Step: 8591...  Training loss: 1.0445...  0.1191 sec/batch
Epoch: 19/20...  Training Step: 8592...  Training loss: 1.0142...  0.1166 sec/batch
Epoch: 19/20...  Training Step: 8593...  Training loss: 0.9628...  0.1200 sec/batch
Epoch: 19/20...  Training Step: 8594...  Training loss: 0.9034...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8595...  Training loss: 1.1135...  0.1158 sec/batch
Epoch: 19/20...  Training Step: 8596...  Training loss: 1.0147...  0.1189 sec/batch
Epoch: 19/20...  Training Step: 8597...  Training loss: 1.0397...  0.1145 sec/batch
Epoch: 19/20...  Training Step: 8598...  Training loss: 0.9975...  0.1202 sec/batch
Epoch: 19/20...  Training Step: 8599...  Training loss: 0.9117...  0.1158 sec/batch
Epoch: 19/20...  Training Step: 8600...  Training loss: 1.1003...  0.1194 sec/batch
Epoch: 19/20...  Training Step: 8601...  Training loss: 0.9365...  0.1186 sec/batch
Epoch: 19/20...  Training Step: 8602...  Training loss: 0.8750...  0.1159 sec/batch
Epoch: 19/20...  Training Step: 8603...  Training loss: 0.9296...  0.1169 sec/batch
Epoch: 19/20...  Training Step: 8604...  Training loss: 0.9686...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8605...  Training loss: 0.9746...  0.1163 sec/batch
Epoch: 19/20...  Training Step: 8606...  Training loss: 0.8738...  0.1193 sec/batch
Epoch: 19/20...  Training Step: 8607...  Training loss: 0.9815...  0.1199 sec/batch
Epoch: 19/20...  Training Step: 8608...  Training loss: 0.9565...  0.1142 sec/batch
Epoch: 19/20...  Training Step: 8609...  Training loss: 1.1181...  0.1176 sec/batch
Epoch: 19/20...  Training Step: 8610...  Training loss: 0.8457...  0.1153 sec/batch
Epoch: 19/20...  Training Step: 8611...  Training loss: 0.7799...  0.1192 sec/batch
Epoch: 19/20...  Training Step: 8612...  Training loss: 0.9150...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8613...  Training loss: 0.9450...  0.1153 sec/batch
Epoch: 19/20...  Training Step: 8614...  Training loss: 1.0222...  0.1160 sec/batch
Epoch: 19/20...  Training Step: 8615...  Training loss: 0.9535...  0.1161 sec/batch
Epoch: 19/20...  Training Step: 8616...  Training loss: 0.9967...  0.1193 sec/batch
Epoch: 19/20...  Training Step: 8617...  Training loss: 1.0886...  0.1188 sec/batch
Epoch: 19/20...  Training Step: 8618...  Training loss: 0.8254...  0.1149 sec/batch
Epoch: 19/20...  Training Step: 8619...  Training loss: 0.9543...  0.1164 sec/batch
Epoch: 19/20...  Training Step: 8620...  Training loss: 1.0099...  0.1165 sec/batch
Epoch: 19/20...  Training Step: 8621...  Training loss: 0.9523...  0.1165 sec/batch
Epoch: 19/20...  Training Step: 8622...  Training loss: 0.9749...  0.1164 sec/batch
Epoch: 19/20...  Training Step: 8623...  Training loss: 0.8621...  0.1192 sec/batch
Epoch: 19/20...  Training Step: 8624...  Training loss: 1.0358...  0.1183 sec/batch
Epoch: 19/20...  Training Step: 8625...  Training loss: 1.0173...  0.1153 sec/batch
Epoch: 19/20...  Training Step: 8626...  Training loss: 0.9774...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8627...  Training loss: 1.0074...  0.1215 sec/batch
Epoch: 19/20...  Training Step: 8628...  Training loss: 0.8813...  0.1180 sec/batch
Epoch: 19/20...  Training Step: 8629...  Training loss: 0.9027...  0.1164 sec/batch
Epoch: 19/20...  Training Step: 8630...  Training loss: 1.0435...  0.1199 sec/batch
Epoch: 19/20...  Training Step: 8631...  Training loss: 1.0327...  0.1221 sec/batch
Epoch: 19/20...  Training Step: 8632...  Training loss: 1.0001...  0.1297 sec/batch
Epoch: 19/20...  Training Step: 8633...  Training loss: 0.9351...  0.1373 sec/batch
Epoch: 19/20...  Training Step: 8634...  Training loss: 0.9994...  0.1347 sec/batch
Epoch: 19/20...  Training Step: 8635...  Training loss: 0.9588...  0.1331 sec/batch
Epoch: 19/20...  Training Step: 8636...  Training loss: 1.0084...  0.1430 sec/batch
Epoch: 19/20...  Training Step: 8637...  Training loss: 0.9255...  0.1279 sec/batch
Epoch: 19/20...  Training Step: 8638...  Training loss: 0.9660...  0.1264 sec/batch
Epoch: 19/20...  Training Step: 8639...  Training loss: 0.9446...  0.1227 sec/batch
Epoch: 19/20...  Training Step: 8640...  Training loss: 0.9855...  0.1240 sec/batch
Epoch: 19/20...  Training Step: 8641...  Training loss: 0.9979...  0.1274 sec/batch
Epoch: 19/20...  Training Step: 8642...  Training loss: 0.8923...  0.1425 sec/batch
Epoch: 19/20...  Training Step: 8643...  Training loss: 0.9811...  0.1348 sec/batch
Epoch: 19/20...  Training Step: 8644...  Training loss: 0.8238...  0.1243 sec/batch
Epoch: 19/20...  Training Step: 8645...  Training loss: 0.8397...  0.1239 sec/batch
Epoch: 19/20...  Training Step: 8646...  Training loss: 0.8761...  0.1522 sec/batch
Epoch: 19/20...  Training Step: 8647...  Training loss: 1.0037...  0.1488 sec/batch
Epoch: 19/20...  Training Step: 8648...  Training loss: 1.2037...  0.1475 sec/batch
Epoch: 19/20...  Training Step: 8649...  Training loss: 0.8352...  0.1397 sec/batch
Epoch: 19/20...  Training Step: 8650...  Training loss: 0.8239...  0.1374 sec/batch
Epoch: 19/20...  Training Step: 8651...  Training loss: 0.8554...  0.1343 sec/batch
Epoch: 19/20...  Training Step: 8652...  Training loss: 1.0027...  0.1313 sec/batch
Epoch: 19/20...  Training Step: 8653...  Training loss: 1.0996...  0.1231 sec/batch
Epoch: 19/20...  Training Step: 8654...  Training loss: 0.9638...  0.1236 sec/batch
Epoch: 19/20...  Training Step: 8655...  Training loss: 0.8117...  0.1274 sec/batch
Epoch: 19/20...  Training Step: 8656...  Training loss: 1.2058...  0.1277 sec/batch
Epoch: 19/20...  Training Step: 8657...  Training loss: 0.8405...  0.1434 sec/batch
Epoch: 19/20...  Training Step: 8658...  Training loss: 0.9427...  0.1224 sec/batch
Epoch: 19/20...  Training Step: 8659...  Training loss: 1.0139...  0.1215 sec/batch
Epoch: 19/20...  Training Step: 8660...  Training loss: 1.0978...  0.1315 sec/batch
Epoch: 19/20...  Training Step: 8661...  Training loss: 1.1291...  0.1297 sec/batch
Epoch: 19/20...  Training Step: 8662...  Training loss: 1.0864...  0.1273 sec/batch
Epoch: 19/20...  Training Step: 8663...  Training loss: 0.8977...  0.1274 sec/batch
Epoch: 19/20...  Training Step: 8664...  Training loss: 0.7591...  0.1293 sec/batch
Epoch: 19/20...  Training Step: 8665...  Training loss: 0.8880...  0.1242 sec/batch
Epoch: 19/20...  Training Step: 8666...  Training loss: 0.7792...  0.1245 sec/batch
Epoch: 19/20...  Training Step: 8667...  Training loss: 0.8290...  0.1212 sec/batch
Epoch: 19/20...  Training Step: 8668...  Training loss: 0.7427...  0.1218 sec/batch
Epoch: 19/20...  Training Step: 8669...  Training loss: 0.8176...  0.1274 sec/batch
Epoch: 19/20...  Training Step: 8670...  Training loss: 0.8131...  0.1260 sec/batch
Epoch: 19/20...  Training Step: 8671...  Training loss: 0.7967...  0.1211 sec/batch
Epoch: 19/20...  Training Step: 8672...  Training loss: 0.8071...  0.1161 sec/batch
Epoch: 19/20...  Training Step: 8673...  Training loss: 0.7006...  0.1173 sec/batch
Epoch: 19/20...  Training Step: 8674...  Training loss: 1.0849...  0.1196 sec/batch
Epoch: 19/20...  Training Step: 8675...  Training loss: 0.8566...  0.1170 sec/batch
Epoch: 19/20...  Training Step: 8676...  Training loss: 0.9283...  0.1175 sec/batch
Epoch: 19/20...  Training Step: 8677...  Training loss: 0.8146...  0.1168 sec/batch
Epoch: 19/20...  Training Step: 8678...  Training loss: 0.7066...  0.1147 sec/batch
Epoch: 19/20...  Training Step: 8679...  Training loss: 0.8988...  0.1200 sec/batch
Epoch: 19/20...  Training Step: 8680...  Training loss: 0.9267...  0.1192 sec/batch
Epoch: 19/20...  Training Step: 8681...  Training loss: 0.9626...  0.1170 sec/batch
Epoch: 19/20...  Training Step: 8682...  Training loss: 0.8964...  0.1194 sec/batch
Epoch: 19/20...  Training Step: 8683...  Training loss: 0.9740...  0.1182 sec/batch
Epoch: 19/20...  Training Step: 8684...  Training loss: 0.9962...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8685...  Training loss: 0.8942...  0.1203 sec/batch
Epoch: 19/20...  Training Step: 8686...  Training loss: 0.9387...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8687...  Training loss: 0.8946...  0.1194 sec/batch
Epoch: 19/20...  Training Step: 8688...  Training loss: 0.9599...  0.1200 sec/batch
Epoch: 19/20...  Training Step: 8689...  Training loss: 0.6985...  0.1163 sec/batch
Epoch: 19/20...  Training Step: 8690...  Training loss: 0.7347...  0.1163 sec/batch
Epoch: 19/20...  Training Step: 8691...  Training loss: 0.9406...  0.1179 sec/batch
Epoch: 19/20...  Training Step: 8692...  Training loss: 0.8813...  0.1219 sec/batch
Epoch: 19/20...  Training Step: 8693...  Training loss: 0.8424...  0.1159 sec/batch
Epoch: 19/20...  Training Step: 8694...  Training loss: 0.9697...  0.1283 sec/batch
Epoch: 19/20...  Training Step: 8695...  Training loss: 0.7380...  0.1292 sec/batch
Epoch: 19/20...  Training Step: 8696...  Training loss: 0.9233...  0.1218 sec/batch
Epoch: 19/20...  Training Step: 8697...  Training loss: 0.8900...  0.1233 sec/batch
Epoch: 19/20...  Training Step: 8698...  Training loss: 0.9082...  0.1179 sec/batch
Epoch: 19/20...  Training Step: 8699...  Training loss: 0.8763...  0.1228 sec/batch
Epoch: 19/20...  Training Step: 8700...  Training loss: 0.9928...  0.1186 sec/batch
Epoch: 19/20...  Training Step: 8701...  Training loss: 0.8402...  0.1180 sec/batch
Epoch: 19/20...  Training Step: 8702...  Training loss: 0.9897...  0.1172 sec/batch
Epoch: 19/20...  Training Step: 8703...  Training loss: 0.9604...  0.1224 sec/batch
Epoch: 19/20...  Training Step: 8704...  Training loss: 0.8964...  0.1131 sec/batch
Epoch: 19/20...  Training Step: 8705...  Training loss: 0.8219...  0.1166 sec/batch
Epoch: 19/20...  Training Step: 8706...  Training loss: 0.7084...  0.1142 sec/batch
Epoch: 19/20...  Training Step: 8707...  Training loss: 0.8767...  0.1204 sec/batch
Epoch: 19/20...  Training Step: 8708...  Training loss: 0.8752...  0.1146 sec/batch
Epoch: 19/20...  Training Step: 8709...  Training loss: 0.8093...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8710...  Training loss: 0.8577...  0.1158 sec/batch
Epoch: 19/20...  Training Step: 8711...  Training loss: 0.9847...  0.1165 sec/batch
Epoch: 19/20...  Training Step: 8712...  Training loss: 0.7757...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8713...  Training loss: 0.9510...  0.1175 sec/batch
Epoch: 19/20...  Training Step: 8714...  Training loss: 0.9913...  0.1164 sec/batch
Epoch: 19/20...  Training Step: 8715...  Training loss: 0.7603...  0.1202 sec/batch
Epoch: 19/20...  Training Step: 8716...  Training loss: 0.8238...  0.1169 sec/batch
Epoch: 19/20...  Training Step: 8717...  Training loss: 0.9700...  0.1213 sec/batch
Epoch: 19/20...  Training Step: 8718...  Training loss: 0.9789...  0.1178 sec/batch
Epoch: 19/20...  Training Step: 8719...  Training loss: 0.9734...  0.1237 sec/batch
Epoch: 19/20...  Training Step: 8720...  Training loss: 1.0859...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8721...  Training loss: 1.0095...  0.1153 sec/batch
Epoch: 19/20...  Training Step: 8722...  Training loss: 1.0099...  0.1182 sec/batch
Epoch: 19/20...  Training Step: 8723...  Training loss: 0.8584...  0.1184 sec/batch
Epoch: 19/20...  Training Step: 8724...  Training loss: 1.1378...  0.1194 sec/batch
Epoch: 19/20...  Training Step: 8725...  Training loss: 0.8288...  0.1172 sec/batch
Epoch: 19/20...  Training Step: 8726...  Training loss: 1.0795...  0.1178 sec/batch
Epoch: 19/20...  Training Step: 8727...  Training loss: 0.7935...  0.1206 sec/batch
Epoch: 19/20...  Training Step: 8728...  Training loss: 1.0034...  0.1162 sec/batch
Epoch: 19/20...  Training Step: 8729...  Training loss: 1.0003...  0.1158 sec/batch
Epoch: 19/20...  Training Step: 8730...  Training loss: 0.8986...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8731...  Training loss: 0.8314...  0.1149 sec/batch
Epoch: 19/20...  Training Step: 8732...  Training loss: 1.0329...  0.1178 sec/batch
Epoch: 19/20...  Training Step: 8733...  Training loss: 0.8491...  0.1193 sec/batch
Epoch: 19/20...  Training Step: 8734...  Training loss: 0.9716...  0.1177 sec/batch
Epoch: 19/20...  Training Step: 8735...  Training loss: 0.8704...  0.1201 sec/batch
Epoch: 19/20...  Training Step: 8736...  Training loss: 0.9014...  0.1194 sec/batch
Epoch: 19/20...  Training Step: 8737...  Training loss: 0.7960...  0.1207 sec/batch
Epoch: 19/20...  Training Step: 8738...  Training loss: 0.8435...  0.1170 sec/batch
Epoch: 19/20...  Training Step: 8739...  Training loss: 1.0479...  0.1174 sec/batch
Epoch: 19/20...  Training Step: 8740...  Training loss: 0.8862...  0.1185 sec/batch
Epoch: 19/20...  Training Step: 8741...  Training loss: 0.7509...  0.1162 sec/batch
Epoch: 19/20...  Training Step: 8742...  Training loss: 0.8779...  0.1159 sec/batch
Epoch: 19/20...  Training Step: 8743...  Training loss: 0.8517...  0.1161 sec/batch
Epoch: 19/20...  Training Step: 8744...  Training loss: 0.9834...  0.1145 sec/batch
Epoch: 19/20...  Training Step: 8745...  Training loss: 0.9837...  0.1199 sec/batch
Epoch: 19/20...  Training Step: 8746...  Training loss: 1.0526...  0.1218 sec/batch
Epoch: 19/20...  Training Step: 8747...  Training loss: 0.7602...  0.1190 sec/batch
Epoch: 19/20...  Training Step: 8748...  Training loss: 1.0128...  0.1181 sec/batch
Epoch: 19/20...  Training Step: 8749...  Training loss: 0.9509...  0.1185 sec/batch
Epoch: 19/20...  Training Step: 8750...  Training loss: 0.8920...  0.1210 sec/batch
Epoch: 19/20...  Training Step: 8751...  Training loss: 1.0427...  0.1161 sec/batch
Epoch: 19/20...  Training Step: 8752...  Training loss: 0.9562...  0.1207 sec/batch
Epoch: 19/20...  Training Step: 8753...  Training loss: 0.8233...  0.1194 sec/batch
Epoch: 19/20...  Training Step: 8754...  Training loss: 0.9544...  0.1204 sec/batch
Epoch: 19/20...  Training Step: 8755...  Training loss: 0.7760...  0.1171 sec/batch
Epoch: 19/20...  Training Step: 8756...  Training loss: 0.8668...  0.1190 sec/batch
Epoch: 19/20...  Training Step: 8757...  Training loss: 0.9271...  0.1156 sec/batch
Epoch: 19/20...  Training Step: 8758...  Training loss: 1.0837...  0.1217 sec/batch
Epoch: 19/20...  Training Step: 8759...  Training loss: 0.7970...  0.1213 sec/batch
Epoch: 19/20...  Training Step: 8760...  Training loss: 1.2281...  0.1144 sec/batch
Epoch: 19/20...  Training Step: 8761...  Training loss: 0.9041...  0.1228 sec/batch
Epoch: 19/20...  Training Step: 8762...  Training loss: 0.8894...  0.1187 sec/batch
Epoch: 19/20...  Training Step: 8763...  Training loss: 0.8122...  0.1200 sec/batch
Epoch: 19/20...  Training Step: 8764...  Training loss: 1.0320...  0.1200 sec/batch
Epoch: 19/20...  Training Step: 8765...  Training loss: 0.9956...  0.1184 sec/batch
Epoch: 19/20...  Training Step: 8766...  Training loss: 0.8656...  0.1149 sec/batch
Epoch: 19/20...  Training Step: 8767...  Training loss: 1.0168...  0.1220 sec/batch
Epoch: 19/20...  Training Step: 8768...  Training loss: 0.9922...  0.1212 sec/batch
Epoch: 19/20...  Training Step: 8769...  Training loss: 0.8688...  0.1184 sec/batch
Epoch: 19/20...  Training Step: 8770...  Training loss: 0.7241...  0.1239 sec/batch
Epoch: 19/20...  Training Step: 8771...  Training loss: 0.7804...  0.1130 sec/batch
Epoch: 19/20...  Training Step: 8772...  Training loss: 0.8463...  0.1195 sec/batch
Epoch: 19/20...  Training Step: 8773...  Training loss: 1.0351...  0.1163 sec/batch
Epoch: 19/20...  Training Step: 8774...  Training loss: 0.8016...  0.1175 sec/batch
Epoch: 19/20...  Training Step: 8775...  Training loss: 0.7794...  0.1181 sec/batch
Epoch: 19/20...  Training Step: 8776...  Training loss: 0.9351...  0.1183 sec/batch
Epoch: 19/20...  Training Step: 8777...  Training loss: 0.9148...  0.1163 sec/batch
Epoch: 19/20...  Training Step: 8778...  Training loss: 0.9751...  0.1190 sec/batch
Epoch: 19/20...  Training Step: 8779...  Training loss: 0.9197...  0.1190 sec/batch
Epoch: 19/20...  Training Step: 8780...  Training loss: 0.9926...  0.1164 sec/batch
Epoch: 19/20...  Training Step: 8781...  Training loss: 0.7751...  0.1211 sec/batch
Epoch: 19/20...  Training Step: 8782...  Training loss: 0.9099...  0.1200 sec/batch
Epoch: 19/20...  Training Step: 8783...  Training loss: 1.0411...  0.1097 sec/batch
Epoch: 19/20...  Training Step: 8784...  Training loss: 0.8835...  0.1168 sec/batch
Epoch: 19/20...  Training Step: 8785...  Training loss: 1.0076...  0.1143 sec/batch
Epoch: 19/20...  Training Step: 8786...  Training loss: 0.8266...  0.1189 sec/batch
Epoch: 19/20...  Training Step: 8787...  Training loss: 0.8346...  0.1179 sec/batch
Epoch: 19/20...  Training Step: 8788...  Training loss: 0.9870...  0.1151 sec/batch
Epoch: 19/20...  Training Step: 8789...  Training loss: 0.9223...  0.1161 sec/batch
Epoch: 19/20...  Training Step: 8790...  Training loss: 0.8797...  0.1157 sec/batch
Epoch: 19/20...  Training Step: 8791...  Training loss: 1.0656...  0.1131 sec/batch
Epoch: 19/20...  Training Step: 8792...  Training loss: 1.0199...  0.1102 sec/batch
Epoch: 19/20...  Training Step: 8793...  Training loss: 0.7454...  0.1139 sec/batch
Epoch: 19/20...  Training Step: 8794...  Training loss: 0.8662...  0.1198 sec/batch
Epoch: 19/20...  Training Step: 8795...  Training loss: 0.9000...  0.1232 sec/batch
Epoch: 19/20...  Training Step: 8796...  Training loss: 1.0188...  0.1210 sec/batch
Epoch: 19/20...  Training Step: 8797...  Training loss: 0.8860...  0.1230 sec/batch
Epoch: 19/20...  Training Step: 8798...  Training loss: 0.7374...  0.1207 sec/batch
Epoch: 19/20...  Training Step: 8799...  Training loss: 1.0156...  0.1212 sec/batch
Epoch: 19/20...  Training Step: 8800...  Training loss: 0.8740...  0.1180 sec/batch
Epoch: 19/20...  Training Step: 8801...  Training loss: 0.8626...  0.1194 sec/batch
Epoch: 19/20...  Training Step: 8802...  Training loss: 0.9347...  0.1161 sec/batch
Epoch: 19/20...  Training Step: 8803...  Training loss: 0.8098...  0.1204 sec/batch
Epoch: 19/20...  Training Step: 8804...  Training loss: 0.9239...  0.1198 sec/batch
Epoch: 19/20...  Training Step: 8805...  Training loss: 0.8367...  0.1152 sec/batch
Epoch: 19/20...  Training Step: 8806...  Training loss: 0.9629...  0.1188 sec/batch
Epoch: 19/20...  Training Step: 8807...  Training loss: 1.0109...  0.1137 sec/batch
Epoch: 19/20...  Training Step: 8808...  Training loss: 0.8238...  0.1196 sec/batch
Epoch: 19/20...  Training Step: 8809...  Training loss: 0.8892...  0.1185 sec/batch
Epoch: 19/20...  Training Step: 8810...  Training loss: 0.7549...  0.1147 sec/batch
Epoch: 19/20...  Training Step: 8811...  Training loss: 0.8526...  0.1213 sec/batch
Epoch: 19/20...  Training Step: 8812...  Training loss: 0.9960...  0.1158 sec/batch
Epoch: 19/20...  Training Step: 8813...  Training loss: 0.8994...  0.1143 sec/batch
Epoch: 19/20...  Training Step: 8814...  Training loss: 0.8955...  0.1191 sec/batch
Epoch: 19/20...  Training Step: 8815...  Training loss: 0.9463...  0.1141 sec/batch
Epoch: 19/20...  Training Step: 8816...  Training loss: 0.7819...  0.1106 sec/batch
Epoch: 20/20...  Training Step: 8817...  Training loss: 1.0918...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 8818...  Training loss: 1.0805...  0.1164 sec/batch
Epoch: 20/20...  Training Step: 8819...  Training loss: 1.0695...  0.1190 sec/batch
Epoch: 20/20...  Training Step: 8820...  Training loss: 0.9268...  0.1171 sec/batch
Epoch: 20/20...  Training Step: 8821...  Training loss: 0.8752...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 8822...  Training loss: 0.9147...  0.1162 sec/batch
Epoch: 20/20...  Training Step: 8823...  Training loss: 0.9330...  0.1240 sec/batch
Epoch: 20/20...  Training Step: 8824...  Training loss: 0.9079...  0.1152 sec/batch
Epoch: 20/20...  Training Step: 8825...  Training loss: 0.6895...  0.1154 sec/batch
Epoch: 20/20...  Training Step: 8826...  Training loss: 0.9698...  0.1180 sec/batch
Epoch: 20/20...  Training Step: 8827...  Training loss: 0.8608...  0.1205 sec/batch
Epoch: 20/20...  Training Step: 8828...  Training loss: 0.8357...  0.1154 sec/batch
Epoch: 20/20...  Training Step: 8829...  Training loss: 1.0906...  0.1214 sec/batch
Epoch: 20/20...  Training Step: 8830...  Training loss: 0.6900...  0.1225 sec/batch
Epoch: 20/20...  Training Step: 8831...  Training loss: 1.0717...  0.1142 sec/batch
Epoch: 20/20...  Training Step: 8832...  Training loss: 1.0810...  0.1204 sec/batch
Epoch: 20/20...  Training Step: 8833...  Training loss: 0.8748...  0.1198 sec/batch
Epoch: 20/20...  Training Step: 8834...  Training loss: 0.8476...  0.1131 sec/batch
Epoch: 20/20...  Training Step: 8835...  Training loss: 0.8136...  0.1154 sec/batch
Epoch: 20/20...  Training Step: 8836...  Training loss: 0.8428...  0.1176 sec/batch
Epoch: 20/20...  Training Step: 8837...  Training loss: 0.9948...  0.1209 sec/batch
Epoch: 20/20...  Training Step: 8838...  Training loss: 0.8176...  0.1139 sec/batch
Epoch: 20/20...  Training Step: 8839...  Training loss: 1.0069...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 8840...  Training loss: 0.8593...  0.1183 sec/batch
Epoch: 20/20...  Training Step: 8841...  Training loss: 0.8265...  0.1131 sec/batch
Epoch: 20/20...  Training Step: 8842...  Training loss: 0.8502...  0.1268 sec/batch
Epoch: 20/20...  Training Step: 8843...  Training loss: 0.9686...  0.1345 sec/batch
Epoch: 20/20...  Training Step: 8844...  Training loss: 0.8820...  0.1171 sec/batch
Epoch: 20/20...  Training Step: 8845...  Training loss: 0.9037...  0.1175 sec/batch
Epoch: 20/20...  Training Step: 8846...  Training loss: 0.9038...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 8847...  Training loss: 0.9061...  0.1191 sec/batch
Epoch: 20/20...  Training Step: 8848...  Training loss: 0.8266...  0.1160 sec/batch
Epoch: 20/20...  Training Step: 8849...  Training loss: 0.8659...  0.1143 sec/batch
Epoch: 20/20...  Training Step: 8850...  Training loss: 0.7976...  0.1224 sec/batch
Epoch: 20/20...  Training Step: 8851...  Training loss: 0.7555...  0.1194 sec/batch
Epoch: 20/20...  Training Step: 8852...  Training loss: 0.7740...  0.1179 sec/batch
Epoch: 20/20...  Training Step: 8853...  Training loss: 0.9216...  0.1195 sec/batch
Epoch: 20/20...  Training Step: 8854...  Training loss: 0.8655...  0.1156 sec/batch
Epoch: 20/20...  Training Step: 8855...  Training loss: 0.8282...  0.1189 sec/batch
Epoch: 20/20...  Training Step: 8856...  Training loss: 1.0132...  0.1179 sec/batch
Epoch: 20/20...  Training Step: 8857...  Training loss: 0.8730...  0.1152 sec/batch
Epoch: 20/20...  Training Step: 8858...  Training loss: 0.7320...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 8859...  Training loss: 1.1051...  0.1289 sec/batch
Epoch: 20/20...  Training Step: 8860...  Training loss: 0.7503...  0.1258 sec/batch
Epoch: 20/20...  Training Step: 8861...  Training loss: 0.8582...  0.1179 sec/batch
Epoch: 20/20...  Training Step: 8862...  Training loss: 0.8159...  0.1169 sec/batch
Epoch: 20/20...  Training Step: 8863...  Training loss: 0.8863...  0.1155 sec/batch
Epoch: 20/20...  Training Step: 8864...  Training loss: 0.9806...  0.1163 sec/batch
Epoch: 20/20...  Training Step: 8865...  Training loss: 1.0301...  0.1211 sec/batch
Epoch: 20/20...  Training Step: 8866...  Training loss: 0.8712...  0.1141 sec/batch
Epoch: 20/20...  Training Step: 8867...  Training loss: 0.8991...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 8868...  Training loss: 0.7802...  0.1168 sec/batch
Epoch: 20/20...  Training Step: 8869...  Training loss: 0.9684...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 8870...  Training loss: 0.7605...  0.1141 sec/batch
Epoch: 20/20...  Training Step: 8871...  Training loss: 0.9067...  0.1160 sec/batch
Epoch: 20/20...  Training Step: 8872...  Training loss: 0.8332...  0.1187 sec/batch
Epoch: 20/20...  Training Step: 8873...  Training loss: 0.8143...  0.1180 sec/batch
Epoch: 20/20...  Training Step: 8874...  Training loss: 1.0187...  0.1187 sec/batch
Epoch: 20/20...  Training Step: 8875...  Training loss: 0.7384...  0.1196 sec/batch
Epoch: 20/20...  Training Step: 8876...  Training loss: 0.7842...  0.1199 sec/batch
Epoch: 20/20...  Training Step: 8877...  Training loss: 0.7306...  0.1163 sec/batch
Epoch: 20/20...  Training Step: 8878...  Training loss: 0.9839...  0.1163 sec/batch
Epoch: 20/20...  Training Step: 8879...  Training loss: 0.8883...  0.1173 sec/batch
Epoch: 20/20...  Training Step: 8880...  Training loss: 1.0608...  0.1187 sec/batch
Epoch: 20/20...  Training Step: 8881...  Training loss: 0.7529...  0.1189 sec/batch
Epoch: 20/20...  Training Step: 8882...  Training loss: 0.9504...  0.1229 sec/batch
Epoch: 20/20...  Training Step: 8883...  Training loss: 0.9425...  0.1221 sec/batch
Epoch: 20/20...  Training Step: 8884...  Training loss: 0.9367...  0.1154 sec/batch
Epoch: 20/20...  Training Step: 8885...  Training loss: 0.7165...  0.1208 sec/batch
Epoch: 20/20...  Training Step: 8886...  Training loss: 0.8613...  0.1175 sec/batch
Epoch: 20/20...  Training Step: 8887...  Training loss: 1.0065...  0.1179 sec/batch
Epoch: 20/20...  Training Step: 8888...  Training loss: 0.7723...  0.1197 sec/batch
Epoch: 20/20...  Training Step: 8889...  Training loss: 0.8223...  0.1185 sec/batch
Epoch: 20/20...  Training Step: 8890...  Training loss: 0.6290...  0.1131 sec/batch
Epoch: 20/20...  Training Step: 8891...  Training loss: 1.0759...  0.1154 sec/batch
Epoch: 20/20...  Training Step: 8892...  Training loss: 0.7993...  0.1140 sec/batch
Epoch: 20/20...  Training Step: 8893...  Training loss: 0.7310...  0.1145 sec/batch
Epoch: 20/20...  Training Step: 8894...  Training loss: 0.9518...  0.1179 sec/batch
Epoch: 20/20...  Training Step: 8895...  Training loss: 0.9445...  0.1132 sec/batch
Epoch: 20/20...  Training Step: 8896...  Training loss: 0.8700...  0.1199 sec/batch
Epoch: 20/20...  Training Step: 8897...  Training loss: 1.0118...  0.1145 sec/batch
Epoch: 20/20...  Training Step: 8898...  Training loss: 0.9731...  0.1158 sec/batch
Epoch: 20/20...  Training Step: 8899...  Training loss: 0.8536...  0.1167 sec/batch
Epoch: 20/20...  Training Step: 8900...  Training loss: 0.8950...  0.1189 sec/batch
Epoch: 20/20...  Training Step: 8901...  Training loss: 0.9437...  0.1188 sec/batch
Epoch: 20/20...  Training Step: 8902...  Training loss: 1.0797...  0.1211 sec/batch
Epoch: 20/20...  Training Step: 8903...  Training loss: 0.8976...  0.1198 sec/batch
Epoch: 20/20...  Training Step: 8904...  Training loss: 0.8999...  0.1206 sec/batch
Epoch: 20/20...  Training Step: 8905...  Training loss: 1.0418...  0.1176 sec/batch
Epoch: 20/20...  Training Step: 8906...  Training loss: 0.9332...  0.1169 sec/batch
Epoch: 20/20...  Training Step: 8907...  Training loss: 1.0459...  0.1130 sec/batch
Epoch: 20/20...  Training Step: 8908...  Training loss: 1.0899...  0.1190 sec/batch
Epoch: 20/20...  Training Step: 8909...  Training loss: 0.8505...  0.1187 sec/batch
Epoch: 20/20...  Training Step: 8910...  Training loss: 1.0208...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 8911...  Training loss: 0.9588...  0.1227 sec/batch
Epoch: 20/20...  Training Step: 8912...  Training loss: 0.8852...  0.1204 sec/batch
Epoch: 20/20...  Training Step: 8913...  Training loss: 1.0525...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 8914...  Training loss: 1.0746...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 8915...  Training loss: 0.9858...  0.1180 sec/batch
Epoch: 20/20...  Training Step: 8916...  Training loss: 0.8754...  0.1180 sec/batch
Epoch: 20/20...  Training Step: 8917...  Training loss: 1.0372...  0.1178 sec/batch
Epoch: 20/20...  Training Step: 8918...  Training loss: 0.8220...  0.1195 sec/batch
Epoch: 20/20...  Training Step: 8919...  Training loss: 0.9794...  0.1132 sec/batch
Epoch: 20/20...  Training Step: 8920...  Training loss: 0.9106...  0.1205 sec/batch
Epoch: 20/20...  Training Step: 8921...  Training loss: 0.9227...  0.1192 sec/batch
Epoch: 20/20...  Training Step: 8922...  Training loss: 1.1097...  0.1190 sec/batch
Epoch: 20/20...  Training Step: 8923...  Training loss: 0.8144...  0.1179 sec/batch
Epoch: 20/20...  Training Step: 8924...  Training loss: 0.9399...  0.1141 sec/batch
Epoch: 20/20...  Training Step: 8925...  Training loss: 1.0074...  0.1138 sec/batch
Epoch: 20/20...  Training Step: 8926...  Training loss: 0.7132...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 8927...  Training loss: 0.9699...  0.1200 sec/batch
Epoch: 20/20...  Training Step: 8928...  Training loss: 0.7882...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 8929...  Training loss: 0.9458...  0.1201 sec/batch
Epoch: 20/20...  Training Step: 8930...  Training loss: 1.0983...  0.1215 sec/batch
Epoch: 20/20...  Training Step: 8931...  Training loss: 0.8896...  0.1201 sec/batch
Epoch: 20/20...  Training Step: 8932...  Training loss: 0.7918...  0.1152 sec/batch
Epoch: 20/20...  Training Step: 8933...  Training loss: 0.9632...  0.1158 sec/batch
Epoch: 20/20...  Training Step: 8934...  Training loss: 0.9932...  0.1174 sec/batch
Epoch: 20/20...  Training Step: 8935...  Training loss: 1.0321...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 8936...  Training loss: 0.7612...  0.1168 sec/batch
Epoch: 20/20...  Training Step: 8937...  Training loss: 1.0257...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 8938...  Training loss: 0.8816...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 8939...  Training loss: 1.0040...  0.1204 sec/batch
Epoch: 20/20...  Training Step: 8940...  Training loss: 0.9266...  0.1206 sec/batch
Epoch: 20/20...  Training Step: 8941...  Training loss: 0.9272...  0.1186 sec/batch
Epoch: 20/20...  Training Step: 8942...  Training loss: 0.8968...  0.1139 sec/batch
Epoch: 20/20...  Training Step: 8943...  Training loss: 0.8247...  0.1134 sec/batch
Epoch: 20/20...  Training Step: 8944...  Training loss: 1.0753...  0.1100 sec/batch
Epoch: 20/20...  Training Step: 8945...  Training loss: 0.8436...  0.1133 sec/batch
Epoch: 20/20...  Training Step: 8946...  Training loss: 0.9316...  0.1225 sec/batch
Epoch: 20/20...  Training Step: 8947...  Training loss: 0.9805...  0.1190 sec/batch
Epoch: 20/20...  Training Step: 8948...  Training loss: 0.8653...  0.1171 sec/batch
Epoch: 20/20...  Training Step: 8949...  Training loss: 0.8466...  0.1186 sec/batch
Epoch: 20/20...  Training Step: 8950...  Training loss: 0.9421...  0.1197 sec/batch
Epoch: 20/20...  Training Step: 8951...  Training loss: 0.7982...  0.1128 sec/batch
Epoch: 20/20...  Training Step: 8952...  Training loss: 0.8027...  0.1160 sec/batch
Epoch: 20/20...  Training Step: 8953...  Training loss: 0.7150...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 8954...  Training loss: 0.9663...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 8955...  Training loss: 0.8010...  0.1191 sec/batch
Epoch: 20/20...  Training Step: 8956...  Training loss: 1.0654...  0.1174 sec/batch
Epoch: 20/20...  Training Step: 8957...  Training loss: 0.7089...  0.1165 sec/batch
Epoch: 20/20...  Training Step: 8958...  Training loss: 0.7683...  0.1134 sec/batch
Epoch: 20/20...  Training Step: 8959...  Training loss: 0.8130...  0.1157 sec/batch
Epoch: 20/20...  Training Step: 8960...  Training loss: 0.8918...  0.1122 sec/batch
Epoch: 20/20...  Training Step: 8961...  Training loss: 0.9360...  0.1203 sec/batch
Epoch: 20/20...  Training Step: 8962...  Training loss: 0.7608...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 8963...  Training loss: 0.9019...  0.1231 sec/batch
Epoch: 20/20...  Training Step: 8964...  Training loss: 0.7517...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 8965...  Training loss: 0.8221...  0.1180 sec/batch
Epoch: 20/20...  Training Step: 8966...  Training loss: 0.8391...  0.1187 sec/batch
Epoch: 20/20...  Training Step: 8967...  Training loss: 0.7844...  0.1140 sec/batch
Epoch: 20/20...  Training Step: 8968...  Training loss: 1.0134...  0.1152 sec/batch
Epoch: 20/20...  Training Step: 8969...  Training loss: 0.9644...  0.1176 sec/batch
Epoch: 20/20...  Training Step: 8970...  Training loss: 0.8928...  0.1212 sec/batch
Epoch: 20/20...  Training Step: 8971...  Training loss: 0.8457...  0.1171 sec/batch
Epoch: 20/20...  Training Step: 8972...  Training loss: 0.9593...  0.1174 sec/batch
Epoch: 20/20...  Training Step: 8973...  Training loss: 0.8496...  0.1214 sec/batch
Epoch: 20/20...  Training Step: 8974...  Training loss: 0.8228...  0.1195 sec/batch
Epoch: 20/20...  Training Step: 8975...  Training loss: 0.7430...  0.1168 sec/batch
Epoch: 20/20...  Training Step: 8976...  Training loss: 0.9144...  0.1174 sec/batch
Epoch: 20/20...  Training Step: 8977...  Training loss: 0.8245...  0.1210 sec/batch
Epoch: 20/20...  Training Step: 8978...  Training loss: 0.8815...  0.1221 sec/batch
Epoch: 20/20...  Training Step: 8979...  Training loss: 1.0613...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 8980...  Training loss: 0.9469...  0.1210 sec/batch
Epoch: 20/20...  Training Step: 8981...  Training loss: 0.9437...  0.1194 sec/batch
Epoch: 20/20...  Training Step: 8982...  Training loss: 0.7776...  0.1173 sec/batch
Epoch: 20/20...  Training Step: 8983...  Training loss: 0.7832...  0.1162 sec/batch
Epoch: 20/20...  Training Step: 8984...  Training loss: 0.8391...  0.1196 sec/batch
Epoch: 20/20...  Training Step: 8985...  Training loss: 0.6496...  0.1165 sec/batch
Epoch: 20/20...  Training Step: 8986...  Training loss: 0.8317...  0.1165 sec/batch
Epoch: 20/20...  Training Step: 8987...  Training loss: 1.0071...  0.1171 sec/batch
Epoch: 20/20...  Training Step: 8988...  Training loss: 0.9279...  0.1160 sec/batch
Epoch: 20/20...  Training Step: 8989...  Training loss: 0.7862...  0.1204 sec/batch
Epoch: 20/20...  Training Step: 8990...  Training loss: 0.8126...  0.1193 sec/batch
Epoch: 20/20...  Training Step: 8991...  Training loss: 0.9678...  0.1147 sec/batch
Epoch: 20/20...  Training Step: 8992...  Training loss: 0.8604...  0.1148 sec/batch
Epoch: 20/20...  Training Step: 8993...  Training loss: 0.8688...  0.1164 sec/batch
Epoch: 20/20...  Training Step: 8994...  Training loss: 0.9994...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 8995...  Training loss: 0.7886...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 8996...  Training loss: 0.9292...  0.1195 sec/batch
Epoch: 20/20...  Training Step: 8997...  Training loss: 0.6824...  0.1184 sec/batch
Epoch: 20/20...  Training Step: 8998...  Training loss: 1.0078...  0.1176 sec/batch
Epoch: 20/20...  Training Step: 8999...  Training loss: 0.9503...  0.1149 sec/batch
Epoch: 20/20...  Training Step: 9000...  Training loss: 0.9196...  0.1191 sec/batch
Epoch: 20/20...  Training Step: 9001...  Training loss: 0.9192...  0.1183 sec/batch
Epoch: 20/20...  Training Step: 9002...  Training loss: 1.0750...  0.1187 sec/batch
Epoch: 20/20...  Training Step: 9003...  Training loss: 0.9306...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 9004...  Training loss: 0.8100...  0.1174 sec/batch
Epoch: 20/20...  Training Step: 9005...  Training loss: 1.1632...  0.1185 sec/batch
Epoch: 20/20...  Training Step: 9006...  Training loss: 0.9077...  0.1190 sec/batch
Epoch: 20/20...  Training Step: 9007...  Training loss: 0.8224...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 9008...  Training loss: 0.8393...  0.1125 sec/batch
Epoch: 20/20...  Training Step: 9009...  Training loss: 0.7627...  0.1145 sec/batch
Epoch: 20/20...  Training Step: 9010...  Training loss: 0.8851...  0.1089 sec/batch
Epoch: 20/20...  Training Step: 9011...  Training loss: 0.9932...  0.1207 sec/batch
Epoch: 20/20...  Training Step: 9012...  Training loss: 0.8448...  0.1153 sec/batch
Epoch: 20/20...  Training Step: 9013...  Training loss: 0.8176...  0.1219 sec/batch
Epoch: 20/20...  Training Step: 9014...  Training loss: 0.8369...  0.1165 sec/batch
Epoch: 20/20...  Training Step: 9015...  Training loss: 0.8002...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9016...  Training loss: 0.8148...  0.1175 sec/batch
Epoch: 20/20...  Training Step: 9017...  Training loss: 0.9777...  0.1215 sec/batch
Epoch: 20/20...  Training Step: 9018...  Training loss: 1.0977...  0.1207 sec/batch
Epoch: 20/20...  Training Step: 9019...  Training loss: 0.9244...  0.1208 sec/batch
Epoch: 20/20...  Training Step: 9020...  Training loss: 0.8794...  0.1138 sec/batch
Epoch: 20/20...  Training Step: 9021...  Training loss: 0.8934...  0.1165 sec/batch
Epoch: 20/20...  Training Step: 9022...  Training loss: 0.9645...  0.1143 sec/batch
Epoch: 20/20...  Training Step: 9023...  Training loss: 0.8764...  0.1190 sec/batch
Epoch: 20/20...  Training Step: 9024...  Training loss: 0.8991...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9025...  Training loss: 0.9204...  0.1150 sec/batch
Epoch: 20/20...  Training Step: 9026...  Training loss: 0.8325...  0.1164 sec/batch
Epoch: 20/20...  Training Step: 9027...  Training loss: 0.9816...  0.1192 sec/batch
Epoch: 20/20...  Training Step: 9028...  Training loss: 0.9713...  0.1168 sec/batch
Epoch: 20/20...  Training Step: 9029...  Training loss: 0.9334...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 9030...  Training loss: 0.8470...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 9031...  Training loss: 1.1104...  0.1119 sec/batch
Epoch: 20/20...  Training Step: 9032...  Training loss: 0.8809...  0.1183 sec/batch
Epoch: 20/20...  Training Step: 9033...  Training loss: 0.9266...  0.1155 sec/batch
Epoch: 20/20...  Training Step: 9034...  Training loss: 0.9784...  0.1167 sec/batch
Epoch: 20/20...  Training Step: 9035...  Training loss: 0.8808...  0.1171 sec/batch
Epoch: 20/20...  Training Step: 9036...  Training loss: 1.0281...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9037...  Training loss: 0.8338...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 9038...  Training loss: 1.1674...  0.1155 sec/batch
Epoch: 20/20...  Training Step: 9039...  Training loss: 1.0344...  0.1195 sec/batch
Epoch: 20/20...  Training Step: 9040...  Training loss: 1.0054...  0.1200 sec/batch
Epoch: 20/20...  Training Step: 9041...  Training loss: 0.9361...  0.1168 sec/batch
Epoch: 20/20...  Training Step: 9042...  Training loss: 1.1434...  0.1133 sec/batch
Epoch: 20/20...  Training Step: 9043...  Training loss: 0.9198...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 9044...  Training loss: 0.9942...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 9045...  Training loss: 0.9581...  0.1160 sec/batch
Epoch: 20/20...  Training Step: 9046...  Training loss: 0.9332...  0.1274 sec/batch
Epoch: 20/20...  Training Step: 9047...  Training loss: 0.9967...  0.1257 sec/batch
Epoch: 20/20...  Training Step: 9048...  Training loss: 0.9245...  0.1157 sec/batch
Epoch: 20/20...  Training Step: 9049...  Training loss: 1.1653...  0.1201 sec/batch
Epoch: 20/20...  Training Step: 9050...  Training loss: 1.0346...  0.1197 sec/batch
Epoch: 20/20...  Training Step: 9051...  Training loss: 1.0542...  0.1158 sec/batch
Epoch: 20/20...  Training Step: 9052...  Training loss: 0.9176...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 9053...  Training loss: 0.8489...  0.1212 sec/batch
Epoch: 20/20...  Training Step: 9054...  Training loss: 0.7823...  0.1205 sec/batch
Epoch: 20/20...  Training Step: 9055...  Training loss: 1.0207...  0.1205 sec/batch
Epoch: 20/20...  Training Step: 9056...  Training loss: 0.8765...  0.1148 sec/batch
Epoch: 20/20...  Training Step: 9057...  Training loss: 0.8805...  0.1184 sec/batch
Epoch: 20/20...  Training Step: 9058...  Training loss: 0.8755...  0.1192 sec/batch
Epoch: 20/20...  Training Step: 9059...  Training loss: 0.9831...  0.1142 sec/batch
Epoch: 20/20...  Training Step: 9060...  Training loss: 0.8332...  0.1214 sec/batch
Epoch: 20/20...  Training Step: 9061...  Training loss: 0.9621...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 9062...  Training loss: 0.8524...  0.1140 sec/batch
Epoch: 20/20...  Training Step: 9063...  Training loss: 0.9578...  0.1139 sec/batch
Epoch: 20/20...  Training Step: 9064...  Training loss: 0.9394...  0.1210 sec/batch
Epoch: 20/20...  Training Step: 9065...  Training loss: 1.0679...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 9066...  Training loss: 0.8760...  0.1193 sec/batch
Epoch: 20/20...  Training Step: 9067...  Training loss: 0.9247...  0.1196 sec/batch
Epoch: 20/20...  Training Step: 9068...  Training loss: 0.9506...  0.1180 sec/batch
Epoch: 20/20...  Training Step: 9069...  Training loss: 0.8852...  0.1186 sec/batch
Epoch: 20/20...  Training Step: 9070...  Training loss: 0.9142...  0.1185 sec/batch
Epoch: 20/20...  Training Step: 9071...  Training loss: 0.8686...  0.1145 sec/batch
Epoch: 20/20...  Training Step: 9072...  Training loss: 0.9100...  0.1211 sec/batch
Epoch: 20/20...  Training Step: 9073...  Training loss: 1.0016...  0.1199 sec/batch
Epoch: 20/20...  Training Step: 9074...  Training loss: 0.8183...  0.1176 sec/batch
Epoch: 20/20...  Training Step: 9075...  Training loss: 0.8091...  0.1187 sec/batch
Epoch: 20/20...  Training Step: 9076...  Training loss: 0.7784...  0.1165 sec/batch
Epoch: 20/20...  Training Step: 9077...  Training loss: 0.9054...  0.1138 sec/batch
Epoch: 20/20...  Training Step: 9078...  Training loss: 1.0017...  0.1151 sec/batch
Epoch: 20/20...  Training Step: 9079...  Training loss: 0.9674...  0.1178 sec/batch
Epoch: 20/20...  Training Step: 9080...  Training loss: 1.0301...  0.1191 sec/batch
Epoch: 20/20...  Training Step: 9081...  Training loss: 1.1748...  0.1171 sec/batch
Epoch: 20/20...  Training Step: 9082...  Training loss: 1.0022...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 9083...  Training loss: 0.9662...  0.1175 sec/batch
Epoch: 20/20...  Training Step: 9084...  Training loss: 0.9975...  0.1147 sec/batch
Epoch: 20/20...  Training Step: 9085...  Training loss: 0.9101...  0.1140 sec/batch
Epoch: 20/20...  Training Step: 9086...  Training loss: 0.9773...  0.1227 sec/batch
Epoch: 20/20...  Training Step: 9087...  Training loss: 0.8906...  0.1247 sec/batch
Epoch: 20/20...  Training Step: 9088...  Training loss: 0.8837...  0.1171 sec/batch
Epoch: 20/20...  Training Step: 9089...  Training loss: 1.0338...  0.1165 sec/batch
Epoch: 20/20...  Training Step: 9090...  Training loss: 1.0483...  0.1209 sec/batch
Epoch: 20/20...  Training Step: 9091...  Training loss: 0.9335...  0.1163 sec/batch
Epoch: 20/20...  Training Step: 9092...  Training loss: 0.8116...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 9093...  Training loss: 0.9218...  0.1195 sec/batch
Epoch: 20/20...  Training Step: 9094...  Training loss: 1.1158...  0.1193 sec/batch
Epoch: 20/20...  Training Step: 9095...  Training loss: 0.9219...  0.1176 sec/batch
Epoch: 20/20...  Training Step: 9096...  Training loss: 0.9410...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 9097...  Training loss: 0.7958...  0.1229 sec/batch
Epoch: 20/20...  Training Step: 9098...  Training loss: 0.8869...  0.1194 sec/batch
Epoch: 20/20...  Training Step: 9099...  Training loss: 0.9153...  0.1169 sec/batch
Epoch: 20/20...  Training Step: 9100...  Training loss: 1.0001...  0.1194 sec/batch
Epoch: 20/20...  Training Step: 9101...  Training loss: 0.8537...  0.1187 sec/batch
Epoch: 20/20...  Training Step: 9102...  Training loss: 0.9385...  0.1153 sec/batch
Epoch: 20/20...  Training Step: 9103...  Training loss: 1.0786...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 9104...  Training loss: 0.9444...  0.1138 sec/batch
Epoch: 20/20...  Training Step: 9105...  Training loss: 0.8390...  0.1134 sec/batch
Epoch: 20/20...  Training Step: 9106...  Training loss: 1.0368...  0.1173 sec/batch
Epoch: 20/20...  Training Step: 9107...  Training loss: 0.9639...  0.1185 sec/batch
Epoch: 20/20...  Training Step: 9108...  Training loss: 0.9923...  0.1167 sec/batch
Epoch: 20/20...  Training Step: 9109...  Training loss: 0.8351...  0.1201 sec/batch
Epoch: 20/20...  Training Step: 9110...  Training loss: 0.9332...  0.1130 sec/batch
Epoch: 20/20...  Training Step: 9111...  Training loss: 1.0789...  0.1106 sec/batch
Epoch: 20/20...  Training Step: 9112...  Training loss: 1.1003...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9113...  Training loss: 0.8674...  0.1155 sec/batch
Epoch: 20/20...  Training Step: 9114...  Training loss: 0.7972...  0.1174 sec/batch
Epoch: 20/20...  Training Step: 9115...  Training loss: 0.9226...  0.1217 sec/batch
Epoch: 20/20...  Training Step: 9116...  Training loss: 0.8909...  0.1274 sec/batch
Epoch: 20/20...  Training Step: 9117...  Training loss: 0.9538...  0.1308 sec/batch
Epoch: 20/20...  Training Step: 9118...  Training loss: 0.9089...  0.1216 sec/batch
Epoch: 20/20...  Training Step: 9119...  Training loss: 0.8399...  0.1308 sec/batch
Epoch: 20/20...  Training Step: 9120...  Training loss: 1.2607...  0.1263 sec/batch
Epoch: 20/20...  Training Step: 9121...  Training loss: 0.8090...  0.1125 sec/batch
Epoch: 20/20...  Training Step: 9122...  Training loss: 0.9580...  0.1166 sec/batch
Epoch: 20/20...  Training Step: 9123...  Training loss: 0.8290...  0.1239 sec/batch
Epoch: 20/20...  Training Step: 9124...  Training loss: 1.0782...  0.1214 sec/batch
Epoch: 20/20...  Training Step: 9125...  Training loss: 1.1134...  0.1203 sec/batch
Epoch: 20/20...  Training Step: 9126...  Training loss: 0.9353...  0.1157 sec/batch
Epoch: 20/20...  Training Step: 9127...  Training loss: 0.8706...  0.1212 sec/batch
Epoch: 20/20...  Training Step: 9128...  Training loss: 0.7608...  0.1314 sec/batch
Epoch: 20/20...  Training Step: 9129...  Training loss: 0.8094...  0.1494 sec/batch
Epoch: 20/20...  Training Step: 9130...  Training loss: 0.8067...  0.1278 sec/batch
Epoch: 20/20...  Training Step: 9131...  Training loss: 0.8531...  0.1254 sec/batch
Epoch: 20/20...  Training Step: 9132...  Training loss: 0.8277...  0.1317 sec/batch
Epoch: 20/20...  Training Step: 9133...  Training loss: 0.7916...  0.1203 sec/batch
Epoch: 20/20...  Training Step: 9134...  Training loss: 0.8373...  0.1156 sec/batch
Epoch: 20/20...  Training Step: 9135...  Training loss: 0.6891...  0.1166 sec/batch
Epoch: 20/20...  Training Step: 9136...  Training loss: 0.7458...  0.1270 sec/batch
Epoch: 20/20...  Training Step: 9137...  Training loss: 0.7324...  0.1249 sec/batch
Epoch: 20/20...  Training Step: 9138...  Training loss: 1.0459...  0.1278 sec/batch
Epoch: 20/20...  Training Step: 9139...  Training loss: 0.8258...  0.1252 sec/batch
Epoch: 20/20...  Training Step: 9140...  Training loss: 0.8109...  0.1242 sec/batch
Epoch: 20/20...  Training Step: 9141...  Training loss: 0.8508...  0.1227 sec/batch
Epoch: 20/20...  Training Step: 9142...  Training loss: 0.8534...  0.1210 sec/batch
Epoch: 20/20...  Training Step: 9143...  Training loss: 0.9070...  0.1242 sec/batch
Epoch: 20/20...  Training Step: 9144...  Training loss: 0.8348...  0.1233 sec/batch
Epoch: 20/20...  Training Step: 9145...  Training loss: 0.7611...  0.1239 sec/batch
Epoch: 20/20...  Training Step: 9146...  Training loss: 0.8233...  0.1243 sec/batch
Epoch: 20/20...  Training Step: 9147...  Training loss: 0.9967...  0.1277 sec/batch
Epoch: 20/20...  Training Step: 9148...  Training loss: 1.0010...  0.1205 sec/batch
Epoch: 20/20...  Training Step: 9149...  Training loss: 0.8395...  0.1287 sec/batch
Epoch: 20/20...  Training Step: 9150...  Training loss: 0.8898...  0.1257 sec/batch
Epoch: 20/20...  Training Step: 9151...  Training loss: 0.8111...  0.1186 sec/batch
Epoch: 20/20...  Training Step: 9152...  Training loss: 0.8784...  0.1283 sec/batch
Epoch: 20/20...  Training Step: 9153...  Training loss: 0.7651...  0.1228 sec/batch
Epoch: 20/20...  Training Step: 9154...  Training loss: 0.7204...  0.1277 sec/batch
Epoch: 20/20...  Training Step: 9155...  Training loss: 0.8882...  0.1260 sec/batch
Epoch: 20/20...  Training Step: 9156...  Training loss: 0.7722...  0.1225 sec/batch
Epoch: 20/20...  Training Step: 9157...  Training loss: 0.8136...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 9158...  Training loss: 0.9192...  0.1200 sec/batch
Epoch: 20/20...  Training Step: 9159...  Training loss: 0.7656...  0.1284 sec/batch
Epoch: 20/20...  Training Step: 9160...  Training loss: 0.8754...  0.1283 sec/batch
Epoch: 20/20...  Training Step: 9161...  Training loss: 0.8530...  0.1264 sec/batch
Epoch: 20/20...  Training Step: 9162...  Training loss: 0.8523...  0.1206 sec/batch
Epoch: 20/20...  Training Step: 9163...  Training loss: 0.8422...  0.1254 sec/batch
Epoch: 20/20...  Training Step: 9164...  Training loss: 0.9419...  0.1249 sec/batch
Epoch: 20/20...  Training Step: 9165...  Training loss: 0.8304...  0.1235 sec/batch
Epoch: 20/20...  Training Step: 9166...  Training loss: 0.9151...  0.1234 sec/batch
Epoch: 20/20...  Training Step: 9167...  Training loss: 1.0094...  0.1217 sec/batch
Epoch: 20/20...  Training Step: 9168...  Training loss: 0.9293...  0.1205 sec/batch
Epoch: 20/20...  Training Step: 9169...  Training loss: 0.8099...  0.1269 sec/batch
Epoch: 20/20...  Training Step: 9170...  Training loss: 0.6800...  0.1221 sec/batch
Epoch: 20/20...  Training Step: 9171...  Training loss: 0.9715...  0.1273 sec/batch
Epoch: 20/20...  Training Step: 9172...  Training loss: 0.7667...  0.1244 sec/batch
Epoch: 20/20...  Training Step: 9173...  Training loss: 0.7911...  0.1243 sec/batch
Epoch: 20/20...  Training Step: 9174...  Training loss: 0.8325...  0.1271 sec/batch
Epoch: 20/20...  Training Step: 9175...  Training loss: 0.9590...  0.1193 sec/batch
Epoch: 20/20...  Training Step: 9176...  Training loss: 0.6770...  0.1139 sec/batch
Epoch: 20/20...  Training Step: 9177...  Training loss: 1.0022...  0.1191 sec/batch
Epoch: 20/20...  Training Step: 9178...  Training loss: 0.8403...  0.1164 sec/batch
Epoch: 20/20...  Training Step: 9179...  Training loss: 0.8026...  0.1150 sec/batch
Epoch: 20/20...  Training Step: 9180...  Training loss: 1.0784...  0.1169 sec/batch
Epoch: 20/20...  Training Step: 9181...  Training loss: 0.8979...  0.1183 sec/batch
Epoch: 20/20...  Training Step: 9182...  Training loss: 0.8947...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 9183...  Training loss: 0.8670...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9184...  Training loss: 0.9312...  0.1210 sec/batch
Epoch: 20/20...  Training Step: 9185...  Training loss: 0.9310...  0.1216 sec/batch
Epoch: 20/20...  Training Step: 9186...  Training loss: 0.8900...  0.1186 sec/batch
Epoch: 20/20...  Training Step: 9187...  Training loss: 0.8245...  0.1218 sec/batch
Epoch: 20/20...  Training Step: 9188...  Training loss: 0.9285...  0.1167 sec/batch
Epoch: 20/20...  Training Step: 9189...  Training loss: 0.8415...  0.1220 sec/batch
Epoch: 20/20...  Training Step: 9190...  Training loss: 0.8847...  0.1155 sec/batch
Epoch: 20/20...  Training Step: 9191...  Training loss: 0.8504...  0.1202 sec/batch
Epoch: 20/20...  Training Step: 9192...  Training loss: 1.0037...  0.1150 sec/batch
Epoch: 20/20...  Training Step: 9193...  Training loss: 0.8610...  0.1163 sec/batch
Epoch: 20/20...  Training Step: 9194...  Training loss: 0.8698...  0.1194 sec/batch
Epoch: 20/20...  Training Step: 9195...  Training loss: 0.9475...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 9196...  Training loss: 0.9406...  0.1183 sec/batch
Epoch: 20/20...  Training Step: 9197...  Training loss: 0.8920...  0.1215 sec/batch
Epoch: 20/20...  Training Step: 9198...  Training loss: 0.9076...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9199...  Training loss: 0.8521...  0.1166 sec/batch
Epoch: 20/20...  Training Step: 9200...  Training loss: 0.8944...  0.1149 sec/batch
Epoch: 20/20...  Training Step: 9201...  Training loss: 0.6967...  0.1137 sec/batch
Epoch: 20/20...  Training Step: 9202...  Training loss: 0.8023...  0.1204 sec/batch
Epoch: 20/20...  Training Step: 9203...  Training loss: 0.8994...  0.1186 sec/batch
Epoch: 20/20...  Training Step: 9204...  Training loss: 0.7692...  0.1233 sec/batch
Epoch: 20/20...  Training Step: 9205...  Training loss: 0.6939...  0.1220 sec/batch
Epoch: 20/20...  Training Step: 9206...  Training loss: 0.8485...  0.1142 sec/batch
Epoch: 20/20...  Training Step: 9207...  Training loss: 0.7875...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 9208...  Training loss: 0.9031...  0.1158 sec/batch
Epoch: 20/20...  Training Step: 9209...  Training loss: 0.9788...  0.1142 sec/batch
Epoch: 20/20...  Training Step: 9210...  Training loss: 1.0018...  0.1176 sec/batch
Epoch: 20/20...  Training Step: 9211...  Training loss: 0.8500...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 9212...  Training loss: 1.0816...  0.1175 sec/batch
Epoch: 20/20...  Training Step: 9213...  Training loss: 0.8450...  0.1159 sec/batch
Epoch: 20/20...  Training Step: 9214...  Training loss: 0.9057...  0.1123 sec/batch
Epoch: 20/20...  Training Step: 9215...  Training loss: 0.9670...  0.1196 sec/batch
Epoch: 20/20...  Training Step: 9216...  Training loss: 0.9195...  0.1144 sec/batch
Epoch: 20/20...  Training Step: 9217...  Training loss: 0.7314...  0.1240 sec/batch
Epoch: 20/20...  Training Step: 9218...  Training loss: 0.8477...  0.1133 sec/batch
Epoch: 20/20...  Training Step: 9219...  Training loss: 0.7342...  0.1154 sec/batch
Epoch: 20/20...  Training Step: 9220...  Training loss: 0.9355...  0.1166 sec/batch
Epoch: 20/20...  Training Step: 9221...  Training loss: 0.9337...  0.1175 sec/batch
Epoch: 20/20...  Training Step: 9222...  Training loss: 1.0801...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 9223...  Training loss: 0.9220...  0.1194 sec/batch
Epoch: 20/20...  Training Step: 9224...  Training loss: 1.1191...  0.1160 sec/batch
Epoch: 20/20...  Training Step: 9225...  Training loss: 0.8040...  0.1190 sec/batch
Epoch: 20/20...  Training Step: 9226...  Training loss: 0.9374...  0.1133 sec/batch
Epoch: 20/20...  Training Step: 9227...  Training loss: 0.7240...  0.1201 sec/batch
Epoch: 20/20...  Training Step: 9228...  Training loss: 0.8963...  0.1194 sec/batch
Epoch: 20/20...  Training Step: 9229...  Training loss: 0.9193...  0.1185 sec/batch
Epoch: 20/20...  Training Step: 9230...  Training loss: 0.9049...  0.1157 sec/batch
Epoch: 20/20...  Training Step: 9231...  Training loss: 0.9062...  0.1194 sec/batch
Epoch: 20/20...  Training Step: 9232...  Training loss: 1.0257...  0.1199 sec/batch
Epoch: 20/20...  Training Step: 9233...  Training loss: 0.8126...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9234...  Training loss: 0.7696...  0.1148 sec/batch
Epoch: 20/20...  Training Step: 9235...  Training loss: 0.8442...  0.1285 sec/batch
Epoch: 20/20...  Training Step: 9236...  Training loss: 0.7972...  0.1297 sec/batch
Epoch: 20/20...  Training Step: 9237...  Training loss: 0.9349...  0.1242 sec/batch
Epoch: 20/20...  Training Step: 9238...  Training loss: 0.8360...  0.1222 sec/batch
Epoch: 20/20...  Training Step: 9239...  Training loss: 0.8206...  0.1214 sec/batch
Epoch: 20/20...  Training Step: 9240...  Training loss: 0.9688...  0.1175 sec/batch
Epoch: 20/20...  Training Step: 9241...  Training loss: 0.8671...  0.1229 sec/batch
Epoch: 20/20...  Training Step: 9242...  Training loss: 0.8892...  0.1220 sec/batch
Epoch: 20/20...  Training Step: 9243...  Training loss: 0.8687...  0.1170 sec/batch
Epoch: 20/20...  Training Step: 9244...  Training loss: 0.9668...  0.1178 sec/batch
Epoch: 20/20...  Training Step: 9245...  Training loss: 0.7701...  0.1207 sec/batch
Epoch: 20/20...  Training Step: 9246...  Training loss: 0.8911...  0.1278 sec/batch
Epoch: 20/20...  Training Step: 9247...  Training loss: 0.9587...  0.1214 sec/batch
Epoch: 20/20...  Training Step: 9248...  Training loss: 0.8099...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 9249...  Training loss: 1.0678...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9250...  Training loss: 0.8974...  0.1172 sec/batch
Epoch: 20/20...  Training Step: 9251...  Training loss: 0.9147...  0.1152 sec/batch
Epoch: 20/20...  Training Step: 9252...  Training loss: 1.0028...  0.1169 sec/batch
Epoch: 20/20...  Training Step: 9253...  Training loss: 0.8413...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 9254...  Training loss: 0.8912...  0.1136 sec/batch
Epoch: 20/20...  Training Step: 9255...  Training loss: 1.0755...  0.1168 sec/batch
Epoch: 20/20...  Training Step: 9256...  Training loss: 1.0946...  0.1162 sec/batch
Epoch: 20/20...  Training Step: 9257...  Training loss: 0.7189...  0.1166 sec/batch
Epoch: 20/20...  Training Step: 9258...  Training loss: 0.8001...  0.1200 sec/batch
Epoch: 20/20...  Training Step: 9259...  Training loss: 0.9301...  0.1205 sec/batch
Epoch: 20/20...  Training Step: 9260...  Training loss: 0.9401...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 9261...  Training loss: 0.8072...  0.1201 sec/batch
Epoch: 20/20...  Training Step: 9262...  Training loss: 0.8136...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 9263...  Training loss: 0.8872...  0.1165 sec/batch
Epoch: 20/20...  Training Step: 9264...  Training loss: 0.8398...  0.1177 sec/batch
Epoch: 20/20...  Training Step: 9265...  Training loss: 0.8970...  0.1162 sec/batch
Epoch: 20/20...  Training Step: 9266...  Training loss: 0.8698...  0.1164 sec/batch
Epoch: 20/20...  Training Step: 9267...  Training loss: 0.8412...  0.1169 sec/batch
Epoch: 20/20...  Training Step: 9268...  Training loss: 0.8722...  0.1204 sec/batch
Epoch: 20/20...  Training Step: 9269...  Training loss: 0.8469...  0.1127 sec/batch
Epoch: 20/20...  Training Step: 9270...  Training loss: 0.9399...  0.1190 sec/batch
Epoch: 20/20...  Training Step: 9271...  Training loss: 0.7496...  0.1161 sec/batch
Epoch: 20/20...  Training Step: 9272...  Training loss: 0.7365...  0.1182 sec/batch
Epoch: 20/20...  Training Step: 9273...  Training loss: 0.8525...  0.1149 sec/batch
Epoch: 20/20...  Training Step: 9274...  Training loss: 0.7399...  0.1126 sec/batch
Epoch: 20/20...  Training Step: 9275...  Training loss: 0.9084...  0.1162 sec/batch
Epoch: 20/20...  Training Step: 9276...  Training loss: 0.8220...  0.1133 sec/batch
Epoch: 20/20...  Training Step: 9277...  Training loss: 0.9242...  0.1167 sec/batch
Epoch: 20/20...  Training Step: 9278...  Training loss: 0.7732...  0.1181 sec/batch
Epoch: 20/20...  Training Step: 9279...  Training loss: 0.9238...  0.1149 sec/batch
Epoch: 20/20...  Training Step: 9280...  Training loss: 0.8466...  0.1156 sec/batch
In [25]:
tf.train.get_checkpoint_state('checkpoints')
Out[25]:
model_checkpoint_path: "checkpoints/i9280_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i1800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i2800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i3000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i3200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i3400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i3600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i3800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i4000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i4200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i4400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i4600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i4800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i5000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i5200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i5400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i5600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i5800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i6000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i6200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i6400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i6600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i6800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i7000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i7200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i7400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i7600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i7800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i8000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i8200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i8400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i8600_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i8800_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i9000_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i9200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i9280_l512.ckpt"

Sampling

In [26]:
def pick_top_n(preds, vocab_size, top_n=5):
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c
In [27]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="@\n"):
    samples = [c for c in prime]
    model = CharRNN(len(vocab), lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, len(vocab))
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, len(vocab))
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)
In [28]:
tf.train.latest_checkpoint('checkpoints')
Out[28]:
'checkpoints/i9280_l512.ckpt'
In [70]:
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 5000, lstm_size, len(vocab), prime="@\n")
In [87]:
r = []

r.append("**kern\t**kern\n")
r.append("*staff2\t*staff1\n")
r.append("*clefF4\t*clefG2\n")
r.append("*k[]\t*k[]\n")
r.append("*C:\t*C:\n")
r.append("*M4/4\t*M4/4\n")
r.append("*MM80\t*MM80\n")

bar = 1
for line in samp.splitlines():
    sp = line.split('\t')
    if sp[0] == '@':
        r.append("={bar}\t={bar}\n".format(bar=bar))
        bar += 1
    else:
        ln = len(sp)
        if ln == 1 and sp[0] != "":
            r.append(sp[0])
            r.append('\t')
            r.append('.')
            r.append('\n')
        elif ln == 1 and sp[0] == "":
            r.append(".")
            r.append('\t')
            r.append('.')
            r.append('\n')
        elif sp[0] == "*-" or sp[1] == "*-":
            continue
        else:
            r.append(sp[0])
            r.append('\t')
            r.append(sp[1])
            r.append('\n')

r.append("==|!\t==|!\n")
r.append("*-\t*-\n")

open("results/bach2ai.krn","w").writelines(r)
In [46]:
from music21 import *
m1 = converter.parse("results/bach2ai.krn")
m1.write('midi', fp='midi/bach2ai.mid')
humdrum.spineParser: WARNING: Error in parsing event ('#') at position 33 for spine None: Could not parse # for note information
humdrum.spineParser: WARNING: Error in parsing event ('32') at position 75 for spine None: Could not parse 32 for note information
humdrum.spineParser: WARNING: Error in parsing event ('32') at position 79 for spine None: Could not parse 32 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 86 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 120 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('116#') at position 141 for spine None: Could not parse 116# for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 152 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 155 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 184 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 217 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 231 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 239 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('8') at position 298 for spine None: Could not parse 8 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 336 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('8.') at position 374 for spine None: Could not parse 8. for note information
humdrum.spineParser: WARNING: Error in parsing event ('16.') at position 389 for spine None: Could not parse 16. for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 407 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 430 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('#') at position 448 for spine None: Could not parse # for note information
humdrum.spineParser: WARNING: Error in parsing event ('1616.') at position 456 for spine None: Could not parse 1616. for note information
humdrum.spineParser: WARNING: Error in parsing event ('.##') at position 461 for spine None: Could not parse .## for note information
humdrum.spineParser: WARNING: Error in parsing event ('116#') at position 477 for spine None: Could not parse 116# for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 480 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 498 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 500 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 511 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 525 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('W') at position 527 for spine None: Could not parse W for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 537 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('#') at position 538 for spine None: Could not parse # for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 562 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('.#') at position 614 for spine None: Could not parse .# for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 649 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('{166') at position 28 for spine None: Could not parse {166 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 43 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('#J#') at position 54 for spine None: Could not parse #J# for note information
humdrum.spineParser: WARNING: Error in parsing event ('#') at position 67 for spine None: Could not parse # for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 95 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 126 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('116') at position 131 for spine None: Could not parse 116 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 145 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('161') at position 162 for spine None: Could not parse 161 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 209 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 211 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('8') at position 238 for spine None: Could not parse 8 for note information
humdrum.spineParser: WARNING: Error in parsing event ('{') at position 278 for spine None: Could not parse { for note information
humdrum.spineParser: WARNING: Error in parsing event ('16.#}') at position 281 for spine None: Could not parse 16.#} for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 328 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 344 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('8') at position 357 for spine None: Could not parse 8 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 377 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('[email protected]') at position 389 for spine None: Could not parse [email protected] for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 391 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 394 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 400 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 416 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('8.') at position 417 for spine None: Could not parse 8. for note information
humdrum.spineParser: WARNING: Error in parsing event ('{') at position 447 for spine None: Could not parse { for note information
humdrum.spineParser: WARNING: Error in parsing event ('{1') at position 462 for spine None: Could not parse {1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 468 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 490 for spine None: Could not parse 16 for note information
humdrum.spineParser: WARNING: Error in parsing event ('8.M') at position 526 for spine None: Could not parse 8.M for note information
humdrum.spineParser: WARNING: Error in parsing event ('1616') at position 530 for spine None: Could not parse 1616 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 532 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1616#') at position 601 for spine None: Could not parse 1616# for note information
humdrum.spineParser: WARNING: Error in parsing event ('1616') at position 604 for spine None: Could not parse 1616 for note information
humdrum.spineParser: WARNING: Error in parsing event ('1') at position 630 for spine None: Could not parse 1 for note information
humdrum.spineParser: WARNING: Error in parsing event ('16') at position 648 for spine None: Could not parse 16 for note information
Out[46]:
'midi/bach2ai.mid'
In [47]:
m1.show("midi")