In [1]:
from google.colab import drive
drive.mount('/content/gdrive')
import os
os.chdir('/content/gdrive/My Drive/finch/tensorflow2/text_matching/ant/main')
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
In [2]:
!pip install transformers
Requirement already satisfied: transformers in /usr/local/lib/python3.6/dist-packages (4.0.1)
Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)
Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)
Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers) (0.0.43)
Requirement already satisfied: dataclasses; python_version < "3.7" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.8)
Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)
Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)
Requirement already satisfied: tokenizers==0.9.4 in /usr/local/lib/python3.6/dist-packages (from transformers) (0.9.4)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (1.15.0)
Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)
Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)
Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.17.0)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.11.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)
In [3]:
from transformers import BertTokenizer, TFBertLMHeadModel
import os
import json
import time
import logging
import pprint
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import random

print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())
TensorFlow Version 2.3.0
WARNING:tensorflow:From <ipython-input-3-05a3004c9c96>:13: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU Enabled: True
In [4]:
params = {
  'pretrain_path': 'bert-base-chinese',
  'train_path': '../data/train.json',
  'test_path': '../data/dev.json',
  'batch_size': 16,
  'max_len': 130,
  'buffer_size': 34334,
  'init_lr': 1e-5,
  'max_lr': 3e-5,
  'n_epochs': 4 * 10,
}

tokenizer = BertTokenizer.from_pretrained(params['pretrain_path'],
                                          lowercase = True,
                                          add_special_tokens = True)
In [5]:
# stream data from text files
def data_generator(f_path, params):
  with open(f_path) as f:
    print('Reading', f_path)
    for line in f:
      line = json.loads(line.rstrip())
      text1, text2, label = line['sentence1'], line['sentence2'], line['label']
      if len(text1) + len(text2) + 3 > params['max_len']:
        _max_len = (params['max_len'] - 3) // 2
        text1 = text1[:_max_len]
        text2 = text2[:_max_len]
      text1 = list(text1)
      text2 = list(text2)
      text = ['[CLS]'] + text1 + ['[SEP]'] + text2 + ['[SEP]']
      seg = [0] + [0] * len(text1) + [0] + [1] * len(text2) + [1]
      text = tokenizer.convert_tokens_to_ids(text)
      
      noises = []
      labels_mask = []
      for idx in text:
        if (random.random() <= 0.15) and (idx != 101) and (idx != 102) and (idx != 100):
          dice = random.random()
          if dice <= 0.8:
            noises.append(103)
          elif dice <= 0.9:
            noises.append(idx)
          else:
            noises.append(random.randint(0, 21127))
          labels_mask.append(1)
        else:
          noises.append(idx)
          labels_mask.append(0)

      yield (noises, seg), (text, labels_mask)


def dataset(is_training, params):
  _shapes = (([None], [None]), ([None], [None]))
  _types = ((tf.int32, tf.int32), (tf.int32, tf.int32))
  _pads = ((0, 0), (0, 0))
  
  if is_training:
    ds = tf.data.Dataset.from_generator(
      lambda: data_generator(params['train_path'], params),
      output_shapes = _shapes,
      output_types = _types,)
    ds = ds.shuffle(params['buffer_size'])
    ds = ds.padded_batch(params['batch_size'], _shapes, _pads)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
  else:
    ds = tf.data.Dataset.from_generator(
      lambda: data_generator(params['test_path'], params),
      output_shapes = _shapes,
      output_types = _types,)
    ds = ds.padded_batch(params['batch_size'], _shapes, _pads)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
  
  return ds
In [6]:
# input stream ids check
(text, seg), (labels, labels_mask) = next(data_generator(params['train_path'], params))
print(text)
print(seg)
print(labels)
print(labels_mask)
Reading ../data/train.json
[101, 6010, 6009, 955, 1446, 5023, 7583, 6820, 103, 103, 103, 2940, 2768, 1044, 2622, 1400, 103, 1408, 102, 955, 103, 3300, 1044, 2622, 103, 3309, 6820, 3315, 1408, 102]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[101, 6010, 6009, 955, 1446, 5023, 7583, 6820, 3621, 1377, 809, 2940, 2768, 1044, 2622, 1400, 3315, 1408, 102, 955, 1446, 3300, 1044, 2622, 1168, 3309, 6820, 3315, 1408, 102]
[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0]
In [7]:
model = TFBertLMHeadModel.from_pretrained(params['pretrain_path'],
                                          trainable = True,
                                          return_dict = True)
model.load_weights('../model/bert_further_pretrain.h5')
If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`
Some layers from the model checkpoint at bert-base-chinese were not used when initializing TFBertLMHeadModel: ['nsp___cls']
- This IS expected if you are initializing TFBertLMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertLMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertLMHeadModel were initialized from the model checkpoint at bert-base-chinese.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertLMHeadModel for predictions without further training.
In [8]:
step_size = 4 * params['buffer_size'] // params['batch_size']
decay_lr = tfa.optimizers.Triangular2CyclicalLearningRate(
  initial_learning_rate = params['init_lr'],
  maximal_learning_rate = params['max_lr'],
  step_size = step_size,)
optim = tf.optimizers.Adam(params['init_lr'])
global_step = 0

t0 = time.time()
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)

# Baseline Accuracy
m = tf.keras.metrics.Accuracy()
for ((text, seg), (labels, labels_mask)) in dataset(is_training=False, params=params):
  logits = model([text, tf.sign(text), seg], training=False).logits
  m.update_state(
    y_true = labels,
    y_pred = tf.argmax(logits, -1),
    sample_weight = labels_mask,)
best_acc = m.result().numpy()
logger.info("Baseline Accuracy: {:.3f}".format(best_acc))

for _ in range(params['n_epochs']):
  # Training
  for ((text, seg), (labels, labels_mask)) in dataset(is_training=True, params=params):
    with tf.GradientTape() as tape:
      logits = model([text, tf.sign(text), seg], training=True).logits
      loss = tf.compat.v1.losses.softmax_cross_entropy(
        onehot_labels = tf.one_hot(labels, 21128),
        logits = logits,
        weights = tf.cast(labels_mask, tf.float32),
        label_smoothing = .2,)
    
    trainable_vars = [v for v in model.trainable_variables if 'pooler' not in v.name]
    optim.lr.assign(decay_lr(global_step))
    grads = tape.gradient(loss, trainable_vars)
    grads, _ = tf.clip_by_global_norm(grads, 5.)
    optim.apply_gradients(zip(grads, trainable_vars))
    
    if global_step % 100 == 0:
      logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
        global_step, loss.numpy().item(), time.time()-t0, optim.lr.numpy().item()))
      t0 = time.time()
    global_step += 1
  
  # Evaluation
  m = tf.keras.metrics.Accuracy()

  for ((text, seg), (labels, labels_mask)) in dataset(is_training=False, params=params):
    logits = model([text, tf.sign(text), seg], training=False).logits
    m.update_state(
      y_true = labels,
      y_pred = tf.argmax(logits, -1),
      sample_weight = labels_mask,)

  acc = m.result().numpy()
  logger.info("Evaluation: Testing Accuracy: {:.3f}".format(acc))

  if acc > best_acc:
    best_acc = acc
    model.save_weights('../model/bert_further_pretrain.h5', save_format='h5')

  logger.info("Best Accuracy: {:.3f}".format(best_acc))
Reading ../data/dev.json
INFO:tensorflow:Baseline Accuracy: 0.851
Reading ../data/train.json
INFO:tensorflow:Step 0 | Loss: 3.0320 | Spent: 42.8 secs | LR: 0.000010
INFO:tensorflow:Step 100 | Loss: 2.7848 | Spent: 33.9 secs | LR: 0.000010
INFO:tensorflow:Step 200 | Loss: 2.7437 | Spent: 33.9 secs | LR: 0.000010
INFO:tensorflow:Step 300 | Loss: 3.0670 | Spent: 34.4 secs | LR: 0.000011
INFO:tensorflow:Step 400 | Loss: 2.9599 | Spent: 33.9 secs | LR: 0.000011
INFO:tensorflow:Step 500 | Loss: 3.1526 | Spent: 33.6 secs | LR: 0.000011
INFO:tensorflow:Step 600 | Loss: 3.0552 | Spent: 33.9 secs | LR: 0.000011
INFO:tensorflow:Step 700 | Loss: 3.0144 | Spent: 34.5 secs | LR: 0.000012
INFO:tensorflow:Step 800 | Loss: 2.8411 | Spent: 33.7 secs | LR: 0.000012
INFO:tensorflow:Step 900 | Loss: 2.7428 | Spent: 34.5 secs | LR: 0.000012
INFO:tensorflow:Step 1000 | Loss: 2.9510 | Spent: 33.9 secs | LR: 0.000012
INFO:tensorflow:Step 1100 | Loss: 2.6413 | Spent: 35.1 secs | LR: 0.000013
INFO:tensorflow:Step 1200 | Loss: 2.8135 | Spent: 33.7 secs | LR: 0.000013
INFO:tensorflow:Step 1300 | Loss: 3.4347 | Spent: 34.7 secs | LR: 0.000013
INFO:tensorflow:Step 1400 | Loss: 2.8554 | Spent: 34.7 secs | LR: 0.000013
INFO:tensorflow:Step 1500 | Loss: 3.3914 | Spent: 34.8 secs | LR: 0.000013
INFO:tensorflow:Step 1600 | Loss: 2.7641 | Spent: 34.2 secs | LR: 0.000014
INFO:tensorflow:Step 1700 | Loss: 3.0267 | Spent: 34.2 secs | LR: 0.000014
INFO:tensorflow:Step 1800 | Loss: 2.8499 | Spent: 34.4 secs | LR: 0.000014
INFO:tensorflow:Step 1900 | Loss: 2.8702 | Spent: 34.0 secs | LR: 0.000014
INFO:tensorflow:Step 2000 | Loss: 2.9787 | Spent: 34.8 secs | LR: 0.000015
INFO:tensorflow:Step 2100 | Loss: 3.3084 | Spent: 33.8 secs | LR: 0.000015
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.852
INFO:tensorflow:Best Accuracy: 0.852
Reading ../data/train.json
INFO:tensorflow:Step 2200 | Loss: 2.7923 | Spent: 79.7 secs | LR: 0.000015
INFO:tensorflow:Step 2300 | Loss: 2.8164 | Spent: 34.3 secs | LR: 0.000015
INFO:tensorflow:Step 2400 | Loss: 2.8498 | Spent: 35.7 secs | LR: 0.000016
INFO:tensorflow:Step 2500 | Loss: 3.0618 | Spent: 33.9 secs | LR: 0.000016
INFO:tensorflow:Step 2600 | Loss: 3.0317 | Spent: 34.2 secs | LR: 0.000016
INFO:tensorflow:Step 2700 | Loss: 2.9873 | Spent: 33.9 secs | LR: 0.000016
INFO:tensorflow:Step 2800 | Loss: 3.0351 | Spent: 34.4 secs | LR: 0.000017
INFO:tensorflow:Step 2900 | Loss: 3.2515 | Spent: 33.8 secs | LR: 0.000017
INFO:tensorflow:Step 3000 | Loss: 3.0203 | Spent: 34.1 secs | LR: 0.000017
INFO:tensorflow:Step 3100 | Loss: 2.9316 | Spent: 34.2 secs | LR: 0.000017
INFO:tensorflow:Step 3200 | Loss: 2.8565 | Spent: 32.9 secs | LR: 0.000017
INFO:tensorflow:Step 3300 | Loss: 2.8065 | Spent: 34.0 secs | LR: 0.000018
INFO:tensorflow:Step 3400 | Loss: 2.7815 | Spent: 34.1 secs | LR: 0.000018
INFO:tensorflow:Step 3500 | Loss: 2.8858 | Spent: 34.5 secs | LR: 0.000018
INFO:tensorflow:Step 3600 | Loss: 2.9126 | Spent: 33.5 secs | LR: 0.000018
INFO:tensorflow:Step 3700 | Loss: 3.0727 | Spent: 34.1 secs | LR: 0.000019
INFO:tensorflow:Step 3800 | Loss: 2.9328 | Spent: 34.5 secs | LR: 0.000019
INFO:tensorflow:Step 3900 | Loss: 2.8748 | Spent: 34.6 secs | LR: 0.000019
INFO:tensorflow:Step 4000 | Loss: 3.0257 | Spent: 34.9 secs | LR: 0.000019
INFO:tensorflow:Step 4100 | Loss: 2.9722 | Spent: 34.5 secs | LR: 0.000020
INFO:tensorflow:Step 4200 | Loss: 3.0562 | Spent: 33.7 secs | LR: 0.000020
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.849
INFO:tensorflow:Best Accuracy: 0.852
Reading ../data/train.json
INFO:tensorflow:Step 4300 | Loss: 3.0919 | Spent: 75.8 secs | LR: 0.000020
INFO:tensorflow:Step 4400 | Loss: 2.7759 | Spent: 32.8 secs | LR: 0.000020
INFO:tensorflow:Step 4500 | Loss: 3.1010 | Spent: 34.7 secs | LR: 0.000020
INFO:tensorflow:Step 4600 | Loss: 2.8937 | Spent: 34.3 secs | LR: 0.000021
INFO:tensorflow:Step 4700 | Loss: 2.6697 | Spent: 34.0 secs | LR: 0.000021
INFO:tensorflow:Step 4800 | Loss: 2.8979 | Spent: 33.7 secs | LR: 0.000021
INFO:tensorflow:Step 4900 | Loss: 3.2428 | Spent: 35.6 secs | LR: 0.000021
INFO:tensorflow:Step 5000 | Loss: 2.7781 | Spent: 34.1 secs | LR: 0.000022
INFO:tensorflow:Step 5100 | Loss: 3.1505 | Spent: 34.8 secs | LR: 0.000022
INFO:tensorflow:Step 5200 | Loss: 3.1498 | Spent: 34.1 secs | LR: 0.000022
INFO:tensorflow:Step 5300 | Loss: 2.9560 | Spent: 34.1 secs | LR: 0.000022
INFO:tensorflow:Step 5400 | Loss: 3.3205 | Spent: 34.0 secs | LR: 0.000023
INFO:tensorflow:Step 5500 | Loss: 3.1163 | Spent: 34.8 secs | LR: 0.000023
INFO:tensorflow:Step 5600 | Loss: 2.6986 | Spent: 33.7 secs | LR: 0.000023
INFO:tensorflow:Step 5700 | Loss: 2.8479 | Spent: 33.7 secs | LR: 0.000023
INFO:tensorflow:Step 5800 | Loss: 3.3694 | Spent: 34.5 secs | LR: 0.000024
INFO:tensorflow:Step 5900 | Loss: 2.8964 | Spent: 34.9 secs | LR: 0.000024
INFO:tensorflow:Step 6000 | Loss: 2.9124 | Spent: 34.5 secs | LR: 0.000024
INFO:tensorflow:Step 6100 | Loss: 2.8196 | Spent: 34.6 secs | LR: 0.000024
INFO:tensorflow:Step 6200 | Loss: 2.9271 | Spent: 34.0 secs | LR: 0.000024
INFO:tensorflow:Step 6300 | Loss: 2.8769 | Spent: 34.5 secs | LR: 0.000025
INFO:tensorflow:Step 6400 | Loss: 2.8842 | Spent: 33.6 secs | LR: 0.000025
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.848
INFO:tensorflow:Best Accuracy: 0.852
Reading ../data/train.json
INFO:tensorflow:Step 6500 | Loss: 2.8486 | Spent: 75.0 secs | LR: 0.000025
INFO:tensorflow:Step 6600 | Loss: 2.7246 | Spent: 34.1 secs | LR: 0.000025
INFO:tensorflow:Step 6700 | Loss: 2.8428 | Spent: 33.4 secs | LR: 0.000026
INFO:tensorflow:Step 6800 | Loss: 2.8497 | Spent: 33.6 secs | LR: 0.000026
INFO:tensorflow:Step 6900 | Loss: 2.7478 | Spent: 34.1 secs | LR: 0.000026
INFO:tensorflow:Step 7000 | Loss: 2.8056 | Spent: 34.7 secs | LR: 0.000026
INFO:tensorflow:Step 7100 | Loss: 2.9873 | Spent: 34.7 secs | LR: 0.000027
INFO:tensorflow:Step 7200 | Loss: 2.7689 | Spent: 34.1 secs | LR: 0.000027
INFO:tensorflow:Step 7300 | Loss: 2.7229 | Spent: 34.0 secs | LR: 0.000027
INFO:tensorflow:Step 7400 | Loss: 3.1024 | Spent: 34.0 secs | LR: 0.000027
INFO:tensorflow:Step 7500 | Loss: 2.9432 | Spent: 33.2 secs | LR: 0.000027
INFO:tensorflow:Step 7600 | Loss: 2.9976 | Spent: 33.7 secs | LR: 0.000028
INFO:tensorflow:Step 7700 | Loss: 2.9148 | Spent: 34.8 secs | LR: 0.000028
INFO:tensorflow:Step 7800 | Loss: 2.8493 | Spent: 34.7 secs | LR: 0.000028
INFO:tensorflow:Step 7900 | Loss: 3.1491 | Spent: 34.1 secs | LR: 0.000028
INFO:tensorflow:Step 8000 | Loss: 2.9998 | Spent: 34.4 secs | LR: 0.000029
INFO:tensorflow:Step 8100 | Loss: 2.8722 | Spent: 35.1 secs | LR: 0.000029
INFO:tensorflow:Step 8200 | Loss: 3.0852 | Spent: 35.2 secs | LR: 0.000029
INFO:tensorflow:Step 8300 | Loss: 2.7353 | Spent: 34.3 secs | LR: 0.000029
INFO:tensorflow:Step 8400 | Loss: 2.9623 | Spent: 34.5 secs | LR: 0.000030
INFO:tensorflow:Step 8500 | Loss: 2.8876 | Spent: 33.9 secs | LR: 0.000030
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.846
INFO:tensorflow:Best Accuracy: 0.852
Reading ../data/train.json
INFO:tensorflow:Step 8600 | Loss: 2.6752 | Spent: 76.1 secs | LR: 0.000030
INFO:tensorflow:Step 8700 | Loss: 3.0000 | Spent: 34.6 secs | LR: 0.000030
INFO:tensorflow:Step 8800 | Loss: 2.9544 | Spent: 35.3 secs | LR: 0.000029
INFO:tensorflow:Step 8900 | Loss: 3.1356 | Spent: 35.0 secs | LR: 0.000029
INFO:tensorflow:Step 9000 | Loss: 2.8148 | Spent: 33.6 secs | LR: 0.000029
INFO:tensorflow:Step 9100 | Loss: 2.8841 | Spent: 33.6 secs | LR: 0.000029
INFO:tensorflow:Step 9200 | Loss: 2.8193 | Spent: 33.4 secs | LR: 0.000029
INFO:tensorflow:Step 9300 | Loss: 2.8907 | Spent: 34.7 secs | LR: 0.000028
INFO:tensorflow:Step 9400 | Loss: 2.8676 | Spent: 34.7 secs | LR: 0.000028
INFO:tensorflow:Step 9500 | Loss: 2.7948 | Spent: 34.5 secs | LR: 0.000028
INFO:tensorflow:Step 9600 | Loss: 3.0000 | Spent: 33.7 secs | LR: 0.000028
INFO:tensorflow:Step 9700 | Loss: 2.9098 | Spent: 34.2 secs | LR: 0.000027
INFO:tensorflow:Step 9800 | Loss: 3.0773 | Spent: 33.7 secs | LR: 0.000027
INFO:tensorflow:Step 9900 | Loss: 3.0993 | Spent: 34.1 secs | LR: 0.000027
INFO:tensorflow:Step 10000 | Loss: 2.9428 | Spent: 33.7 secs | LR: 0.000027
INFO:tensorflow:Step 10100 | Loss: 2.8468 | Spent: 33.8 secs | LR: 0.000026
INFO:tensorflow:Step 10200 | Loss: 3.1143 | Spent: 34.4 secs | LR: 0.000026
INFO:tensorflow:Step 10300 | Loss: 2.9872 | Spent: 33.1 secs | LR: 0.000026
INFO:tensorflow:Step 10400 | Loss: 2.9007 | Spent: 34.3 secs | LR: 0.000026
INFO:tensorflow:Step 10500 | Loss: 2.7440 | Spent: 33.6 secs | LR: 0.000026
INFO:tensorflow:Step 10600 | Loss: 2.8800 | Spent: 34.8 secs | LR: 0.000025
INFO:tensorflow:Step 10700 | Loss: 3.1612 | Spent: 34.2 secs | LR: 0.000025
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.850
INFO:tensorflow:Best Accuracy: 0.852
Reading ../data/train.json
INFO:tensorflow:Step 10800 | Loss: 3.0292 | Spent: 75.9 secs | LR: 0.000025
INFO:tensorflow:Step 10900 | Loss: 2.9323 | Spent: 34.0 secs | LR: 0.000025
INFO:tensorflow:Step 11000 | Loss: 3.2419 | Spent: 34.1 secs | LR: 0.000024
INFO:tensorflow:Step 11100 | Loss: 3.0252 | Spent: 33.9 secs | LR: 0.000024
INFO:tensorflow:Step 11200 | Loss: 3.0298 | Spent: 33.5 secs | LR: 0.000024
INFO:tensorflow:Step 11300 | Loss: 2.9951 | Spent: 35.0 secs | LR: 0.000024
INFO:tensorflow:Step 11400 | Loss: 3.1734 | Spent: 33.9 secs | LR: 0.000023
INFO:tensorflow:Step 11500 | Loss: 3.1992 | Spent: 34.0 secs | LR: 0.000023
INFO:tensorflow:Step 11600 | Loss: 3.0970 | Spent: 34.7 secs | LR: 0.000023
INFO:tensorflow:Step 11700 | Loss: 2.9086 | Spent: 34.8 secs | LR: 0.000023
INFO:tensorflow:Step 11800 | Loss: 3.1699 | Spent: 33.5 secs | LR: 0.000023
INFO:tensorflow:Step 11900 | Loss: 3.1841 | Spent: 35.0 secs | LR: 0.000022
INFO:tensorflow:Step 12000 | Loss: 2.9623 | Spent: 33.3 secs | LR: 0.000022
INFO:tensorflow:Step 12100 | Loss: 2.7164 | Spent: 33.5 secs | LR: 0.000022
INFO:tensorflow:Step 12200 | Loss: 2.9071 | Spent: 33.1 secs | LR: 0.000022
INFO:tensorflow:Step 12300 | Loss: 3.1441 | Spent: 34.5 secs | LR: 0.000021
INFO:tensorflow:Step 12400 | Loss: 2.6054 | Spent: 34.4 secs | LR: 0.000021
INFO:tensorflow:Step 12500 | Loss: 2.9947 | Spent: 33.7 secs | LR: 0.000021
INFO:tensorflow:Step 12600 | Loss: 3.1606 | Spent: 33.6 secs | LR: 0.000021
INFO:tensorflow:Step 12700 | Loss: 2.8517 | Spent: 34.1 secs | LR: 0.000020
INFO:tensorflow:Step 12800 | Loss: 3.2337 | Spent: 34.2 secs | LR: 0.000020
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.855
INFO:tensorflow:Best Accuracy: 0.855
Reading ../data/train.json
INFO:tensorflow:Step 12900 | Loss: 2.7832 | Spent: 79.3 secs | LR: 0.000020
INFO:tensorflow:Step 13000 | Loss: 2.8079 | Spent: 33.6 secs | LR: 0.000020
INFO:tensorflow:Step 13100 | Loss: 2.9467 | Spent: 34.2 secs | LR: 0.000019
INFO:tensorflow:Step 13200 | Loss: 2.8128 | Spent: 34.2 secs | LR: 0.000019
INFO:tensorflow:Step 13300 | Loss: 2.9430 | Spent: 33.9 secs | LR: 0.000019
INFO:tensorflow:Step 13400 | Loss: 3.0292 | Spent: 34.2 secs | LR: 0.000019
INFO:tensorflow:Step 13500 | Loss: 2.8830 | Spent: 34.1 secs | LR: 0.000019
INFO:tensorflow:Step 13600 | Loss: 2.8439 | Spent: 33.6 secs | LR: 0.000018
INFO:tensorflow:Step 13700 | Loss: 2.9065 | Spent: 34.3 secs | LR: 0.000018
INFO:tensorflow:Step 13800 | Loss: 3.2864 | Spent: 34.2 secs | LR: 0.000018
INFO:tensorflow:Step 13900 | Loss: 2.9203 | Spent: 34.1 secs | LR: 0.000018
INFO:tensorflow:Step 14000 | Loss: 2.8605 | Spent: 35.0 secs | LR: 0.000017
INFO:tensorflow:Step 14100 | Loss: 2.9818 | Spent: 34.3 secs | LR: 0.000017
INFO:tensorflow:Step 14200 | Loss: 2.7897 | Spent: 33.6 secs | LR: 0.000017
INFO:tensorflow:Step 14300 | Loss: 2.7864 | Spent: 33.7 secs | LR: 0.000017
INFO:tensorflow:Step 14400 | Loss: 3.0549 | Spent: 34.5 secs | LR: 0.000016
INFO:tensorflow:Step 14500 | Loss: 2.8171 | Spent: 33.1 secs | LR: 0.000016
INFO:tensorflow:Step 14600 | Loss: 2.8843 | Spent: 34.3 secs | LR: 0.000016
INFO:tensorflow:Step 14700 | Loss: 2.9182 | Spent: 33.2 secs | LR: 0.000016
INFO:tensorflow:Step 14800 | Loss: 2.9457 | Spent: 34.3 secs | LR: 0.000016
INFO:tensorflow:Step 14900 | Loss: 2.9193 | Spent: 35.2 secs | LR: 0.000015
INFO:tensorflow:Step 15000 | Loss: 3.0615 | Spent: 34.4 secs | LR: 0.000015
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.851
INFO:tensorflow:Best Accuracy: 0.855
Reading ../data/train.json
INFO:tensorflow:Step 15100 | Loss: 3.0214 | Spent: 75.4 secs | LR: 0.000015
INFO:tensorflow:Step 15200 | Loss: 3.2181 | Spent: 33.3 secs | LR: 0.000015
INFO:tensorflow:Step 15300 | Loss: 2.8750 | Spent: 33.6 secs | LR: 0.000014
INFO:tensorflow:Step 15400 | Loss: 2.8616 | Spent: 33.5 secs | LR: 0.000014
INFO:tensorflow:Step 15500 | Loss: 2.7063 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 15600 | Loss: 2.8868 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 15700 | Loss: 3.0534 | Spent: 34.2 secs | LR: 0.000013
INFO:tensorflow:Step 15800 | Loss: 3.3384 | Spent: 33.8 secs | LR: 0.000013
INFO:tensorflow:Step 15900 | Loss: 2.9575 | Spent: 34.3 secs | LR: 0.000013
INFO:tensorflow:Step 16000 | Loss: 2.8709 | Spent: 33.3 secs | LR: 0.000013
INFO:tensorflow:Step 16100 | Loss: 3.6435 | Spent: 34.6 secs | LR: 0.000012
INFO:tensorflow:Step 16200 | Loss: 2.9147 | Spent: 33.4 secs | LR: 0.000012
INFO:tensorflow:Step 16300 | Loss: 2.9730 | Spent: 34.7 secs | LR: 0.000012
INFO:tensorflow:Step 16400 | Loss: 3.1337 | Spent: 33.2 secs | LR: 0.000012
INFO:tensorflow:Step 16500 | Loss: 2.9893 | Spent: 33.7 secs | LR: 0.000012
INFO:tensorflow:Step 16600 | Loss: 3.1096 | Spent: 33.9 secs | LR: 0.000011
INFO:tensorflow:Step 16700 | Loss: 2.9378 | Spent: 34.7 secs | LR: 0.000011
INFO:tensorflow:Step 16800 | Loss: 2.9561 | Spent: 33.7 secs | LR: 0.000011
INFO:tensorflow:Step 16900 | Loss: 2.9984 | Spent: 34.4 secs | LR: 0.000011
INFO:tensorflow:Step 17000 | Loss: 2.6703 | Spent: 34.6 secs | LR: 0.000010
INFO:tensorflow:Step 17100 | Loss: 2.8967 | Spent: 35.7 secs | LR: 0.000010
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.856
INFO:tensorflow:Best Accuracy: 0.856
Reading ../data/train.json
INFO:tensorflow:Step 17200 | Loss: 2.9195 | Spent: 78.9 secs | LR: 0.000010
INFO:tensorflow:Step 17300 | Loss: 2.8241 | Spent: 34.5 secs | LR: 0.000010
INFO:tensorflow:Step 17400 | Loss: 3.1611 | Spent: 34.3 secs | LR: 0.000010
INFO:tensorflow:Step 17500 | Loss: 2.7955 | Spent: 33.8 secs | LR: 0.000010
INFO:tensorflow:Step 17600 | Loss: 3.2296 | Spent: 33.4 secs | LR: 0.000011
INFO:tensorflow:Step 17700 | Loss: 2.7705 | Spent: 34.3 secs | LR: 0.000011
INFO:tensorflow:Step 17800 | Loss: 2.8133 | Spent: 34.7 secs | LR: 0.000011
INFO:tensorflow:Step 17900 | Loss: 3.3304 | Spent: 33.8 secs | LR: 0.000011
INFO:tensorflow:Step 18000 | Loss: 2.6590 | Spent: 35.4 secs | LR: 0.000011
INFO:tensorflow:Step 18100 | Loss: 2.8913 | Spent: 33.8 secs | LR: 0.000011
INFO:tensorflow:Step 18200 | Loss: 2.6048 | Spent: 34.6 secs | LR: 0.000011
INFO:tensorflow:Step 18300 | Loss: 2.9779 | Spent: 34.6 secs | LR: 0.000011
INFO:tensorflow:Step 18400 | Loss: 2.8179 | Spent: 33.8 secs | LR: 0.000011
INFO:tensorflow:Step 18500 | Loss: 2.7558 | Spent: 34.0 secs | LR: 0.000012
INFO:tensorflow:Step 18600 | Loss: 2.8503 | Spent: 33.4 secs | LR: 0.000012
INFO:tensorflow:Step 18700 | Loss: 2.7739 | Spent: 34.6 secs | LR: 0.000012
INFO:tensorflow:Step 18800 | Loss: 2.9376 | Spent: 33.7 secs | LR: 0.000012
INFO:tensorflow:Step 18900 | Loss: 3.2397 | Spent: 34.0 secs | LR: 0.000012
INFO:tensorflow:Step 19000 | Loss: 2.7491 | Spent: 33.8 secs | LR: 0.000012
INFO:tensorflow:Step 19100 | Loss: 2.9298 | Spent: 34.7 secs | LR: 0.000012
INFO:tensorflow:Step 19200 | Loss: 2.7720 | Spent: 33.7 secs | LR: 0.000012
INFO:tensorflow:Step 19300 | Loss: 3.1465 | Spent: 34.0 secs | LR: 0.000012
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.861
INFO:tensorflow:Best Accuracy: 0.861
Reading ../data/train.json
INFO:tensorflow:Step 19400 | Loss: 2.8339 | Spent: 78.6 secs | LR: 0.000013
INFO:tensorflow:Step 19500 | Loss: 3.1110 | Spent: 34.7 secs | LR: 0.000013
INFO:tensorflow:Step 19600 | Loss: 2.8842 | Spent: 33.8 secs | LR: 0.000013
INFO:tensorflow:Step 19700 | Loss: 2.6964 | Spent: 34.4 secs | LR: 0.000013
INFO:tensorflow:Step 19800 | Loss: 2.7186 | Spent: 34.0 secs | LR: 0.000013
INFO:tensorflow:Step 19900 | Loss: 2.9587 | Spent: 34.6 secs | LR: 0.000013
INFO:tensorflow:Step 20000 | Loss: 2.6354 | Spent: 35.3 secs | LR: 0.000013
INFO:tensorflow:Step 20100 | Loss: 2.9561 | Spent: 34.2 secs | LR: 0.000013
INFO:tensorflow:Step 20200 | Loss: 2.7948 | Spent: 34.1 secs | LR: 0.000014
INFO:tensorflow:Step 20300 | Loss: 2.7679 | Spent: 33.7 secs | LR: 0.000014
INFO:tensorflow:Step 20400 | Loss: 2.8211 | Spent: 34.5 secs | LR: 0.000014
INFO:tensorflow:Step 20500 | Loss: 3.0014 | Spent: 33.5 secs | LR: 0.000014
INFO:tensorflow:Step 20600 | Loss: 3.0117 | Spent: 34.8 secs | LR: 0.000014
INFO:tensorflow:Step 20700 | Loss: 2.9254 | Spent: 34.3 secs | LR: 0.000014
INFO:tensorflow:Step 20800 | Loss: 2.9582 | Spent: 33.8 secs | LR: 0.000014
INFO:tensorflow:Step 20900 | Loss: 3.0077 | Spent: 34.1 secs | LR: 0.000014
INFO:tensorflow:Step 21000 | Loss: 2.8578 | Spent: 33.5 secs | LR: 0.000014
INFO:tensorflow:Step 21100 | Loss: 3.0609 | Spent: 33.8 secs | LR: 0.000015
INFO:tensorflow:Step 21200 | Loss: 2.7414 | Spent: 34.9 secs | LR: 0.000015
INFO:tensorflow:Step 21300 | Loss: 2.8741 | Spent: 33.8 secs | LR: 0.000015
INFO:tensorflow:Step 21400 | Loss: 2.8372 | Spent: 34.4 secs | LR: 0.000015
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.858
INFO:tensorflow:Best Accuracy: 0.861
Reading ../data/train.json
INFO:tensorflow:Step 21500 | Loss: 2.6654 | Spent: 75.9 secs | LR: 0.000015
INFO:tensorflow:Step 21600 | Loss: 2.7234 | Spent: 34.1 secs | LR: 0.000015
INFO:tensorflow:Step 21700 | Loss: 3.3004 | Spent: 34.6 secs | LR: 0.000015
INFO:tensorflow:Step 21800 | Loss: 2.6888 | Spent: 34.5 secs | LR: 0.000015
INFO:tensorflow:Step 21900 | Loss: 2.8612 | Spent: 34.4 secs | LR: 0.000016
INFO:tensorflow:Step 22000 | Loss: 2.9185 | Spent: 34.1 secs | LR: 0.000016
INFO:tensorflow:Step 22100 | Loss: 2.9008 | Spent: 33.4 secs | LR: 0.000016
INFO:tensorflow:Step 22200 | Loss: 2.6160 | Spent: 34.4 secs | LR: 0.000016
INFO:tensorflow:Step 22300 | Loss: 2.9733 | Spent: 33.8 secs | LR: 0.000016
INFO:tensorflow:Step 22400 | Loss: 2.8641 | Spent: 35.0 secs | LR: 0.000016
INFO:tensorflow:Step 22500 | Loss: 3.0581 | Spent: 34.7 secs | LR: 0.000016
INFO:tensorflow:Step 22600 | Loss: 2.7973 | Spent: 34.8 secs | LR: 0.000016
INFO:tensorflow:Step 22700 | Loss: 2.9555 | Spent: 34.2 secs | LR: 0.000016
INFO:tensorflow:Step 22800 | Loss: 3.0850 | Spent: 33.9 secs | LR: 0.000017
INFO:tensorflow:Step 22900 | Loss: 2.9405 | Spent: 33.5 secs | LR: 0.000017
INFO:tensorflow:Step 23000 | Loss: 3.0476 | Spent: 33.8 secs | LR: 0.000017
INFO:tensorflow:Step 23100 | Loss: 2.7562 | Spent: 34.0 secs | LR: 0.000017
INFO:tensorflow:Step 23200 | Loss: 3.0016 | Spent: 34.7 secs | LR: 0.000017
INFO:tensorflow:Step 23300 | Loss: 3.2179 | Spent: 34.3 secs | LR: 0.000017
INFO:tensorflow:Step 23400 | Loss: 2.6847 | Spent: 33.6 secs | LR: 0.000017
INFO:tensorflow:Step 23500 | Loss: 2.8275 | Spent: 35.2 secs | LR: 0.000017
INFO:tensorflow:Step 23600 | Loss: 2.8057 | Spent: 34.3 secs | LR: 0.000017
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.851
INFO:tensorflow:Best Accuracy: 0.861
Reading ../data/train.json
INFO:tensorflow:Step 23700 | Loss: 3.0548 | Spent: 76.1 secs | LR: 0.000018
INFO:tensorflow:Step 23800 | Loss: 3.0818 | Spent: 33.8 secs | LR: 0.000018
INFO:tensorflow:Step 23900 | Loss: 2.9011 | Spent: 34.1 secs | LR: 0.000018
INFO:tensorflow:Step 24000 | Loss: 2.6842 | Spent: 34.3 secs | LR: 0.000018
INFO:tensorflow:Step 24100 | Loss: 2.7976 | Spent: 34.3 secs | LR: 0.000018
INFO:tensorflow:Step 24200 | Loss: 2.8816 | Spent: 33.9 secs | LR: 0.000018
INFO:tensorflow:Step 24300 | Loss: 3.1344 | Spent: 34.0 secs | LR: 0.000018
INFO:tensorflow:Step 24400 | Loss: 2.6713 | Spent: 35.9 secs | LR: 0.000018
INFO:tensorflow:Step 24500 | Loss: 3.0497 | Spent: 33.8 secs | LR: 0.000019
INFO:tensorflow:Step 24600 | Loss: 3.0186 | Spent: 34.5 secs | LR: 0.000019
INFO:tensorflow:Step 24700 | Loss: 2.9544 | Spent: 34.5 secs | LR: 0.000019
INFO:tensorflow:Step 24800 | Loss: 2.8790 | Spent: 34.1 secs | LR: 0.000019
INFO:tensorflow:Step 24900 | Loss: 2.9158 | Spent: 34.4 secs | LR: 0.000019
INFO:tensorflow:Step 25000 | Loss: 2.9351 | Spent: 34.5 secs | LR: 0.000019
INFO:tensorflow:Step 25100 | Loss: 3.0545 | Spent: 34.5 secs | LR: 0.000019
INFO:tensorflow:Step 25200 | Loss: 2.9130 | Spent: 34.3 secs | LR: 0.000019
INFO:tensorflow:Step 25300 | Loss: 2.7675 | Spent: 33.4 secs | LR: 0.000019
INFO:tensorflow:Step 25400 | Loss: 3.0030 | Spent: 34.4 secs | LR: 0.000020
INFO:tensorflow:Step 25500 | Loss: 2.9504 | Spent: 34.2 secs | LR: 0.000020
INFO:tensorflow:Step 25600 | Loss: 2.8934 | Spent: 34.7 secs | LR: 0.000020
INFO:tensorflow:Step 25700 | Loss: 2.7402 | Spent: 34.2 secs | LR: 0.000020
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.856
INFO:tensorflow:Best Accuracy: 0.861
Reading ../data/train.json
INFO:tensorflow:Step 25800 | Loss: 2.7640 | Spent: 76.8 secs | LR: 0.000020
INFO:tensorflow:Step 25900 | Loss: 2.8375 | Spent: 34.6 secs | LR: 0.000020
INFO:tensorflow:Step 26000 | Loss: 2.9022 | Spent: 34.4 secs | LR: 0.000020
INFO:tensorflow:Step 26100 | Loss: 2.9612 | Spent: 34.1 secs | LR: 0.000020
INFO:tensorflow:Step 26200 | Loss: 2.9264 | Spent: 34.3 secs | LR: 0.000019
INFO:tensorflow:Step 26300 | Loss: 2.6415 | Spent: 33.2 secs | LR: 0.000019
INFO:tensorflow:Step 26400 | Loss: 2.8950 | Spent: 34.7 secs | LR: 0.000019
INFO:tensorflow:Step 26500 | Loss: 2.7470 | Spent: 34.1 secs | LR: 0.000019
INFO:tensorflow:Step 26600 | Loss: 3.2618 | Spent: 33.8 secs | LR: 0.000019
INFO:tensorflow:Step 26700 | Loss: 2.9019 | Spent: 33.3 secs | LR: 0.000019
INFO:tensorflow:Step 26800 | Loss: 3.1680 | Spent: 33.3 secs | LR: 0.000019
INFO:tensorflow:Step 26900 | Loss: 2.8628 | Spent: 34.4 secs | LR: 0.000019
INFO:tensorflow:Step 27000 | Loss: 2.6509 | Spent: 33.4 secs | LR: 0.000019
INFO:tensorflow:Step 27100 | Loss: 2.8289 | Spent: 34.3 secs | LR: 0.000018
INFO:tensorflow:Step 27200 | Loss: 2.8466 | Spent: 35.0 secs | LR: 0.000018
INFO:tensorflow:Step 27300 | Loss: 3.1738 | Spent: 33.8 secs | LR: 0.000018
INFO:tensorflow:Step 27400 | Loss: 2.7250 | Spent: 33.1 secs | LR: 0.000018
INFO:tensorflow:Step 27500 | Loss: 2.8496 | Spent: 34.2 secs | LR: 0.000018
INFO:tensorflow:Step 27600 | Loss: 3.2138 | Spent: 33.5 secs | LR: 0.000018
INFO:tensorflow:Step 27700 | Loss: 2.8787 | Spent: 33.7 secs | LR: 0.000018
INFO:tensorflow:Step 27800 | Loss: 2.7204 | Spent: 33.9 secs | LR: 0.000018
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.852
INFO:tensorflow:Best Accuracy: 0.861
Reading ../data/train.json
INFO:tensorflow:Step 27900 | Loss: 2.7525 | Spent: 76.8 secs | LR: 0.000017
INFO:tensorflow:Step 28000 | Loss: 2.6683 | Spent: 34.7 secs | LR: 0.000017
INFO:tensorflow:Step 28100 | Loss: 2.7624 | Spent: 33.8 secs | LR: 0.000017
INFO:tensorflow:Step 28200 | Loss: 2.7987 | Spent: 35.2 secs | LR: 0.000017
INFO:tensorflow:Step 28300 | Loss: 2.8365 | Spent: 34.7 secs | LR: 0.000017
INFO:tensorflow:Step 28400 | Loss: 2.8946 | Spent: 33.7 secs | LR: 0.000017
INFO:tensorflow:Step 28500 | Loss: 2.8221 | Spent: 34.3 secs | LR: 0.000017
INFO:tensorflow:Step 28600 | Loss: 2.6618 | Spent: 34.8 secs | LR: 0.000017
INFO:tensorflow:Step 28700 | Loss: 2.7485 | Spent: 33.6 secs | LR: 0.000017
INFO:tensorflow:Step 28800 | Loss: 2.8272 | Spent: 34.5 secs | LR: 0.000016
INFO:tensorflow:Step 28900 | Loss: 2.9808 | Spent: 34.2 secs | LR: 0.000016
INFO:tensorflow:Step 29000 | Loss: 2.8500 | Spent: 33.6 secs | LR: 0.000016
INFO:tensorflow:Step 29100 | Loss: 2.9061 | Spent: 33.5 secs | LR: 0.000016
INFO:tensorflow:Step 29200 | Loss: 2.8092 | Spent: 33.8 secs | LR: 0.000016
INFO:tensorflow:Step 29300 | Loss: 2.8088 | Spent: 34.8 secs | LR: 0.000016
INFO:tensorflow:Step 29400 | Loss: 3.0219 | Spent: 34.3 secs | LR: 0.000016
INFO:tensorflow:Step 29500 | Loss: 2.8388 | Spent: 34.7 secs | LR: 0.000016
INFO:tensorflow:Step 29600 | Loss: 2.8144 | Spent: 33.7 secs | LR: 0.000016
INFO:tensorflow:Step 29700 | Loss: 2.9077 | Spent: 33.4 secs | LR: 0.000015
INFO:tensorflow:Step 29800 | Loss: 2.8239 | Spent: 34.4 secs | LR: 0.000015
INFO:tensorflow:Step 29900 | Loss: 2.8691 | Spent: 34.0 secs | LR: 0.000015
INFO:tensorflow:Step 30000 | Loss: 3.0736 | Spent: 33.4 secs | LR: 0.000015
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.854
INFO:tensorflow:Best Accuracy: 0.861
Reading ../data/train.json
INFO:tensorflow:Step 30100 | Loss: 2.6119 | Spent: 75.8 secs | LR: 0.000015
INFO:tensorflow:Step 30200 | Loss: 2.9340 | Spent: 34.3 secs | LR: 0.000015
INFO:tensorflow:Step 30300 | Loss: 2.8066 | Spent: 34.5 secs | LR: 0.000015
INFO:tensorflow:Step 30400 | Loss: 2.5994 | Spent: 34.0 secs | LR: 0.000015
INFO:tensorflow:Step 30500 | Loss: 2.7391 | Spent: 34.4 secs | LR: 0.000014
INFO:tensorflow:Step 30600 | Loss: 2.7662 | Spent: 34.4 secs | LR: 0.000014
INFO:tensorflow:Step 30700 | Loss: 3.1820 | Spent: 33.9 secs | LR: 0.000014
INFO:tensorflow:Step 30800 | Loss: 2.8035 | Spent: 33.4 secs | LR: 0.000014
INFO:tensorflow:Step 30900 | Loss: 2.6741 | Spent: 33.3 secs | LR: 0.000014
INFO:tensorflow:Step 31000 | Loss: 2.8819 | Spent: 33.6 secs | LR: 0.000014
INFO:tensorflow:Step 31100 | Loss: 2.9764 | Spent: 33.3 secs | LR: 0.000014
INFO:tensorflow:Step 31200 | Loss: 2.9288 | Spent: 34.5 secs | LR: 0.000014
INFO:tensorflow:Step 31300 | Loss: 2.8164 | Spent: 33.9 secs | LR: 0.000014
INFO:tensorflow:Step 31400 | Loss: 2.7531 | Spent: 33.6 secs | LR: 0.000013
INFO:tensorflow:Step 31500 | Loss: 2.7089 | Spent: 34.0 secs | LR: 0.000013
INFO:tensorflow:Step 31600 | Loss: 2.9494 | Spent: 33.3 secs | LR: 0.000013
INFO:tensorflow:Step 31700 | Loss: 3.0313 | Spent: 34.0 secs | LR: 0.000013
INFO:tensorflow:Step 31800 | Loss: 3.1979 | Spent: 33.9 secs | LR: 0.000013
INFO:tensorflow:Step 31900 | Loss: 3.1061 | Spent: 34.3 secs | LR: 0.000013
INFO:tensorflow:Step 32000 | Loss: 2.8616 | Spent: 34.2 secs | LR: 0.000013
INFO:tensorflow:Step 32100 | Loss: 3.0192 | Spent: 33.7 secs | LR: 0.000013
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.862
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 32200 | Loss: 3.0307 | Spent: 79.5 secs | LR: 0.000012
INFO:tensorflow:Step 32300 | Loss: 2.9218 | Spent: 33.6 secs | LR: 0.000012
INFO:tensorflow:Step 32400 | Loss: 2.8501 | Spent: 34.7 secs | LR: 0.000012
INFO:tensorflow:Step 32500 | Loss: 2.9517 | Spent: 34.5 secs | LR: 0.000012
INFO:tensorflow:Step 32600 | Loss: 2.7414 | Spent: 33.3 secs | LR: 0.000012
INFO:tensorflow:Step 32700 | Loss: 2.9083 | Spent: 33.9 secs | LR: 0.000012
INFO:tensorflow:Step 32800 | Loss: 2.7911 | Spent: 33.6 secs | LR: 0.000012
INFO:tensorflow:Step 32900 | Loss: 2.7873 | Spent: 34.1 secs | LR: 0.000012
INFO:tensorflow:Step 33000 | Loss: 2.9281 | Spent: 35.0 secs | LR: 0.000012
INFO:tensorflow:Step 33100 | Loss: 2.9047 | Spent: 33.7 secs | LR: 0.000011
INFO:tensorflow:Step 33200 | Loss: 2.7281 | Spent: 33.7 secs | LR: 0.000011
INFO:tensorflow:Step 33300 | Loss: 3.1287 | Spent: 34.1 secs | LR: 0.000011
INFO:tensorflow:Step 33400 | Loss: 2.8252 | Spent: 33.9 secs | LR: 0.000011
INFO:tensorflow:Step 33500 | Loss: 3.1593 | Spent: 33.1 secs | LR: 0.000011
INFO:tensorflow:Step 33600 | Loss: 2.7914 | Spent: 34.1 secs | LR: 0.000011
INFO:tensorflow:Step 33700 | Loss: 2.7595 | Spent: 33.8 secs | LR: 0.000011
INFO:tensorflow:Step 33800 | Loss: 2.8736 | Spent: 34.4 secs | LR: 0.000011
INFO:tensorflow:Step 33900 | Loss: 2.7690 | Spent: 35.6 secs | LR: 0.000011
INFO:tensorflow:Step 34000 | Loss: 2.8911 | Spent: 33.8 secs | LR: 0.000010
INFO:tensorflow:Step 34100 | Loss: 2.8368 | Spent: 33.6 secs | LR: 0.000010
INFO:tensorflow:Step 34200 | Loss: 2.7811 | Spent: 34.1 secs | LR: 0.000010
INFO:tensorflow:Step 34300 | Loss: 2.6650 | Spent: 34.1 secs | LR: 0.000010
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.857
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 34400 | Loss: 2.7698 | Spent: 75.5 secs | LR: 0.000010
INFO:tensorflow:Step 34500 | Loss: 2.8880 | Spent: 34.3 secs | LR: 0.000010
INFO:tensorflow:Step 34600 | Loss: 2.7202 | Spent: 34.5 secs | LR: 0.000010
INFO:tensorflow:Step 34700 | Loss: 3.1417 | Spent: 34.8 secs | LR: 0.000010
INFO:tensorflow:Step 34800 | Loss: 2.9996 | Spent: 33.7 secs | LR: 0.000010
INFO:tensorflow:Step 34900 | Loss: 2.8776 | Spent: 34.7 secs | LR: 0.000010
INFO:tensorflow:Step 35000 | Loss: 2.8487 | Spent: 33.6 secs | LR: 0.000010
INFO:tensorflow:Step 35100 | Loss: 2.8281 | Spent: 33.8 secs | LR: 0.000010
INFO:tensorflow:Step 35200 | Loss: 2.8194 | Spent: 34.9 secs | LR: 0.000011
INFO:tensorflow:Step 35300 | Loss: 2.7868 | Spent: 33.5 secs | LR: 0.000011
INFO:tensorflow:Step 35400 | Loss: 2.7308 | Spent: 34.5 secs | LR: 0.000011
INFO:tensorflow:Step 35500 | Loss: 3.0369 | Spent: 34.2 secs | LR: 0.000011
INFO:tensorflow:Step 35600 | Loss: 2.6968 | Spent: 34.3 secs | LR: 0.000011
INFO:tensorflow:Step 35700 | Loss: 2.7231 | Spent: 34.8 secs | LR: 0.000011
INFO:tensorflow:Step 35800 | Loss: 3.3129 | Spent: 33.5 secs | LR: 0.000011
INFO:tensorflow:Step 35900 | Loss: 2.9766 | Spent: 33.6 secs | LR: 0.000011
INFO:tensorflow:Step 36000 | Loss: 2.8958 | Spent: 33.3 secs | LR: 0.000011
INFO:tensorflow:Step 36100 | Loss: 2.8615 | Spent: 34.5 secs | LR: 0.000011
INFO:tensorflow:Step 36200 | Loss: 2.6727 | Spent: 34.8 secs | LR: 0.000011
INFO:tensorflow:Step 36300 | Loss: 2.7204 | Spent: 33.7 secs | LR: 0.000011
INFO:tensorflow:Step 36400 | Loss: 2.8681 | Spent: 34.8 secs | LR: 0.000011
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.861
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 36500 | Loss: 2.7310 | Spent: 75.5 secs | LR: 0.000011
INFO:tensorflow:Step 36600 | Loss: 2.8106 | Spent: 35.0 secs | LR: 0.000011
INFO:tensorflow:Step 36700 | Loss: 2.9265 | Spent: 35.0 secs | LR: 0.000011
INFO:tensorflow:Step 36800 | Loss: 2.8401 | Spent: 34.2 secs | LR: 0.000011
INFO:tensorflow:Step 36900 | Loss: 3.1649 | Spent: 33.7 secs | LR: 0.000011
INFO:tensorflow:Step 37000 | Loss: 3.0313 | Spent: 34.0 secs | LR: 0.000012
INFO:tensorflow:Step 37100 | Loss: 3.2119 | Spent: 34.7 secs | LR: 0.000012
INFO:tensorflow:Step 37200 | Loss: 3.2297 | Spent: 34.0 secs | LR: 0.000012
INFO:tensorflow:Step 37300 | Loss: 2.6738 | Spent: 33.7 secs | LR: 0.000012
INFO:tensorflow:Step 37400 | Loss: 2.7683 | Spent: 34.0 secs | LR: 0.000012
INFO:tensorflow:Step 37500 | Loss: 3.0022 | Spent: 34.0 secs | LR: 0.000012
INFO:tensorflow:Step 37600 | Loss: 2.9266 | Spent: 33.5 secs | LR: 0.000012
INFO:tensorflow:Step 37700 | Loss: 2.9851 | Spent: 34.1 secs | LR: 0.000012
INFO:tensorflow:Step 37800 | Loss: 2.8446 | Spent: 34.3 secs | LR: 0.000012
INFO:tensorflow:Step 37900 | Loss: 2.6931 | Spent: 34.7 secs | LR: 0.000012
INFO:tensorflow:Step 38000 | Loss: 2.8604 | Spent: 33.9 secs | LR: 0.000012
INFO:tensorflow:Step 38100 | Loss: 2.7408 | Spent: 34.8 secs | LR: 0.000012
INFO:tensorflow:Step 38200 | Loss: 2.7685 | Spent: 33.5 secs | LR: 0.000012
INFO:tensorflow:Step 38300 | Loss: 2.7799 | Spent: 33.8 secs | LR: 0.000012
INFO:tensorflow:Step 38400 | Loss: 2.7866 | Spent: 33.5 secs | LR: 0.000012
INFO:tensorflow:Step 38500 | Loss: 3.3484 | Spent: 33.7 secs | LR: 0.000012
INFO:tensorflow:Step 38600 | Loss: 2.9983 | Spent: 34.1 secs | LR: 0.000012
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.859
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 38700 | Loss: 2.7091 | Spent: 75.3 secs | LR: 0.000013
INFO:tensorflow:Step 38800 | Loss: 2.8202 | Spent: 34.1 secs | LR: 0.000013
INFO:tensorflow:Step 38900 | Loss: 2.8998 | Spent: 34.7 secs | LR: 0.000013
INFO:tensorflow:Step 39000 | Loss: 2.7218 | Spent: 33.6 secs | LR: 0.000013
INFO:tensorflow:Step 39100 | Loss: 2.7839 | Spent: 34.0 secs | LR: 0.000013
INFO:tensorflow:Step 39200 | Loss: 2.7108 | Spent: 34.5 secs | LR: 0.000013
INFO:tensorflow:Step 39300 | Loss: 2.8640 | Spent: 33.8 secs | LR: 0.000013
INFO:tensorflow:Step 39400 | Loss: 2.6245 | Spent: 33.8 secs | LR: 0.000013
INFO:tensorflow:Step 39500 | Loss: 2.7384 | Spent: 33.5 secs | LR: 0.000013
INFO:tensorflow:Step 39600 | Loss: 2.7396 | Spent: 33.4 secs | LR: 0.000013
INFO:tensorflow:Step 39700 | Loss: 3.0916 | Spent: 34.4 secs | LR: 0.000013
INFO:tensorflow:Step 39800 | Loss: 2.6453 | Spent: 34.6 secs | LR: 0.000013
INFO:tensorflow:Step 39900 | Loss: 2.7824 | Spent: 34.8 secs | LR: 0.000013
INFO:tensorflow:Step 40000 | Loss: 2.7949 | Spent: 34.0 secs | LR: 0.000013
INFO:tensorflow:Step 40100 | Loss: 2.7480 | Spent: 35.8 secs | LR: 0.000013
INFO:tensorflow:Step 40200 | Loss: 2.8162 | Spent: 35.6 secs | LR: 0.000013
INFO:tensorflow:Step 40300 | Loss: 3.1213 | Spent: 34.7 secs | LR: 0.000013
INFO:tensorflow:Step 40400 | Loss: 2.7348 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 40500 | Loss: 2.7198 | Spent: 34.8 secs | LR: 0.000014
INFO:tensorflow:Step 40600 | Loss: 2.6608 | Spent: 34.4 secs | LR: 0.000014
INFO:tensorflow:Step 40700 | Loss: 2.6522 | Spent: 36.5 secs | LR: 0.000014
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.854
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 40800 | Loss: 2.7060 | Spent: 76.9 secs | LR: 0.000014
INFO:tensorflow:Step 40900 | Loss: 2.6669 | Spent: 34.7 secs | LR: 0.000014
INFO:tensorflow:Step 41000 | Loss: 2.9245 | Spent: 34.8 secs | LR: 0.000014
INFO:tensorflow:Step 41100 | Loss: 2.8069 | Spent: 35.5 secs | LR: 0.000014
INFO:tensorflow:Step 41200 | Loss: 2.9811 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 41300 | Loss: 3.1604 | Spent: 34.8 secs | LR: 0.000014
INFO:tensorflow:Step 41400 | Loss: 2.7183 | Spent: 34.3 secs | LR: 0.000014
INFO:tensorflow:Step 41500 | Loss: 2.7662 | Spent: 34.4 secs | LR: 0.000014
INFO:tensorflow:Step 41600 | Loss: 3.0616 | Spent: 35.0 secs | LR: 0.000014
INFO:tensorflow:Step 41700 | Loss: 3.0803 | Spent: 33.9 secs | LR: 0.000014
INFO:tensorflow:Step 41800 | Loss: 2.8385 | Spent: 34.7 secs | LR: 0.000014
INFO:tensorflow:Step 41900 | Loss: 2.8205 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 42000 | Loss: 2.6495 | Spent: 33.8 secs | LR: 0.000014
INFO:tensorflow:Step 42100 | Loss: 2.8911 | Spent: 33.6 secs | LR: 0.000015
INFO:tensorflow:Step 42200 | Loss: 2.8555 | Spent: 35.0 secs | LR: 0.000015
INFO:tensorflow:Step 42300 | Loss: 2.6938 | Spent: 33.7 secs | LR: 0.000015
INFO:tensorflow:Step 42400 | Loss: 2.7608 | Spent: 35.2 secs | LR: 0.000015
INFO:tensorflow:Step 42500 | Loss: 2.9261 | Spent: 33.8 secs | LR: 0.000015
INFO:tensorflow:Step 42600 | Loss: 2.9024 | Spent: 34.6 secs | LR: 0.000015
INFO:tensorflow:Step 42700 | Loss: 2.6116 | Spent: 34.2 secs | LR: 0.000015
INFO:tensorflow:Step 42800 | Loss: 2.8067 | Spent: 34.0 secs | LR: 0.000015
INFO:tensorflow:Step 42900 | Loss: 2.8201 | Spent: 35.3 secs | LR: 0.000015
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.862
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 43000 | Loss: 2.8034 | Spent: 80.5 secs | LR: 0.000015
INFO:tensorflow:Step 43100 | Loss: 2.8531 | Spent: 34.7 secs | LR: 0.000015
INFO:tensorflow:Step 43200 | Loss: 2.7472 | Spent: 34.5 secs | LR: 0.000015
INFO:tensorflow:Step 43300 | Loss: 2.7869 | Spent: 34.9 secs | LR: 0.000015
INFO:tensorflow:Step 43400 | Loss: 2.7548 | Spent: 34.9 secs | LR: 0.000015
INFO:tensorflow:Step 43500 | Loss: 2.8409 | Spent: 34.0 secs | LR: 0.000015
INFO:tensorflow:Step 43600 | Loss: 3.1078 | Spent: 34.8 secs | LR: 0.000015
INFO:tensorflow:Step 43700 | Loss: 2.5605 | Spent: 34.1 secs | LR: 0.000015
INFO:tensorflow:Step 43800 | Loss: 2.9036 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 43900 | Loss: 3.0838 | Spent: 34.4 secs | LR: 0.000014
INFO:tensorflow:Step 44000 | Loss: 2.8659 | Spent: 34.5 secs | LR: 0.000014
INFO:tensorflow:Step 44100 | Loss: 2.7491 | Spent: 34.7 secs | LR: 0.000014
INFO:tensorflow:Step 44200 | Loss: 2.6667 | Spent: 34.8 secs | LR: 0.000014
INFO:tensorflow:Step 44300 | Loss: 3.0907 | Spent: 34.8 secs | LR: 0.000014
INFO:tensorflow:Step 44400 | Loss: 2.7365 | Spent: 34.2 secs | LR: 0.000014
INFO:tensorflow:Step 44500 | Loss: 2.9040 | Spent: 34.3 secs | LR: 0.000014
INFO:tensorflow:Step 44600 | Loss: 2.9353 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 44700 | Loss: 2.7896 | Spent: 35.0 secs | LR: 0.000014
INFO:tensorflow:Step 44800 | Loss: 2.8549 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 44900 | Loss: 3.0783 | Spent: 35.2 secs | LR: 0.000014
INFO:tensorflow:Step 45000 | Loss: 2.9779 | Spent: 34.0 secs | LR: 0.000014
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.856
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 45100 | Loss: 2.8690 | Spent: 76.7 secs | LR: 0.000014
INFO:tensorflow:Step 45200 | Loss: 2.9059 | Spent: 34.5 secs | LR: 0.000014
INFO:tensorflow:Step 45300 | Loss: 2.9136 | Spent: 34.6 secs | LR: 0.000014
INFO:tensorflow:Step 45400 | Loss: 2.9448 | Spent: 34.3 secs | LR: 0.000014
INFO:tensorflow:Step 45500 | Loss: 3.2468 | Spent: 34.5 secs | LR: 0.000013
INFO:tensorflow:Step 45600 | Loss: 3.0043 | Spent: 34.5 secs | LR: 0.000013
INFO:tensorflow:Step 45700 | Loss: 2.8967 | Spent: 35.3 secs | LR: 0.000013
INFO:tensorflow:Step 45800 | Loss: 2.9130 | Spent: 34.8 secs | LR: 0.000013
INFO:tensorflow:Step 45900 | Loss: 3.0735 | Spent: 35.1 secs | LR: 0.000013
INFO:tensorflow:Step 46000 | Loss: 2.5875 | Spent: 34.9 secs | LR: 0.000013
INFO:tensorflow:Step 46100 | Loss: 2.8157 | Spent: 34.1 secs | LR: 0.000013
INFO:tensorflow:Step 46200 | Loss: 2.9153 | Spent: 34.4 secs | LR: 0.000013
INFO:tensorflow:Step 46300 | Loss: 2.8418 | Spent: 34.4 secs | LR: 0.000013
INFO:tensorflow:Step 46400 | Loss: 3.1022 | Spent: 35.1 secs | LR: 0.000013
INFO:tensorflow:Step 46500 | Loss: 2.8990 | Spent: 34.0 secs | LR: 0.000013
INFO:tensorflow:Step 46600 | Loss: 2.9365 | Spent: 34.3 secs | LR: 0.000013
INFO:tensorflow:Step 46700 | Loss: 3.1568 | Spent: 34.5 secs | LR: 0.000013
INFO:tensorflow:Step 46800 | Loss: 3.0512 | Spent: 34.6 secs | LR: 0.000013
INFO:tensorflow:Step 46900 | Loss: 2.6648 | Spent: 34.4 secs | LR: 0.000013
INFO:tensorflow:Step 47000 | Loss: 2.7248 | Spent: 34.1 secs | LR: 0.000013
INFO:tensorflow:Step 47100 | Loss: 2.6762 | Spent: 34.9 secs | LR: 0.000013
INFO:tensorflow:Step 47200 | Loss: 2.9088 | Spent: 34.7 secs | LR: 0.000013
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.858
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 47300 | Loss: 2.7745 | Spent: 76.3 secs | LR: 0.000012
INFO:tensorflow:Step 47400 | Loss: 2.8694 | Spent: 35.2 secs | LR: 0.000012
INFO:tensorflow:Step 47500 | Loss: 2.6556 | Spent: 34.5 secs | LR: 0.000012
INFO:tensorflow:Step 47600 | Loss: 2.7069 | Spent: 34.0 secs | LR: 0.000012
INFO:tensorflow:Step 47700 | Loss: 2.7386 | Spent: 35.2 secs | LR: 0.000012
INFO:tensorflow:Step 47800 | Loss: 2.7086 | Spent: 33.9 secs | LR: 0.000012
INFO:tensorflow:Step 47900 | Loss: 3.0156 | Spent: 34.5 secs | LR: 0.000012
INFO:tensorflow:Step 48000 | Loss: 2.8626 | Spent: 34.9 secs | LR: 0.000012
INFO:tensorflow:Step 48100 | Loss: 2.8176 | Spent: 34.1 secs | LR: 0.000012
INFO:tensorflow:Step 48200 | Loss: 2.7016 | Spent: 34.0 secs | LR: 0.000012
INFO:tensorflow:Step 48300 | Loss: 3.1125 | Spent: 34.1 secs | LR: 0.000012
INFO:tensorflow:Step 48400 | Loss: 2.6506 | Spent: 35.2 secs | LR: 0.000012
INFO:tensorflow:Step 48500 | Loss: 2.9277 | Spent: 35.9 secs | LR: 0.000012
INFO:tensorflow:Step 48600 | Loss: 3.0022 | Spent: 35.1 secs | LR: 0.000012
INFO:tensorflow:Step 48700 | Loss: 2.7231 | Spent: 34.9 secs | LR: 0.000012
INFO:tensorflow:Step 48800 | Loss: 2.7760 | Spent: 34.9 secs | LR: 0.000012
INFO:tensorflow:Step 48900 | Loss: 2.7255 | Spent: 34.3 secs | LR: 0.000012
INFO:tensorflow:Step 49000 | Loss: 2.8012 | Spent: 33.4 secs | LR: 0.000011
INFO:tensorflow:Step 49100 | Loss: 2.8760 | Spent: 34.6 secs | LR: 0.000011
INFO:tensorflow:Step 49200 | Loss: 2.8596 | Spent: 34.5 secs | LR: 0.000011
INFO:tensorflow:Step 49300 | Loss: 3.2599 | Spent: 33.9 secs | LR: 0.000011
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.858
INFO:tensorflow:Best Accuracy: 0.862
Reading ../data/train.json
INFO:tensorflow:Step 49400 | Loss: 2.9986 | Spent: 76.4 secs | LR: 0.000011
INFO:tensorflow:Step 49500 | Loss: 2.9383 | Spent: 35.1 secs | LR: 0.000011
INFO:tensorflow:Step 49600 | Loss: 2.7351 | Spent: 34.2 secs | LR: 0.000011
INFO:tensorflow:Step 49700 | Loss: 3.0801 | Spent: 34.6 secs | LR: 0.000011
INFO:tensorflow:Step 49800 | Loss: 2.8272 | Spent: 35.3 secs | LR: 0.000011
INFO:tensorflow:Step 49900 | Loss: 2.8827 | Spent: 34.3 secs | LR: 0.000011
INFO:tensorflow:Step 50000 | Loss: 2.8037 | Spent: 35.8 secs | LR: 0.000011
INFO:tensorflow:Step 50100 | Loss: 2.8449 | Spent: 34.1 secs | LR: 0.000011
INFO:tensorflow:Step 50200 | Loss: 2.8213 | Spent: 35.2 secs | LR: 0.000011
INFO:tensorflow:Step 50300 | Loss: 2.9558 | Spent: 35.1 secs | LR: 0.000011
INFO:tensorflow:Step 50400 | Loss: 2.7770 | Spent: 35.3 secs | LR: 0.000011
INFO:tensorflow:Step 50500 | Loss: 2.9968 | Spent: 35.6 secs | LR: 0.000011
INFO:tensorflow:Step 50600 | Loss: 2.6864 | Spent: 35.9 secs | LR: 0.000011
INFO:tensorflow:Step 50700 | Loss: 2.8305 | Spent: 34.6 secs | LR: 0.000010
INFO:tensorflow:Step 50800 | Loss: 2.9143 | Spent: 35.2 secs | LR: 0.000010
INFO:tensorflow:Step 50900 | Loss: 2.8616 | Spent: 34.5 secs | LR: 0.000010
INFO:tensorflow:Step 51000 | Loss: 2.7867 | Spent: 35.6 secs | LR: 0.000010
INFO:tensorflow:Step 51100 | Loss: 2.8333 | Spent: 34.8 secs | LR: 0.000010
INFO:tensorflow:Step 51200 | Loss: 2.6833 | Spent: 34.8 secs | LR: 0.000010
INFO:tensorflow:Step 51300 | Loss: 2.8173 | Spent: 34.4 secs | LR: 0.000010
INFO:tensorflow:Step 51400 | Loss: 3.1277 | Spent: 34.9 secs | LR: 0.000010
INFO:tensorflow:Step 51500 | Loss: 2.6408 | Spent: 35.0 secs | LR: 0.000010
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.864
INFO:tensorflow:Best Accuracy: 0.864
Reading ../data/train.json
INFO:tensorflow:Step 51600 | Loss: 2.9303 | Spent: 80.4 secs | LR: 0.000010
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-8-20416d7fdbc2> in <module>()
     35     trainable_vars = [v for v in model.trainable_variables if 'pooler' not in v.name]
     36     optim.lr.assign(decay_lr(global_step))
---> 37     grads = tape.gradient(loss, trainable_vars)
     38     grads, _ = tf.clip_by_global_norm(grads, 5.)
     39     optim.apply_gradients(zip(grads, trainable_vars))

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/backprop.py in gradient(self, target, sources, output_gradients, unconnected_gradients)
   1071         output_gradients=output_gradients,
   1072         sources_raw=flat_sources_raw,
-> 1073         unconnected_gradients=unconnected_gradients)
   1074 
   1075     if not self._persistent:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/imperative_grad.py in imperative_grad(tape, target, sources, output_gradients, sources_raw, unconnected_gradients)
     75       output_gradients,
     76       sources_raw,
---> 77       compat.as_str(unconnected_gradients.value))

/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/backprop.py in _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs, out_grads, skip_input_indices, forward_pass_name_scope)
    160       gradient_name_scope += forward_pass_name_scope + "/"
    161     with ops.name_scope(gradient_name_scope):
--> 162       return grad_fn(mock_op, *out_grads)
    163   else:
    164     return grad_fn(mock_op, *out_grads)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py in _MeanGrad(op, grad)
    264     factor = _safe_shape_div(
    265         math_ops.reduce_prod(input_shape), math_ops.reduce_prod(output_shape))
--> 266   return math_ops.truediv(sum_grad, math_ops.cast(factor, sum_grad.dtype)), None
    267 
    268 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
    199     """Call target, and fall back on dispatchers if there is a TypeError."""
    200     try:
--> 201       return target(*args, **kwargs)
    202     except (TypeError, ValueError):
    203       # Note: convert_to_eager_tensor currently raises a ValueError, not a

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py in truediv(x, y, name)
   1295     TypeError: If `x` and `y` have different dtypes.
   1296   """
-> 1297   return _truediv_python3(x, y, name)
   1298 
   1299 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py in _truediv_python3(x, y, name)
   1234       x = cast(x, dtype)
   1235       y = cast(y, dtype)
-> 1236     return gen_math_ops.real_div(x, y, name=name)
   1237 
   1238 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py in real_div(x, y, name)
   7438       _result = pywrap_tfe.TFE_Py_FastPathExecute(
   7439         _ctx._context_handle, tld.device_name, "RealDiv", name,
-> 7440         tld.op_callbacks, x, y)
   7441       return _result
   7442     except _core._NotOkStatusException as e:

KeyboardInterrupt: 
In [9]:
model.load_weights('../model/bert_further_pretrain.h5')
m = tf.keras.metrics.Accuracy()
for ((text, seg), (labels, labels_mask)) in dataset(is_training=False, params=params):
  logits = model([text, tf.sign(text), seg], training=False).logits
  m.update_state(
    y_true = labels,
    y_pred = tf.argmax(logits, -1),
    sample_weight = labels_mask,)
best_acc = m.result().numpy()
print("MLM Accuracy: {:.3f}".format(best_acc))
print(model.weights[5]) # for later check if the weight is correctly transferred to other task
Reading ../data/dev.json
MLM Accuracy: 0.856
<tf.Variable 'tf_bert_lm_head_model/bert/encoder/layer_._0/attention/self/query/kernel:0' shape=(768, 768) dtype=float32, numpy=
array([[ 0.12166668,  0.00772147,  0.01382446, ..., -0.05134514,
         0.01727283,  0.01975559],
       [-0.01919131, -0.01730992,  0.00995409, ...,  0.01526493,
        -0.0815515 , -0.02229767],
       [ 0.00750611, -0.00214197,  0.02398042, ...,  0.00636234,
        -0.06394118, -0.00126247],
       ...,
       [ 0.00831017, -0.00127873,  0.11854228, ...,  0.05074448,
        -0.0348739 ,  0.09707301],
       [ 0.01920506,  0.04018742, -0.03493238, ..., -0.00172372,
         0.04089048, -0.0429044 ],
       [-0.07598049,  0.02195707, -0.01592555, ...,  0.06175591,
        -0.02131575, -0.00702451]], dtype=float32)>