In [ ]:
from google.colab import drive
drive.mount('/content/gdrive')
import os
os.chdir('/content/gdrive/My Drive/finch/tensorflow2/text_classification/clue/main')
Mounted at /content/gdrive
In [ ]:
!pip install transformers
Collecting transformers
  Downloading https://files.pythonhosted.org/packages/2c/4e/4f1ede0fd7a36278844a277f8d53c21f88f37f3754abf76a5d6224f76d4a/transformers-3.4.0-py3-none-any.whl (1.3MB)
     |████████████████████████████████| 1.3MB 13.8MB/s 
Collecting sacremoses
  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
     |████████████████████████████████| 890kB 50.9MB/s 
Requirement already satisfied: protobuf in /usr/local/lib/python3.6/dist-packages (from transformers) (3.12.4)
Requirement already satisfied: dataclasses; python_version < "3.7" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)
Collecting sentencepiece!=0.1.92
  Downloading https://files.pythonhosted.org/packages/e5/2d/6d4ca4bef9a67070fa1cac508606328329152b1df10bdf31fb6e4e727894/sentencepiece-0.1.94-cp36-cp36m-manylinux2014_x86_64.whl (1.1MB)
     |████████████████████████████████| 1.1MB 36.7MB/s 
Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)
Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)
Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)
Collecting tokenizers==0.9.2
  Downloading https://files.pythonhosted.org/packages/7c/a5/78be1a55b2ac8d6a956f0a211d372726e2b1dd2666bb537fea9b03abd62c/tokenizers-0.9.2-cp36-cp36m-manylinux1_x86_64.whl (2.9MB)
     |████████████████████████████████| 2.9MB 54.7MB/s 
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)
Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (1.15.0)
Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)
Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.17.0)
Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf->transformers) (50.3.2)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)
Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... done
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp36-none-any.whl size=893257 sha256=24cb962f37640e9d039ac2f68b1629c3dd62fbabc7bfab73dfae80af309bdeac
  Stored in directory: /root/.cache/pip/wheels/29/3c/fd/7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45
Successfully built sacremoses
Installing collected packages: sacremoses, sentencepiece, tokenizers, transformers
Successfully installed sacremoses-0.0.43 sentencepiece-0.1.94 tokenizers-0.9.2 transformers-3.4.0
In [ ]:
from transformers import BertTokenizer, TFBertLMHeadModel
import os
import json
import time
import logging
import pprint
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import random

print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())
TensorFlow Version 2.3.0
WARNING:tensorflow:From <ipython-input-3-05a3004c9c96>:13: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU Enabled: True
In [ ]:
params = {
  'pretrain_path': 'bert-base-chinese',
  'train_path': '../data/train.txt',
  'test_path': '../data/test.txt',
  'batch_size': 16,
  'max_len': 130,
  'buffer_size': 31728,
  'init_lr': 1e-5,
  'max_lr': 3e-5,
  'n_epochs': 4 * 10,
}

tokenizer = BertTokenizer.from_pretrained(params['pretrain_path'],
                                          lowercase = True,
                                          add_special_tokens = True)
In [ ]:
# stream data from text files
def data_generator(f_path, params):
  with open(f_path) as f:
    print('Reading', f_path)
    for line in f:
      line = json.loads(line.rstrip())
      text, label = line['content'], line['label']
      text = list(text)
      text = ['[CLS]'] + text + ['[SEP]']
      text = tokenizer.convert_tokens_to_ids(text)
      text = [idx for idx in text if idx != 100]
      if len(text) > params['max_len']:
        _max_len = params['max_len'] // 2
        text = text[:_max_len] + text[-_max_len:]
      seg = [0] * len(text)
      
      noises = []
      labels_mask = []
      for idx in text:
        if (random.random() <= 0.15) and (idx != 101) and (idx != 102):
          dice = random.random()
          if dice <= 0.8:
            noises.append(103)
          elif dice <= 0.9:
            noises.append(idx)
          else:
            noises.append(random.randint(0, 21127))
          labels_mask.append(1)
        else:
          noises.append(idx)
          labels_mask.append(0)

      yield (noises, seg), (text, labels_mask)


def dataset(is_training, params):
  _shapes = (([None], [None]), ([None], [None]))
  _types = ((tf.int32, tf.int32), (tf.int32, tf.int32))
  _pads = ((0, 0), (0, 0))
  
  if is_training:
    ds = tf.data.Dataset.from_generator(
      lambda: data_generator(params['train_path'], params),
      output_shapes = _shapes,
      output_types = _types,)
    ds = ds.shuffle(params['buffer_size'])
    ds = ds.padded_batch(params['batch_size'], _shapes, _pads)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
  else:
    ds = tf.data.Dataset.from_generator(
      lambda: data_generator(params['test_path'], params),
      output_shapes = _shapes,
      output_types = _types,)
    ds = ds.padded_batch(params['batch_size'], _shapes, _pads)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
  
  return ds
In [ ]:
# input stream ids check
(text, seg), (labels, labels_mask) = next(data_generator(params['train_path'], params))
print(text)
print(seg)
print(labels)
print(labels_mask)
Reading ../data/train.txt
[101, 112, 872, 4761, 6887, 1914, 103, 1914, 7353, 6818, 3300, 784, 720, 1408, 8043, 1506, 1506, 3300, 4788, 103, 5456, 4696, 4638, 741, 677, 1091, 4638, 872, 1420, 1521, 872, 2157, 6929, 1779, 4788, 2357, 3221, 686, 4518, 677, 3297, 1920, 4638, 4788, 2357, 8024, 1506, 1506, 103, 7745, 872, 4638, 1568, 2124, 103, 6432, 2225, 1217, 2861, 10671, 4105, 2357, 3221, 7391, 4518, 677, 3297, 103, 4638, 4105, 2357, 1568, 1506, 1506, 1506, 112, 112, 4268, 4268, 8024, 1961, 4638, 103, 1355, 5456, 8013, 2769, 812, 1920, 2812, 7370, 3488, 2094, 6963, 103, 5436, 677, 3341, 2769, 4692, 1168, 3312, 1928, 5361, 7027, 103, 14509, 1355, 671, 2137, 103, 103, 166, 103, 1184, 1931, 1168, 4638, 103, 872, 6432, 3221, 679, 3221, 8043, 138, 4495, 4567, 140, 102]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[101, 112, 872, 4761, 6887, 1914, 840, 1914, 7353, 6818, 3300, 784, 720, 1408, 8043, 1506, 1506, 3300, 4788, 2357, 5456, 4696, 4638, 741, 677, 1091, 4638, 872, 1420, 1521, 872, 2157, 6929, 1779, 4788, 2357, 3221, 686, 4518, 677, 3297, 1920, 4638, 4788, 2357, 8024, 1506, 1506, 8024, 7745, 872, 4638, 1568, 2124, 3221, 6432, 2225, 1217, 2861, 4478, 4105, 2357, 3221, 686, 4518, 677, 3297, 1920, 4638, 4105, 2357, 1568, 1506, 1506, 1506, 112, 112, 4268, 4268, 8024, 1961, 4638, 1928, 1355, 5456, 8013, 2769, 812, 1920, 2812, 7370, 3488, 2094, 6963, 6206, 5436, 677, 3341, 2769, 4692, 1168, 3312, 1928, 5361, 7027, 3300, 1928, 1355, 671, 2137, 3221, 166, 166, 809, 1184, 1931, 1168, 4638, 8024, 872, 6432, 3221, 679, 3221, 8043, 138, 4495, 4567, 140, 102]
[0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
In [ ]:
model = TFBertLMHeadModel.from_pretrained(params['pretrain_path'],
                                          trainable = True,
                                          return_dict = True)
model.load_weights('../model/bert_further_pretrain.h5')
In [ ]:
step_size = 4 * params['buffer_size'] // params['batch_size']
decay_lr = tfa.optimizers.Triangular2CyclicalLearningRate(
  initial_learning_rate = params['init_lr'],
  maximal_learning_rate = params['max_lr'],
  step_size = step_size,)
optim = tf.optimizers.Adam(params['init_lr'])
global_step = 0

best_acc = 0.69
t0 = time.time()
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)

# Baseline Accuracy
m = tf.keras.metrics.Accuracy()
for ((text, seg), (labels, labels_mask)) in dataset(is_training=False, params=params):
  logits = model([text, tf.sign(text), seg], training=False).logits
  m.update_state(
    y_true = labels,
    y_pred = tf.argmax(logits, -1),
    sample_weight = labels_mask,)
acc = m.result().numpy()
logger.info("Baseline Accuracy: {:.3f}".format(acc))

for _ in range(params['n_epochs']):
  # Training
  for ((text, seg), (labels, labels_mask)) in dataset(is_training=True, params=params):
    with tf.GradientTape() as tape:
      logits = model([text, tf.sign(text), seg], training=True).logits
      loss = tf.compat.v1.losses.softmax_cross_entropy(
        onehot_labels = tf.one_hot(labels, 21128),
        logits = logits,
        weights = tf.cast(labels_mask, tf.float32),
        label_smoothing = .2,)
    
    trainable_vars = [v for v in model.trainable_variables if 'pooler' not in v.name]
    optim.lr.assign(decay_lr(global_step))
    grads = tape.gradient(loss, trainable_vars)
    grads, _ = tf.clip_by_global_norm(grads, 5.)
    optim.apply_gradients(zip(grads, trainable_vars))
    
    if global_step % 100 == 0:
      logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
        global_step, loss.numpy().item(), time.time()-t0, optim.lr.numpy().item()))
      t0 = time.time()
    global_step += 1
  
  # Evaluation
  m = tf.keras.metrics.Accuracy()

  for ((text, seg), (labels, labels_mask)) in dataset(is_training=False, params=params):
    logits = model([text, tf.sign(text), seg], training=False).logits
    m.update_state(
      y_true = labels,
      y_pred = tf.argmax(logits, -1),
      sample_weight = labels_mask,)

  acc = m.result().numpy()
  logger.info("Evaluation: Testing Accuracy: {:.3f}".format(acc))

  if acc > best_acc:
    best_acc = acc
    model.save_weights('../model/bert_further_pretrain.h5', save_format='h5')

  logger.info("Best Accuracy: {:.3f}".format(best_acc))
Reading ../data/test.txt
INFO:tensorflow:Baseline Accuracy: 0.692
Reading ../data/train.txt
INFO:tensorflow:Step 0 | Loss: 5.6769 | Spent: 52.0 secs | LR: 0.000010
INFO:tensorflow:Step 100 | Loss: 3.8050 | Spent: 53.5 secs | LR: 0.000010
INFO:tensorflow:Step 200 | Loss: 3.5434 | Spent: 54.6 secs | LR: 0.000011
INFO:tensorflow:Step 300 | Loss: 3.6904 | Spent: 55.3 secs | LR: 0.000011
INFO:tensorflow:Step 400 | Loss: 3.6488 | Spent: 55.3 secs | LR: 0.000011
INFO:tensorflow:Step 500 | Loss: 3.8685 | Spent: 55.4 secs | LR: 0.000011
INFO:tensorflow:Step 600 | Loss: 3.5431 | Spent: 54.7 secs | LR: 0.000012
INFO:tensorflow:Step 700 | Loss: 3.7322 | Spent: 55.2 secs | LR: 0.000012
INFO:tensorflow:Step 800 | Loss: 3.9790 | Spent: 54.4 secs | LR: 0.000012
INFO:tensorflow:Step 900 | Loss: 3.4762 | Spent: 55.2 secs | LR: 0.000012
INFO:tensorflow:Step 1000 | Loss: 3.5651 | Spent: 55.4 secs | LR: 0.000013
INFO:tensorflow:Step 1100 | Loss: 3.6787 | Spent: 55.1 secs | LR: 0.000013
INFO:tensorflow:Step 1200 | Loss: 3.5938 | Spent: 54.7 secs | LR: 0.000013
INFO:tensorflow:Step 1300 | Loss: 3.7927 | Spent: 55.2 secs | LR: 0.000013
INFO:tensorflow:Step 1400 | Loss: 3.5362 | Spent: 55.6 secs | LR: 0.000014
INFO:tensorflow:Step 1500 | Loss: 3.5193 | Spent: 55.2 secs | LR: 0.000014
INFO:tensorflow:Step 1600 | Loss: 3.2629 | Spent: 54.5 secs | LR: 0.000014
INFO:tensorflow:Step 1700 | Loss: 3.7281 | Spent: 55.1 secs | LR: 0.000014
INFO:tensorflow:Step 1800 | Loss: 3.3162 | Spent: 55.4 secs | LR: 0.000015
INFO:tensorflow:Step 1900 | Loss: 3.2278 | Spent: 54.9 secs | LR: 0.000015
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.689
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 2000 | Loss: 3.6417 | Spent: 107.2 secs | LR: 0.000015
INFO:tensorflow:Step 2100 | Loss: 3.7316 | Spent: 54.8 secs | LR: 0.000015
INFO:tensorflow:Step 2200 | Loss: 3.6453 | Spent: 55.0 secs | LR: 0.000016
INFO:tensorflow:Step 2300 | Loss: 3.6747 | Spent: 55.2 secs | LR: 0.000016
INFO:tensorflow:Step 2400 | Loss: 3.7780 | Spent: 55.1 secs | LR: 0.000016
INFO:tensorflow:Step 2500 | Loss: 3.6128 | Spent: 55.3 secs | LR: 0.000016
INFO:tensorflow:Step 2600 | Loss: 3.5500 | Spent: 55.1 secs | LR: 0.000017
INFO:tensorflow:Step 2700 | Loss: 3.4986 | Spent: 54.5 secs | LR: 0.000017
INFO:tensorflow:Step 2800 | Loss: 3.7140 | Spent: 54.4 secs | LR: 0.000017
INFO:tensorflow:Step 2900 | Loss: 3.4243 | Spent: 53.6 secs | LR: 0.000017
INFO:tensorflow:Step 3000 | Loss: 3.4860 | Spent: 54.0 secs | LR: 0.000018
INFO:tensorflow:Step 3100 | Loss: 3.4368 | Spent: 55.8 secs | LR: 0.000018
INFO:tensorflow:Step 3200 | Loss: 3.5442 | Spent: 55.5 secs | LR: 0.000018
INFO:tensorflow:Step 3300 | Loss: 3.8786 | Spent: 53.6 secs | LR: 0.000018
INFO:tensorflow:Step 3400 | Loss: 3.4672 | Spent: 54.9 secs | LR: 0.000019
INFO:tensorflow:Step 3500 | Loss: 3.8257 | Spent: 55.0 secs | LR: 0.000019
INFO:tensorflow:Step 3600 | Loss: 3.6563 | Spent: 54.8 secs | LR: 0.000019
INFO:tensorflow:Step 3700 | Loss: 3.8164 | Spent: 55.3 secs | LR: 0.000019
INFO:tensorflow:Step 3800 | Loss: 3.5402 | Spent: 54.8 secs | LR: 0.000020
INFO:tensorflow:Step 3900 | Loss: 3.6487 | Spent: 55.2 secs | LR: 0.000020
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.683
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 4000 | Loss: 3.5141 | Spent: 106.9 secs | LR: 0.000020
INFO:tensorflow:Step 4100 | Loss: 3.5216 | Spent: 55.1 secs | LR: 0.000020
INFO:tensorflow:Step 4200 | Loss: 3.4435 | Spent: 55.3 secs | LR: 0.000021
INFO:tensorflow:Step 4300 | Loss: 3.3913 | Spent: 54.9 secs | LR: 0.000021
INFO:tensorflow:Step 4400 | Loss: 3.4233 | Spent: 55.8 secs | LR: 0.000021
INFO:tensorflow:Step 4500 | Loss: 3.4909 | Spent: 54.4 secs | LR: 0.000021
INFO:tensorflow:Step 4600 | Loss: 3.3695 | Spent: 55.0 secs | LR: 0.000022
INFO:tensorflow:Step 4700 | Loss: 3.5487 | Spent: 54.1 secs | LR: 0.000022
INFO:tensorflow:Step 4800 | Loss: 4.2573 | Spent: 55.1 secs | LR: 0.000022
INFO:tensorflow:Step 4900 | Loss: 3.4999 | Spent: 56.2 secs | LR: 0.000022
INFO:tensorflow:Step 5000 | Loss: 3.2891 | Spent: 55.5 secs | LR: 0.000023
INFO:tensorflow:Step 5100 | Loss: 3.2396 | Spent: 55.1 secs | LR: 0.000023
INFO:tensorflow:Step 5200 | Loss: 3.9447 | Spent: 55.4 secs | LR: 0.000023
INFO:tensorflow:Step 5300 | Loss: 3.2533 | Spent: 54.7 secs | LR: 0.000023
INFO:tensorflow:Step 5400 | Loss: 3.2201 | Spent: 55.0 secs | LR: 0.000024
INFO:tensorflow:Step 5500 | Loss: 3.4030 | Spent: 55.0 secs | LR: 0.000024
INFO:tensorflow:Step 5600 | Loss: 3.7845 | Spent: 54.9 secs | LR: 0.000024
INFO:tensorflow:Step 5700 | Loss: 3.5878 | Spent: 54.9 secs | LR: 0.000024
INFO:tensorflow:Step 5800 | Loss: 4.1874 | Spent: 54.7 secs | LR: 0.000025
INFO:tensorflow:Step 5900 | Loss: 3.2908 | Spent: 56.2 secs | LR: 0.000025
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.678
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 6000 | Loss: 4.0138 | Spent: 107.9 secs | LR: 0.000025
INFO:tensorflow:Step 6100 | Loss: 3.9018 | Spent: 55.2 secs | LR: 0.000025
INFO:tensorflow:Step 6200 | Loss: 3.5683 | Spent: 55.0 secs | LR: 0.000026
INFO:tensorflow:Step 6300 | Loss: 3.7102 | Spent: 55.0 secs | LR: 0.000026
INFO:tensorflow:Step 6400 | Loss: 3.5180 | Spent: 55.2 secs | LR: 0.000026
INFO:tensorflow:Step 6500 | Loss: 3.4196 | Spent: 55.9 secs | LR: 0.000026
INFO:tensorflow:Step 6600 | Loss: 3.4177 | Spent: 55.2 secs | LR: 0.000027
INFO:tensorflow:Step 6700 | Loss: 3.8939 | Spent: 55.1 secs | LR: 0.000027
INFO:tensorflow:Step 6800 | Loss: 3.7608 | Spent: 55.2 secs | LR: 0.000027
INFO:tensorflow:Step 6900 | Loss: 3.8222 | Spent: 55.0 secs | LR: 0.000027
INFO:tensorflow:Step 7000 | Loss: 3.7076 | Spent: 55.2 secs | LR: 0.000028
INFO:tensorflow:Step 7100 | Loss: 3.3064 | Spent: 55.5 secs | LR: 0.000028
INFO:tensorflow:Step 7200 | Loss: 3.4243 | Spent: 54.6 secs | LR: 0.000028
INFO:tensorflow:Step 7300 | Loss: 3.4759 | Spent: 55.4 secs | LR: 0.000028
INFO:tensorflow:Step 7400 | Loss: 3.4724 | Spent: 54.9 secs | LR: 0.000029
INFO:tensorflow:Step 7500 | Loss: 3.8504 | Spent: 54.2 secs | LR: 0.000029
INFO:tensorflow:Step 7600 | Loss: 3.6520 | Spent: 55.6 secs | LR: 0.000029
INFO:tensorflow:Step 7700 | Loss: 3.6649 | Spent: 54.9 secs | LR: 0.000029
INFO:tensorflow:Step 7800 | Loss: 3.5080 | Spent: 55.7 secs | LR: 0.000030
INFO:tensorflow:Step 7900 | Loss: 3.4644 | Spent: 55.1 secs | LR: 0.000030
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.678
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 8000 | Loss: 3.1942 | Spent: 108.9 secs | LR: 0.000030
INFO:tensorflow:Step 8100 | Loss: 3.5159 | Spent: 55.8 secs | LR: 0.000030
INFO:tensorflow:Step 8200 | Loss: 3.8925 | Spent: 55.7 secs | LR: 0.000029
INFO:tensorflow:Step 8300 | Loss: 3.3938 | Spent: 54.2 secs | LR: 0.000029
INFO:tensorflow:Step 8400 | Loss: 3.8378 | Spent: 54.9 secs | LR: 0.000029
INFO:tensorflow:Step 8500 | Loss: 3.6447 | Spent: 55.2 secs | LR: 0.000029
INFO:tensorflow:Step 8600 | Loss: 3.7910 | Spent: 54.4 secs | LR: 0.000028
INFO:tensorflow:Step 8700 | Loss: 3.3949 | Spent: 55.1 secs | LR: 0.000028
INFO:tensorflow:Step 8800 | Loss: 3.5859 | Spent: 55.7 secs | LR: 0.000028
INFO:tensorflow:Step 8900 | Loss: 3.3711 | Spent: 55.1 secs | LR: 0.000028
INFO:tensorflow:Step 9000 | Loss: 3.0112 | Spent: 54.7 secs | LR: 0.000027
INFO:tensorflow:Step 9100 | Loss: 3.2547 | Spent: 55.1 secs | LR: 0.000027
INFO:tensorflow:Step 9200 | Loss: 3.7495 | Spent: 55.6 secs | LR: 0.000027
INFO:tensorflow:Step 9300 | Loss: 3.6319 | Spent: 54.3 secs | LR: 0.000027
INFO:tensorflow:Step 9400 | Loss: 3.3819 | Spent: 55.2 secs | LR: 0.000026
INFO:tensorflow:Step 9500 | Loss: 3.6728 | Spent: 55.4 secs | LR: 0.000026
INFO:tensorflow:Step 9600 | Loss: 3.7229 | Spent: 55.5 secs | LR: 0.000026
INFO:tensorflow:Step 9700 | Loss: 3.3451 | Spent: 55.8 secs | LR: 0.000026
INFO:tensorflow:Step 9800 | Loss: 3.3644 | Spent: 55.0 secs | LR: 0.000025
INFO:tensorflow:Step 9900 | Loss: 3.6134 | Spent: 54.8 secs | LR: 0.000025
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.675
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 10000 | Loss: 3.2713 | Spent: 109.4 secs | LR: 0.000025
INFO:tensorflow:Step 10100 | Loss: 3.1902 | Spent: 54.8 secs | LR: 0.000025
INFO:tensorflow:Step 10200 | Loss: 3.7453 | Spent: 55.8 secs | LR: 0.000024
INFO:tensorflow:Step 10300 | Loss: 3.7865 | Spent: 55.7 secs | LR: 0.000024
INFO:tensorflow:Step 10400 | Loss: 3.6286 | Spent: 54.6 secs | LR: 0.000024
INFO:tensorflow:Step 10500 | Loss: 3.8200 | Spent: 54.6 secs | LR: 0.000024
INFO:tensorflow:Step 10600 | Loss: 3.4029 | Spent: 56.2 secs | LR: 0.000023
INFO:tensorflow:Step 10700 | Loss: 3.5689 | Spent: 55.6 secs | LR: 0.000023
INFO:tensorflow:Step 10800 | Loss: 3.4493 | Spent: 55.3 secs | LR: 0.000023
INFO:tensorflow:Step 10900 | Loss: 3.7443 | Spent: 55.2 secs | LR: 0.000023
INFO:tensorflow:Step 11000 | Loss: 3.3699 | Spent: 55.4 secs | LR: 0.000022
INFO:tensorflow:Step 11100 | Loss: 3.5517 | Spent: 55.5 secs | LR: 0.000022
INFO:tensorflow:Step 11200 | Loss: 3.3877 | Spent: 54.1 secs | LR: 0.000022
INFO:tensorflow:Step 11300 | Loss: 3.5568 | Spent: 55.3 secs | LR: 0.000022
INFO:tensorflow:Step 11400 | Loss: 3.6565 | Spent: 56.0 secs | LR: 0.000021
INFO:tensorflow:Step 11500 | Loss: 3.6198 | Spent: 54.1 secs | LR: 0.000021
INFO:tensorflow:Step 11600 | Loss: 4.1227 | Spent: 54.4 secs | LR: 0.000021
INFO:tensorflow:Step 11700 | Loss: 3.7193 | Spent: 55.2 secs | LR: 0.000020
INFO:tensorflow:Step 11800 | Loss: 3.7078 | Spent: 55.4 secs | LR: 0.000020
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.678
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 11900 | Loss: 3.2168 | Spent: 108.3 secs | LR: 0.000020
INFO:tensorflow:Step 12000 | Loss: 3.3429 | Spent: 55.2 secs | LR: 0.000020
INFO:tensorflow:Step 12100 | Loss: 3.6657 | Spent: 56.2 secs | LR: 0.000019
INFO:tensorflow:Step 12200 | Loss: 3.5878 | Spent: 54.7 secs | LR: 0.000019
INFO:tensorflow:Step 12300 | Loss: 3.1195 | Spent: 54.3 secs | LR: 0.000019
INFO:tensorflow:Step 12400 | Loss: 3.3120 | Spent: 55.7 secs | LR: 0.000019
INFO:tensorflow:Step 12500 | Loss: 3.3013 | Spent: 55.6 secs | LR: 0.000018
INFO:tensorflow:Step 12600 | Loss: 3.1425 | Spent: 55.5 secs | LR: 0.000018
INFO:tensorflow:Step 12700 | Loss: 3.6787 | Spent: 55.3 secs | LR: 0.000018
INFO:tensorflow:Step 12800 | Loss: 3.2395 | Spent: 55.1 secs | LR: 0.000018
INFO:tensorflow:Step 12900 | Loss: 3.3843 | Spent: 54.7 secs | LR: 0.000017
INFO:tensorflow:Step 13000 | Loss: 3.3074 | Spent: 55.1 secs | LR: 0.000017
INFO:tensorflow:Step 13100 | Loss: 3.0785 | Spent: 55.8 secs | LR: 0.000017
INFO:tensorflow:Step 13200 | Loss: 3.9031 | Spent: 54.4 secs | LR: 0.000017
INFO:tensorflow:Step 13300 | Loss: 3.4434 | Spent: 55.1 secs | LR: 0.000016
INFO:tensorflow:Step 13400 | Loss: 3.5457 | Spent: 53.9 secs | LR: 0.000016
INFO:tensorflow:Step 13500 | Loss: 3.5615 | Spent: 56.1 secs | LR: 0.000016
INFO:tensorflow:Step 13600 | Loss: 3.5185 | Spent: 56.2 secs | LR: 0.000016
INFO:tensorflow:Step 13700 | Loss: 3.3583 | Spent: 55.0 secs | LR: 0.000015
INFO:tensorflow:Step 13800 | Loss: 3.4007 | Spent: 55.3 secs | LR: 0.000015
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.687
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 13900 | Loss: 3.7554 | Spent: 108.1 secs | LR: 0.000015
INFO:tensorflow:Step 14000 | Loss: 3.3485 | Spent: 55.4 secs | LR: 0.000015
INFO:tensorflow:Step 14100 | Loss: 3.4840 | Spent: 55.8 secs | LR: 0.000014
INFO:tensorflow:Step 14200 | Loss: 3.5770 | Spent: 54.8 secs | LR: 0.000014
INFO:tensorflow:Step 14300 | Loss: 3.2948 | Spent: 55.0 secs | LR: 0.000014
INFO:tensorflow:Step 14400 | Loss: 3.6543 | Spent: 55.7 secs | LR: 0.000014
INFO:tensorflow:Step 14500 | Loss: 3.5392 | Spent: 55.7 secs | LR: 0.000013
INFO:tensorflow:Step 14600 | Loss: 3.4148 | Spent: 55.8 secs | LR: 0.000013
INFO:tensorflow:Step 14700 | Loss: 3.6276 | Spent: 54.9 secs | LR: 0.000013
INFO:tensorflow:Step 14800 | Loss: 3.6287 | Spent: 55.2 secs | LR: 0.000013
INFO:tensorflow:Step 14900 | Loss: 3.3177 | Spent: 55.2 secs | LR: 0.000012
INFO:tensorflow:Step 15000 | Loss: 3.9401 | Spent: 55.9 secs | LR: 0.000012
INFO:tensorflow:Step 15100 | Loss: 3.4821 | Spent: 55.1 secs | LR: 0.000012
INFO:tensorflow:Step 15200 | Loss: 3.4535 | Spent: 55.1 secs | LR: 0.000012
INFO:tensorflow:Step 15300 | Loss: 3.3881 | Spent: 54.6 secs | LR: 0.000011
INFO:tensorflow:Step 15400 | Loss: 3.5375 | Spent: 55.1 secs | LR: 0.000011
INFO:tensorflow:Step 15500 | Loss: 3.2492 | Spent: 54.8 secs | LR: 0.000011
INFO:tensorflow:Step 15600 | Loss: 3.5275 | Spent: 55.5 secs | LR: 0.000011
INFO:tensorflow:Step 15700 | Loss: 3.4098 | Spent: 55.8 secs | LR: 0.000010
INFO:tensorflow:Step 15800 | Loss: 3.7177 | Spent: 54.2 secs | LR: 0.000010
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.684
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 15900 | Loss: 3.6681 | Spent: 107.3 secs | LR: 0.000010
INFO:tensorflow:Step 16000 | Loss: 3.5731 | Spent: 56.0 secs | LR: 0.000010
INFO:tensorflow:Step 16100 | Loss: 4.0069 | Spent: 54.8 secs | LR: 0.000010
INFO:tensorflow:Step 16200 | Loss: 3.2370 | Spent: 56.0 secs | LR: 0.000010
INFO:tensorflow:Step 16300 | Loss: 3.3906 | Spent: 54.9 secs | LR: 0.000011
INFO:tensorflow:Step 16400 | Loss: 3.5402 | Spent: 55.2 secs | LR: 0.000011
INFO:tensorflow:Step 16500 | Loss: 3.4992 | Spent: 54.9 secs | LR: 0.000011
INFO:tensorflow:Step 16600 | Loss: 3.5970 | Spent: 56.6 secs | LR: 0.000011
INFO:tensorflow:Step 16700 | Loss: 3.5464 | Spent: 55.6 secs | LR: 0.000011
INFO:tensorflow:Step 16800 | Loss: 3.4909 | Spent: 55.3 secs | LR: 0.000011
INFO:tensorflow:Step 16900 | Loss: 3.3388 | Spent: 55.2 secs | LR: 0.000011
INFO:tensorflow:Step 17000 | Loss: 3.6430 | Spent: 55.6 secs | LR: 0.000011
INFO:tensorflow:Step 17100 | Loss: 3.3316 | Spent: 56.3 secs | LR: 0.000012
INFO:tensorflow:Step 17200 | Loss: 3.5725 | Spent: 54.9 secs | LR: 0.000012
INFO:tensorflow:Step 17300 | Loss: 3.3967 | Spent: 56.1 secs | LR: 0.000012
INFO:tensorflow:Step 17400 | Loss: 3.3284 | Spent: 54.9 secs | LR: 0.000012
INFO:tensorflow:Step 17500 | Loss: 3.3785 | Spent: 55.6 secs | LR: 0.000012
INFO:tensorflow:Step 17600 | Loss: 3.3781 | Spent: 55.9 secs | LR: 0.000012
INFO:tensorflow:Step 17700 | Loss: 3.3265 | Spent: 55.5 secs | LR: 0.000012
INFO:tensorflow:Step 17800 | Loss: 3.2192 | Spent: 55.2 secs | LR: 0.000012
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.683
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 17900 | Loss: 3.7973 | Spent: 108.7 secs | LR: 0.000013
INFO:tensorflow:Step 18000 | Loss: 3.7477 | Spent: 55.7 secs | LR: 0.000013
INFO:tensorflow:Step 18100 | Loss: 3.7151 | Spent: 55.8 secs | LR: 0.000013
INFO:tensorflow:Step 18200 | Loss: 3.1842 | Spent: 55.8 secs | LR: 0.000013
INFO:tensorflow:Step 18300 | Loss: 3.5950 | Spent: 55.4 secs | LR: 0.000013
INFO:tensorflow:Step 18400 | Loss: 3.5127 | Spent: 55.1 secs | LR: 0.000013
INFO:tensorflow:Step 18500 | Loss: 3.5419 | Spent: 55.8 secs | LR: 0.000013
INFO:tensorflow:Step 18600 | Loss: 3.1175 | Spent: 55.9 secs | LR: 0.000013
INFO:tensorflow:Step 18700 | Loss: 3.2977 | Spent: 55.9 secs | LR: 0.000014
INFO:tensorflow:Step 18800 | Loss: 4.0662 | Spent: 56.7 secs | LR: 0.000014
INFO:tensorflow:Step 18900 | Loss: 3.3256 | Spent: 55.6 secs | LR: 0.000014
INFO:tensorflow:Step 19000 | Loss: 3.3141 | Spent: 56.1 secs | LR: 0.000014
INFO:tensorflow:Step 19100 | Loss: 3.4430 | Spent: 54.9 secs | LR: 0.000014
INFO:tensorflow:Step 19200 | Loss: 3.4444 | Spent: 55.8 secs | LR: 0.000014
INFO:tensorflow:Step 19300 | Loss: 3.1612 | Spent: 56.0 secs | LR: 0.000014
INFO:tensorflow:Step 19400 | Loss: 3.5657 | Spent: 55.3 secs | LR: 0.000014
INFO:tensorflow:Step 19500 | Loss: 3.6605 | Spent: 56.0 secs | LR: 0.000015
INFO:tensorflow:Step 19600 | Loss: 3.5686 | Spent: 55.6 secs | LR: 0.000015
INFO:tensorflow:Step 19700 | Loss: 3.5282 | Spent: 56.1 secs | LR: 0.000015
INFO:tensorflow:Step 19800 | Loss: 3.3508 | Spent: 55.5 secs | LR: 0.000015
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.687
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 19900 | Loss: 3.2009 | Spent: 108.6 secs | LR: 0.000015
INFO:tensorflow:Step 20000 | Loss: 3.3953 | Spent: 55.6 secs | LR: 0.000015
INFO:tensorflow:Step 20100 | Loss: 3.6633 | Spent: 55.7 secs | LR: 0.000015
INFO:tensorflow:Step 20200 | Loss: 3.3986 | Spent: 55.7 secs | LR: 0.000015
INFO:tensorflow:Step 20300 | Loss: 3.5184 | Spent: 56.9 secs | LR: 0.000016
INFO:tensorflow:Step 20400 | Loss: 3.1824 | Spent: 55.9 secs | LR: 0.000016
INFO:tensorflow:Step 20500 | Loss: 3.5896 | Spent: 55.2 secs | LR: 0.000016
INFO:tensorflow:Step 20600 | Loss: 3.3970 | Spent: 55.5 secs | LR: 0.000016
INFO:tensorflow:Step 20700 | Loss: 3.2737 | Spent: 55.4 secs | LR: 0.000016
INFO:tensorflow:Step 20800 | Loss: 3.8020 | Spent: 55.6 secs | LR: 0.000016
INFO:tensorflow:Step 20900 | Loss: 3.5589 | Spent: 55.5 secs | LR: 0.000016
INFO:tensorflow:Step 21000 | Loss: 3.6861 | Spent: 55.2 secs | LR: 0.000016
INFO:tensorflow:Step 21100 | Loss: 3.4643 | Spent: 54.8 secs | LR: 0.000017
INFO:tensorflow:Step 21200 | Loss: 3.1286 | Spent: 55.1 secs | LR: 0.000017
INFO:tensorflow:Step 21300 | Loss: 3.5893 | Spent: 55.8 secs | LR: 0.000017
INFO:tensorflow:Step 21400 | Loss: 3.7732 | Spent: 55.2 secs | LR: 0.000017
INFO:tensorflow:Step 21500 | Loss: 3.5662 | Spent: 56.3 secs | LR: 0.000017
INFO:tensorflow:Step 21600 | Loss: 3.7830 | Spent: 56.4 secs | LR: 0.000017
INFO:tensorflow:Step 21700 | Loss: 3.3065 | Spent: 56.2 secs | LR: 0.000017
INFO:tensorflow:Step 21800 | Loss: 3.2869 | Spent: 54.7 secs | LR: 0.000017
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.688
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 21900 | Loss: 3.4965 | Spent: 108.6 secs | LR: 0.000018
INFO:tensorflow:Step 22000 | Loss: 3.5695 | Spent: 55.2 secs | LR: 0.000018
INFO:tensorflow:Step 22100 | Loss: 3.3541 | Spent: 55.7 secs | LR: 0.000018
INFO:tensorflow:Step 22200 | Loss: 3.5576 | Spent: 55.0 secs | LR: 0.000018
INFO:tensorflow:Step 22300 | Loss: 3.5603 | Spent: 55.7 secs | LR: 0.000018
INFO:tensorflow:Step 22400 | Loss: 3.5441 | Spent: 56.0 secs | LR: 0.000018
INFO:tensorflow:Step 22500 | Loss: 3.3596 | Spent: 55.7 secs | LR: 0.000018
INFO:tensorflow:Step 22600 | Loss: 3.4712 | Spent: 55.7 secs | LR: 0.000018
INFO:tensorflow:Step 22700 | Loss: 3.4746 | Spent: 55.9 secs | LR: 0.000019
INFO:tensorflow:Step 22800 | Loss: 3.3712 | Spent: 56.5 secs | LR: 0.000019
INFO:tensorflow:Step 22900 | Loss: 3.8276 | Spent: 55.9 secs | LR: 0.000019
INFO:tensorflow:Step 23000 | Loss: 3.3725 | Spent: 55.2 secs | LR: 0.000019
INFO:tensorflow:Step 23100 | Loss: 3.3543 | Spent: 56.2 secs | LR: 0.000019
INFO:tensorflow:Step 23200 | Loss: 3.3482 | Spent: 55.9 secs | LR: 0.000019
INFO:tensorflow:Step 23300 | Loss: 3.5583 | Spent: 55.7 secs | LR: 0.000019
INFO:tensorflow:Step 23400 | Loss: 3.4913 | Spent: 56.5 secs | LR: 0.000020
INFO:tensorflow:Step 23500 | Loss: 3.5286 | Spent: 55.9 secs | LR: 0.000020
INFO:tensorflow:Step 23600 | Loss: 3.4798 | Spent: 56.2 secs | LR: 0.000020
INFO:tensorflow:Step 23700 | Loss: 3.0739 | Spent: 55.0 secs | LR: 0.000020
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.681
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 23800 | Loss: 3.3896 | Spent: 109.0 secs | LR: 0.000020
INFO:tensorflow:Step 23900 | Loss: 3.7211 | Spent: 56.1 secs | LR: 0.000020
INFO:tensorflow:Step 24000 | Loss: 3.4311 | Spent: 56.1 secs | LR: 0.000020
INFO:tensorflow:Step 24100 | Loss: 3.6965 | Spent: 55.4 secs | LR: 0.000020
INFO:tensorflow:Step 24200 | Loss: 3.5498 | Spent: 56.5 secs | LR: 0.000019
INFO:tensorflow:Step 24300 | Loss: 3.4646 | Spent: 56.1 secs | LR: 0.000019
INFO:tensorflow:Step 24400 | Loss: 3.1341 | Spent: 54.8 secs | LR: 0.000019
INFO:tensorflow:Step 24500 | Loss: 3.4642 | Spent: 55.8 secs | LR: 0.000019
INFO:tensorflow:Step 24600 | Loss: 3.4722 | Spent: 56.5 secs | LR: 0.000019
INFO:tensorflow:Step 24700 | Loss: 3.3730 | Spent: 56.1 secs | LR: 0.000019
INFO:tensorflow:Step 24800 | Loss: 3.1634 | Spent: 55.6 secs | LR: 0.000019
INFO:tensorflow:Step 24900 | Loss: 3.3980 | Spent: 55.9 secs | LR: 0.000019
INFO:tensorflow:Step 25000 | Loss: 3.7908 | Spent: 54.8 secs | LR: 0.000018
INFO:tensorflow:Step 25100 | Loss: 3.4017 | Spent: 55.5 secs | LR: 0.000018
INFO:tensorflow:Step 25200 | Loss: 3.6200 | Spent: 56.3 secs | LR: 0.000018
INFO:tensorflow:Step 25300 | Loss: 3.4212 | Spent: 55.0 secs | LR: 0.000018
INFO:tensorflow:Step 25400 | Loss: 3.4646 | Spent: 56.5 secs | LR: 0.000018
INFO:tensorflow:Step 25500 | Loss: 3.7782 | Spent: 55.7 secs | LR: 0.000018
INFO:tensorflow:Step 25600 | Loss: 3.4364 | Spent: 55.0 secs | LR: 0.000018
INFO:tensorflow:Step 25700 | Loss: 3.7769 | Spent: 55.7 secs | LR: 0.000018
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.683
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 25800 | Loss: 3.6888 | Spent: 109.1 secs | LR: 0.000017
INFO:tensorflow:Step 25900 | Loss: 3.3691 | Spent: 56.2 secs | LR: 0.000017
INFO:tensorflow:Step 26000 | Loss: 3.2641 | Spent: 55.0 secs | LR: 0.000017
INFO:tensorflow:Step 26100 | Loss: 3.4814 | Spent: 55.0 secs | LR: 0.000017
INFO:tensorflow:Step 26200 | Loss: 3.2663 | Spent: 54.8 secs | LR: 0.000017
INFO:tensorflow:Step 26300 | Loss: 3.3012 | Spent: 56.2 secs | LR: 0.000017
INFO:tensorflow:Step 26400 | Loss: 3.1017 | Spent: 54.7 secs | LR: 0.000017
INFO:tensorflow:Step 26500 | Loss: 3.0863 | Spent: 56.0 secs | LR: 0.000017
INFO:tensorflow:Step 26600 | Loss: 3.8584 | Spent: 56.0 secs | LR: 0.000016
INFO:tensorflow:Step 26700 | Loss: 3.7103 | Spent: 56.2 secs | LR: 0.000016
INFO:tensorflow:Step 26800 | Loss: 3.1810 | Spent: 56.3 secs | LR: 0.000016
INFO:tensorflow:Step 26900 | Loss: 3.2902 | Spent: 55.8 secs | LR: 0.000016
INFO:tensorflow:Step 27000 | Loss: 3.2728 | Spent: 56.3 secs | LR: 0.000016
INFO:tensorflow:Step 27100 | Loss: 3.5410 | Spent: 55.3 secs | LR: 0.000016
INFO:tensorflow:Step 27200 | Loss: 3.6078 | Spent: 55.1 secs | LR: 0.000016
INFO:tensorflow:Step 27300 | Loss: 3.2122 | Spent: 55.1 secs | LR: 0.000016
INFO:tensorflow:Step 27400 | Loss: 3.1172 | Spent: 55.6 secs | LR: 0.000015
INFO:tensorflow:Step 27500 | Loss: 3.4266 | Spent: 55.8 secs | LR: 0.000015
INFO:tensorflow:Step 27600 | Loss: 3.4485 | Spent: 56.0 secs | LR: 0.000015
INFO:tensorflow:Step 27700 | Loss: 3.1243 | Spent: 55.8 secs | LR: 0.000015
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.684
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 27800 | Loss: 3.2837 | Spent: 109.3 secs | LR: 0.000015
INFO:tensorflow:Step 27900 | Loss: 3.7226 | Spent: 55.8 secs | LR: 0.000015
INFO:tensorflow:Step 28000 | Loss: 3.2673 | Spent: 55.2 secs | LR: 0.000015
INFO:tensorflow:Step 28100 | Loss: 3.3249 | Spent: 56.4 secs | LR: 0.000015
INFO:tensorflow:Step 28200 | Loss: 3.5676 | Spent: 55.7 secs | LR: 0.000014
INFO:tensorflow:Step 28300 | Loss: 3.4144 | Spent: 55.1 secs | LR: 0.000014
INFO:tensorflow:Step 28400 | Loss: 3.4208 | Spent: 55.4 secs | LR: 0.000014
INFO:tensorflow:Step 28500 | Loss: 3.4895 | Spent: 56.2 secs | LR: 0.000014
INFO:tensorflow:Step 28600 | Loss: 3.3893 | Spent: 55.8 secs | LR: 0.000014
INFO:tensorflow:Step 28700 | Loss: 3.4956 | Spent: 54.8 secs | LR: 0.000014
INFO:tensorflow:Step 28800 | Loss: 3.1416 | Spent: 55.2 secs | LR: 0.000014
INFO:tensorflow:Step 28900 | Loss: 3.2458 | Spent: 55.2 secs | LR: 0.000014
INFO:tensorflow:Step 29000 | Loss: 3.3024 | Spent: 55.7 secs | LR: 0.000013
INFO:tensorflow:Step 29100 | Loss: 3.3627 | Spent: 55.9 secs | LR: 0.000013
INFO:tensorflow:Step 29200 | Loss: 3.4465 | Spent: 54.8 secs | LR: 0.000013
INFO:tensorflow:Step 29300 | Loss: 3.3412 | Spent: 54.8 secs | LR: 0.000013
INFO:tensorflow:Step 29400 | Loss: 3.3679 | Spent: 55.5 secs | LR: 0.000013
INFO:tensorflow:Step 29500 | Loss: 3.5603 | Spent: 56.1 secs | LR: 0.000013
INFO:tensorflow:Step 29600 | Loss: 3.5400 | Spent: 55.4 secs | LR: 0.000013
INFO:tensorflow:Step 29700 | Loss: 3.3857 | Spent: 55.5 secs | LR: 0.000013
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.685
INFO:tensorflow:Best Accuracy: 0.690
Reading ../data/train.txt
INFO:tensorflow:Step 29800 | Loss: 3.3477 | Spent: 107.6 secs | LR: 0.000012
INFO:tensorflow:Step 29900 | Loss: 3.3515 | Spent: 54.5 secs | LR: 0.000012
INFO:tensorflow:Step 30000 | Loss: 3.3351 | Spent: 55.6 secs | LR: 0.000012
INFO:tensorflow:Step 30100 | Loss: 3.1330 | Spent: 54.8 secs | LR: 0.000012
INFO:tensorflow:Step 30200 | Loss: 3.7729 | Spent: 55.3 secs | LR: 0.000012
INFO:tensorflow:Step 30300 | Loss: 3.8072 | Spent: 55.9 secs | LR: 0.000012
INFO:tensorflow:Step 30400 | Loss: 3.7825 | Spent: 55.5 secs | LR: 0.000012
INFO:tensorflow:Step 30500 | Loss: 3.3082 | Spent: 55.3 secs | LR: 0.000012
INFO:tensorflow:Step 30600 | Loss: 3.3584 | Spent: 55.2 secs | LR: 0.000011
INFO:tensorflow:Step 30700 | Loss: 3.4730 | Spent: 55.6 secs | LR: 0.000011
INFO:tensorflow:Step 30800 | Loss: 3.2926 | Spent: 55.1 secs | LR: 0.000011
INFO:tensorflow:Step 30900 | Loss: 3.5787 | Spent: 55.6 secs | LR: 0.000011
INFO:tensorflow:Step 31000 | Loss: 3.4806 | Spent: 54.7 secs | LR: 0.000011
INFO:tensorflow:Step 31100 | Loss: 3.2747 | Spent: 55.5 secs | LR: 0.000011
INFO:tensorflow:Step 31200 | Loss: 3.2920 | Spent: 55.7 secs | LR: 0.000011
INFO:tensorflow:Step 31300 | Loss: 3.4519 | Spent: 54.6 secs | LR: 0.000011
INFO:tensorflow:Step 31400 | Loss: 3.3185 | Spent: 55.6 secs | LR: 0.000010
INFO:tensorflow:Step 31500 | Loss: 3.3589 | Spent: 55.2 secs | LR: 0.000010
INFO:tensorflow:Step 31600 | Loss: 3.0051 | Spent: 54.9 secs | LR: 0.000010
INFO:tensorflow:Step 31700 | Loss: 3.6349 | Spent: 54.5 secs | LR: 0.000010
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.693
INFO:tensorflow:Best Accuracy: 0.693
Reading ../data/train.txt
INFO:tensorflow:Step 31800 | Loss: 3.2938 | Spent: 110.5 secs | LR: 0.000010
INFO:tensorflow:Step 31900 | Loss: 3.6376 | Spent: 56.0 secs | LR: 0.000010
INFO:tensorflow:Step 32000 | Loss: 3.7436 | Spent: 54.6 secs | LR: 0.000010
INFO:tensorflow:Step 32100 | Loss: 3.6514 | Spent: 56.6 secs | LR: 0.000010
INFO:tensorflow:Step 32200 | Loss: 3.3633 | Spent: 55.5 secs | LR: 0.000010
INFO:tensorflow:Step 32300 | Loss: 3.3325 | Spent: 56.0 secs | LR: 0.000010
INFO:tensorflow:Step 32400 | Loss: 3.4948 | Spent: 55.2 secs | LR: 0.000010
INFO:tensorflow:Step 32500 | Loss: 3.3165 | Spent: 55.3 secs | LR: 0.000010
INFO:tensorflow:Step 32600 | Loss: 3.0973 | Spent: 56.2 secs | LR: 0.000011
INFO:tensorflow:Step 32700 | Loss: 3.3240 | Spent: 54.6 secs | LR: 0.000011
INFO:tensorflow:Step 32800 | Loss: 3.4633 | Spent: 57.0 secs | LR: 0.000011
INFO:tensorflow:Step 32900 | Loss: 3.4034 | Spent: 55.3 secs | LR: 0.000011
INFO:tensorflow:Step 33000 | Loss: 3.1864 | Spent: 55.1 secs | LR: 0.000011
INFO:tensorflow:Step 33100 | Loss: 3.3376 | Spent: 56.2 secs | LR: 0.000011
INFO:tensorflow:Step 33200 | Loss: 3.3107 | Spent: 55.4 secs | LR: 0.000011
INFO:tensorflow:Step 33300 | Loss: 3.1326 | Spent: 55.8 secs | LR: 0.000011
INFO:tensorflow:Step 33400 | Loss: 3.9004 | Spent: 56.3 secs | LR: 0.000011
INFO:tensorflow:Step 33500 | Loss: 3.0202 | Spent: 55.0 secs | LR: 0.000011
INFO:tensorflow:Step 33600 | Loss: 3.3646 | Spent: 55.6 secs | LR: 0.000011
INFO:tensorflow:Step 33700 | Loss: 3.3256 | Spent: 55.0 secs | LR: 0.000011
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.685
INFO:tensorflow:Best Accuracy: 0.693
Reading ../data/train.txt
INFO:tensorflow:Step 33800 | Loss: 3.4914 | Spent: 108.8 secs | LR: 0.000011
INFO:tensorflow:Step 33900 | Loss: 3.6445 | Spent: 55.3 secs | LR: 0.000011
INFO:tensorflow:Step 34000 | Loss: 3.0491 | Spent: 54.5 secs | LR: 0.000011
INFO:tensorflow:Step 34100 | Loss: 3.5138 | Spent: 55.9 secs | LR: 0.000011
INFO:tensorflow:Step 34200 | Loss: 3.2992 | Spent: 56.1 secs | LR: 0.000012
INFO:tensorflow:Step 34300 | Loss: 3.3099 | Spent: 56.4 secs | LR: 0.000012
INFO:tensorflow:Step 34400 | Loss: 3.4717 | Spent: 56.2 secs | LR: 0.000012
INFO:tensorflow:Step 34500 | Loss: 3.7465 | Spent: 56.6 secs | LR: 0.000012
INFO:tensorflow:Step 34600 | Loss: 3.1300 | Spent: 56.3 secs | LR: 0.000012
INFO:tensorflow:Step 34700 | Loss: 3.5147 | Spent: 56.4 secs | LR: 0.000012
INFO:tensorflow:Step 34800 | Loss: 3.2946 | Spent: 56.2 secs | LR: 0.000012
INFO:tensorflow:Step 34900 | Loss: 3.6527 | Spent: 56.0 secs | LR: 0.000012
INFO:tensorflow:Step 35000 | Loss: 3.4655 | Spent: 56.2 secs | LR: 0.000012
INFO:tensorflow:Step 35100 | Loss: 3.5019 | Spent: 56.8 secs | LR: 0.000012
INFO:tensorflow:Step 35200 | Loss: 3.3319 | Spent: 54.7 secs | LR: 0.000012
INFO:tensorflow:Step 35300 | Loss: 3.2019 | Spent: 56.6 secs | LR: 0.000012
INFO:tensorflow:Step 35400 | Loss: 3.4453 | Spent: 55.2 secs | LR: 0.000012
INFO:tensorflow:Step 35500 | Loss: 3.6620 | Spent: 55.6 secs | LR: 0.000012
INFO:tensorflow:Step 35600 | Loss: 3.7082 | Spent: 55.6 secs | LR: 0.000012
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.690
INFO:tensorflow:Best Accuracy: 0.693
Reading ../data/train.txt
INFO:tensorflow:Step 35700 | Loss: 3.3247 | Spent: 108.3 secs | LR: 0.000013
INFO:tensorflow:Step 35800 | Loss: 3.3083 | Spent: 55.5 secs | LR: 0.000013
INFO:tensorflow:Step 35900 | Loss: 3.1719 | Spent: 56.7 secs | LR: 0.000013
INFO:tensorflow:Step 36000 | Loss: 3.4384 | Spent: 56.1 secs | LR: 0.000013
INFO:tensorflow:Step 36100 | Loss: 3.5294 | Spent: 56.2 secs | LR: 0.000013
INFO:tensorflow:Step 36200 | Loss: 3.3919 | Spent: 56.3 secs | LR: 0.000013
INFO:tensorflow:Step 36300 | Loss: 3.6008 | Spent: 55.5 secs | LR: 0.000013
INFO:tensorflow:Step 36400 | Loss: 3.4085 | Spent: 56.5 secs | LR: 0.000013
INFO:tensorflow:Step 36500 | Loss: 3.5704 | Spent: 56.9 secs | LR: 0.000013
INFO:tensorflow:Step 36600 | Loss: 3.3474 | Spent: 56.6 secs | LR: 0.000013
INFO:tensorflow:Step 36700 | Loss: 3.3920 | Spent: 55.8 secs | LR: 0.000013
INFO:tensorflow:Step 36800 | Loss: 3.0878 | Spent: 56.5 secs | LR: 0.000013
INFO:tensorflow:Step 36900 | Loss: 2.8937 | Spent: 56.7 secs | LR: 0.000013
INFO:tensorflow:Step 37000 | Loss: 3.3712 | Spent: 56.8 secs | LR: 0.000013
INFO:tensorflow:Step 37100 | Loss: 3.5150 | Spent: 56.3 secs | LR: 0.000013
INFO:tensorflow:Step 37200 | Loss: 3.3100 | Spent: 56.4 secs | LR: 0.000013
INFO:tensorflow:Step 37300 | Loss: 2.9511 | Spent: 56.4 secs | LR: 0.000014
INFO:tensorflow:Step 37400 | Loss: 3.7832 | Spent: 56.2 secs | LR: 0.000014
INFO:tensorflow:Step 37500 | Loss: 3.5332 | Spent: 55.9 secs | LR: 0.000014
INFO:tensorflow:Step 37600 | Loss: 3.3874 | Spent: 56.6 secs | LR: 0.000014
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.686
INFO:tensorflow:Best Accuracy: 0.693
Reading ../data/train.txt
INFO:tensorflow:Step 37700 | Loss: 3.1126 | Spent: 108.7 secs | LR: 0.000014
INFO:tensorflow:Step 37800 | Loss: 3.5773 | Spent: 56.5 secs | LR: 0.000014
INFO:tensorflow:Step 37900 | Loss: 3.3865 | Spent: 55.5 secs | LR: 0.000014
INFO:tensorflow:Step 38000 | Loss: 3.7670 | Spent: 55.8 secs | LR: 0.000014
INFO:tensorflow:Step 38100 | Loss: 3.4764 | Spent: 56.4 secs | LR: 0.000014
INFO:tensorflow:Step 38200 | Loss: 3.3747 | Spent: 56.5 secs | LR: 0.000014
INFO:tensorflow:Step 38300 | Loss: 3.4750 | Spent: 56.5 secs | LR: 0.000014
INFO:tensorflow:Step 38400 | Loss: 4.0073 | Spent: 56.3 secs | LR: 0.000014
INFO:tensorflow:Step 38500 | Loss: 3.0804 | Spent: 55.9 secs | LR: 0.000014
INFO:tensorflow:Step 38600 | Loss: 3.3201 | Spent: 55.9 secs | LR: 0.000014
INFO:tensorflow:Step 38700 | Loss: 3.4264 | Spent: 56.4 secs | LR: 0.000014
INFO:tensorflow:Step 38800 | Loss: 3.5607 | Spent: 56.8 secs | LR: 0.000014
INFO:tensorflow:Step 38900 | Loss: 3.3944 | Spent: 56.0 secs | LR: 0.000015
INFO:tensorflow:Step 39000 | Loss: 3.3954 | Spent: 56.2 secs | LR: 0.000015
INFO:tensorflow:Step 39100 | Loss: 3.0821 | Spent: 57.1 secs | LR: 0.000015
INFO:tensorflow:Step 39200 | Loss: 3.2155 | Spent: 56.7 secs | LR: 0.000015
INFO:tensorflow:Step 39300 | Loss: 3.4314 | Spent: 54.8 secs | LR: 0.000015
INFO:tensorflow:Step 39400 | Loss: 3.3212 | Spent: 55.9 secs | LR: 0.000015
INFO:tensorflow:Step 39500 | Loss: 3.5627 | Spent: 56.5 secs | LR: 0.000015
INFO:tensorflow:Step 39600 | Loss: 3.1086 | Spent: 56.3 secs | LR: 0.000015
Reading ../data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.681
INFO:tensorflow:Best Accuracy: 0.693
Reading ../data/train.txt
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-8-4c23c1b6df0f> in <module>()
     27   for ((text, seg), (labels, labels_mask)) in dataset(is_training=True, params=params):
     28     with tf.GradientTape() as tape:
---> 29       logits = model([text, tf.sign(text), seg], training=True).logits
     30       loss = tf.compat.v1.losses.softmax_cross_entropy(
     31         onehot_labels = tf.one_hot(labels, 21128),

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    983 
    984         with ops.enable_auto_cast_variables(self._compute_dtype_object):
--> 985           outputs = call_fn(inputs, *args, **kwargs)
    986 
    987         if self._activity_regularizer:

/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, inputs, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict, labels, training)
   1000             output_hidden_states=output_hidden_states,
   1001             return_dict=return_dict,
-> 1002             training=training,
   1003         )
   1004 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    983 
    984         with ops.enable_auto_cast_variables(self._compute_dtype_object):
--> 985           outputs = call_fn(inputs, *args, **kwargs)
    986 
    987         if self._activity_regularizer:

/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, inputs, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict, training)
    627             output_hidden_states,
    628             return_dict,
--> 629             training=training,
    630         )
    631 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    983 
    984         with ops.enable_auto_cast_variables(self._compute_dtype_object):
--> 985           outputs = call_fn(inputs, *args, **kwargs)
    986 
    987         if self._activity_regularizer:

/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, hidden_states, attention_mask, head_mask, output_attentions, output_hidden_states, return_dict, training)
    393 
    394             layer_outputs = layer_module(
--> 395                 hidden_states, attention_mask, head_mask[i], output_attentions, training=training
    396             )
    397             hidden_states = layer_outputs[0]

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    983 
    984         with ops.enable_auto_cast_variables(self._compute_dtype_object):
--> 985           outputs = call_fn(inputs, *args, **kwargs)
    986 
    987         if self._activity_regularizer:

/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, hidden_states, attention_mask, head_mask, output_attentions, training)
    363         attention_output = attention_outputs[0]
    364         intermediate_output = self.intermediate(attention_output)
--> 365         layer_output = self.bert_output(intermediate_output, attention_output, training=training)
    366         outputs = (layer_output,) + attention_outputs[1:]  # add attentions if we output them
    367 

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    983 
    984         with ops.enable_auto_cast_variables(self._compute_dtype_object):
--> 985           outputs = call_fn(inputs, *args, **kwargs)
    986 
    987         if self._activity_regularizer:

/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py in call(self, hidden_states, input_tensor, training)
    344         hidden_states = self.dense(hidden_states)
    345         hidden_states = self.dropout(hidden_states, training=training)
--> 346         hidden_states = self.LayerNorm(hidden_states + input_tensor)
    347 
    348         return hidden_states

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    983 
    984         with ops.enable_auto_cast_variables(self._compute_dtype_object):
--> 985           outputs = call_fn(inputs, *args, **kwargs)
    986 
    987         if self._activity_regularizer:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/normalization.py in call(self, inputs)
   1201 
   1202       # Calculate the moments on the last axis (layer activations).
-> 1203       mean, variance = nn.moments(inputs, self.axis, keep_dims=True)
   1204 
   1205       scale, offset = _broadcast(self.gamma), _broadcast(self.beta)

/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
    199     """Call target, and fall back on dispatchers if there is a TypeError."""
    200     try:
--> 201       return target(*args, **kwargs)
    202     except (TypeError, ValueError):
    203       # Note: convert_to_eager_tensor currently raises a ValueError, not a

/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py in moments(x, axes, shift, name, keep_dims, keepdims)
   1305   if keep_dims is None:
   1306     keep_dims = False
-> 1307   with ops.name_scope(name, "moments", [x, axes]):
   1308     # The dynamic range of fp16 is too limited to support the collection of
   1309     # sufficient statistics. As a workaround we simply perform the operations

/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in name_scope(name, default_name, values, skip_on_eager)
   6409   """
   6410   ctx = context.context()
-> 6411   in_eager_mode = ctx.executing_eagerly()
   6412   if not in_eager_mode:
   6413     return internal_name_scope_v1(name, default_name, values)

KeyboardInterrupt: 
In [ ]:
print(model.weights[5]) # for later check if the weight is correctly transferred to other task
<tf.Variable 'tf_bert_lm_head_model/bert/encoder/layer_._0/attention/self/query/kernel:0' shape=(768, 768) dtype=float32, numpy=
array([[ 0.11825976,  0.01059594,  0.00478886, ..., -0.04938788,
         0.01636124,  0.01673737],
       [-0.00918018, -0.00905642, -0.00512347, ...,  0.02321002,
        -0.09011449, -0.03552252],
       [ 0.0065572 , -0.00295785,  0.03182212, ...,  0.00119713,
        -0.04689885,  0.00050256],
       ...,
       [ 0.00973585, -0.00485693,  0.10374205, ...,  0.06686458,
        -0.03649763,  0.09566212],
       [ 0.00776388,  0.04061504, -0.03333197, ...,  0.00960802,
         0.04296997, -0.02524005],
       [-0.08363243,  0.01113833, -0.00531893, ...,  0.07489353,
        -0.01407121,  0.01147269]], dtype=float32)>