In [1]:
# Mount Google Drive and move into the project directory (Colab-only cell).
from google.colab import drive
drive.mount('/content/gdrive')
import os
# must run after the mount succeeds — the target path lives on the mounted drive
os.chdir('/content/gdrive/My Drive/finch/tensorflow2/text_matching/ant/main')
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
In [2]:
!pip install transformers
Requirement already satisfied: transformers in /usr/local/lib/python3.6/dist-packages (3.5.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from transformers) (1.18.5)
Requirement already satisfied: tokenizers==0.9.3 in /usr/local/lib/python3.6/dist-packages (from transformers) (0.9.3)
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.6/dist-packages (from transformers) (2019.12.20)
Requirement already satisfied: sentencepiece==0.1.91 in /usr/local/lib/python3.6/dist-packages (from transformers) (0.1.91)
Requirement already satisfied: filelock in /usr/local/lib/python3.6/dist-packages (from transformers) (3.0.12)
Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from transformers) (4.41.1)
Requirement already satisfied: dataclasses; python_version < "3.7" in /usr/local/lib/python3.6/dist-packages (from transformers) (0.7)
Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from transformers) (2.23.0)
Requirement already satisfied: protobuf in /usr/local/lib/python3.6/dist-packages (from transformers) (3.12.4)
Requirement already satisfied: packaging in /usr/local/lib/python3.6/dist-packages (from transformers) (20.4)
Requirement already satisfied: sacremoses in /usr/local/lib/python3.6/dist-packages (from transformers) (0.0.43)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2020.6.20)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (2.10)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (1.24.3)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->transformers) (3.0.4)
Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf->transformers) (50.3.2)
Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.6/dist-packages (from protobuf->transformers) (1.15.0)
Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.6/dist-packages (from packaging->transformers) (2.4.7)
Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (0.17.0)
Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from sacremoses->transformers) (7.1.2)
In [3]:
from transformers import BertTokenizer, TFBertModel
from sklearn.metrics import classification_report

import os
import json
import time
import logging
import pprint
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

print("TensorFlow Version", tf.__version__)
# tf.test.is_gpu_available() is deprecated (TF itself warns and suggests this
# replacement); list physical GPU devices instead.
print('GPU Enabled:', bool(tf.config.list_physical_devices('GPU')))
TensorFlow Version 2.3.0
WARNING:tensorflow:From <ipython-input-3-2c65ccb3f96b>:14: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
GPU Enabled: True
In [4]:
# Hyper-parameters and data paths for BERT fine-tuning on the ANT
# sentence-pair matching task.
params = {
  'pretrain_path': 'bert-base-chinese',  # HuggingFace model id
  'train_path': '../data/train.json',
  'test_path': '../data/dev.json',
  'batch_size': 32,
  'max_len': 128,          # max packed length incl. [CLS] and two [SEP]
  'buffer_size': 34334,    # shuffle buffer — presumably the training-set size; TODO confirm
  'init_lr': 1e-5,         # lower bound of the cyclical learning rate
  'max_lr': 4e-5,          # upper bound of the cyclical learning rate
  'n_epochs': 12,
  'num_patience': 7,       # early-stopping patience (epochs without improvement)
}

# FIX: the correct keyword is `do_lower_case` — the previous `lowercase=True`
# was not a BertTokenizer argument and was silently ignored. (`add_special_tokens`
# is likewise not an init kwarg and is dropped: data_generator below adds
# [CLS]/[SEP] manually.) Lower-casing is a no-op for the Chinese vocab, so
# downstream behavior is unchanged.
tokenizer = BertTokenizer.from_pretrained(params['pretrain_path'],
                                          do_lower_case = True)
In [5]:
# stream data from text files
def data_generator(f_path, params):
  """Stream ((token_ids, segment_ids), label) examples from a json-lines file.

  Each line holds 'sentence1', 'sentence2' and 'label'; the two sentences
  are packed into a single BERT input: [CLS] s1 [SEP] s2 [SEP].
  """
  with open(f_path) as f:
    print('Reading', f_path)
    for raw in f:
      record = json.loads(raw.rstrip())
      s1, s2, label = record['sentence1'], record['sentence2'], record['label']
      # if the packed sequence would exceed max_len (3 special tokens),
      # clip both sentences to an equal share of the remaining budget
      if len(s1) + len(s2) + 3 > params['max_len']:
        budget = (params['max_len'] - 3) // 2
        s1, s2 = s1[:budget], s2[:budget]
      tokens = ['[CLS]', *s1, '[SEP]', *s2, '[SEP]']
      # segment ids: 0 over [CLS] + sentence1 + first [SEP], 1 over the rest
      segments = [0] * (len(s1) + 2) + [1] * (len(s2) + 1)
      yield (tokenizer.convert_tokens_to_ids(tokens), segments), int(label)


def dataset(is_training, params):
  """Build a tf.data pipeline of padded ((text, seg), label) batches.

  Streams examples from data_generator, pads each batch to the longest
  sequence in it (token/segment ids padded with 0, labels with -1), and
  prefetches. Training additionally shuffles with a large buffer.
  """
  _shapes = (([None], [None]), ())
  _types = ((tf.int32, tf.int32), tf.int32)
  _pads = ((0, 0), -1)

  # the two branches previously duplicated the whole pipeline; they differed
  # only in the source file and shuffling, so build once and shuffle conditionally
  f_path = params['train_path'] if is_training else params['test_path']
  ds = tf.data.Dataset.from_generator(
    lambda: data_generator(f_path, params),
    output_shapes = _shapes,
    output_types = _types,)
  if is_training:
    ds = ds.shuffle(params['buffer_size'])
  ds = ds.padded_batch(params['batch_size'], _shapes, _pads)
  ds = ds.prefetch(tf.data.experimental.AUTOTUNE)

  return ds
In [6]:
# input stream ids check
# sanity check: inspect token ids and segment ids of the first training example
sample_stream = data_generator(params['train_path'], params)
(token_ids, segment_ids), _ = next(sample_stream)
print(token_ids)
print(segment_ids)
Reading ../data/train.json
[101, 6010, 6009, 955, 1446, 5023, 7583, 6820, 3621, 1377, 809, 2940, 2768, 1044, 2622, 1400, 3315, 1408, 102, 955, 1446, 3300, 1044, 2622, 1168, 3309, 6820, 3315, 1408, 102]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
In [7]:
class BertFinetune(tf.keras.Model):
  """BERT encoder with a small binary-classification head for sentence matching.

  Loads the pretrained Chinese BERT, overlays further-pretrained weights
  from disk, then stacks dropout -> 300-unit swish dense -> 1-unit logit
  on top of the pooled output.
  """

  def __init__(self, params):
    super(BertFinetune, self).__init__()
    self.bert = TFBertModel.from_pretrained(params['pretrain_path'],
                                            trainable = True)
    # overlay domain-adapted weights; layers whose names don't match are skipped
    self.bert.load_weights('../model/bert_further_pretrain.h5',
                           by_name = True,
                           skip_mismatch = True)
    self.drop_1 = tf.keras.layers.Dropout(.1)
    self.fc = tf.keras.layers.Dense(300, tf.nn.swish, name='down_stream/fc')
    self.drop_2 = tf.keras.layers.Dropout(.1)
    self.out = tf.keras.layers.Dense(1, name='down_stream/out')

  def call(self, bert_inputs, training):
    """Map [token_ids, attention_mask, segment_ids] to one logit per example."""
    ids = [tf.cast(t, tf.int32) for t in bert_inputs]
    pooled = self.bert(ids, training=training)[1]  # index 1 = pooled output
    hidden = self.fc(self.drop_1(pooled, training=training))
    logit = self.out(self.drop_2(hidden, training=training))
    return tf.squeeze(logit, 1)  # shape (batch,)
In [8]:
# Instantiate the model and force weight creation by building against the
# three BERT input shapes (token ids, attention mask, segment ids).
model = BertFinetune(params)
model.build([[None, None], [None, None], [None, None]])
# spot-check that pretrained weights are present (prints a BERT kernel)
print(model.weights[5])
Some layers from the model checkpoint at bert-base-chinese were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-chinese.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.
<tf.Variable 'tf_bert_model/bert/encoder/layer_._0/attention/self/query/kernel:0' shape=(768, 768) dtype=float32, numpy=
array([[ 0.11868321,  0.00452176,  0.01265684, ..., -0.05029925,
         0.02201132,  0.01740021],
       [-0.01231334, -0.01947716,  0.0063817 , ...,  0.02255814,
        -0.08575422, -0.02734046],
       [ 0.00752212, -0.00881757,  0.02715787, ...,  0.01072933,
        -0.05966277,  0.00141719],
       ...,
       [ 0.00860101,  0.00155984,  0.11660421, ...,  0.05327065,
        -0.04224441,  0.09870512],
       [ 0.01724561,  0.04100509, -0.0330872 , ...,  0.00296495,
         0.04215806, -0.04148998],
       [-0.07975583,  0.01556155, -0.01744738, ...,  0.06569684,
        -0.01931422, -0.00433144]], dtype=float32)>
In [9]:
# Cyclical learning rate (Triangular2): LR oscillates between init_lr and
# max_lr with halving amplitude; step_size is the half-cycle length in
# optimizer steps (~2 epochs' worth of batches here).
step_size = 2 * params['buffer_size'] // params['batch_size']
decay_lr = tfa.optimizers.Triangular2CyclicalLearningRate(
  initial_learning_rate = params['init_lr'],
  maximal_learning_rate = params['max_lr'],
  step_size = step_size,)
optim = tf.optimizers.Adam(params['init_lr'])
global_step = 0

best_acc = .0
count = 0  # epochs without dev improvement (early-stopping counter)

t0 = time.time()
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)

for _ in range(params['n_epochs']):
  # TRAINING
  for ((text, seg), labels) in dataset(is_training=True, params=params):
    with tf.GradientTape() as tape:
      # inputs: token ids, attention mask (tf.sign(text) -> 1 for real tokens,
      # 0 for the zero-padded positions), segment ids
      logits = model([text, tf.sign(text), seg], training=True)
      labels = tf.cast(labels, tf.float32)
      # per-batch positive-class re-weighting to counter class imbalance
      num_neg = tf.reduce_sum(tf.cast(tf.equal(labels, 0.), tf.float32)).numpy()
      num_pos = tf.reduce_sum(labels).numpy()
      if num_pos == 0.:
        pos_weight = 1.  # no positives in this batch; avoid division by zero
      else:
        pos_weight = num_neg / num_pos
      loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(
        labels = labels,
        logits = logits,
        pos_weight = pos_weight))
      
    # set this step's cyclical LR before applying gradients
    optim.lr.assign(decay_lr(global_step))
    grads = tape.gradient(loss, model.trainable_variables)
    grads, _ = tf.clip_by_global_norm(grads, 5.)  # clip to stabilize fine-tuning
    optim.apply_gradients(zip(grads, model.trainable_variables))
    
    if global_step % 100 == 0:
      logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
        global_step, loss.numpy().item(), time.time()-t0, optim.lr.numpy().item()))
      t0 = time.time()
    global_step += 1
  
  # EVALUATION
  m = tf.keras.metrics.Accuracy()
  intent_true = []
  intent_pred = []

  for ((text, seg), labels) in dataset(is_training=False, params=params):
    # sigmoid + 0.5 threshold turns the single logit into a binary prediction
    logits = tf.sigmoid(model([text, tf.sign(text), seg], training=False))
    y_pred = tf.cast(tf.math.greater_equal(logits, .5), tf.int32)
    m.update_state(y_true=labels, y_pred=y_pred)
    intent_true += labels.numpy().flatten().tolist()
    intent_pred += y_pred.numpy().flatten().tolist()

  acc = m.result().numpy()
  logger.info("Evaluation: Testing Accuracy: {:.3f}".format(acc))

  logger.info('\n'+classification_report(y_true = intent_true,
                                         y_pred = intent_pred,
                                         labels = [0, 1],
                                         target_names = ['Not Matched', 'Matched'],
                                         digits = 3))

  # early stopping on best dev accuracy
  if acc > best_acc:
    best_acc = acc
    # you can save model here
    count = 0
  else:
    count += 1
  logger.info("Best Accuracy: {:.3f}".format(best_acc))

  if count == params['num_patience']:
    print(params['num_patience'], "times not improve the best result, therefore stop training")
    break
Reading ../data/train.json
INFO:tensorflow:Step 0 | Loss: 1.0480 | Spent: 7.5 secs | LR: 0.000010
INFO:tensorflow:Step 100 | Loss: 0.9945 | Spent: 49.2 secs | LR: 0.000011
INFO:tensorflow:Step 200 | Loss: 0.9535 | Spent: 48.1 secs | LR: 0.000013
INFO:tensorflow:Step 300 | Loss: 0.8335 | Spent: 49.5 secs | LR: 0.000014
INFO:tensorflow:Step 400 | Loss: 0.5187 | Spent: 49.1 secs | LR: 0.000016
INFO:tensorflow:Step 500 | Loss: 0.7609 | Spent: 47.9 secs | LR: 0.000017
INFO:tensorflow:Step 600 | Loss: 0.4976 | Spent: 48.3 secs | LR: 0.000018
INFO:tensorflow:Step 700 | Loss: 1.2219 | Spent: 50.2 secs | LR: 0.000020
INFO:tensorflow:Step 800 | Loss: 0.9580 | Spent: 48.1 secs | LR: 0.000021
INFO:tensorflow:Step 900 | Loss: 0.9314 | Spent: 50.1 secs | LR: 0.000023
INFO:tensorflow:Step 1000 | Loss: 0.6420 | Spent: 49.0 secs | LR: 0.000024
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.693
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.881     0.642     0.743      2978
     Matched      0.503     0.807     0.620      1338

    accuracy                          0.693      4316
   macro avg      0.692     0.725     0.682      4316
weighted avg      0.764     0.693     0.705      4316

INFO:tensorflow:Best Accuracy: 0.693
Reading ../data/train.json
INFO:tensorflow:Step 1100 | Loss: 0.4893 | Spent: 74.1 secs | LR: 0.000025
INFO:tensorflow:Step 1200 | Loss: 0.5681 | Spent: 48.1 secs | LR: 0.000027
INFO:tensorflow:Step 1300 | Loss: 0.8003 | Spent: 48.5 secs | LR: 0.000028
INFO:tensorflow:Step 1400 | Loss: 0.4164 | Spent: 49.8 secs | LR: 0.000030
INFO:tensorflow:Step 1500 | Loss: 0.6328 | Spent: 48.4 secs | LR: 0.000031
INFO:tensorflow:Step 1600 | Loss: 1.0343 | Spent: 49.4 secs | LR: 0.000032
INFO:tensorflow:Step 1700 | Loss: 0.9843 | Spent: 47.8 secs | LR: 0.000034
INFO:tensorflow:Step 1800 | Loss: 0.8749 | Spent: 47.8 secs | LR: 0.000035
INFO:tensorflow:Step 1900 | Loss: 0.7201 | Spent: 50.1 secs | LR: 0.000037
INFO:tensorflow:Step 2000 | Loss: 0.6021 | Spent: 49.1 secs | LR: 0.000038
INFO:tensorflow:Step 2100 | Loss: 0.5807 | Spent: 48.3 secs | LR: 0.000039
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.693
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.884     0.639     0.742      2978
     Matched      0.503     0.814     0.622      1338

    accuracy                          0.693      4316
   macro avg      0.694     0.726     0.682      4316
weighted avg      0.766     0.693     0.704      4316

INFO:tensorflow:Best Accuracy: 0.693
Reading ../data/train.json
INFO:tensorflow:Step 2200 | Loss: 0.3846 | Spent: 73.2 secs | LR: 0.000039
INFO:tensorflow:Step 2300 | Loss: 0.6442 | Spent: 48.1 secs | LR: 0.000038
INFO:tensorflow:Step 2400 | Loss: 0.9689 | Spent: 47.8 secs | LR: 0.000036
INFO:tensorflow:Step 2500 | Loss: 0.6000 | Spent: 49.9 secs | LR: 0.000035
INFO:tensorflow:Step 2600 | Loss: 0.6241 | Spent: 47.2 secs | LR: 0.000034
INFO:tensorflow:Step 2700 | Loss: 0.6520 | Spent: 47.1 secs | LR: 0.000032
INFO:tensorflow:Step 2800 | Loss: 0.6482 | Spent: 48.6 secs | LR: 0.000031
INFO:tensorflow:Step 2900 | Loss: 0.6117 | Spent: 48.3 secs | LR: 0.000029
INFO:tensorflow:Step 3000 | Loss: 0.6686 | Spent: 50.7 secs | LR: 0.000028
INFO:tensorflow:Step 3100 | Loss: 0.7968 | Spent: 49.0 secs | LR: 0.000027
INFO:tensorflow:Step 3200 | Loss: 0.4121 | Spent: 49.6 secs | LR: 0.000025
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.734
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.861     0.733     0.792      2978
     Matched      0.553     0.737     0.632      1338

    accuracy                          0.734      4316
   macro avg      0.707     0.735     0.712      4316
weighted avg      0.766     0.734     0.742      4316

INFO:tensorflow:Best Accuracy: 0.734
Reading ../data/train.json
INFO:tensorflow:Step 3300 | Loss: 0.2737 | Spent: 73.7 secs | LR: 0.000024
INFO:tensorflow:Step 3400 | Loss: 0.2595 | Spent: 49.0 secs | LR: 0.000022
INFO:tensorflow:Step 3500 | Loss: 0.5529 | Spent: 47.0 secs | LR: 0.000021
INFO:tensorflow:Step 3600 | Loss: 0.3632 | Spent: 49.8 secs | LR: 0.000020
INFO:tensorflow:Step 3700 | Loss: 0.5128 | Spent: 48.6 secs | LR: 0.000018
INFO:tensorflow:Step 3800 | Loss: 0.3167 | Spent: 50.2 secs | LR: 0.000017
INFO:tensorflow:Step 3900 | Loss: 0.5409 | Spent: 48.0 secs | LR: 0.000015
INFO:tensorflow:Step 4000 | Loss: 0.4579 | Spent: 47.2 secs | LR: 0.000014
INFO:tensorflow:Step 4100 | Loss: 0.4816 | Spent: 48.4 secs | LR: 0.000013
INFO:tensorflow:Step 4200 | Loss: 0.4360 | Spent: 48.8 secs | LR: 0.000011
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.743
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.834     0.783     0.808      2978
     Matched      0.575     0.652     0.611      1338

    accuracy                          0.743      4316
   macro avg      0.704     0.718     0.709      4316
weighted avg      0.753     0.743     0.747      4316

INFO:tensorflow:Best Accuracy: 0.743
Reading ../data/train.json
INFO:tensorflow:Step 4300 | Loss: 0.2636 | Spent: 75.5 secs | LR: 0.000010
INFO:tensorflow:Step 4400 | Loss: 0.1857 | Spent: 49.0 secs | LR: 0.000011
INFO:tensorflow:Step 4500 | Loss: 0.1914 | Spent: 47.4 secs | LR: 0.000011
INFO:tensorflow:Step 4600 | Loss: 0.3414 | Spent: 49.2 secs | LR: 0.000012
INFO:tensorflow:Step 4700 | Loss: 0.1449 | Spent: 50.9 secs | LR: 0.000013
INFO:tensorflow:Step 4800 | Loss: 0.1879 | Spent: 49.5 secs | LR: 0.000014
INFO:tensorflow:Step 4900 | Loss: 0.5413 | Spent: 47.6 secs | LR: 0.000014
INFO:tensorflow:Step 5000 | Loss: 0.1819 | Spent: 51.0 secs | LR: 0.000015
INFO:tensorflow:Step 5100 | Loss: 0.4742 | Spent: 47.9 secs | LR: 0.000016
INFO:tensorflow:Step 5200 | Loss: 0.1654 | Spent: 48.5 secs | LR: 0.000016
INFO:tensorflow:Step 5300 | Loss: 0.2691 | Spent: 48.5 secs | LR: 0.000017
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.729
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.827     0.769     0.797      2978
     Matched      0.555     0.642     0.595      1338

    accuracy                          0.729      4316
   macro avg      0.691     0.705     0.696      4316
weighted avg      0.743     0.729     0.734      4316

INFO:tensorflow:Best Accuracy: 0.743
Reading ../data/train.json
INFO:tensorflow:Step 5400 | Loss: 0.3089 | Spent: 74.7 secs | LR: 0.000018
INFO:tensorflow:Step 5500 | Loss: 0.2842 | Spent: 50.9 secs | LR: 0.000018
INFO:tensorflow:Step 5600 | Loss: 0.2899 | Spent: 49.0 secs | LR: 0.000019
INFO:tensorflow:Step 5700 | Loss: 0.2910 | Spent: 49.0 secs | LR: 0.000020
INFO:tensorflow:Step 5800 | Loss: 0.0622 | Spent: 48.1 secs | LR: 0.000021
INFO:tensorflow:Step 5900 | Loss: 0.2042 | Spent: 47.8 secs | LR: 0.000021
INFO:tensorflow:Step 6000 | Loss: 0.3401 | Spent: 49.5 secs | LR: 0.000022
INFO:tensorflow:Step 6100 | Loss: 0.2290 | Spent: 46.1 secs | LR: 0.000023
INFO:tensorflow:Step 6200 | Loss: 0.4061 | Spent: 50.3 secs | LR: 0.000023
INFO:tensorflow:Step 6300 | Loss: 0.1785 | Spent: 49.2 secs | LR: 0.000024
INFO:tensorflow:Step 6400 | Loss: 0.2143 | Spent: 48.7 secs | LR: 0.000025
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.730
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.831     0.765     0.797      2978
     Matched      0.555     0.652     0.600      1338

    accuracy                          0.730      4316
   macro avg      0.693     0.709     0.698      4316
weighted avg      0.745     0.730     0.736      4316

INFO:tensorflow:Best Accuracy: 0.743
Reading ../data/train.json
INFO:tensorflow:Step 6500 | Loss: 0.1361 | Spent: 74.9 secs | LR: 0.000025
INFO:tensorflow:Step 6600 | Loss: 0.6711 | Spent: 49.3 secs | LR: 0.000024
INFO:tensorflow:Step 6700 | Loss: 0.3073 | Spent: 47.8 secs | LR: 0.000023
INFO:tensorflow:Step 6800 | Loss: 0.1332 | Spent: 48.6 secs | LR: 0.000022
INFO:tensorflow:Step 6900 | Loss: 0.3580 | Spent: 47.1 secs | LR: 0.000022
INFO:tensorflow:Step 7000 | Loss: 0.5636 | Spent: 49.5 secs | LR: 0.000021
INFO:tensorflow:Step 7100 | Loss: 0.0456 | Spent: 47.2 secs | LR: 0.000020
INFO:tensorflow:Step 7200 | Loss: 0.3993 | Spent: 49.7 secs | LR: 0.000020
INFO:tensorflow:Step 7300 | Loss: 0.3231 | Spent: 51.0 secs | LR: 0.000019
INFO:tensorflow:Step 7400 | Loss: 0.2878 | Spent: 48.6 secs | LR: 0.000018
INFO:tensorflow:Step 7500 | Loss: 0.1579 | Spent: 48.4 secs | LR: 0.000018
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.735
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.815     0.798     0.806      2978
     Matched      0.570     0.596     0.583      1338

    accuracy                          0.735      4316
   macro avg      0.692     0.697     0.695      4316
weighted avg      0.739     0.735     0.737      4316

INFO:tensorflow:Best Accuracy: 0.743
Reading ../data/train.json
INFO:tensorflow:Step 7600 | Loss: 0.1677 | Spent: 75.1 secs | LR: 0.000017
INFO:tensorflow:Step 7700 | Loss: 0.4317 | Spent: 49.5 secs | LR: 0.000016
INFO:tensorflow:Step 7800 | Loss: 0.0803 | Spent: 47.0 secs | LR: 0.000015
INFO:tensorflow:Step 7900 | Loss: 0.2213 | Spent: 47.6 secs | LR: 0.000015
INFO:tensorflow:Step 8000 | Loss: 0.0920 | Spent: 49.0 secs | LR: 0.000014
INFO:tensorflow:Step 8100 | Loss: 0.4001 | Spent: 48.9 secs | LR: 0.000013
INFO:tensorflow:Step 8200 | Loss: 0.0159 | Spent: 49.2 secs | LR: 0.000013
INFO:tensorflow:Step 8300 | Loss: 0.0432 | Spent: 49.3 secs | LR: 0.000012
INFO:tensorflow:Step 8400 | Loss: 0.1207 | Spent: 47.9 secs | LR: 0.000011
INFO:tensorflow:Step 8500 | Loss: 0.1081 | Spent: 49.2 secs | LR: 0.000011
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.742
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.803     0.830     0.816      2978
     Matched      0.591     0.548     0.569      1338

    accuracy                          0.742      4316
   macro avg      0.697     0.689     0.692      4316
weighted avg      0.738     0.742     0.740      4316

INFO:tensorflow:Best Accuracy: 0.743
Reading ../data/train.json
INFO:tensorflow:Step 8600 | Loss: 0.0201 | Spent: 75.0 secs | LR: 0.000010
INFO:tensorflow:Step 8700 | Loss: 0.0103 | Spent: 48.7 secs | LR: 0.000010
INFO:tensorflow:Step 8800 | Loss: 0.4103 | Spent: 48.9 secs | LR: 0.000011
INFO:tensorflow:Step 8900 | Loss: 0.0547 | Spent: 50.1 secs | LR: 0.000011
INFO:tensorflow:Step 9000 | Loss: 0.1247 | Spent: 47.5 secs | LR: 0.000011
INFO:tensorflow:Step 9100 | Loss: 0.0078 | Spent: 48.1 secs | LR: 0.000012
INFO:tensorflow:Step 9200 | Loss: 0.4536 | Spent: 48.6 secs | LR: 0.000012
INFO:tensorflow:Step 9300 | Loss: 0.1458 | Spent: 48.6 secs | LR: 0.000013
INFO:tensorflow:Step 9400 | Loss: 0.0346 | Spent: 50.0 secs | LR: 0.000013
INFO:tensorflow:Step 9500 | Loss: 0.0440 | Spent: 49.3 secs | LR: 0.000013
INFO:tensorflow:Step 9600 | Loss: 0.1734 | Spent: 50.0 secs | LR: 0.000014
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.739
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.810     0.811     0.811      2978
     Matched      0.579     0.578     0.578      1338

    accuracy                          0.739      4316
   macro avg      0.694     0.694     0.694      4316
weighted avg      0.739     0.739     0.739      4316

INFO:tensorflow:Best Accuracy: 0.743
Reading ../data/train.json
INFO:tensorflow:Step 9700 | Loss: 0.0074 | Spent: 72.7 secs | LR: 0.000014
INFO:tensorflow:Step 9800 | Loss: 0.0515 | Spent: 47.9 secs | LR: 0.000014
INFO:tensorflow:Step 9900 | Loss: 0.0182 | Spent: 49.0 secs | LR: 0.000015
INFO:tensorflow:Step 10000 | Loss: 0.0030 | Spent: 49.8 secs | LR: 0.000015
INFO:tensorflow:Step 10100 | Loss: 0.0101 | Spent: 47.7 secs | LR: 0.000015
INFO:tensorflow:Step 10200 | Loss: 0.0052 | Spent: 48.6 secs | LR: 0.000016
INFO:tensorflow:Step 10300 | Loss: 0.0391 | Spent: 47.1 secs | LR: 0.000016
INFO:tensorflow:Step 10400 | Loss: 0.0494 | Spent: 47.3 secs | LR: 0.000016
INFO:tensorflow:Step 10500 | Loss: 0.1090 | Spent: 50.1 secs | LR: 0.000017
INFO:tensorflow:Step 10600 | Loss: 0.6652 | Spent: 49.4 secs | LR: 0.000017
INFO:tensorflow:Step 10700 | Loss: 0.0290 | Spent: 50.0 secs | LR: 0.000017
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.733
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.795     0.826     0.810      2978
     Matched      0.576     0.527     0.551      1338

    accuracy                          0.733      4316
   macro avg      0.686     0.676     0.680      4316
weighted avg      0.727     0.733     0.730      4316

INFO:tensorflow:Best Accuracy: 0.743
Reading ../data/train.json
INFO:tensorflow:Step 10800 | Loss: 0.0516 | Spent: 76.1 secs | LR: 0.000017
INFO:tensorflow:Step 10900 | Loss: 0.0302 | Spent: 48.2 secs | LR: 0.000017
INFO:tensorflow:Step 11000 | Loss: 0.0387 | Spent: 48.9 secs | LR: 0.000017
INFO:tensorflow:Step 11100 | Loss: 0.0858 | Spent: 49.0 secs | LR: 0.000016
INFO:tensorflow:Step 11200 | Loss: 0.1474 | Spent: 49.3 secs | LR: 0.000016
INFO:tensorflow:Step 11300 | Loss: 0.1094 | Spent: 48.1 secs | LR: 0.000015
INFO:tensorflow:Step 11400 | Loss: 0.2256 | Spent: 48.7 secs | LR: 0.000015
INFO:tensorflow:Step 11500 | Loss: 0.0042 | Spent: 49.0 secs | LR: 0.000015
INFO:tensorflow:Step 11600 | Loss: 0.1513 | Spent: 50.1 secs | LR: 0.000014
INFO:tensorflow:Step 11700 | Loss: 0.0551 | Spent: 47.9 secs | LR: 0.000014
INFO:tensorflow:Step 11800 | Loss: 0.1287 | Spent: 47.5 secs | LR: 0.000014
Reading ../data/dev.json
INFO:tensorflow:Evaluation: Testing Accuracy: 0.737
INFO:tensorflow:
              precision    recall  f1-score   support

 Not Matched      0.804     0.819     0.811      2978
     Matched      0.580     0.557     0.568      1338

    accuracy                          0.737      4316
   macro avg      0.692     0.688     0.690      4316
weighted avg      0.735     0.737     0.736      4316

INFO:tensorflow:Best Accuracy: 0.743
7 times not improve the best result, therefore stop training