In [0]:
from google.colab import drive
drive.mount('/content/gdrive')
import os
# Work from the project directory on Google Drive.
os.chdir('/content/gdrive/My Drive/finch/tensorflow1/multi_turn_rewrite/chinese/main')
In [0]:
%tensorflow_version 1.x
TensorFlow 1.x selected.
In [0]:
import tensorflow as tf
import numpy as np

print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())
TensorFlow Version 1.15.2
GPU Enabled: False
In [0]:
def rnn_cell():
  def cell_fn():
    cell = tf.nn.rnn_cell.LSTMCell(params['hidden_units'],
                                   initializer=tf.orthogonal_initializer())
    return cell
  if params['dec_layers'] > 1:
    cells = []
    for i in range(params['dec_layers']):
      if i == params['dec_layers'] - 1:
        cells.append(cell_fn())
      else:
        # Dense "residual" connection: concatenate each layer's input with
        # its output rather than adding them.
        cells.append(tf.nn.rnn_cell.ResidualWrapper(
          cell_fn(), residual_fn=lambda inp, out: tf.concat((inp, out), -1)))
    return tf.nn.rnn_cell.MultiRNNCell(cells)
  else:
    return cell_fn()

  
def dec_cell(enc_out, enc_seq_len):
  query, history = enc_out
  query_len, history_len = enc_seq_len

  # Two attention mechanisms: one over the current query, one over the
  # concatenated dialogue history.
  attn1 = tf.contrib.seq2seq.BahdanauAttention(
    num_units = params['hidden_units'],
    memory = query,
    memory_sequence_length = query_len)
  
  attn2 = tf.contrib.seq2seq.BahdanauAttention(
    num_units = params['hidden_units'],
    memory = history,
    memory_sequence_length = history_len)
  
  # The two attention outputs are concatenated, so the layer sizes sum back
  # to hidden_units, matching the embedding width expected by TiedDense.
  return tf.contrib.seq2seq.AttentionWrapper(
    cell = rnn_cell(),
    attention_mechanism = [attn1, attn2],
    attention_layer_size = [params['hidden_units']//2, params['hidden_units']//2])
    

class TiedDense(tf.layers.Layer):
  """Output projection whose weights are tied to the embedding matrix."""
  def __init__(self, tied_embed, out_dim):
    super().__init__()
    self.tied_embed = tied_embed
    self.out_dim = out_dim
  
  def build(self, input_shape):
    self.bias = self.add_weight(name='bias',
                                shape=[self.out_dim],
                                trainable=True)
    super().build(input_shape)
  
  def call(self, inputs):
    # Project hidden states onto the vocabulary via the transposed embedding.
    x = tf.matmul(inputs, self.tied_embed, transpose_b=True)
    x = tf.nn.bias_add(x, self.bias)
    return x
  
  def compute_output_shape(self, input_shape):
    return input_shape[:-1].concatenate(self.out_dim)
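Weight tying reuses the [vocab, embed_dim] embedding matrix as the output projection, saving a separate softmax weight matrix and keeping input and output tokens in the same representation space. A minimal sketch of the layer's behaviour, with a hypothetical 5000-row vocabulary and the 300-dim vectors used in this notebook (the name 'demo_embedding' is made up for illustration):

emb = tf.get_variable('demo_embedding', [5000, 300])  # hypothetical shapes
tied = TiedDense(emb, out_dim=5000)
logits = tied(tf.zeros([8, 300]))  # inputs must match the embedding width
print(logits.shape)                # (8, 5000): one logit per vocabulary row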
In [0]:
def bilstm_encode(encoder, x, mask):
  # Bidirectional(LSTM(..., return_state=True)) returns the sequence outputs
  # plus (h, c) for each direction; with hidden_units = 300 the outputs are
  # (batch, time, 600) and the concatenated final h states (batch, 600).
  enc_out, state_fw_h, state_fw_c, state_bw_h, state_bw_c = encoder(x, mask=mask)
  enc_state = tf.concat((state_fw_h, state_bw_h), axis=-1)
  return enc_out, enc_state


def greedy_search(embedding, enc_out, enc_state, words_len, batch_sz, params, output_proj):
  cell = dec_cell(enc_out, words_len)
  init_state = cell.zero_state(batch_sz, tf.float32).clone(
    cell_state=enc_state)
  
  # Decode greedily, starting every sequence from token id 1 and stopping
  # at token id 2.
  helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    embedding = embedding,
    start_tokens = tf.tile(tf.constant([1], tf.int32), [batch_sz]),
    end_token = 2)
  decoder = tf.contrib.seq2seq.BasicDecoder(
    cell = cell,
    helper = helper,
    initial_state = init_state,
    output_layer = output_proj)
  decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
    decoder = decoder,
    maximum_iterations = params['max_len'])
  
  return decoder_output.sample_id


def beam_search(embedding, enc_out, enc_state, words_len, batch_sz, params, output_proj):
  # Tile every encoder tensor beam_width times so each beam hypothesis sees
  # its own copy of the memory.
  enc_out_t = [tf.contrib.seq2seq.tile_batch(e, params['beam_width']) for e in enc_out]
  enc_state_t = tf.contrib.seq2seq.tile_batch(enc_state, params['beam_width'])
  # A None length is kept as None (that memory is unmasked).
  enc_seq_len_t = []
  for length in words_len:
    if length is not None:
      length = tf.contrib.seq2seq.tile_batch(length, params['beam_width'])
    enc_seq_len_t.append(length)
  
  cell = dec_cell(enc_out_t, enc_seq_len_t)
  
  init_state = cell.zero_state(batch_sz*params['beam_width'], tf.float32).clone(
    cell_state=enc_state_t)
  
  decoder = tf.contrib.seq2seq.BeamSearchDecoder(
    cell = cell,
    embedding = embedding,
    start_tokens = tf.tile(tf.constant([1], tf.int32), [batch_sz]),
    end_token = 2,
    initial_state = init_state,
    beam_width = params['beam_width'],
    output_layer = output_proj,
    length_penalty_weight = params['length_penalty'],
    coverage_penalty_weight = params['coverage_penalty'])
  decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
    decoder = decoder,
    maximum_iterations = params['max_len'])
  
  # predicted_ids: (batch, time, beam_width); keep only the best beam.
  return decoder_output.predicted_ids[:, :, 0]
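tf.contrib.seq2seq.tile_batch repeats each batch entry beam_width times along the batch axis, which is why zero_state above is built with batch_sz * beam_width. A quick sketch of its effect on made-up values:

x = tf.constant([[1, 2], [3, 4]])
tiled = tf.contrib.seq2seq.tile_batch(x, multiplier=3)
# tiled == [[1, 2], [1, 2], [1, 2], [3, 4], [3, 4], [3, 4]]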
In [0]:
def dynamic_memory_update(query, history):
  proj_1 = tf.layers.Dense(params['hidden_units'], params['activation'], name='attn_proj_1')
  proj_2 = tf.layers.Dense(1, use_bias=False, name='attn_proj_2')
  memory_proj = tf.layers.Dense(params['hidden_units'], params['activation'], name='memory_proj')

  # Multi-hop episodic memory in the style of Dynamic Memory Networks:
  # each hop attends over the history facts and refines the memory vector.
  memory = query
  for i in range(params['num_hops']):
    episode = gen_episode(memory,
                          query,
                          history,
                          proj_1,
                          proj_2)
    memory = memory_proj(tf.concat([memory, episode, query], 1))

  # Use the final memory as both c and h of the decoder's initial LSTM state,
  # replicated per layer if the decoder is multi-layer.
  lstm_memory = tf.nn.rnn_cell.LSTMStateTuple(c=memory, h=memory)
  if params['dec_layers'] > 1:
    lstm_memory = tuple(params['dec_layers'] * [lstm_memory])
  
  return lstm_memory


def gen_episode(memory, q_vec, fact_vecs, proj_1, proj_2):
  # Score one history "fact" against the query and the current memory.
  def gen_attn(fact_vec):
    features = [fact_vec * q_vec,
                fact_vec * memory,
                tf.abs(fact_vec - q_vec),
                tf.abs(fact_vec - memory)]
    feature_vec = tf.concat(features, 1)
    attention = proj_1(feature_vec)
    attention = proj_2(attention)
    return tf.squeeze(attention, 1)
  
  attns = tf.map_fn(gen_attn, tf.transpose(fact_vecs, [1,0,2]))  # (num_fact, batch)
  attns = tf.transpose(attns)                                    # (batch, num_fact)
  attns = params['gating_fn'](attns)                             # gate each fact
  attns = tf.expand_dims(attns, -1)                              # (batch, num_fact, 1)
  episode = tf.matmul(fact_vecs, attns, transpose_a=True)        # (batch, hidden, 1)
  episode = tf.squeeze(episode, -1)                              # (batch, hidden)

  return episode
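As a shape sanity check, here is a hypothetical run through one episode and the full memory update, with a batch of 4, 7 history turns and 300 hidden units. It assumes params (defined further below) is already loaded so that params['gating_fn'] and friends resolve; the projections p1/p2 are stand-ins for the named layers created in dynamic_memory_update:

p1 = tf.layers.Dense(300, tf.nn.relu)
p2 = tf.layers.Dense(1, use_bias=False)
episode = gen_episode(memory=tf.zeros([4, 300]),
                      q_vec=tf.zeros([4, 300]),
                      fact_vecs=tf.zeros([4, 7, 300]),
                      proj_1=p1, proj_2=p2)
print(episode.shape)  # (4, 300): a gated summary of the facts per example

state = dynamic_memory_update(tf.zeros([4, 300]), tf.zeros([4, 7, 300]))
print(state.c.shape, state.h.shape)  # (4, 300) (4, 300) with dec_layers == 1

The resulting LSTMStateTuple has exactly the shape the decoder's LSTMCell(300) expects, so it can be cloned in as the initial cell_state.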
In [0]:
def forward(features, labels, mode):
  history = features['history']
  query = features['query']

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)
  batch_sz = tf.shape(query)[0]

  query_valid_len = tf.count_nonzero(query, 1, dtype=tf.int32)
  query_mask = tf.sign(query)  # 1 for real tokens, 0 for padding (id 0)

  num_history = tf.shape(history)[1]
  history_len = tf.shape(history)[2]
  # Fold the history turns into the batch dimension for encoding.
  history = tf.reshape(history, (batch_sz*num_history, history_len))
  history_mask = tf.sign(history)
  
  
  with tf.variable_scope('Embedding'):
    # Pre-trained fastText character vectors, fine-tuned with the model.
    embedding = tf.Variable(np.load('../vocab/char.npy'),
                            dtype=tf.float32,
                            name='fasttext_vectors')
    def embed_fn(x):
      return tf.nn.embedding_lookup(embedding, x)
    query = embed_fn(query)
    history = embed_fn(history)
  
  
  with tf.variable_scope('Encoder'):
    # One bidirectional LSTM encoder is shared by the query and the history.
    encoder = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
      params['hidden_units'], return_state=True, return_sequences=True, zero_output_for_mask=True))

    query_out, query_state = bilstm_encode(encoder, query, query_mask)
    history_out, history_state = bilstm_encode(encoder, history, history_mask)

    feat_proj_1 = tf.layers.Dense(params['hidden_units'], params['activation'], name='feat_proj_1')
    feat_proj_2 = tf.layers.Dense(params['hidden_units'], params['activation'], name='feat_proj_2')

    # Summarize a sequence: final states concatenated with max-pooled
    # outputs, then projected back down to hidden_units.
    def feat_engine(out, state):
      x = tf.concat([state, tf.reduce_max(out, 1)], -1)
      x = feat_proj_1(x)
      x = feat_proj_2(x)
      return x
    
    query_feat = feat_engine(query_out, query_state)
    history_feat = tf.reshape(feat_engine(history_out, history_state), (batch_sz, num_history, params['hidden_units']))


  with tf.variable_scope('Dynamic_Memory'):
    # Multi-hop memory over the per-turn history features; the result seeds
    # the decoder's initial LSTM state, while the raw encoder outputs serve
    # as the attention memories.
    dynamic_memory = dynamic_memory_update(query_feat, history_feat)
    query_memory = query_out
    history_memory = tf.reshape(history_out, (batch_sz, num_history*history_len, 2*params['hidden_units']))
    static_memory = [query_memory, history_memory]
    # The flattened history carries no explicit length, hence None (unmasked).
    memory_len = [query_valid_len, None]


  with tf.variable_scope('Decoder'):
    # out_dim must equal the number of embedding rows (vocab size + 1).
    output_proj = TiedDense(embedding, len(params['char2idx'])+1)
    return beam_search(embedding, static_memory, dynamic_memory, memory_len, batch_sz, params, output_proj)
In [0]:
def model_fn(features, labels, mode, params):
  logits_or_ids = forward(features, labels, mode)
  # Only PREDICT is implemented: this notebook just exports the inference graph.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode, predictions=logits_or_ids)
In [0]:
def get_vocab(f_path):
  # One token per line; the line number becomes the token id.
  word2idx = {}
  with open(f_path) as f:
    for i, line in enumerate(f):
      line = line.rstrip('\n')
      word2idx[line] = i
  return word2idx
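Given that decoding starts from id 1 and stops at id 2 (see greedy_search/beam_search) and that id 0 is treated as padding by the tf.sign masks, the first lines of the vocab file presumably hold the special symbols. A hypothetical layout:

# ../vocab/char.txt (hypothetical first lines)
#   line 0: <pad>   -> padding id the masks rely on
#   line 1: <start> -> first decoder input
#   line 2: <end>   -> terminates decoding
#   line 3+: ordinary characters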
In [0]:
params = {
    'model_dir': '../model/baseline_lstm',
    'export_dir': '../model/baseline_lstm_beam_export',
    'vocab_path': '../vocab/char.txt',
    'max_len': 30,
    'activation': tf.nn.relu,
    'hidden_units': 300,
    'dec_layers': 1,
    'num_hops': 3,
    'gating_fn': tf.sigmoid,
    'beam_width': 10,
    'length_penalty': .0,
    'coverage_penalty': .0,
}
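Note the couplings baked into these numbers: the two attention layers (hidden_units//2 each) concatenate back to hidden_units, and the tied output projection requires the decoder output width to equal the embedding width. A quick consistency check, assuming char.npy holds 300-dim vectors:

embed_dim = np.load('../vocab/char.npy').shape[1]
assert params['hidden_units'] == embed_dim  # both 300, or TiedDense breaks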
In [0]:
params['char2idx'] = get_vocab(params['vocab_path'])
params['idx2char'] = {idx: char for char, idx in params['char2idx'].items()}
In [0]:
def serving_input_receiver_fn():
  # query: (batch, time) int ids; history: (batch, num_turns, time) int ids.
  query = tf.placeholder(tf.int32, [None, None], 'query')
  history = tf.placeholder(tf.int32, [None, None, None], 'history')

  features = {'query': query, 'history': history}
  receiver_tensors = features

  return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
In [0]:
estimator = tf.estimator.Estimator(model_fn, params['model_dir'])
estimator.export_saved_model(params['export_dir'], serving_input_receiver_fn)
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '../model/baseline_lstm', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f24fad50cc0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
WARNING:tensorflow:Estimator's model_fn (<function model_fn at 0x7f250137b048>) includes params argument, but params are not passed to Estimator.
INFO:tensorflow:Calling model_fn.
WARNING:tensorflow:Entity <bound method TiedDense.call of <__main__.TiedDense object at 0x7f24faa2ef98>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Bad argument number for Name: 3, expecting 4
WARNING: Entity <bound method TiedDense.call of <__main__.TiedDense object at 0x7f24faa2ef98>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Bad argument number for Name: 3, expecting 4
WARNING:tensorflow:From /tensorflow-1.15.2/python3.6/tensorflow_core/contrib/seq2seq/python/ops/beam_search_decoder.py:971: to_int64 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['serving_default']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Restoring parameters from ../model/baseline_lstm/model.ckpt-35610
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: ../model/baseline_lstm_beam_export/temp-b'1587959561'/saved_model.pb
Out[0]:
b'../model/baseline_lstm_beam_export/1587959561'
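One way to query the exported SavedModel from Python is tf.contrib.predictor. A sketch, assuming the export path printed above, made-up character ids, and the default 'output' signature key that Estimator assigns when predictions is a single tensor:

from tensorflow.contrib import predictor

predict_fn = predictor.from_saved_model('../model/baseline_lstm_beam_export/1587959561')
result = predict_fn({
  'query': [[5, 6, 7]],           # hypothetical character ids
  'history': [[[5, 6], [8, 9]]],  # one example with two history turns
})
ids = result['output'][0]         # best-beam ids for the first example
print(''.join(params['idx2char'][i] for i in ids if i > 2))  # drop specials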