# Name of this experiment; it is joined with 'data' to form the log directory.
FN = "tf-projector"
Using TensorFlow's TensorBoard to project a NumPy embedding matrix to 3D/2D
import os
# Directory that receives the checkpoint, the projector config and the
# metadata file.
LOG_DIR = os.path.join('data', FN)
# Create the directory from Python instead of the `!mkdir -p` shell escape so
# this step also works outside IPython and on systems without a POSIX shell.
# An already existing directory is not an error (same semantics as `mkdir -p`);
# the try/except form is used because it works on both Python 2 and Python 3.
try:
    os.makedirs(LOG_DIR)
except OSError:
    if not os.path.isdir(LOG_DIR):
        raise
import numpy as np
# Stand-in embedding matrix filled with random float32 values:
# one row per vocabulary entry, one column per embedding dimension.
my_embedding = np.random.random(size=(10000, 100)).astype(np.float32)
vocabulary_size = my_embedding.shape[0]
embedding_size = my_embedding.shape[1]
# Bare expression so the notebook displays the sizes and dtype.
(vocabulary_size, embedding_size, my_embedding.dtype)
(10000, 100, dtype('float32'))
# You don't need to normalize because TensorBoard will do it for you
# embedding /= np.sqrt((embedding*embedding).sum(axis=-1,keepdims=1))
import tensorflow as tf
# Build a dedicated graph whose only variable holds the embedding matrix.
graph = tf.Graph()
with graph.as_default():
    embedding_var = tf.Variable(tf.constant(my_embedding))
    init = tf.global_variables_initializer()

# Initialize the variable and save it as a checkpoint inside LOG_DIR.
with tf.Session(graph=graph) as session:
    init.run()
    saver = tf.train.Saver()
    # The trailing 0 is the global step; it is appended to the checkpoint
    # file name ("model.ckpt-0").
    saver.save(session, os.path.join(LOG_DIR, "model.ckpt"), 0)
from tensorflow.contrib.tensorboard.plugins import projector
# Describe what the projector should display. The message layout is defined in
# tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto
config = projector.ProjectorConfig()

# Several embeddings may be registered; this notebook registers just one.
embedding = config.embeddings.add()
# Identify the tensor to visualize by the name of the variable created earlier.
embedding.tensor_name = embedding_var.name
# Associate the tensor with its metadata file (e.g. labels).
embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')

# The writer has to point at the same LOG_DIR that holds the checkpoint.
summary_writer = tf.summary.FileWriter(LOG_DIR)
# Write the configuration file that TensorBoard reads during startup.
projector.visualize_embeddings(summary_writer, config)
The documentation says you don't need a header line for a single-column file, but the code says otherwise.
from __future__ import print_function
# Write the metadata file: a 'Name' header line followed by one label per
# vocabulary entry (label0, label1, ...).
with open(embedding.metadata_path, 'w') as fp:
    print('Name', file=fp)
    # range() rather than xrange() so the cell runs on Python 2 and Python 3.
    for i in range(vocabulary_size):
        print('label%d' % i, file=fp)
# Show the first lines of the metadata file to check the header and labels.
!head {embedding.metadata_path}
Name label0 label1 label2 label3 label4 label5 label6 label7 label8
# Count the lines: expect one header line plus one label per vocabulary entry.
!wc -l {embedding.metadata_path}
10001 data/tf-projector/metadata.tsv
!ls data/{FN}
checkpoint model.ckpt-0.index metadata.tsv model.ckpt-0.meta model.ckpt-0.data-00000-of-00001 projector_config.pbtxt
!tensorboard --logdir=data/{FN}
Click on this link, then click on EMBEDDING at the top right.