In [1]:
# Base name for this notebook's artifacts; it becomes the sub-directory under data/.
FN = "tf-projector"

Using TensorFlow's TensorBoard to project a NumPy embedding matrix to 3D/2D

In [2]:
import os

# Directory that receives the checkpoint, metadata and projector config: data/<FN>.
LOG_DIR = os.path.join("data", FN)
In [3]:
# Create the log directory (and any missing parents); -p makes this a no-op if it already exists.
!mkdir -p {LOG_DIR}
In [4]:
import numpy as np

# Stand-in embedding matrix: one row per vocabulary entry, one column per
# embedding dimension, with uniform values in [0, 1) stored as float32.
# A dedicated, seeded RandomState keeps the matrix (and therefore the saved
# checkpoint) identical across "Restart & Run All" without touching NumPy's
# global random state.
rng = np.random.RandomState(0)
my_embedding = rng.random_sample((10000, 100)).astype(np.float32)
vocabulary_size, embedding_size = my_embedding.shape
# Echo the sizes and dtype as the cell output.
vocabulary_size, embedding_size, my_embedding.dtype
Out[4]:
(10000, 100, dtype('float32'))
In [5]:
# You don't need to normalize the embedding rows because TensorBoard will do it for you.
# my_embedding /= np.sqrt((my_embedding * my_embedding).sum(axis=-1, keepdims=True))
In [6]:
import tensorflow as tf

# Build a dedicated graph holding a single variable whose initial value is the
# NumPy embedding matrix, plus the op that initializes it.
graph = tf.Graph()
with graph.as_default():
    initial_value = tf.constant(my_embedding)
    embedding_var = tf.Variable(initial_value)
    init = tf.global_variables_initializer()
In [7]:
# Initialize the variable and save it under LOG_DIR as checkpoint
# "model.ckpt" at global step 0.
checkpoint_prefix = LOG_DIR + "/model.ckpt"
with tf.Session(graph=graph) as session:
    session.run(init)
    saver = tf.train.Saver()
    saver.save(session, checkpoint_prefix, global_step=0)
In [8]:
from tensorflow.contrib.tensorboard.plugins import projector

# The writer has to point at the same LOG_DIR that holds the checkpoint.
summary_writer = tf.summary.FileWriter(logdir=LOG_DIR)
In [9]:
# Schema: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto
config = projector.ProjectorConfig()

# A config may hold several embeddings; this notebook registers just one.
# The entry names the checkpointed tensor and links it to its metadata file
# (e.g. labels).
embedding = config.embeddings.add(
    tensor_name=embedding_var.name,
    metadata_path=os.path.join(LOG_DIR, 'metadata.tsv'))

# Write the configuration file that TensorBoard reads during startup.
projector.visualize_embeddings(summary_writer, config)

The documentation says you don't need a header line for a single-column metadata file, but the code says otherwise.

In [10]:
from __future__ import print_function

# Write the metadata file: a 'Name' header line followed by one label per
# embedding row (label0 .. label<vocabulary_size - 1>).
# `range` (rather than the Python 2-only `xrange`) keeps this cell runnable on
# both Python 2 and Python 3, in line with the print_function import above.
with open(embedding.metadata_path, 'w') as fp:
    print('Name', file=fp)
    for i in range(vocabulary_size):
        print('label%d' % i, file=fp)
In [11]:
# Show the first lines of the metadata file: the header followed by the first labels.
!head {embedding.metadata_path}
Name
label0
label1
label2
label3
label4
label5
label6
label7
label8
In [12]:
# Count the lines in the metadata file; expect vocabulary_size labels plus one header line.
!wc -l {embedding.metadata_path}
   10001 data/tf-projector/metadata.tsv
In [13]:
!ls data/{FN}
checkpoint                       model.ckpt-0.index
metadata.tsv                     model.ckpt-0.meta
model.ckpt-0.data-00000-of-00001 projector_config.pbtxt
In [ ]:
!tensorboard --logdir=data/{FN}

Open the link printed by the `tensorboard` command above, then click on the EMBEDDINGS tab at the top right.