#!/usr/bin/env python
# coding: utf-8

# In[1]:

from __future__ import print_function

FN = 'tf-projector'
# Using TensorFlow's TensorBoard to project a numpy embedding matrix to 3D/2D
# https://www.tensorflow.org/versions/master/how_tos/embedding_viz/

# In[2]:

import os

LOG_DIR = os.path.join('data', FN)

# In[3]:

get_ipython().system('mkdir -p {LOG_DIR}')

# In[4]:

import numpy as np

# A random placeholder matrix: 10,000 "words", each with a 100-dimensional vector.
my_embedding = np.random.random((10000, 100)).astype(np.float32)
vocabulary_size, embedding_size = my_embedding.shape
vocabulary_size, embedding_size, my_embedding.dtype

# In[5]:

# You don't need to normalize; TensorBoard will do it for you:
# my_embedding /= np.sqrt((my_embedding * my_embedding).sum(axis=-1, keepdims=1))

# In[6]:

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    # Wrap the numpy matrix in a Variable so it can be checkpointed.
    embedding_var = tf.Variable(tf.constant(my_embedding))
    init = tf.global_variables_initializer()

# In[7]:

with tf.Session(graph=graph) as session:
    init.run()
    # The projector reads the embedding values back from a checkpoint file.
    saver = tf.train.Saver()
    saver.save(session, os.path.join(LOG_DIR, 'model.ckpt'), 0)

# In[8]:

from tensorflow.contrib.tensorboard.plugins import projector

# Use the same LOG_DIR where you stored your checkpoint.
summary_writer = tf.summary.FileWriter(LOG_DIR)

# In[9]:

# Format: tensorflow/contrib/tensorboard/plugins/projector/projector_config.proto
config = projector.ProjectorConfig()

# You can add multiple embeddings. Here we add only one.
embedding = config.embeddings.add()
embedding.tensor_name = embedding_var.name
# Link this tensor to its metadata file (e.g. labels).
embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')

# Saves a configuration file that TensorBoard will read during startup.
projector.visualize_embeddings(summary_writer, config)

# The documentation says you don't need a header line for a single-column
# metadata file, but the code says otherwise.

# In[10]:

with open(embedding.metadata_path, 'w') as fp:
    print('Name', file=fp)  # header line (see note above)
    for i in range(vocabulary_size):
        print('label%d' % i, file=fp)

# In[11]:

get_ipython().system('head {embedding.metadata_path}')

# In[12]:

get_ipython().system('wc -l {embedding.metadata_path}')

# In[13]:

get_ipython().system('ls data/{FN}')

# In[ ]:

get_ipython().system('tensorboard --logdir=data/{FN}')

# Click on [this link](http://localhost:6006) and click on `EMBEDDING` at the top right.
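# In[ ]:

# Optional: a minimal sketch of feeding the projector real vectors instead of
# the random placeholder above. The file names 'vectors.npy' (a float32 matrix)
# and 'vocab.txt' (one token per line, in the same row order) are hypothetical
# and not created by this notebook; adjust them to your data, then re-run the
# checkpoint and config cells (In[6]-In[9]):

# my_embedding = np.load('vectors.npy').astype(np.float32)  # hypothetical file
# with open('vocab.txt') as fp:                             # hypothetical file
#     labels = [line.strip() for line in fp]
# assert len(labels) == my_embedding.shape[0]
# with open(embedding.metadata_path, 'w') as fp:
#     print('Name', file=fp)  # keep the header line
#     for label in labels:
#         print(label, file=fp)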
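# In[ ]:

# Optional: the projector config can also show a thumbnail per point via a
# sprite sheet. A minimal sketch, assuming a 'sprite.png' (small thumbnails
# tiled row-major, 28x28 pixels each) in LOG_DIR that this notebook does not
# create:

# embedding.sprite.image_path = os.path.join(LOG_DIR, 'sprite.png')  # hypothetical file
# embedding.sprite.single_image_dim.extend([28, 28])  # width, height of one thumbnail
# projector.visualize_embeddings(summary_writer, config)  # rewrite the config file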