#!/usr/bin/env python
# coding: utf-8

# Ref: Srivastava & Sutton. [Autoencoding Variational Inference for Topic Models](https://openreview.net/pdf?id=BybtVK9lg). In _ICLR_, 2017.

# In[1]:

from keras import backend as K
from keras.layers import Input, Dense, Lambda, Activation, Dropout, BatchNormalization, Layer
from keras.models import Model
from keras.optimizers import Adam
from keras.datasets import reuters
from keras.callbacks import EarlyStopping
import numpy as np


# In[2]:

# Load the Reuters newswire corpus as word-index sequences. num_words=V keeps the V most
# frequent words (dropping words with frequency <= 5); index_from=-1 shifts the indices
# to zero-origin so they line up with np.bincount below.
V = 10922
(x_train, _), (_, _) = reuters.load_data(start_char=None, oov_char=None, index_from=-1, num_words=V)
word_index = reuters.get_word_index()
index2word = {v - 1: k for k, v in word_index.items()}  # zero-origin word index

# Bag-of-words representation: each document becomes a V-dimensional vector of word counts.
x_train = np.array([np.bincount(doc, minlength=V) for doc in x_train])
x_train = x_train[:8000, :]


# In[3]:

num_hidden = 100
num_topic = 20
batch_size = 100
alpha = 1. / 20  # symmetric Dirichlet hyperparameter


# In[4]:

# Laplace approximation of the Dirichlet(alpha) prior in the softmax basis (Srivastava & Sutton, 2017):
# a diagonal Gaussian with mean mu1 and variance sigma1. For a symmetric alpha this reduces to
# mu1 = 0 and sigma1 = (1/alpha) * (1 - 1/num_topic).
mu1 = np.log(alpha) - 1 / num_topic * num_topic * np.log(alpha)
sigma1 = 1. / alpha * (1 - 2. / num_topic) + 1 / (num_topic ** 2) * num_topic / alpha
inv_sigma1 = 1. / sigma1
log_det_sigma = num_topic * np.log(sigma1)


# In[5]:

# Encoder: two softplus layers produce the parameters of the variational posterior q(z|x).
x = Input(batch_shape=(batch_size, V))
h = Dense(num_hidden, activation='softplus')(x)
h = Dense(num_hidden, activation='softplus')(h)
z_mean = BatchNormalization()(Dense(num_topic)(h))
z_log_var = BatchNormalization()(Dense(num_topic)(h))


def sampling(args):
    """Reparameterization trick: z = mu + sigma * epsilon with epsilon ~ N(0, I)."""
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, num_topic), mean=0., stddev=1.)
    return z_mean + K.exp(z_log_var / 2) * epsilon


# Decoder (ProdLDA): topic proportions theta = softmax(z); the word distribution of a
# document is softmax(BatchNorm(theta . beta)).
unnormalized_z = Lambda(sampling, output_shape=(num_topic,))([z_mean, z_log_var])
theta = Activation('softmax')(unnormalized_z)
theta = Dropout(0.5)(theta)
doc = Dense(units=V)(theta)
doc = BatchNormalization()(doc)
doc = Activation('softmax')(doc)


# In[6]:

# Custom loss layer: attaches the negative ELBO to the model via add_loss().
class CustomVariationalLayer(Layer):
    def __init__(self, **kwargs):
        self.is_placeholder = True
        super(CustomVariationalLayer, self).__init__(**kwargs)

    def vae_loss(self, x, inference_x):
        # Reconstruction term: expected log-likelihood of the observed word counts under the
        # decoder's word distribution (a small constant guards against log(0)).
        decoder_loss = K.sum(x * K.log(inference_x + 1e-10), axis=-1)
        # Negative KL divergence between q(z|x) and the Gaussian prior N(mu1, sigma1*I), with mu1 = 0.
        encoder_loss = -0.5 * (K.sum(inv_sigma1 * K.exp(z_log_var) + K.square(z_mean) * inv_sigma1
                                     - 1 - z_log_var, axis=-1) + log_det_sigma)
        # Negative ELBO, averaged over the batch.
        return -K.mean(encoder_loss + decoder_loss)

    def call(self, inputs):
        x = inputs[0]
        inference_x = inputs[1]
        loss = self.vae_loss(x, inference_x)
        self.add_loss(loss, inputs=inputs)
        # We won't actually use the output.
        return x


# In[7]:

# The loss is added inside CustomVariationalLayer, so compile with loss=None.
y = CustomVariationalLayer()([x, doc])
prodLDA = Model(x, y)
prodLDA.compile(optimizer=Adam(lr=0.001, beta_1=0.99), loss=None)


# In[8]:

prodLDA.fit(x_train, verbose=1, batch_size=batch_size, validation_split=0.1,
            callbacks=[EarlyStopping(patience=3)], epochs=20)


# In[9]:

# get_weights()[-6] is the kernel of the Dense(V) decoder layer (shape: num_topic x V),
# which plays the role of the topic-word matrix beta. Exponentiate and normalize each
# topic over the vocabulary to obtain phi.
exp_beta = np.exp(prodLDA.get_weights()[-6]).T
phi = (exp_beta / np.sum(exp_beta, axis=0)).T


# In[10]:

# Print the ten highest-probability words for each topic.
for k, phi_k in enumerate(phi):
    print('topic: {}'.format(k))
    for w in np.argsort(phi_k)[::-1][:10]:
        print(index2word[w], phi_k[w])
    print()
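
# A quick sanity check of the Laplace-approximation prior from In[4]: for a symmetric Dirichlet,
# the approximate Gaussian prior has mean 0 and variance (1/alpha) * (1 - 1/num_topic), which is
# 20 * 0.95 = 19 for alpha = 1/20 and num_topic = 20. This cell is only an illustrative check;
# `expected_sigma1` is a name introduced here and not part of the model.

# In[ ]:

expected_sigma1 = (1. / alpha) * (1. - 1. / num_topic)  # = 19.0 for these hyperparameters
print('mu1 =', mu1, 'sigma1 =', sigma1, 'expected sigma1 =', expected_sigma1)
assert np.isclose(mu1, 0.0) and np.isclose(sigma1, expected_sigma1)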
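
# The cell below is a minimal sketch of how the trained encoder could be reused to inspect
# per-document topic proportions: it maps documents to the mean of q(z|x) and applies a softmax.
# It assumes the graph tensors `x` and `z_mean` defined above are still in scope; `encoder`,
# `z_batch`, and `theta_docs` are names introduced here purely for illustration.

# In[ ]:

# Deterministic encoder: document counts -> mean of the variational posterior q(z|x).
encoder = Model(x, z_mean)

# The Input layer was built with a fixed batch_shape, so predict on a multiple of batch_size.
z_batch = encoder.predict(x_train[:batch_size], batch_size=batch_size)

# Softmax over the latent means gives approximate topic proportions for each document.
z_shifted = z_batch - z_batch.max(axis=1, keepdims=True)  # subtract the row max for stability
theta_docs = np.exp(z_shifted) / np.exp(z_shifted).sum(axis=1, keepdims=True)

# Show the three most prominent topics of the first few documents.
for d in range(3):
    top = np.argsort(theta_docs[d])[::-1][:3]
    print('doc {}: topics {}'.format(d, [(int(k), round(float(theta_docs[d, k]), 3)) for k in top]))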