import json
import pyLDAvis as vis
The load_R_model function loads LDA model data that ships with the LDAvisData package. The data was extracted from the R data files into JSON using this script.
def load_R_model(filename):
    """Load LDA model inputs from a JSON file and rename them for pyLDAvis.

    The JSON object is expected to use the R-style field names ``phi``,
    ``theta``, ``doc.length``, ``vocab`` and ``term.frequency``. They are
    mapped onto the keyword names passed to ``vis.prepare`` elsewhere in
    this file. Any other keys in the JSON object are ignored.

    Args:
        filename: Path to the JSON file to read.

    Returns:
        A dict with the keys ``topic_term_dists``, ``doc_topic_dists``,
        ``doc_lengths``, ``vocab`` and ``term_frequency``.

    Raises:
        KeyError: If one of the expected fields is missing from the JSON.
    """
    # JSON text is UTF-8; being explicit keeps non-ASCII vocabulary terms
    # intact on platforms whose default locale encoding is something else.
    with open(filename, 'r', encoding='utf-8') as j:
        data_input = json.load(j)
    data = {'topic_term_dists': data_input['phi'],
            'doc_topic_dists': data_input['theta'],
            'doc_lengths': data_input['doc.length'],
            'vocab': data_input['vocab'],
            'term_frequency': data_input['term.frequency']}
    return data
def vis_R_model(filename):
    """Load a model JSON file and display its pyLDAvis visualization.

    Reads the model inputs with ``load_R_model``, passes them as keyword
    arguments to ``vis.prepare``, and returns whatever ``vis.display``
    returns for the prepared data.
    """
    model_inputs = load_R_model(filename)
    prepared = vis.prepare(**model_inputs)
    return vis.display(prepared)
This model was trained on a corpus of 2000 movie reviews parsed by Pang and Lee (ACL, 2004), originally gathered from the IMDB archive of the rec.arts.movies.reviews newsgroup.
# Load the movie-review model inputs from JSON, keyed by the keyword names
# that are passed to vis.prepare.
movies_model_data = load_R_model('data/movie_reviews_input.json')
# Build the prepared visualization data from those inputs.
movies_pd = vis.prepare(**movies_model_data)
# Display the prepared data.
vis.display(movies_pd)
This model was trained on a corpus of 2246 documents from the Associated Press made available by Blei.
# Load the model inputs from data/ap_input.json, prepare them, and display the result.
vis_R_model('data/ap_input.json')
This model was trained on a corpus of over 200,000 Jeopardy questions.
# Load the model inputs from data/jeopardy_input.json, prepare them, and display the result.
vis_R_model('data/jeopardy_input.json')