#!/usr/bin/env python
# coding: utf-8
"""Notebook export: print warm-up, then LDA topic modelling of a novel.

The second cell downloads the Project Gutenberg plain text of
*Pride and Prejudice*, treats every paragraph as one document, fits a
15-topic gensim LDA model and renders it with pyLDAvis.
"""

# In[1]:

print("Hello World!")
print("Hello Again")
print("I like typing this.")
print("This is fun.")
print('Yay! Printing.')
print("I'd much rather you 'not'.")
print('I "said" do not touch this.')


# In[5]:

import re

import requests
from gensim.corpora.dictionary import Dictionary
from gensim.models.ldamodel import LdaModel
from gensim.parsing.preprocessing import remove_stopwords
import pyLDAvis
# NOTE(review): newer pyLDAvis releases appear to ship this module as
# `pyLDAvis.gensim_models` -- confirm against the installed version.
import pyLDAvis.gensim

BOOK_URL = "https://www.gutenberg.org/cache/epub/42671/pg42671.txt"
# Markers delimiting the novel itself inside the Gutenberg boilerplate.
BODY_START = "CHAPTER I."
BODY_END = "*** END OF THE PROJECT GUTENBERG EBOOK PRIDE AND PREJUDICE ***"

# Fail fast on a hung connection or an HTTP error instead of modelling
# whatever error page came back.
response = requests.get(BOOK_URL, timeout=30)
response.raise_for_status()
pride_and_prejudice = response.text

# Keep only the text between the first chapter heading and the end-of-book
# marker (str.index raises ValueError if either marker is missing), then
# strip the chapter headings so they do not become tokens.
body = pride_and_prejudice[
    pride_and_prejudice.index(BODY_START):pride_and_prejudice.index(BODY_END)
]
body = re.sub(r"CHAPTER [XIV]+\.", "", body)

# One document per paragraph.  Paragraphs are separated by blank lines; both
# CRLF and LF line endings are accepted.
paragraphs = []
for paragraph in re.split(r"(?:\r?\n){2,}", body):
    # Lower-case and tokenize *before* stopword removal: remove_stopwords
    # splits on whitespace and compares case-sensitively, so "The" or "her,"
    # would otherwise slip through.  findall also never yields the empty
    # tokens that re.split(r"\W+", ...) produces at paragraph edges.
    words = re.findall(r"\w+", paragraph.lower())
    tokens = remove_stopwords(" ".join(words)).split()
    if tokens:  # skip paragraphs that were blank or entirely stopwords
        paragraphs.append(tokens)

d = Dictionary(paragraphs)
c = [d.doc2bow(paragraph) for paragraph in paragraphs]
# id2word lets the model report topics as words rather than integer ids.
m = LdaModel(c, num_topics=15, alpha="auto", id2word=d)
pyLDAvis.display(pyLDAvis.gensim.prepare(m, c, d))


# In[ ]: