In [1]:
# Write the seven practice sentences to stdout, one per line, in a single
# print call. Both quote styles are used so that the embedded apostrophes and
# double quotes need no escaping.
print(
    "\n".join(
        (
            "Hello World!",
            "Hello Again",
            "I like typing this.",
            "This is fun.",
            'Yay! Printing.',
            "I'd much rather you 'not'.",
            'I "said" do not touch this.',
        )
    )
)
Hello World!
Hello Again
I like typing this.
This is fun.
Yay! Printing.
I'd much rather you 'not'.
I "said" do not touch this.
In [5]:
import requests
import re
from gensim.parsing.preprocessing import remove_stopwords
from gensim.corpora.dictionary import Dictionary
from gensim.models.ldamodel import LdaModel
import pyLDAvis
import pyLDAvis.gensim

# --- Configuration -----------------------------------------------------------
# Plain-text edition of "Pride and Prejudice" on Project Gutenberg.
BOOK_URL = "http://www.gutenberg.org/cache/epub/42671/pg42671.txt"
# The novel proper is taken to lie between these two markers; everything
# outside them is discarded.
START_MARKER = "CHAPTER I."
END_MARKER = "***END OF THE PROJECT GUTENBERG EBOOK PRIDE AND PREJUDICE***"
NUM_TOPICS = 15
SEED = 42  # fixed seed so Restart & Run All reproduces the same topics
REQUEST_TIMEOUT_S = 30

# --- Download ----------------------------------------------------------------
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# an HTTP error page from being tokenised as if it were the book.
response = requests.get(BOOK_URL, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()
pride_and_prejudice = response.text

# --- Extract the novel body --------------------------------------------------
# str.index raises ValueError if either marker is missing, which is preferable
# to silently modelling the wrong slice of text.
book_body = pride_and_prejudice[
    pride_and_prejudice.index(START_MARKER):pride_and_prejudice.index(END_MARKER)
]
# Strip the chapter headings before lowercasing (the pattern is upper-case).
book_body = re.sub(r"CHAPTER [XIV]+\.", "", book_body)


def _tokenize(paragraph_text):
    """Return the non-empty, lowercased, stopword-free word tokens of a paragraph.

    The text is lowercased first because ``remove_stopwords`` compares words
    case-sensitively against a lowercase list, so capitalised stopwords would
    otherwise survive. Splitting on ``\\W+`` yields empty strings at the edges
    of the text; those are dropped so that ``""`` never becomes a vocabulary
    term.
    """
    cleaned = remove_stopwords(paragraph_text.lower())
    return [token for token in re.split(r"\W+", cleaned) if token]


# --- Build the corpus --------------------------------------------------------
# One document per blank-line-separated paragraph. Paragraphs left with no
# tokens after filtering are skipped so the corpus has no empty documents.
paragraphs = [
    tokens
    for tokens in map(_tokenize, re.split(r"\r\n\r\n", book_body))
    if tokens
]
d = Dictionary(paragraphs)
c = [d.doc2bow(paragraph) for paragraph in paragraphs]

# --- Fit and visualise -------------------------------------------------------
# id2word lets the model report topics as words instead of integer ids;
# random_state makes the stochastic training reproducible.
m = LdaModel(c, num_topics=NUM_TOPICS, alpha='auto', id2word=d, random_state=SEED)
pyLDAvis.display(pyLDAvis.gensim.prepare(m, c, d))
Out[5]:
In [ ]: