#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Practice sentences, written to stdout one per line in the order listed.
# The last two mix quote styles so each string can contain the other kind
# of quote without escaping.
practice_lines = (
    "Hello World!",
    "Hello Again",
    "I like typing this.",
    "This is fun.",
    'Yay! Printing.',
    "I'd much rather you 'not'.",
    'I "said" do not touch this.',
)
print(*practice_lines, sep="\n")


# In[5]:


import requests
import re
from gensim.parsing.preprocessing import remove_stopwords
from gensim.corpora.dictionary import Dictionary
from gensim.models.ldamodel import LdaModel
import pyLDAvis
import pyLDAvis.gensim

# Fit an LDA topic model to "Pride and Prejudice", treating every paragraph
# as one document, and render the result with pyLDAvis.

# Download the plain-text edition.  The timeout keeps a stalled connection
# from hanging the cell, and raise_for_status() stops an HTTP error page from
# being parsed as if it were the novel.
response = requests.get(
    "http://www.gutenberg.org/cache/epub/42671/pg42671.txt", timeout=30
)
response.raise_for_status()
pride_and_prejudice = response.text

# Keep only the text between the first chapter heading and the Project
# Gutenberg end-of-book marker.  str.index raises ValueError if either
# marker is missing from the download.
body_start = pride_and_prejudice.index("CHAPTER I.")
body_end = pride_and_prejudice.index(
    "*** END OF THE PROJECT GUTENBERG EBOOK PRIDE AND PREJUDICE ***"
)
novel = pride_and_prejudice[body_start:body_end]

# Strip chapter headings.  The numeral class includes L and C so that
# headings such as "CHAPTER XL." are removed too; "[XIV]+" could not match
# any numeral containing an L.
novel = re.sub(r"CHAPTER [IVXLC]+\.", "", novel)

# One token list per paragraph.  Paragraphs are separated by one or more
# blank lines (either "\r\n" or "\n" line endings).
paragraphs = []
for paragraph in re.split(r"(?:\r?\n){2,}", novel):
    # Tokenise and lower-case BEFORE removing stop words: remove_stopwords
    # compares whitespace-separated tokens against a lower-case list, so
    # "The" or "it," would otherwise slip through.
    words = re.findall(r"\w+", paragraph.lower())
    tokens = remove_stopwords(" ".join(words)).split()
    # Skip paragraphs with nothing left; empty documents (and the empty-string
    # tokens that re.split(r"\W+", ...) used to produce) only add noise.
    if tokens:
        paragraphs.append(tokens)

# d: token <-> integer id mapping; c: bag-of-words vector for each paragraph.
d = Dictionary(paragraphs)
c = [d.doc2bow(paragraph) for paragraph in paragraphs]

# id2word lets the model itself report topics as words rather than integer ids.
m = LdaModel(c, id2word=d, num_topics=15, alpha='auto')

# Last expression of the cell: the notebook renders the returned visualisation.
pyLDAvis.display(pyLDAvis.gensim.prepare(m, c, d))


# In[ ]: