#!/usr/bin/env python
# coding: utf-8
"""Word Similarity Notes (cleaned-up Jupyter notebook export).

Demonstrates three approaches to word similarity / normalization:

1. Suffix stripping with NLTK's Porter stemmer.
2. Lemmatization and semantic similarity with WordNet.
3. Distributional word vectors trained with gensim's word2vec.

Requires the ``nltk`` and ``gensim`` packages, the NLTK WordNet corpus
(``nltk.download('wordnet')``), and — for the word2vec demo — a directory
of plain-text files (``WOTclean`` in the original notebook).
"""

import logging

import nltk  # noqa: F401 -- kept from the original export; corpus downloads use it
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
# Explicit import instead of the original `from nltk.stem.porter import *`:
# only PorterStemmer is actually used.
from nltk.stem.porter import PorterStemmer

import gensim  # noqa: F401 -- kept from the original export
from gensim.models import word2vec


def demo_stemming():
    """Print Porter-stemmer output for a range of illustrative words.

    The word list reproduces the original notebook cells: regular
    derivations (creation/create/creating), irregular-looking pairs
    (fly/flies, laziness/lazy), and a nonsense word (xyzing) showing
    that the stemmer is purely rule-based, not dictionary-based.
    """
    stemmer = PorterStemmer()
    words = [
        "argumentation",
        "creation",
        "fly",
        "flies",
        "laziness",
        "lazy",
        "traditional",
        "create",
        "creation",
        "creating",
        "decisiveness",
        "public",
        "publicize",
        "xyzing",
    ]
    # As notebook cells these expressions displayed their values; in a
    # script the results must be printed explicitly or they are discarded.
    for word in words:
        print(word, "->", stemmer.stem(word))


def demo_wordnet():
    """Walk through WordNet lookups: synsets, lemmas, antonyms,
    path similarity, hypernym closures, and lemmatization."""
    print(wn.synsets("dog"))

    d = wn.synset("dog.n.01")
    print(d.definition())
    print(d.lemmas())

    # Antonyms are defined on lemmas, not on synsets.
    able = wn.synset('able.a.01')
    print(able.lemmas()[0].antonyms())

    dog = wn.synset("dog.n.01")
    cool = wn.synset("cool.n.01")
    # Similarity based on shortest path in the hypernym/hyponym graph.
    print(dog.path_similarity(cool))

    print(dog.hypernyms())
    # closure() transitively follows the given relation -> full hypernym chain.
    print(list(dog.closure(lambda s: s.hypernyms())))
    print(list(cool.closure(lambda s: s.hypernyms())))

    # Lemmatization needs the part of speech to pick the right base form.
    wnl = WordNetLemmatizer()
    print(wnl.lemmatize('running', wn.VERB))
    print(wnl.lemmatize('better', wn.ADJ))
    print(wnl.lemmatize('oxen', wn.NOUN))
    print(wnl.lemmatize('geese', wn.NOUN))


def demo_word2vec(corpus_dir="WOTclean"):
    """Train a word2vec model on the text files under *corpus_dir*
    and print a few nearest-neighbor / analogy queries.

    Returns the trained model so callers can run further queries.
    """
    logging.basicConfig(format="%(asctime)s: %(levelname)s : %(message)s",
                        level=logging.INFO)

    # PathLineSentences streams one sentence per line from every file
    # in the directory, so the corpus never has to fit in memory.
    sentences = word2vec.PathLineSentences(corpus_dir)

    # NOTE(review): `size` is the pre-4.0 gensim keyword; gensim >= 4.0
    # renamed it to `vector_size` and raises TypeError on `size`.
    # Confirm the installed gensim version before running.
    model = word2vec.Word2Vec(sentences, size=200)

    print(model.wv.most_similar("sleep"))
    print(model.wv.most_similar("sword"))
    # Classic analogy query: king - man + woman ~ queen (corpus permitting).
    print(model.wv.most_similar(positive=["king", "woman"], negative=["man"]))
    return model


if __name__ == "__main__":
    demo_stemming()
    demo_wordnet()
    demo_word2vec()