#!/usr/bin/env python
# coding: utf-8

# # Relearning Python

# In[2]:


# Load the word list: one entry per line, with surrounding whitespace
# (including the trailing newline) stripped. The `with` block closes the file
# even if reading raises part-way through.
with open("english2.txt") as wordfile:
    words = [w.strip() for w in wordfile]


# In[6]:


# Peek at the first 20 entries.
words[:20]


# How many words are in my word list?
# 
# * testing bullets
# * more bullets

# In[5]:


# Number of entries in the word list. This is a bare expression: its value is
# only visible when evaluated interactively (e.g. as a notebook cell).
len(words)


# In[7]:


# Tally the words that end in "ing".
count = sum(1 for w in words if w.endswith("ing"))


# In[8]:


count


# In[9]:


def abecedarian(w):
    """Return True if the characters of w appear in non-decreasing order.

    Every character is compared with the one that follows it; repeated
    characters are allowed. Empty and single-character inputs have no
    adjacent pairs and are therefore abecedarian.
    """
    return all(left <= right for left, right in zip(w, w[1:]))


# In[10]:


# False: the first adjacent pair already breaks the order ('c' > 'a').
abecedarian("cat")


# In[12]:


# False: 'd' <= 'o' holds, but the next pair does not ('o' > 'g').
abecedarian("dog")


# In[18]:


# Keep only the words whose letters are in alphabetical order.
abc = list(filter(abecedarian, words))


# In[19]:


# Longest abecedarian word. max() returns the first of several equally long
# candidates, and the empty string when there are no candidates at all.
best = max(abc, key=len, default="")


# In[20]:


best


# # Letter Frequency Counts

# In[21]:


# Map each character to the number of times it occurs across all words.
d = {}
for w in words:
    for c in w:
        if c in d:
            d[c] += 1
        else:
            d[c] = 1


# In[22]:


d


# In[41]:


# Copy the per-character counts out of `d` into a list.
values = [*d.values()]


# In[24]:


# Order the counts from largest to smallest.
values = sorted(values, reverse=True)


# In[34]:


# Count -> letter lookup. NOTE: this inversion is lossy: when two letters share
# a count only the one seen last in `d` survives. It is kept only in case
# other cells refer to `revd`; `s` below no longer depends on it.
revd = {count: letter for letter, count in d.items()}


# In[35]:


# Letters ordered from most to least frequent. Sorting the letters by their
# count (rather than looking counts up in `revd`) keeps every letter exactly
# once even when counts tie; sorted() is stable, so tied letters stay in the
# order they appear in `d`.
s = "".join(sorted(d, key=d.get, reverse=True))


# In[36]:


s


# In[29]:


# Turn on inline plotting when an IPython shell is available. The name
# get_ipython is not defined in a plain `python` run, so look it up
# defensively and skip the magic instead of failing with NameError.
try:
    _ipython = get_ipython()
except NameError:
    _ipython = None
if _ipython is not None:
    _ipython.run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import string


# In[38]:


# Bar chart of character counts, most frequent first. The tick labels are the
# characters in that same order, so each bar carries its own label; pairing
# the sorted counts with a fixed a..z label list would mislabel the bars and
# breaks outright if `d` does not hold exactly 26 keys.
letters_by_count = sorted(d, key=d.get, reverse=True)
positions = range(len(letters_by_count))
plt.bar(positions, [d[letter] for letter in letters_by_count])
plt.xticks(positions, letters_by_count)


# In[58]:


# Share of each letter a-z among all characters counted in `d`.
# The total is computed once rather than once per letter, and a letter that
# never occurs in the word list contributes 0 instead of raising KeyError.
word_list_total = sum(d.values())
v2 = [d.get(c, 0) / word_list_total for c in string.ascii_lowercase]


# In[59]:


# One bar per letter a-z. Bar positions are derived from the alphabet, not
# from len(d), so they always line up with the per-letter values in `v2` and
# with the tick labels even if `d` holds more or fewer than 26 keys.
alphabet = list(string.ascii_lowercase)
positions = range(len(alphabet))
plt.bar(positions, v2)
plt.xticks(positions, alphabet)


# In[44]:


# Read the whole novel and lower-case it so that later character counts do
# not distinguish case. The `with` block closes the file even if reading
# raises.
with open("WheelOfTime/EyeOfTheWorld.txt") as eyeopen:
    eye = eyeopen.read().lower()


# In[45]:


# Peek at the first 100 characters.
eye[:100]


# In[46]:


import re

# Remove every character that is neither a word character nor whitespace,
# which strips the punctuation from the text.
non_word_or_space = re.compile(r'[^\w\s]')
eye = non_word_or_space.sub('', eye)


# In[48]:


eye[:100]


# In[49]:


# Map each character of `eye` to the number of times it occurs.
d2 = {}
for c in eye:
    if c in d2:
        d2[c] += 1
    else:
        d2[c] = 1


# In[60]:


# Share of each letter a-z among the letters of the novel.
# `d2` counts every character of `eye`, which still contains whitespace and
# other non-letter characters, so dividing by sum(d2.values()) would
# understate every letter's share. The denominator is therefore the number of
# a-z letters only. A letter that never occurs contributes 0 instead of
# raising KeyError, and the total is computed once rather than per letter.
novel_letter_counts = [d2.get(c, 0) for c in string.ascii_lowercase]
novel_letter_total = sum(novel_letter_counts)
v3 = [n / novel_letter_total for n in novel_letter_counts]


# In[63]:


# Overlay the two letter-frequency profiles, one pair of bars per letter a-z.
# Positions come from the alphabet rather than len(d), so they always match
# the per-letter lists `v2` and `v3` and the tick labels.
alphabet = list(string.ascii_lowercase)
positions = range(len(alphabet))
# v2 and v3 hold fractions (count / total); scale by 100 so the bar heights
# agree with the "percent" axis label.
plt.bar(positions, [100 * f for f in v2], alpha=0.5, label="Word List")
plt.bar(positions, [100 * f for f in v3], alpha=0.3, label="Novel")
plt.xticks(positions, alphabet)
plt.title("Frequency of Letters")
plt.ylabel("percent")
plt.legend()


# In[ ]: