#!/usr/bin/env python
# coding: utf-8
"""Relearning Python: word-list exercises and letter-frequency charts.

Reads a word list (``english2.txt``) and a novel
(``WheelOfTime/EyeOfTheWorld.txt``), answers a few questions about the
words, and charts how often each letter a-z occurs in both texts.

The helper functions can be imported on their own; file access and
plotting only happen when the file is executed as a script.
"""

# # Relearning Python

import re
import string
from collections import Counter


def load_words(path):
    """Return the lines of the file at *path*, stripped of whitespace.

    The file is opened with the platform's default text encoding and is
    closed even if reading fails.
    """
    with open(path) as wordfile:
        return [line.strip() for line in wordfile]


def abecedarian(w):
    """Return True if the characters of *w* never decrease left to right.

    Repeated characters are allowed ("aabb" qualifies); the empty string
    and single characters qualify trivially.
    """
    return all(a <= b for a, b in zip(w, w[1:]))


def longest_word(candidates):
    """Return the first longest string in *candidates*, or "" if empty."""
    return max(candidates, key=len, default="")


def count_characters(texts):
    """Count every character across an iterable of strings.

    Returns a ``collections.Counter`` mapping character -> occurrences.
    """
    counts = Counter()
    for text in texts:
        counts.update(text)
    return counts


def letters_by_frequency(counts):
    """Return the keys of *counts* joined into a string, most frequent first.

    Sorting the keys directly (instead of inverting the mapping to
    count -> key) keeps every character even when two of them share the
    same count.  Ties keep the mapping's own iteration order because the
    sort is stable.
    """
    return "".join(sorted(counts, key=counts.get, reverse=True))


def letter_percentages(counts, alphabet=string.ascii_lowercase):
    """Return each letter's share of all *alphabet* letters, in percent.

    Only characters in *alphabet* contribute to the total, so spaces,
    digits and underscores present in *counts* do not deflate the result.
    Letters missing from *counts* get 0.0; if no letter occurs at all,
    every entry is 0.0.
    """
    total = sum(counts.get(c, 0) for c in alphabet)
    if total == 0:
        return [0.0] * len(alphabet)
    return [100 * counts.get(c, 0) / total for c in alphabet]


if __name__ == "__main__":
    words = load_words("english2.txt")
    print(words[:20])

    # How many words are in my word list?
    #
    # * testing bullets
    # * more bullets
    print(len(words))

    # How many words end in "ing"?
    count = sum(w.endswith("ing") for w in words)
    print(count)

    print(abecedarian("cat"))
    print(abecedarian("dog"))

    # Longest abecedarian word in the list.
    abc = [w for w in words if abecedarian(w)]
    best = longest_word(abc)
    print(best)

    # # Letter Frequency Counts
    d = count_characters(words)
    print(d)

    # Characters ranked by count, plus their counts in that same order.
    s = letters_by_frequency(d)
    values = [d[c] for c in s]
    print(s)

    # The inline backend only exists under IPython/Jupyter; skip the
    # magic when running under a plain interpreter.
    try:
        shell = get_ipython()  # noqa: F821 - injected by IPython
    except NameError:
        shell = None
    if shell is not None:
        shell.run_line_magic('matplotlib', 'inline')

    import matplotlib.pyplot as plt

    letters = list(string.ascii_lowercase)
    positions = range(len(letters))

    # Each chart gets its own figure; otherwise consecutive plt.bar calls
    # in a script all draw onto the same axes.
    # Chart 1: raw counts, most frequent first, labelled in that order.
    plt.figure()
    plt.bar(range(len(values)), values)
    plt.xticks(range(len(values)), list(s))

    # Chart 2: a-z percentages for the word list.
    v2 = letter_percentages(d)
    plt.figure()
    plt.bar(positions, v2)
    plt.xticks(positions, letters)

    with open("WheelOfTime/EyeOfTheWorld.txt") as eyeopen:
        eye = eyeopen.read().lower()
    print(eye[:100])

    # Drop punctuation, keeping word characters and whitespace.
    eye = re.sub(r'[^\w\s]', '', eye)
    print(eye[:100])

    d2 = count_characters([eye])
    v3 = letter_percentages(d2)

    # Chart 3: word list vs. novel, overlaid on purpose.
    plt.figure()
    plt.bar(positions, v2, alpha=0.5, label="Word List")
    plt.bar(positions, v3, alpha=0.3, label="Novel")
    plt.xticks(positions, letters)
    plt.title("Frequency of Letters")
    plt.ylabel("percent")
    plt.legend()

    plt.show()