# Load the word list: one entry per line, with surrounding whitespace removed.
# The context manager closes the file even if reading raises part-way through.
with open("english2.txt") as wordfile:
    words = [line.strip() for line in wordfile]
# Peek at the first 20 entries.
words[:20]
['aardvark', 'aardvarks', 'aardwolf', 'aardwolves', 'ab', 'aba', 'abaca', 'abacas', 'abaci', 'aback', 'abacterial', 'abacus', 'abacuses', 'abaft', 'abalone', 'abalones', 'abandon', 'abandoned', 'abandonee', 'abandoner']
How many words are in my word list?
# Number of entries in the word list.
len(words)
98221
# Count the word-list entries whose spelling ends in "ing".
count = sum(1 for w in words if w.endswith("ing"))
# Show the total.
count
6835
def abecedarian(w):
    """Return True if the characters of *w* never decrease from left to right.

    Characters are compared with ``>``, so the check is case-sensitive.
    Repeated adjacent characters are allowed, and an empty or one-character
    string is trivially abecedarian.
    """
    # Pair each character with its successor; one descending pair disqualifies.
    return not any(a > b for a, b in zip(w, w[1:]))
# Spot checks: "cat" fails at c > a and "dog" fails at o > g, so both are False.
abecedarian("cat")
False
abecedarian("dog")
False
# Keep only the words whose letters are in alphabetical order.
abc = list(filter(abecedarian, words))
# Longest such word: the first one wins a tie, and "" is the result when
# there are none.
best = max(abc, key=len, default="")
best
'billowy'
# Tally how often each character occurs across the whole word list.
d = {}
for word in words:
    for ch in word:
        if ch in d:
            d[ch] += 1
        else:
            d[ch] = 1
# Show the character -> count mapping.
d
{'a': 69209, 'b': 16827, 'c': 37835, 'd': 30050, 'e': 99676, 'f': 10796, 'g': 23163, 'h': 19260, 'i': 82878, 'j': 1260, 'k': 6134, 'l': 51765, 'm': 25443, 'n': 61380, 'o': 57655, 'p': 26144, 'q': 1526, 'r': 64183, 's': 68267, 't': 62494, 'u': 29387, 'v': 9252, 'w': 5938, 'x': 2555, 'y': 17514, 'z': 7321}
# Counts ordered from most to least frequent.
values = sorted(d.values(), reverse=True)
# Count -> letter lookup, kept in case other code reads it. It is lossy:
# letters that share a count collapse into a single entry, so it is not
# used to build `s`.
revd = {n: ch for ch, n in d.items()}
# Letters ordered from most to least frequent. Sorting the keys by their
# count keeps every letter exactly once, even when two counts are equal.
s = "".join(sorted(d, key=d.get, reverse=True))
s
'ardvkswolfebcitungmhyxjzpq'
%matplotlib inline
import matplotlib.pyplot as plt
import string
# Rank (letter, count) pairs from most to least frequent so that each bar is
# labelled with the letter it actually represents; labelling descending
# counts with a-z would attach the wrong letter to most bars.
ranked = sorted(d.items(), key=lambda item: item[1], reverse=True)
positions = range(len(ranked))
plt.bar(positions, [n for _, n in ranked])
plt.xticks(positions, [ch for ch, _ in ranked])
([<matplotlib.axis.XTick at 0x10c0b9358>, <matplotlib.axis.XTick at 0x10be48518>, <matplotlib.axis.XTick at 0x10c05de10>, <matplotlib.axis.XTick at 0x10c1f8f60>, <matplotlib.axis.XTick at 0x10c1ff978>, <matplotlib.axis.XTick at 0x10c207390>, <matplotlib.axis.XTick at 0x10c207d68>, <matplotlib.axis.XTick at 0x10c20c780>, <matplotlib.axis.XTick at 0x10c212198>, <matplotlib.axis.XTick at 0x10c212b70>, <matplotlib.axis.XTick at 0x10c217588>, <matplotlib.axis.XTick at 0x10c217f60>, <matplotlib.axis.XTick at 0x10c21e978>, <matplotlib.axis.XTick at 0x10c227390>, <matplotlib.axis.XTick at 0x10c227d68>, <matplotlib.axis.XTick at 0x10c22e780>, <matplotlib.axis.XTick at 0x10c235198>, <matplotlib.axis.XTick at 0x10c235b70>, <matplotlib.axis.XTick at 0x10c239588>, <matplotlib.axis.XTick at 0x10c239f60>, <matplotlib.axis.XTick at 0x10c240978>, <matplotlib.axis.XTick at 0x10c249390>, <matplotlib.axis.XTick at 0x10c249d68>, <matplotlib.axis.XTick at 0x10c24e780>, <matplotlib.axis.XTick at 0x10c255198>, <matplotlib.axis.XTick at 0x10c255b70>], <a list of 26 Text xticklabel objects>)
# Share of all counted characters taken by each letter, in a-z order.
word_total = sum(d.values())  # computed once rather than once per letter
v2 = [d[c] / word_total for c in string.ascii_lowercase]
positions = range(len(d.values()))
plt.bar(positions, v2)
plt.xticks(positions, list(string.ascii_lowercase))
([<matplotlib.axis.XTick at 0x11039de80>, <matplotlib.axis.XTick at 0x1103dcfd0>, <matplotlib.axis.XTick at 0x1101fd588>, <matplotlib.axis.XTick at 0x110531e48>, <matplotlib.axis.XTick at 0x110516860>, <matplotlib.axis.XTick at 0x110540278>, <matplotlib.axis.XTick at 0x110540c50>, <matplotlib.axis.XTick at 0x110546668>, <matplotlib.axis.XTick at 0x11054e080>, <matplotlib.axis.XTick at 0x11054ea58>, <matplotlib.axis.XTick at 0x110555470>, <matplotlib.axis.XTick at 0x110555e48>, <matplotlib.axis.XTick at 0x110558860>, <matplotlib.axis.XTick at 0x110561278>, <matplotlib.axis.XTick at 0x110561c50>, <matplotlib.axis.XTick at 0x110566668>, <matplotlib.axis.XTick at 0x11056f080>, <matplotlib.axis.XTick at 0x11056fa58>, <matplotlib.axis.XTick at 0x110576470>, <matplotlib.axis.XTick at 0x110576e48>, <matplotlib.axis.XTick at 0x11057b860>, <matplotlib.axis.XTick at 0x110582278>, <matplotlib.axis.XTick at 0x110582c50>, <matplotlib.axis.XTick at 0x110587668>, <matplotlib.axis.XTick at 0x110591080>, <matplotlib.axis.XTick at 0x110591a58>], <a list of 26 Text xticklabel objects>)
# Read the whole novel and lowercase it so the letter counts ignore case.
# The context manager closes the file even if reading raises.
# NOTE(review): the text contains curly quotes and em dashes, so the encoding
# is stated explicitly; this assumes the file is UTF-8 -- confirm.
with open("WheelOfTime/EyeOfTheWorld.txt", encoding="utf-8") as eyeopen:
    eye = eyeopen.read().lower()
# Preview the first 100 characters.
eye[:100]
'“the eye of the world is the best of its genre.”\n\n—the ottawa citizen\n\n\n\n“a splendid tale of heroic '
import re
# Drop every character that is neither a word character nor whitespace,
# which strips the punctuation while keeping spacing and line breaks.
punctuation = re.compile(r'[^\w\s]')
eye = punctuation.sub('', eye)
# Preview the first 100 characters of the cleaned text.
eye[:100]
'the eye of the world is the best of its genre\n\nthe ottawa citizen\n\n\n\na splendid tale of heroic fanta'
# Tally every character of the cleaned novel text (letters, digits,
# whitespace and anything else the regex left in place).
d2 = {}
for c in eye:
    d2[c] = d2.get(c, 0) + 1
# Share of each letter among the a-z letters only. Dividing by the count of
# all characters would let spaces and newlines shrink every bar, so the novel
# would not be comparable with the word-list shares in v2.
letter_counts = [d2.get(c, 0) for c in string.ascii_lowercase]
letter_total = sum(letter_counts)
# Guard against a text with no a-z letters at all.
v3 = [n / letter_total if letter_total else 0.0 for n in letter_counts]
# Overlay the two a-z letter distributions; the bars are translucent so both
# stay visible where they overlap.
positions = range(len(d.values()))
plt.bar(positions, v2, alpha=0.5, label="Word List")
plt.bar(positions, v3, alpha=0.3, label="Novel")
plt.xticks(positions, list(string.ascii_lowercase))
plt.title("Frequency of Letters")
# v2 and v3 hold fractions between 0 and 1, not percentages.
plt.ylabel("relative frequency")
plt.legend()
<matplotlib.legend.Legend at 0x110ca0e10>