#!/usr/bin/env python
# coding: utf-8

# ## Counting Words
# In our class today, we learned about "Availability Bias", where humans think
# that things that are easy to recall are also more common. For example, are
# there more English words starting with "e" than words with "e" in the third
# position? A lot of people would think there are more words starting with the
# letter e, because it is easier to think of such words than of words with e in
# the third position. To check this, we write some Python scripts to count such
# words.

# The exploratory cells run only when this file is executed as a script, so
# importing count_words() from it does not scan the system word list.
if __name__ == "__main__":
    # In[1]:

    # Count the words starting with e
    count = 0
    with open('/usr/share/dict/words') as words:
        for word in words:
            # startswith() accepts a tuple, covering both cases in one call.
            if word.startswith(("e", "E")):
                # print(word)
                count += 1
    print(count)

    # In[2]:

    # Count the words with e in the third position
    count = 0
    with open('/usr/share/dict/words') as words:
        for word in words:
            # Lines shorter than three characters have no third position.
            if len(word) < 3:
                continue
            if word[2] in ("e", "E"):
                # print(word)
                count += 1
    print(count)


# In[3]:

# We define a function to count words with 'letter' in 'position'
def count_words(letter, position, wordlist='/usr/share/dict/words'):
    r"""Count the words in 'wordlist' that have 'letter' at 'position'.

    The file 'wordlist' is read line by line, one word per line. A word is
    counted when the character at the 1-based 'position' equals either
    letter.upper() or letter.lower(). Words shorter than 'position' are
    skipped; the line terminator is not treated as part of the word.

    If 'wordlist' is omitted, it's assumed to be at '/usr/share/dict/words'
    which is a word list on macOS.

    For example
        count_words("a", 1, r"c:\dict.txt")
    counts the number of words in the file 'c:\dict.txt' starting with A or a;
        count_words("b", 3)
    counts the number of words whose 3rd letter is B or b.

    Parameters:
        letter:   the letter to look for (matched in both cases).
        position: 1-based position of the letter inside the word.
        wordlist: path of the word list file to scan.

    Returns:
        The number of matching words, as an int.

    Raises:
        ValueError: if 'position' is smaller than 1. Such values would
            otherwise be turned into negative indexes counted from the end
            of the word.
    """
    if position < 1:
        raise ValueError(
            "position must be 1 or greater, got {!r}".format(position)
        )

    index = position - 1
    targets = (letter.upper(), letter.lower())
    count = 0
    # The context manager closes the file even if reading raises.
    with open(wordlist) as words:
        for line in words:
            # Drop the newline so the length check reflects the word itself.
            word = line.rstrip("\n")
            if len(word) < position:
                continue
            if word[index] in targets:
                count += 1
    return count


# We try count_words() for e at positions 1, 2, 3 to check with previous results.
# The cells below run only when this file is executed as a script. Each call
# to count_words() re-reads the word list from disk.
if __name__ == "__main__":
    # In[4]:

    # A bare expression is only displayed inside a notebook; print the value
    # so the result is also visible when the file runs as a plain script.
    print(count_words("e", 1))

    # In[5]:

    print(count_words("e", 2))

    # In[6]:

    print(count_words("e", 3))

    # Here, we count words with the letter e at position 1, 2, 3, ..., 10

    # In[7]:

    for i in range(1, 11):
        print(i, count_words("e", i))

    # Here, we count words with the letter k at position 1, 2, 3, ..., 10

    # In[8]:

    for i in range(1, 11):
        print(i, count_words("k", i))

    # In[ ]: