"""Tutorial script: counting characters and words in Jane Austen's "Emma".

The reusable helpers are defined first so they can be imported on their own.
The step-by-step walkthrough, which reads the files under ``data/`` and prints
intermediate results, only runs when this file is executed as a script.

Exercise placeholders marked "insert your code here" are intentionally left
unsolved wherever the file still parses without a solution; their self-tests
therefore print ``False`` until the exercise is completed.
"""

# Characters stripped by the punctuation-removal helpers below.
_PUNCTUATION = '!@#$%^&*()_-+={}[]:;"\'|<>,.?/~`'

# Translation table that deletes every character in _PUNCTUATION; built once
# so remove_punc() does a single pass instead of one replace() per character.
_PUNCTUATION_TABLE = str.maketrans("", "", _PUNCTUATION)


def count_in_list(item_to_count, list_to_search):
    """Return how many elements of *list_to_search* equal *item_to_count*."""
    return sum(1 for item in list_to_search if item == item_to_count)


def counter(list_to_search):
    """Return a dict mapping each distinct item to its number of occurrences.

    Keys appear in order of first occurrence in *list_to_search*.
    """
    counts = {}
    for word in list_to_search:
        counts[word] = counts.get(word, 0) + 1
    return counts


def counter2(list_to_search):
    """Print each distinct item of *list_to_search* followed by its count.

    Items are visited in set iteration order, so the output order is not
    guaranteed.
    """
    for word in set(list_to_search):
        print(word, count_in_list(word, list_to_search))


def remove_punc(text):
    """Return *text* with every character in the punctuation set removed."""
    return text.translate(_PUNCTUATION_TABLE)


def remove_punc2(text):
    """Return *text* without punctuation, filtering character by character.

    Produces the same result as remove_punc(); kept as the tutorial's second
    way of solving the same problem.
    """
    return "".join(
        character for character in text if character not in _PUNCTUATION
    )


def clean_text(text):
    """Return *text* lowercased and stripped of punctuation."""
    return remove_punc(text.lower())


def css_styling():
    """Return the contents of styles/custom.css wrapped in an IPython HTML object."""
    # Imported here so the helpers above stay usable without IPython installed.
    from IPython.core.display import HTML

    with open("styles/custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)


# The walkthrough stays at module scope (inside the guard) so every variable it
# binds is still a global when the file is run as a script or in a notebook.
if __name__ == "__main__":
    with open('data/austen-emma-excerpt.txt') as infile:
        print(infile)
        print(infile.read())

    with open('data/austen-emma-excerpt.txt') as infile:
        text = infile.read()

    number_of_es = 0
    # insert your code here

    # The following test should print True if your code is correct
    print(number_of_es == 78)

    number_of_es = text.count("e")
    print(number_of_es)

    print(text.count("an"))
    print(text.count(" an "))

    print(text.split())

    words = text.split()
    number_of_hits = 0
    item_to_count = "in"
    # insert your code here

    # The following test should print True if your code is correct
    print(number_of_hits == 3)

    words = text.split()
    number_of_hits = 0
    item_to_count = "in"
    for word in words:
        if word == item_to_count:
            number_of_hits += 1
    print(number_of_hits)

    print(count_in_list("an", words))

    # insert your code here

    counts = {}
    for word in words:
        if word in counts:
            counts[word] = counts[word] + 1
        else:
            counts[word] = 1
    print(counts)

    print(counter(words))

    emma_count = 0
    # insert your code here

    # The following test should print True if your code is correct
    print(emma_count == 481)

    with open('data/austen-emma-excerpt.txt') as infile:
        text = infile.read()
    words = text.split()

    # Deliberately naive: recounts the whole list for every token, including
    # repeated ones. The set-based version below removes the duplicates.
    for word in words:
        print(word, count_in_list(word, words))

    x = ['a', 'a', 'b', 'b', 'c', 'c', 'c']
    unique_x = set(x)
    print(unique_x)

    unique_words = set(words)
    for word in unique_words:
        print(word, count_in_list(word, words))

    counter2(words)

    x = 'Emma'
    x_lower = x.lower()
    print(x_lower)

    text_lower = text.lower()
    print(text_lower)

    x = 'Please. remove. all. dots. from. this. sentence.'
    x = x.replace(".", "")
    print(x)

    short_text = "Commas, as it turns out, are so much overestimated."
    # insert your code here

    # The following test should print True if your code is correct
    print(short_text == "commas as it turns out are so much overestimated.")

    short_text = "Commas, as it turns out, are overestimated. Dots, however, even more so!"
    print(remove_punc(short_text))

    short_text = "Commas, as it turns out, are overestimated. Dots, however, even more so!"
    print(remove_punc2(short_text))

    # The following test should print True if your code is correct
    short_text = "Commas, as it turns out, are overestimated. Dots, however, even more so!"
    print(clean_text(short_text) == "commas as it turns out are overestimated dots however even more so")

    woodhouse_counts = 0
    # insert your code here

    # The following test should print True if your code is correct
    print(woodhouse_counts == 263)

    with open("first-output.txt", mode="w") as outfile:
        outfile.write("My first output.")

    # first open and read data/austen-emma.txt; the with-block closes the infile
    with open("data/austen-emma.txt") as infile:
        text = infile.read()

    # clean the text
    text = clean_text(text)

    # next compute the frequency distribution using the function counter
    frequency_distribution = counter(text.split())

    # now write one "word;frequency" line per distinct word to
    # data/austen-frequency-distribution.txt; the with-block closes the outfile
    with open("data/austen-frequency-distribution.txt", mode="w") as outfile:
        for word, frequency in frequency_distribution.items():
            outfile.write(word + ";" + str(frequency) + '\n')

    css_styling()