#!/usr/bin/env python
# coding: utf-8

# # Generating Concordances
#
# This notebook shows how you can generate a concordance using lists.

import glob
import re


def tokenize(text):
    """Return the lowercased word tokens found in *text*.

    A token starts with a word character and continues with word characters
    or hyphens, so punctuation is dropped while hyphenated words such as
    "well-known" stay together.
    """
    return re.findall(r'\b\w[\w-]*\b', text.lower())


# The interactive steps are guarded so that importing this file does not
# prompt for input or read files. Names assigned inside the guard are still
# module-level globals, so later cells can use them unchanged.
if __name__ == "__main__":

    # First we see what text files we have.

    # In[1]:

    # The notebook used the IPython shell escape "ls *.txt", which is not
    # valid Python in an exported script; glob lists the same files.
    print(sorted(glob.glob("*.txt")))

    # We are going to use "Hume Treatise.txt" from the Gutenberg Project.
    # You can use whatever text you want. We print the first 50 characters
    # to check.

    # In[2]:

    theText2Use = "Hume Treatise.txt"
    # An explicit encoding keeps the result independent of the platform
    # default; change it if your text file is not UTF-8.
    with open(theText2Use, "r", encoding="utf-8") as fileToRead:
        fileRead = fileToRead.read()

    print("This string has", len(fileRead), "characters.")
    print(fileRead[:50])

    # ## Tokenization
    #
    # Now we tokenize the text, producing a list called "listOfTokens", and
    # check the first words. This eliminates punctuation and lowercases the
    # words.

    # In[3]:

    listOfTokens = tokenize(fileRead)
    print(listOfTokens[:10])

    # ## Input
    #
    # Now we get the word you want a concordance for and the context wanted.

    # In[4]:

    # Ask for the word to search for
    word2find = input("What word do you want collocates for? ").lower()
    # This asks for the context of words on either side to grab
    context = input("How much context do you want? ")

    # In[5]:

    # input() always returns a string; printed here because a bare
    # expression only displays its value inside a notebook.
    print(type(context))

    # In[7]:

    # int() raises ValueError if the answer was not a whole number.
    contextInt = int(context)
    print(type(contextInt))

    # In[9]:

    print(len(listOfTokens))

# ## Main function
#
# Here is the main function that does the work populating a new list with
# the lines of concordance. We check the last 5 concordance lines.
# In[10]:


def makeConc(word2conc, list2FindIn, context2Use, concList=None):
    """Build concordance lines for every occurrence of a word.

    Args:
        word2conc: The token to look for; compared to each item with ``==``.
        list2FindIn: The sequence of string tokens to search.
        context2Use: How many tokens to keep on each side of a match.
        concList: Optional list that the lines are appended to in place.
            A new list is created when it is omitted.

    Returns:
        The list holding the lines (``concList`` itself when one was
        passed). Each line has the form ``"<position>: <tokens>"``, where
        position is the index of the match in ``list2FindIn`` and tokens
        are the match and its neighbours joined by single spaces.

    Raises:
        ValueError: If ``context2Use`` is negative.
    """
    if context2Use < 0:
        raise ValueError("context2Use must not be negative")
    if concList is None:
        concList = []

    for location, token in enumerate(list2FindIn):
        if token != word2conc:
            continue
        # A negative start index would count from the end of the list, so
        # the left edge is clamped at 0. The right edge needs no clamping
        # because slicing past the end simply stops at the last item.
        beginCon = max(location - context2Use, 0)
        endCon = location + context2Use + 1
        concordanceLine = ' '.join(list2FindIn[beginCon:endCon])
        concList.append(str(location) + ": " + concordanceLine)

    return concList


# The steps below use word2find, listOfTokens and context from the earlier
# input cells. They are guarded so that importing this file only defines
# makeConc; names assigned inside the guard are still module-level globals.
if __name__ == "__main__":
    import glob

    theConc = []
    makeConc(word2find, listOfTokens, int(context), theConc)
    # Printed because a bare expression only displays inside a notebook.
    print(theConc[-5:])

    # ## Output
    #
    # Finally, we output to a text file.

    # In[11]:

    nameOfResults = word2find.capitalize() + ".Concordance.txt"

    # An explicit encoding avoids failures on tokens that the platform
    # default encoding cannot represent.
    with open(nameOfResults, "w", encoding="utf-8") as fileToWrite:
        fileToWrite.writelines(line + "\n" for line in theConc)

    print("Done")

    # Here we check that the file was created.

    # In[12]:

    # Replaces the IPython shell escape "ls *.Concordance.txt", which is not
    # valid Python in an exported script.
    print(sorted(glob.glob("*.Concordance.txt")))

# ---
# [CC BY-SA](https://creativecommons.org/licenses/by-sa/4.0/) From [The Art of Literary Text Analysis](ArtOfLiteraryTextAnalysis.ipynb) by [Stéfan Sinclair](http://stefansinclair.name) & [Geoffrey Rockwell](http://geoffreyrockwell.com)
# Created September 30th, 2016 (Jupyter 4.2.1)

# In[ ]: