In [1]:
import concurrent.futures
import gendercounter
import os
import glob

Normal use

In [18]:
%%time
def normalcount(directory="/home/anon/Desktop/SOU19222015/"):
    """Sequentially total the "Women" and "Men" counts over a directory.

    Every entry returned by ``os.listdir(directory)`` is loaded with
    ``gendercounter.from_textfile`` and its ``genderfrequency()`` result is
    added to the running totals, which are printed at the end.

    Args:
        directory: Folder whose entries are counted. Defaults to the corpus
            location this notebook was written against, so calling
            ``normalcount()`` with no arguments behaves as before.

    Returns:
        None. The two totals are written to stdout.
    """
    womencounter = 0
    mencounter = 0
    for file in os.listdir(directory):
        textfile = gendercounter.from_textfile(os.path.join(directory, file))
        # Evaluate genderfrequency() once per file and reuse the result for
        # both totals instead of recomputing it for each key.
        frequency = textfile.genderfrequency()
        womencounter += frequency["Women"]
        mencounter += frequency["Men"]
    print("Women: " + str(womencounter))
    print("Men: " + str(mencounter))
        
# Run the sequential count once; the %%time cell magic at the top of this cell reports how long it takes.
normalcount()
Women: 198855
Men: 403188
CPU times: user 14min 21s, sys: 1.4 s, total: 14min 23s
Wall time: 14min 23s

Concurrency

In [16]:
def countfile(file):
    """Return the gender-frequency result for one text file.

    Defined at module level so it can be passed to ``executor.map`` as the
    per-file work unit.

    Args:
        file: Path handed to ``gendercounter.from_textfile``.

    Returns:
        Whatever ``genderfrequency()`` returns for that file; the notebook
        indexes it with the keys "Women" and "Men".
    """
    return gendercounter.from_textfile(file).genderfrequency()
In [19]:
%%time 
# Count every .txt file in the corpus in parallel worker processes and print
# the combined "Women" / "Men" totals.
with concurrent.futures.ProcessPoolExecutor() as executor:
    womencounter = 0
    mencounter = 0
    text_files = glob.glob("/home/anon/Desktop/SOU19222015/*.txt")
    # executor.map yields one countfile() result per path; only the result is
    # needed here, so the paths are not zipped back in. No separate file
    # counter is kept: it was never initialised or read, and incrementing it
    # raised NameError on a fresh kernel.
    for frequency in executor.map(countfile, text_files):
        womencounter += frequency["Women"]
        mencounter += frequency["Men"]
    print("Women: " + str(womencounter))
    print("Men: " + str(mencounter))
Women: 198855
Men: 403188
CPU times: user 3.98 s, sys: 364 ms, total: 4.34 s
Wall time: 3min 42s
In [ ]: