# Demonstration of joblib's Parallel/delayed helpers: basic usage, timing a
# CPU-bound task sequentially vs. in parallel, and plotting how the run time
# changes with n_jobs and with the problem size.
#
# The code is written so that it runs unchanged on Python 2.7 and Python 3:
# every print(...) call takes a single argument and range() is used instead
# of xrange().
import time
import timeit

from IPython.core.display import Image
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import numpy as np

# Logo for the notebook version of this demo.  As a bare expression it is only
# rendered when it is the last statement of a notebook cell; in a plain script
# it has no visible effect.
Image(url='http://labrosa.ee.columbia.edu/crucialpython/logo.png', width=600)

# The same computation three ways: a plain list comprehension, Parallel with
# its default settings, and Parallel with 8 workers.
print([np.power(i, 2) for i in range(10)])
print(Parallel()(delayed(np.power)(i, 2) for i in range(10)))
# To parallelize, simply set the n_jobs argument!
print(Parallel(n_jobs=8)(delayed(np.power)(i, 2) for i in range(10)))


def convolve_random(size):
    """Convolve two random arrays of length ``size``.

    Parameters
    ----------
    size : int
        Number of samples drawn for each of the two input arrays.

    Returns
    -------
    numpy.ndarray
        Result of ``np.convolve`` applied to two independent
        ``np.random.random_sample(size)`` draws.
    """
    first = np.random.random_sample(size)
    second = np.random.random_sample(size)
    return np.convolve(first, second)


def _report_best_time(label, func, repeat=3):
    """Call ``func`` ``repeat`` times, print the fastest run and return it.

    Plain-Python stand-in for IPython's ``%timeit`` magic, so the script does
    not depend on IPython-only syntax.

    Parameters
    ----------
    label : str
        Text printed in front of the measured time.
    func : callable
        Zero-argument callable to time.
    repeat : int, optional
        How many single-call measurements to take (default 3).

    Returns
    -------
    float
        Best (smallest) elapsed time in seconds.
    """
    best = min(timeit.repeat(func, number=1, repeat=repeat))
    print('{}: best of {} runs: {:.3f} s'.format(label, repeat, best))
    return best


# Time to run once with length-40000 arrays
_report_best_time('single convolution', lambda: convolve_random(40000))
# Time to run sequentially for length 40000, 41000, 42000, ... 47000 arrays
_report_best_time(
    'sequential, 8 sizes',
    lambda: [convolve_random(40000 + i*1000) for i in range(8)])
# In parallel, with 8 jobs
_report_best_time(
    'parallel, n_jobs=8',
    lambda: Parallel(n_jobs=8)(
        delayed(convolve_random)(40000 + i*1000) for i in range(8)))

# Use the verbose argument to display progress messages.
# The frequency of the messages increases with the verbosity level.
result = Parallel(n_jobs=8, verbose=50)(
    delayed(convolve_random)(40000 + i*1000) for i in range(16))

# Try convolution sizes [5000, 10000, 15000 ... 50000]
sizes = 5000*(1 + np.arange(10))
# Try n_jobs from [1, ..., max_jobs]
max_jobs = 8
n_jobs_values = 1 + np.arange(max_jobs)

# Store the timing for each setting: one row per n_jobs value, one column per
# convolution size.
times = np.zeros((n_jobs_values.shape[0], sizes.shape[0]))
for n, n_jobs in enumerate(n_jobs_values):
    for m, size in enumerate(sizes):
        start = time.time()
        # Always submit max_jobs tasks so every n_jobs setting does the same
        # amount of work; only the elapsed time matters, not the output.
        Parallel(n_jobs=n_jobs)(
            delayed(convolve_random)(size) for _ in range(max_jobs))
        # Compute and store elapsed time
        times[n, m] = time.time() - start
# Save it out so we don't have to run it twice
np.savetxt('times.txt', times)

# Load in our pre-computed times
times = np.genfromtxt('times.txt')

# One panel per convolution size (first four sizes, 2x2 grid): time vs n_jobs.
plt.figure(figsize=(10, 6))
for size_index in range(min(4, len(sizes))):
    plt.subplot(2, 2, size_index + 1)
    # Plot the times for this size by n_jobs
    plt.plot(n_jobs_values, times[:, size_index], '.')
    # Set up axes and labels
    plt.xticks(n_jobs_values)
    plt.xlim([.8, max_jobs + .2])
    plt.title('Size = {}'.format(sizes[size_index]))
    plt.xlabel('n_jobs')
    plt.ylabel('Time (s)')
plt.tight_layout()

# Single figure: time vs convolution size, one line per n_jobs setting.
plt.figure(figsize=(10, 6))
# One color per n_jobs setting; the list wraps around if max_jobs exceeds it.
colors = ['r', 'k', 'Purple', 'IndianRed', 'Chartreuse', 'DarkRed', 'Teal', 'b']
# Plot each n_jobs setting as a different line.  The color and legend label
# are passed per line instead of relying on Axes.set_color_cycle (removed from
# matplotlib) and on a positional legend location.
for index, (n_jobs, job_results) in enumerate(zip(n_jobs_values, times)):
    plt.plot(sizes, job_results,
             color=colors[index % len(colors)],
             label=str(n_jobs))
# Set up labels and legend
plt.xlabel('Convolution size')
plt.ylabel('Time (s)')
plt.legend(loc='upper left', title='n_jobs')