PyData.Tokyo Meetup #4 - Performance Tips

In [1]:
%load_ext watermark
%watermark -a 'Hideki Tanaka (@atelierhide)' -v -p numpy,numba,cython -d
Hideki Tanaka (@atelierhide) 19/04/2015 

CPython 2.7.9
IPython 3.1.0

numpy 1.9.2
numba 0.18.2
cython 0.22

Multiprocessing

In [2]:
from multiprocessing import Pool
import math

pool = Pool(processes=4)
%timeit pool.map(math.sqrt, xrange(10000))

pool.close()
pool.join()
100 loops, best of 3: 2.83 ms per loop

Numpy, Numba, Cython

In [3]:
import numpy as np
X = np.random.random((1000, 3))
In [4]:
def pairwise_python(X):
    M = X.shape[0]
    N = X.shape[1]
    D = np.empty((M, M), dtype=np.float)
    for i in range(M):
        for j in range(M):
            d = 0.0
            for k in range(N):
                tmp = X[i, k] - X[j, k]
                d += tmp * tmp
            D[i, j] = np.sqrt(d)
    return D
In [5]:
# Numpy
def pairwise_numpy(X):
    return np.sqrt(((X[:, None, :] - X) ** 2).sum(-1))
In [6]:
# Numba
from numba.decorators import autojit
pairwise_numba = autojit(pairwise_python)
In [7]:
# Cython
%load_ext Cython
In [8]:
%%cython

import numpy as np
cimport cython
from libc.math cimport sqrt

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_cython(double[:, ::1] X):
    cdef int M = X.shape[0]
    cdef int N = X.shape[1]
    cdef double tmp, d
    cdef double[:, ::1] D = np.empty((M, M), dtype=np.float64)
    for i in range(M):
        for j in range(M):
            d = 0.0
            for k in range(N):
                tmp = X[i, k] - X[j, k]
                d += tmp * tmp
            D[i, j] = sqrt(d)
    return np.asarray(D)
In [10]:
%timeit pairwise_python(X)
%timeit pairwise_numpy(X)
%timeit pairwise_numba(X)
%timeit pairwise_cython(X)
1 loops, best of 3: 5.78 s per loop
10 loops, best of 3: 65 ms per loop
100 loops, best of 3: 10.6 ms per loop
100 loops, best of 3: 9.75 ms per loop

List Comprehension, Map, Numpy

In [11]:
import math
import numpy as np
In [12]:
def list_append(x):
    results = []
    for i in xrange(x):
        results.append(math.sqrt(i))
    return results

def list_append2(x):
    results = []
    for i in xrange(x):
        results.append(math.sqrt(i))
    return results

def list_comp(x):
    results = [math.sqrt(i) for i in xrange(x)]
    return results

def list_map(x):
    results = map(math.sqrt, xrange(x))
    return results

def list_numpy(x):
    results = list(np.sqrt(np.arange(x)))
    return results
In [13]:
x = 10000
%timeit list_append(x)
%timeit list_append2(x)
%timeit list_comp(x)
%timeit list_map(x)
%timeit list_numpy(x)
100 loops, best of 3: 3.09 ms per loop
100 loops, best of 3: 3.09 ms per loop
1000 loops, best of 3: 1.86 ms per loop
1000 loops, best of 3: 1.17 ms per loop
1000 loops, best of 3: 736 ┬Ás per loop