To show the basics of numba we are going to try to downsample an image.
import numba
import numpy as np
from pylab import imshow
from scipy import misc
from time import time
We are going to use the misc.face image. It is included in scipy.misc anyway.
%matplotlib inline
# Load the sample "face" image that ships with scipy.misc and display it.
img = misc.face()
imshow(img)
# Bare expression: the notebook echoes the image shape as the cell output.
img.shape
(768, 1024, 3)
Now, let's make the downsampling function.
def downsample(src, prop):
    """Shrink ``src`` by integer factors along its first two axes.

    ``prop`` is a pair ``(p0, p1)`` of block sizes for axis 0 and axis 1.
    The result has shape ``(src.shape[0] // p0, src.shape[1] // p1,
    src.shape[2])`` and the same dtype as ``src``; it is allocated
    uninitialized and then filled in place by ``downsample_submatrix``.
    """
    p0, p1 = prop
    out_rows = src.shape[0] // p0
    out_cols = src.shape[1] // p1
    channels = src.shape[2]
    out = np.empty((out_rows, out_cols, channels), src.dtype)
    # The helper writes every element of ``out``; it returns nothing.
    downsample_submatrix(src, p0, p1, out)
    return out
def downsample_submatrix(src, p0, p1, y):
    """Fill ``y`` in place with block means of ``src``.

    Each ``y[i, j, z]`` receives the mean of the ``p0`` x ``p1`` window of
    ``src`` whose top-left corner is ``(i * p0, j * p1)`` in channel ``z``.
    The extents of the loops come from ``y.shape``; nothing is returned.
    """
    for z in range(y.shape[2]):  # one pass per channel (RGB in this notebook)
        for i in range(y.shape[0]):
            top = i * p0
            bottom = (i + 1) * p0
            for j in range(y.shape[1]):
                left = j * p1
                right = (j + 1) * p1
                window = src[top:bottom, left:right, z]
                y[i, j, z] = np.mean(window)
# Time the pure-Python implementation: wall-clock timestamps are taken
# immediately before and after the call.
t1 = time()
down_img = downsample(img, (2,2))
t2 = time()
# Elapsed seconds for the pure-Python version; reused later in the notebook
# to compute speedup ratios.
elapsed1 = t2-t1
elapsed1
21.73535704612732
# Display the downsampled image, then echo its shape as the cell output.
imshow(down_img)
down_img.shape
(384, 512, 3)
As you can see, this operation takes a lot of time even for a small image. Let's see if we can improve it using numba.
@numba.njit
def downsample_submatrix(src, p0, p1, y):
    """Fill ``y`` in place with ``np.mean`` of each ``p0`` x ``p1`` window.

    Same algorithm as the pure-Python version, but decorated with
    ``numba.njit``. In the numba version this notebook was run with,
    compiling it raises a ``TypingError`` on the ``np.mean`` call; the
    notebook uses this cell to demonstrate that failure on purpose.
    """
    for z in range(y.shape[2]):  # one pass per channel (RGB in this notebook)
        for i in range(y.shape[0]):
            r0 = i * p0
            r1 = (i + 1) * p0
            for j in range(y.shape[1]):
                c0 = j * p1
                c1 = (j + 1) * p1
                y[i, j, z] = np.mean(src[r0:r1, c0:c1, z])
# In the recorded run this call raises numba's TypingError (see the traceback
# that follows): the jitted downsample_submatrix cannot be typed because of
# its np.mean call.
down_img = downsample(img, (2,2))
--------------------------------------------------------------------------- TypingError Traceback (most recent call last) <ipython-input-25-4ce2f8bdc591> in <module>() ----> 1 down_img = downsample(img, (2,2)) <ipython-input-21-9c04c653c994> in downsample(src, prop) 4 y = np.empty(yshape, src.dtype) 5 ----> 6 downsample_submatrix(src, p0, p1, y) 7 8 return y /home/gef/miniconda/envs/condaGIT/lib/python2.7/site-packages/numba/dispatcher.pyc in _compile_and_call(self, *args, **kws) 124 assert not kws 125 sig = tuple([typeof_pyval(a) for a in args]) --> 126 self.jit(sig) 127 return self(*args, **kws) 128 /home/gef/miniconda/envs/condaGIT/lib/python2.7/site-packages/numba/dispatcher.pyc in jit(self, sig, **kws) 119 """Alias of compile(sig, **kws) 120 """ --> 121 return self.compile(sig, **kws) 122 123 def _compile_and_call(self, *args, **kws): /home/gef/miniconda/envs/condaGIT/lib/python2.7/site-packages/numba/dispatcher.pyc in compile(self, sig, locals, **targetoptions) 107 cres = compiler.compile_extra(typingctx, targetctx, self.py_func, 108 args=args, return_type=return_type, --> 109 flags=flags, locals=locs) 110 111 # Check typing error if object mode is used /home/gef/miniconda/envs/condaGIT/lib/python2.7/site-packages/numba/compiler.pyc in compile_extra(typingctx, targetctx, func, args, return_type, flags, locals) 77 args, 78 return_type, ---> 79 locals) 80 except Exception as e: 81 if not flags.enable_pyobject: /home/gef/miniconda/envs/condaGIT/lib/python2.7/site-packages/numba/compiler.pyc in type_inference_stage(typingctx, interp, args, return_type, locals) 157 158 infer.build_constrain() --> 159 infer.propagate() 160 typemap, restype, calltypes = infer.unify() 161 /home/gef/miniconda/envs/condaGIT/lib/python2.7/site-packages/numba/typeinfer.pyc in propagate(self) 284 print("propagate".center(80, '-')) 285 oldtoken = newtoken --> 286 self.constrains.propagate(self.context, self.typevars) 287 newtoken = self.get_state_token() 288 if config.DEBUG: 
/home/gef/miniconda/envs/condaGIT/lib/python2.7/site-packages/numba/typeinfer.pyc in propagate(self, context, typevars) 103 raise 104 except Exception as e: --> 105 raise TypingError("Internal error:\n%s" % e, constrain.loc) 106 107 TypingError: Internal error: (Module(<module 'numpy' from '/home/gef/miniconda/envs/condaGIT/lib/python2.7/site-packages/numpy/__init__.pyc'>), 'mean') File "<ipython-input-24-4be62d0cd81a>", line 7
This error is on purpose: numba can't be used there because the call to np.mean() creates a new object and numba doesn't know how to deal with it (the TypingError above shows that numba failed to type numpy's 'mean'). Fortunately, we can create our own mean function and speed it up with numba.
@numba.njit
def mymean(x):
    """Return the arithmetic mean of a 2-D array as a float.

    Written with explicit loops so that numba can compile it in nopython
    mode instead of relying on ``np.mean``.

    Parameters
    ----------
    x : 2-D array
        ``x.shape`` must unpack into exactly two extents.

    Returns
    -------
    float
        The sum of ``x[i, j] / (rows * cols)`` over all elements; ``0.0``
        when ``x`` has no elements (the loops never run).
    """
    xs, ys = x.shape
    # Multiplying by 1. makes the element count a float, so the division
    # below is a true division even for integer arrays.
    factor = 1. * xs * ys
    s = 0.
    # ``range`` instead of ``xrange``: numba compiles both to the same loop,
    # but ``xrange`` does not exist on Python 3 and every other function in
    # this notebook already uses ``range``.
    for i in range(xs):
        for j in range(ys):
            s += x[i, j] / factor
    return s
def downsample_submatrix(src, p0, p1, y):
    """Fill ``y`` in place with block means computed by ``mymean``.

    The loops here run as ordinary Python (this function is not decorated);
    only the per-window mean is delegated to ``mymean``. Each ``y[i, j, z]``
    gets the mean of the ``p0`` x ``p1`` window of ``src`` starting at
    ``(i * p0, j * p1)`` in channel ``z``. Nothing is returned.
    """
    for z in range(y.shape[2]):  # one pass per channel (RGB in this notebook)
        for i in range(y.shape[0]):
            rows = slice(i * p0, (i + 1) * p0)
            for j in range(y.shape[1]):
                cols = slice(j * p1, (j + 1) * p1)
                y[i, j, z] = mymean(src[rows, cols, z])
# Warm-up call: the first call into a numba-jitted function triggers its
# compilation, which would otherwise be counted in the measurement below.
downsample(img, (2,2))
# Time the version whose per-window mean is compiled by numba.
t1 = time()
down_img = downsample(img, (2,2))
t2 = time()
# Elapsed seconds for this version; reused later for the speedup ratios.
elapsed2 = t2-t1
elapsed2
2.728079080581665
The code is now way faster than before. Let's see by how much.
# Speedup factor: pure-Python time divided by the time of the mymean version.
elapsed1/elapsed2
7.967275289355994
That is a good speedup, but we can do better. Notice how the loops of downsample_submatrix are not being optimized; let's put downsample_submatrix and mymean all together.
@numba.njit
def downsample_submatrix(src, p0, p1, y):
    """Fill ``y`` in place with block means of ``src``, fully jitted.

    The window mean is inlined here, so every loop is compiled by numba.
    Each ``y[i, j, z]`` is the sum, over the ``p0`` x ``p1`` window of
    ``src`` starting at ``(p0 * i, p1 * j)`` in channel ``z``, of the
    element multiplied by ``1 / (p0 * p1)``. Nothing is returned.
    """
    # Reciprocal of the window size, computed once; the leading 1. forces
    # floating-point arithmetic.
    weight = 1/(1. * p0 * p1)
    for z in range(y.shape[2]):  # one pass per channel (RGB in this notebook)
        for i in range(y.shape[0]):
            row0 = p0 * i
            for j in range(y.shape[1]):
                col0 = p1 * j
                acc = 0.
                for dk in range(p0):
                    for dl in range(p1):
                        acc += src[row0 + dk, col0 + dl, z] * weight
                y[i, j, z] = acc
# Warm-up call: the first call to the jitted downsample_submatrix compiles
# it for these argument types. Without this, the compilation time is counted
# in the measurement below and can dominate such a short run.
downsample(img, (2,2))
# Time the fully jitted version on an already-compiled function.
t1 = time()
down_img = downsample(img, (2,2))
t2 = time()
# Elapsed seconds for the fully jitted version; used for the final ratios.
elapsed3 = t2-t1
elapsed3
0.299915075302124
Wow! That is even better than before; let's see by how much.
# Report the speedups of the fully jitted version. The single-argument
# print(...) form prints the same text under the Python 2 print statement
# and the Python 3 print function, so this cell runs on both.
print(str(elapsed1/elapsed3) + ' times better than pure python')
print(str(elapsed2/elapsed3) + ' times better than just one function optimized')
72.4717056128 times better than pure python 9.09617190077 times better than just one function optimized
So, a few considerations:
Always use the @numba.njit decorator: it raises an error (like the TypingError above) when the code cannot be fully optimized, which helps you find those spots.
Do not create objects inside a function that you are going to optimize with numba.
If you are debugging and weird errors come up, try to run the code without the numba decorator.