# Numba: compilation "Just in Time" (jit)¶

Numba convertit les fonctions Python en code machine optimisé au moment de l'exécution à l'aide de la bibliothèque de compilation LLVM standard. Les algorithmes numériques compilés par Numba en Python peuvent alors approcher les vitesses du C ou du FORTRAN, là où les boucles classiques, comme en R et Matlab, peuvent être un peu lentes.

https://numba.pydata.org/

In [6]:
import numpy as np
import time
from numba import jit


# Exemple 1: Méthode de Monte Carlo pour approcher $\pi$¶

In [3]:
@jit(nopython=True)
def monte_carlo_pi(n_samples=1000):
    """Estimate pi by Monte Carlo sampling.

    Draws `n_samples` points uniformly in the unit square and counts
    those falling inside the quarter disc (Euclidean norm < 1); the hit
    ratio times 4 approximates pi.
    """
    n_inside = 0
    for _ in range(n_samples):
        point = np.random.rand(2)
        if np.linalg.norm(point) < 1.0:
            n_inside += 1
    return 4.0 * n_inside / n_samples

In [4]:
# First call: the measured time INCLUDES JIT compilation — do not report it.
start = time.time()
monte_carlo_pi(n_samples=10000000)
end = time.time()
print("Elapsed (with compilation) = %s" % (end - start))

# The function is now compiled; re-time a call served from the cache.
start = time.time()
monte_carlo_pi(n_samples=1000)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))

Elapsed (with compilation) = 4.017842769622803
Elapsed (after compilation) = 0.000545501708984375


# Exemple 2: fonction matricielle (sans utiliser numpy)¶

In [5]:
def go_slow(a):
    """Pure-Python baseline (NOT jit-compiled): sum of tanh over the superdiagonal.

    Iterates row by row so it can be compared against the numba-compiled
    twin `go_fast` defined below.
    """
    total = 0
    for row in range(a.shape[0] - 1):
        total += np.tanh(a[row, row + 1])
    return total

all_n_samples = [1000, 5000, 10000]
t0 = []  # jit, first call (compilation included)
t1 = []  # jit, second call (from compiled cache)
t2 = []  # pure-Python baseline

for n_samples in all_n_samples:
    print(n_samples)
    x = np.arange(n_samples ** 2).reshape(n_samples, n_samples)

    # Redefined inside the loop on purpose: every iteration pays the
    # compilation cost again, so t0 always includes it.
    @jit(nopython=True)
    def go_fast(a):
        """Jitted twin of go_slow: sum of tanh over the superdiagonal."""
        trace = 0
        for i in range(a.shape[0] - 1):
            trace += np.tanh(a[i, i + 1])
        return trace

    # First call: compilation included.
    start = time.time()
    go_fast(x)
    end = time.time()
    t0.append(end - start)
    print("Elapsed (with compilation) = %s" % (end - start))

    # Second call: runs straight from the compiled cache.
    start = time.time()
    go_fast(x)
    end = time.time()
    t1.append(end - start)
    print("Elapsed (after compilation) = %s" % (end - start))

    # Plain-Python baseline.
    start = time.time()
    go_slow(x)
    end = time.time()
    t2.append(end - start)
    print("Elapsed (vanilla) = %s" % (end - start))

t0 = np.array(t0)
t1 = np.array(t1)
t2 = np.array(t2)

print(all_n_samples)
print("Améliorations en pourcentage par rapport au code vanilla")
print((t0 - t2) / t2 * 100)
print((t1 - t2) / t2 * 100)

1000
Elapsed (with compilation) = 0.07340669631958008
Elapsed (after compilation) = 1.2159347534179688e-05
Elapsed (vanilla) = 0.002764463424682617
5000
Elapsed (with compilation) = 0.050783395767211914
Elapsed (after compilation) = 8.559226989746094e-05
Elapsed (vanilla) = 0.011275768280029297
10000
Elapsed (with compilation) = 0.056412458419799805
Elapsed (after compilation) = 0.00040268898010253906
Elapsed (vanilla) = 0.02436542510986328
[1000, 5000, 10000]
Améliorations en pourcentage par rapport au code vanilla
[2555.3686934   350.3763691   131.52667423]
[-99.56015524 -99.24091851 -98.34729344]


# Exemple 3: descente de gradient avec/sans numba.¶

In [ ]:
# Synthetic least-squares problem for the gradient-descent example.
n_samples = 1000
n_features = 500
n_iterations = 2000

X = np.random.randn(n_samples, n_features)  # design matrix
y = np.random.randn(n_samples)              # random targets
y[n_samples // 2:] = 0                      # second half of the targets forced to zero

w = np.zeros(n_features)  # init = 0

In [ ]:
@jit(nopython=True)
def gradient(X, y, w, step_size=0.01, max_iter=1000):
    """Least-squares gradient descent with a constant step size.

    Parameters
    ----------
    X : 2-D array, design matrix.
    y : 1-D array, targets.
    w : 1-D array, initial weights (NOT modified — see below).
    step_size : constant learning rate.
    max_iter : number of descent iterations.

    Returns the fitted weight vector.

    Fix: the original updated `w` in place with `-=`, silently mutating
    the caller's array and making notebook cells non-idempotent on
    re-run; we now descend on a local copy.
    """
    w = w.copy()  # do not mutate the caller's weights
    for _ in range(max_iter):
        w -= step_size * (X.T.dot(X.dot(w) - y))
    return w

# Fix: the original timed an EMPTY span (no call between start and end),
# so both prints measured nothing. Call `gradient` inside each timing.
# First call: the measured time includes JIT compilation — do not report it.
start = time.time()
gradient(X, y, w, max_iter=n_iterations)
end = time.time()
print("Elapsed (with compilation) = %s" % (end - start))

# The function is now compiled; re-time a call served from the cache.
start = time.time()
gradient(X, y, w, max_iter=n_iterations)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))


# Exemple 4: Régression logistique¶

In [ ]:
# Random binary labels mapped to {-1, +1} for logistic regression.
y = np.random.randint(2, size=n_samples) * 2 - 1
print(y)
w = np.zeros(n_features)  # init = 0

In [ ]:
def logistic_regression_no_jit(y, X, w, iterations=1000):
    """Logistic-regression gradient descent, pure NumPy (no numba).

    Parameters
    ----------
    y : 1-D array of labels in {-1, +1}.
    X : 2-D design matrix.
    w : 1-D initial weight vector (NOT modified — see below).
    iterations : number of full-gradient steps.

    Returns the fitted weight vector.

    Fix: the original `w -= ...` mutated the caller's array, so running
    this cell silently changed the `w` later fed to the jitted version,
    skewing the benchmark. We now descend on a local copy.
    """
    w = w.copy()  # keep the caller's weights untouched
    for _ in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-y * np.dot(X, w))) - 1.0) * y), X)
    return w

In [ ]:
# Fix: the original printed "Elapsed (with compilation)" here, but this
# function is plain Python — nothing is compiled. Label corrected.
start = time.time()
logistic_regression_no_jit(y, X, w, iterations=n_iterations)
end = time.time()
print("Elapsed (no jit) = %s" % (end - start))

In [ ]:
@jit(nopython=True)
def logistic_regression(y, X, w, iterations=1000):
    """Logistic-regression gradient descent, numba-compiled.

    Parameters
    ----------
    y : 1-D array of labels in {-1, +1}.
    X : 2-D design matrix.
    w : 1-D initial weight vector (NOT modified — see below).
    iterations : number of full-gradient steps.

    Returns the fitted weight vector.

    Fix: the original `w -= ...` mutated the caller's array, making the
    two timed runs below start from different weights; we now descend on
    a local copy so every call starts from the same initial `w`.
    """
    w = w.copy()  # keep the caller's weights untouched
    for _ in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-y * np.dot(X, w))) - 1.0) * y), X)
    return w

In [ ]:
# First call: the measured time INCLUDES JIT compilation — do not report it.
start = time.time()
logistic_regression(y, X, w, iterations=n_iterations)
end = time.time()
print("Elapsed (with compilation) = %s" % (end - start))

# The function is now compiled; re-time a call served from the cache.
start = time.time()
logistic_regression(y, X, w, iterations=n_iterations)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))

In [ ]: