#!/usr/bin/env python
# coding: utf-8

# # Numba: "Just in Time" (jit) compilation
#
# Numba translates Python functions into optimized machine code at runtime, using the
# standard LLVM compiler library. Numerical algorithms written in Python and compiled
# with Numba can then approach the speed of C or FORTRAN, whereas plain interpreted
# loops (as in R or Matlab) can be rather slow.
#
# https://numba.pydata.org/

# In[6]:


import numpy as np
import time
from numba import jit


# # Example 1: Monte Carlo method to approximate $\pi$

# In[3]:


@jit(nopython=True)
def monte_carlo_pi(n_samples=1000):
    acc = 0
    for sample in range(n_samples):
        vec = np.random.rand(2)
        if np.linalg.norm(vec) < 1.:
            acc += 1
    return 4.0 * acc / n_samples


# In[4]:


# DO NOT REPORT THIS... COMPILATION TIME IS INCLUDED IN THE EXECUTION TIME!
start = time.time()
monte_carlo_pi(n_samples=10000000)
end = time.time()
print("Elapsed (with compilation) = %s" % (end - start))

# NOW THE FUNCTION IS COMPILED, RE-TIME IT EXECUTING FROM CACHE
start = time.time()
monte_carlo_pi(n_samples=1000)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))


# # Example 2: matrix function (explicit loop instead of vectorized numpy)

# In[5]:


def go_slow(a):
    # Plain Python version: interpreted, not compiled
    trace = 0
    for i in range(a.shape[0] - 1):
        trace += np.tanh(a[i, i + 1])
    return trace


all_n_samples = [1000, 5000, 10000]
t0 = []
t1 = []
t2 = []
for n_samples in all_n_samples:
    print(n_samples)
    x = np.arange(n_samples ** 2).reshape(n_samples, n_samples)

    # go_fast is redefined inside the loop so the compilation cost is paid
    # again for each matrix size.
    @jit(nopython=True)
    def go_fast(a):
        # Function is compiled and runs in machine code
        trace = 0
        for i in range(a.shape[0] - 1):
            trace += np.tanh(a[i, i + 1])
        return trace

    # COMPILATION INCLUDED!
    start = time.time()
    go_fast(x)
    end = time.time()
    t0.append(end - start)
    print("Elapsed (with compilation) = %s" % (end - start))

    # COMPILATION NOT INCLUDED, EXECUTED FROM CACHE
    start = time.time()
    go_fast(x)
    end = time.time()
    t1.append(end - start)
    print("Elapsed (after compilation) = %s" % (end - start))

    # VANILLA PYTHON
    start = time.time()
    go_slow(x)
    end = time.time()
    t2.append(end - start)
    print("Elapsed (vanilla) = %s" % (end - start))

t0 = np.array(t0)
t1 = np.array(t1)
t2 = np.array(t2)
print(all_n_samples)
print("Percentage change in runtime relative to the vanilla code")
print((t0 - t2) / t2 * 100)
print((t1 - t2) / t2 * 100)


# # Example 3: gradient descent with/without numba.

# In[ ]:


n_samples = 1000
n_features = 500
n_iterations = 2000

X = np.random.randn(n_samples, n_features)
y = np.random.randn(n_samples)
y[n_samples // 2:] = 0
w = np.zeros(n_features)  # init = 0


# In[ ]:


@jit(nopython=True)  # Function is compiled and runs in machine code
def gradient(X, y, w, step_size=0.01, max_iter=1000):
    """Gradient descent with constant step size."""
    for k in range(max_iter):
        w -= step_size * (X.T.dot(X.dot(w) - y))
    return w
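# The update above is plain gradient descent on the least-squares objective
# $\frac{1}{2}\|Xw - y\|^2$, whose gradient is $X^\top (Xw - y)$.
#
# A minimal sketch (not in the original notebook): `gradient_no_jit` is an uncompiled
# copy of the same routine, added here only so the speed-up provided by @jit can be
# measured directly; the function name and this timing cell are illustrative additions.

def gradient_no_jit(X, y, w, step_size=0.01, max_iter=1000):
    """Gradient descent with constant step size (plain NumPy, no compilation)."""
    for k in range(max_iter):
        w -= step_size * (X.T.dot(X.dot(w) - y))
    return w


# A fresh zero vector is used so the original benchmark below still starts from w = 0.
start = time.time()
gradient_no_jit(X, y, np.zeros(n_features))
end = time.time()
print("Elapsed (no jit) = %s" % (end - start))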
# DO NOT REPORT THIS... COMPILATION TIME IS INCLUDED IN THE EXECUTION TIME!
start = time.time()
gradient(X, y, w)
end = time.time()
print("Elapsed (with compilation) = %s" % (end - start))

# NOW THE FUNCTION IS COMPILED, RE-TIME IT EXECUTING FROM CACHE
start = time.time()
gradient(X, y, w)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))


# # Example 4: Logistic regression

# In[ ]:


y = np.random.randint(2, size=n_samples) * 2 - 1  # labels in {-1, +1}
print(y)
w = np.zeros(n_features)  # init = 0


# In[ ]:


def logistic_regression_no_jit(y, X, w, iterations=1000):
    # Gradient descent (unit step size) on the logistic loss
    # $\sum_i \log(1 + \exp(-y_i x_i^\top w))$.
    for i in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-y * np.dot(X, w))) - 1.0) * y), X)
    return w


# In[ ]:


start = time.time()
logistic_regression_no_jit(y, X, w, iterations=n_iterations)
end = time.time()
print("Elapsed (no jit) = %s" % (end - start))


# In[ ]:


@jit(nopython=True)
def logistic_regression(y, X, w, iterations=1000):
    for i in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-y * np.dot(X, w))) - 1.0) * y), X)
    return w


# In[ ]:


# DO NOT REPORT THIS... COMPILATION TIME IS INCLUDED IN THE EXECUTION TIME!
start = time.time()
logistic_regression(y, X, w, iterations=n_iterations)
end = time.time()
print("Elapsed (with compilation) = %s" % (end - start))

# NOW THE FUNCTION IS COMPILED, RE-TIME IT EXECUTING FROM CACHE
start = time.time()
logistic_regression(y, X, w, iterations=n_iterations)
end = time.time()
print("Elapsed (after compilation) = %s" % (end - start))


# In[ ]:
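# A minimal sketch (not in the original notebook): with `cache=True`, Numba writes the
# compiled machine code to disk (in __pycache__ next to the script), so later runs of
# the script skip the compilation step entirely. `logistic_regression_cached` is a
# hypothetical name introduced here for illustration only.

@jit(nopython=True, cache=True)
def logistic_regression_cached(y, X, w, iterations=1000):
    for i in range(iterations):
        w -= np.dot(((1.0 / (1.0 + np.exp(-y * np.dot(X, w))) - 1.0) * y), X)
    return w


start = time.time()
logistic_regression_cached(y, X, np.zeros(n_features), iterations=n_iterations)
end = time.time()
print("Elapsed (cache=True, first call of this run) = %s" % (end - start))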