%load_ext cythonmagic # Define our test array import numpy as np X = np.random.random((500, 3)) import numpy as np def euclidean_distance(x1, x2): x1 = np.asarray(x1) x2 = np.asarray(x2) return np.sqrt(np.sum((x1 - x2) ** 2)) def pairwise_v1(X, metric=euclidean_distance): X = np.asarray(X) n_samples, n_dim = X.shape D = np.empty((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): D[i, j] = metric(X[i], X[j]) return D %timeit pairwise_v1(X) %%cython import numpy as np cimport numpy as np from libc.math cimport sqrt cimport cython # define a function pointer to a metric ctypedef double (*metric_ptr)(np.ndarray, np.ndarray) @cython.boundscheck(False) @cython.wraparound(False) cdef double euclidean_distance(np.ndarray[double, ndim=1, mode='c'] x1, np.ndarray[double, ndim=1, mode='c'] x2): cdef double tmp, d cdef np.intp_t i, N d = 0 N = x1.shape[0] # assume x2 has the same shape as x1. This could be dangerous! for i in range(N): tmp = x1[i] - x2[i] d += tmp * tmp return sqrt(d) @cython.boundscheck(False) @cython.wraparound(False) def pairwise_v2(np.ndarray[double, ndim=2, mode='c'] X not None, metric = 'euclidean'): cdef metric_ptr dist_func if metric == 'euclidean': dist_func = &euclidean_distance else: raise ValueError("unrecognized metric") cdef np.intp_t i, j, n_samples n_samples = X.shape[0] cdef np.ndarray[double, ndim=2, mode='c'] D = np.empty((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): D[i, j] = dist_func(X[i], X[j]) return D %timeit pairwise_v2(X) %%cython import numpy as np cimport numpy as np from libc.math cimport sqrt cimport cython # define a function pointer to a metric ctypedef double (*metric_ptr)(double[::1], double[::1]) @cython.boundscheck(False) @cython.wraparound(False) cdef double euclidean_distance(double[::1] x1, double[::1] x2): cdef double tmp, d cdef np.intp_t i, N d = 0 N = x1.shape[0] # assume x2 has the same shape as x1. This could be dangerous! for i in range(N): tmp = x1[i] - x2[i] d += tmp * tmp return sqrt(d) @cython.boundscheck(False) @cython.wraparound(False) def pairwise_v3(double[:, ::1] X not None, metric = 'euclidean'): cdef metric_ptr dist_func if metric == 'euclidean': dist_func = &euclidean_distance else: raise ValueError("unrecognized metric") cdef np.intp_t i, j, n_samples n_samples = X.shape[0] cdef double[:, ::1] D = np.empty((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): D[i, j] = dist_func(X[i], X[j]) return D %timeit pairwise_v3(X) %%cython import numpy as np cimport numpy as np from libc.math cimport sqrt cimport cython # define a function pointer to a metric ctypedef double (*metric_ptr)(double*, double*, int) @cython.boundscheck(False) @cython.wraparound(False) cdef double euclidean_distance(double* x1, double* x2, int N): cdef double tmp, d cdef np.intp_t i d = 0 for i in range(N): tmp = x1[i] - x2[i] d += tmp * tmp return sqrt(d) @cython.boundscheck(False) @cython.wraparound(False) def pairwise_v4(double[:, ::1] X not None, metric = 'euclidean'): cdef metric_ptr dist_func if metric == 'euclidean': dist_func = &euclidean_distance else: raise ValueError("unrecognized metric") cdef np.intp_t i, j, n_samples, n_dim n_samples = X.shape[0] n_dim = X.shape[1] cdef double[:, ::1] D = np.empty((n_samples, n_samples)) cdef double* Dptr = &D[0, 0] cdef double* Xptr = &X[0, 0] for i in range(n_samples): for j in range(n_samples): Dptr[i * n_samples + j] = dist_func(Xptr + i * n_dim, Xptr + j * n_dim, n_dim) return D %timeit pairwise_v4(X) %%cython import numpy as np cimport numpy as np from libc.math cimport sqrt cimport cython # define a function pointer to a metric ctypedef double (*metric_ptr)(double[:, ::1], np.intp_t, np.intp_t) @cython.boundscheck(False) @cython.wraparound(False) cdef double euclidean_distance(double[:, ::1] X, np.intp_t i1, np.intp_t i2): cdef double tmp, d cdef np.intp_t j d = 0 for j in range(X.shape[1]): tmp = X[i1, j] - X[i2, j] d += tmp * tmp return sqrt(d) @cython.boundscheck(False) @cython.wraparound(False) def pairwise_v5(double[:, ::1] X not None, metric = 'euclidean'): cdef metric_ptr dist_func if metric == 'euclidean': dist_func = &euclidean_distance else: raise ValueError("unrecognized metric") cdef np.intp_t i, j, n_samples, n_dim n_samples = X.shape[0] n_dim = X.shape[1] cdef double[:, ::1] D = np.empty((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): D[i, j] = dist_func(X, i, j) return D %timeit pairwise_v5(X) from scipy.spatial.distance import cdist %timeit cdist(X, X) from sklearn.metrics import pairwise_distances %timeit pairwise_distances(X, X)