%load_ext cythonmagic # Define our test array import numpy as np X = np.random.random((500, 3)) output = np.random.random((500,500)) # only used for Numba, declared here so we make sure the dimensions remain consistent import numpy as np def euclidean_distance(x1, x2): x1 = np.asarray(x1) x2 = np.asarray(x2) return np.sqrt(np.sum((x1 - x2) ** 2)) def pairwise_v1(X, metric=euclidean_distance): X = np.asarray(X) n_samples, n_dim = X.shape D = np.empty((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): D[i, j] = metric(X[i], X[j]) return D %timeit pairwise_v1(X) %%cython import numpy as np cimport numpy as np from libc.math cimport sqrt cimport cython # define a function pointer to a metric ctypedef double (*metric_ptr)(np.ndarray, np.ndarray) @cython.boundscheck(False) @cython.wraparound(False) cdef double euclidean_distance(np.ndarray[double, ndim=1, mode='c'] x1, np.ndarray[double, ndim=1, mode='c'] x2): cdef double tmp, d cdef np.intp_t i, N d = 0 N = x1.shape[0] # assume x2 has the same shape as x1. This could be dangerous! for i in range(N): tmp = x1[i] - x2[i] d += tmp * tmp return sqrt(d) @cython.boundscheck(False) @cython.wraparound(False) def pairwise_v2(np.ndarray[double, ndim=2, mode='c'] X not None, metric = 'euclidean'): cdef metric_ptr dist_func if metric == 'euclidean': dist_func = &euclidean_distance else: raise ValueError("unrecognized metric") cdef np.intp_t i, j, n_samples n_samples = X.shape[0] cdef np.ndarray[double, ndim=2, mode='c'] D = np.empty((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): D[i, j] = dist_func(X[i], X[j]) return D %timeit pairwise_v2(X) %%cython import numpy as np cimport numpy as np from libc.math cimport sqrt cimport cython # define a function pointer to a metric ctypedef double (*metric_ptr)(double[::1], double[::1]) @cython.boundscheck(False) @cython.wraparound(False) cdef double euclidean_distance(double[::1] x1, double[::1] x2): cdef double tmp, d cdef np.intp_t i, N d = 0 N = x1.shape[0] # assume x2 has the same shape as x1. This could be dangerous! for i in range(N): tmp = x1[i] - x2[i] d += tmp * tmp return sqrt(d) @cython.boundscheck(False) @cython.wraparound(False) def pairwise_v3(double[:, ::1] X, metric = 'euclidean'): cdef metric_ptr dist_func if metric == 'euclidean': dist_func = &euclidean_distance else: raise ValueError("unrecognized metric") cdef np.intp_t i, j, n_samples n_samples = X.shape[0] cdef double[:, ::1] D = np.empty((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): D[i, j] = dist_func(X[i], X[j]) return D %timeit pairwise_v3(X) %%cython import numpy as np cimport numpy as np from libc.math cimport sqrt cimport cython # define a function pointer to a metric ctypedef double (*metric_ptr)(double*, double*, int) @cython.boundscheck(False) @cython.wraparound(False) cdef double euclidean_distance(double* x1, double* x2, int N): cdef double tmp, d cdef np.intp_t i d = 0 for i in range(N): tmp = x1[i] - x2[i] d += tmp * tmp return sqrt(d) @cython.boundscheck(False) @cython.wraparound(False) def pairwise_v4(double[:, ::1] X, metric = 'euclidean'): cdef metric_ptr dist_func if metric == 'euclidean': dist_func = &euclidean_distance else: raise ValueError("unrecognized metric") cdef np.intp_t i, j, n_samples, n_dim n_samples = X.shape[0] n_dim = X.shape[1] cdef double[:, ::1] D = np.empty((n_samples, n_samples)) cdef double* Dptr = &D[0, 0] cdef double* Xptr = &X[0, 0] for i in range(n_samples): for j in range(n_samples): Dptr[i * n_samples + j] = dist_func(Xptr + i * n_dim, Xptr + j * n_dim, n_dim) return D %timeit pairwise_v4(X) %%cython import numpy as np cimport numpy as np from libc.math cimport sqrt cimport cython # define a function pointer to a metric ctypedef double (*metric_ptr)(double[:, ::1], np.intp_t, np.intp_t) @cython.boundscheck(False) @cython.wraparound(False) cdef inline double euclidean_distance(double[:, ::1] X, np.intp_t i1, np.intp_t i2): cdef double tmp, d cdef np.intp_t j d = 0 for j in range(X.shape[1]): tmp = X[i1, j] - X[i2, j] d += tmp * tmp return sqrt(d) @cython.boundscheck(False) @cython.wraparound(False) def pairwise_v5(double[:, ::1] X, metric = 'euclidean'): cdef metric_ptr dist_func if metric == 'euclidean': dist_func = &euclidean_distance else: raise ValueError("unrecognized metric") cdef np.intp_t i, j, n_samples, n_dim n_samples = X.shape[0] n_dim = X.shape[1] cdef double[:, ::1] D = np.empty((n_samples, n_samples)) for i in range(n_samples): for j in range(n_samples): D[i, j] = dist_func(X, i, j) return D %timeit pairwise_v5(X) from numba.decorators import jit as jit @jit(arg_types=[[['d']], [['d']]], ret_type=[['d']]) def pairwise_numba(X, output): n_samples, n_dim = X.shape n_samples1, n_samples2 = output.shape for ii in range(n_samples): for jj in range(n_samples): result = 0.0; for kk in range(n_dim): result += (X[ii,kk] - X[jj,kk]) * (X[ii,kk] - X[jj,kk]) output[ii,jj] = result return output import time start = time.time() pairwise_numba(X, output) end = time.time() print "Result from compiled is in %s (msec)" % ((end-start)*1000)