This notebook first appeared as a post by Jake Vanderplas on the blog Pythonic Perambulations
import numpy as np
X = np.random.random((1000, 3))
def pairwise_numpy(X):
return np.sqrt(((X[:, None, :] - X) ** 2).sum(-1))
%timeit -n 10 pairwise_numpy(X)
10 loops, best of 3: 65.1 ms per loop
def pairwise_python(X):
M = X.shape[0]
N = X.shape[1]
D = np.empty((M, M), dtype=np.float)
for i in xrange(M):
for j in xrange(M):
d = 0.0
for k in xrange(N):
tmp = X[i, k] - X[j, k]
d += tmp * tmp
D[i, j] = np.sqrt(d)
return D
%timeit -n 1 pairwise_python(X)
1 loops, best of 3: 8.24 s per loop
%%pypy
import timeit
setup = """
import numpypy as np
import random
X = np.zeros((1000, 3))
for i in xrange(X.shape[0]):
for j in xrange(X.shape[1]):
X[i, j] = random.random()
def pairwise_pypy(X):
M = X.shape[0]
N = X.shape[1]
D = np.empty((M, M), dtype=float)
for i in xrange(M):
for j in xrange(M):
d = 0.0
for k in xrange(N):
tmp = X[i, k] - X[j, k]
d += tmp * tmp
D[i, j] = np.sqrt(d)
return D
"""
print timeit.timeit('pairwise_pypy(X)', setup, number=100)/100.
0.071649107933
from numba import double
from numba.decorators import jit, autojit
pairwise_numba = autojit(pairwise_python)
%timeit -n 100 pairwise_numba(X)
100 loops, best of 3: 8 ms per loop
%load_ext cythonmagic
%%cython
import numpy as np
cimport cython
from libc.math cimport sqrt
@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_cython(double[:, ::1] X):
cdef unsigned M = X.shape[0]
cdef unsigned N = X.shape[1]
cdef double tmp, d
cdef double[:, ::1] D = np.empty((M, M), dtype=np.float64)
cdef unsigned i, j, k
for i in range(M):
for j in range(M):
d = 0.0
for k in range(N):
tmp = X[i, k] - X[j, k]
d += tmp * tmp
D[i, j] = sqrt(d)
return np.asarray(D)
%timeit -n 100 pairwise_cython(X)
100 loops, best of 3: 7.97 ms per loop
from scipy.spatial.distance import cdist
%timeit cdist(X, X)
100 loops, best of 3: 9 ms per loop
from sklearn.metrics import euclidean_distances
%timeit euclidean_distances(X, X)
100 loops, best of 3: 17.4 ms per loop
%pylab inline
Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.zmq.pylab.backend_inline]. For more information, type 'help(pylab)'.
labels = ['python\nloop', 'pypy', 'numpy\nbroadc.', 'sklearn', 'scipy', 'cython', 'numba']
timings = [8.24, 0.0716, 0.0651, 0.0174, 0.009, 0.00797, 0.00800]
x = np.arange(len(labels))
ax = plt.axes(xticks=x, yscale='log')
ax.bar(x - 0.3, timings, width=0.6, alpha=0.4, bottom=1E-6)
ax.grid()
ax.set_xlim(-0.5, len(labels) - 0.5)
ax.set_ylim(1E-3, 1E2)
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda i, loc: labels[int(i)]))
ax.set_ylabel('time (s)')
ax.set_title("Pairwise Distance Timings");