# Numba vs. Cython: Take 2.2¶

This notebook first appeared as a post by Jake Vanderplas on the blog Pythonic Perambulations

In [1]:
import numpy as np
X = np.random.random((1000, 3))


In [2]:
def pairwise_numpy(X):
return np.sqrt(((X[:, None, :] - X) ** 2).sum(-1))
%timeit -n 10 pairwise_numpy(X)

10 loops, best of 3: 65.1 ms per loop



## Pure Python Function¶

In [3]:
def pairwise_python(X):
M = X.shape[0]
N = X.shape[1]
D = np.empty((M, M), dtype=np.float)
for i in xrange(M):
for j in xrange(M):
d = 0.0
for k in xrange(N):
tmp = X[i, k] - X[j, k]
d += tmp * tmp
D[i, j] = np.sqrt(d)
return D
%timeit -n 1 pairwise_python(X)

1 loops, best of 3: 8.24 s per loop



## PyPy¶

In [4]:
%%pypy
import timeit

setup = """
import numpypy as np
import random

X = np.zeros((1000, 3))
for i in xrange(X.shape[0]):
for j in xrange(X.shape[1]):
X[i, j] = random.random()

def pairwise_pypy(X):
M = X.shape[0]
N = X.shape[1]
D = np.empty((M, M), dtype=float)
for i in xrange(M):
for j in xrange(M):
d = 0.0
for k in xrange(N):
tmp = X[i, k] - X[j, k]
d += tmp * tmp
D[i, j] = np.sqrt(d)
return D
"""
print timeit.timeit('pairwise_pypy(X)', setup, number=100)/100.

0.071649107933



## Numba Wrapper¶

In [5]:
from numba import double
from numba.decorators import jit, autojit

pairwise_numba = autojit(pairwise_python)

%timeit -n 100 pairwise_numba(X)

100 loops, best of 3: 8 ms per loop



## Optimized Cython Function¶

In [6]:
%load_ext cythonmagic

In [7]:
%%cython

import numpy as np
cimport cython
from libc.math cimport sqrt

@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_cython(double[:, ::1] X):
cdef unsigned M = X.shape[0]
cdef unsigned N = X.shape[1]
cdef double tmp, d
cdef double[:, ::1] D = np.empty((M, M), dtype=np.float64)
cdef unsigned i, j, k
for i in range(M):
for j in range(M):
d = 0.0
for k in range(N):
tmp = X[i, k] - X[j, k]
d += tmp * tmp
D[i, j] = sqrt(d)
return np.asarray(D)

In [8]:
%timeit -n 100 pairwise_cython(X)

100 loops, best of 3: 7.97 ms per loop



## Scipy Pairwise Distances¶

In [9]:
from scipy.spatial.distance import cdist
%timeit cdist(X, X)

100 loops, best of 3: 9 ms per loop



## Scikit-learn Pairwise Distances¶

In [10]:
from sklearn.metrics import euclidean_distances
%timeit euclidean_distances(X, X)

100 loops, best of 3: 17.4 ms per loop



## Comparing the Results¶

In [11]:
%pylab inline


Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.zmq.pylab.backend_inline].


In [12]:
labels = ['python\nloop', 'pypy', 'numpy\nbroadc.', 'sklearn', 'scipy', 'cython', 'numba']
timings = [8.24, 0.0716, 0.0651, 0.0174, 0.009, 0.00797, 0.00800]
x = np.arange(len(labels))

ax = plt.axes(xticks=x, yscale='log')
ax.bar(x - 0.3, timings, width=0.6, alpha=0.4, bottom=1E-6)
ax.grid()
ax.set_xlim(-0.5, len(labels) - 0.5)
ax.set_ylim(1E-3, 1E2)
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda i, loc: labels[int(i)]))
ax.set_ylabel('time (s)')
ax.set_title("Pairwise Distance Timings");

In []: