import numpy as np def numpy_regular(A, B): return(A*B-4.1*A > 2.5*B) import numexpr as ne def numexpr_evaluate(A, B): return ne.evaluate('A*B-4.1*A > 2.5*B') from numba import jit @jit def numba_jit(A, B): return A*B-4.1*A > 2.5*B from numba import vectorize @vectorize(['u1(float64, float64)']) def numba_vectorized(A,B): return A*B+4.1*A > 2.5*B from parakeet import jit as para_jit @para_jit def parakeet_jit(A, B): return A*B-4.1*A > 2.5*B %load_ext cythonmagic %%cython import numpy as np cimport numpy as np cimport cython @cython.boundscheck(False) @cython.wraparound(False) cpdef cython_unrolled(double[:, :] A, double[:, :] B): cdef long i, j, rows, cols cdef double[:, :] C rows = A.shape[0] cols = A.shape[1] C = np.empty((rows,cols,)) for i in xrange(rows): for j in xrange(cols): C[i,j] = A[i,j]*B[i,j] + 4.1*A[i,j] > 2.5*B[i,j] return np.asarray(C) import timeit orders_n = [10**n for n in range(1, 5)] funcs = ['numpy_regular', 'numba_jit', 'numba_vectorized', 'parakeet_jit', 'cython_unrolled', 'numexpr_evaluate' ] timings = {f:[] for f in funcs} for n in orders_n: for f in funcs: A = np.random.rand(n,n) B = np.random.rand(n,n) timings[f].append(min(timeit.Timer('%s(A, B)' %f, 'from __main__ import A, B, %s' %f) .repeat(repeat=3, number=10))) import platform from llvm import __version__ as llvm__version__ from parakeet import __version__ as parakeet__version__ from numba import __version__ as numba__version__ def print_sysinfo(): print '\nsystem :', platform.system() print 'release :', platform.release() print 'machine :', platform.machine() print 'processor:', platform.processor() print 'interpreter:', platform.architecture()[0] print '\nPython version', platform.python_version() print 'compiler', platform.python_compiler() print 'NumPy version', np.__version__ print 'Numexpr version', ne.__version__ print 'parakeet version', parakeet__version__ print 'Numba version', numba__version__ print 'llvm version', llvm__version__ print '\n\n' import prettytable def summary_table(funcs): fit_table = prettytable.PrettyTable(['n=%s' %orders_n[-1], 'function' , 'rel. performance gain']) fit_table.align['function'] = 'l' for f in funcs: fit_table.add_row(['', f, '{:.2f}x'.format( (timings['numpy_regular'][-1]/timings[f][-1]))]) print(fit_table) %matplotlib inline import matplotlib.pyplot as plt def plot_figures(funcs): fig = plt.figure(figsize=(8,8)) for f in funcs: plt.plot([i**2 for i in orders_n], [i*100 for i in timings[f]], alpha=0.5, label='%s' %f, marker='o', lw=2) plt.legend(loc='upper left') plt.xscale('log') plt.yscale('log') plt.grid() plt.xlabel('number of elements per matrix') plt.ylabel('time in milliseconds') plt.title('Approaches to evaluate the NumPy array expression "A*B-4.1*A > 2.5*B"') plt.show() print_sysinfo() summary_table(funcs) plot_figures(funcs) import multiprocessing print('This benchmark was done on a machine with %s CPUs', multiprocessing.cpu_count())