This notebook contains the implementations of the benchmarks described at http://jakevdp.github.com/blog/2012/08/08/memoryview-benchmarks.
Here we use IPython's Cython magic to compile and run the benchmarks.
%load_ext cythonmagic
# Define our test array
import numpy as np
X = np.random.random((500, 3))
# Result buffer, only used by the Numba version. Its shape is derived from X
# (one row and one column per sample) so the dimensions always stay consistent.
output = np.random.random((X.shape[0], X.shape[0]))
import numpy as np
def euclidean_distance(x1, x2):
    """Return the Euclidean distance between two points.

    Parameters
    ----------
    x1, x2 : array_like
        Coordinates of the two points. Both are converted with
        ``np.asarray`` and subtracted element-wise.

    Returns
    -------
    float
        Square root of the sum of the squared element-wise differences.
    """
    diff = np.asarray(x1) - np.asarray(x2)
    return np.sqrt(np.sum(diff ** 2))
def pairwise_v1(X, metric=euclidean_distance):
    """Compute all pairwise distances between the rows of X in pure Python.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_dim)
        Input points, one per row. Converted with ``np.asarray``.
    metric : callable, optional
        Function called as ``metric(X[i], X[j])`` that returns the distance
        between two rows. Defaults to ``euclidean_distance``.

    Returns
    -------
    D : ndarray, shape (n_samples, n_samples)
        ``D[i, j]`` holds ``metric(X[i], X[j])``.
    """
    X = np.asarray(X)
    # Unpacking the shape (instead of reading X.shape[0]) keeps the implicit
    # check that X is two-dimensional.
    n_samples, _ = X.shape
    D = np.empty((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = metric(X[i], X[j])
    return D
%timeit pairwise_v1(X)
1 loops, best of 3: 5.19 s per loop
%%cython
import numpy as np
cimport numpy as np
from libc.math cimport sqrt
cimport cython
# Function-pointer type for a metric: takes two NumPy arrays and returns
# a C double.
ctypedef double (*metric_ptr)(np.ndarray, np.ndarray)
# Euclidean distance between two C-contiguous 1-D arrays of doubles.
# NOTE: x2 is assumed to have the same length as x1. Bounds checking is
# disabled, so a shorter x2 would be read out of bounds.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(np.ndarray[double, ndim=1, mode='c'] x1,
                               np.ndarray[double, ndim=1, mode='c'] x2):
    cdef np.intp_t k, n_dim
    cdef double diff, total

    total = 0
    n_dim = x1.shape[0]
    for k in range(n_dim):
        diff = x1[k] - x2[k]
        total += diff * diff
    return sqrt(total)
@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v2(np.ndarray[double, ndim=2, mode='c'] X not None,
                metric = 'euclidean'):
    """Pairwise distances between the rows of X, using typed NumPy buffers.

    X is a C-contiguous 2-D array of doubles and may not be None.
    metric names the distance function; only 'euclidean' is recognised and
    anything else raises ValueError.

    Returns an (n_samples, n_samples) ndarray D with D[i, j] the distance
    between X[i] and X[j].
    """
    cdef metric_ptr dist_func
    cdef np.intp_t row, col, n_samples
    cdef np.ndarray[double, ndim=2, mode='c'] D

    if metric != 'euclidean':
        raise ValueError("unrecognized metric")
    dist_func = &euclidean_distance

    n_samples = X.shape[0]
    D = np.empty((n_samples, n_samples))
    for row in range(n_samples):
        for col in range(n_samples):
            # X[row] and X[col] are ordinary NumPy slices (Python objects)
            # handed to the metric.
            D[row, col] = dist_func(X[row], X[col])
    return D
%timeit pairwise_v2(X)
1 loops, best of 3: 804 ms per loop
%%cython
import numpy as np
cimport numpy as np
from libc.math cimport sqrt
cimport cython
# Function-pointer type for a metric: takes two contiguous 1-D double
# memoryviews and returns a C double.
ctypedef double (*metric_ptr)(double[::1], double[::1])
# Euclidean distance between two contiguous 1-D double memoryviews.
# NOTE: x2 is assumed to have the same length as x1. Bounds checking is
# disabled, so a shorter x2 would be read out of bounds.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double[::1] x1,
                               double[::1] x2):
    cdef np.intp_t k, n_dim
    cdef double diff, total

    total = 0
    n_dim = x1.shape[0]
    for k in range(n_dim):
        diff = x1[k] - x2[k]
        total += diff * diff
    return sqrt(total)
@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v3(double[:, ::1] X,
                metric = 'euclidean'):
    """Pairwise distances between the rows of X, using typed memoryviews.

    X is a C-contiguous 2-D buffer of doubles.
    metric names the distance function; only 'euclidean' is recognised and
    anything else raises ValueError.

    Returns an (n_samples, n_samples) NumPy array D with D[i, j] the
    distance between X[i] and X[j].
    """
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean_distance
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples
    n_samples = X.shape[0]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            # X[i] / X[j] are memoryview row slices passed to the metric.
            D[i, j] = dist_func(X[i], X[j])

    # Hand back an ndarray view of the buffer (no copy) rather than the raw
    # Cython memoryview, matching what pairwise_v1 and pairwise_v2 return.
    return np.asarray(D)
%timeit pairwise_v3(X)
10 loops, best of 3: 22.4 ms per loop
%%cython
import numpy as np
cimport numpy as np
from libc.math cimport sqrt
cimport cython
# Function-pointer type for a metric: takes two raw double pointers plus an
# int element count, and returns a C double.
ctypedef double (*metric_ptr)(double*, double*, int)
# Euclidean distance between two raw double buffers.
# Both x1 and x2 are read at indices 0 .. N-1.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double* x1,
                               double* x2,
                               int N):
    cdef np.intp_t k
    cdef double diff, total

    total = 0
    for k in range(N):
        diff = x1[k] - x2[k]
        total += diff * diff
    return sqrt(total)
@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v4(double[:, ::1] X,
                metric = 'euclidean'):
    """Pairwise distances between the rows of X, using raw C pointers.

    X is a C-contiguous 2-D buffer of doubles.
    metric names the distance function; only 'euclidean' is recognised and
    anything else raises ValueError.

    Returns an (n_samples, n_samples) NumPy array D with D[i, j] the
    distance between row i and row j of X.
    """
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean_distance
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples, n_dim
    n_samples = X.shape[0]
    n_dim = X.shape[1]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))

    # Both buffers are C-contiguous, so element (r, c) lives at
    # base + r * row_length + c.
    cdef double* Dptr = &D[0, 0]
    cdef double* Xptr = &X[0, 0]

    for i in range(n_samples):
        for j in range(n_samples):
            Dptr[i * n_samples + j] = dist_func(Xptr + i * n_dim,
                                                Xptr + j * n_dim,
                                                n_dim)

    # Hand back an ndarray view of the buffer (no copy) rather than the raw
    # Cython memoryview, matching what pairwise_v1 and pairwise_v2 return.
    return np.asarray(D)
%timeit pairwise_v4(X)
100 loops, best of 3: 4.82 ms per loop
%%cython
import numpy as np
cimport numpy as np
from libc.math cimport sqrt
cimport cython
# Function-pointer type for a metric: takes a contiguous 2-D double
# memoryview and two integer indices, and returns a C double.
ctypedef double (*metric_ptr)(double[:, ::1], np.intp_t, np.intp_t)
# Euclidean distance between rows i1 and i2 of the 2-D memoryview X.
# Working on row indices avoids creating a slice object for every call.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline double euclidean_distance(double[:, ::1] X,
                                      np.intp_t i1, np.intp_t i2):
    cdef np.intp_t col
    cdef double diff, total

    total = 0
    for col in range(X.shape[1]):
        diff = X[i1, col] - X[i2, col]
        total += diff * diff
    return sqrt(total)
@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v5(double[:, ::1] X,
                metric = 'euclidean'):
    """Pairwise distances between the rows of X, passing row indices only.

    X is a C-contiguous 2-D buffer of doubles.
    metric names the distance function; only 'euclidean' is recognised and
    anything else raises ValueError.

    Returns an (n_samples, n_samples) NumPy array D with D[i, j] the
    distance between row i and row j of X.
    """
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean_distance
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples
    n_samples = X.shape[0]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            # The metric receives the whole view plus the two row indices,
            # so no per-pair slices are created.
            D[i, j] = dist_func(X, i, j)

    # Hand back an ndarray view of the buffer (no copy) rather than the raw
    # Cython memoryview, matching what pairwise_v1 and pairwise_v2 return.
    return np.asarray(D)
%timeit pairwise_v5(X)
100 loops, best of 3: 4.88 ms per loop
from numba.decorators import jit as jit
@jit(arg_types=[[['d']], [['d']]], ret_type=[['d']])
def pairwise_numba(X, output):
    """Fill ``output`` with the pairwise Euclidean distances of the rows of X.

    Parameters
    ----------
    X : 2-D array of doubles, shape (n_samples, n_dim)
        Input points, one per row.
    output : 2-D array of doubles
        Result buffer, written in place at ``output[ii, jj]`` for every pair
        of rows; it must be at least (n_samples, n_samples).

    Returns
    -------
    output
        The same buffer that was passed in.
    """
    n_samples, n_dim = X.shape
    for ii in range(n_samples):
        for jj in range(n_samples):
            result = 0.0
            for kk in range(n_dim):
                result += (X[ii,kk] - X[jj,kk]) * (X[ii,kk] - X[jj,kk])
            # Take the square root so the result is the Euclidean distance,
            # like every other pairwise_* variant, not its square.
            output[ii,jj] = np.sqrt(result)
    return output
{'blocks': {0: <llvm.core.BasicBlock object at 0x10595f710>, 33: <llvm.core.BasicBlock object at 0x10595f790>, 43: <llvm.core.BasicBlock object at 0x10595f490>, 46: <llvm.core.BasicBlock object at 0x10595ff50>, 49: <llvm.core.BasicBlock object at 0x10595f290>, 52: <llvm.core.BasicBlock object at 0x10595f690>, 62: <llvm.core.BasicBlock object at 0x10595f510>, 65: <llvm.core.BasicBlock object at 0x106033390>, 68: <llvm.core.BasicBlock object at 0x106033510>, 77: <llvm.core.BasicBlock object at 0x106033590>, 87: <llvm.core.BasicBlock object at 0x106033610>, 90: <llvm.core.BasicBlock object at 0x106033690>, 93: <llvm.core.BasicBlock object at 0x106033790>, 158: <llvm.core.BasicBlock object at 0x106033710>, 178: <llvm.core.BasicBlock object at 0x106033490>, 182: <llvm.core.BasicBlock object at 0x10595fa10>}, 'blocks_dom': {0: set([0]), 33: set([0, 33]), 43: set([0, 33, 43, 46, 49, 52, 65, 178]), 46: set([0, 33, 46]), 49: set([0, 33, 46, 49]), 52: set([0, 33, 46, 49, 52]), 62: set([0, 33, 46, 49, 52, 62, 65, 68, 77, 90, 158]), 65: set([0, 33, 46, 49, 52, 65]), 68: set([0, 33, 46, 49, 52, 65, 68]), 77: set([0, 33, 46, 49, 52, 65, 68, 77]), 87: set([0, 33, 46, 49, 52, 65, 68, 77, 87, 90, 93]), 90: set([0, 33, 46, 49, 52, 65, 68, 77, 90]), 93: set([0, 33, 46, 49, 52, 65, 68, 77, 90, 93]), 158: set([0, 33, 46, 49, 52, 65, 68, 77, 90, 158]), 178: set([0, 33, 46, 49, 52, 65, 178]), 182: set([0, 33, 46, 182])}, 'blocks_in': {0: set(), 33: set([0]), 43: set([178]), 46: set([33, 43]), 49: set([46]), 52: set([49]), 62: set([158]), 65: set([52, 62]), 68: set([65]), 77: set([68]), 87: set([93]), 90: set([77, 87]), 93: set([90]), 158: set([90]), 178: set([65]), 182: set([46])}, 'blocks_out': {0: set([33]), 33: set([46]), 43: set([46]), 46: set([49, 182]), 49: set([52]), 52: set([65]), 62: set([65]), 65: set([68, 178]), 68: set([77]), 77: set([90]), 87: set([90]), 90: set([93, 158]), 93: set([87]), 158: set([62]), 178: set([43]), 182: set()}, 'blocks_reaching': {0: set([0]), 33: 
set([0, 33]), 43: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 46: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 49: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 52: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 62: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 65: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 68: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 77: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 87: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 90: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 93: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 158: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 178: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178]), 182: set([0, 33, 43, 46, 49, 52, 62, 65, 68, 77, 87, 90, 93, 158, 178, 182])}, 'blocks_reads': {0: set([0, 1]), 33: set([2]), 43: set(), 46: set([6]), 49: set(), 52: set([2]), 62: set(), 65: set([7]), 68: set(), 77: set([3]), 87: set(), 90: set([9]), 93: set([0, 6, 7, 8, 9]), 158: set([1, 6, 7, 8]), 178: set(), 182: set([1])}, 'blocks_writer': {0: {2: 9, 3: 12, 4: 24, 5: 27}, 33: {6: 42}, 43: {6: 43}, 46: {6: 46}, 49: {}, 52: {7: 61}, 62: {7: 62}, 65: {7: 65}, 68: {8: 71}, 77: {9: 86}, 87: {9: 87}, 90: {8: 90, 9: 90}, 93: {8: 152}, 158: {}, 178: {}, 182: {}}, 'blocks_writes': {0: set([0, 1, 2, 3, 4, 5]), 33: set([6]), 43: set([6]), 46: set([6]), 49: set(), 52: set([7]), 62: set([7]), 65: set([7]), 68: set([8]), 77: set([9]), 87: set([9]), 90: set([8, 9]), 93: set([8]), 158: set(), 178: set(), 182: set()}, 'translator': <numba.translate.Translate object at 0x106033410>} op_LOAD_ATTR(): 3 106 shape <Variable(val=<llvm.core.Argument object at 0x10595f610>, _llvm=<llvm.core.Argument object at 0x10595f610>, typ='arr[f64]')> arr[f64] 
op_LOAD_ATTR(): { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* op_LOAD_ATTR(): 18 106 shape <Variable(val=<llvm.core.Argument object at 0x10595f810>, _llvm=<llvm.core.Argument object at 0x10595f810>, typ='arr[f64]')> arr[f64] op_LOAD_ATTR(): { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* ('op_CALL_FUNCTION():', <Variable(val=<built-in function range>, _llvm=None, typ=['func'])>) str_to_llvmtype(): str = 'i64' add_phi_incomming(): reaching_defs = {33: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 33}, 43: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 43, 7: 65}} crnt_block=46, pred=43, local=6 op_BINARY_ADD(): <Variable(val=<llvm.core.PHINode object at 0x106033b10>, _llvm=<llvm.core.PHINode object at 0x106033b10>, typ='i64')> + <Variable(val=<llvm.core.ConstantInt object at 0x106033c50>, _llvm=<llvm.core.ConstantInt object at 0x106033c50>, typ='i64')> resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033b10>, _llvm=<llvm.core.PHINode object at 0x106033b10>, typ='i64')>, arg2 = <Variable(val=<llvm.core.ConstantInt object at 0x106033c50>, _llvm=<llvm.core.ConstantInt object at 0x106033c50>, typ='i64')> resolve_type() ==> 'i64' resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033b10>, _llvm=<llvm.core.PHINode object at 0x106033b10>, typ='i64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x106033950>, _llvm=<llvm.core.Instruction object at 0x106033950>, typ='i64')> resolve_type() ==> 'i64' ('op_CALL_FUNCTION():', <Variable(val=<built-in function range>, _llvm=None, typ=['func'])>) str_to_llvmtype(): str = 'i64' add_phi_incomming(): reaching_defs = {52: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 52}, 62: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 62, 8: 90, 9: 90}} crnt_block=65, pred=62, local=7 op_BINARY_ADD(): <Variable(val=<llvm.core.PHINode object at 0x106033f50>, _llvm=<llvm.core.PHINode object at 0x106033f50>, typ='i64')> + <Variable(val=<llvm.core.ConstantInt 
object at 0x106033e50>, _llvm=<llvm.core.ConstantInt object at 0x106033e50>, typ='i64')> resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033f50>, _llvm=<llvm.core.PHINode object at 0x106033f50>, typ='i64')>, arg2 = <Variable(val=<llvm.core.ConstantInt object at 0x106033e50>, _llvm=<llvm.core.ConstantInt object at 0x106033e50>, typ='i64')> resolve_type() ==> 'i64' resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033f50>, _llvm=<llvm.core.PHINode object at 0x106033f50>, typ='i64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x106033950>, _llvm=<llvm.core.Instruction object at 0x106033950>, typ='i64')> resolve_type() ==> 'i64' ('op_CALL_FUNCTION():', <Variable(val=<built-in function range>, _llvm=None, typ=['func'])>) str_to_llvmtype(): str = 'f64' add_phi_incomming(): reaching_defs = {77: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 65, 8: 68, 9: 77}, 87: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 65, 8: 93, 9: 87}} crnt_block=90, pred=87, local=8 str_to_llvmtype(): str = 'i64' add_phi_incomming(): reaching_defs = {77: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 65, 8: 68, 9: 77}, 87: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 65, 8: 93, 9: 87}} crnt_block=90, pred=87, local=9 op_BINARY_ADD(): <Variable(val=<llvm.core.PHINode object at 0x105971090>, _llvm=<llvm.core.PHINode object at 0x105971090>, typ='i64')> + <Variable(val=<llvm.core.ConstantInt object at 0x105971150>, _llvm=<llvm.core.ConstantInt object at 0x105971150>, typ='i64')> resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x105971090>, _llvm=<llvm.core.PHINode object at 0x105971090>, typ='i64')>, arg2 = <Variable(val=<llvm.core.ConstantInt object at 0x105971150>, _llvm=<llvm.core.ConstantInt object at 0x105971150>, typ='i64')> resolve_type() ==> 'i64' resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x105971090>, _llvm=<llvm.core.PHINode object at 0x105971090>, typ='i64')>, arg2 = 
<Variable(val=<llvm.core.Instruction object at 0x1060339d0>, _llvm=<llvm.core.Instruction object at 0x1060339d0>, typ='i64')> resolve_type() ==> 'i64' op_BINARY_SUBSCR(): arr_var.typ = arr[f64] str_to_llvmtype(): str = 'f64' %32 = load double* %31 op_BINARY_SUBSCR(): arr_var.typ = arr[f64] str_to_llvmtype(): str = 'f64' %43 = load double* %42 resolve_type(): arg1 = <Variable(val=<llvm.core.Instruction object at 0x105971450>, _llvm=<llvm.core.Instruction object at 0x105971450>, typ='f64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x1059715d0>, _llvm=<llvm.core.Instruction object at 0x1059715d0>, typ='f64')> resolve_type() ==> 'f64' op_BINARY_SUBSCR(): arr_var.typ = arr[f64] str_to_llvmtype(): str = 'f64' %55 = load double* %54 op_BINARY_SUBSCR(): arr_var.typ = arr[f64] str_to_llvmtype(): str = 'f64' %66 = load double* %65 resolve_type(): arg1 = <Variable(val=<llvm.core.Instruction object at 0x105971410>, _llvm=<llvm.core.Instruction object at 0x105971410>, typ='f64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x105971690>, _llvm=<llvm.core.Instruction object at 0x105971690>, typ='f64')> resolve_type() ==> 'f64' resolve_type(): arg1 = <Variable(val=<llvm.core.Instruction object at 0x105971290>, _llvm=<llvm.core.Instruction object at 0x105971290>, typ='f64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x105971510>, _llvm=<llvm.core.Instruction object at 0x105971510>, typ='f64')> resolve_type() ==> 'f64' op_BINARY_ADD(): <Variable(val=<llvm.core.PHINode object at 0x106033e10>, _llvm=<llvm.core.PHINode object at 0x106033e10>, typ='f64')> + <Variable(val=<llvm.core.Instruction object at 0x105971410>, _llvm=<llvm.core.Instruction object at 0x105971410>, typ='f64')> resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033e10>, _llvm=<llvm.core.PHINode object at 0x106033e10>, typ='f64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x105971410>, _llvm=<llvm.core.Instruction object at 0x105971410>, 
typ='f64')> resolve_type() ==> 'f64' op_STORE_SUBSCR(): 174 60 None op_STORE_SUBSCR(): <Variable(val=<llvm.core.Argument object at 0x10595f810>, _llvm=<llvm.core.Argument object at 0x10595f810>, typ='arr[f64]')>[<Variable(val=(<Variable(val=<llvm.core.PHINode object at 0x106033b10>, _llvm=<llvm.core.PHINode object at 0x106033b10>, typ='i64')>, <Variable(val=<llvm.core.PHINode object at 0x106033f50>, _llvm=<llvm.core.PHINode object at 0x106033f50>, typ='i64')>), _llvm=None, typ='tuple')>] = <Variable(val=<llvm.core.PHINode object at 0x106033e10>, _llvm=<llvm.core.PHINode object at 0x106033e10>, typ='f64')> op_STORE_SUBSCR(): arr_lval = '{ i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output', arr_ltype = '{ i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }*' str_to_llvmtype(): str = 'f64' str_to_llvmtype(): str = 'arr[]' ; ModuleID = 'pairwise_numba_mod_106033410' define { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* @pairwise_numba({ i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output) { Entry: %0 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 4 %1 = load i64** %0 %2 = getelementptr i64* %1, i32 0 %3 = load i64* %2 %4 = getelementptr i64* %1, i32 1 %5 = load i64* %4 %6 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output, i32 0, i32 4 %7 = load i64** %6 %8 = getelementptr i64* %7, i32 0 %9 = load i64* %8 %10 = getelementptr i64* %7, i32 1 %11 = load i64* %10 br label %BLOCK_33 BLOCK_33: ; preds = %Entry br label %BLOCK_46 BLOCK_43: ; preds = %BLOCK_178 %12 = add i64 %13, 1 br label %BLOCK_46 BLOCK_46: ; preds = %BLOCK_43, %BLOCK_33 %13 = phi i64 [ 0, %BLOCK_33 ], [ %12, %BLOCK_43 ] %14 = icmp slt i64 %13, %3 br i1 %14, label %BLOCK_49, label %BLOCK_182 BLOCK_182: ; preds = %BLOCK_46 ret { i64, 
i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output BLOCK_49: ; preds = %BLOCK_46 br label %BLOCK_52 BLOCK_52: ; preds = %BLOCK_49 br label %BLOCK_65 BLOCK_62: ; preds = %BLOCK_158 %15 = add i64 %16, 1 br label %BLOCK_65 BLOCK_65: ; preds = %BLOCK_62, %BLOCK_52 %16 = phi i64 [ 0, %BLOCK_52 ], [ %15, %BLOCK_62 ] %17 = icmp slt i64 %16, %3 br i1 %17, label %BLOCK_68, label %BLOCK_178 BLOCK_178: ; preds = %BLOCK_65 br label %BLOCK_43 BLOCK_68: ; preds = %BLOCK_65 br label %BLOCK_77 BLOCK_77: ; preds = %BLOCK_68 br label %BLOCK_90 BLOCK_87: ; preds = %BLOCK_93 %18 = add i64 %20, 1 br label %BLOCK_90 BLOCK_90: ; preds = %BLOCK_87, %BLOCK_77 %19 = phi double [ 0.000000e+00, %BLOCK_77 ], [ %79, %BLOCK_87 ] %20 = phi i64 [ 0, %BLOCK_77 ], [ %18, %BLOCK_87 ] %21 = icmp slt i64 %20, %5 br i1 %21, label %BLOCK_93, label %BLOCK_158 BLOCK_158: ; preds = %BLOCK_90 %22 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output, i32 0, i32 5 %23 = load i64** %22 %24 = getelementptr i64* %23, i32 0 %25 = load i64* %24 %26 = mul i64 %13, %25 %27 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output, i32 0, i32 2 %28 = load i8** %27 %29 = getelementptr i8* %28, i64 %26 %30 = bitcast i8* %29 to double* %31 = getelementptr double* %30, i64 %16 store double %19, double* %31 br label %BLOCK_62 BLOCK_93: ; preds = %BLOCK_90 %32 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 5 %33 = load i64** %32 %34 = getelementptr i64* %33, i32 0 %35 = load i64* %34 %36 = mul i64 %13, %35 %37 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 2 %38 = load i8** %37 %39 = getelementptr i8* %38, i64 %36 %40 = bitcast i8* %39 to double* %41 = getelementptr double* %40, i64 %20 %42 = load double* %41 %43 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 
0, i32 5 %44 = load i64** %43 %45 = getelementptr i64* %44, i32 0 %46 = load i64* %45 %47 = mul i64 %16, %46 %48 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 2 %49 = load i8** %48 %50 = getelementptr i8* %49, i64 %47 %51 = bitcast i8* %50 to double* %52 = getelementptr double* %51, i64 %20 %53 = load double* %52 %54 = fsub double %42, %53 %55 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 5 %56 = load i64** %55 %57 = getelementptr i64* %56, i32 0 %58 = load i64* %57 %59 = mul i64 %13, %58 %60 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 2 %61 = load i8** %60 %62 = getelementptr i8* %61, i64 %59 %63 = bitcast i8* %62 to double* %64 = getelementptr double* %63, i64 %20 %65 = load double* %64 %66 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 5 %67 = load i64** %66 %68 = getelementptr i64* %67, i32 0 %69 = load i64* %68 %70 = mul i64 %16, %69 %71 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 2 %72 = load i8** %71 %73 = getelementptr i8* %72, i64 %70 %74 = bitcast i8* %73 to double* %75 = getelementptr double* %74, i64 %20 %76 = load double* %75 %77 = fsub double %65, %76 %78 = fmul double %54, %77 %79 = fadd double %19, %78 br label %BLOCK_87 }
# Wall-clock timing of a single pairwise_numba call, reported in milliseconds.
import time
start = time.time()
pairwise_numba(X, output)
end = time.time()
elapsed_ms = (end - start) * 1000
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("Result from compiled is in %s (msec)" % elapsed_ms)
Result from compiled is in 2.77519226074 (msec)