Notebook

Typed Memoryview Benchmark¶

This contains the implementations of the benchmarks described at http://jakevdp.github.com/blog/2012/08/08/memoryview-benchmarks.

Here we'll use ipython's cython magic to compile and run the benchmarks.

In [4]:

%load_ext cythonmagic

# Define our test array
import numpy as np
X = np.random.random((500, 3))
output = np.random.random((500,500)) # only used for Numba, declared here so we make sure the dimensions remain consistent

Python-only Version¶

In [2]:

import numpy as np

def euclidean_distance(x1, x2):
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    return np.sqrt(np.sum((x1 - x2) ** 2))

def pairwise_v1(X, metric=euclidean_distance):
    X = np.asarray(X)
    
    n_samples, n_dim = X.shape

    D = np.empty((n_samples, n_samples))

    for i in range(n_samples):
        for j in range(n_samples):
 	    D[i, j] = metric(X[i], X[j])

    return D

In [3]:

%timeit pairwise_v1(X)

1 loops, best of 3: 5.19 s per loop

Cython + numpy¶

In [5]:

%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(np.ndarray, np.ndarray)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(np.ndarray[double, ndim=1, mode='c'] x1,
                               np.ndarray[double, ndim=1, mode='c'] x2):
    cdef double tmp, d
    cdef np.intp_t i, N

    d = 0
    N = x1.shape[0]
    # assume x2 has the same shape as x1.  This could be dangerous!

    for i in range(N):
        tmp = x1[i] - x2[i]
        d += tmp * tmp

    return sqrt(d)


@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v2(np.ndarray[double, ndim=2, mode='c'] X not None,
                metric = 'euclidean'):
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean_distance
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples
    n_samples = X.shape[0]

    cdef np.ndarray[double, ndim=2, mode='c'] D = np.empty((n_samples,
                                                            n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = dist_func(X[i], X[j])

    return D

In [6]:

%timeit pairwise_v2(X)

1 loops, best of 3: 804 ms per loop

Cython + memviews (with slicing)¶

In [10]:

%%cython
import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double[::1], double[::1])

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double[::1] x1,
                               double[::1] x2):
    cdef double tmp, d
    cdef np.intp_t i, N

    d = 0
    N = x1.shape[0]
    # assume x2 has the same shape as x1.  This could be dangerous!

    for i in range(N):
        tmp = x1[i] - x2[i]
        d += tmp * tmp

    return sqrt(d)


@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v3(double[:, ::1] X,
                metric = 'euclidean'):
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean_distance
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples
    n_samples = X.shape[0]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))

    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = dist_func(X[i], X[j])

    return D

In [11]:

%timeit pairwise_v3(X)

10 loops, best of 3: 22.4 ms per loop

Cython + raw pointers¶

In [18]:

%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double*, double*, int)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef double euclidean_distance(double* x1,
                               double* x2,
                               int N):
    cdef double tmp, d
    cdef np.intp_t i

    d = 0

    for i in range(N):
        tmp = x1[i] - x2[i]
        d += tmp * tmp

    return sqrt(d)


@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v4(double[:, ::1] X,
                metric = 'euclidean'):
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean_distance
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples, n_dim
    n_samples = X.shape[0]
    n_dim = X.shape[1]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))

    cdef double* Dptr = &D[0, 0]
    cdef double* Xptr = &X[0, 0]

    for i in range(n_samples):
        for j in range(n_samples):
            Dptr[i * n_samples + j] = dist_func(Xptr + i * n_dim,
                                                Xptr + j * n_dim,
                                                n_dim)
    return D

In [19]:

%timeit pairwise_v4(X)

100 loops, best of 3: 4.82 ms per loop

Cython + memviews (no slicing)¶

In [14]:

%%cython

import numpy as np

cimport numpy as np
from libc.math cimport sqrt
cimport cython

# define a function pointer to a metric
ctypedef double (*metric_ptr)(double[:, ::1], np.intp_t, np.intp_t)

@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline double euclidean_distance(double[:, ::1] X,
                                      np.intp_t i1, np.intp_t i2):
    cdef double tmp, d
    cdef np.intp_t j

    d = 0

    for j in range(X.shape[1]):
        tmp = X[i1, j] - X[i2, j]
        d += tmp * tmp

    return sqrt(d)


@cython.boundscheck(False)
@cython.wraparound(False)
def pairwise_v5(double[:, ::1] X,
                metric = 'euclidean'):
    cdef metric_ptr dist_func
    if metric == 'euclidean':
        dist_func = &euclidean_distance
    else:
        raise ValueError("unrecognized metric")

    cdef np.intp_t i, j, n_samples, n_dim
    n_samples = X.shape[0]
    n_dim = X.shape[1]

    cdef double[:, ::1] D = np.empty((n_samples, n_samples))

    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = dist_func(X, i, j)

    return D

In [15]:

%timeit pairwise_v5(X)

100 loops, best of 3: 4.88 ms per loop

Numba¶

In [2]:

from numba.decorators import jit as jit

@jit(arg_types=[[['d']], [['d']]], ret_type=[['d']])
def pairwise_numba(X, output):
    n_samples, n_dim = X.shape
    n_samples1, n_samples2 = output.shape
    
    for ii in range(n_samples):
        for jj in range(n_samples):
            result = 0.0;
            for kk in range(n_dim):
                result += (X[ii,kk] - X[jj,kk]) * (X[ii,kk] - X[jj,kk])
            output[ii,jj] = result
    
    return output

{'blocks': {0: <llvm.core.BasicBlock object at 0x10595f710>,
            33: <llvm.core.BasicBlock object at 0x10595f790>,
            43: <llvm.core.BasicBlock object at 0x10595f490>,
            46: <llvm.core.BasicBlock object at 0x10595ff50>,
            49: <llvm.core.BasicBlock object at 0x10595f290>,
            52: <llvm.core.BasicBlock object at 0x10595f690>,
            62: <llvm.core.BasicBlock object at 0x10595f510>,
            65: <llvm.core.BasicBlock object at 0x106033390>,
            68: <llvm.core.BasicBlock object at 0x106033510>,
            77: <llvm.core.BasicBlock object at 0x106033590>,
            87: <llvm.core.BasicBlock object at 0x106033610>,
            90: <llvm.core.BasicBlock object at 0x106033690>,
            93: <llvm.core.BasicBlock object at 0x106033790>,
            158: <llvm.core.BasicBlock object at 0x106033710>,
            178: <llvm.core.BasicBlock object at 0x106033490>,
            182: <llvm.core.BasicBlock object at 0x10595fa10>},
 'blocks_dom': {0: set([0]),
                33: set([0, 33]),
                43: set([0, 33, 43, 46, 49, 52, 65, 178]),
                46: set([0, 33, 46]),
                49: set([0, 33, 46, 49]),
                52: set([0, 33, 46, 49, 52]),
                62: set([0, 33, 46, 49, 52, 62, 65, 68, 77, 90, 158]),
                65: set([0, 33, 46, 49, 52, 65]),
                68: set([0, 33, 46, 49, 52, 65, 68]),
                77: set([0, 33, 46, 49, 52, 65, 68, 77]),
                87: set([0, 33, 46, 49, 52, 65, 68, 77, 87, 90, 93]),
                90: set([0, 33, 46, 49, 52, 65, 68, 77, 90]),
                93: set([0, 33, 46, 49, 52, 65, 68, 77, 90, 93]),
                158: set([0, 33, 46, 49, 52, 65, 68, 77, 90, 158]),
                178: set([0, 33, 46, 49, 52, 65, 178]),
                182: set([0, 33, 46, 182])},
 'blocks_in': {0: set(),
               33: set([0]),
               43: set([178]),
               46: set([33, 43]),
               49: set([46]),
               52: set([49]),
               62: set([158]),
               65: set([52, 62]),
               68: set([65]),
               77: set([68]),
               87: set([93]),
               90: set([77, 87]),
               93: set([90]),
               158: set([90]),
               178: set([65]),
               182: set([46])},
 'blocks_out': {0: set([33]),
                33: set([46]),
                43: set([46]),
                46: set([49, 182]),
                49: set([52]),
                52: set([65]),
                62: set([65]),
                65: set([68, 178]),
                68: set([77]),
                77: set([90]),
                87: set([90]),
                90: set([93, 158]),
                93: set([87]),
                158: set([62]),
                178: set([43]),
                182: set()},
 'blocks_reaching': {0: set([0]),
                     33: set([0, 33]),
                     43: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     46: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     49: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     52: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     62: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     65: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     68: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     77: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     87: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     90: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     93: set([0,
                              33,
                              43,
                              46,
                              49,
                              52,
                              62,
                              65,
                              68,
                              77,
                              87,
                              90,
                              93,
                              158,
                              178]),
                     158: set([0,
                               33,
                               43,
                               46,
                               49,
                               52,
                               62,
                               65,
                               68,
                               77,
                               87,
                               90,
                               93,
                               158,
                               178]),
                     178: set([0,
                               33,
                               43,
                               46,
                               49,
                               52,
                               62,
                               65,
                               68,
                               77,
                               87,
                               90,
                               93,
                               158,
                               178]),
                     182: set([0,
                               33,
                               43,
                               46,
                               49,
                               52,
                               62,
                               65,
                               68,
                               77,
                               87,
                               90,
                               93,
                               158,
                               178,
                               182])},
 'blocks_reads': {0: set([0, 1]),
                  33: set([2]),
                  43: set(),
                  46: set([6]),
                  49: set(),
                  52: set([2]),
                  62: set(),
                  65: set([7]),
                  68: set(),
                  77: set([3]),
                  87: set(),
                  90: set([9]),
                  93: set([0, 6, 7, 8, 9]),
                  158: set([1, 6, 7, 8]),
                  178: set(),
                  182: set([1])},
 'blocks_writer': {0: {2: 9, 3: 12, 4: 24, 5: 27},
                   33: {6: 42},
                   43: {6: 43},
                   46: {6: 46},
                   49: {},
                   52: {7: 61},
                   62: {7: 62},
                   65: {7: 65},
                   68: {8: 71},
                   77: {9: 86},
                   87: {9: 87},
                   90: {8: 90, 9: 90},
                   93: {8: 152},
                   158: {},
                   178: {},
                   182: {}},
 'blocks_writes': {0: set([0, 1, 2, 3, 4, 5]),
                   33: set([6]),
                   43: set([6]),
                   46: set([6]),
                   49: set(),
                   52: set([7]),
                   62: set([7]),
                   65: set([7]),
                   68: set([8]),
                   77: set([9]),
                   87: set([9]),
                   90: set([8, 9]),
                   93: set([8]),
                   158: set(),
                   178: set(),
                   182: set()},
 'translator': <numba.translate.Translate object at 0x106033410>}
op_LOAD_ATTR(): 3 106 shape <Variable(val=<llvm.core.Argument object at 0x10595f610>, _llvm=<llvm.core.Argument object at 0x10595f610>, typ='arr[f64]')> arr[f64]
op_LOAD_ATTR(): { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }*
op_LOAD_ATTR(): 18 106 shape <Variable(val=<llvm.core.Argument object at 0x10595f810>, _llvm=<llvm.core.Argument object at 0x10595f810>, typ='arr[f64]')> arr[f64]
op_LOAD_ATTR(): { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }*
('op_CALL_FUNCTION():', <Variable(val=<built-in function range>, _llvm=None, typ=['func'])>)
str_to_llvmtype(): str = 'i64'
add_phi_incomming(): reaching_defs = {33: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 33},
 43: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 43, 7: 65}}
    crnt_block=46, pred=43, local=6
op_BINARY_ADD(): <Variable(val=<llvm.core.PHINode object at 0x106033b10>, _llvm=<llvm.core.PHINode object at 0x106033b10>, typ='i64')> + <Variable(val=<llvm.core.ConstantInt object at 0x106033c50>, _llvm=<llvm.core.ConstantInt object at 0x106033c50>, typ='i64')>
resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033b10>, _llvm=<llvm.core.PHINode object at 0x106033b10>, typ='i64')>, arg2 = <Variable(val=<llvm.core.ConstantInt object at 0x106033c50>, _llvm=<llvm.core.ConstantInt object at 0x106033c50>, typ='i64')>
resolve_type() ==> 'i64'
resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033b10>, _llvm=<llvm.core.PHINode object at 0x106033b10>, typ='i64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x106033950>, _llvm=<llvm.core.Instruction object at 0x106033950>, typ='i64')>
resolve_type() ==> 'i64'
('op_CALL_FUNCTION():', <Variable(val=<built-in function range>, _llvm=None, typ=['func'])>)
str_to_llvmtype(): str = 'i64'
add_phi_incomming(): reaching_defs = {52: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 52},
 62: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 62, 8: 90, 9: 90}}
    crnt_block=65, pred=62, local=7
op_BINARY_ADD(): <Variable(val=<llvm.core.PHINode object at 0x106033f50>, _llvm=<llvm.core.PHINode object at 0x106033f50>, typ='i64')> + <Variable(val=<llvm.core.ConstantInt object at 0x106033e50>, _llvm=<llvm.core.ConstantInt object at 0x106033e50>, typ='i64')>
resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033f50>, _llvm=<llvm.core.PHINode object at 0x106033f50>, typ='i64')>, arg2 = <Variable(val=<llvm.core.ConstantInt object at 0x106033e50>, _llvm=<llvm.core.ConstantInt object at 0x106033e50>, typ='i64')>
resolve_type() ==> 'i64'
resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033f50>, _llvm=<llvm.core.PHINode object at 0x106033f50>, typ='i64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x106033950>, _llvm=<llvm.core.Instruction object at 0x106033950>, typ='i64')>
resolve_type() ==> 'i64'
('op_CALL_FUNCTION():', <Variable(val=<built-in function range>, _llvm=None, typ=['func'])>)
str_to_llvmtype(): str = 'f64'
add_phi_incomming(): reaching_defs = {77: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 65, 8: 68, 9: 77},
 87: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 65, 8: 93, 9: 87}}
    crnt_block=90, pred=87, local=8
str_to_llvmtype(): str = 'i64'
add_phi_incomming(): reaching_defs = {77: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 65, 8: 68, 9: 77},
 87: {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 46, 7: 65, 8: 93, 9: 87}}
    crnt_block=90, pred=87, local=9
op_BINARY_ADD(): <Variable(val=<llvm.core.PHINode object at 0x105971090>, _llvm=<llvm.core.PHINode object at 0x105971090>, typ='i64')> + <Variable(val=<llvm.core.ConstantInt object at 0x105971150>, _llvm=<llvm.core.ConstantInt object at 0x105971150>, typ='i64')>
resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x105971090>, _llvm=<llvm.core.PHINode object at 0x105971090>, typ='i64')>, arg2 = <Variable(val=<llvm.core.ConstantInt object at 0x105971150>, _llvm=<llvm.core.ConstantInt object at 0x105971150>, typ='i64')>
resolve_type() ==> 'i64'
resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x105971090>, _llvm=<llvm.core.PHINode object at 0x105971090>, typ='i64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x1060339d0>, _llvm=<llvm.core.Instruction object at 0x1060339d0>, typ='i64')>
resolve_type() ==> 'i64'
op_BINARY_SUBSCR(): arr_var.typ = arr[f64]
str_to_llvmtype(): str = 'f64'
  %32 = load double* %31
op_BINARY_SUBSCR(): arr_var.typ = arr[f64]
str_to_llvmtype(): str = 'f64'
  %43 = load double* %42
resolve_type(): arg1 = <Variable(val=<llvm.core.Instruction object at 0x105971450>, _llvm=<llvm.core.Instruction object at 0x105971450>, typ='f64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x1059715d0>, _llvm=<llvm.core.Instruction object at 0x1059715d0>, typ='f64')>
resolve_type() ==> 'f64'
op_BINARY_SUBSCR(): arr_var.typ = arr[f64]
str_to_llvmtype(): str = 'f64'
  %55 = load double* %54
op_BINARY_SUBSCR(): arr_var.typ = arr[f64]
str_to_llvmtype(): str = 'f64'
  %66 = load double* %65
resolve_type(): arg1 = <Variable(val=<llvm.core.Instruction object at 0x105971410>, _llvm=<llvm.core.Instruction object at 0x105971410>, typ='f64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x105971690>, _llvm=<llvm.core.Instruction object at 0x105971690>, typ='f64')>
resolve_type() ==> 'f64'
resolve_type(): arg1 = <Variable(val=<llvm.core.Instruction object at 0x105971290>, _llvm=<llvm.core.Instruction object at 0x105971290>, typ='f64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x105971510>, _llvm=<llvm.core.Instruction object at 0x105971510>, typ='f64')>
resolve_type() ==> 'f64'
op_BINARY_ADD(): <Variable(val=<llvm.core.PHINode object at 0x106033e10>, _llvm=<llvm.core.PHINode object at 0x106033e10>, typ='f64')> + <Variable(val=<llvm.core.Instruction object at 0x105971410>, _llvm=<llvm.core.Instruction object at 0x105971410>, typ='f64')>
resolve_type(): arg1 = <Variable(val=<llvm.core.PHINode object at 0x106033e10>, _llvm=<llvm.core.PHINode object at 0x106033e10>, typ='f64')>, arg2 = <Variable(val=<llvm.core.Instruction object at 0x105971410>, _llvm=<llvm.core.Instruction object at 0x105971410>, typ='f64')>
resolve_type() ==> 'f64'
op_STORE_SUBSCR(): 174 60 None
op_STORE_SUBSCR(): <Variable(val=<llvm.core.Argument object at 0x10595f810>, _llvm=<llvm.core.Argument object at 0x10595f810>, typ='arr[f64]')>[<Variable(val=(<Variable(val=<llvm.core.PHINode object at 0x106033b10>, _llvm=<llvm.core.PHINode object at 0x106033b10>, typ='i64')>, <Variable(val=<llvm.core.PHINode object at 0x106033f50>, _llvm=<llvm.core.PHINode object at 0x106033f50>, typ='i64')>), _llvm=None, typ='tuple')>] = <Variable(val=<llvm.core.PHINode object at 0x106033e10>, _llvm=<llvm.core.PHINode object at 0x106033e10>, typ='f64')>
op_STORE_SUBSCR(): arr_lval = '{ i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output', arr_ltype = '{ i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }*'
str_to_llvmtype(): str = 'f64'
str_to_llvmtype(): str = 'arr[]'
; ModuleID = 'pairwise_numba_mod_106033410'

define { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* @pairwise_numba({ i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output) {
Entry:
  %0 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 4
  %1 = load i64** %0
  %2 = getelementptr i64* %1, i32 0
  %3 = load i64* %2
  %4 = getelementptr i64* %1, i32 1
  %5 = load i64* %4
  %6 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output, i32 0, i32 4
  %7 = load i64** %6
  %8 = getelementptr i64* %7, i32 0
  %9 = load i64* %8
  %10 = getelementptr i64* %7, i32 1
  %11 = load i64* %10
  br label %BLOCK_33

BLOCK_33:                                         ; preds = %Entry
  br label %BLOCK_46

BLOCK_43:                                         ; preds = %BLOCK_178
  %12 = add i64 %13, 1
  br label %BLOCK_46

BLOCK_46:                                         ; preds = %BLOCK_43, %BLOCK_33
  %13 = phi i64 [ 0, %BLOCK_33 ], [ %12, %BLOCK_43 ]
  %14 = icmp slt i64 %13, %3
  br i1 %14, label %BLOCK_49, label %BLOCK_182

BLOCK_182:                                        ; preds = %BLOCK_46
  ret { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output

BLOCK_49:                                         ; preds = %BLOCK_46
  br label %BLOCK_52

BLOCK_52:                                         ; preds = %BLOCK_49
  br label %BLOCK_65

BLOCK_62:                                         ; preds = %BLOCK_158
  %15 = add i64 %16, 1
  br label %BLOCK_65

BLOCK_65:                                         ; preds = %BLOCK_62, %BLOCK_52
  %16 = phi i64 [ 0, %BLOCK_52 ], [ %15, %BLOCK_62 ]
  %17 = icmp slt i64 %16, %3
  br i1 %17, label %BLOCK_68, label %BLOCK_178

BLOCK_178:                                        ; preds = %BLOCK_65
  br label %BLOCK_43

BLOCK_68:                                         ; preds = %BLOCK_65
  br label %BLOCK_77

BLOCK_77:                                         ; preds = %BLOCK_68
  br label %BLOCK_90

BLOCK_87:                                         ; preds = %BLOCK_93
  %18 = add i64 %20, 1
  br label %BLOCK_90

BLOCK_90:                                         ; preds = %BLOCK_87, %BLOCK_77
  %19 = phi double [ 0.000000e+00, %BLOCK_77 ], [ %79, %BLOCK_87 ]
  %20 = phi i64 [ 0, %BLOCK_77 ], [ %18, %BLOCK_87 ]
  %21 = icmp slt i64 %20, %5
  br i1 %21, label %BLOCK_93, label %BLOCK_158

BLOCK_158:                                        ; preds = %BLOCK_90
  %22 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output, i32 0, i32 5
  %23 = load i64** %22
  %24 = getelementptr i64* %23, i32 0
  %25 = load i64* %24
  %26 = mul i64 %13, %25
  %27 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %output, i32 0, i32 2
  %28 = load i8** %27
  %29 = getelementptr i8* %28, i64 %26
  %30 = bitcast i8* %29 to double*
  %31 = getelementptr double* %30, i64 %16
  store double %19, double* %31
  br label %BLOCK_62

BLOCK_93:                                         ; preds = %BLOCK_90
  %32 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 5
  %33 = load i64** %32
  %34 = getelementptr i64* %33, i32 0
  %35 = load i64* %34
  %36 = mul i64 %13, %35
  %37 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 2
  %38 = load i8** %37
  %39 = getelementptr i8* %38, i64 %36
  %40 = bitcast i8* %39 to double*
  %41 = getelementptr double* %40, i64 %20
  %42 = load double* %41
  %43 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 5
  %44 = load i64** %43
  %45 = getelementptr i64* %44, i32 0
  %46 = load i64* %45
  %47 = mul i64 %16, %46
  %48 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 2
  %49 = load i8** %48
  %50 = getelementptr i8* %49, i64 %47
  %51 = bitcast i8* %50 to double*
  %52 = getelementptr double* %51, i64 %20
  %53 = load double* %52
  %54 = fsub double %42, %53
  %55 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 5
  %56 = load i64** %55
  %57 = getelementptr i64* %56, i32 0
  %58 = load i64* %57
  %59 = mul i64 %13, %58
  %60 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 2
  %61 = load i8** %60
  %62 = getelementptr i8* %61, i64 %59
  %63 = bitcast i8* %62 to double*
  %64 = getelementptr double* %63, i64 %20
  %65 = load double* %64
  %66 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 5
  %67 = load i64** %66
  %68 = getelementptr i64* %67, i32 0
  %69 = load i64* %68
  %70 = mul i64 %16, %69
  %71 = getelementptr { i64, i32*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8*, i8*, i8*, i64* }* %X, i32 0, i32 2
  %72 = load i8** %71
  %73 = getelementptr i8* %72, i64 %70
  %74 = bitcast i8* %73 to double*
  %75 = getelementptr double* %74, i64 %20
  %76 = load double* %75
  %77 = fsub double %65, %76
  %78 = fmul double %54, %77
  %79 = fadd double %19, %78
  br label %BLOCK_87
}

In [5]:

import time
start = time.time()
pairwise_numba(X, output)
end = time.time()
print "Result from compiled is in %s (msec)" % ((end-start)*1000)

Result from compiled is in 2.77519226074 (msec)