# Side length of the square test matrix.
n=500
# n-by-n array of random values; it is passed as both operands in the timings below.
mat=np.random.rand(n,n)
# Echo the shape to confirm the matrix dimensions.
mat.shape
(500, 500)
def matmul(mat1,mat2):
    """Multiply two 2-D arrays with an explicit triple loop.

    This is the deliberately naive reference implementation used as the
    baseline in the timings; the loops are kept as plain ``range`` loops so
    the same function can also be handed to numba.

    Parameters
    ----------
    mat1 : 2-D array, shape (n1, n2)
    mat2 : 2-D array, shape (n2, n3)

    Returns
    -------
    out : 2-D float array, shape (n1, n3)
        ``out[i, j]`` is the sum over ``k`` of ``mat1[i, k] * mat2[k, j]``.

    Raises
    ------
    ValueError
        If the number of columns of ``mat1`` differs from the number of
        rows of ``mat2``.
    """
    n_rows = mat1.shape[0]
    n_inner = mat1.shape[1]
    n_cols = mat2.shape[1]
    # Without this check a mat2 with extra rows would silently yield a
    # wrong product (only its first n_inner rows would be used).
    if mat2.shape[0] != n_inner:
        raise ValueError("shapes not aligned for matrix product")
    out = np.zeros((n_rows, n_cols))
    # range (not xrange) works on both Python 2 and Python 3.
    for i in range(n_rows):
        for j in range(n_cols):
            for k in range(n_inner):
                out[i,j] += mat1[i,k]*mat2[k,j]
    return out
def matmul_dot(mat1,mat2):
    """Return the product of ``mat1`` and ``mat2`` as computed by ``np.dot``.

    Thin wrapper so that the NumPy implementation can be timed through the
    same call shape as the other ``matmul_*`` variants.
    """
    product = np.dot(mat1, mat2)
    return product
# Time the pure-Python triple loop: 3 repeats (-r 3) of a single call each (-n 1).
%timeit -r 3 -n 1 matmul(mat,mat)
1 loops, best of 3: 2min 19s per loop
# Time the np.dot wrapper with the same repeat/loop settings for comparison.
%timeit -r 3 -n 1 matmul_dot(mat,mat)
1 loops, best of 3: 6.17 ms per loop
import numba
# Wrap the unchanged pure-Python matmul with numba's autojit.
# NOTE(review): autojit was deprecated and later removed from Numba in favour
# of numba.jit — confirm against the installed Numba version.
matmul_nb=numba.autojit(matmul)
# Time the numba-wrapped function with the same repeat/loop settings.
%timeit -r 3 -n 1 matmul_nb(mat,mat)
1 loops, best of 3: 329 ms per loop
# Load the IPython extension that provides the %%cython cell magic used next.
# NOTE(review): newer IPython/Cython releases provide this as `%load_ext Cython`
# — confirm for the installed versions.
%load_ext cythonmagic
%%cython
import cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def matmul_cy(np.ndarray[double,ndim=2] mat1 not None,np.ndarray[double,ndim=2] mat2 not None):
    """Multiply two 2-D double arrays with a typed triple loop.

    Parameters
    ----------
    mat1 : 2-D double array, shape (n1, n2)
    mat2 : 2-D double array, shape (n2, n3)

    Returns
    -------
    out : 2-D double array, shape (n1, n3)
        ``out[i, j]`` is the sum over ``k`` of ``mat1[i, k] * mat2[k, j]``.

    Raises
    ------
    ValueError
        If the number of columns of ``mat1`` differs from the number of
        rows of ``mat2``.
    TypeError
        If either argument is ``None`` (enforced by ``not None``).
    """
    cdef Py_ssize_t n_rows = mat1.shape[0]
    cdef Py_ssize_t n_inner = mat1.shape[1]
    cdef Py_ssize_t n_cols = mat2.shape[1]
    # Py_ssize_t is the index type Cython recommends for buffer access.
    cdef Py_ssize_t i, j, k
    cdef np.ndarray[double,ndim=2] out

    # Bounds checking is switched off for this function, so a mismatched
    # inner dimension must be rejected here; otherwise the loop would read
    # past the end of mat2 (or silently ignore part of it).
    if mat2.shape[0] != n_inner:
        raise ValueError("shapes not aligned for matrix product")

    out = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        for j in range(n_cols):
            for k in range(n_inner):
                out[i,j] += mat1[i,k]*mat2[k,j]
    return out
# Time the Cython implementation with the same repeat/loop settings.
%timeit -r 3 -n 1 matmul_cy(mat,mat)
1 loops, best of 3: 334 ms per loop
%%file matmul.f90
subroutine matmul_s(n1,n2,n3,mat1,mat2,mat3)
  ! Matrix product mat3 = mat1 * mat2 computed with explicit loops.
  !
  !   n1, n2, n3 : dimensions; mat1 is n1 x n2, mat2 is n2 x n3
  !   mat1, mat2 : input matrices
  !   mat3       : output matrix, n1 x n3
  !
  ! The argument list and the explicit-shape declarations are left as they
  ! were so that the interface of the routine does not change.
  implicit none
  integer, intent(in) :: n1, n2, n3
  real(kind=8), intent(in) :: mat1(n1,n2), mat2(n2,n3)
  real(kind=8), intent(out) :: mat3(n1,n3)
  integer :: i, j, k

  ! Fortran arrays are stored column-major, so the innermost loop runs over
  ! the first index (i): mat3(:,j) and mat1(:,k) are then walked
  ! contiguously. For each element (i,j) the terms are still accumulated in
  ! increasing k, i.e. in the same order as an i-j-k loop nest.
  do j = 1, n3
    mat3(:, j) = 0.0_8
    do k = 1, n2
      do i = 1, n1
        mat3(i, j) = mat3(i, j) + mat1(i, k) * mat2(k, j)
      end do
    end do
  end do
end subroutine matmul_s
subroutine matmul_fi(n1,n2,n3,mat1,mat2,mat3)
  ! Matrix product mat3 = mat1 * mat2, delegated to the Fortran intrinsic
  ! matmul.
  !
  !   n1, n2, n3 : dimensions; mat1 is n1 x n2, mat2 is n2 x n3
  !   mat1, mat2 : input matrices
  !   mat3       : output matrix, n1 x n3
  implicit none
  integer, intent(in) :: n1, n2, n3
  real(kind=8), intent(in) :: mat1(n1,n2)
  real(kind=8), intent(in) :: mat2(n2,n3)
  real(kind=8), intent(out) :: mat3(n1,n3)

  mat3(:,:) = matmul(mat1, mat2)
end subroutine matmul_fi
Overwriting matmul.f90
!f2py -c -m matmul_fortran matmul.f90 --f90exec=/opt/local/bin/gfortran-mp-4.9 > log.txt
import matmul_fortran
print matmul_fortran.__doc__
This module 'matmul_fortran' is auto-generated with f2py (version:2). Functions: mat3 = matmul_s(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1)) mat3 = matmul_f(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1)) mat3 = matmul_fi(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1)) .
# Time the explicit-loop Fortran routine through its f2py wrapper.
%timeit -r 3 -n 1 matmul_fortran.matmul_s(mat,mat)
1 loops, best of 3: 108 ms per loop
# Time the Fortran routine that calls the intrinsic matmul, through its f2py wrapper.
%timeit -r 3 -n 1 matmul_fortran.matmul_fi(mat,mat)
1 loops, best of 3: 70.7 ms per loop