n=1000
mat=np.random.rand(n,n)
n2=100
mat2=np.random.rand(n2,n2)
n3=10
mat3=np.random.rand(n3,n3)
mat.shape
(1000, 1000)
def matmul(mat1,mat2):
out=np.zeros((mat1.shape[0],mat2.shape[1]))
for i in xrange(mat1.shape[0]):
for j in xrange(mat2.shape[1]):
for k in xrange(mat1.shape[1]):
out[i,j]+=mat1[i,k]*mat2[k,j]
return out
def matmul_dot(mat1,mat2):
return np.dot(mat1,mat2)
%timeit -r 3 -n 1 matmul(mat,mat)
1 loops, best of 3: 19min 22s per loop
%timeit -r 3 -n 1 matmul(mat2,mat2)
1 loops, best of 3: 1.15 s per loop
%timeit -r 3 -n 1 matmul(mat3,mat3)
1 loops, best of 3: 1.48 ms per loop
%timeit -r 3 -n 1 matmul_dot(mat,mat)
1 loops, best of 3: 48 ms per loop
%timeit -r 3 -n 1 matmul_dot(mat2,mat2)
1 loops, best of 3: 61 µs per loop
%timeit -r 3 -n 1 matmul_dot(mat3,mat3)
1 loops, best of 3: 1.91 µs per loop
import numba
matmul_nb=numba.autojit(matmul)
%timeit -r 3 -n 1 matmul_nb(mat,mat)
1 loops, best of 3: 3.79 s per loop
%timeit -r 3 -n 1 matmul_nb(mat2,mat2)
1 loops, best of 3: 2.83 ms per loop
%timeit -r 3 -n 1 matmul_nb(mat3,mat3)
1 loops, best of 3: 5.96 µs per loop
%load_ext cythonmagic
%%cython
import cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def matmul_cy(np.ndarray[double,ndim=2] mat1,np.ndarray[double,ndim=2] mat2):
cdef np.ndarray[double,ndim=2] out=np.zeros((mat1.shape[0],mat2.shape[1]))
cdef int i,j,k
for i in xrange(mat1.shape[0]):
for j in xrange(mat2.shape[1]):
for k in xrange(mat1.shape[1]):
out[i,j]+=mat1[i,k]*mat2[k,j]
return out
%timeit -r 3 -n 1 matmul_cy(mat,mat)
1 loops, best of 3: 3.41 s per loop
%timeit -r 3 -n 1 matmul_cy(mat2,mat2)
1 loops, best of 3: 2.69 ms per loop
%timeit -r 3 -n 1 matmul_cy(mat3,mat3)
1 loops, best of 3: 5.01 µs per loop
%%file matmul.f90
subroutine matmul_s(n1,n2,n3,mat1,mat2,mat3)
implicit none
integer,intent(in):: n1,n2,n3
real(kind=8),intent(in):: mat1(n1,n2),mat2(n2,n3)
real(kind=8),intent(out):: mat3(n1,n3)
integer i,j,k
do i=1,n1
do j=1,n3
mat3(i,j)=0.
do k=1,n2
mat3(i,j)=mat3(i,j)+mat1(i,k)*mat2(k,j)
enddo
enddo
enddo
end subroutine
subroutine matmul_fi(n1,n2,n3,mat1,mat2,mat3)
implicit none
integer,intent(in):: n1,n2,n3
real(kind=8),intent(in):: mat1(n1,n2),mat2(n2,n3)
real(kind=8),intent(out):: mat3(n1,n3)
mat3=matmul(mat1,mat2)
end subroutine
Overwriting matmul.f90
!f2py -c -m matmul_fortran matmul.f90 --f90exec=/opt/local/bin/gfortran-mp-4.9 > log.txt
import matmul_fortran
print matmul_fortran.__doc__
This module 'matmul_fortran' is auto-generated with f2py (version:2). Functions: mat3 = matmul_s(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1)) mat3 = matmul_f(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1)) mat3 = matmul_fi(mat1,mat2,n1=shape(mat1,0),n2=shape(mat1,1),n3=shape(mat2,1)) .
%timeit -r 3 -n 1 matmul_fortran.matmul_s(mat,mat)
1 loops, best of 3: 1.3 s per loop
%timeit -r 3 -n 1 matmul_fortran.matmul_s(mat2,mat2)
1 loops, best of 3: 770 µs per loop
%timeit -r 3 -n 1 matmul_fortran.matmul_s(mat3,mat3)
1 loops, best of 3: 5.01 µs per loop
%timeit -r 3 -n 1 matmul_fortran.matmul_fi(mat,mat)
1 loops, best of 3: 609 ms per loop
%timeit -r 3 -n 1 matmul_fortran.matmul_fi(mat2,mat2)
1 loops, best of 3: 700 µs per loop
%timeit -r 3 -n 1 matmul_fortran.matmul_fi(mat3,mat3)
1 loops, best of 3: 4.05 µs per loop