import chainer
chainer.__version__
'1.5.1'
import numpy as np
import cupy as cp
def dot_np(a, b):
    """Return the matrix product ``a.dot(b)``.

    Used as the NumPy side of the dot-product timing comparison; the work
    is delegated entirely to the ``dot`` method of ``a``.
    """
    product = a.dot(b)
    return product
def dot_cp(a, b):
    """Return the matrix product ``a.dot(b)``.

    Used as the CuPy side of the dot-product timing comparison; the work
    is delegated entirely to the ``dot`` method of ``a``.
    """
    product = a.dot(b)
    return product
# Benchmark operands: a is (n, m) and b is (m, n), so a.dot(b) is (n, n).
n = 1000
m = 10000
# Random host arrays, cast to single precision ('f' is the float32 type code).
a_np = np.random.randn(n, m).astype('f')
b_np = np.random.randn(m, n).astype('f')
# The same data converted to CuPy arrays for the cp-based functions.
a_cp = cp.asarray(a_np)
b_cp = cp.asarray(b_np)
%timeit dot_np(a_np, b_np)
1 loops, best of 3: 811 ms per loop
%timeit dot_cp(a_cp, b_cp)
The slowest run took 2175.14 times longer than the fastest. This could mean that an intermediate result is being cached 1 loops, best of 3: 110 µs per loop
def norm_np(data):
    """Return the norm of ``data`` taken along axis 1.

    Thin wrapper around ``np.linalg.norm(data, axis=1)``.
    """
    row_norms = np.linalg.norm(data, axis=1)
    return row_norms
cp.linalg.norm(a_cp, axis=1)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-12-8377ddd70259> in <module>() ----> 1 cp.linalg.norm(a_cp, axis=1) TypeError: 'module' object is not callable
def norm_cp(data):
    """Return the square root of the sum of squares of ``data`` along axis 1.

    Written with ``cp.sqrt`` and ``cp.sum`` instead of ``cp.linalg.norm``.
    """
    squared = data**2
    sum_of_squares = cp.sum(squared, axis=1)
    return cp.sqrt(sum_of_squares)
%timeit norm_np(a_np)
100 loops, best of 3: 11.8 ms per loop
%timeit norm_cp(a_cp)
The slowest run took 1863.49 times longer than the fastest. This could mean that an intermediate result is being cached 1 loops, best of 3: 158 µs per loop
from chainer import cuda
def dot(a, b):
    """Return the matrix product ``a.dot(b)``.

    The call goes through the ``dot`` method of ``a``, so the function
    itself is independent of which array library produced ``a``.
    """
    product = a.dot(b)
    return product
chainer.Variable
import chainer.functions as F
def tv(x_data, beta=2):
    """Compute a total-variation style penalty of a 4-D array.

    Every channel of every sample is treated as an independent
    single-channel image. Neighbouring-pixel differences along the height
    and width axes are obtained by convolving with ``[[1], [-1]]`` and
    ``[[1, -1]]`` kernels.

    Args:
        x_data: Array of shape ``(n, ch, h, w)``. It is cast to float32
            before use. The array module (NumPy or CuPy) is looked up with
            ``cuda.get_array_module`` so the kernels are built with the
            same module.
        beta: Exponent parameter; the summed squared differences are
            raised to the power ``beta / 2``.

    Returns:
        A ``chainer.Variable`` holding
        ``(sum(diffh**2) + sum(diffw**2)) ** (beta / 2)``.
    """
    xp = cuda.get_array_module(x_data)
    n, ch, h, w = x_data.shape

    # Finite-difference kernels in (out_ch, in_ch, kh, kw) layout.
    kernel_h = chainer.Variable(xp.array([[[[1], [-1]]]], dtype='f'))
    kernel_w = chainer.Variable(xp.array([[[[1, -1]]]], dtype='f'))

    x = chainer.Variable(x_data.astype('f'))
    # Fold batch and channel together so each plane is convolved on its own.
    # The original hard-coded 3 here, which only worked when n * ch == 3.
    planes = F.reshape(x, (n * ch, 1, h, w))

    diffh = F.convolution_2d(planes, W=kernel_h)
    diffw = F.convolution_2d(planes, W=kernel_w)

    # Named ``penalty`` so the local does not shadow the function name.
    penalty = (F.sum(diffh**2) + F.sum(diffw**2))**(beta / 2.)
    return penalty
chainer.Variable
def get_patches_idx(image_size, patch_size, stride):
    """Return the start offsets of patches along one image axis.

    Offsets advance from 0 in steps of ``stride`` while they stay below
    ``image_size - patch_size``. That last valid offset is then always
    appended, so the final patch ends flush with the image border even when
    the stride does not divide the range evenly.

    Args:
        image_size: Length of the image along the axis.
        patch_size: Length of a patch along the axis.
        stride: Step between consecutive patch starts.

    Returns:
        A list of integer start offsets.
    """
    last_start = image_size - patch_size
    # list(range(...)) keeps this working on Python 3, where a range object
    # cannot be concatenated with a list.
    return list(range(0, last_start, stride)) + [last_start]
def im2patch(image, patch_size, stride):
    """Cut an image into square patches.

    Patch start offsets along each spatial axis come from
    ``get_patches_idx``. Patches are stored in row-major order: all patches
    of the first row of offsets, then the second row, and so on.

    Args:
        image: Array of shape ``(ch, h, w)``. The array module (NumPy or
            CuPy) is looked up with ``cuda.get_array_module`` so the output
            is allocated with the same module.
        patch_size: Side length of each square patch.
        stride: Step between consecutive patch starts along both axes.

    Returns:
        Array of shape ``(num_patches, ch, patch_size, patch_size)`` with
        the same dtype as ``image``.
    """
    xp = cuda.get_array_module(image)
    ch, h, w = image.shape
    idx_h = get_patches_idx(h, patch_size, stride)
    idx_w = get_patches_idx(w, patch_size, stride)
    n_cols = len(idx_w)
    patches = xp.zeros((len(idx_h) * n_cols, ch, patch_size, patch_size),
                       dtype=image.dtype)
    for ih, hs in enumerate(idx_h):
        he = hs + patch_size
        for iw, ws in enumerate(idx_w):
            we = ws + patch_size
            # The row stride of the flat index is the number of columns.
            # The original used len(idx_h), which made patches collide or
            # run out of bounds for non-square images.
            patches[ih * n_cols + iw] = image[:, hs:he, ws:we]
    return patches
# Random test image of shape (3, 256, 256); im2patch unpacks this as (ch, h, w).
img_np = np.random.randn(3, 256, 256)
# The same image converted to a CuPy array for the second timing run.
img_cp = cp.asarray(img_np)
# Patch extraction parameters passed to im2patch in the timings below.
patch_size = 8
stride = 4
%timeit im2patch(img_np, patch_size, stride)
10 loops, best of 3: 13.6 ms per loop
%timeit im2patch(img_cp, patch_size, stride)
1 loops, best of 3: 145 ms per loop