In [1]:
import chainer

In [2]:
# Chainer version this notebook was run with (affects cupy API availability below).
chainer.__version__

Out[2]:
'1.5.1'

# numpy vs. cupy

In [3]:
import numpy as np
import cupy as cp


## dot

In [4]:
def dot_np(a, b):
    """Matrix-multiply two NumPy arrays via the ndarray.dot method."""
    product = a.dot(b)
    return product

In [5]:
def dot_cp(a, b):
    """Matrix-multiply two CuPy arrays -- the very same duck-typed call
    as the NumPy version, which is the point of the comparison."""
    product = a.dot(b)
    return product

In [6]:
# Benchmark matrix dimensions: the product is (n, m) x (m, n) -> (n, n).
n = 1000
m = 10000

In [7]:
# Random float32 host-side (NumPy) operands; 'f' is the float32 dtype code.
a_np = np.random.randn(n, m).astype('f')
b_np = np.random.randn(m, n).astype('f')

In [8]:
# Copy the same data to the GPU as cupy arrays so both backends
# are timed on identical inputs.
a_cp = cp.asarray(a_np)
b_cp = cp.asarray(b_np)

In [9]:
%timeit dot_np(a_np, b_np)

1 loops, best of 3: 811 ms per loop

In [10]:
%timeit dot_cp(a_cp, b_cp)

The slowest run took 2175.14 times longer than the fastest. This could mean that an intermediate result is being cached
1 loops, best of 3: 110 µs per loop


## norm

In [11]:
def norm_np(data):
    """Row-wise L2 norms of a 2-D NumPy array."""
    row_norms = np.linalg.norm(data, axis=1)
    return row_norms

In [12]:
# Deliberate failure demonstration: in this cupy build (chainer 1.5.1 era)
# cp.linalg is a bare module with no callable norm, so this raises TypeError.
cp.linalg.norm(a_cp, axis=1)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-12-8377ddd70259> in <module>()
----> 1 cp.linalg.norm(a_cp, axis=1)

TypeError: 'module' object is not callable
In [13]:
def norm_cp(data):
    """Row-wise L2 norms, hand-rolled because this cupy version
    does not provide a callable linalg.norm (see the cell above)."""
    squared = data ** 2
    row_sums = cp.sum(squared, axis=1)
    return cp.sqrt(row_sums)

In [14]:
%timeit norm_np(a_np)

100 loops, best of 3: 11.8 ms per loop

In [15]:
%timeit norm_cp(a_cp)

The slowest run took 1863.49 times longer than the fastest. This could mean that an intermediate result is being cached
1 loops, best of 3: 158 µs per loop


# Compatible code

In [16]:
from chainer import cuda


## dot (works for both numpy and cupy arrays)

In [17]:
def dot(a, b):
    """Backend-agnostic matrix product: the duck-typed `.dot` call
    works unchanged for numpy and cupy arrays."""
    result = a.dot(b)
    return result


## Total variation (uses chainer.Variable)

In [18]:
import chainer.functions as F

def tv(x_data, beta=2):
    """Total variation of a batch of images, computed through chainer so the
    same code runs on CPU (numpy) and GPU (cupy).

    Args:
        x_data: array of shape (n, ch, h, w); numpy or cupy, dispatched
            automatically via cuda.get_array_module.
        beta: exponent applied to the summed squared neighbour differences
            (default 2 gives the squared-difference TV).

    Returns:
        chainer.Variable holding the scalar TV value.
    """
    xp = cuda.get_array_module(x_data)
    n, ch, h, w = x_data.shape

    # 2x1 and 1x2 finite-difference kernels (vertical / horizontal neighbours).
    Wh_data = xp.array([[[[1], [-1]]]], dtype='f')
    Ww_data = xp.array([[[[1, -1]]]], dtype='f')

    x = chainer.Variable(x_data.astype('f'))
    Wh = chainer.Variable(Wh_data)
    Ww = chainer.Variable(Ww_data)

    # Fold batch and channel axes together so the single-input-channel
    # difference kernels apply to every channel independently.  The original
    # hard-coded 3 here, which only worked when n * ch == 3 (e.g. one RGB
    # image); n * ch generalizes and is identical in that case.
    x_flat = F.reshape(x, (n * ch, 1, h, w))
    diffh = F.convolution_2d(x_flat, W=Wh)
    diffw = F.convolution_2d(x_flat, W=Ww)

    tv = (F.sum(diffh**2) + F.sum(diffw**2))**(beta / 2.)
    return tv


## im2patch (does not use chainer.Variable)

In [19]:
def get_patches_idx(image_size, patch_size, stride):
    """Start offsets of every patch along one image axis.

    Yields 0, stride, 2*stride, ... below the last valid start, then the
    last valid start (image_size - patch_size) itself, so the final patch
    always reaches the image border.

    Note: the original `range(l)[::stride] + [l]` only works on Python 2,
    where range() returns a list; `list(range(0, l, stride))` is an
    equivalent form that also runs on Python 3.
    """
    last = image_size - patch_size
    return list(range(0, last, stride)) + [last]

def im2patch(image, patch_size, stride):
    """Extract all (patch_size x patch_size) patches from a CHW image.

    Args:
        image: array of shape (ch, h, w); numpy or cupy, dispatched via
            cuda.get_array_module.
        patch_size: side length of the square patches.
        stride: step between patch start offsets along each axis.

    Returns:
        Array of shape (len(idx_h) * len(idx_w), ch, patch_size, patch_size),
        patches ordered row-major (all columns of the first row band first).
    """
    xp = cuda.get_array_module(image)
    ch, h, w = image.shape
    idx_h = get_patches_idx(h, patch_size, stride)
    idx_w = get_patches_idx(w, patch_size, stride)

    patches = xp.zeros((len(idx_h) * len(idx_w), ch, patch_size, patch_size),
                       dtype=image.dtype)
    for ih, hs in enumerate(idx_h):   # enumerate: works on Py2 and Py3 (xrange is Py2-only)
        he = hs + patch_size
        for iw, ws in enumerate(idx_w):
            we = ws + patch_size
            # Row-major flat index must scale the row counter by the number
            # of COLUMNS: the original `iw + ih * len(idx_h)` is wrong
            # whenever len(idx_h) != len(idx_w) (non-square patch grids).
            # Plain assignment replaces the original `+=` into zeros --
            # identical result, clearer intent.
            patches[ih * len(idx_w) + iw] = image[:, hs:he, ws:we]
    return patches

In [20]:
# Fixture: one random 3-channel 256x256 image on host and device,
# plus the patch geometry used by both timing cells below.
img_np = np.random.randn(3, 256, 256)
img_cp = cp.asarray(img_np)
patch_size = 8
stride = 4

In [21]:
%timeit im2patch(img_np, patch_size, stride)

10 loops, best of 3: 13.6 ms per loop

In [22]:
%timeit im2patch(img_cp, patch_size, stride)

1 loops, best of 3: 145 ms per loop