#!/usr/bin/env python
# coding: utf-8

# In[1]:


import chainer


# In[2]:


# Evaluate the installed Chainer version string; as a bare expression it is
# only shown when run as a notebook cell and has no effect in a plain script.
chainer.__version__


# # numpy vs. cupy

# In[3]:


import numpy as np
import cupy as cp


# ## dot

# In[4]:


def dot_np(a, b):
    """Return ``a.dot(b)``; used as the CPU (NumPy) side of the benchmark.

    Args:
        a: Left operand; any object that provides a ``dot`` method.
        b: Right operand, forwarded unchanged to ``a.dot``.

    Returns:
        The value produced by ``a.dot(b)``.
    """
    product = a.dot(b)
    return product


# In[5]:


def dot_cp(a, b):
    """Return ``a.dot(b)``; used as the GPU (CuPy) side of the benchmark.

    The body is deliberately the same as the NumPy variant: the array type of
    ``a`` decides where the product is computed.

    Args:
        a: Left operand; any object that provides a ``dot`` method.
        b: Right operand, forwarded unchanged to ``a.dot``.

    Returns:
        The value produced by ``a.dot(b)``.
    """
    multiply = a.dot
    return multiply(b)


# In[6]:


# Benchmark matrix dimensions: the operands built from these are (n, m) and
# (m, n), so their product is (n, n).
n, m = 1000, 10000


# In[7]:


# Host-side operands for the benchmarks: standard-normal random matrices of
# shape (n, m) and (m, n), cast to dtype code 'f' (single-precision float).
a_np = np.random.randn(n, m).astype('f')
b_np = np.random.randn(m, n).astype('f')


# In[8]:


# CuPy counterparts of the NumPy operands, so the CPU and GPU benchmarks run
# on the same values.
a_cp = cp.asarray(a_np)
b_cp = cp.asarray(b_np)


# In[9]:


# Time the matrix product on the NumPy arrays (IPython %timeit magic; this
# line only works inside an IPython/Jupyter session).
get_ipython().run_line_magic('timeit', 'dot_np(a_np, b_np)')


# In[10]:


# Time the matrix product on the CuPy arrays (IPython %timeit magic).
# CuPy launches GPU work asynchronously, so the timed statement also waits on
# the null (default) stream; without that wait %timeit would mostly measure
# kernel-launch overhead instead of the computation itself.
get_ipython().run_line_magic('timeit', 'dot_cp(a_cp, b_cp); cp.cuda.Stream.null.synchronize()')


# ## norm

# In[11]:


def norm_np(data):
    """Return the norm of every row of ``data`` using NumPy.

    Args:
        data: Array-like accepted by ``np.linalg.norm`` with ``axis=1``.

    Returns:
        The result of ``np.linalg.norm(data, axis=1)``: one norm per row.
    """
    row_norms = np.linalg.norm(data, axis=1)
    return row_norms


# In[12]:


# Row-wise norm through CuPy's own linalg.norm; the result is not stored, so
# this cell only displays the value (or the error) in the notebook.
# NOTE(review): norm_cp below computes the same quantity by hand, presumably
# because this call was unavailable or unsuitable in the CuPy version used at
# the time -- confirm against the installed CuPy.
cp.linalg.norm(a_cp, axis=1)


# In[13]:


def norm_cp(data):
    """Return the Euclidean norm of every row of ``data`` using CuPy.

    Spelled out as square, sum along axis 1, then square root, instead of
    calling a ``linalg.norm`` routine.

    Args:
        data: Array supporting ``** 2`` and accepted by ``cp.sum`` with
            ``axis=1``.

    Returns:
        The result of ``cp.sqrt`` applied to the per-row sums of squares.
    """
    squared = data**2
    row_sums = cp.sum(squared, axis=1)
    return cp.sqrt(row_sums)


# In[14]:


# Time the row-wise norm on the NumPy array (IPython %timeit magic; this line
# only works inside an IPython/Jupyter session).
get_ipython().run_line_magic('timeit', 'norm_np(a_np)')


# In[15]:


# Time the row-wise norm on the CuPy array (IPython %timeit magic).
# CuPy launches GPU work asynchronously, so the timed statement also waits on
# the null (default) stream; without that wait %timeit would mostly measure
# kernel-launch overhead instead of the computation itself.
get_ipython().run_line_magic('timeit', 'norm_cp(a_cp); cp.cuda.Stream.null.synchronize()')


# # Compatible code (runs on both NumPy and CuPy arrays)

# In[16]:


from chainer import cuda


# ## dot (ufunc)

# In[17]:


def dot(a, b):
    """Return ``a.dot(b)`` without caring which array library ``a`` is from.

    Because the product is dispatched through the ``dot`` method of ``a``, the
    same function serves both NumPy and CuPy arrays.

    Args:
        a: Left operand; any object that provides a ``dot`` method.
        b: Right operand, forwarded unchanged to ``a.dot``.

    Returns:
        The value produced by ``a.dot(b)``.
    """
    result = a.dot(b)
    return result


# ## Total variation (uses `chainer.Variable`)

# In[18]:


import chainer.functions as F

def tv(x_data, beta=2):
    """Compute a total-variation style penalty for a batch of images.

    Every (sample, channel) plane is differenced along height and width with
    two-tap kernels; the squared differences are summed over everything and
    the total is raised to ``beta / 2``.

    Works on NumPy and CuPy input alike: the difference kernels are created
    with the array module that ``x_data`` belongs to.

    Args:
        x_data: 4-D array of shape ``(n, ch, h, w)``. It is cast to dtype
            code ``'f'`` before use.
        beta: Exponent parameter; the summed squared differences are raised
            to ``beta / 2``. Defaults to 2.

    Returns:
        A scalar ``chainer.Variable`` holding the penalty.
    """
    xp = cuda.get_array_module(x_data)
    n, ch, h, w = x_data.shape

    # Two-tap difference kernels in (out_ch, in_ch, kh, kw) layout: one spans
    # two rows (vertical difference), the other two columns (horizontal).
    kernel_h = xp.array([[[[1], [-1]]]], dtype='f')
    kernel_w = xp.array([[[[1, -1]]]], dtype='f')

    x = chainer.Variable(x_data.astype('f'))
    # Fold batch and channel axes together so each plane is convolved
    # independently with the single-channel kernels. The size is derived from
    # the input shape rather than fixed at 3, so any n and ch are accepted.
    planes = F.reshape(x, (n * ch, 1, h, w))

    diffh = F.convolution_2d(planes, W=chainer.Variable(kernel_h))
    diffw = F.convolution_2d(planes, W=chainer.Variable(kernel_w))

    return (F.sum(diffh**2) + F.sum(diffw**2))**(beta / 2.)


# ## im2patch (does not use `chainer.Variable`)

# In[19]:


def get_patches_idx(image_size, patch_size, stride):
    """Return the start offsets of patches along one image axis.

    Offsets advance by ``stride`` from 0 and stay below
    ``image_size - patch_size``; that last valid offset is then always
    appended so the final patch ends exactly at the image border.

    Args:
        image_size: Length of the image along the axis.
        patch_size: Length of a patch along the same axis.
        stride: Step between consecutive patch offsets.

    Returns:
        A list of integer start offsets.

    Raises:
        ValueError: If ``patch_size`` is larger than ``image_size``.
    """
    last = image_size - patch_size
    if last < 0:
        raise ValueError(
            'patch_size (%d) must not exceed image_size (%d)'
            % (patch_size, image_size))
    # list(...) keeps this working on Python 3, where range() is not a list
    # and cannot be concatenated with one.
    return list(range(last)[::stride]) + [last]

def im2patch(image, patch_size, stride):
    """Cut a channel-first image into square patches.

    Patch origins along each axis come from ``get_patches_idx``. Patches are
    stored in row-major order of their origins: all patches of the first row
    of origins, then the second row, and so on.

    Works on NumPy and CuPy input alike: the output is allocated with the
    array module that ``image`` belongs to.

    Args:
        image: 3-D array of shape ``(ch, h, w)``.
        patch_size: Edge length of each square patch.
        stride: Step between neighbouring patch origins.

    Returns:
        Array of shape ``(num_patches, ch, patch_size, patch_size)`` with the
        same dtype as ``image``.
    """
    xp = cuda.get_array_module(image)
    ch, h, w = image.shape
    idx_h = get_patches_idx(h, patch_size, stride)
    idx_w = get_patches_idx(w, patch_size, stride)
    n_cols = len(idx_w)

    patches = xp.zeros((len(idx_h) * n_cols, ch, patch_size, patch_size),
                       dtype=image.dtype)
    for ih, hs in enumerate(idx_h):
        he = hs + patch_size
        for iw, ws in enumerate(idx_w):
            we = ws + patch_size
            # The row offset is scaled by the number of columns; scaling by
            # the number of rows is only correct for square images.
            patches[ih * n_cols + iw] = image[:, hs:he, ws:we]
    return patches


# In[20]:


# Benchmark inputs for im2patch: a standard-normal random array of shape
# (3, 256, 256) on the host and its CuPy counterpart.
img_np = np.random.randn(3, 256, 256)
img_cp = cp.asarray(img_np)
# Patch edge length and step between patch origins passed to im2patch.
patch_size = 8
stride = 4


# In[21]:


# Time patch extraction on the NumPy image (IPython %timeit magic; this line
# only works inside an IPython/Jupyter session).
get_ipython().run_line_magic('timeit', 'im2patch(img_np, patch_size, stride)')


# In[22]:


# Time patch extraction on the CuPy image (IPython %timeit magic).
# CuPy launches GPU work asynchronously, so the timed statement also waits on
# the null (default) stream; without that wait %timeit could stop the clock
# before the queued copies have finished.
get_ipython().run_line_magic('timeit', 'im2patch(img_cp, patch_size, stride); cp.cuda.Stream.null.synchronize()')