Notebook

Energy auto-encoder: learn from an image¶

Setup¶

In [ ]:

%matplotlib inline
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
from pyunlocbox import functions, solvers
import time
import matplotlib, pyunlocbox  # For versions only.
print('Software versions:')
for pkg in [np, matplotlib, pyunlocbox]:
    print('  %s: %s' % (pkg.__name__, pkg.__version__))

Hyper-parameters¶

The $\lambda$ are the relative importance of each term in the composite objective function.
The sparse code dimensionality $m$ should be greater than $n$ for an overcomplete representation but much smaller than $N$ to avoid over-fitting.
$N_p$ denotes the size of the (square) patches.

In [ ]:

l_d = 10
l_e = 10
l_g = 0
m = 200
Np = 12

Data¶

The set of data vectors $X \in R^{n \times N}$ is given by patches extracted from a grayscale image.

In [ ]:

image = matplotlib.image.imread('data/barbara.png')
(Nx, Ny) = np.shape(image)

plt.figure(figsize=(8,5))
plt.imshow(image, cmap='gray')
plt.title('Source image')
plt.show()

X = np.zeros((Np**2, Nx*Ny/Np**2))
for x in np.arange(Nx/Np):
    for y in np.arange(Ny/Np):
        X[:,x*y] = image[x:x+Np, y:y+Np].reshape((Np**2,))
(n, N) = np.shape(X)

print('N = %d samples with dimensionality n = %d (patches of %dx%d).' % (N, n, Np, Np))

Algorithm¶

Given $X \in R^{n \times N}$, solve $\min\limits_{Z \in R^{m \times N}, D \in R^{n \times m}, E \in R^{m \times n}} \frac{\lambda_d}{2} \|X - DZ\|_F^2 + \frac{\lambda_e}{2} \|Z - EX\|_F^2 + \|Z\|_1$ s.t. $\|d_i\|_2 \leq 1$, $\|e_k\|_2 \leq 1$, $i = 1, \ldots, m$, $k = 1, \ldots, n$

In [ ]:

# Solver numeric parameters.
N_outer = 20
rtol = 1e-3

# Static loss function definitions.
g_z = functions.norm_l1()
g_de = functions.proj_b2(epsilon=1)  # L2-ball indicator function.

# Initialization.
Z = np.random.normal(size=(m, N))
D = np.random.normal(size=(n, m))
E = np.random.normal(size=(m, n))
objective_z = []
objective_d = []
objective_e = []
objective_g = []
tstart = time.time()

# Multi-variate non-convex optimization (outer loop).
for k in np.arange(N_outer):
    
    # Convex minimization for Z.
    f_zd = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)
    f_ze = functions.norm_l2(lambda_=l_e/2., y=np.dot(E,X))
    f_z = functions.func()
    f_z._eval = lambda Z: f_zd.eval(Z) + f_ze.eval(Z)
    f_z._grad = lambda Z: f_zd.grad(Z) + f_ze.grad(Z)
    L = l_e + l_d * la.norm(np.dot(D.T, D))  # Lipschitz continuous gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_z, g_z], Z, solver, rtol=rtol, verbosity='NONE')
    Z = ret['sol']
    objective_z.extend(ret['objective'])
    objective_d.extend(np.zeros(np.shape(ret['objective'])))
    objective_e.extend(np.zeros(np.shape(ret['objective'])))
    
    # Convex minimization for D.
    f_d = functions.norm_l2(lambda_=l_d/2., A=Z.T, y=X.T, tight=False)
    L = l_d * la.norm(np.dot(Z, Z.T))  # Lipschitz continuous gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_d, g_de], D.T, solver, rtol=rtol, verbosity='NONE')
    D = ret['sol'].T
    objective_d.extend(ret['objective'])
    objective_z.extend(np.zeros(np.shape(ret['objective'])))
    objective_e.extend(np.zeros(np.shape(ret['objective'])))
    
    E = D.T
    
    # Convex minimization for E.
    f_e = functions.norm_l2(lambda_=l_e/2., A=X.T, y=Z.T, tight=False)
    L = l_e * la.norm(np.dot(X, X.T))  # Lipschitz continuous gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_e, g_de], E.T, solver, rtol=rtol, verbosity='NONE')
    E = ret['sol'].T
    objective_e.extend(ret['objective'])
    objective_z.extend(np.zeros(np.shape(ret['objective'])))
    objective_d.extend(np.zeros(np.shape(ret['objective'])))
    
    D = E.T
    
    # Global objective (the indicators are 0).
    objective_g.append(g_z.eval(Z) + f_d.eval(D.T) + f_e.eval(E.T))

print('Elapsed time: %d seconds' % (time.time() - tstart))

Convergence analysis¶

Although the overall multi-variate problem is not convex, it seems to converge toward a solution. As we optimally solve each sub-problem (which are convex), we can guarantee that the global objective function will monotically decrease, which is indeed the case.
Good news: the encoder seems to very well approximate the sparse code (low L2 reconstruction error) !

In [ ]:

print('g_z(Z) = %e' % g_z.eval(Z))
print('f_z(Z,D) = %e' % f_z.eval(Z))
print('f_d(D,Z) = %e' % f_d.eval(D.T))
print('f_e(E,Z) = %e' % f_e.eval(E.T))
print('g_z(Z) + f_d(D,Z) + f_e(E,Z) = %e' % objective_g[-1])

plt.figure(figsize=(8,5))
plt.semilogy(np.array(objective_z)[:, 0], label='Z: data term')
plt.semilogy(np.array(objective_z)[:, 1], label='Z: prior term')
#plt.semilogy(np.sum(objective[:,0:2], axis=1), label='Z: sum')
plt.semilogy(np.array(objective_d)[:, 0], label='D: data term')
plt.semilogy(np.array(objective_e)[:, 0], label='E: data term')
N = np.shape(objective_z)[0]
plt.xlim(0, N-1)
plt.title('Sub-problems convergence')
plt.xlabel('Iteration number (inner loops)')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()
print('Inner loop: %d iterations' % N)

plt.figure(figsize=(8,5))
plt.plot(objective_g)
N = np.shape(objective_g)[0]
plt.xlim(0, N-1)
plt.title('Global convergence')
plt.xlabel('Iteration number (outer loop)')
plt.ylabel('Objective function value')
plt.grid(True); plt.show()
print('Outer loop: %d iterations\n' % N)

Solution analysis¶

Sparse codes¶

They can be arbitrary sparse by decreasing $\lambda_d$ and $\lambda_e$.

In [ ]:

nnz = np.count_nonzero(Z)
#nnz = np.sum(np.abs(Z) < 1e-4)
print('Sparsity of Z: %d non-zero entries out of %d entries, i.e. %.1f%%.' % (nnz, Z.size, 100.*nnz/Z.size))

plt.figure(figsize=(8,5))
plt.spy(Z, precision=0, aspect='auto')
plt.xlabel('N = %d samples' % N)
plt.ylabel('m = %d atoms' % m)
plt.show()

Dictionary¶

All constraints are indeed honored.
Only few atoms are actually used. This can already be seen via the sparse code sparsity pattern.
Learned atoms seem plausible but highly repetitive (keep in mind that the images are normalized).

In [ ]:

d = np.sqrt(np.sum(D*D, axis=0))
print('Constraints on D: %s' % np.alltrue(d <= 1))

plt.figure(figsize=(8,5))
plt.semilogy(d, '.')
plt.title('Dictionary atom norms')
plt.xlabel('Atom [1,m]')
plt.ylabel('Norm [0,1]')
plt.grid(True); plt.show()
plt.show()

plt.figure(figsize=(8,5))
plt.spy(D, precision=1e-7)
plt.xlabel('m = %d atoms' % m)
plt.ylabel('data dimensionality of n = %d' % n)
plt.show()

#plt.scatter to show intensity

In [ ]:

plt.figure(figsize=(8,8))
Nx = np.ceil(np.sqrt(m))
Ny = np.ceil(m / float(Nx))
for k in np.arange(m):
    plt.subplot(Ny, Nx, k)
    img = D[:,k].reshape(Np,Np)
    plt.imshow(img, cmap='gray')  # vmin=0, vmax=1 to disable normalization.
    plt.axis('off')

Encoder¶

All constraints are indeed honored.
Depending on the conditions, the encoder ressembles the transpose of the dictionary, i.e. $E \approx D^T$.

In [ ]:

e = np.sqrt(np.sum(E*E, axis=0))
print('Constraints on E: %s' % np.alltrue(e <= 1))

plt.figure(figsize=(8,5))
plt.semilogy(e)
plt.title('Encoder norms')
plt.xlabel('[1,n]')
plt.ylabel('Norm [0,1]')
plt.grid(True); plt.show()
plt.show()

plt.figure(figsize=(8,5))
plt.spy(E, precision=1e-7)
plt.xlabel('data dimensionality of n = %d' % n)
plt.ylabel('m = %d atoms' % m)
plt.show()