Energy auto-encoder: algorithm formulation and synthetic tests

Setup

In [ ]:
%matplotlib inline
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
from pyunlocbox import functions, solvers
import matplotlib, pyunlocbox  # For versions only.
print('Software versions:')
for pkg in [np, matplotlib, pyunlocbox]:
    print('  %s: %s' % (pkg.__name__, pkg.__version__))

Linear least-squares fitting

Given $X \in R^{n \times N}$ and $D \in R^{n \times m}$, solve $\min\limits_{Z \in R^{m \times N}} \frac{\lambda_d}{2} \|X - DZ\|_F^2$

In [ ]:
# Data matrix and dictionary. Three dictionaries are provided to test the
# three regimes of the linear system; the last assignment is the one used.
X = np.array([[2,-5,0],[-9,3,1],[5,7,-6]]).transpose()
D = np.array([[1,0,1],[-1,2,4]]).transpose()  # Overdetermined (no solution).
D = np.array([[1,0,0],[0,0,0.5],[0,2,0]]).transpose()  # Determined (a unique solution).
D = np.array([[1,0,1],[0,-4,0.5],[7,-2,0],[-1,2,4]]).transpose()  # Indeterminate (infinite number of solutions).
print('X = \n%s' % (X,))
print('D = \n%s' % (D,))
n, N = X.shape
n, m = D.shape

# Model hyper-parameter: weight of the data-fidelity term.
l_d = 1

# Relative tolerance used as the solver stopping criterion.
rtol = 1e-4

# Smooth loss: (l_d / 2) * ||X - D Z||_F^2.
f_z = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)

# Forward-backward splitting (FISTA) with a step size no larger than 1/L,
# where L bounds the Lipschitz constant of the gradient of f_z.
L = l_d * la.norm(np.dot(D.T, D))
solver = solvers.forward_backward(step=1./L, method='FISTA')
Z0 = np.random.normal(size=(m, N))
ret = solvers.solve([f_z], Z0, solver, rtol=rtol)

# Display solution.
Z = ret['sol']
print('Z = \n%s' % (Z,))
print('DZ = \n%s' % (np.dot(D, Z),))

# Visual convergence analysis.
objective = np.array(ret['objective'])
plt.figure(figsize=(8,5))
plt.semilogy(objective[:,0], label='objective')
plt.title('Convergence')
plt.xlabel('Iteration number')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()

Sparse coding

Given $X \in R^{n \times N}$ and $D \in R^{n \times m}$, solve $\min\limits_{Z \in R^{m \times N}} \frac{\lambda_d}{2} \|X - DZ\|_F^2 + \|Z\|_1$

In [ ]:
# Data matrix and dictionary. Three dictionaries are provided to test the
# under-, exactly- and over-complete regimes; the last assignment is used.
X = np.array([[2,-5,0],[-9,3,1],[5,7,-6]]).transpose()
D = np.array([[1,0,1],[-1,2,4]]).transpose()  # Under-complete.
D = np.array([[1,0,0],[0,0,0.5],[0,2,0]]).transpose()  # Complete.
D = np.array([[1,0,1],[0,-4,0.5],[7,-2,0],[-1,2,4]]).transpose()  # Over-complete.
print('X = \n%s' % (X,))
print('D = \n%s' % (D,))
n, N = X.shape
n, m = D.shape

# Model hyper-parameter: weight of the data-fidelity term.
l_d = 1

# Relative tolerance used as the solver stopping criterion.
rtol = 1e-4

# Smooth data term (l_d / 2) * ||X - D Z||_F^2 plus non-smooth L1 prior.
f_z = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)
g_z = functions.norm_l1()

# Forward-backward splitting (FISTA) with a step size no larger than 1/L,
# where L bounds the Lipschitz constant of the gradient of the smooth term.
L = l_d * la.norm(np.dot(D.T, D))
solver = solvers.forward_backward(step=1./L, method='FISTA')
Z0 = np.random.normal(size=(m, N))
ret = solvers.solve([f_z, g_z], Z0, solver, rtol=rtol)

# Display solution.
Z = ret['sol']
print('Z = \n%s' % (Z,))
print('DZ = \n%s' % (np.dot(D, Z),))

# Visual convergence analysis.
objective = np.array(ret['objective'])
plt.figure(figsize=(8,5))
plt.semilogy(objective[:, 0], label='data term')
plt.semilogy(objective[:, 1], label='prior term')
plt.semilogy(objective.sum(axis=1), label='global objective')
plt.title('Convergence')
plt.xlabel('Iteration number')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()

Dictionary learning

Given $X \in R^{n \times N}$, solve $\min\limits_{Z \in R^{m \times N}, D \in R^{n \times m}} \frac{\lambda_d}{2} \|X - DZ\|_F^2 + \|Z\|_1$ s.t. $\|d_i\|_2 \leq 1, i = 1, \ldots, m$

In [ ]:
# Data.
X = np.array([[2,-5,0],[-9,3,1],[5,7,-6],[-1,3,0],[9,4,7]]).transpose()
print('X = \n%s' % (X,))
(n, N) = np.shape(X)

# Model hyper-parameters.
l_d = 5  # Weight of the data-fidelity term.
m = 4    # Number of dictionary atoms.

# Solver numeric parameters.
N_outer = 30  # Number of alternating-minimization sweeps.
rtol = 1e-4   # Relative tolerance of each inner convex solver.

# Static loss function definitions.
g_z = functions.norm_l1()
# NOTE(review): proj_b2 projects the *whole* matrix onto a single L2-ball of
# radius 1, not each atom d_i onto its own ball as the formulation
# ||d_i||_2 <= 1 states -- confirm whether a per-column projection is intended.
g_d = functions.proj_b2(epsilon=1)  # L2-ball indicator function.

# Initialization.
Z = np.random.normal(size=(m, N))
D = np.random.normal(size=(n, m))
objective_z = []
objective_d = []
objective_g = []

# Multi-variate non-convex optimization (outer loop).
# The loop index is deliberately NOT named `n`: that would shadow the data
# dimension `n` defined above and leak a stale value to later cells.
for i_outer in np.arange(N_outer):

    # Convex minimization for Z (sparse coding with D fixed).
    f_z = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)
    L = l_d * la.norm(np.dot(D.T, D))  # Lipschitz continuous gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_z, g_z], Z, solver, rtol=rtol, verbosity='NONE')
    Z = ret['sol']
    objective_z.extend(ret['objective'])
    # Pad the D history with zeros so both curves stay aligned per iteration.
    objective_d.extend(np.zeros(np.shape(ret['objective'])))

    # Convex minimization for D (dictionary update with Z fixed).
    f_d = functions.norm_l2(lambda_=l_d/2., A=Z.T, y=X.T, tight=False)
    L = l_d * la.norm(np.dot(Z, Z.T))  # Lipschitz continuous gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_d, g_d], D.T, solver, rtol=rtol, verbosity='NONE')
    D = ret['sol'].T
    objective_d.extend(ret['objective'])
    objective_z.extend(np.zeros(np.shape(ret['objective'])))

    # Global objective (as an indicator, g_d is 0).
    objective_g.append(f_z.eval(Z) + g_z.eval(Z) + f_d.eval(D.T))

# Display solution.
print('Z = \n%s' % (Z,))
print('D = \n%s' % (D,))
print('|d| = %s' % ([la.norm(D[:,k]) for k in np.arange(np.size(D,1))],))
print('DZ = \n%s' % (np.dot(D, Z),))

# Visual convergence analysis.
print('f_z(Z) + g_z(Z) + f_d(D) = %f' % (objective_g[-1],))
objective = np.concatenate((np.array(objective_z), np.array(objective_d)), axis=1)
plt.figure(figsize=(8,5))
plt.semilogy(objective[:, 0], label='Z: data term')
plt.semilogy(objective[:, 1], label='Z: prior term')
#plt.semilogy(np.sum(objective[:,0:2], axis=1), label='Z: global')
plt.semilogy(objective[:, 2], label='D: data term')
plt.title('Sub-problems convergence')
plt.xlabel('Iteration number (inner loops)')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()
plt.figure(figsize=(8,5))
plt.semilogy(objective_g)
plt.title('Global convergence')
plt.xlabel('Iteration number (outer loop)')
plt.ylabel('Objective function value')
plt.grid(True); plt.show()

Encoder learning

Given $X \in R^{n \times N}$, solve $\min\limits_{Z \in R^{m \times N}, D \in R^{n \times m}, E \in R^{m \times n}} \frac{\lambda_d}{2} \|X - DZ\|_F^2 + \frac{\lambda_e}{2} \|Z - EX\|_F^2 + \|Z\|_1$ s.t. $\|d_i\|_2 \leq 1$, $\|e_k\|_2 \leq 1$, $i = 1, \ldots, m$, $k = 1, \ldots, n$

In [ ]:
# Data.
X = np.array([[2,-5,0],[-9,3,1],[5,7,-6],[-1,3,0],[9,4,7]]).transpose()
print('X = \n%s' % (X,))
(n, N) = np.shape(X)

# Model hyper-parameters.
l_d = 5  # Weight of the decoder data-fidelity term.
l_e = 5  # Weight of the encoder data-fidelity term.
m = 3    # Dimensionality of the sparse code.

# Solver numeric parameters.
N_outer = 20  # Number of alternating-minimization sweeps.
rtol = 1e-4   # Relative tolerance of each inner convex solver.

# Static loss function definitions.
g_z = functions.norm_l1()
# NOTE(review): proj_b2 projects the *whole* matrix onto a single L2-ball of
# radius 1, not each column onto its own ball as the per-column constraints
# ||d_i||_2 <= 1 and ||e_k||_2 <= 1 state -- confirm intent.
g_de = functions.proj_b2(epsilon=1)  # L2-ball indicator function.

# Initialization.
Z = np.random.normal(size=(m, N))
D = np.random.normal(size=(n, m))
E = np.random.normal(size=(m, n))
objective_z = []
objective_d = []
objective_e = []
objective_g = []

# Multi-variate non-convex optimization (outer loop).
# The loop index is deliberately NOT named `n`: that would shadow the data
# dimension `n` defined above and leak a stale value to later cells.
for i_outer in np.arange(N_outer):

    # Convex minimization for Z. Both fidelity terms are smooth in Z, so
    # they are combined into a single differentiable function. The lambda
    # parameter is lower-case `z` to avoid shadowing the outer variable Z.
    f_zd = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)
    f_ze = functions.norm_l2(lambda_=l_e/2., y=np.dot(E,X))
    f_z = functions.func()
    f_z._eval = lambda z: f_zd.eval(z) + f_ze.eval(z)
    f_z._grad = lambda z: f_zd.grad(z) + f_ze.grad(z)
    L = l_e + l_d * la.norm(np.dot(D.T, D))  # Lipschitz continuous gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_z, g_z], Z, solver, rtol=rtol, verbosity='NONE')
    Z = ret['sol']
    objective_z.extend(ret['objective'])
    # Pad the other histories with zeros to keep all curves aligned.
    objective_d.extend(np.zeros(np.shape(ret['objective'])))
    objective_e.extend(np.zeros(np.shape(ret['objective'])))

    # Convex minimization for D (decoder update with Z fixed).
    f_d = functions.norm_l2(lambda_=l_d/2., A=Z.T, y=X.T, tight=False)
    L = l_d * la.norm(np.dot(Z, Z.T))  # Lipschitz continuous gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_d, g_de], D.T, solver, rtol=rtol, verbosity='NONE')
    D = ret['sol'].T
    objective_d.extend(ret['objective'])
    objective_z.extend(np.zeros(np.shape(ret['objective'])))
    objective_e.extend(np.zeros(np.shape(ret['objective'])))

    #E = D.T  # Optional experiment: tie the encoder to the decoder.

    # Convex minimization for E (encoder update with Z fixed).
    f_e = functions.norm_l2(lambda_=l_e/2., A=X.T, y=Z.T, tight=False)
    L = l_e * la.norm(np.dot(X, X.T))  # Lipschitz continuous gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_e, g_de], E.T, solver, rtol=rtol, verbosity='NONE')
    E = ret['sol'].T
    objective_e.extend(ret['objective'])
    objective_z.extend(np.zeros(np.shape(ret['objective'])))
    objective_d.extend(np.zeros(np.shape(ret['objective'])))

    #D = E.T  # Optional experiment: tie the decoder to the encoder.

    # Global objective (as indicators, g_d and g_e are 0).
    objective_g.append(f_z.eval(Z) + g_z.eval(Z) + f_d.eval(D.T) + f_e.eval(E.T))

# Display solution.
print('Z = \n%s' % (Z,))
print('D = \n%s' % (D,))
print('|d| = %s' % ([la.norm(D[:,k]) for k in np.arange(np.size(D,1))],))
print('E = \n%s' % (E,))
print('|e| = %s' % ([la.norm(E[:,k]) for k in np.arange(np.size(E,1))],))
print('DZ = \n%s' % (np.dot(D, Z),))
print('EX = \n%s' % (np.dot(E, X),))

# Visual convergence analysis.
print('f_z(Z) + g_z(Z) + f_d(D) + f_e(E) = %f' % (objective_g[-1],))
objective = np.concatenate((np.array(objective_z), np.array(objective_d), np.array(objective_e)), axis=1)
plt.figure(figsize=(8,5))
plt.semilogy(objective[:, 0], label='Z: data term')
plt.semilogy(objective[:, 1], label='Z: prior term')
#plt.semilogy(np.sum(objective[:,0:2], axis=1), label='Z: sum')
plt.semilogy(objective[:, 2], label='D: data term')
plt.semilogy(objective[:, 4], label='E: data term')
plt.title('Sub-problems convergence')
plt.xlabel('Iteration number (inner loops)')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()
plt.figure(figsize=(8,5))
plt.semilogy(objective_g)
plt.title('Global convergence')
plt.xlabel('Iteration number (outer loop)')
plt.ylabel('Objective function value')
plt.grid(True); plt.show()