#!/usr/bin/env python
# coding: utf-8

# # Energy auto-encoder: algorithm formulation and synthetic tests

# ## Setup

# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
from pyunlocbox import functions, solvers
import matplotlib, pyunlocbox  # For versions only.

print('Software versions:')
for pkg in [np, matplotlib, pyunlocbox]:
    print('  %s: %s' % (pkg.__name__, pkg.__version__))


# ## Linear least squares fitting

# Given $X \in R^{n \times N}$ and $D \in R^{n \times m}$, solve $\min\limits_{Z \in R^{m \times N}} \frac{\lambda_d}{2} \|X - DZ\|_F^2$

# In[ ]:


# Data and dictionary (three alternatives; only the last assignment of D takes effect).
X = np.array([[2,-5,0],[-9,3,1],[5,7,-6]]).transpose()
D = np.array([[1,0,1],[-1,2,4]]).transpose()                      # Overdetermined (no exact solution).
D = np.array([[1,0,0],[0,0,0.5],[0,2,0]]).transpose()             # Determined (a unique solution).
D = np.array([[1,0,1],[0,-4,0.5],[7,-2,0],[-1,2,4]]).transpose()  # Underdetermined (infinitely many solutions).
print('X = \n%s' % (X,))
print('D = \n%s' % (D,))
(n, N) = np.shape(X)
(n, m) = np.shape(D)

# Model hyper-parameters.
l_d = 1

# Algorithm numeric parameters.
rtol = 1e-4

# Loss function.
f_z = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)

# Convex minimization (step size should be at most 1/L).
L = l_d * la.norm(np.dot(D.T, D))  # Upper bound on the Lipschitz constant of the gradient.
solver = solvers.forward_backward(step=1./L, method='FISTA')
Z = np.random.normal(size=(m, N))
ret = solvers.solve([f_z], Z, solver, rtol=rtol)

# Display solution.
Z = ret['sol']
print('Z = \n%s' % (Z,))
print('DZ = \n%s' % (np.dot(D, Z),))

# Visual convergence analysis.
objective = np.array(ret['objective'])
plt.figure(figsize=(8,5))
plt.semilogy(objective[:,0], label='objective')
plt.title('Convergence')
plt.xlabel('Iteration number')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()


# ## Sparse coding

# Given $X \in R^{n \times N}$ and $D \in R^{n \times m}$, solve $\min\limits_{Z \in R^{m \times N}} \frac{\lambda_d}{2} \|X - DZ\|_F^2 + \|Z\|_1$

# In[ ]:


# Data and dictionary (three alternatives; only the last assignment of D takes effect).
X = np.array([[2,-5,0],[-9,3,1],[5,7,-6]]).transpose()
D = np.array([[1,0,1],[-1,2,4]]).transpose()                      # Under-complete.
D = np.array([[1,0,0],[0,0,0.5],[0,2,0]]).transpose()             # Complete.
D = np.array([[1,0,1],[0,-4,0.5],[7,-2,0],[-1,2,4]]).transpose()  # Over-complete.
print('X = \n%s' % (X,))
print('D = \n%s' % (D,))
(n, N) = np.shape(X)
(n, m) = np.shape(D)

# Model hyper-parameters.
l_d = 1

# Algorithm numeric parameters.
rtol = 1e-4

# Loss functions.
f_z = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)
g_z = functions.norm_l1()

# Convex minimization (step size should be at most 1/L).
L = l_d * la.norm(np.dot(D.T, D))  # Upper bound on the Lipschitz constant of the gradient.
solver = solvers.forward_backward(step=1./L, method='FISTA')
Z = np.random.normal(size=(m, N))
ret = solvers.solve([f_z, g_z], Z, solver, rtol=rtol)

# Display solution.
Z = ret['sol']
print('Z = \n%s' % (Z,))
print('DZ = \n%s' % (np.dot(D, Z),))

# Visual convergence analysis.
objective = np.array(ret['objective'])
plt.figure(figsize=(8,5))
plt.semilogy(objective[:, 0], label='data term')
plt.semilogy(objective[:, 1], label='prior term')
plt.semilogy(np.sum(objective, axis=1), label='global objective')
plt.title('Convergence')
plt.xlabel('Iteration number')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()

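
# As a quick sanity check, added here as a sketch (not part of the original formulation), the cell below compares the FISTA sparse codes with the plain least-squares solution from `np.linalg.lstsq` and reports how many coefficients the L1 prior drove to (near) zero. The threshold `1e-3` and the name `Z_ls` are illustrative choices.

# In[ ]:


# Unregularized baseline: minimize ||X - DZ||_F^2 column by column.
Z_ls = np.linalg.lstsq(D, X, rcond=None)[0]
print('Least-squares data term: %f' % (l_d/2. * la.norm(X - np.dot(D, Z_ls))**2,))
print('Sparse coding data term: %f' % (l_d/2. * la.norm(X - np.dot(D, Z))**2,))
print('Fraction of near-zero entries in Z: %.2f' % (np.mean(np.abs(Z) < 1e-3),))
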

# ## Dictionary learning

# Given $X \in R^{n \times N}$, solve $\min\limits_{Z \in R^{m \times N}, D \in R^{n \times m}} \frac{\lambda_d}{2} \|X - DZ\|_F^2 + \|Z\|_1$ s.t. $\|d_i\|_2 \leq 1, i = 1, \ldots, m$

# In[ ]:


# Data.
X = np.array([[2,-5,0],[-9,3,1],[5,7,-6],[-1,3,0],[9,4,7]]).transpose()
print('X = \n%s' % (X,))
(n, N) = np.shape(X)

# Model hyper-parameters.
l_d = 5
m = 4

# Solver numeric parameters.
N_outer = 30
rtol = 1e-4

# Static loss function definitions.
g_z = functions.norm_l1()
g_d = functions.proj_b2(epsilon=1)  # L2-ball indicator function.

# Initialization.
Z = np.random.normal(size=(m, N))
D = np.random.normal(size=(n, m))
objective_z = []
objective_d = []
objective_g = []

# Multi-variate non-convex optimization (outer loop).
for it in np.arange(N_outer):

    # Convex minimization for Z.
    f_z = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)
    L = l_d * la.norm(np.dot(D.T, D))  # Upper bound on the Lipschitz constant of the gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_z, g_z], Z, solver, rtol=rtol, verbosity='NONE')
    Z = ret['sol']
    objective_z.extend(ret['objective'])
    objective_d.extend(np.zeros(np.shape(ret['objective'])))

    # Convex minimization for D.
    f_d = functions.norm_l2(lambda_=l_d/2., A=Z.T, y=X.T, tight=False)
    L = l_d * la.norm(np.dot(Z, Z.T))  # Upper bound on the Lipschitz constant of the gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_d, g_d], D.T, solver, rtol=rtol, verbosity='NONE')
    D = ret['sol'].T
    objective_d.extend(ret['objective'])
    objective_z.extend(np.zeros(np.shape(ret['objective'])))

    # Global objective (as an indicator, g_d is 0). Note that f_z was built
    # with the pre-update D, so this is only an approximate indicator.
    objective_g.append(f_z.eval(Z) + g_z.eval(Z) + f_d.eval(D.T))

# Display solution.
print('Z = \n%s' % (Z,))
print('D = \n%s' % (D,))
print('|d| = %s' % ([la.norm(D[:,k]) for k in np.arange(np.size(D,1))],))
print('DZ = \n%s' % (np.dot(D, Z),))

# Visual convergence analysis.
print('f_z(Z) + g_z(Z) + f_d(D) = %f' % (objective_g[-1],))
objective = np.concatenate((np.array(objective_z), np.array(objective_d)), axis=1)
plt.figure(figsize=(8,5))
plt.semilogy(objective[:, 0], label='Z: data term')
plt.semilogy(objective[:, 1], label='Z: prior term')
#plt.semilogy(np.sum(objective[:,0:2], axis=1), label='Z: global')
plt.semilogy(objective[:, 2], label='D: data term')
plt.title('Sub-problems convergence')
plt.xlabel('Iteration number (inner loops)')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()

plt.figure(figsize=(8,5))
plt.semilogy(objective_g)
plt.title('Global convergence')
plt.xlabel('Iteration number (outer loop)')
plt.ylabel('Objective function value')
plt.grid(True); plt.show()

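
# To illustrate how the learned dictionary would be used afterwards (a sketch, not part of the original notebook), the cell below simply reruns the sparse coding step of the previous section with $D$ held fixed and reports the relative reconstruction error. The name `Z_fixed` is illustrative.

# In[ ]:


# Sparse coding with the learned, fixed dictionary D.
f_z = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)
L = l_d * la.norm(np.dot(D.T, D))
solver = solvers.forward_backward(step=1./L, method='FISTA')
ret = solvers.solve([f_z, g_z], np.random.normal(size=(m, N)), solver, rtol=rtol, verbosity='NONE')
Z_fixed = ret['sol']
print('Relative reconstruction error |X - DZ| / |X|: %f' % (la.norm(X - np.dot(D, Z_fixed)) / la.norm(X),))
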

# ## Encoder learning

# Given $X \in R^{n \times N}$, solve $\min\limits_{Z \in R^{m \times N}, D \in R^{n \times m}, E \in R^{m \times n}} \frac{\lambda_d}{2} \|X - DZ\|_F^2 + \frac{\lambda_e}{2} \|Z - EX\|_F^2 + \|Z\|_1$ s.t. $\|d_i\|_2 \leq 1$, $\|e_k\|_2 \leq 1$, $i = 1, \ldots, m$, $k = 1, \ldots, n$

# In[ ]:


# Data.
X = np.array([[2,-5,0],[-9,3,1],[5,7,-6],[-1,3,0],[9,4,7]]).transpose()
print('X = \n%s' % (X,))
(n, N) = np.shape(X)

# Model hyper-parameters.
l_d = 5
l_e = 5
m = 3

# Solver numeric parameters.
N_outer = 20
rtol = 1e-4

# Static loss function definitions.
g_z = functions.norm_l1()
g_de = functions.proj_b2(epsilon=1)  # L2-ball indicator function.

# Initialization.
Z = np.random.normal(size=(m, N))
D = np.random.normal(size=(n, m))
E = np.random.normal(size=(m, n))
objective_z = []
objective_d = []
objective_e = []
objective_g = []

# Multi-variate non-convex optimization (outer loop).
for it in np.arange(N_outer):

    # Convex minimization for Z.
    f_zd = functions.norm_l2(lambda_=l_d/2., A=D, y=X, tight=False)
    f_ze = functions.norm_l2(lambda_=l_e/2., y=np.dot(E, X))
    f_z = functions.func()
    f_z._eval = lambda Z: f_zd.eval(Z) + f_ze.eval(Z)
    f_z._grad = lambda Z: f_zd.grad(Z) + f_ze.grad(Z)
    L = l_e + l_d * la.norm(np.dot(D.T, D))  # Upper bound on the Lipschitz constant of the gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_z, g_z], Z, solver, rtol=rtol, verbosity='NONE')
    Z = ret['sol']
    objective_z.extend(ret['objective'])
    objective_d.extend(np.zeros(np.shape(ret['objective'])))
    objective_e.extend(np.zeros(np.shape(ret['objective'])))

    # Convex minimization for D.
    f_d = functions.norm_l2(lambda_=l_d/2., A=Z.T, y=X.T, tight=False)
    L = l_d * la.norm(np.dot(Z, Z.T))  # Upper bound on the Lipschitz constant of the gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_d, g_de], D.T, solver, rtol=rtol, verbosity='NONE')
    D = ret['sol'].T
    objective_d.extend(ret['objective'])
    objective_z.extend(np.zeros(np.shape(ret['objective'])))
    objective_e.extend(np.zeros(np.shape(ret['objective'])))
    #E = D.T

    # Convex minimization for E.
    f_e = functions.norm_l2(lambda_=l_e/2., A=X.T, y=Z.T, tight=False)
    L = l_e * la.norm(np.dot(X, X.T))  # Upper bound on the Lipschitz constant of the gradient.
    solver = solvers.forward_backward(step=1./L, method='FISTA')
    ret = solvers.solve([f_e, g_de], E.T, solver, rtol=rtol, verbosity='NONE')
    E = ret['sol'].T
    objective_e.extend(ret['objective'])
    objective_z.extend(np.zeros(np.shape(ret['objective'])))
    objective_d.extend(np.zeros(np.shape(ret['objective'])))
    #D = E.T

    # Global objective (the indicator g_de contributes 0 for both D and E).
    # Note that f_z was built with the pre-update D and E, so this is only an
    # approximate indicator.
    objective_g.append(f_z.eval(Z) + g_z.eval(Z) + f_d.eval(D.T) + f_e.eval(E.T))

# Display solution.
print('Z = \n%s' % (Z,))
print('D = \n%s' % (D,))
print('|d| = %s' % ([la.norm(D[:,k]) for k in np.arange(np.size(D,1))],))
print('E = \n%s' % (E,))
print('|e| = %s' % ([la.norm(E[:,k]) for k in np.arange(np.size(E,1))],))
print('DZ = \n%s' % (np.dot(D, Z),))
print('EX = \n%s' % (np.dot(E, X),))

# Visual convergence analysis.
print('f_z(Z) + g_z(Z) + f_d(D) + f_e(E) = %f' % (objective_g[-1],))
objective = np.concatenate((np.array(objective_z), np.array(objective_d), np.array(objective_e)), axis=1)
plt.figure(figsize=(8,5))
plt.semilogy(objective[:, 0], label='Z: data term')
plt.semilogy(objective[:, 1], label='Z: prior term')
#plt.semilogy(np.sum(objective[:,0:2], axis=1), label='Z: sum')
plt.semilogy(objective[:, 2], label='D: data term')
plt.semilogy(objective[:, 4], label='E: data term')
plt.title('Sub-problems convergence')
plt.xlabel('Iteration number (inner loops)')
plt.ylabel('Objective function value')
plt.grid(True); plt.legend(); plt.show()

plt.figure(figsize=(8,5))
plt.semilogy(objective_g)
plt.title('Global convergence')
plt.xlabel('Iteration number (outer loop)')
plt.ylabel('Objective function value')
plt.grid(True); plt.show()
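
# The point of learning the encoder $E$ is to replace iterative sparse coding by a single matrix product at test time. The sketch below (an added check, not in the original notebook) measures how closely the feed-forward codes $EX$ match the optimal codes $Z$, and how well $D E X$ reconstructs $X$. The name `Z_ff` is illustrative.

# In[ ]:


Z_ff = np.dot(E, X)  # Feed-forward codes from the learned encoder.
print('Code approximation error |Z - EX| / |Z|: %f' % (la.norm(Z - Z_ff) / la.norm(Z),))
print('Reconstruction error |X - DEX| / |X|: %f' % (la.norm(X - np.dot(D, Z_ff)) / la.norm(X),))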