We consider the linear PDE $0 = u_{xx} + u_y + \phi u_t$.
For the generation of our training data we use:
$u: \mathbb{R}^3 \rightarrow \mathbb{R}, \; u(x,y,t) = x + y + t$
$f: \mathbb{R}^3 \rightarrow \mathbb{R}, \;f(x,y,t) = 0$
$X_i := (x_i, y_i, t_i) \in [0,1] \times [0,1] \times [0, 0.135] \subseteq \mathbb{R}^3$ for $i \in \{1, \dotsc, n\}$
and our known function values will be $\{u(X_i), f(X_i)\}_{i \in \{1, \dotsc, n\}}$.
We assume that $u$ can be represented as a Gaussian process with SE kernel:
$u \sim \mathcal{GP}(0, k_{uu}(X_i, X_j; \theta))$, where $\theta = \{\sigma, l_x, l_y, l_t\}$.
Set the linear operator to:
$\mathcal{L}_X^{\phi} := \partial_{xx} + \partial_y + \phi \partial_t$
so that
$\mathcal{L}_X^{\phi} u = f$
Problem at hand: Estimate $\phi$ (we expect $\phi = -1$).
import time
import numpy as np
import sympy as sp
from scipy.linalg import solve_triangular
import scipy.optimize as opt
# Global variables: x, y, t, n, y_u, y_f, s
Parameters that can be modified:
# Number of data samples:
n = 20
# Noise parameter:
s = 1e-7  # jitter added to the kernel diagonals for numerical stability
def simulate_data(num_samples=None):
    """Draw random collocation points and evaluate u and f on them.

    Samples (x, y) uniformly in [0, 1]^2 and t on the grid
    {0, 0.015, ..., 0.135}, then evaluates the exact solution
    u(x, y, t) = x + y + t and the right-hand side f = 0.

    Parameters
    ----------
    num_samples : int, optional
        Number of points to draw; defaults to the module-level ``n``.

    Returns
    -------
    tuple of np.ndarray
        (x, y, t, y_u, y_f), each of shape (num_samples,).
    """
    m = n if num_samples is None else num_samples
    x = np.random.rand(m)
    y = np.random.rand(m)
    # t restricted to the grid 0.015 * {0, ..., 9} (vectorized draw
    # instead of the original per-element randint list comprehension).
    t = 0.015 * np.random.randint(0, 10, size=m)
    y_u = x + y + t       # exact solution u(x, y, t) = x + y + t
    y_f = np.zeros(m)     # source term f = 0 everywhere
    return (x, y, t, y_u, y_f)
# Module-level training data used by the kernels and the likelihood below.
x, y, t, y_u, y_f = simulate_data()
$k_{uu}(X_i, X_j; \theta) = \sigma \cdot \exp(-\frac{1}{2l_x}(x_i-x_j)^2 - \frac{1}{2l_y}(y_i-y_j)^2 - \frac{1}{2l_t}(t_i-t_j)^2)$
# Symbolic variables: the two kernel arguments X_i = (x_i, y_i, t_i) and
# X_j = (x_j, y_j, t_j), the SE hyper-parameters, and the PDE parameter phi.
x_i, x_j, y_i, y_j, t_i, t_j = sp.symbols('x_i x_j y_i y_j t_i t_j')
sigma, l_x, l_y, l_t, phi = sp.symbols('sigma l_x l_y l_t phi')
# Squared-exponential kernel k_uu(X_i, X_j; theta).
kuu_sym = sigma * sp.exp(
    -(x_i - x_j)**2 / (2*l_x)
    - (y_i - y_j)**2 / (2*l_y)
    - (t_i - t_j)**2 / (2*l_t)
)
kuu_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t),
                     kuu_sym, "numpy")
def kuu(x, y, t, sigma, l_x, l_y, l_t):
    """Assemble the n x n covariance matrix k_uu(X_i, X_j; theta).

    Parameters
    ----------
    x, y, t : np.ndarray, shape (n,)
        Coordinates of the collocation points.
    sigma, l_x, l_y, l_t : float
        SE kernel hyper-parameters.

    Returns
    -------
    np.ndarray, shape (n, n)
        k[i, j] = k_uu(X_i, X_j).
    """
    # kuu_fn is a numpy-lambdified expression, so it broadcasts:
    # feeding (n, 1) against (1, n) arrays builds the whole matrix in one
    # vectorized call instead of the original O(n^2) Python double loop.
    return kuu_fn(x[:, None], x[None, :],
                  y[:, None], y[None, :],
                  t[:, None], t[None, :],
                  sigma, l_x, l_y, l_t)
$k_{ff}(X_i,X_j;\theta,\phi)$
$= \mathcal{L}_{X_i}^{\phi} \mathcal{L}_{X_j}^{\phi} k_{uu}(X_i, X_j; \theta)$
$= \partial_{x_i x_i, x_j x_j}k_{uu} + \partial_{y_i, x_j x_j}k_{uu} + \phi \partial_{t_i, x_j x_j}k_{uu}$
$+ \partial_{x_i x_i, y_j}k_{uu} + \partial_{y_i, y_j}k_{uu} + \phi \partial_{t_i, y_j}k_{uu}$
$+ \phi \partial_{x_i x_i, t_j}k_{uu} + \phi \partial_{y_i, t_j}k_{uu} + \phi^2 \partial_{t_i, t_j}k_{uu}$
# L^phi_{X_i} L^phi_{X_j} applied to k_uu, expanded term by term.
# sp.diff(e, v, 2) is the second derivative of e with respect to v.
kff_sym = (
    sp.diff(kuu_sym, x_i, 2, x_j, 2)
    + sp.diff(kuu_sym, y_i, 1, x_j, 2)
    + phi * sp.diff(kuu_sym, t_i, 1, x_j, 2)
    + sp.diff(kuu_sym, x_i, 2, y_j, 1)
    + sp.diff(kuu_sym, y_i, y_j)
    + phi * sp.diff(kuu_sym, t_i, y_j)
    + phi * sp.diff(kuu_sym, x_i, 2, t_j, 1)
    + phi * sp.diff(kuu_sym, y_i, t_j)
    + phi**2 * sp.diff(kuu_sym, t_i, t_j)
)
kff_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t, phi),
                     kff_sym, "numpy")
def kff(x, y, t, sigma, l_x, l_y, l_t, phi):
    """Assemble the n x n covariance matrix k_ff = L_i L_j k_uu.

    Parameters
    ----------
    x, y, t : np.ndarray, shape (n,)
        Coordinates of the collocation points.
    sigma, l_x, l_y, l_t : float
        SE kernel hyper-parameters.
    phi : float
        PDE parameter in the operator L = d_xx + d_y + phi d_t.

    Returns
    -------
    np.ndarray, shape (n, n)
        k[i, j] = k_ff(X_i, X_j).
    """
    # Single broadcast call to the numpy-lambdified expression replaces
    # the original O(n^2) Python double loop.
    return kff_fn(x[:, None], x[None, :],
                  y[:, None], y[None, :],
                  t[:, None], t[None, :],
                  sigma, l_x, l_y, l_t, phi)
$k_{fu}(X_i,X_j;\theta,\phi) \\ = \mathcal{L}_{X_i}^{\phi} k_{uu}(X_i, X_j; \theta) \\ = \partial_{x_i x_i}k_{uu} + \partial_{y_i}k_{uu} + \phi \partial_{t_i}k_{uu}$
# L^phi_{X_i} k_uu: second x_i-derivative plus first y_i-derivative plus
# phi times first t_i-derivative of the SE kernel.
kfu_sym = (sp.diff(kuu_sym, x_i, 2)
           + sp.diff(kuu_sym, y_i)
           + phi * sp.diff(kuu_sym, t_i))
kfu_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t, phi),
                     kfu_sym, "numpy")
def kfu(x, y, t, sigma, l_x, l_y, l_t, phi):
    """Assemble the n x n cross-covariance matrix k_fu = L_i k_uu.

    Parameters
    ----------
    x, y, t : np.ndarray, shape (n,)
        Coordinates of the collocation points.
    sigma, l_x, l_y, l_t : float
        SE kernel hyper-parameters.
    phi : float
        PDE parameter in the operator L = d_xx + d_y + phi d_t.

    Returns
    -------
    np.ndarray, shape (n, n)
        k[i, j] = k_fu(X_i, X_j).
    """
    # Single broadcast call to the numpy-lambdified expression replaces
    # the original O(n^2) Python double loop.
    return kfu_fn(x[:, None], x[None, :],
                  y[:, None], y[None, :],
                  t[:, None], t[None, :],
                  sigma, l_x, l_y, l_t, phi)
def kuf(x, y, t, sigma, l_x, l_y, l_t, phi):
    """Cross-covariance k_uf(X_i, X_j): the transpose of k_fu."""
    return np.transpose(kfu(x, y, t, sigma, l_x, l_y, l_t, phi))
We minimize the negative log marginal likelihood, computed with block matrix inversion and Cholesky decomposition (falling back to SVD when the Cholesky factorization fails).
def _inv_and_logdet(M):
    """Invert a symmetric positive definite matrix and return (M_inv, logdet).

    Tries a Cholesky factorization first (slight performance boost over
    np.linalg.inv); falls back to an SVD-based inverse when M is
    numerically not positive definite.
    """
    try:
        L = np.linalg.cholesky(M)
        L_inv = solve_triangular(L, np.identity(M.shape[0]), lower=True)
        return L_inv.T @ L_inv, 2*np.log(np.abs(np.diag(L))).sum()
    except np.linalg.LinAlgError:
        U, sv, Vt = np.linalg.svd(M)
        # Inverting a diagonal matrix is just the elementwise reciprocal of
        # the singular values; the original called np.linalg.inv(np.diag(sv)),
        # an O(n^3) routine for an O(n) job.
        return (Vt.T / sv) @ U.T, np.log(sv).sum()


def nlml(params):
    """Negative log marginal likelihood of the joint GP prior on (u, f).

    Parameters
    ----------
    params : array-like, shape (5,)
        [log(sigma), log(l_x), log(l_y), log(l_t), phi]. The kernel
        hyper-parameters are optimized in log space so they stay positive.

    Returns
    -------
    float
        y^T K^{-1} y + log det K (dropping constants that do not affect the
        minimizer), where K = [[k_uu + sI, k_uf], [k_fu, k_ff + sI]] and
        y = [y_u, y_f]. Reads the module-level data x, y, t, y_u, y_f, n, s.
    """
    sigma_exp = np.exp(params[0])
    l_x_exp = np.exp(params[1])
    l_y_exp = np.exp(params[2])
    l_t_exp = np.exp(params[3])
    phi_val = params[4]
    # Blocks of the joint covariance; s*I is the noise/jitter regularization.
    A = kuu(x, y, t, sigma_exp, l_x_exp, l_y_exp, l_t_exp) + s*np.eye(n)
    B = kfu(x, y, t, sigma_exp, l_x_exp, l_y_exp, l_t_exp, phi_val).T  # = k_uf
    C = kff(x, y, t, sigma_exp, l_x_exp, l_y_exp, l_t_exp, phi_val) + s*np.eye(n)
    # Block matrix inversion: invert A, then the Schur complement of A.
    A_inv, logdet_A = _inv_and_logdet(A)
    KA = C - B.T @ A_inv @ B
    KA_inv, logdet_KA = _inv_and_logdet(KA)
    # Piecing it together via the block-inverse identity for K.
    T = A_inv @ B @ KA_inv
    yKy = y_u @ (A_inv + T @ B.T @ A_inv) @ y_u - 2*y_u @ T @ y_f + y_f @ KA_inv @ y_f
    return yKy + logdet_A + logdet_KA
1. Nelder-Mead
def callbackf(params):
    """Nelder-Mead per-iteration callback: log the current parameter vector."""
    print(params)
def minimize_restarts(x, y, y_u, y_f, n=5):
    """Run Nelder-Mead from ``n`` random starting points; return the best fit.

    NOTE(review): the data arguments are accepted for interface symmetry but
    the objective ``nlml`` reads the module-level data directly.
    """
    results = [
        opt.minimize(nlml, np.random.rand(5), callback=callbackf,
                     method="Nelder-Mead",
                     options={'maxfev': 5000, 'fatol': 0.001, 'xatol': 0.001})
        for _ in range(n)
    ]
    return min(results, key=lambda res: res.fun)
t0 = time.time()  # wall-clock start of the optimization
m = minimize_restarts(x, y, y_u, y_f, 3)  # best result out of 3 random restarts
t_Nelder = time.time() - t0  # elapsed optimization time in seconds
print(m)
t_Nelder  # notebook cell output: elapsed seconds
264.58030796051025
# params[4] holds phi; we expect the estimate to be close to -1.
print(f'The inferred parameter is: {m.x[4]}')
The inferred parameter is: -1.004052962023918