To generate our training data we use the same method as in the chapter PDE-Net in Tensorflow (for details please refer to that chapter), with:
$a = b = 2$
$c = d = 0.5$
$u: \mathbb{R}^3 \rightarrow \mathbb{R}$ is some solution to the PDE $(*)\;\; u_t = a u_x + b u_y + c u_{xx} + d u_{yy}$.
$f: \mathbb{R}^3 \rightarrow \mathbb{R}, \;f(x,y,t) = 0$
$X_i := (x_i, y_i, t_i) \in [0,1] \times [0,1] \times [0, 0.012] \subseteq \mathbb{R}^3$ for $i \in \{1, \dotsc, n\}$ (the time horizon is $dt \cdot (\text{layers} - 1) = 0.012$, cf. the options below)
and our known function values will be $\{u(X_i), f(X_i)\}_{i \in \{1, \dotsc, n\}}$.
We assume that $u$ can be represented as a Gaussian process with an SE (squared exponential) kernel:
$u \sim \mathcal{GP}(0, k_{uu}(X_i, X_j; \theta))$, where $\theta = \{\sigma, l_x, l_y, l_t\}$.
Set the linear operator to:
$\mathcal{L}_X^{\phi} := a\partial_x + b\partial_y + c\partial_{x,x} + d\partial_{y,y} - \partial_t$
so that
$\mathcal{L}_X^{\phi} u = f = 0$
Problem at hand: estimate $\phi := \{a\}$ with $(b, c, d) = (2, 0.5, 0.5)$ held fixed.
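Since $\mathcal{L}_X^{\phi}$ is a linear operator, $f$ is again a Gaussian process, and $u$ and $f$ are jointly Gaussian. With noise parameter $s$ this yields the block covariance structure that is assembled in the code below:
$\begin{pmatrix} y_u \\ y_f \end{pmatrix} \sim \mathcal{N}\left(0, \begin{pmatrix} k_{uu} + sI & k_{uf} \\ k_{fu} & k_{ff} + sI \end{pmatrix}\right)$
where $k_{fu} = \mathcal{L}_{X_i}^{\phi} k_{uu}$, $k_{uf} = k_{fu}^{\top}$ and $k_{ff} = \mathcal{L}_{X_i}^{\phi} \mathcal{L}_{X_j}^{\phi} k_{uu}$ (derived below).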
import time
import numpy as np
import sympy as sp
import numpy.fft as fft
from scipy.linalg import solve_triangular
import scipy.optimize as opt
# Global variables: x, y, t, n, y_u, y_f, s
# Number of data samples
n = 80
# Noise of our data:
s = 1e-7
options = {'mesh_size': [250, 250], # How large is the (regular) 2D-grid of function values for
# a fixed t. Keep mesh_size[0] = mesh_size[1]
'layers': 9, # Layers of the NN. Also counting the initial layer!
'dt': 0.0015, # Time discretization. We step dt*(layers - 1) forward in time.
'batch_size': 1, # We take a batch of sub-grids in space
           'noise_level': 0.0,    # Noise added to the data; the value is scaled by the
                                  # standard deviation of u0 before being applied
'downsample_by': 1, # Size of sub-grids (in space) * downsample_by = mesh_size
}
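As a quick illustration (not part of the pipeline), these options imply the following spatial resolution and simulated time horizon:

# Illustrative only: resolution and time horizon implied by 'options'
nx_opt, ny_opt = options['mesh_size']
dx_opt = 2 * np.pi / (nx_opt - 1)                  # grid spacing on [0, 2*pi]
T_opt = options['dt'] * (options['layers'] - 1)    # total simulated time
print(f"dx = dy = {dx_opt:.4f}, T = {T_opt}")      # dx ~ 0.0252, T = 0.012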
def initgen(mesh_size, freq=3, boundary='Periodic'):
"""
Returns function values for t=0 on a regular grid of size 'mesh_size' in [0, 2*pi]x[0, 2*pi]
as a matrix
"""
    # As called from generate(): (mesh_size, freq, boundary) = ([250, 250], 4, 'Periodic')
if np.iterable(freq):
return freq
# 250x250 normally distributed variables IFFTed and FFTed:
x = _initgen_periodic(mesh_size, freq=freq)
x = x * 100
if boundary.upper() == 'DIRICHLET':
dim = x.ndim
for i in range(dim):
y = np.arange(mesh_size[i]) / mesh_size[i]
y = y * (1 - y)
s = np.ones(dim, dtype=np.int32)
s[i] = mesh_size[i]
y = np.reshape(y, s)
x = x * y
        x = x[(slice(1, None),) * dim]  # tuple index; list-of-slices indexing is deprecated in NumPy
x = x * 16
return x
def _initgen_periodic(mesh_size, freq=3):
# np.random.seed(50)
    # As called from initgen(): (mesh_size, freq) = ([250, 250], 4)
dim = len(mesh_size)
# Default: 250x250-matrix of normally distributed variables
x = np.random.randn(*mesh_size)
coe = fft.ifftn(x)
# set frequency of generated initial value
# Array of random ints in [freq, 2*freq - 1]
freqs = np.random.randint(freq, 2 * freq, size=[dim, ])
# freqs = [10,10]
for i in range(dim):
perm = np.arange(dim, dtype=np.int32)
perm[i] = 0
perm[0] = i
# Permutes for i = 1 and does nothing for i = 0.
coe = coe.transpose(*perm)
coe[freqs[i] + 1:-freqs[i]] = 0
coe = coe.transpose(*perm)
x = fft.fftn(coe)
assert np.linalg.norm(x.imag) < 1e-8
x = x.real
return x
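The loop zeroes all Fourier coefficients above the randomly chosen cut-off frequencies, so initgen produces a real, low-frequency trigonometric polynomial. A small illustrative check of this property (not part of the pipeline):

# Illustrative check: the generated field contains only low frequencies.
u0_test = _initgen_periodic([64, 64], freq=3)
coe_test = np.abs(fft.ifftn(u0_test))
# Coefficients with index in [2*freq, N - 2*freq] are zeroed along each axis:
assert coe_test[6:-5, :].max() < 1e-8 and coe_test[:, 6:-5].max() < 1e-8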
def pad_input_2(input, pad_by):
"""
We increase the size of input for all j by pad_by on each side of the matrix
by inserting values from the opposite side
"""
mesh_size = input.shape[0]
B = np.eye(mesh_size, dtype=np.float32)
for i in range(pad_by):
a = np.zeros(mesh_size, dtype=np.float32)
a[mesh_size - i - 1] = 1
B = np.concatenate(([a], B), axis=0)
for i in range(pad_by):
a = np.zeros(mesh_size, dtype=np.float32)
a[i] = 1
B = np.concatenate((B, [a]), axis=0)
return B @ input @ B.T
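The matrix B prepends and appends the rows (and, via B.T, the columns) from the opposite side, i.e. it implements periodic padding. For intuition, the same result can be obtained with np.pad in 'wrap' mode (illustrative equivalence check, not used by the pipeline):

# Illustrative only: pad_input_2 agrees with periodic padding via np.pad
A_test = np.random.randn(6, 6).astype(np.float32)
assert np.allclose(pad_input_2(A_test, 2), np.pad(A_test, 2, mode='wrap'))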
def downsample(sample, scale):
"""
Returns a regular somewhat random sub-grid of sample, whose size is reduced by a
factor of 'scale'.
"""
# np.random.seed(50)
idx1 = slice(np.random.randint(scale), None, scale)
idx2 = slice(np.random.randint(scale), None, scale)
# idx1 = slice(1, None, scale)
# idx2 = slice(0, None, scale)
    for key in sample:
        sample[key] = sample[key][idx1, idx2]
return sample
def addNoise(sample, noise, layers):
# Adding noise to u0
mean = sample['u0'].mean()
stdvar = np.sqrt(((sample['u0'] - mean) ** 2).mean())
size = sample['u0'].shape
startnoise = np.random.standard_normal(size)
sample['u0'] = sample['u0'] + noise * stdvar * startnoise
# Adding noise to ut, t > 0
for l in range(1, layers):
arg = 'u' + str(l)
size = sample[arg].shape
endnoise = np.random.standard_normal(size)
sample[arg] = sample[arg] + noise * stdvar * endnoise
return sample
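In formulas, with $\hat{\sigma}$ the empirical standard deviation of the initial layer $u_0$ and i.i.d. $\varepsilon_\ell \sim \mathcal{N}(0, I)$, every layer is perturbed as
$u_\ell \mapsto u_\ell + \text{noise} \cdot \hat{\sigma} \cdot \varepsilon_\ell, \quad \ell \in \{0, \dotsc, \text{layers} - 1\}.$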
def generate(options):
"""
Generating data / function-values on a regular grid of space-time, adding noise and taking a
batch of down-sampled regular sub-grids of this grid. This batch will contain the samples to
train our network with.
:param options: The dictionary of user-specified options (cf. main.py). Contains e.g. the
grid-dimensions and noise
:return: A batch (as a list) of samples (as dictionaries), that in turn consist of (noisy)
function values on down-sampled sub-grids for all dt-layers.
"""
# u_t = a*u_x + b*u_y + c*u_{xx} + d*u_{yy}
# Variable declarations
nx = options['mesh_size'][0]
ny = options['mesh_size'][1]
nt = options['layers']
batch_size = options['batch_size']
dt = options['dt']
noise_level = options['noise_level']
downsample_by = options['downsample_by']
    # Coefficients of the PDE; a = b = 2, c = d = 0.5 as fixed in the problem statement
a = 2
b = 2
c = 0.5
d = 0.5
dx = 2 * np.pi / (nx - 1)
dy = 2 * np.pi / (ny - 1)
## Needed for plotting:
# x = np.linspace(0, 2*np.pi, num = nx)
# y = np.linspace(0, 2*np.pi, num = ny)
# X, Y = np.meshgrid(x, y)
batch = []
for i in range(batch_size):
############ Change the following lines to implement your own data ############
# Assign initial function:
u = initgen(options['mesh_size'], freq=4, boundary='Periodic')
## Plotting the initial function:
# fig = plt.figure(figsize=(11,7), dpi=100)
# ax = fig.gca(projection='3d')
# surf = ax.plot_surface(X, Y, u[:], cmap=cm.viridis)
#
# plt.show()
sample = {}
sample['u0'] = u
        # Explicit Euler time stepping: forward first differences for the advection
        # terms, central second differences for the diffusion terms
        for layer in range(nt - 1):
            un = pad_input_2(u, 2)[1:, 1:]
            u = (un[1:-1, 1:-1]
                 + c * dt / dx ** 2 * (un[1:-1, 2:] - 2 * un[1:-1, 1:-1] + un[1:-1, 0:-2])
                 + d * dt / dy ** 2 * (un[2:, 1:-1] - 2 * un[1:-1, 1:-1] + un[0:-2, 1:-1])
                 + a * dt / dx * (un[1:-1, 2:] - un[1:-1, 1:-1])
                 + b * dt / dy * (un[2:, 1:-1] - un[1:-1, 1:-1]))[:-1, :-1]
            sample['u' + str(layer + 1)] = u
downsample(sample, downsample_by)
addNoise(sample, noise_level, nt)
batch.append(sample)
return batch
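A minimal usage sketch (illustrative, not part of the pipeline): the returned batch is a list of dictionaries whose keys 'u0', ..., 'u8' hold the (possibly down-sampled, noisy) function values per time layer.

# Illustrative only: structure of the generated batch
demo_batch = generate(options)
demo_sample = demo_batch[0]
print(sorted(demo_sample.keys()))  # ['u0', 'u1', ..., 'u8'] for layers = 9
print(demo_sample['u0'].shape)     # (250, 250) for downsample_by = 1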
Note that we only take $n$ data samples with their corresponding function values from the grid.
def simulate_data():
batch = generate(options)
x_val_arr = []
y_val_arr = []
t_val_arr = []
val_arr = []
for i in range(n):
t_rand = np.random.randint(options['layers'])
        # Restrict spatial samples to [0,1]^2 (the mesh covers [0, 2*pi]^2):
        x_rand = np.random.randint(options['mesh_size'][0] // (2 * np.pi))
        y_rand = np.random.randint(options['mesh_size'][1] // (2 * np.pi))
val = batch[0]['u' + str(t_rand)][x_rand, y_rand]
x_val = 2 * np.pi / (options['mesh_size'][0] - 1) * x_rand
y_val = 2 * np.pi / (options['mesh_size'][1] - 1) * y_rand
t_val = t_rand * options['dt']
x_val_arr.append(x_val)
y_val_arr.append(y_val)
t_val_arr.append(t_val)
val_arr.append(val)
return (np.array(x_val_arr), np.array(y_val_arr), np.array(t_val_arr), np.array(val_arr),
np.zeros(len(val_arr)))
(x,y,t,y_u,y_f) = simulate_data()
$k_{uu}(X_i, X_j; \theta) = \sigma \exp\!\left(-\frac{1}{2l_x}(x_i-x_j)^2 - \frac{1}{2l_y}(y_i-y_j)^2 - \frac{1}{2l_t}(t_i-t_j)^2\right)$
x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t, a,b,c,d = sp.symbols('x_i x_j y_i y_j t_i t_j \
sigma l_x l_y l_t a b c d')
kuu_sym = sigma*sp.exp(-1/(2*l_x)*((x_i - x_j)**2) - 1/(2*l_y)*((y_i - y_j)**2)
- 1/(2*l_t)*((t_i - t_j)**2))
kuu_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t), kuu_sym, "numpy")
def kuu(x, y, t, sigma, l_x, l_y, l_t):
k = np.zeros((x.size, x.size))
for i in range(x.size):
for j in range(x.size):
k[i,j] = kuu_fn(x[i], x[j], y[i], y[j], t[i], t[j], sigma, l_x, l_y, l_t)
return k
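The double loop is easy to read but scales poorly with $n$; an equivalent vectorized version using NumPy broadcasting (a sketch, not used below) computes the same matrix:

# Vectorized equivalent of kuu (illustrative sketch):
def kuu_vec(x, y, t, sigma, l_x, l_y, l_t):
    dx2 = (x[:, None] - x[None, :]) ** 2
    dy2 = (y[:, None] - y[None, :]) ** 2
    dt2 = (t[:, None] - t[None, :]) ** 2
    return sigma * np.exp(-dx2 / (2 * l_x) - dy2 / (2 * l_y) - dt2 / (2 * l_t))
# assert np.allclose(kuu(x, y, t, 1., 1., 1., 1.), kuu_vec(x, y, t, 1., 1., 1., 1.))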
$k_{ff}(X_i,X_j;\theta,\phi)$
$= \mathcal{L}_{X_i}^{\phi} \mathcal{L}_{X_j}^{\phi} k_{uu}(X_i, X_j; \theta)$
$= a^2\partial_{x_i, x_j}k_{uu} + ab \partial_{x_i, y_j}k_{uu} + ac \partial_{x_i, x_j, x_j}k_{uu} + ad \partial_{x_i, y_j, y_j}k_{uu} - a \partial_{x_i, t_j}k_{uu}$
$+ ba\partial_{y_i, x_j}k_{uu} + b^2\partial_{y_i, y_j}k_{uu} + bc\partial_{y_i, x_j, x_j}k_{uu} + bd\partial_{y_i, y_j, y_j}k_{uu} - b\partial_{y_i, t_j}k_{uu}$
$+ ca\partial_{x_i, x_i, x_j}k_{uu}+ cb\partial_{x_i, x_i, y_j}k_{uu}+ c^2\partial_{x_i, x_i, x_j, x_j}k_{uu}+ cd\partial_{x_i, x_i, y_j, y_j}k_{uu}- c\partial_{x_i, x_i, t_j}k_{uu}$
$+ da\partial_{y_i, y_i,x_j}k_{uu}+ db\partial_{y_i, y_i, y_j}k_{uu}+ dc\partial_{y_i, y_i, x_j, x_j}k_{uu}+ d^2\partial_{y_i, y_i, y_j, y_j}k_{uu}- d\partial_{y_i, y_i, t_j}k_{uu}$
$- a\partial_{t_i, x_j}k_{uu}- b\partial_{t_i, y_j}k_{uu}- c\partial_{t_i, x_j, x_j}k_{uu}- d\partial_{t_i, y_j, y_j}k_{uu}+ \partial_{t_i, t_j}k_{uu}$
kff_sym = a**2*sp.diff(kuu_sym, x_i, x_j) \
+ a*b*sp.diff(kuu_sym, x_i, y_j) \
+ a*c*sp.diff(kuu_sym, x_i, x_j, x_j) \
+ a*d*sp.diff(kuu_sym, x_i, y_j, y_j) \
- a*sp.diff(kuu_sym, x_i, t_j) \
+ b*a*sp.diff(kuu_sym, y_i, x_j) \
+ b**2*sp.diff(kuu_sym, y_i, y_j) \
+ b*c*sp.diff(kuu_sym, y_i, x_j, x_j) \
+ b*d*sp.diff(kuu_sym, y_i, y_j, y_j) \
- b*sp.diff(kuu_sym, y_i, t_j) \
+ c*a*sp.diff(kuu_sym, x_i, x_i, x_j) \
+ c*b*sp.diff(kuu_sym, x_i, x_i, y_j) \
+ c**2*sp.diff(kuu_sym, x_i, x_i, x_j, x_j) \
+ c*d*sp.diff(kuu_sym, x_i, x_i, y_j, y_j) \
- c*sp.diff(kuu_sym, x_i, x_i, t_j) \
+ d*a*sp.diff(kuu_sym, y_i, y_i, x_j) \
+ d*b*sp.diff(kuu_sym, y_i, y_i, y_j) \
+ d*c*sp.diff(kuu_sym, y_i, y_i, x_j, x_j) \
+ d**2*sp.diff(kuu_sym, y_i, y_i, y_j, y_j) \
- d*sp.diff(kuu_sym, y_i, y_i, t_j) \
- a*sp.diff(kuu_sym, t_i, x_j) \
- b*sp.diff(kuu_sym, t_i, y_j) \
- c*sp.diff(kuu_sym, t_i, x_j, x_j) \
- d*sp.diff(kuu_sym, t_i, y_j, y_j) \
+ sp.diff(kuu_sym, t_i, t_j)
kff_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t, a,b,c,d), kff_sym, "numpy")
def kff(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d):
k = np.zeros((x.size, x.size))
for i in range(x.size):
for j in range(x.size):
k[i,j] = kff_fn(x[i], x[j], y[i], y[j], t[i], t[j], sigma, l_x, l_y, l_t, a,b,c,d)
return k
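Since $k_{uu}$ is symmetric and the same operator is applied to both arguments, $k_{ff}$ is itself a symmetric kernel; a quick numerical sanity check with arbitrary test parameters (illustrative):

# Illustrative only: kff must yield a symmetric matrix
Kff_test = kff(x, y, t, 1.0, 1.0, 1.0, 1.0, 2, 2, 0.5, 0.5)
assert np.allclose(Kff_test, Kff_test.T)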
$k_{fu}(X_i,X_j;\theta,\phi) \\ = \mathcal{L}_{X_i}^{\phi} k_{uu}(X_i, X_j; \theta) \\ = a\partial_{x_i}k_{uu} + b \partial_{y_i}k_{uu} + c \partial_{x_i, x_i}k_{uu} + d \partial_{y_i, y_i}k_{uu} - \partial_{t_i}k_{uu}$
kfu_sym = a*sp.diff(kuu_sym, x_i) \
+ b*sp.diff(kuu_sym, y_i) \
+ c*sp.diff(kuu_sym, x_i, x_i) \
+ d*sp.diff(kuu_sym, y_i, y_i) \
- sp.diff(kuu_sym, t_i)
kfu_fn = sp.lambdify((x_i, x_j, y_i, y_j, t_i, t_j, sigma, l_x, l_y, l_t, a,b,c,d), kfu_sym, "numpy")
def kfu(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d):
k = np.zeros((x.size, x.size))
for i in range(x.size):
for j in range(x.size):
k[i,j] = kfu_fn(x[i], x[j], y[i], y[j], t[i], t[j], sigma, l_x, l_y, l_t, a,b,c,d)
return k
def kuf(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d):
return kfu(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d).T
Implementing the covariance matrix K and its inverse
def K(sigma, l_x, l_y, l_t, a,b,c,d, s):
K_mat = np.block([
[kuu(x, y, t, sigma, l_x, l_y, l_t)+s*np.eye(n),kuf(x, y, t, sigma, l_x, l_y, l_t, a,b,c,d)],
[kfu(x, y, t, sigma, l_x, l_y, l_t,a,b,c,d),kff(x,y,t,sigma,l_x,l_y,l_t,a,b,c,d)+s*np.eye(n)]
])
return K_mat
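With the nugget $s$ added to the diagonal blocks, $K$ is a symmetric positive definite $2n \times 2n$ matrix. A quick sanity check with arbitrary test parameters (illustrative; the eigenvalue check can be numerically delicate for very small $s$):

# Illustrative only: shape, symmetry and positive definiteness of K
K_test = K(1.0, 1.0, 1.0, 1.0, 2, 2, 0.5, 0.5, 1e-7)
assert K_test.shape == (2 * n, 2 * n)
assert np.allclose(K_test, K_test.T)
assert np.linalg.eigvalsh(K_test).min() > 0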
def K_inv_and_det(sigma, l_x, l_y, l_t, a,b,c,d, s):
K_inv = np.zeros((2*n, 2*n))
log_sum = 0
# Use Cholesky, if possible. Otherwise use SVD.
try:
L = np.linalg.cholesky(K(sigma, l_x, l_y, l_t, a,b,c,d, s))
L_inv = solve_triangular(L, np.identity(2*n), lower=True) # Slight performance boost
# over np.linalg.inv
K_inv = (L_inv.T).dot(L_inv)
        # log(det(K)) = 2 * sum(log(L_ii)), since det(K) = det(L)^2
        for i in range(2*n):
            log_sum = log_sum + 2 * np.log(np.abs(L[i, i]))
except np.linalg.LinAlgError:
        # Inverse of K via SVD
        u, s_mat, vt = np.linalg.svd(K(sigma, l_x, l_y, l_t, a,b,c,d, s))
        K_inv = (vt.T).dot(np.diag(1.0 / s_mat)).dot(u.T)
        # Log-determinant of K: for a symmetric positive definite matrix the singular
        # values equal the eigenvalues, so their logs sum to log(det(K)).
        for i in range(s_mat.size):
            log_sum = log_sum + np.log(s_mat[i])
return K_inv, log_sum
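The Cholesky-based log-determinant can be cross-checked against np.linalg.slogdet (illustrative only; the tolerance may need loosening for ill-conditioned K):

# Illustrative only: cross-check of the log-determinant
_, log_det_test = K_inv_and_det(1.0, 1.0, 1.0, 1.0, 2, 2, 0.5, 0.5, 1e-7)
sign, log_det_np = np.linalg.slogdet(K(1.0, 1.0, 1.0, 1.0, 2, 2, 0.5, 0.5, 1e-7))
assert sign > 0 and np.isclose(log_det_test, log_det_np, rtol=1e-3)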
Implementing the normalized negative log-likelihood function
def nlml(params):
    b_par = 2
    c_par = 0.5
    d_par = 0.5
    # Exponentiation to enable unconstrained optimization of the kernel hyperparameters
    sigma_exp = np.exp(params[0])
    l_x_exp = np.exp(params[1])
    l_y_exp = np.exp(params[2])
    l_t_exp = np.exp(params[3])
    # params[4] is the PDE parameter a (not log-transformed)
    y_con = np.concatenate((y_u, y_f))
    K_inv, log_det = K_inv_and_det(sigma_exp, l_x_exp, l_y_exp, l_t_exp, params[4],
                                   b_par, c_par, d_par, s)
    val = log_det + y_con @ K_inv @ y_con
    return val
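Up to the additive constant $n \log(2\pi)$ and an overall factor $\frac{1}{2}$, this is the standard negative log marginal likelihood of a Gaussian process,
$\text{NLML}(\theta, \phi) \propto \log \det K + y^{\top} K^{-1} y, \quad y := (y_u, y_f),$
which we minimize jointly over the log-transformed kernel hyperparameters and the PDE parameter $a$.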
1. Nelder-Mead
def callbackf(params):
print(params)
def minimize_restarts(x, y, y_u, y_f, n_restarts=5):
    # Restart the local optimizer from several random initializations and keep the best run
    all_results = []
    for it in range(n_restarts):
        all_results.append(opt.minimize(nlml, np.random.rand(5), callback=callbackf,
                                        method="Nelder-Mead",
                                        options={'maxfev': 5000, 'fatol': 0.001, 'xatol': 0.001}))
    return min(all_results, key=lambda res: res.fun)
t0 = time.time()
m = minimize_restarts(x, y, y_u, y_f, 3)
t_Nelder = time.time() - t0
t_Nelder  # elapsed wall-clock time in seconds
1865.7036004066467
print('The inferred parameter is:', m.x[4])
The inferred parameter is: 2.1634004301602183
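Since the kernel hyperparameters were optimized in log-space, they can be recovered from the minimizer as follows (illustrative):

# The first four entries of m.x are log-transformed kernel hyperparameters:
sigma_opt, l_x_opt, l_y_opt, l_t_opt = np.exp(m.x[:4])
print('Inferred kernel hyperparameters:', sigma_opt, l_x_opt, l_y_opt, l_t_opt)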