Published: December 17, 2020
Revised: December 22, 2020
Author: Xinyu Chen [GitHub homepage]
Download: This Jupyter notebook is at our GitHub repository. If you want to evaluate the code, please download the notebook from the transdim repository.
This notebook shows how to implement the Bayesian treatment for Temporal Regularized Matrix Factorization (BTRMF) on some real-world data sets.
and
$$\boldsymbol{x}_{t}\sim\left\{\begin{array}{ll}\mathcal{N}\left(\boldsymbol{0},\Lambda_{x}^{-1}\right), & \text{if $t\in\{1,2,\ldots,h_d\}$,} \\ \mathcal{N}\left(\sum_{k=1}^{d}\text{diag}(\boldsymbol{\theta}_{k})\boldsymbol{x}_{t-h_k},\Lambda_{x}^{-1}\right),& \text{otherwise}.\end{array}\right.$$

import numpy as np
from numpy.linalg import inv as inv
from numpy.random import normal as normrnd
from scipy.linalg import khatri_rao as kr_prod
from scipy.stats import wishart
from numpy.linalg import solve as solve
from scipy.linalg import cholesky as cholesky_upper
from scipy.linalg import solve_triangular as solve_ut
import matplotlib.pyplot as plt
%matplotlib inline
def mvnrnd_pre(mu, Lambda):
    """Draw one sample from N(mu, Lambda^{-1}), parameterized by the precision matrix.

    Uses the upper Cholesky factor U of Lambda and solves U x = z for a
    standard-normal z, so x ~ N(0, Lambda^{-1}); Lambda is overwritten in place.
    """
    z = normrnd(size = (mu.shape[0],))
    upper = cholesky_upper(Lambda, overwrite_a = True, check_finite = False)
    sample = solve_ut(upper, z, lower = False, check_finite = False, overwrite_b = True)
    return sample + mu
def cov_mat(mat, mat_bar):
    """Return the scatter matrix (mat - mat_bar)^T (mat - mat_bar) of the rows of mat."""
    centered = mat - mat_bar
    return centered.T @ centered
def sample_factor_w(tau_sparse_mat, tau_ind, W, X, tau, beta0 = 1, vargin = 0):
    """Sampling N-by-R factor matrix W and its hyperparameters (mu_w, Lambda_w).

    tau_sparse_mat : tau[:, None] * sparse_mat (precision-weighted observations).
    tau_ind        : tau[:, None] * ind, where ind is the binary observation mask.
    W, X           : current spatial (N x R) and temporal (T x R) factors; W is
                     updated row by row in place and returned.
    tau            : length-N per-row noise precisions (positive).
    beta0          : precision scaling of the Gaussian hyperprior on mu_w.
    vargin         : 0 -> vectorized sampler; 1 -> memory-light row-wise sampler.
    """
    dim1, rank = W.shape
    # Gaussian-Wishart posterior of the hyperparameters given the current W.
    W_bar = np.mean(W, axis = 0)
    temp = dim1 / (dim1 + beta0)
    var_W_hyper = inv(np.eye(rank) + cov_mat(W, W_bar) + temp * beta0 * np.outer(W_bar, W_bar))
    var_Lambda_hyper = wishart.rvs(df = dim1 + rank, scale = var_W_hyper)
    var_mu_hyper = mvnrnd_pre(temp * W_bar, (dim1 + beta0) * var_Lambda_hyper)
    if dim1 * rank ** 2 > 1e+8:
        # The dense rank x rank x dim1 posterior tensor would be too large;
        # fall back to the row-wise sampler.
        vargin = 1
    if vargin == 0:
        var1 = X.T
        var2 = kr_prod(var1, var1)
        var3 = (var2 @ tau_ind.T).reshape([rank, rank, dim1]) + var_Lambda_hyper[:, :, None]
        var4 = var1 @ tau_sparse_mat.T + (var_Lambda_hyper @ var_mu_hyper)[:, None]
        for i in range(dim1):
            W[i, :] = mvnrnd_pre(solve(var3[:, :, i], var4[:, i]), var3[:, :, i])
    elif vargin == 1:
        for i in range(dim1):
            # BUG FIX: the original referenced an undefined name `sparse_mat`
            # here (NameError whenever this branch ran). Since
            # tau_sparse_mat = tau[:, None] * sparse_mat and tau > 0, the
            # observed positions are the nonzeros of tau_ind and
            # tau[i] * sparse_mat[i, pos] == tau_sparse_mat[i, pos].
            pos0 = np.where(tau_ind[i, :] != 0)
            Xt = X[pos0[0], :]
            var_mu = Xt.T @ tau_sparse_mat[i, pos0[0]] + var_Lambda_hyper @ var_mu_hyper
            var_Lambda = tau[i] * Xt.T @ Xt + var_Lambda_hyper
            W[i, :] = mvnrnd_pre(solve(var_Lambda, var_mu), var_Lambda)
    return W
def sample_theta(X, theta, Lambda_x, time_lags, beta0 = 1):
    """Sample the d-by-R AR coefficient matrix theta, one row per time lag.

    X         : T-by-R temporal factor matrix.
    theta     : d-by-R current AR coefficients; updated row by row in place.
    Lambda_x  : R-by-R precision matrix of the temporal factors.
    time_lags : length-d array of integer lags (h_1, ..., h_d).
    beta0     : precision scaling of the Gaussian hyperprior on mu_theta.
    Returns the updated theta.
    """
    dim, rank = X.shape
    d = time_lags.shape[0]
    tmax = np.max(time_lags)
    # Gaussian-Wishart posterior of the hyperparameters (mu_theta, Lambda_theta)
    # given the current rows of theta.
    theta_bar = np.mean(theta, axis = 0)
    temp = d / (d + beta0)
    var_theta_hyper = inv(np.eye(rank) + cov_mat(theta, theta_bar)
                          + temp * beta0 * np.outer(theta_bar, theta_bar))
    var_Lambda_hyper = wishart.rvs(df = d + rank, scale = var_theta_hyper)
    var_mu_hyper = mvnrnd_pre(temp * theta_bar, (d + beta0) * var_Lambda_hyper)
    for k in range(d):
        # AR residual with lag k's own contribution removed (theta0 zeroes row k).
        theta0 = theta.copy()
        theta0[k, :] = 0
        mat0 = np.zeros((dim - tmax, rank))
        for L in range(d):
            mat0 += X[tmax - time_lags[L] : dim - time_lags[L], :] @ np.diag(theta0[L, :])
        varPi = X[tmax : dim, :] - mat0
        var0 = X[tmax - time_lags[k] : dim - time_lags[k], :]
        # Posterior mean/precision terms for row k; the einsums exploit the
        # diagonal structure of diag(theta_k) in the AR prior.
        var = np.einsum('ij, jk, ik -> j', var0, Lambda_x, varPi)
        var_Lambda = np.einsum('ti, tj, ij -> ij', var0, var0, Lambda_x) + var_Lambda_hyper
        theta[k, :] = mvnrnd_pre(solve(var_Lambda, var + var_Lambda_hyper @ var_mu_hyper), var_Lambda)
    return theta
def sample_Lambda_x(X, theta, time_lags):
    """Sample the R-by-R precision matrix Lambda_x of the temporal factors.

    The Wishart scale combines the Gram matrix of the first tmax factors
    (which follow the N(0, Lambda_x^{-1}) prior) with the squared residuals
    of the AR prediction for the remaining time steps.
    """
    dim, rank = X.shape
    d = time_lags.shape[0]
    tmax = np.max(time_lags)
    head_gram = X[: tmax, :].T @ X[: tmax, :]
    # Stack the lagged factor slices: lagged[:, :, k] = X shifted by time_lags[k].
    lagged = np.zeros((dim - tmax, rank, d))
    for k in range(d):
        lagged[:, :, k] = X[tmax - time_lags[k] : dim - time_lags[k], :]
    resid = X[tmax : dim, :] - np.einsum('kr, irk -> ir', theta, lagged)
    scale = inv(np.eye(rank) + head_gram + resid.T @ resid)
    return wishart.rvs(df = dim + rank, scale = scale)
def sample_factor_x(tau_sparse_mat, tau_ind, time_lags, W, X, theta, Lambda_x):
    """Sampling T-by-R factor matrix X.

    Gibbs update of each temporal factor x_t, combining the precision-weighted
    data likelihood (through W) with the vector autoregressive prior defined
    by theta and Lambda_x. X is updated row by row in place and returned.
    """
    dim2, rank = X.shape
    tmax = np.max(time_lags)
    tmin = np.min(time_lags)
    d = time_lags.shape[0]
    # A stacks diag(theta_k) for every lag k; A0[:, :, k] is A with lag k's
    # own block zeroed (used when x_t enters the prediction of a future step
    # reached through lag k).
    A = np.zeros((d * rank, rank))
    for k in range(d):
        A[k * rank : (k + 1) * rank, :] = np.diag(theta[k, :])
    A0 = np.dstack([A] * d)
    for k in range(d):
        A0[k * rank : (k + 1) * rank, :, k] = 0
    # Lag-independent precomputations for the AR-prior messages.
    mat0 = Lambda_x @ A.T
    mat1 = np.einsum('kij, jt -> kit', A.reshape([d, rank, rank]), Lambda_x)
    mat2 = np.einsum('kit, kjt -> ij', mat1, A.reshape([d, rank, rank]))
    # Data-likelihood precision blocks (var3) and mean terms (var4) per t.
    var1 = W.T
    var2 = kr_prod(var1, var1)
    var3 = (var2 @ tau_ind).reshape([rank, rank, dim2]) + Lambda_x[:, :, None]
    var4 = var1 @ tau_sparse_mat
    for t in range(dim2):
        Mt = np.zeros((rank, rank))
        Nt = np.zeros(rank)
        # Qt: message from the past — x_t predicted by its own lags.
        Qt = mat0 @ X[t - time_lags, :].reshape(rank * d)
        index = list(range(0, d))
        if t >= dim2 - tmax and t < dim2 - tmin:
            # Tail of the series: keep only lags whose future step stays in range.
            index = list(np.where(t + time_lags < dim2))[0]
        elif t < tmax:
            # Head of the series: no full AR history, so drop the past message
            # (the negative indexing in X[t - time_lags] would otherwise wrap).
            Qt = np.zeros(rank)
            index = list(np.where(t + time_lags >= tmax))[0]
        if t < dim2 - tmin:
            # Messages from the future steps t + h_k that regress on x_t.
            Mt = mat2.copy()
            temp = np.zeros((rank * d, len(index)))
            n = 0
            for k in index:
                temp[:, n] = X[t + time_lags[k] - time_lags, :].reshape(rank * d)
                n += 1
            temp0 = X[t + time_lags[index], :].T - np.einsum('ijk, ik -> jk', A0[:, :, index], temp)
            Nt = np.einsum('kij, jk -> i', mat1[index, :, :], temp0)
        var3[:, :, t] = var3[:, :, t] + Mt
        if t < tmax:
            # Replace the AR precision with the N(0, I) prior for the head steps.
            var3[:, :, t] = var3[:, :, t] - Lambda_x + np.eye(rank)
        X[t, :] = mvnrnd_pre(solve(var3[:, :, t], var4[:, t] + Nt + Qt), var3[:, :, t])
    return X
def sample_precision_tau(sparse_mat, mat_hat, ind):
    """Sample one Gamma-posterior noise precision per row of the data matrix."""
    shape = 1e-6 + 0.5 * np.sum(ind, axis = 1)
    sq_resid = (sparse_mat - mat_hat) ** 2
    rate = 1e-6 + 0.5 * np.sum(sq_resid * ind, axis = 1)
    return np.random.gamma(shape, 1 / rate)
def sample_precision_scalar_tau(sparse_mat, mat_hat, ind):
    """Sample a single Gamma-posterior noise precision shared by all entries."""
    shape = 1e-6 + 0.5 * np.sum(ind)
    sq_resid = (sparse_mat - mat_hat) ** 2
    rate = 1e-6 + 0.5 * np.sum(sq_resid * ind)
    return np.random.gamma(shape, 1 / rate)
def compute_mape(var, var_hat):
    """Mean absolute percentage error of var_hat against the nonzero truth var."""
    abs_pct_err = np.abs(var - var_hat) / var
    return np.sum(abs_pct_err) / var.shape[0]
def compute_rmse(var, var_hat):
    """Root mean squared error between the truth var and the estimate var_hat."""
    sq_err = (var - var_hat) ** 2
    return np.sqrt(np.sum(sq_err) / var.shape[0])
def BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter, option = "factor"):
    """Bayesian Temporal Regularized Matrix Factorization, BTRMF.

    Parameters
    ----------
    dense_mat  : ground-truth dim1 x dim2 matrix; used only to build the
                 held-out test set for MAPE/RMSE reporting, then deleted.
    sparse_mat : observed matrix with missing entries marked by 0 or NaN;
                 NaNs are replaced by 0 in place.
    init       : dict with initial factors "W" (dim1 x rank) and "X" (dim2 x rank).
    rank       : number of latent factors R.
    time_lags  : integer AR lags for the temporal factors.
    burn_iter  : number of burn-in Gibbs iterations (discarded).
    gibbs_iter : number of collected Gibbs iterations (averaged).
    option     : "factor" samples one noise precision per row;
                 "pca" samples a single scalar precision shared by all rows.

    Returns (mat_hat, W, X, theta), each averaged over the gibbs_iter samples;
    negative entries of mat_hat are clipped to zero.
    """
    dim1, dim2 = sparse_mat.shape
    d = time_lags.shape[0]
    W = init["W"]
    X = init["X"]
    theta = 0.01 * np.random.randn(d, rank)
    # Missing entries may be encoded either as zeros or as NaNs.
    if np.isnan(sparse_mat).any() == False:
        ind = sparse_mat != 0
        pos_obs = np.where(ind)
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    elif np.isnan(sparse_mat).any() == True:
        pos_test = np.where((dense_mat != 0) & (np.isnan(sparse_mat)))
        ind = ~np.isnan(sparse_mat)
        pos_obs = np.where(ind)
        sparse_mat[np.isnan(sparse_mat)] = 0
    dense_test = dense_mat[pos_test]
    del dense_mat
    ind = sparse_mat != 0  # recompute the observation mask after NaN -> 0
    tau = np.ones(dim1)
    # Accumulators for the posterior-mean estimates collected after burn-in.
    W_plus = np.zeros((dim1, rank))
    X_plus = np.zeros((dim2, rank))
    theta_plus = np.zeros((d, rank))
    temp_hat = np.zeros(len(pos_test[0]))
    show_iter = 200  # progress-report interval during burn-in
    mat_hat_plus = np.zeros((dim1, dim2))
    for it in range(burn_iter + gibbs_iter):
        # Precision-weighted mask and data consumed by the factor samplers.
        tau_ind = tau[:, None] * ind
        tau_sparse_mat = tau[:, None] * sparse_mat
        W = sample_factor_w(tau_sparse_mat, tau_ind, W, X, tau, beta0 = 1, vargin = 0)
        Lambda_x = sample_Lambda_x(X, theta, time_lags)
        theta = sample_theta(X, theta, Lambda_x, time_lags)
        X = sample_factor_x(tau_sparse_mat, tau_ind, time_lags, W, X, theta, Lambda_x)
        mat_hat = W @ X.T
        if option == "factor":
            tau = sample_precision_tau(sparse_mat, mat_hat, ind)
        elif option == "pca":
            tau = sample_precision_scalar_tau(sparse_mat, mat_hat, ind)
            tau = tau * np.ones(dim1)
        temp_hat += mat_hat[pos_test]
        if (it + 1) % show_iter == 0 and it < burn_iter:
            # Report errors on the running mean of the last show_iter draws.
            temp_hat = temp_hat / show_iter
            print('Iter: {}'.format(it + 1))
            print('MAPE: {:.6}'.format(compute_mape(dense_test, temp_hat)))
            print('RMSE: {:.6}'.format(compute_rmse(dense_test, temp_hat)))
            temp_hat = np.zeros(len(pos_test[0]))
            print()
        if it + 1 > burn_iter:
            # Collect post-burn-in samples for the posterior means.
            W_plus += W
            X_plus += X
            theta_plus += theta
            mat_hat_plus += mat_hat
    mat_hat = mat_hat_plus / gibbs_iter
    W = W_plus / gibbs_iter
    X = X_plus / gibbs_iter
    theta = theta_plus / gibbs_iter
    print('Imputation MAPE: {:.6}'.format(compute_mape(dense_test, mat_hat[:, : dim2][pos_test])))
    print('Imputation RMSE: {:.6}'.format(compute_rmse(dense_test, mat_hat[:, : dim2][pos_test])))
    print()
    mat_hat[mat_hat < 0] = 0
    return mat_hat, W, X, theta
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4 # Non-random missing (NM)
# NM: one Bernoulli(1 - missing_rate) draw per (road, day), broadcast over the
# within-day axis so entire road-day fibers go missing together.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1])[:, :, np.newaxis] + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 10  # number of latent factors
time_lags = np.array([1, 2, 144])  # previous two steps plus one day (144 ten-minute slots)
init = {"W": 0.01 * np.random.randn(dim1, rank), "X": 0.01 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.102392 RMSE: 4.32927 Iter: 400 MAPE: 0.102399 RMSE: 4.34368 Iter: 600 MAPE: 0.102638 RMSE: 4.35184 Iter: 800 MAPE: 0.102881 RMSE: 4.36252 Iter: 1000 MAPE: 0.102905 RMSE: 4.36489 Imputation MAPE: 0.103105 Imputation RMSE: 4.37774 Running time: 1204 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4 # Random missing (RM)
# RM: independent Bernoulli(1 - missing_rate) mask per tensor entry.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 80  # number of latent factors
time_lags = np.array([1, 2, 144])  # previous two steps plus one day (144 ten-minute slots)
init = {"W": 0.1 * np.random.randn(dim1, rank), "X": 0.1 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.07524 RMSE: 3.23358 Iter: 400 MAPE: 0.0750662 RMSE: 3.23704 Iter: 600 MAPE: 0.0751602 RMSE: 3.24055 Iter: 800 MAPE: 0.0751735 RMSE: 3.24014 Iter: 1000 MAPE: 0.0751397 RMSE: 3.2408 Imputation MAPE: 0.0751391 Imputation RMSE: 3.24151 Running time: 8652 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# RM: independent Bernoulli(1 - missing_rate) mask per tensor entry.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 80  # number of latent factors
time_lags = np.array([1, 2, 144])  # previous two steps plus one day (144 ten-minute slots)
init = {"W": 0.1 * np.random.randn(dim1, rank), "X": 0.1 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.0792675 RMSE: 3.39314 Iter: 400 MAPE: 0.0788858 RMSE: 3.39416 Iter: 600 MAPE: 0.0791253 RMSE: 3.40214 Iter: 800 MAPE: 0.0792649 RMSE: 3.40805 Iter: 1000 MAPE: 0.0793919 RMSE: 3.41317 Imputation MAPE: 0.0794925 Imputation RMSE: 3.4151 Running time: 8388 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4 # Non-random missing (NM)
# NM: one Bernoulli(1 - missing_rate) draw per (station, day), broadcast over
# the within-day axis so entire station-day fibers go missing together.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1])[:, :, np.newaxis] + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 30  # number of latent factors
time_lags = np.array([1, 2, 108])  # previous two steps plus one day (108 slots)
init = {"W": 0.01 * np.random.randn(dim1, rank), "X": 0.01 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
# option = "pca": a single scalar noise precision shared by all rows.
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter, option = "pca")
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.256171 RMSE: 54.7371 Iter: 400 MAPE: 0.268451 RMSE: 55.1768 Iter: 600 MAPE: 0.266337 RMSE: 53.5283 Iter: 800 MAPE: 0.266646 RMSE: 52.6491 Iter: 1000 MAPE: 0.280712 RMSE: 51.4976 Imputation MAPE: 0.270754 Imputation RMSE: 51.7259 Running time: 672 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4 # Random missing (RM)
# RM: independent Bernoulli(1 - missing_rate) mask per tensor entry.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 30  # number of latent factors
time_lags = np.array([1, 2, 108])  # previous two steps plus one day (108 slots)
init = {"W": 0.1 * np.random.randn(dim1, rank), "X": 0.1 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
# option = "pca": a single scalar noise precision shared by all rows.
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter, option = "pca")
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.224843 RMSE: 35.1102 Iter: 400 MAPE: 0.228051 RMSE: 33.545 Iter: 600 MAPE: 0.226576 RMSE: 33.4804 Iter: 800 MAPE: 0.229436 RMSE: 33.5221 Iter: 1000 MAPE: 0.22548 RMSE: 33.4536 Imputation MAPE: 0.226545 Imputation RMSE: 33.4711 Running time: 602 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# RM: independent Bernoulli(1 - missing_rate) mask per tensor entry.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 30  # number of latent factors
time_lags = np.array([1, 2, 108])  # previous two steps plus one day (108 slots)
init = {"W": 0.1 * np.random.randn(dim1, rank), "X": 0.1 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
# option = "pca": a single scalar noise precision shared by all rows.
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter, option = "pca")
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.239444 RMSE: 49.6641 Iter: 400 MAPE: 0.24268 RMSE: 42.9047 Iter: 600 MAPE: 0.245536 RMSE: 39.5845 Iter: 800 MAPE: 0.244401 RMSE: 38.0904 Iter: 1000 MAPE: 0.246259 RMSE: 37.1305 Imputation MAPE: 0.24692 Imputation RMSE: 37.0711 Running time: 753 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
# BUG FIX: tensor.npz is a NumPy ``.npz`` archive; scipy.io.loadmat only reads
# MATLAB .mat files and raises on it. Load it with np.load (key 'arr_0').
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.4 # Non-random missing (NM)
# NM: one Bernoulli(1 - missing_rate) draw per (sensor, day), broadcast over
# the within-day axis so entire sensor-day fibers go missing together.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1])[:, :, np.newaxis] + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 10  # number of latent factors
time_lags = np.array([1, 2, 288])  # previous two steps plus one day (288 five-minute slots)
init = {"W": 0.01 * np.random.randn(dim1, rank), "X": 0.01 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.0938158 RMSE: 5.38035 Iter: 400 MAPE: 0.0930455 RMSE: 5.36452 Iter: 600 MAPE: 0.0933777 RMSE: 5.38176 Iter: 800 MAPE: 0.0934608 RMSE: 5.38578 Iter: 1000 MAPE: 0.0934591 RMSE: 5.38655 Imputation MAPE: 0.0932756 Imputation RMSE: 5.37778 Running time: 1093 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
# BUG FIX: tensor.npz is a NumPy ``.npz`` archive; scipy.io.loadmat only reads
# MATLAB .mat files and raises on it. Load it with np.load (key 'arr_0').
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.4 # Random missing (RM)
# RM: independent Bernoulli(1 - missing_rate) mask per tensor entry.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 50  # number of latent factors
time_lags = np.array([1, 2, 288])  # previous two steps plus one day (288 five-minute slots)
init = {"W": 0.1 * np.random.randn(dim1, rank), "X": 0.1 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.0596038 RMSE: 3.74737 Iter: 400 MAPE: 0.0595248 RMSE: 3.75127 Iter: 600 MAPE: 0.0595014 RMSE: 3.7492 Iter: 800 MAPE: 0.059484 RMSE: 3.74841 Iter: 1000 MAPE: 0.0594566 RMSE: 3.74681 Imputation MAPE: 0.0594566 Imputation RMSE: 3.74573 Running time: 3780 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
# BUG FIX: tensor.npz is a NumPy ``.npz`` archive; scipy.io.loadmat only reads
# MATLAB .mat files and raises on it. Load it with np.load (key 'arr_0').
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# RM: independent Bernoulli(1 - missing_rate) mask per tensor entry.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 50  # number of latent factors
time_lags = np.array([1, 2, 288])  # previous two steps plus one day (288 five-minute slots)
init = {"W": 0.1 * np.random.randn(dim1, rank), "X": 0.1 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.0614937 RMSE: 3.8319 Iter: 400 MAPE: 0.0613195 RMSE: 3.83203 Iter: 600 MAPE: 0.0612728 RMSE: 3.83117 Iter: 800 MAPE: 0.0613037 RMSE: 3.83298 Iter: 1000 MAPE: 0.0613246 RMSE: 3.83193 Imputation MAPE: 0.0612939 Imputation RMSE: 3.8312 Running time: 3794 seconds
London movement speed data set is a city-wide hourly traffic speed dataset collected in London.
Observation rate | $>90\%$ | $>80\%$ | $>70\%$ | $>60\%$ | $>50\%$ |
---|---|---|---|---|---|
Number of roads | 17,666 | 27,148 | 35,912 | 44,352 | 52,727 |
If you want to test on the full dataset, you could consider the following setting for masking observations as missing values.
import numpy as np

np.random.seed(1000)  # reproducible masking
mask_rate = 0.20
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Randomly mask 20% of the observed (nonzero) entries as missing.
pos_obs = np.where(dense_mat != 0)
num = len(pos_obs[0])
sample_ind = np.random.choice(num, size = int(mask_rate * num), replace = False)
sparse_mat = dense_mat.copy()
sparse_mat[pos_obs[0][sample_ind], pos_obs[1][sample_ind]] = 0
Notably, you could also consider evaluating the model on a subset of the data with the following setting.
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
missing_rate = 0.4
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only roads observed in more than 70% of the hourly slots.
binary_mat = dense_mat.copy()
binary_mat[binary_mat != 0] = 1
pos = np.where(np.sum(binary_mat, axis = 1) > 0.7 * binary_mat.shape[1])
dense_mat = dense_mat[pos[0], :]
## Non-random missing (NM)
# One Bernoulli(1 - missing_rate) draw per (road, day); each of the 30 days
# spans 24 hourly columns, so whole road-days go missing together.
binary_mat = np.zeros(dense_mat.shape)
random_mat = np.random.rand(dense_mat.shape[0], 30)
for i1 in range(dense_mat.shape[0]):
    for i2 in range(30):
        binary_mat[i1, i2 * 24 : (i2 + 1) * 24] = np.round(random_mat[i1, i2] + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 20  # number of latent factors
time_lags = np.array([1, 2, 24])  # previous two hours plus one day (24 hourly slots)
init = {"W": 0.01 * np.random.randn(dim1, rank), "X": 0.01 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.0948059 RMSE: 2.34777 Iter: 400 MAPE: 0.0942527 RMSE: 2.3214 Iter: 600 MAPE: 0.0942426 RMSE: 2.32111 Iter: 800 MAPE: 0.0942407 RMSE: 2.32104 Iter: 1000 MAPE: 0.0942403 RMSE: 2.321 Imputation MAPE: 0.0942378 Imputation RMSE: 2.32091 Running time: 3394 seconds
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
missing_rate = 0.4
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only roads observed in more than 70% of the hourly slots.
binary_mat = dense_mat.copy()
binary_mat[binary_mat != 0] = 1
pos = np.where(np.sum(binary_mat, axis = 1) > 0.7 * binary_mat.shape[1])
dense_mat = dense_mat[pos[0], :]
## Random missing (RM)
# Independent Bernoulli(1 - missing_rate) mask per entry.
random_mat = np.random.rand(dense_mat.shape[0], dense_mat.shape[1])
binary_mat = np.round(random_mat + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 20  # number of latent factors
time_lags = np.array([1, 2, 24])  # previous two hours plus one day (24 hourly slots)
init = {"W": 0.01 * np.random.randn(dim1, rank), "X": 0.01 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.0921362 RMSE: 2.28467 Iter: 400 MAPE: 0.0914762 RMSE: 2.25792 Iter: 600 MAPE: 0.0914818 RMSE: 2.25793 Iter: 800 MAPE: 0.0914829 RMSE: 2.25798 Iter: 1000 MAPE: 0.0914881 RMSE: 2.25802 Imputation MAPE: 0.0914868 Imputation RMSE: 2.25799 Running time: 3542 seconds
import numpy as np

np.random.seed(1000)  # reproducible missing pattern
missing_rate = 0.6
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only roads observed in more than 70% of the hourly slots.
binary_mat = dense_mat.copy()
binary_mat[binary_mat != 0] = 1
pos = np.where(np.sum(binary_mat, axis = 1) > 0.7 * binary_mat.shape[1])
dense_mat = dense_mat[pos[0], :]
## Random missing (RM)
# Independent Bernoulli(1 - missing_rate) mask per entry.
random_mat = np.random.rand(dense_mat.shape[0], dense_mat.shape[1])
binary_mat = np.round(random_mat + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat
Model setting:
import time

start = time.time()
dim1, dim2 = sparse_mat.shape
rank = 20  # number of latent factors
time_lags = np.array([1, 2, 24])  # previous two hours plus one day (24 hourly slots)
init = {"W": 0.01 * np.random.randn(dim1, rank), "X": 0.01 * np.random.randn(dim2, rank)}
burn_iter = 1000
gibbs_iter = 200
mat_hat, W, X, theta = BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter)
end = time.time()
print('Running time: %d seconds'%(end - start))
Iter: 200 MAPE: 0.0942398 RMSE: 2.33817 Iter: 400 MAPE: 0.0931625 RMSE: 2.29652 Iter: 600 MAPE: 0.0931634 RMSE: 2.29655 Iter: 800 MAPE: 0.0931634 RMSE: 2.29658 Iter: 1000 MAPE: 0.093168 RMSE: 2.29656 Imputation MAPE: 0.0931706 Imputation RMSE: 2.29665 Running time: 3692 seconds