Published: December 17, 2020
Revised: December 23, 2020
Author: Xinyu Chen [GitHub homepage]
Download: This Jupyter notebook is at our GitHub repository. If you want to evaluate the code, please download the notebook from the transdim repository.
This notebook shows how to implement the Bayesian treatment for Temporal Regularized Matrix Factorization (BTRMF) on some real-world data sets.
We assume a spatiotemporal setting for multidimensional time series data throughout this work. In general, modern spatiotemporal data sets collected from sensor networks can be organized as matrix time series. For example, we can denote by matrix $Y\in\mathbb{R}^{N\times T}$ a multivariate time series collected from $N$ locations/sensors on $T$ time points, with each row $$\boldsymbol{y}_{i}=\left(y_{i,1},y_{i,2},...,y_{i,t-1},y_{i,t},y_{i,t+1},...,y_{i,T}\right)$$ corresponding to the time series collected at location $i$.
As mentioned, making accurate predictions on incomplete time series is very challenging, while the missing data problem is almost inevitable in real-world applications. Figure 1 illustrates the prediction problem for incomplete time series data. Here we use $(i,t)\in\Omega$ to index the observed entries in matrix $Y$.
Figure 1: Illustration of multivariate time series and the prediction problem in the presence of missing values (green: observed data; white: missing data; red: prediction).
and
$$\boldsymbol{x}_{t}\sim\left\{\begin{array}{ll}\mathcal{N}\left(\boldsymbol{0},\Lambda_{x}^{-1}\right), & \text{if $t\in\{1,2,\ldots,h_d\}$,} \\ \mathcal{N}\left(\sum_{k=1}^{d}\text{diag}(\boldsymbol{\theta}_{k})\boldsymbol{x}_{t-h_k},\Lambda_{x}^{-1}\right),& \text{otherwise}.\end{array}\right.$$

import numpy as np
from numpy.linalg import inv as inv
from numpy.random import normal as normrnd
from scipy.linalg import khatri_rao as kr_prod
from scipy.stats import wishart
from numpy.linalg import solve as solve
from scipy.linalg import cholesky as cholesky_upper
from scipy.linalg import solve_triangular as solve_ut
import matplotlib.pyplot as plt
%matplotlib inline
def mvnrnd_pre(mu, Lambda):
    """Draw one sample from N(mu, Lambda^{-1}), parameterized by the precision matrix.

    Uses the upper Cholesky factor U of Lambda and solves U z = e for a standard
    normal vector e, so the sample has covariance Lambda^{-1} without forming it.
    Note: Lambda is overwritten in place by the Cholesky routine.
    """
    noise = normrnd(size = (mu.shape[0],))
    chol_factor = cholesky_upper(Lambda, overwrite_a = True, check_finite = False)
    sample = solve_ut(chol_factor, noise, lower = False, check_finite = False, overwrite_b = True)
    return mu + sample
def cov_mat(mat, mat_bar):
    """Return the scatter matrix (mat - mat_bar)^T (mat - mat_bar).

    mat_bar is broadcast against mat's rows (typically the column mean).
    The input array is not modified.
    """
    centered = mat - mat_bar
    return centered.T @ centered
def sample_factor_w(tau_sparse_mat, tau_ind, W, X, tau, beta0 = 1, vargin = 0):
    """Sampling N-by-R factor matrix W and its hyperparameters (mu_w, Lambda_w).

    Parameters
    ----------
    tau_sparse_mat : (N, T) array, tau[:, None] * sparse_mat (precision-scaled data).
    tau_ind : (N, T) array, tau[:, None] * binary observation mask.
    W : (N, R) spatial factor matrix; updated in place and returned.
    X : (T, R) temporal factor matrix.
    tau : (N,) per-row noise precisions.
    beta0 : prior strength of the Gaussian-Wishart hyperprior.
    vargin : 0 for the vectorized update, 1 for the memory-light row-wise update.
    """
    dim1, rank = W.shape
    W_bar = np.mean(W, axis = 0)
    temp = dim1 / (dim1 + beta0)
    # Gaussian-Wishart hyperparameter draws for (mu_w, Lambda_w).
    var_W_hyper = inv(np.eye(rank) + cov_mat(W, W_bar) + temp * beta0 * np.outer(W_bar, W_bar))
    var_Lambda_hyper = wishart.rvs(df = dim1 + rank, scale = var_W_hyper)
    var_mu_hyper = mvnrnd_pre(temp * W_bar, (dim1 + beta0) * var_Lambda_hyper)
    if dim1 * rank ** 2 > 1e+8:
        # Fall back to the row-wise update when the dense (R, R, N) tensor would be too large.
        vargin = 1
    if vargin == 0:
        var1 = X.T
        var2 = kr_prod(var1, var1)
        # Per-row posterior precisions (R, R, N) and linear terms (R, N), all at once.
        var3 = (var2 @ tau_ind.T).reshape([rank, rank, dim1]) + var_Lambda_hyper[:, :, None]
        var4 = var1 @ tau_sparse_mat.T + (var_Lambda_hyper @ var_mu_hyper)[:, None]
        for i in range(dim1):
            W[i, :] = mvnrnd_pre(solve(var3[:, :, i], var4[:, i]), var3[:, :, i])
    elif vargin == 1:
        for i in range(dim1):
            # Bug fix: the original read an undefined `sparse_mat` (resolved to a stale
            # notebook global with the wrong shape/scaling). Recover the observed
            # positions and precision-scaled data from tau_ind / tau_sparse_mat instead;
            # tau[i] * sparse_mat[i, obs] == tau_sparse_mat[i, obs], so the math is unchanged.
            pos0 = np.where(tau_ind[i, :] != 0)
            Xt = X[pos0[0], :]
            var_mu = Xt.T @ tau_sparse_mat[i, pos0[0]] + var_Lambda_hyper @ var_mu_hyper
            var_Lambda = tau[i] * Xt.T @ Xt + var_Lambda_hyper
            W[i, :] = mvnrnd_pre(solve(var_Lambda, var_mu), var_Lambda)
    return W
def sample_theta(X, theta, Lambda_x, time_lags, beta0 = 1):
    """Gibbs step for the d-by-R AR coefficient matrix theta, one lag row at a time.

    First samples the Gaussian-Wishart hyperparameters of theta's prior, then
    conditionally resamples each lag's coefficient vector given all others.
    theta is updated in place and returned.
    """
    dim, rank = X.shape
    d = time_lags.shape[0]
    tmax = np.max(time_lags)
    theta_bar = np.mean(theta, axis = 0)
    shrink = d / (d + beta0)
    # Hyperparameter draws (mu_hyper, Lambda_hyper) from the Gaussian-Wishart prior.
    scale0 = inv(np.eye(rank) + cov_mat(theta, theta_bar)
                 + shrink * beta0 * np.outer(theta_bar, theta_bar))
    Lambda_hyper = wishart.rvs(df = d + rank, scale = scale0)
    mu_hyper = mvnrnd_pre(shrink * theta_bar, (d + beta0) * Lambda_hyper)
    for k in range(d):
        # Residual of X[tmax:] after removing the contribution of every lag except k.
        theta_rest = theta.copy()
        theta_rest[k, :] = 0
        partial = np.zeros((dim - tmax, rank))
        for ell in range(d):
            partial += X[tmax - time_lags[ell] : dim - time_lags[ell], :] @ np.diag(theta_rest[ell, :])
        residual = X[tmax : dim, :] - partial
        lagged = X[tmax - time_lags[k] : dim - time_lags[k], :]
        # Posterior linear term and precision for theta[k, :].
        lin_term = np.einsum('ij, jk, ik -> j', lagged, Lambda_x, residual)
        prec = np.einsum('ti, tj, ij -> ij', lagged, lagged, Lambda_x) + Lambda_hyper
        theta[k, :] = mvnrnd_pre(solve(prec, lin_term + Lambda_hyper @ mu_hyper), prec)
    return theta
def sample_Lambda_x(X, theta, time_lags):
    """Sample the R-by-R precision matrix Lambda_x of the latent AR process.

    The Wishart scale combines the burn-in head X[:tmax] (modeled as zero-mean)
    with the one-step AR residuals of X[tmax:] under coefficients theta.
    """
    dim, rank = X.shape
    d = time_lags.shape[0]
    tmax = np.max(time_lags)
    head = X[: tmax, :]
    # Stack the lagged copies of X so the AR prediction is one einsum.
    stacked = np.zeros((dim - tmax, rank, d))
    for k in range(d):
        stacked[:, :, k] = X[tmax - time_lags[k] : dim - time_lags[k], :]
    resid = X[tmax : dim, :] - np.einsum('kr, irk -> ir', theta, stacked)
    scale = inv(np.eye(rank) + head.T @ head + resid.T @ resid)
    return wishart.rvs(df = dim + rank, scale = scale)
def sample_factor_x(tau_sparse_mat, tau_ind, time_lags, W, X, theta, Lambda_x):
    """Sampling T-by-R factor matrix X.

    Each X[t] is resampled from its conditional, combining three terms:
    the data likelihood (through W and the precision-scaled observations),
    the AR prior pulling X[t] toward a combination of its lagged values (Qt),
    and the "future" terms from time points that use X[t] as a lag (Mt, Nt).
    X is updated in place and returned.
    """
    dim2, rank = X.shape
    tmax = np.max(time_lags)
    tmin = np.min(time_lags)
    d = time_lags.shape[0]
    # A stacks diag(theta[k]) blocks so lagged contributions become matrix products.
    A = np.zeros((d * rank, rank))
    for k in range(d):
        A[k * rank : (k + 1) * rank, :] = np.diag(theta[k, :])
    # A0[:, :, k] is A with the k-th block zeroed (used for "all lags except k").
    A0 = np.dstack([A] * d)
    for k in range(d):
        A0[k * rank : (k + 1) * rank, :, k] = 0
    mat0 = Lambda_x @ A.T
    mat1 = np.einsum('kij, jt -> kit', A.reshape([d, rank, rank]), Lambda_x)
    mat2 = np.einsum('kit, kjt -> ij', mat1, A.reshape([d, rank, rank]))
    var1 = W.T
    var2 = kr_prod(var1, var1)
    # Per-time-point likelihood precisions (R, R, T) and linear terms (R, T).
    var3 = (var2 @ tau_ind).reshape([rank, rank, dim2]) + Lambda_x[:, :, None]
    var4 = var1 @ tau_sparse_mat
    for t in range(dim2):
        Mt = np.zeros((rank, rank))
        Nt = np.zeros(rank)
        # AR mean contribution from X[t]'s own lagged values.
        Qt = mat0 @ X[t - time_lags, :].reshape(rank * d)
        index = list(range(0, d))
        if t >= dim2 - tmax and t < dim2 - tmin:
            # Near the right edge: only keep future points that exist (t + lag < T).
            index = list(np.where(t + time_lags < dim2))[0]
        elif t < tmax:
            # Burn-in head: no valid own-lag history, drop Qt and restrict futures.
            Qt = np.zeros(rank)
            index = list(np.where(t + time_lags >= tmax))[0]
        if t < dim2 - tmin:
            # Terms from future time points t + h_k that reference X[t] as a lag.
            Mt = mat2.copy()
            temp = np.zeros((rank * d, len(index)))
            n = 0
            for k in index:
                temp[:, n] = X[t + time_lags[k] - time_lags, :].reshape(rank * d)
                n += 1
            temp0 = X[t + time_lags[index], :].T - np.einsum('ijk, ik -> jk', A0[:, :, index], temp)
            Nt = np.einsum('kij, jk -> i', mat1[index, :, :], temp0)
        var3[:, :, t] = var3[:, :, t] + Mt
        if t < tmax:
            # Replace the AR precision with the N(0, I) prior used for the head.
            var3[:, :, t] = var3[:, :, t] - Lambda_x + np.eye(rank)
        X[t, :] = mvnrnd_pre(solve(var3[:, :, t], var4[:, t] + Nt + Qt), var3[:, :, t])
    return X
def sample_precision_tau(sparse_mat, mat_hat, ind):
    """Sample per-row noise precisions tau_i from their Gamma conditionals.

    Shape uses half the observation count per row; rate uses half the masked
    squared residuals, each with a 1e-6 vague-prior offset.
    """
    shape = 1e-6 + 0.5 * ind.sum(axis = 1)
    rate = 1e-6 + 0.5 * (ind * (sparse_mat - mat_hat) ** 2).sum(axis = 1)
    return np.random.gamma(shape, 1 / rate)
def sample_precision_scalar_tau(sparse_mat, mat_hat, ind):
    """Sample a single shared noise precision tau from its Gamma conditional.

    Same conjugate update as sample_precision_tau but pooled over all
    observed entries (used by the "pca" noise option).
    """
    shape = 1e-6 + 0.5 * ind.sum()
    rate = 1e-6 + 0.5 * (ind * (sparse_mat - mat_hat) ** 2).sum()
    return np.random.gamma(shape, 1 / rate)
def compute_mape(var, var_hat):
    """Mean absolute percentage error of var_hat against ground truth var.

    Normalizes by var.shape[0] (the leading dimension), matching the
    convention used throughout this notebook for 1-D test vectors.
    """
    rel_err = np.abs(var - var_hat) / var
    return rel_err.sum() / var.shape[0]
def compute_rmse(var, var_hat):
    """Root mean squared error of var_hat against ground truth var,
    normalized by var.shape[0] (the leading dimension)."""
    sq_err = (var - var_hat) ** 2
    return np.sqrt(sq_err.sum() / var.shape[0])
def ar4cast(theta, X, Lambda_x, time_lags, multi_step):
    """Roll the latent AR model forward multi_step steps.

    Appends multi_step new rows to X, each drawn from a Gaussian whose mean is
    the theta-weighted combination of lagged factors and whose precision is
    Lambda_x. Returns the extended (T + multi_step, R) factor matrix.
    """
    dim, rank = X.shape
    X_new = np.append(X, np.zeros((multi_step, rank)), axis = 0)
    for step in range(multi_step):
        ar_mean = np.einsum('kr, kr -> r', theta, X_new[dim + step - time_lags, :])
        X_new[dim + step, :] = mvnrnd_pre(ar_mean, Lambda_x)
    return X_new
def BTRMF(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter, multi_step = 1, option = "factor"):
    """Bayesian Temporal Regularized Matrix Factorization, BTRMF.

    Runs burn_iter + gibbs_iter Gibbs sweeps over (W, Lambda_x, theta, X, tau),
    averaging the last gibbs_iter draws for imputation and rolling the AR model
    multi_step steps ahead for forecasting.

    Missing entries in sparse_mat may be encoded as 0 or NaN; NaNs are zeroed
    in place. option selects per-row precisions ("factor") or one shared
    precision ("pca"). Returns (mat_hat with multi_step forecast columns
    appended, and the per-draw W, X, theta, Lambda_x, tau samples).
    """
    dim1, dim2 = sparse_mat.shape
    d = time_lags.shape[0]
    W = init["W"]
    X = init["X"]
    theta = 0.01 * np.random.randn(d, rank)
    if np.isnan(sparse_mat).any() == False:
        # Zero-coded missing values: test set = entries observed in dense_mat but masked here.
        ind = sparse_mat != 0
        pos_obs = np.where(ind)
        pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    elif np.isnan(sparse_mat).any() == True:
        # NaN-coded missing values: convert to the zero-coded convention in place.
        pos_test = np.where((dense_mat != 0) & (np.isnan(sparse_mat)))
        ind = ~np.isnan(sparse_mat)
        pos_obs = np.where(ind)
        sparse_mat[np.isnan(sparse_mat)] = 0
    dense_test = dense_mat[pos_test]
    del dense_mat
    ind = sparse_mat != 0
    tau = np.ones(dim1)  # initial per-row noise precisions
    # Storage for the posterior (Gibbs) samples collected after burn-in.
    W_plus = np.zeros((dim1, rank, gibbs_iter))
    theta_plus = np.zeros((d, rank, gibbs_iter))
    tau_plus = np.zeros((dim1, gibbs_iter))
    Lambda_plus = np.zeros((rank, rank, gibbs_iter))
    temp_hat = np.zeros(len(pos_test[0]))  # running sum of test predictions (progress reports)
    show_iter = 500
    mat_hat_plus = np.zeros((dim1, dim2))
    X_plus = np.zeros((dim2 + multi_step, rank, gibbs_iter))
    mat_new_plus = np.zeros((dim1, multi_step))  # accumulated forecast columns
    for it in range(burn_iter + gibbs_iter):
        # Precision-scaled mask and data used by the conjugate factor updates.
        tau_ind = tau[:, None] * ind
        tau_sparse_mat = tau[:, None] * sparse_mat
        W = sample_factor_w(tau_sparse_mat, tau_ind, W, X, tau, beta0 = 1, vargin = 0)
        Lambda_x = sample_Lambda_x(X, theta, time_lags)
        theta = sample_theta(X, theta, Lambda_x, time_lags)
        X = sample_factor_x(tau_sparse_mat, tau_ind, time_lags, W, X, theta, Lambda_x)
        mat_hat = W @ X.T
        if option == "factor":
            tau = sample_precision_tau(sparse_mat, mat_hat, ind)
        elif option == "pca":
            # Single shared precision, broadcast to the per-row vector the updates expect.
            tau = sample_precision_scalar_tau(sparse_mat, mat_hat, ind)
            tau = tau * np.ones(dim1)
        temp_hat += mat_hat[pos_test]
        if (it + 1) % show_iter == 0 and it < burn_iter:
            # Progress report during burn-in: error of the average recent prediction.
            temp_hat = temp_hat / show_iter
            print('Iter: {}'.format(it + 1))
            print('MAPE: {:.6}'.format(compute_mape(dense_test, temp_hat)))
            print('RMSE: {:.6}'.format(compute_rmse(dense_test, temp_hat)))
            temp_hat = np.zeros(len(pos_test[0]))
            print()
        if it + 1 > burn_iter:
            # Past burn-in: store this draw and its multi_step-ahead forecast.
            W_plus[:, :, it - burn_iter] = W
            theta_plus[:, :, it - burn_iter] = theta
            Lambda_plus[:, :, it - burn_iter] = Lambda_x
            tau_plus[:, it - burn_iter] = tau
            mat_hat_plus += mat_hat
            X0 = ar4cast(theta, X, Lambda_x, time_lags, multi_step)
            X_plus[:, :, it - burn_iter] = X0
            mat_new_plus += W @ X0[dim2 : dim2 + multi_step, :].T
    # Posterior-mean imputation, then append the posterior-mean forecast columns.
    mat_hat = mat_hat_plus / gibbs_iter
    print('Imputation MAPE: {:.6}'.format(compute_mape(dense_test, mat_hat[:, : dim2][pos_test])))
    print('Imputation RMSE: {:.6}'.format(compute_rmse(dense_test, mat_hat[:, : dim2][pos_test])))
    print()
    mat_hat = np.append(mat_hat, mat_new_plus / gibbs_iter, axis = 1)
    mat_hat[mat_hat < 0] = 0  # clip negatives; data are non-negative (speeds/flows)
    return mat_hat, W_plus, X_plus, theta_plus, Lambda_plus, tau_plus
def sample_factor_x_partial(tau_sparse_mat, tau_ind, time_lags, W, X, theta, Lambda_x, back_step):
    """Sampling T-by-R factor matrix X.

    Variant of sample_factor_x that only resamples the last back_step rows of X
    (used for warm-started rolling forecasts); earlier rows are kept fixed.
    var3/var4 are therefore indexed with the offset t + back_step - dim2.
    """
    dim2, rank = X.shape
    tmax = np.max(time_lags)
    tmin = np.min(time_lags)
    d = time_lags.shape[0]
    # A stacks diag(theta[k]) blocks; A0[:, :, k] is A with the k-th block zeroed.
    A = np.zeros((d * rank, rank))
    for k in range(d):
        A[k * rank : (k + 1) * rank, :] = np.diag(theta[k, :])
    A0 = np.dstack([A] * d)
    for k in range(d):
        A0[k * rank : (k + 1) * rank, :, k] = 0
    mat0 = Lambda_x @ A.T
    mat1 = np.einsum('kij, jt -> kit', A.reshape([d, rank, rank]), Lambda_x)
    mat2 = np.einsum('kit, kjt -> ij', mat1, A.reshape([d, rank, rank]))
    var1 = W.T
    var2 = kr_prod(var1, var1)
    # Likelihood terms only for the trailing back_step columns.
    var3 = (var2 @ tau_ind[:, - back_step :]).reshape([rank, rank, back_step]) + Lambda_x[:, :, None]
    var4 = var1 @ tau_sparse_mat[:, - back_step :]
    for t in range(dim2 - back_step, dim2):
        Mt = np.zeros((rank, rank))
        Nt = np.zeros(rank)
        # AR mean from X[t]'s lagged values (history always exists here: t >= tmax).
        Qt = mat0 @ X[t - time_lags, :].reshape(rank * d)
        index = list(range(0, d))
        if t >= dim2 - tmax and t < dim2 - tmin:
            # Near the right edge: only keep future points that exist (t + lag < T).
            index = list(np.where(t + time_lags < dim2))[0]
        if t < dim2 - tmin:
            # Terms from future time points that reference X[t] as a lag.
            Mt = mat2.copy()
            temp = np.zeros((rank * d, len(index)))
            n = 0
            for k in index:
                temp[:, n] = X[t + time_lags[k] - time_lags, :].reshape(rank * d)
                n += 1
            temp0 = X[t + time_lags[index], :].T - np.einsum('ijk, ik -> jk', A0[:, :, index], temp)
            Nt = np.einsum('kij, jk -> i', mat1[index, :, :], temp0)
        var3[:, :, t + back_step - dim2] = var3[:, :, t + back_step - dim2] + Mt
        X[t, :] = mvnrnd_pre(solve(var3[:, :, t + back_step - dim2],
                                   var4[:, t + back_step - dim2] + Nt + Qt), var3[:, :, t + back_step - dim2])
    return X
def BTRMF_partial(dense_mat, sparse_mat, init, rank, time_lags, burn_iter, gibbs_iter, multi_step = 1, gamma = 10):
    """Warm-started BTRMF forecasting step.

    Reuses the stored posterior draws in `init` (from a full BTRMF run): for
    each draw, only the last gamma * multi_step temporal factors are resampled,
    then the AR model is rolled multi_step steps ahead. Returns the averaged
    forecast matrix and the (partially updated) posterior draws.
    """
    dim1, dim2 = sparse_mat.shape
    W_plus = init["W_plus"]
    X_plus = init["X_plus"]
    theta_plus = init["theta_plus"]
    Lambda_plus = init["Lambda_plus"]
    tau_plus = init["tau_plus"]
    ind = sparse_mat != 0
    pos_obs = np.where(ind)
    back_step = gamma * multi_step
    X_new_plus = np.zeros((dim2 + multi_step, rank, gibbs_iter))
    mat_new_plus = np.zeros((dim1, multi_step))
    for s in range(gibbs_iter):
        tau_s = tau_plus[:, s]
        X = sample_factor_x_partial(tau_s[:, None] * sparse_mat, tau_s[:, None] * ind,
                                    time_lags, W_plus[:, :, s], X_plus[:, :, s],
                                    theta_plus[:, :, s], Lambda_plus[:, :, s], back_step)
        X0 = ar4cast(theta_plus[:, :, s], X, Lambda_plus[:, :, s], time_lags, multi_step)
        X_new_plus[:, :, s] = X0
        mat_new_plus += W_plus[:, :, s] @ X0[- multi_step :, :].T
    mat_hat = mat_new_plus / gibbs_iter
    mat_hat[mat_hat < 0] = 0
    return mat_hat, W_plus, X_new_plus, theta_plus, Lambda_plus, tau_plus
from ipywidgets import IntProgress
from IPython.display import display
def BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                   rank, time_lags, burn_iter, gibbs_iter, option = "factor"):
    """Rolling multi_step-ahead forecasting over the last pred_step columns.

    The first window runs the full Gibbs sampler (BTRMF); every subsequent
    window warm-starts from the stored posterior draws and only resamples the
    most recent factors (BTRMF_partial). Prints MAPE/RMSE over the nonzero
    ground-truth entries and returns the forecast matrix.
    """
    T = dense_mat.shape[1]
    start_time = T - pred_step  # first time index to forecast
    dim1 = dense_mat.shape[0]
    max_count = int(np.ceil(pred_step / multi_step))  # number of rolling windows
    mat_hat = np.zeros((dim1, max_count * multi_step))
    f = IntProgress(min = 0, max = max_count) # instantiate the bar
    display(f) # display the bar
    for t in range(max_count):
        if t == 0:
            # Full sampler on the training window, with random factor initialization.
            init = {"W": 0.1 * np.random.randn(dim1, rank), "X": 0.1 * np.random.randn(start_time, rank)}
            mat, W, X_new, theta, Lambda_x, tau = BTRMF(dense_mat[:, 0 : start_time],
                sparse_mat[:, 0 : start_time], init, rank, time_lags, burn_iter, gibbs_iter, multi_step, option)
        else:
            # Warm start: feed the previous window's posterior draws back in.
            init = {"W_plus": W, "X_plus": X_new, "theta_plus": theta, "Lambda_plus": Lambda_x, "tau_plus": tau}
            mat, W, X_new, theta, Lambda_x, tau = BTRMF_partial(dense_mat[:, 0 : start_time + t * multi_step],
                sparse_mat[:, 0 : start_time + t * multi_step], init, rank, time_lags,
                burn_iter, gibbs_iter, multi_step)
        # Keep only the forecast columns produced for this window.
        mat_hat[:, t * multi_step : (t + 1) * multi_step] = mat[:, - multi_step :]
        f.value = t
    small_dense_mat = dense_mat[:, start_time : dense_mat.shape[1]]
    pos = np.where(small_dense_mat != 0)  # score only observed ground-truth entries
    print('Prediction MAPE: {:.6}'.format(compute_mape(small_dense_mat[pos], mat_hat[pos])))
    print('Prediction RMSE: {:.6}'.format(compute_rmse(small_dense_mat[pos], mat_hat[pos])))
    print()
    return mat_hat
Scenario setting:
import scipy.io
import warnings
warnings.simplefilter('ignore')
# Load the Guangzhou tensor and flatten it to a (location, day * time-window) matrix.
# No artificial masking in this scenario: sparse_mat is a plain copy of the dense data.
tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]])
sparse_mat = dense_mat.copy()
Model setting:
import time
rank = 10
pred_step = 7 * 144  # forecast the last 7 days (presumably 144 windows/day -- confirm)
time_lags = np.array([1, 2, 3, 144, 145, 146, 7 * 144, 7 * 144 + 1, 7 * 144 + 2])  # short/daily/weekly lags
burn_iter = 1000
gibbs_iter = 200
# Sweep the prediction horizon delta and report accuracy and wall-clock time.
for multi_step in [2, 4, 6, 12, 18, 24, 30, 36]:
    start = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=504)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.113683 Prediction RMSE: 4.57707 Running time: 2947 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=252)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.118136 Prediction RMSE: 4.7547 Running time: 2377 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=168)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.123447 Prediction RMSE: 4.94011 Running time: 2188 seconds Prediction time horizon (delta) = 12.
IntProgress(value=0, max=84)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.145856 Prediction RMSE: 5.90556 Running time: 1912 seconds Prediction time horizon (delta) = 18.
IntProgress(value=0, max=56)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.146342 Prediction RMSE: 5.90134 Running time: 1929 seconds Prediction time horizon (delta) = 24.
IntProgress(value=0, max=42)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.143891 Prediction RMSE: 5.73838 Running time: 1856 seconds Prediction time horizon (delta) = 30.
IntProgress(value=0, max=34)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.146586 Prediction RMSE: 5.78402 Running time: 1856 seconds Prediction time horizon (delta) = 36.
IntProgress(value=0, max=28)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.174528 Prediction RMSE: 7.10779 Running time: 1958 seconds
Scenario setting:
import time
import scipy.io
import numpy as np
np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4 # Non-random missing (NM)
# NM masking: the Bernoulli mask is drawn per (location, day) and broadcast over
# the third axis, so a whole day of one location is dropped together.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1])[:, :, np.newaxis] + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time
rank = 10
pred_step = 7 * 144  # forecast the last 7 days
time_lags = np.array([1, 2, 3, 144, 145, 146, 7 * 144, 7 * 144 + 1, 7 * 144 + 2])  # short/daily/weekly lags
burn_iter = 1000
gibbs_iter = 200
# Sweep the prediction horizon delta under 40% non-random missing data.
for multi_step in [2, 4, 6, 12, 18, 24, 30, 36]:
    start = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=504)
Iter: 500 MAPE: 0.102551 RMSE: 4.36767 Iter: 1000 MAPE: 0.103652 RMSE: 4.57921 Imputation MAPE: 0.104067 Imputation RMSE: 4.71913 Prediction MAPE: 0.119942 Prediction RMSE: 4.7698 Running time: 2890 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=252)
Iter: 500 MAPE: 0.102063 RMSE: 4.32435 Iter: 1000 MAPE: 0.102245 RMSE: 4.33657 Imputation MAPE: 0.102214 Imputation RMSE: 4.33442 Prediction MAPE: 0.122606 Prediction RMSE: 4.93322 Running time: 2278 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=168)
Iter: 500 MAPE: 0.102254 RMSE: 4.33556 Iter: 1000 MAPE: 0.102241 RMSE: 4.33627 Imputation MAPE: 0.102264 Imputation RMSE: 4.3372 Prediction MAPE: 0.12957 Prediction RMSE: 5.19038 Running time: 2090 seconds Prediction time horizon (delta) = 12.
IntProgress(value=0, max=84)
Iter: 500 MAPE: 0.102164 RMSE: 4.33312 Iter: 1000 MAPE: 0.102223 RMSE: 4.33568 Imputation MAPE: 0.102332 Imputation RMSE: 4.33852 Prediction MAPE: 0.146054 Prediction RMSE: 5.8759 Running time: 1921 seconds Prediction time horizon (delta) = 18.
IntProgress(value=0, max=56)
Iter: 500 MAPE: 0.102062 RMSE: 4.3295 Iter: 1000 MAPE: 0.102284 RMSE: 4.33875 Imputation MAPE: 0.10222 Imputation RMSE: 4.33367 Prediction MAPE: 0.162287 Prediction RMSE: 6.57395 Running time: 1869 seconds Prediction time horizon (delta) = 24.
IntProgress(value=0, max=42)
Iter: 500 MAPE: 0.103346 RMSE: 4.43913 Iter: 1000 MAPE: 0.103448 RMSE: 4.47035 Imputation MAPE: 0.103053 Imputation RMSE: 4.46833 Prediction MAPE: 0.145759 Prediction RMSE: 5.72347 Running time: 1846 seconds Prediction time horizon (delta) = 30.
IntProgress(value=0, max=34)
Iter: 500 MAPE: 0.102372 RMSE: 4.33869 Iter: 1000 MAPE: 0.102496 RMSE: 4.34821 Imputation MAPE: 0.1023 Imputation RMSE: 4.33981 Prediction MAPE: 0.168842 Prediction RMSE: 6.84068 Running time: 1829 seconds Prediction time horizon (delta) = 36.
IntProgress(value=0, max=28)
Iter: 500 MAPE: 0.102427 RMSE: 4.34408 Iter: 1000 MAPE: 0.102474 RMSE: 4.34794 Imputation MAPE: 0.102382 Imputation RMSE: 4.34278 Prediction MAPE: 0.14108 Prediction RMSE: 5.46894 Running time: 1829 seconds
Scenario setting:
import time
import scipy.io
import numpy as np
np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4 # Random missing (RM)
# RM masking: each entry of the tensor is dropped independently with probability 0.4.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time
rank = 10
pred_step = 7 * 144  # forecast the last 7 days
time_lags = np.array([1, 2, 3, 144, 145, 146, 7 * 144, 7 * 144 + 1, 7 * 144 + 2])  # short/daily/weekly lags
burn_iter = 1000
gibbs_iter = 200
# Sweep the prediction horizon delta under 40% random missing data.
for multi_step in [2, 4, 6, 12, 18, 24, 30, 36]:
    start = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=504)
Iter: 500 MAPE: 0.0986118 RMSE: 4.16984 Iter: 1000 MAPE: 0.0985913 RMSE: 4.17064 Imputation MAPE: 0.0986026 Imputation RMSE: 4.17141 Prediction MAPE: 0.114071 Prediction RMSE: 4.56949 Running time: 3381 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=252)
Iter: 500 MAPE: 0.098629 RMSE: 4.17203 Iter: 1000 MAPE: 0.0986091 RMSE: 4.17289 Imputation MAPE: 0.0986195 Imputation RMSE: 4.17455 Prediction MAPE: 0.116732 Prediction RMSE: 4.66984 Running time: 2550 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=168)
Iter: 500 MAPE: 0.0987062 RMSE: 4.17503 Iter: 1000 MAPE: 0.0987199 RMSE: 4.17563 Imputation MAPE: 0.0987252 Imputation RMSE: 4.17566 Prediction MAPE: 0.124486 Prediction RMSE: 4.9962 Running time: 2276 seconds Prediction time horizon (delta) = 12.
IntProgress(value=0, max=84)
Iter: 500 MAPE: 0.098544 RMSE: 4.16577 Iter: 1000 MAPE: 0.0986339 RMSE: 4.17173 Imputation MAPE: 0.0986256 Imputation RMSE: 4.1715 Prediction MAPE: 0.138936 Prediction RMSE: 5.52781 Running time: 2014 seconds Prediction time horizon (delta) = 18.
IntProgress(value=0, max=56)
Iter: 500 MAPE: 0.0985256 RMSE: 4.16895 Iter: 1000 MAPE: 0.0985908 RMSE: 4.17392 Imputation MAPE: 0.0986125 Imputation RMSE: 4.1749 Prediction MAPE: 0.14166 Prediction RMSE: 5.60551 Running time: 1929 seconds Prediction time horizon (delta) = 24.
IntProgress(value=0, max=42)
Iter: 500 MAPE: 0.0985505 RMSE: 4.16709 Iter: 1000 MAPE: 0.0985932 RMSE: 4.17026 Imputation MAPE: 0.0985927 Imputation RMSE: 4.16987 Prediction MAPE: 0.141169 Prediction RMSE: 5.54439 Running time: 1893 seconds Prediction time horizon (delta) = 30.
IntProgress(value=0, max=34)
Iter: 500 MAPE: 0.0985609 RMSE: 4.16711 Iter: 1000 MAPE: 0.0986802 RMSE: 4.17507 Imputation MAPE: 0.0987116 Imputation RMSE: 4.17666 Prediction MAPE: 0.137951 Prediction RMSE: 5.39165 Running time: 1879 seconds Prediction time horizon (delta) = 36.
IntProgress(value=0, max=28)
Iter: 500 MAPE: 0.0985261 RMSE: 4.16671 Iter: 1000 MAPE: 0.0986298 RMSE: 4.17334 Imputation MAPE: 0.0986251 Imputation RMSE: 4.17341 Prediction MAPE: 0.145925 Prediction RMSE: 5.77936 Running time: 1858 seconds
Scenario setting:
import time
import scipy.io
import numpy as np
np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# RM masking: each entry of the tensor is dropped independently with probability 0.6.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time
rank = 10
pred_step = 7 * 144  # forecast the last 7 days
time_lags = np.array([1, 2, 3, 144, 145, 146, 7 * 144, 7 * 144 + 1, 7 * 144 + 2])  # short/daily/weekly lags
burn_iter = 1000
gibbs_iter = 200
# Sweep the prediction horizon delta under 60% random missing data.
for multi_step in [2, 4, 6, 12, 18, 24, 30, 36]:
    start = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step, rank, time_lags, burn_iter, gibbs_iter)
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=504)
Iter: 500 MAPE: 0.0997617 RMSE: 4.20035 Iter: 1000 MAPE: 0.0999632 RMSE: 4.21554 Imputation MAPE: 0.100029 Imputation RMSE: 4.21733 Prediction MAPE: 0.116847 Prediction RMSE: 4.66734 Running time: 3349 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=252)
Iter: 500 MAPE: 0.0997897 RMSE: 4.20511 Iter: 1000 MAPE: 0.0999748 RMSE: 4.21601 Imputation MAPE: 0.0999896 Imputation RMSE: 4.21673 Prediction MAPE: 0.122342 Prediction RMSE: 4.82326 Running time: 2558 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=168)
Iter: 500 MAPE: 0.0999046 RMSE: 4.21086 Iter: 1000 MAPE: 0.0999454 RMSE: 4.21481 Imputation MAPE: 0.0999657 Imputation RMSE: 4.21483 Prediction MAPE: 0.126441 Prediction RMSE: 5.00543 Running time: 2323 seconds Prediction time horizon (delta) = 12.
IntProgress(value=0, max=84)
Iter: 500 MAPE: 0.099831 RMSE: 4.20667 Iter: 1000 MAPE: 0.099951 RMSE: 4.21453 Imputation MAPE: 0.0999973 Imputation RMSE: 4.21626 Prediction MAPE: 0.133589 Prediction RMSE: 5.31734 Running time: 2122 seconds Prediction time horizon (delta) = 18.
IntProgress(value=0, max=56)
Iter: 500 MAPE: 0.0998608 RMSE: 4.20897 Iter: 1000 MAPE: 0.0999707 RMSE: 4.21584 Imputation MAPE: 0.0999918 Imputation RMSE: 4.21661 Prediction MAPE: 0.144022 Prediction RMSE: 5.75476 Running time: 2106 seconds Prediction time horizon (delta) = 24.
IntProgress(value=0, max=42)
Iter: 500 MAPE: 0.0997915 RMSE: 4.2022 Iter: 1000 MAPE: 0.0999317 RMSE: 4.21354 Imputation MAPE: 0.0999557 Imputation RMSE: 4.21475 Prediction MAPE: 0.155638 Prediction RMSE: 6.19447 Running time: 1962 seconds Prediction time horizon (delta) = 30.
IntProgress(value=0, max=34)
Iter: 500 MAPE: 0.0997823 RMSE: 4.20168 Iter: 1000 MAPE: 0.0999515 RMSE: 4.21462 Imputation MAPE: 0.0999761 Imputation RMSE: 4.21531 Prediction MAPE: 0.151695 Prediction RMSE: 5.97049 Running time: 1980 seconds Prediction time horizon (delta) = 36.
IntProgress(value=0, max=28)
Iter: 500 MAPE: 0.0998516 RMSE: 4.20345 Iter: 1000 MAPE: 0.0998847 RMSE: 4.21217 Imputation MAPE: 0.0999567 Imputation RMSE: 4.21497 Prediction MAPE: 0.141283 Prediction RMSE: 5.47474 Running time: 1984 seconds
Scenario setting:
import scipy.io
import warnings
warnings.simplefilter('ignore')
# Load the Hangzhou tensor and flatten it to a (location, day * time-window) matrix.
# No artificial masking in this scenario: sparse_mat is a plain copy of the dense data.
tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]])
sparse_mat = dense_mat.copy()
Model setting:
import time
rank = 10
pred_step = 7 * 108  # forecast the last 7 days (presumably 108 windows/day -- confirm)
time_lags = np.array([1, 2, 3, 108, 109, 110, 7 * 108, 7 * 108 + 1, 7 * 108 + 2])  # short/daily/weekly lags
burn_iter = 1000
gibbs_iter = 200
# Hangzhou runs use the shared-precision noise model (option = "pca").
for multi_step in [2, 4, 6]:
    start = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter, option = "pca")
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=378)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.256948 Prediction RMSE: 34.1296 Running time: 879 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=189)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.275644 Prediction RMSE: 36.2119 Running time: 760 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=126)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.30991 Prediction RMSE: 37.9937 Running time: 725 seconds
Scenario setting:
import time
import scipy.io
import numpy as np
np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4 # Non-random missing (NM)
# NM masking: the Bernoulli mask is drawn per (location, day) and broadcast over
# the third axis, so a whole day of one location is dropped together.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1])[:, :, np.newaxis] + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time
rank = 10
pred_step = 7 * 108  # forecast the last 7 days
time_lags = np.array([1, 2, 3, 108, 109, 110, 7 * 108, 7 * 108 + 1, 7 * 108 + 2])  # short/daily/weekly lags
burn_iter = 1000
gibbs_iter = 200
# 40% non-random missing; shared-precision noise model (option = "pca").
for multi_step in [2, 4, 6]:
    start = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter, option = "pca")
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=378)
Iter: 500 MAPE: 0.267965 RMSE: 57.8098 Iter: 1000 MAPE: 0.276273 RMSE: 63.7867 Imputation MAPE: 0.276884 Imputation RMSE: 64.3905 Prediction MAPE: 0.297843 Prediction RMSE: 42.4882 Running time: 800 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=189)
Iter: 500 MAPE: 0.279664 RMSE: 68.3945 Iter: 1000 MAPE: 0.284004 RMSE: 71.6925 Imputation MAPE: 0.286865 Imputation RMSE: 73.2323 Prediction MAPE: 0.31744 Prediction RMSE: 49.4244 Running time: 755 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=126)
Iter: 500 MAPE: 0.273415 RMSE: 60.3597 Iter: 1000 MAPE: 0.280985 RMSE: 65.6219 Imputation MAPE: 0.283798 Imputation RMSE: 66.1044 Prediction MAPE: 0.396821 Prediction RMSE: 50.4606 Running time: 726 seconds
Scenario setting:
import time
import scipy.io
import numpy as np
np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4 # Random missing (RM)
# RM masking: each entry of the tensor is dropped independently with probability 0.4.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time
rank = 10
pred_step = 7 * 108  # forecast the last 7 days
time_lags = np.array([1, 2, 3, 108, 109, 110, 7 * 108, 7 * 108 + 1, 7 * 108 + 2])  # short/daily/weekly lags
burn_iter = 1000
gibbs_iter = 200
# 40% random missing; shared-precision noise model (option = "pca").
for multi_step in [2, 4, 6]:
    start = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter, option = "pca")
    end = time.time()
    print('Running time: %d seconds'%(end - start))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=378)
Iter: 500 MAPE: 0.246572 RMSE: 50.0399 Iter: 1000 MAPE: 0.248285 RMSE: 49.7568 Imputation MAPE: 0.250604 Imputation RMSE: 49.4909 Prediction MAPE: 0.300019 Prediction RMSE: 39.9774 Running time: 865 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=189)
Iter: 500 MAPE: 0.245024 RMSE: 53.0131 Iter: 1000 MAPE: 0.249384 RMSE: 50.2703 Imputation MAPE: 0.250927 Imputation RMSE: 49.6021 Prediction MAPE: 0.297699 Prediction RMSE: 41.9483 Running time: 780 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=126)
Iter: 500 MAPE: 0.246093 RMSE: 53.963 Iter: 1000 MAPE: 0.248304 RMSE: 50.3108 Imputation MAPE: 0.250369 Imputation RMSE: 49.4471 Prediction MAPE: 0.302486 Prediction RMSE: 42.4757 Running time: 805 seconds
Scenario setting:
import time
import scipy.io
import numpy as np
np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
# RM masking: each entry of the tensor is dropped independently with probability 0.6.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

# Same configuration as the NM experiment: rank-10 factors, one-week horizon,
# AR lags at adjacent/daily/weekly offsets (108 points per day).
rank = 10
pred_step = 7 * 108
time_lags = np.array([1, 2, 3, 108, 109, 110, 756, 757, 758])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter, option = "pca")
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=378)
Iter: 500 MAPE: 0.260285 RMSE: 58.4876 Iter: 1000 MAPE: 0.262584 RMSE: 50.5769 Imputation MAPE: 0.267052 Imputation RMSE: 49.0025 Prediction MAPE: 0.304186 Prediction RMSE: 43.1569 Running time: 955 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=189)
Iter: 500 MAPE: 0.259674 RMSE: 59.3179 Iter: 1000 MAPE: 0.261667 RMSE: 50.0329 Imputation MAPE: 0.26593 Imputation RMSE: 48.7262 Prediction MAPE: 0.295914 Prediction RMSE: 43.8736 Running time: 870 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=126)
Iter: 500 MAPE: 0.255708 RMSE: 58.5246 Iter: 1000 MAPE: 0.261553 RMSE: 49.4641 Imputation MAPE: 0.265269 Imputation RMSE: 48.7999 Prediction MAPE: 0.317446 Prediction RMSE: 44.5101 Running time: 868 seconds
Scenario setting:
import pandas as pd
import warnings

warnings.simplefilter('ignore')
# Seattle speed matrix is fully observed here; the "sparse" input is
# simply a copy of the dense matrix (no artificial missing values).
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0).values
sparse_mat = dense_mat.copy()
Model setting:
import time

# Model configuration: rank-10 factors, one-week horizon (288 points per
# day), AR lags at adjacent/daily/weekly offsets.
rank = 10
pred_step = 7 * 288
time_lags = np.array([1, 2, 3, 288, 289, 290, 2016, 2017, 2018])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter)
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=1008)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.114426 Prediction RMSE: 6.21236 Running time: 4595 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=504)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.123047 Prediction RMSE: 6.60276 Running time: 3132 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=336)
Iter: 500 MAPE: nan RMSE: nan Iter: 1000 MAPE: nan RMSE: nan Imputation MAPE: nan Imputation RMSE: nan Prediction MAPE: 0.129698 Prediction RMSE: 6.9967 Running time: 2612 seconds
Scenario setting:
import time
import scipy.io
import numpy as np

np.random.seed(1000)
# FIX: tensor.npz is a NumPy zip archive (default key 'arr_0'), not a MATLAB
# .mat file — scipy.io.loadmat cannot read it; use np.load instead.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.4 # Non-random missing (NM)
# NM pattern: one Bernoulli draw per (road, day), broadcast over the
# within-day axis so whole days go missing at once.
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1])[:, :, np.newaxis] + 0.5 - missing_rate)
# Unfold tensors into (location x time) matrices for the matrix model.
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

# Same weekly-horizon configuration as the other Seattle experiments.
rank = 10
pred_step = 7 * 288
time_lags = np.array([1, 2, 3, 288, 289, 290, 2016, 2017, 2018])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter)
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=1008)
Iter: 500 MAPE: 0.0931481 RMSE: 5.42652 Iter: 1000 MAPE: 0.0933758 RMSE: 5.44179 Imputation MAPE: 0.0933853 Imputation RMSE: 5.44118 Prediction MAPE: 0.116625 Prediction RMSE: 6.3467 Running time: 4386 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=504)
Iter: 500 MAPE: 0.0931066 RMSE: 5.42514 Iter: 1000 MAPE: 0.0933132 RMSE: 5.43919 Imputation MAPE: 0.0933625 Imputation RMSE: 5.4392 Prediction MAPE: 0.125364 Prediction RMSE: 6.72899 Running time: 3000 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=336)
Iter: 500 MAPE: 0.0933031 RMSE: 5.43544 Iter: 1000 MAPE: 0.0932804 RMSE: 5.43644 Imputation MAPE: 0.0933198 Imputation RMSE: 5.43864 Prediction MAPE: 0.13381 Prediction RMSE: 7.28785 Running time: 2550 seconds
Scenario setting:
import time
import scipy.io
import numpy as np
np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.4 # Random missing (RM)
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

# Weekly-horizon configuration (288 time points per day).
rank = 10
pred_step = 7 * 288
time_lags = np.array([1, 2, 3, 288, 289, 290, 2016, 2017, 2018])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter)
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=1008)
Iter: 500 MAPE: 0.0859771 RMSE: 5.02476 Iter: 1000 MAPE: 0.0859844 RMSE: 5.02598 Imputation MAPE: 0.0859936 Imputation RMSE: 5.02656 Prediction MAPE: 0.115828 Prediction RMSE: 6.25326 Running time: 6009 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=504)
Iter: 500 MAPE: 0.0859089 RMSE: 5.01999 Iter: 1000 MAPE: 0.0859834 RMSE: 5.02593 Imputation MAPE: 0.0859932 Imputation RMSE: 5.02661 Prediction MAPE: 0.123386 Prediction RMSE: 6.623 Running time: 3760 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=336)
Iter: 500 MAPE: 0.0859226 RMSE: 5.02037 Iter: 1000 MAPE: 0.0859758 RMSE: 5.02599 Imputation MAPE: 0.0859924 Imputation RMSE: 5.02637 Prediction MAPE: 0.1279 Prediction RMSE: 6.93219 Running time: 3019 seconds
Scenario setting:
import time
import scipy.io
import numpy as np
np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6 # Random missing (RM)
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
dense_mat = dense_tensor.reshape([dim[0], dim[1] * dim[2]])
sparse_mat = sparse_tensor.reshape([dim[0], dim[1] * dim[2]])
del dense_tensor, sparse_tensor
Model setting:
import time

# Weekly-horizon configuration (288 time points per day).
rank = 10
pred_step = 7 * 288
time_lags = np.array([1, 2, 3, 288, 289, 290, 2016, 2017, 2018])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter)
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=1008)
Iter: 500 MAPE: 0.0870086 RMSE: 5.07269 Iter: 1000 MAPE: 0.0870044 RMSE: 5.0746 Imputation MAPE: 0.0870081 Imputation RMSE: 5.07459 Prediction MAPE: 0.11581 Prediction RMSE: 6.24139 Running time: 5893 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=504)
Iter: 500 MAPE: 0.0871143 RMSE: 5.07511 Iter: 1000 MAPE: 0.087012 RMSE: 5.0741 Imputation MAPE: 0.0870252 Imputation RMSE: 5.07454 Prediction MAPE: 0.123637 Prediction RMSE: 6.61941 Running time: 3753 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=336)
Iter: 500 MAPE: 0.0870185 RMSE: 5.07136 Iter: 1000 MAPE: 0.0870013 RMSE: 5.07389 Imputation MAPE: 0.0870315 Imputation RMSE: 5.07461 Prediction MAPE: 0.129897 Prediction RMSE: 6.98542 Running time: 3045 seconds
London movement speed data set is a city-wide hourly traffic speed dataset collected in London.
Observation rate | $>90\%$ | $>80\%$ | $>70\%$ | $>60\%$ | $>50\%$ |
---|---|---|---|---|---|
Number of roads | 17,666 | 27,148 | 35,912 | 44,352 | 52,727 |
If you want to test on the full dataset, you could consider the following setting for masking observations as missing values.
import numpy as np

# Hide 20% of the observed (non-zero) entries, chosen uniformly at random
# without replacement, to build the incomplete input matrix.
np.random.seed(1000)
mask_rate = 0.20
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
rows, cols = np.where(dense_mat != 0)
num = rows.shape[0]
sample_ind = np.random.choice(num, size = int(mask_rate * num), replace = False)
sparse_mat = dense_mat.copy()
sparse_mat[rows[sample_ind], cols[sample_ind]] = 0
Notably, you could also consider evaluating the model on a subset of the data with the following setting.
import numpy as np
import warnings

warnings.simplefilter('ignore')
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only roads observed on more than 70% of the hourly slots.
binary_mat = (dense_mat != 0).astype(dense_mat.dtype)
pos = np.where(binary_mat.sum(axis = 1) > 0.7 * binary_mat.shape[1])
dense_mat = dense_mat[pos[0], :]
# No artificial missing values in this scenario.
sparse_mat = dense_mat.copy()
Model setting:
import time

# Model configuration: rank-10 factors, one-week horizon (24 points per
# day), AR lags at adjacent/daily/weekly offsets.
rank = 10
pred_step = 7 * 24
time_lags = np.array([1, 2, 3, 24, 25, 26, 168, 169, 170])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6, 7, 8, 9, 10, 11]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter)
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
import numpy as np

np.random.seed(1000)
missing_rate = 0.4
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only roads observed on more than 70% of the hourly slots.
binary_mat = dense_mat.copy()
binary_mat[binary_mat != 0] = 1
pos = np.where(np.sum(binary_mat, axis = 1) > 0.7 * binary_mat.shape[1])
dense_mat = dense_mat[pos[0], :]
## Non-random missing (NM)
# One Bernoulli draw per (road, day); each decision blanks a full 24-hour
# block. Vectorized with np.repeat instead of the original nested loops —
# identical output, no per-element Python iteration.
binary_mat = np.zeros(dense_mat.shape)
random_mat = np.random.rand(dense_mat.shape[0], 30)
width = min(binary_mat.shape[1], 30 * 24)
binary_mat[:, : width] = np.repeat(np.round(random_mat + 0.5 - missing_rate), 24, axis = 1)[:, : width]
sparse_mat = np.multiply(dense_mat, binary_mat)
Model setting:
import time

# Weekly-horizon configuration (24 time points per day).
rank = 10
pred_step = 7 * 24
time_lags = np.array([1, 2, 3, 24, 25, 26, 168, 169, 170])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter)
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=84)
Iter: 500 MAPE: 0.0955888 RMSE: 2.36944 Iter: 1000 MAPE: 0.0956103 RMSE: 2.36944 Imputation MAPE: 0.095598 Imputation RMSE: 2.36937 Prediction MAPE: 0.137952 Prediction RMSE: 3.47224 Running time: 9472 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=42)
Iter: 500 MAPE: 0.0955952 RMSE: 2.36972 Iter: 1000 MAPE: 0.0956006 RMSE: 2.36943 Imputation MAPE: 0.095602 Imputation RMSE: 2.36952 Prediction MAPE: 0.14437 Prediction RMSE: 3.64513 Running time: 6738 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=28)
Iter: 500 MAPE: 0.0955561 RMSE: 2.36944 Iter: 1000 MAPE: 0.0956025 RMSE: 2.36941 Imputation MAPE: 0.0956034 Imputation RMSE: 2.36941 Prediction MAPE: 0.175426 Prediction RMSE: 4.39519 Running time: 6254 seconds
import numpy as np

np.random.seed(1000)
missing_rate = 0.4
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only roads observed on more than 70% of the hourly slots.
binary_mat = (dense_mat != 0).astype(dense_mat.dtype)
pos = np.where(binary_mat.sum(axis = 1) > 0.7 * binary_mat.shape[1])
dense_mat = dense_mat[pos[0], :]
## Random missing (RM): independent Bernoulli mask per entry.
random_mat = np.random.rand(dense_mat.shape[0], dense_mat.shape[1])
binary_mat = np.round(random_mat + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat
Model setting:
import time

# Weekly-horizon configuration (24 time points per day).
rank = 10
pred_step = 7 * 24
time_lags = np.array([1, 2, 3, 24, 25, 26, 168, 169, 170])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter)
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=84)
Iter: 500 MAPE: 0.0935143 RMSE: 2.31653 Iter: 1000 MAPE: 0.0935035 RMSE: 2.31588 Imputation MAPE: 0.0935132 Imputation RMSE: 2.31602 Prediction MAPE: 0.122467 Prediction RMSE: 3.02785 Running time: 10192 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=42)
Iter: 500 MAPE: 0.0935298 RMSE: 2.31641 Iter: 1000 MAPE: 0.0935082 RMSE: 2.31601 Imputation MAPE: 0.0935157 Imputation RMSE: 2.31607 Prediction MAPE: 0.123548 Prediction RMSE: 2.98486 Running time: 7082 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=28)
Iter: 500 MAPE: 0.0935159 RMSE: 2.3161 Iter: 1000 MAPE: 0.0935057 RMSE: 2.31564 Imputation MAPE: 0.093524 Imputation RMSE: 2.31593 Prediction MAPE: 0.156798 Prediction RMSE: 4.02945 Running time: 6705 seconds
import numpy as np

np.random.seed(1000)
missing_rate = 0.6
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only roads observed on more than 70% of the hourly slots.
binary_mat = (dense_mat != 0).astype(dense_mat.dtype)
pos = np.where(binary_mat.sum(axis = 1) > 0.7 * binary_mat.shape[1])
dense_mat = dense_mat[pos[0], :]
## Random missing (RM): independent Bernoulli mask per entry.
random_mat = np.random.rand(dense_mat.shape[0], dense_mat.shape[1])
binary_mat = np.round(random_mat + 0.5 - missing_rate)
sparse_mat = dense_mat * binary_mat
Model setting:
import time

# Weekly-horizon configuration (24 time points per day).
rank = 10
pred_step = 7 * 24
time_lags = np.array([1, 2, 3, 24, 25, 26, 168, 169, 170])
burn_iter = 1000
gibbs_iter = 200
for multi_step in [2, 4, 6]:
    tic = time.time()
    print('Prediction time horizon (delta) = {}.'.format(multi_step))
    mat_hat = BTRMF_forecast(dense_mat, sparse_mat, pred_step, multi_step,
                             rank, time_lags, burn_iter, gibbs_iter)
    toc = time.time()
    print('Running time: %d seconds'%(toc - tic))
    print()
Prediction time horizon (delta) = 2.
IntProgress(value=0, max=84)
Iter: 500 MAPE: 0.094878 RMSE: 2.34841 Iter: 1000 MAPE: 0.0948556 RMSE: 2.34754 Imputation MAPE: 0.0948647 Imputation RMSE: 2.34786 Prediction MAPE: 0.124208 Prediction RMSE: 3.01062 Running time: 9733 seconds Prediction time horizon (delta) = 4.
IntProgress(value=0, max=42)
Iter: 500 MAPE: 0.0948692 RMSE: 2.34832 Iter: 1000 MAPE: 0.0948588 RMSE: 2.34752 Imputation MAPE: 0.094871 Imputation RMSE: 2.34774 Prediction MAPE: 0.132128 Prediction RMSE: 3.24769 Running time: 7269 seconds Prediction time horizon (delta) = 6.
IntProgress(value=0, max=28)
Iter: 500 MAPE: 0.0948599 RMSE: 2.34873 Iter: 1000 MAPE: 0.094844 RMSE: 2.3475 Imputation MAPE: 0.094857 Imputation RMSE: 2.34781 Prediction MAPE: 0.148669 Prediction RMSE: 3.7011 Running time: 6951 seconds