#!/usr/bin/env python
# coding: utf-8

# # About this Notebook
#
# Bayesian temporal matrix factorization is a type of Bayesian matrix factorization that achieves state-of-the-art results on challenging imputation and prediction problems. In the following, we will discuss:
#
# - What the proposed Bayesian temporal matrix factorization (BTMF for short) is?
#
# - How to implement BTMF mainly using Python Numpy with high efficiency?
#
# - How to develop a spatiotemporal prediction model by adapting BTMF?
#
# - How to make predictions with real-world spatiotemporal datasets?
#
# If you want to understand what is BTMF and its modeling tricks in detail, our paper is for you:
#
# > Xinyu Chen, Lijun Sun (2019). **Bayesian temporal factorization for multidimensional time series prediction**.
#
# ## Quick Run
#
# This notebook is publicly available for any usage at our data imputation project. Please click [**transdim**](https://github.com/xinychen/transdim).

# In[1]:

import numpy as np
from numpy.random import multivariate_normal as mvnrnd
from scipy.stats import wishart
from numpy.linalg import inv as inv

# # Part 1: Matrix Computation Concepts
#
# ## 1) Kronecker product
#
# - **Definition**:
#
# Given two matrices $A\in\mathbb{R}^{m_1\times n_1}$ and $B\in\mathbb{R}^{m_2\times n_2}$, then, the **Kronecker product** between these two matrices is defined as
#
# $$A\otimes B=\left[ \begin{array}{cccc} a_{11}B & a_{12}B & \cdots & a_{1n_1}B \\ a_{21}B & a_{22}B & \cdots & a_{2n_1}B \\ \vdots & \vdots & \ddots & \vdots \\ a_{m_11}B & a_{m_12}B & \cdots & a_{m_1n_1}B \\ \end{array} \right]$$
# where the symbol $\otimes$ denotes Kronecker product, and the size of the resulting $A\otimes B$ is $(m_1m_2)\times (n_1n_2)$ (i.e., $m_1m_2$ rows and $n_1n_2$ columns).
# # - **Example**: # # If $A=\left[ \begin{array}{cc} 1 & 2 \\ 3 & 4 \\ \end{array} \right]$ and $B=\left[ \begin{array}{ccc} 5 & 6 & 7\\ 8 & 9 & 10 \\ \end{array} \right]$, then, we have # # $$A\otimes B=\left[ \begin{array}{cc} 1\times \left[ \begin{array}{ccc} 5 & 6 & 7\\ 8 & 9 & 10\\ \end{array} \right] & 2\times \left[ \begin{array}{ccc} 5 & 6 & 7\\ 8 & 9 & 10\\ \end{array} \right] \\ 3\times \left[ \begin{array}{ccc} 5 & 6 & 7\\ 8 & 9 & 10\\ \end{array} \right] & 4\times \left[ \begin{array}{ccc} 5 & 6 & 7\\ 8 & 9 & 10\\ \end{array} \right] \\ \end{array} \right]$$ # # $$=\left[ \begin{array}{cccccc} 5 & 6 & 7 & 10 & 12 & 14 \\ 8 & 9 & 10 & 16 & 18 & 20 \\ 15 & 18 & 21 & 20 & 24 & 28 \\ 24 & 27 & 30 & 32 & 36 & 40 \\ \end{array} \right]\in\mathbb{R}^{4\times 6}.$$ # # ## 2) Khatri-Rao product (kr_prod) # # - **Definition**: # # Given two matrices $A=\left( \boldsymbol{a}_1,\boldsymbol{a}_2,...,\boldsymbol{a}_r \right)\in\mathbb{R}^{m\times r}$ and $B=\left( \boldsymbol{b}_1,\boldsymbol{b}_2,...,\boldsymbol{b}_r \right)\in\mathbb{R}^{n\times r}$ with same number of columns, then, the **Khatri-Rao product** (or **column-wise Kronecker product**) between $A$ and $B$ is given as follows, # # $$A\odot B=\left( \boldsymbol{a}_1\otimes \boldsymbol{b}_1,\boldsymbol{a}_2\otimes \boldsymbol{b}_2,...,\boldsymbol{a}_r\otimes \boldsymbol{b}_r \right)\in\mathbb{R}^{(mn)\times r}$$ # where the symbol $\odot$ denotes Khatri-Rao product, and $\otimes$ denotes Kronecker product. 
# # - **Example**:
#
# If $A=\left[ \begin{array}{cc} 1 & 2 \\ 3 & 4 \\ \end{array} \right]=\left( \boldsymbol{a}_1,\boldsymbol{a}_2 \right)$ and $B=\left[ \begin{array}{cc} 5 & 6 \\ 7 & 8 \\ 9 & 10 \\ \end{array} \right]=\left( \boldsymbol{b}_1,\boldsymbol{b}_2 \right)$, then, we have
#
# $$A\odot B=\left( \boldsymbol{a}_1\otimes \boldsymbol{b}_1,\boldsymbol{a}_2\otimes \boldsymbol{b}_2 \right)$$
#
# $$=\left[ \begin{array}{cc} \left[ \begin{array}{c} 1 \\ 3 \\ \end{array} \right]\otimes \left[ \begin{array}{c} 5 \\ 7 \\ 9 \\ \end{array} \right] & \left[ \begin{array}{c} 2 \\ 4 \\ \end{array} \right]\otimes \left[ \begin{array}{c} 6 \\ 8 \\ 10 \\ \end{array} \right] \\ \end{array} \right]$$
#
# $$=\left[ \begin{array}{cc} 5 & 12 \\ 7 & 16 \\ 9 & 20 \\ 15 & 24 \\ 21 & 32 \\ 27 & 40 \\ \end{array} \right]\in\mathbb{R}^{6\times 2}.$$

# In[2]:


def kr_prod(a, b):
    """Khatri-Rao (column-wise Kronecker) product of two matrices.

    Both inputs must have the same number of columns r; the result stacks
    the column-wise Kronecker products and has shape
    (a.shape[0] * b.shape[0], r).
    """
    # Form every pairwise product over the shared column axis via
    # broadcasting, then fold the two row axes into a single one.
    pairwise = a[:, None, :] * b[None, :, :]
    return pairwise.reshape(a.shape[0] * b.shape[0], -1)


# In[3]:


A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8], [9, 10]])
print(kr_prod(A, B))


# ## 3) Computing Covariance Matrix (cov_mat)
#
# For any matrix $X\in\mathbb{R}^{m\times n}$, cov_mat can return a $n\times n$ covariance matrix for special use in the following.
# In[4]:


def cov_mat(mat):
    """Return the (unnormalized) scatter matrix of the rows of `mat`.

    For mat of shape (m, n), returns sum_i (x_i - x_bar)(x_i - x_bar)^T,
    an n x n matrix. Note: no 1/(m-1) normalization is applied — this is
    the raw sum of outer products used by the Normal-Wishart updates below.
    """
    dim1, dim2 = mat.shape
    new_mat = np.zeros((dim2, dim2))
    mat_bar = np.mean(mat, axis = 0)
    for i in range(dim1):
        # Outer product of the centered i-th row with itself.
        new_mat += np.einsum('i, j -> ij', mat[i, :] - mat_bar, mat[i, :] - mat_bar)
    return new_mat


# # Part 2: Bayesian Temporal Matrix Factorization (BTMF)

# In[5]:


def BTMF(dense_mat, sparse_mat, init, rank, time_lags, maxiter1, maxiter2):
    """Bayesian Temporal Matrix Factorization, BTMF.

    Gibbs sampler that alternately draws the spatial factors W, the
    temporal factors X (with a vector-autoregressive prior over the lags
    in `time_lags`), the VAR coefficients `theta`, the noise precision
    `tau`, and their Normal-Wishart hyperparameters. The last `maxiter2`
    of `maxiter1` iterations are averaged into the returned estimates.

    Parameters
    ----------
    dense_mat : (dim1, dim2) ground-truth matrix; zeros mark unknown
        entries. Used only to monitor RMSE on entries that are missing
        from `sparse_mat` but present here.
    sparse_mat : (dim1, dim2) partially observed matrix; zeros mark
        missing entries.
    init : dict with initial factors "W" (dim1, rank), "X" (dim2, rank)
        and "theta" (d, rank). W, X and theta are updated in place.
    rank : latent dimension (also used as the Wishart degrees-of-freedom
        prior nu0).
    time_lags : 1-D integer array of the d autoregressive lags.
    maxiter1, maxiter2 : total Gibbs iterations / number of final
        iterations averaged into the posterior estimates.

    Returns
    -------
    mat_hat : (dim1, dim2 + 1) averaged reconstruction; the last column
        is a one-step-ahead forecast.
    W, X_new, theta : averaged factor estimates (X_new has dim2 + 1 rows,
        the last being the forecast row).
    """
    W = init["W"]
    X = init["X"]
    theta = init["theta"]
    d = theta.shape[0]
    dim1, dim2 = sparse_mat.shape
    # pos: held-out entries (known in dense_mat, missing in sparse_mat) for RMSE monitoring.
    pos = np.where((dense_mat != 0) & (sparse_mat == 0))
    # position: observed entries used by the likelihood.
    position = np.where(sparse_mat != 0)
    binary_mat = np.zeros((dim1, dim2))
    binary_mat[position] = 1
    # Normal-Wishart / Gamma hyper-priors.
    beta0 = 1
    nu0 = rank
    mu0 = np.zeros((rank))
    W0 = np.eye(rank)
    tau = 1
    alpha = 1e-6
    beta = 1e-6
    # Accumulators for averaging the last maxiter2 draws.
    mat_hat = np.zeros((dim1, dim2 + 1))
    W_plus = np.zeros((dim1, rank))
    X_plus = np.zeros((dim2, rank))
    X_new = np.zeros((dim2 + 1, rank))
    X_new_plus = np.zeros((dim2 + 1, rank))
    theta_plus = np.zeros((d, rank))
    mat_hat_plus = np.zeros((dim1, dim2 + 1))
    for iters in range(maxiter1):
        # --- Sample Normal-Wishart hyperparameters (mu_w, Lambda_w) of W ---
        W_bar = np.mean(W, axis = 0)
        var_mu_hyper = (dim1 * W_bar + beta0 * mu0)/(dim1 + beta0)
        var_W_hyper = inv(inv(W0) + cov_mat(W) + dim1 * beta0/(dim1 + beta0) * np.outer(W_bar - mu0, W_bar - mu0))
        # Symmetrize to guard against numerical asymmetry before the Wishart draw.
        var_W_hyper = (var_W_hyper + var_W_hyper.T)/2
        var_Lambda_hyper = wishart(df = dim1 + nu0, scale = var_W_hyper, seed = None).rvs()
        var_mu_hyper = mvnrnd(var_mu_hyper, inv((dim1 + beta0) * var_Lambda_hyper))
        # --- Sample each spatial factor w_i from its Gaussian posterior ---
        var1 = X.T
        var2 = kr_prod(var1, var1)
        # var3[:, :, i] is the posterior precision for row i; var4[:, i] the linear term.
        var3 = (tau * np.matmul(var2, binary_mat.T).reshape([rank, rank, dim1])
                + np.dstack([var_Lambda_hyper] * dim1))
        var4 = (tau * np.matmul(var1, sparse_mat.T)
                + np.dstack([np.matmul(var_Lambda_hyper, var_mu_hyper)] * dim1)[0, :, :])
        for i in range(dim1):
            var_Lambda = var3[:, :, i]
            inv_var_Lambda = inv((var_Lambda + var_Lambda.T)/2)
            var_mu = np.matmul(inv_var_Lambda, var4[:, i])
            W[i, :] = mvnrnd(var_mu, inv_var_Lambda)
        if iters + 1 > maxiter1 - maxiter2:
            W_plus += W
        # --- Sample Lambda_x from the VAR residuals of X ---
        mat0 = X[0 : np.max(time_lags), :]
        mat = np.matmul(mat0.T, mat0)
        new_mat = np.zeros((dim2 - np.max(time_lags), rank))
        for t in range(dim2 - np.max(time_lags)):
            # Residual of x_t against its lagged prediction sum_k theta_k * x_{t-h_k}.
            new_mat[t, :] = (X[t + np.max(time_lags), :]
                             - np.einsum('ij, ij -> j', theta, X[t + np.max(time_lags) - time_lags, :]))
        mat += np.matmul(new_mat.T, new_mat)
        var_W_hyper = inv(inv(W0) + mat)
        var_W_hyper = (var_W_hyper + var_W_hyper.T)/2
        Lambda_x = wishart(df = dim2 + nu0, scale = var_W_hyper, seed = None).rvs()
        # --- Sample each temporal factor x_t ---
        var1 = W.T
        var2 = kr_prod(var1, var1)
        var3 = tau * np.matmul(var2, binary_mat).reshape([rank, rank, dim2]) + np.dstack([Lambda_x] * dim2)
        var4 = tau * np.matmul(var1, sparse_mat)
        for t in range(dim2):
            Mt = np.zeros((rank, rank))
            Nt = np.zeros(rank)
            if t < np.max(time_lags):
                # No complete lag window yet: drop the autoregressive mean term.
                Qt = np.zeros(rank)
            else:
                Qt = np.matmul(Lambda_x, np.einsum('ij, ij -> j', theta, X[t - time_lags, :]))
            if t < dim2 - np.min(time_lags):
                # x_t also appears in the priors of future x_{t+h_k}; collect
                # those contributions for every lag k whose window is valid.
                if t >= np.max(time_lags) and t < dim2 - np.max(time_lags):
                    index = list(range(0, d))
                else:
                    index = list(np.where((t + time_lags >= np.max(time_lags)) & (t + time_lags < dim2)))[0]
                for k in index:
                    Ak = theta[k, :]
                    Mt += np.multiply(np.outer(Ak, Ak), Lambda_x)
                    theta0 = theta.copy()
                    theta0[k, :] = 0
                    var5 = (X[t + time_lags[k], :]
                            - np.einsum('ij, ij -> j', theta0, X[t + time_lags[k] - time_lags, :]))
                    Nt += np.matmul(np.matmul(np.diag(Ak), Lambda_x), var5)
            var_mu = var4[:, t] + Nt + Qt
            var_Lambda = var3[:, :, t] + Mt
            inv_var_Lambda = inv((var_Lambda + var_Lambda.T)/2)
            X[t, :] = mvnrnd(np.matmul(inv_var_Lambda, var_mu), inv_var_Lambda)
        if iters + 1 > maxiter1 - maxiter2:
            X_plus += X
        mat_hat = np.matmul(W, X.T)
        if iters + 1 > maxiter1 - maxiter2:
            # Append a one-step-ahead forecast row and accumulate.
            X_new[0 : dim2, :] = X.copy()
            X_new[dim2, :] = np.einsum('ij, ij -> j', theta, X_new[dim2 - time_lags, :])
            X_new_plus += X_new
            mat_hat_plus += np.matmul(W, X_new.T)
        rmse = np.sqrt(np.sum((dense_mat[pos] - mat_hat[pos]) ** 2)/dense_mat[pos].shape[0])
        # --- Sample the noise precision tau from its Gamma posterior ---
        var_alpha = alpha + 0.5 * sparse_mat[position].shape[0]
        error = sparse_mat - mat_hat
        var_beta = beta + 0.5 * np.sum(error[position] ** 2)
        tau = np.random.gamma(var_alpha, 1/var_beta)
        # --- Sample Normal-Wishart hyperparameters of theta ---
        theta_bar = np.mean(theta, axis = 0)
        var_mu_hyper = (d * theta_bar + beta0 * mu0)/(d + beta0)
        var_W_hyper = inv(inv(W0) + cov_mat(theta) + d * beta0/(d + beta0) * np.outer(theta_bar - mu0, theta_bar - mu0))
        var_W_hyper = (var_W_hyper + var_W_hyper.T)/2
        var_Lambda_hyper = wishart(df = d + nu0, scale = var_W_hyper, seed = None).rvs()
        var_mu_hyper = mvnrnd(var_mu_hyper, inv((d + beta0) * var_Lambda_hyper))
        # --- Sample each VAR coefficient vector theta_k ---
        for k in range(d):
            theta0 = theta.copy()
            theta0[k, :] = 0
            mat0 = np.zeros((dim2 - np.max(time_lags), rank))
            for L in range(d):
                mat0 += np.matmul(X[np.max(time_lags) - time_lags[L] : dim2 - time_lags[L], :], np.diag(theta0[L, :]))
            # VarPi: residuals of X with theta_k removed from the VAR mean.
            VarPi = X[np.max(time_lags) : dim2, :] - mat0
            var1 = np.zeros((rank, rank))
            var2 = np.zeros(rank)
            for t in range(np.max(time_lags), dim2):
                B = X[t - time_lags[k], :]
                var1 += np.multiply(np.outer(B, B), Lambda_x)
                var2 += np.matmul(np.matmul(np.diag(B), Lambda_x), VarPi[t - np.max(time_lags), :])
            var_Lambda = var1 + var_Lambda_hyper
            inv_var_Lambda = inv((var_Lambda + var_Lambda.T)/2)
            var_mu = np.matmul(inv_var_Lambda, var2 + np.matmul(var_Lambda_hyper, var_mu_hyper))
            theta[k, :] = mvnrnd(var_mu, inv_var_Lambda)
        if iters + 1 > maxiter1 - maxiter2:
            theta_plus += theta
        # Progress report during the burn-in phase only.
        if (iters + 1) % 200 == 0 and iters < maxiter1 - maxiter2:
            print('Iter: {}'.format(iters + 1))
            print('RMSE: {:.6}'.format(rmse))
            print()
    # Posterior means over the last maxiter2 draws.
    W = W_plus/maxiter2
    X = X_plus/maxiter2
    X_new = X_new_plus/maxiter2
    theta = theta_plus/maxiter2
    mat_hat = mat_hat_plus/maxiter2
    if maxiter1 >= 100:
        final_mape = np.sum(np.abs(dense_mat[pos] - mat_hat[pos])/dense_mat[pos])/dense_mat[pos].shape[0]
        final_rmse = np.sqrt(np.sum((dense_mat[pos] - mat_hat[pos]) ** 2)/dense_mat[pos].shape[0])
        print('Imputation MAPE: {:.6}'.format(final_mape))
        print('Imputation RMSE: {:.6}'.format(final_rmse))
        print()
    return mat_hat, W, X_new, theta


# # Part 3: Online Prediction Problem
#
# **Question**: How to update $\boldsymbol{x}_{t}\in\mathbb{R}^{r}$?
# # Fixing spatial factor matrix $W\in\mathbb{R}^{m\times r}$.
#
# Bayesian model:
#
# $$y_{it}\sim\mathcal{N}\left(\boldsymbol{w}_{i}^{T}\boldsymbol{x}_{t},\tau^{-1}\right),$$
#
# $$\boldsymbol{x}_{t}\sim\mathcal{N}\left(\sum_{k=1}^{d}\boldsymbol{\theta}_{k}\circledast\boldsymbol{x}_{t-h_k},\Lambda_{x}^{-1}\right),$$
#
# $$\tau\sim\text{Gamma}\left(\alpha,\beta\right),$$
#
# $$\Lambda_{x}\sim\mathcal{W}\left(W_0,\nu_0\right).$$
#
# The posterior distribution of $\Lambda_{x}$ is $\mathcal{W}\left(W_{x}^{*},\nu_{x}^{*}\right)$ in which
#
# $$\left(W_{x}^{*}\right)^{-1}=W_0^{-1}+\left(\boldsymbol{x}_{t}-\tilde{\boldsymbol{x}}_{t}\right)\left(\boldsymbol{x}_{t}-\tilde{\boldsymbol{x}}_{t}\right)^{T},\nu_{x}^{*}=\nu_{0}+1,$$
# where $\tilde{\boldsymbol{x}}_{t}=\sum_{k=1}^{d}\boldsymbol{\theta}_{k}\circledast\boldsymbol{x}_{t-h_{k}}$.
#
# The posterior distribution of $\boldsymbol{x}_{t}$ is $\mathcal{N}\left(\boldsymbol{\mu}_{x}^{*},\left(\Lambda_{x}^{*}\right)^{-1}\right)$ in which
#
# $$\Lambda_{x}^{*}=\tau \sum_{i\in \Omega_{t}} \boldsymbol{w}_{i} \boldsymbol{w}_{i}^{T}+\Lambda_{x},~\boldsymbol{\mu}_{x}^{*}=\left(\Lambda_{x}^{*}\right)^{-1}\left(\tau \sum_{i\in \Omega_{t}} \boldsymbol{w}_{i} y_{it}+\Lambda_{x} \tilde{\boldsymbol{x}}_{t}\right).$$
#
# The posterior distribution of $\tau$ is $\text{Gamma}\left(\alpha^{*},\beta^{*}\right)$ in which
#
# $$\alpha^{*}=\frac{1}{2}|\Omega|+\alpha,~\beta^{*}=\frac{1}{2} \sum_{i \in \Omega_{t}}\left(y_{i t}-\boldsymbol{w}_{i}^{T} \boldsymbol{x}_{t}\right)^{2}+\beta.$$

# In[6]:


def OnlineBTMF(sparse_vec, init, time_lags, maxiter1, maxiter2):
    """Online Bayesian Temporal Matrix Factorization.

    With the spatial factors W and VAR coefficients theta held fixed,
    Gibbs-samples only the newest temporal factor x_{t-1} (conditioned on
    the partially observed vector `sparse_vec`), then appends a one-step-
    ahead forecast row. The last `maxiter2` of `maxiter1` draws are
    averaged.

    Parameters
    ----------
    sparse_vec : (dim,) observed vector for the current time step; zeros
        mark missing entries.
    init : dict with "W" (dim, rank), "X" (t, rank) — the rolling window
        of recent temporal factors, updated in place — and "theta"
        (d, rank). X must have at least max(time_lags) rows.
    time_lags : 1-D integer array of the d autoregressive lags.
    maxiter1, maxiter2 : total Gibbs iterations / number of final
        iterations averaged.

    Returns
    -------
    mat_hat : (dim, t + 1) averaged reconstruction; the last column is
        the forecast for the next time step.
    X_new : (t + 1, rank) averaged temporal factors including the
        forecast row.
    """
    W = init["W"]
    X = init["X"]
    theta = init["theta"]
    d = time_lags.shape[0]
    dim = sparse_vec.shape[0]
    t, rank = X.shape
    position = np.where(sparse_vec != 0)
    binary_vec = np.zeros(dim)
    binary_vec[position] = 1
    tau = 1
    alpha = 1e-6
    beta = 1e-6
    nu0 = rank
    W0 = np.eye(rank)
    # VAR prior mean for the current factor: sum_k theta_k * x_{t-h_k}.
    var_mu0 = np.einsum('ij, ij -> j', theta, X[t - 1 - time_lags, :])
    X_new_plus = np.zeros((t + 1, rank))
    mat_hat_plus = np.zeros((W.shape[0], t + 1))
    for iters in range(maxiter1):
        # Sample Lambda_x from its rank-1-updated Wishart posterior.
        vec0 = X[t - 1, :] - var_mu0
        Lambda_x = wishart(df = nu0 + 1, scale = inv(inv(W0) + np.outer(vec0, vec0)), seed = None).rvs()
        # Sample x_{t-1} from its Gaussian posterior.
        var1 = W.T
        var2 = kr_prod(var1, var1)
        var_mu = tau * np.matmul(var1, sparse_vec) + np.matmul(Lambda_x, var_mu0)
        inv_var_Lambda = inv(tau * np.matmul(var2, binary_vec).reshape([rank, rank]) + Lambda_x)
        X[t - 1, :] = mvnrnd(np.matmul(inv_var_Lambda, var_mu), inv_var_Lambda)
        # Sample the noise precision tau from its Gamma posterior.
        tau = np.random.gamma(alpha + 0.5 * sparse_vec[position].shape[0],
                              1/(beta + 0.5 * np.sum((sparse_vec - np.matmul(W, X[t - 1, :]))[position] ** 2)))
        X_new = np.zeros((t + 1, rank))
        if iters + 1 > maxiter1 - maxiter2:
            # Append the one-step-ahead forecast row and accumulate.
            X_new[0 : t, :] = X.copy()
            X_new[t, :] = np.einsum('ij, ij -> j', theta, X_new[t - time_lags, :])
            X_new_plus += X_new
            mat_hat_plus += np.matmul(W, X_new.T)
    X_new = X_new_plus/maxiter2
    mat_hat = mat_hat_plus/maxiter2
    return mat_hat, X_new


# In[7]:


def st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter):
    """Rolling one-step-ahead spatiotemporal prediction with BTMF.

    Trains BTMF on all data before the prediction horizon, then rolls
    forward one step at a time: each new (partially observed) column
    updates the temporal factors via OnlineBTMF when enough entries are
    observed, otherwise the factors are extrapolated from the VAR model
    alone. Prints final MAPE/RMSE over the horizon.

    Parameters
    ----------
    dense_mat : (dim1, T) ground-truth matrix (zeros mark unknown).
    sparse_mat : (dim1, T) observed matrix (zeros mark missing).
    pred_time_steps : length of the prediction horizon (last columns).
    rank : latent dimension.
    time_lags : 1-D integer array of autoregressive lags.
    maxiter : length-4 sequence — (maxiter1, maxiter2) for the initial
        BTMF fit and (maxiter1, maxiter2) for each online update.

    Returns
    -------
    mat_hat : (dim1, pred_time_steps) matrix of one-step-ahead forecasts.
    """
    T = dense_mat.shape[1]
    start_time = T - pred_time_steps
    dense_mat0 = dense_mat[:, 0 : start_time]
    sparse_mat0 = sparse_mat[:, 0 : start_time]
    dim1, dim2 = sparse_mat0.shape
    d = time_lags.shape[0]
    mat_hat = np.zeros((dim1, pred_time_steps))
    for t in range(pred_time_steps):
        if t == 0:
            # Full (offline) BTMF fit on the training window.
            init = {"W": 0.1 * np.random.rand(dim1, rank),
                    "X": 0.1 * np.random.rand(dim2, rank),
                    "theta": 0.1 * np.random.rand(d, rank)}
            mat, W, X, theta = BTMF(dense_mat0, sparse_mat0, init, rank, time_lags,
                                    maxiter[0], maxiter[1])
        else:
            sparse_vec = sparse_mat[:, start_time + t - 1]
            if np.where(sparse_vec > 0)[0].shape[0] > rank:
                # Enough observations: Gibbs-update the newest factor online.
                init = {"W": W, "X": X[- np.max(time_lags) :, :], "theta": theta}
                mat, X = OnlineBTMF(sparse_vec, init, time_lags, maxiter[2], maxiter[3])
            else:
                # Too few observations: extrapolate the factor window with the
                # VAR model alone. Bug fix: the original allocated
                # X0 = np.zeros(X.shape), which crashes on the first pass
                # through this branch (X then still has dim2 + 1 rows from
                # BTMF while only np.max(time_lags) rows are copied in);
                # allocate the rolling window explicitly instead.
                X0 = np.zeros((np.max(time_lags) + 1, rank))
                X0[: -1, :] = X[- np.max(time_lags) :, :]
                X0[-1, :] = np.einsum('ij, ij -> j', theta, X[-1 - time_lags, :])
                X = X0.copy()
                mat = np.matmul(W, X.T)
        # The last column of `mat` is the forecast for step t.
        mat_hat[:, t] = mat[:, -1]
        if (t + 1) % 40 == 0:
            print('Time step: {}'.format(t + 1))
    small_dense_mat = dense_mat[:, start_time : dense_mat.shape[1]]
    pos = np.where(small_dense_mat > 0)
    final_mape = np.sum(np.abs(small_dense_mat[pos] - mat_hat[pos])/small_dense_mat[pos])/small_dense_mat[pos].shape[0]
    final_rmse = np.sqrt(np.sum((small_dense_mat[pos] - mat_hat[pos]) ** 2)/small_dense_mat[pos].shape[0])
    print('Final MAPE: {:.6}'.format(final_mape))
    print('Final RMSE: {:.6}'.format(final_rmse))
    print()
    return mat_hat


# # Part 4: Data Organization
#
# ## 1) Matrix Structure
#
# We consider a dataset of $m$ discrete time series $\boldsymbol{y}_{i}\in\mathbb{R}^{f},i\in\left\{1,2,...,m\right\}$. The time series may have missing elements. We express spatio-temporal dataset as a matrix $Y\in\mathbb{R}^{m\times f}$ with $m$ rows (e.g., locations) and $f$ columns (e.g., discrete time intervals),
#
# $$Y=\left[ \begin{array}{cccc} y_{11} & y_{12} & \cdots & y_{1f} \\ y_{21} & y_{22} & \cdots & y_{2f} \\ \vdots & \vdots & \ddots & \vdots \\ y_{m1} & y_{m2} & \cdots & y_{mf} \\ \end{array} \right]\in\mathbb{R}^{m\times f}.$$
#
# ## 2) Tensor Structure
#
# We consider a dataset of $m$ discrete time series $\boldsymbol{y}_{i}\in\mathbb{R}^{nf},i\in\left\{1,2,...,m\right\}$. The time series may have missing elements. We partition each time series into intervals of predefined length $f$. We express each partitioned time series as a matrix $Y_{i}$ with $n$ rows (e.g., days) and $f$ columns (e.g., discrete time intervals per day),
#
# $$Y_{i}=\left[ \begin{array}{cccc} y_{11} & y_{12} & \cdots & y_{1f} \\ y_{21} & y_{22} & \cdots & y_{2f} \\ \vdots & \vdots & \ddots & \vdots \\ y_{n1} & y_{n2} & \cdots & y_{nf} \\ \end{array} \right]\in\mathbb{R}^{n\times f},i=1,2,...,m,$$
#
# therefore, the resulting structure is a tensor $\mathcal{Y}\in\mathbb{R}^{m\times n\times f}$.
# # Part 5: Experiments on Guangzhou Data Set # In[8]: import scipy.io tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.0 # ============================================================================= ### Random missing (RM) scenario ### Set the RM scenario by: binary_mat = np.round(random_tensor + 0.5 - missing_rate).reshape([random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[9]: import time start = time.time() pred_time_steps = 144 * 5 rank = 30 time_lags = np.array([1, 2, 144]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # In[10]: import scipy.io tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.2 # ============================================================================= ### Random missing (RM) scenario ### Set the RM scenario by: binary_mat = 
np.round(random_tensor + 0.5 - missing_rate).reshape([random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[11]: import time start = time.time() pred_time_steps = 144 * 5 rank = 30 time_lags = np.array([1, 2, 144]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # In[12]: import scipy.io tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.4 # ============================================================================= ### Random missing (RM) scenario ### Set the RM scenario by: binary_mat = np.round(random_tensor + 0.5 - missing_rate).reshape([random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[13]: import time start = time.time() pred_time_steps = 144 * 5 rank = 30 time_lags = np.array([1, 2, 144]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # In[14]: import scipy.io tensor = 
scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.2 # ============================================================================= ### Non-random missing (NM) scenario ### Set the NM scenario by: binary_tensor = np.zeros(tensor.shape) for i1 in range(tensor.shape[0]): for i2 in range(tensor.shape[1]): binary_tensor[i1,i2,:] = np.round(random_matrix[i1,i2] + 0.5 - missing_rate) binary_mat = binary_tensor.reshape([binary_tensor.shape[0], binary_tensor.shape[1] * binary_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[15]: import time start = time.time() pred_time_steps = 144 * 5 rank = 30 time_lags = np.array([1, 2, 144]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # In[16]: import scipy.io tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.4 # ============================================================================= ### 
Non-random missing (NM) scenario ### Set the NM scenario by: binary_tensor = np.zeros(tensor.shape) for i1 in range(tensor.shape[0]): for i2 in range(tensor.shape[1]): binary_tensor[i1,i2,:] = np.round(random_matrix[i1,i2] + 0.5 - missing_rate) binary_mat = binary_tensor.reshape([binary_tensor.shape[0], binary_tensor.shape[1] * binary_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[17]: import time start = time.time() pred_time_steps = 144 * 5 rank = 30 time_lags = np.array([1, 2, 144]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # **Experiment results** of short-term rolling prediction with missing values using BTMF: # # | scenario |rank|time_lags| maxiter | mape | rmse | # |:----------|-----:|---------:|---------:|-----------:|----------:| # |**Original data**| 30 | (1,2,144) | (100,100,500,500) | **0.1070** | **4.27**| # |**20%, RM**| 30 | (1,2,144) | (100,100,500,500) | **0.1103** | **4.43**| # |**40%, RM**| 30 | (1,2,144) | (100,100,500,500) | **0.1119** | **4.49**| # |**20%, NM**| 30 | (1,2,144) | (100,100,500,500) | **0.1112** | **4.49**| # |**40%, NM**| 30 | (1,2,144) | (100,100,500,500) | **0.1197** | **4.91**| # # # Part 6: Experiments on Birmingham Data Set # In[18]: import scipy.io tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Birmingham-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * 
tensor.shape[2]]) missing_rate = 0.0 # ============================================================================= ### Random missing (RM) scenario ### Set the RM scenario by: binary_mat = np.round(random_tensor + 0.5 - missing_rate).reshape([random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[19]: import time start = time.time() pred_time_steps = 18 * 7 rank = 10 time_lags = np.array([1, 2, 18]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # In[20]: import scipy.io tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Birmingham-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.1 # ============================================================================= ### Random missing (RM) scenario ### Set the RM scenario by: binary_mat = np.round(random_tensor + 0.5 - missing_rate).reshape([random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[21]: import time start = time.time() pred_time_steps = 18 * 7 rank = 10 time_lags = np.array([1, 2, 18]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = 
st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # In[22]: import scipy.io tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Birmingham-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.3 # ============================================================================= ### Random missing (RM) scenario ### Set the RM scenario by: binary_mat = np.round(random_tensor + 0.5 - missing_rate).reshape([random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[23]: import time start = time.time() pred_time_steps = 18 * 7 rank = 10 time_lags = np.array([1, 2, 18]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # In[24]: import scipy.io tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Birmingham-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.1 # 
============================================================================= ### Non-random missing (NM) scenario ### Set the NM scenario by: binary_tensor = np.zeros(tensor.shape) for i1 in range(tensor.shape[0]): for i2 in range(tensor.shape[1]): binary_tensor[i1,i2,:] = np.round(random_matrix[i1,i2] + 0.5 - missing_rate) binary_mat = binary_tensor.reshape([binary_tensor.shape[0], binary_tensor.shape[1] * binary_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[25]: import time start = time.time() pred_time_steps = 18 * 7 rank = 10 time_lags = np.array([1, 2, 18]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # In[26]: import scipy.io tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Birmingham-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Birmingham-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.3 # ============================================================================= ### Non-random missing (NM) scenario ### Set the NM scenario by: binary_tensor = np.zeros(tensor.shape) for i1 in range(tensor.shape[0]): for i2 in range(tensor.shape[1]): binary_tensor[i1,i2,:] = np.round(random_matrix[i1,i2] + 0.5 - missing_rate) binary_mat = binary_tensor.reshape([binary_tensor.shape[0], binary_tensor.shape[1] * binary_tensor.shape[2]]) # ============================================================================= sparse_mat = 
np.multiply(dense_mat, binary_mat) # In[27]: import time start = time.time() pred_time_steps = 18 * 7 rank = 10 time_lags = np.array([1, 2, 18]) maxiter = np.array([200, 100, 1100, 100]) small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]] mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter) end = time.time() print('Running time: %d seconds'%(end - start)) # **Experiment results** of short-term rolling prediction with missing values using BTMF: # # | scenario |rank|time_lags| maxiter | mape | rmse | # |:----------|-----:|---------:|---------:|-----------:|----------:| # |**Original data**| 10 | (1,2,18) | (200,100,1100,100) | **0.3180** | **161.11**| # |**10%, RM**| 10 | (1,2,18) | (200,100,1100,100) | **0.3207** | **167.16**| # |**30%, RM**| 10 | (1,2,18) | (200,100,1100,100) | **0.3121** | **166.87**| # |**10%, NM**| 10 | (1,2,18) | (200,100,1100,100) | **0.3300** | **170.48**| # |**30%, NM**| 10 | (1,2,18) | (200,100,1100,100) | **0.3571** | **173.65**| # # # Part 7: Experiments on Hangzhou Data Set # In[28]: import scipy.io tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat') tensor = tensor['tensor'] random_matrix = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_matrix.mat') random_matrix = random_matrix['random_matrix'] random_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_tensor.mat') random_tensor = random_tensor['random_tensor'] dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]]) missing_rate = 0.0 # ============================================================================= ### Random missing (RM) scenario ### Set the RM scenario by: binary_mat = np.round(random_tensor + 0.5 - missing_rate).reshape([random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]]) # ============================================================================= sparse_mat = np.multiply(dense_mat, binary_mat) # In[29]: 
import time

# ---------------------------------------------------------------------------
# Hangzhou experiments (In[29]-In[37]): rolling prediction over the last
# 5 days (108 observations/day), under RM and NM scenarios at 0%/20%/40%.
# NOTE(review): the results table below reports rank 30 for Hangzhou, but the
# code here uses rank = 10 -- confirm which configuration produced the table.
# ---------------------------------------------------------------------------

start = time.time()
pred_time_steps = 108 * 5                   # 5 days x 108 observations/day
rank = 10
time_lags = np.array([1, 2, 108])           # two short-term lags + one daily lag
maxiter = np.array([200, 100, 1100, 100])
# NOTE(review): small_dense_mat is assigned but never read in this file.
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# In[30]:

import scipy.io

# Hangzhou, random missing at 20%.
tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')
tensor = tensor['tensor']
random_matrix = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_matrix.mat')
random_matrix = random_matrix['random_matrix']
random_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_tensor.mat')
random_tensor = random_tensor['random_tensor']
dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]])
missing_rate = 0.2

# =============================================================================
# ### Random missing (RM) scenario: entry-wise Bernoulli mask.
binary_mat = np.round(random_tensor + 0.5 - missing_rate).reshape(
    [random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]])
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[31]:

start = time.time()
pred_time_steps = 108 * 5
rank = 10
time_lags = np.array([1, 2, 108])
maxiter = np.array([200, 100, 1100, 100])
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# In[32]:

# Hangzhou, random missing at 40%.
tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')
tensor = tensor['tensor']
random_matrix = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_matrix.mat')
random_matrix = random_matrix['random_matrix']
random_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_tensor.mat')
random_tensor = random_tensor['random_tensor']
dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]])
missing_rate = 0.4

# =============================================================================
# ### Random missing (RM) scenario
binary_mat = np.round(random_tensor + 0.5 - missing_rate).reshape(
    [random_tensor.shape[0], random_tensor.shape[1] * random_tensor.shape[2]])
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[33]:

start = time.time()
pred_time_steps = 108 * 5
rank = 10
time_lags = np.array([1, 2, 108])
maxiter = np.array([200, 100, 1100, 100])
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# In[34]:

# Hangzhou, non-random missing at 20%.
tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')
tensor = tensor['tensor']
random_matrix = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_matrix.mat')
random_matrix = random_matrix['random_matrix']
random_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_tensor.mat')
random_tensor = random_tensor['random_tensor']
dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]])
missing_rate = 0.2

# =============================================================================
# ### Non-random missing (NM) scenario: whole (station, day) fibers are
# dropped.  Vectorized broadcast replaces the original O(n1 * n2) Python
# loops with identical values.  Assumes random_matrix has shape
# tensor.shape[:2], matching the original loop bounds.
binary_tensor = np.broadcast_to(
    np.round(random_matrix + 0.5 - missing_rate)[:, :, None],
    tensor.shape).copy()  # .copy(): broadcast_to returns a read-only view
binary_mat = binary_tensor.reshape([binary_tensor.shape[0],
                                    binary_tensor.shape[1] * binary_tensor.shape[2]])
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[35]:

start = time.time()
pred_time_steps = 108 * 5
rank = 10
time_lags = np.array([1, 2, 108])
maxiter = np.array([200, 100, 1100, 100])
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# In[36]:

# Hangzhou, non-random missing at 40%.
tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')
tensor = tensor['tensor']
random_matrix = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_matrix.mat')
random_matrix = random_matrix['random_matrix']
random_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/random_tensor.mat')
random_tensor = random_tensor['random_tensor']
dense_mat = tensor.reshape([tensor.shape[0], tensor.shape[1] * tensor.shape[2]])
missing_rate = 0.4

# =============================================================================
# ### Non-random missing (NM) scenario (vectorized, see In[34]).
binary_tensor = np.broadcast_to(
    np.round(random_matrix + 0.5 - missing_rate)[:, :, None],
    tensor.shape).copy()
binary_mat = binary_tensor.reshape([binary_tensor.shape[0],
                                    binary_tensor.shape[1] * binary_tensor.shape[2]])
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[37]:

start = time.time()
pred_time_steps = 108 * 5
rank = 10
time_lags = np.array([1, 2, 108])
maxiter = np.array([200, 100, 1100, 100])
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# **Experiment results** of short-term rolling prediction with missing values using BTMF:
#
# | scenario |rank|time_lags| maxiter | mape | rmse |
# |:----------|-----:|----------:|----------:|-----------:|----------:|
# |**Original data**| 30 | (1,2,108) | (200,100,1100,100) | **0.3017** | **40.87**|
# |**20%, RM**| 30 | (1,2,108) | (200,100,1100,100) | **0.3234** | **48.20**|
# |**40%, RM**| 30 | (1,2,108) | (200,100,1100,100) | **0.3493** | **49.30**|
# |**20%, NM**| 30 | (1,2,108) | (200,100,1100,100) | **0.3085** | **49.20**|
# |**40%, NM**| 30 | (1,2,108) | (200,100,1100,100) | **0.2969** | **51.64**|

# # Part 8: Experiments on Seattle Data Set

# In[8]:

import pandas as pd

# Seattle data (CSV matrices), random missing at 20%.
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0)
RM_mat = pd.read_csv('../datasets/Seattle-data-set/RM_mat.csv', index_col = 0)
dense_mat = dense_mat.values
RM_mat = RM_mat.values
missing_rate = 0.2

# =============================================================================
# ### Random missing (RM) scenario: entry-wise mask, same shape as dense_mat.
binary_mat = np.round(RM_mat + 0.5 - missing_rate)
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[9]:

start = time.time()
pred_time_steps = 288 * 5                   # 5 days x 288 observations/day (5-min resolution)
rank = 30
time_lags = np.array([1, 2, 288])
maxiter = np.array([200, 100, 1100, 100])
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# In[10]:

# Seattle, random missing at 40%.
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0)
RM_mat = pd.read_csv('../datasets/Seattle-data-set/RM_mat.csv', index_col = 0)
dense_mat = dense_mat.values
RM_mat = RM_mat.values
missing_rate = 0.4

# =============================================================================
# ### Random missing (RM) scenario
binary_mat = np.round(RM_mat + 0.5 - missing_rate)
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[11]:

start = time.time()
pred_time_steps = 288 * 5
rank = 30
time_lags = np.array([1, 2, 288])
maxiter = np.array([200, 100, 1100, 100])
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# In[12]:

# Seattle, non-random missing at 20%.
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0)
NM_mat = pd.read_csv('../datasets/Seattle-data-set/NM_mat.csv', index_col = 0)
dense_mat = dense_mat.values
NM_mat = NM_mat.values
missing_rate = 0.2

# =============================================================================
# ### Non-random missing (NM) scenario: one mask value per (sensor, day) pair
# (28 days x 288 steps/day), broadcast along the time-of-day axis.  The
# explicit slice reproduces the original loop bounds exactly, so this is
# value-identical to the original O(n1 * 28) Python loops.
binary_tensor = np.broadcast_to(
    np.round(NM_mat[:dense_mat.shape[0], :28] + 0.5 - missing_rate)[:, :, None],
    (dense_mat.shape[0], 28, 288)).copy()
binary_mat = binary_tensor.reshape([binary_tensor.shape[0],
                                    binary_tensor.shape[1] * binary_tensor.shape[2]])
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[13]:

start = time.time()
pred_time_steps = 288 * 5
rank = 30
time_lags = np.array([1, 2, 288])
maxiter = np.array([200, 100, 1100, 100])
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# In[14]:

# Seattle, non-random missing at 40%.
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0)
NM_mat = pd.read_csv('../datasets/Seattle-data-set/NM_mat.csv', index_col = 0)
dense_mat = dense_mat.values
NM_mat = NM_mat.values
missing_rate = 0.4

# =============================================================================
# ### Non-random missing (NM) scenario (vectorized, see In[12]).
binary_tensor = np.broadcast_to(
    np.round(NM_mat[:dense_mat.shape[0], :28] + 0.5 - missing_rate)[:, :, None],
    (dense_mat.shape[0], 28, 288)).copy()
binary_mat = binary_tensor.reshape([binary_tensor.shape[0],
                                    binary_tensor.shape[1] * binary_tensor.shape[2]])
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[15]:

start = time.time()
pred_time_steps = 288 * 5
rank = 30
time_lags = np.array([1, 2, 288])
maxiter = np.array([200, 100, 1100, 100])
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]
mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps, rank, time_lags, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))


# In[16]:

# Seattle, "NM at 0%" baseline: with missing_rate = 0.0 every mask entry
# rounds to 1, so sparse_mat equals dense_mat (fully observed).
dense_mat = pd.read_csv('../datasets/Seattle-data-set/mat.csv', index_col = 0)
NM_mat = pd.read_csv('../datasets/Seattle-data-set/NM_mat.csv', index_col = 0)
dense_mat = dense_mat.values
NM_mat = NM_mat.values
missing_rate = 0.0

# =============================================================================
# ### Non-random missing (NM) scenario (vectorized, see In[12]).
binary_tensor = np.broadcast_to(
    np.round(NM_mat[:dense_mat.shape[0], :28] + 0.5 - missing_rate)[:, :, None],
    (dense_mat.shape[0], 28, 288)).copy()
binary_mat = binary_tensor.reshape([binary_tensor.shape[0],
                                    binary_tensor.shape[1] * binary_tensor.shape[2]])
# =============================================================================

sparse_mat = np.multiply(dense_mat, binary_mat)


# In[17]:
# In[17] run cell: timed short-term rolling prediction on the Seattle data,
# using the dense_mat / sparse_mat pair prepared by the preceding cell.
import time

t_begin = time.time()

# Prediction configuration: last 5 days at 5-minute resolution.
pred_time_steps = 288 * 5
rank = 30
maxiter = np.array([200, 100, 1100, 100])
time_lags = np.array([1, 2, 288])

# Ground-truth window over the prediction horizon (kept for inspection).
small_dense_mat = dense_mat[:, dense_mat.shape[1] - pred_time_steps : dense_mat.shape[1]]

mat_hat = st_prediction(dense_mat, sparse_mat, pred_time_steps,
                        rank, time_lags, maxiter)

elapsed = time.time() - t_begin
print('Running time: %d seconds' % elapsed)


# **Experiment results** of short-term rolling prediction with missing values using BTMF:
#
# | scenario |rank|time_lags| maxiter | mape | rmse |
# |:----------|-----:|---------:|---------:|-----------:|----------:|
# |**Original data**| 30 | (1,2,288) | (200,100,1100,100) | **0.0790** | **4.78**|
# |**20%, RM**| 30 | (1,2,288) | (200,100,1100,100) | **0.0813** | **4.90**|
# |**40%, RM**| 30 | (1,2,288) | (200,100,1100,100) | **0.0841** | **5.08**|
# |**20%, NM**| 30 | (1,2,108) | (200,100,1100,100) | **0.0796** | **4.84**|
# |**40%, NM**| 30 | (1,2,108) | (200,100,1100,100) | **0.0847** | **5.12**|
#
# NOTE(review): the (1,2,108) entries in the two NM rows look like a typo for
# (1,2,288) -- the Seattle code above always uses time_lags = (1, 2, 288).
# Confirm against the original experiment logs.