Published: December 31, 2020
Author: Xinyu Chen [GitHub homepage]
Download: This Jupyter notebook is at our GitHub repository. If you want to evaluate the code, please download the notebook from the transdim repository.
This notebook shows how to implement the High-accuracy Low-Rank Tensor Completion (HaLRTC) on some real-world data sets. For an in-depth discussion of HaLRTC, please see [1].
import numpy as np
def ten2mat(tensor, mode):
    """Unfold `tensor` along `mode` into a matrix of shape (dim[mode], -1), Fortran order."""
    front = np.moveaxis(tensor, mode, 0)
    return front.reshape((tensor.shape[mode], -1), order='F')
def mat2ten(mat, dim, mode):
    """Fold a mode-`mode` unfolding back into a tensor of shape `dim` (inverse of ten2mat)."""
    # Axis order used by the unfolding: `mode` first, remaining axes in order.
    order = [mode] + [ax for ax in range(dim.shape[0]) if ax != mode]
    folded = np.reshape(mat, list(dim[order]), order='F')
    return np.moveaxis(folded, 0, mode)
def svt(mat, tau):
    """Singular value thresholding: shrink singular values of `mat` by `tau` (floor at 0)."""
    u, s, v = np.linalg.svd(mat, full_matrices=False)
    shrunk = np.maximum(s - tau, 0)
    # (u * shrunk) scales column k of u by shrunk[k], i.e. u @ diag(shrunk).
    return np.matmul(u * shrunk, v)
def compute_mape(var, var_hat):
    """Mean absolute percentage error of `var_hat` against ground truth `var` (1-D arrays)."""
    rel_err = np.abs(var - var_hat) / var
    return rel_err.sum() / var.shape[0]
def compute_rmse(var, var_hat):
    """Root mean squared error of `var_hat` against ground truth `var` (1-D arrays)."""
    sq_err = (var - var_hat) ** 2
    return np.sqrt(sq_err.sum() / var.shape[0])
def HaLRTC_imputer(dense_tensor, sparse_tensor, alpha: list, rho: float, epsilon: float, maxiter: int):
    """High-accuracy Low-Rank Tensor Completion (HaLRTC) via ADMM.

    Missing entries of `sparse_tensor` are marked by 0 or NaN. The held-out
    test set is the set of positions that are observed (non-zero) in
    `dense_tensor` but missing in `sparse_tensor`; MAPE/RMSE are reported
    on it every 50 iterations and at convergence.

    Parameters
    ----------
    dense_tensor : np.ndarray
        Ground-truth tensor, used only for evaluation.
    sparse_tensor : np.ndarray
        Partially observed tensor (missing = 0 or NaN). Not modified.
    alpha : list
        Per-mode weights of the nuclear-norm terms, one per tensor mode.
    rho : float
        Initial ADMM penalty; grows by 5% per iteration, capped at 1e5.
    epsilon : float
        Relative convergence tolerance between successive iterates.
    maxiter : int
        Maximum number of iterations.

    Returns
    -------
    np.ndarray
        Completed tensor with imputed missing entries.
    """
    # Work on a copy so the caller's array is never mutated (the original
    # implementation overwrote NaNs in the caller's tensor in place).
    sparse_tensor = sparse_tensor.copy()
    dim = np.array(sparse_tensor.shape)
    nan_mask = np.isnan(sparse_tensor)
    if nan_mask.any():
        pos_test = np.where((dense_tensor != 0) & nan_mask)
        sparse_tensor[nan_mask] = 0
    else:
        pos_test = np.where((dense_tensor != 0) & (sparse_tensor == 0))
    pos_miss = np.where(sparse_tensor == 0)
    dense_test = dense_tensor[pos_test]
    del dense_tensor  # free memory; only the held-out test values are needed below
    tensor_hat = sparse_tensor.copy()
    n_modes = len(dim)
    B = [np.zeros(sparse_tensor.shape) for _ in range(n_modes)]
    Y = [np.zeros(sparse_tensor.shape) for _ in range(n_modes)]
    last_ten = sparse_tensor.copy()
    snorm = np.linalg.norm(sparse_tensor)
    it = 0
    while True:
        rho = min(rho * 1.05, 1e5)
        for k in range(n_modes):
            # B_k = fold_k( SVT( unfold_k(X + Y_k / rho), alpha_k / rho ) )
            B[k] = mat2ten(svt(ten2mat(tensor_hat + Y[k] / rho, k), alpha[k] / rho), dim, k)
        # Average the per-mode estimates on the missing entries. The divisor
        # is the number of modes (the original hard-coded 3, which only
        # worked for third-order tensors).
        tensor_hat[pos_miss] = ((sum(B) - sum(Y) / rho) / n_modes)[pos_miss]
        for k in range(n_modes):
            Y[k] = Y[k] - rho * (B[k] - tensor_hat)
        tol = np.linalg.norm(tensor_hat - last_ten) / snorm
        last_ten = tensor_hat.copy()
        it += 1
        if it % 50 == 0:
            print('Iter: {}'.format(it))
            print('Tolerance: {:.6}'.format(tol))
            print('MAPE: {:.6}'.format(compute_mape(dense_test, tensor_hat[pos_test])))
            print('RMSE: {:.6}'.format(compute_rmse(dense_test, tensor_hat[pos_test])))
            print()
        if (tol < epsilon) or (it >= maxiter):
            break
    print('Total iteration: {}'.format(it))
    print('Tolerance: {:.6}'.format(tol))
    print('MAPE: {:.6}'.format(compute_mape(dense_test, tensor_hat[pos_test])))
    print('RMSE: {:.6}'.format(compute_rmse(dense_test, tensor_hat[pos_test])))
    print()
    return tensor_hat
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4  # Random missing (RM)
# Each entry is kept with probability 1 - missing_rate.
mask = np.round(np.random.rand(*dim) + 0.5 - missing_rate)
sparse_tensor = dense_tensor * mask
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.0016919 MAPE: 0.0880788 RMSE: 3.59247 Total iteration: 59 Tolerance: 5.54723e-05 MAPE: 0.0886173 RMSE: 3.61067 Running time: 32 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.6  # Random missing (RM)
# Each entry is kept with probability 1 - missing_rate.
mask = np.round(np.random.rand(*dim) + 0.5 - missing_rate)
sparse_tensor = dense_tensor * mask
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-4
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Total iteration: 30 Tolerance: 9.40289e-05 MAPE: 0.0982231 RMSE: 3.9581 Running time: 16 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Guangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4  # Non-random missing (NM)
# One draw per (sensor, day) removes the whole day along the last mode.
mask = np.round(np.random.rand(dim[0], dim[1]) + 0.5 - missing_rate)
sparse_tensor = dense_tensor * mask[:, :, np.newaxis]
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00159878 MAPE: 0.108402 RMSE: 4.3662 Total iteration: 61 Tolerance: 9.28742e-05 MAPE: 0.108782 RMSE: 4.37534 Running time: 34 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4  # Random missing (RM)
# Each entry is kept with probability 1 - missing_rate.
mask = np.round(np.random.rand(*dim) + 0.5 - missing_rate)
sparse_tensor = dense_tensor * mask
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.000118962 MAPE: 0.190242 RMSE: 31.8082 Total iteration: 54 Tolerance: 8.03007e-05 MAPE: 0.190249 RMSE: 31.8102 Running time: 2 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.6  # Random missing (RM)
# Each entry is kept with probability 1 - missing_rate.
mask = np.round(np.random.rand(*dim) + 0.5 - missing_rate)
sparse_tensor = dense_tensor * mask
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.000112243 MAPE: 0.20047 RMSE: 36.1853 Total iteration: 58 Tolerance: 7.86866e-05 MAPE: 0.200866 RMSE: 36.1915 Running time: 2 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/Hangzhou-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
missing_rate = 0.4  # Non-random missing (NM)
# One draw per (station, day) removes the whole day along the last mode.
mask = np.round(np.random.rand(dim[0], dim[1]) + 0.5 - missing_rate)
sparse_tensor = dense_tensor * mask[:, :, np.newaxis]
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Total iteration: 46 Tolerance: 7.6348e-05 MAPE: 0.214628 RMSE: 53.1454 Running time: 2 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
# tensor.npz is a NumPy archive; scipy.io.loadmat cannot read .npz files
# (it rejects the non-MAT header), so it must be loaded with np.load.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.4  # Random missing (RM)
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00163238 MAPE: 0.0657551 RMSE: 3.75497 Total iteration: 61 Tolerance: 7.7105e-05 MAPE: 0.0675857 RMSE: 3.83403 Running time: 46 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
# tensor.npz is a NumPy archive; scipy.io.loadmat cannot read .npz files
# (it rejects the non-MAT header), so it must be loaded with np.load.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.6  # Random missing (RM)
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1], dim[2]) + 0.5 - missing_rate)
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00247575 MAPE: 0.0767219 RMSE: 4.23934 Total iteration: 64 Tolerance: 7.80975e-05 MAPE: 0.079017 RMSE: 4.33587 Running time: 46 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
# tensor.npz is a NumPy archive; scipy.io.loadmat cannot read .npz files
# (it rejects the non-MAT header), so it must be loaded with np.load.
dense_tensor = np.load('../datasets/Seattle-data-set/tensor.npz')['arr_0']
dim = dense_tensor.shape
missing_rate = 0.4  # Non-random missing (NM)
sparse_tensor = dense_tensor * np.round(np.random.rand(dim[0], dim[1])[:, :, np.newaxis] + 0.5 - missing_rate)
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00120819 MAPE: 0.100643 RMSE: 5.22702 Total iteration: 64 Tolerance: 9.95834e-05 MAPE: 0.101906 RMSE: 5.27444 Running time: 45 seconds
Scenario setting:
import numpy as np

np.random.seed(1000)
missing_rate = 0.4
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only rows (road segments) observed at more than 70% of time points.
observed = dense_mat.copy()
observed[observed != 0] = 1
keep = np.where(np.sum(observed, axis=1) > 0.7 * observed.shape[1])
dense_mat = dense_mat[keep[0], :]
## Random missing (RM)
random_mat = np.random.rand(dense_mat.shape[0], dense_mat.shape[1])
sparse_mat = dense_mat * np.round(random_mat + 0.5 - missing_rate)
# Reshape the (segment, hour) matrices into (segment, day, hour) tensors.
dense_tensor = dense_mat.reshape([dense_mat.shape[0], 30, 24])
sparse_tensor = sparse_mat.reshape([sparse_mat.shape[0], 30, 24])
del dense_mat, sparse_mat
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00111649 MAPE: 0.0907538 RMSE: 2.16498 Total iteration: 60 Tolerance: 9.10963e-05 MAPE: 0.091131 RMSE: 2.17446 Running time: 776 seconds
Scenario setting:
import numpy as np

np.random.seed(1000)
missing_rate = 0.6
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only rows (road segments) observed at more than 70% of time points.
observed = dense_mat.copy()
observed[observed != 0] = 1
keep = np.where(np.sum(observed, axis=1) > 0.7 * observed.shape[1])
dense_mat = dense_mat[keep[0], :]
## Random missing (RM)
random_mat = np.random.rand(dense_mat.shape[0], dense_mat.shape[1])
sparse_mat = dense_mat * np.round(random_mat + 0.5 - missing_rate)
# Reshape the (segment, hour) matrices into (segment, day, hour) tensors.
dense_tensor = dense_mat.reshape([dense_mat.shape[0], 30, 24])
sparse_tensor = sparse_mat.reshape([sparse_mat.shape[0], 30, 24])
del dense_mat, sparse_mat
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00241757 MAPE: 0.0942166 RMSE: 2.25369 Total iteration: 66 Tolerance: 8.58548e-05 MAPE: 0.0950641 RMSE: 2.27341 Running time: 891 seconds
Scenario setting:
import numpy as np

np.random.seed(1000)
missing_rate = 0.4
dense_mat = np.load('../datasets/London-data-set/hourly_speed_mat.npy')
# Keep only rows (road segments) observed at more than 70% of time points.
observed = dense_mat.copy()
observed[observed != 0] = 1
keep = np.where(np.sum(observed, axis=1) > 0.7 * observed.shape[1])
dense_mat = dense_mat[keep[0], :]
## Non-random missing (NM)
binary_mat = np.zeros(dense_mat.shape)
random_mat = np.random.rand(dense_mat.shape[0], 30)
# One draw per (segment, day) decides a whole day of 24 hourly columns.
binary_mat[:, :30 * 24] = np.repeat(np.round(random_mat + 0.5 - missing_rate), 24, axis=1)
sparse_mat = dense_mat * binary_mat
# Reshape the (segment, hour) matrices into (segment, day, hour) tensors.
dense_tensor = dense_mat.reshape([dense_mat.shape[0], 30, 24])
sparse_tensor = sparse_mat.reshape([sparse_mat.shape[0], 30, 24])
del dense_mat, sparse_mat
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00103334 MAPE: 0.0982027 RMSE: 2.34015 Total iteration: 59 Tolerance: 9.23072e-05 MAPE: 0.0983797 RMSE: 2.34535 Running time: 759 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/NYC-data-set/tensor.mat')['tensor'].astype(np.float32)
dim = dense_tensor.shape
missing_rate = 0.4  # Random missing (RM)
# Each entry is kept with probability 1 - missing_rate.
mask = np.round(np.random.rand(*dim) + 0.5 - missing_rate)
sparse_tensor = dense_tensor * mask
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-4
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00122498 MAPE: 0.504978 RMSE: 6.84282 Total iteration: 87 Tolerance: 9.28379e-05 MAPE: 0.509911 RMSE: 6.84419 Running time: 54 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/NYC-data-set/tensor.mat')['tensor'].astype(np.float32)
dim = dense_tensor.shape
missing_rate = 0.6  # Random missing (RM)
# Each entry is kept with probability 1 - missing_rate.
mask = np.round(np.random.rand(*dim) + 0.5 - missing_rate)
sparse_tensor = dense_tensor * mask
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-4
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00150918 MAPE: 0.515299 RMSE: 8.12735 Total iteration: 89 Tolerance: 9.40833e-05 MAPE: 0.520701 RMSE: 8.12889 Running time: 48 seconds
Scenario setting:
import scipy.io
import numpy as np

np.random.seed(1000)
dense_tensor = scipy.io.loadmat('../datasets/NYC-data-set/tensor.mat')['tensor']
dim = dense_tensor.shape
nm_tensor = np.random.rand(dim[0], dim[1], dim[2])
missing_rate = 0.4  # Non-random missing (NM)
binary_tensor = np.zeros(dense_tensor.shape)
# One draw per (origin, destination, day) decides a whole day of 24 hourly
# slots; only the first 61 days' draws are used, matching the original loop.
binary_tensor[:, :, :61 * 24] = np.repeat(np.round(nm_tensor[:, :, :61] + 0.5 - missing_rate), 24, axis=2)
sparse_tensor = dense_tensor * binary_tensor
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-4
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.00121294 MAPE: 0.510509 RMSE: 7.0283 Total iteration: 87 Tolerance: 9.4833e-05 MAPE: 0.515144 RMSE: 7.03015 Running time: 48 seconds
Scenario setting:
import numpy as np

np.random.seed(1000)
dense_tensor = np.load('../datasets/Temperature-data-set/tensor.npy').astype(np.float32)
# Zero out grid points whose first-slice value exceeds 50 (treated as outliers).
pos = np.where(dense_tensor[:, 0, :] > 50)
dense_tensor[pos[0], :, pos[1]] = 0
random_tensor = np.random.rand(*dense_tensor.shape)
missing_rate = 0.4
## Random missing (RM)
# Missing entries are marked with NaN (zeros in the data are also treated as missing).
sparse_tensor = dense_tensor.copy()
sparse_tensor[np.round(random_tensor + 0.5 - missing_rate) == 0] = np.nan
sparse_tensor[sparse_tensor == 0] = np.nan
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-4
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.000424517 MAPE: 0.00663588 RMSE: 0.23659 Total iteration: 78 Tolerance: 9.14475e-05 MAPE: 0.00670003 RMSE: 0.236096 Running time: 24 seconds
Scenario setting:
import numpy as np
np.random.seed(1000)
dense_tensor = np.load('../datasets/Temperature-data-set/tensor.npy').astype(np.float32)
pos = np.where(dense_tensor[:, 0, :] > 50)
dense_tensor[pos[0], :, pos[1]] = 0
random_tensor = np.random.rand(dense_tensor.shape[0], dense_tensor.shape[1], dense_tensor.shape[2])
missing_rate = 0.6
## Random missing (RM)
binary_tensor = np.round(random_tensor + 0.5 - missing_rate)
sparse_tensor = dense_tensor.copy()
sparse_tensor[binary_tensor == 0] = np.nan
sparse_tensor[sparse_tensor == 0] = np.nan
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-4
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.000666835 MAPE: 0.00954841 RMSE: 0.338751 Total iteration: 78 Tolerance: 9.55115e-05 MAPE: 0.00977159 RMSE: 0.345411 Running time: 24 seconds
Scenario setting:
import numpy as np
np.random.seed(1000)
dense_tensor = np.load('../datasets/Temperature-data-set/tensor.npy').astype(np.float32)
pos = np.where(dense_tensor[:, 0, :] > 50)
dense_tensor[pos[0], :, pos[1]] = 0
random_tensor = np.random.rand(dense_tensor.shape[0], dense_tensor.shape[1], int(dense_tensor.shape[2] / 3))
missing_rate = 0.4
## Non-random missing (NM)
binary_tensor = np.zeros(dense_tensor.shape)
for i1 in range(dense_tensor.shape[0]):
for i2 in range(dense_tensor.shape[1]):
for i3 in range(int(dense_tensor.shape[2] / 3)):
binary_tensor[i1, i2, i3 * 3 : (i3 + 1) * 3] = np.round(random_tensor[i1, i2, i3] + 0.5 - missing_rate)
sparse_tensor = dense_tensor.copy()
sparse_tensor[binary_tensor == 0] = np.nan
sparse_tensor[sparse_tensor == 0] = np.nan
Model setting:
import time

# Run HaLRTC and report wall-clock time.
t0 = time.time()
alpha = np.ones(3) / 3
rho = 1e-4
epsilon = 1e-4
maxiter = 200
tensor_hat = HaLRTC_imputer(dense_tensor, sparse_tensor, alpha, rho, epsilon, maxiter)
print('Running time: %d seconds' % (time.time() - t0))
Iter: 50 Tolerance: 0.000423441 MAPE: 0.00701627 RMSE: 0.249312 Total iteration: 78 Tolerance: 9.44678e-05 MAPE: 0.00724445 RMSE: 0.252975 Running time: 24 seconds