import numpy as np
def compute_mape(var, var_hat):
    """Return the mean absolute percentage error of ``var_hat`` against ``var``.

    Parameters
    ----------
    var : numpy.ndarray
        Reference values. Every entry is used as a divisor, so zeros yield
        ``inf``/``nan`` in the result.
    var_hat : numpy.ndarray
        Estimated values, element-wise comparable with ``var``.

    Returns
    -------
    float
        Sum of ``|(var - var_hat) / var|`` divided by ``var.shape[0]``.
        The value is a fraction; it is not multiplied by 100.
    """
    # The absolute value is taken over the whole ratio (not only the
    # numerator) so that negative reference values still contribute a
    # positive error instead of cancelling the other terms. For positive
    # references the result is unchanged.
    relative_error = np.abs((var - var_hat) / var)
    return np.sum(relative_error) / var.shape[0]
def compute_rmse(var, var_hat):
    """Return the root mean squared error between ``var`` and ``var_hat``.

    The squared differences are summed over all entries and divided by
    ``var.shape[0]`` before the square root is taken.
    """
    residual = var - var_hat
    mean_squared_error = np.sum(residual ** 2) / var.shape[0]
    return np.sqrt(mean_squared_error)
def laplacian(n, tau):
    """Build the first row of a circulant Laplacian kernel.

    The returned array of shape ``n`` is zero everywhere except:
    index 0 holds ``2 * tau``, and the ``tau`` entries on either side of
    index 0 (indices ``1..tau`` and ``-1..-tau``, i.e. wrapping around the
    end) hold ``-1``.

    ``n`` is handed straight to ``np.zeros``, so it may be an int or a
    one-element shape tuple.
    """
    kernel = np.zeros(n)
    kernel[0] = 2 * tau
    # Offsets 1..tau; an empty array when tau == 0, which assigns nothing.
    offsets = np.arange(1, tau + 1)
    kernel[offsets] = -1
    kernel[-offsets] = -1
    return kernel
def prox(z, w, lmbda, denominator):
    """Shrink the spectrum of ``lmbda * z - w`` and return the real signal.

    Steps, all element-wise in the frequency domain:

    1. FFT of ``lmbda * z - w``, divided by ``denominator``.
    2. Each coefficient is scaled by ``max(1 - T / (lmbda * |coef|), 0)``
       where ``T = z.shape[0]``, so coefficients with small magnitude are
       set to zero and the others are reduced.
    3. Inverse FFT; only the real part is returned.

    A zero coefficient makes the scale ``-inf`` (NumPy emits a divide
    warning), which the clamp turns into 0.
    """
    length = z.shape[0]
    spectrum = np.fft.fft(lmbda * z - w) / denominator
    magnitude = np.abs(spectrum)
    shrink = np.maximum(1 - length / (lmbda * magnitude), 0)
    return np.fft.ifft(spectrum * shrink).real
def update_z(y_train, pos_train, x, w, lmbda, eta):
    """Return the updated ``z`` variable.

    ``z`` starts as ``x + w / lmbda``. At the positions ``pos_train`` it is
    then replaced by a convex combination of that value and the observed
    values ``y_train``, with weights ``lmbda / (lmbda + eta)`` and
    ``eta / (lmbda + eta)`` respectively. A new array is returned; the
    inputs are not modified.
    """
    total = lmbda + eta
    weight_estimate = lmbda / total
    weight_observed = eta / total
    z = x + w / lmbda
    z[pos_train] = weight_estimate * z[pos_train] + weight_observed * y_train
    return z
def update_w(x, z, w, lmbda):
    """Return ``w`` advanced by ``lmbda`` times the gap ``x - z``."""
    gap = x - z
    return w + lmbda * gap
def LCR(y_true, y, lmbda, gamma, tau, maxiter = 50):
    """Reconstruct a 1-D series with missing entries by iterating x/z/w updates.

    Parameters
    ----------
    y_true : numpy.ndarray
        Reference series. It is used only to report errors during the
        iterations, never to compute the reconstruction.
    y : numpy.ndarray
        Observed series, same length as ``y_true``. Entries equal to 0 are
        treated as missing; non-zero entries are the training observations.
    lmbda : float
        Weight passed to ``prox``, ``update_z`` and ``update_w``; the
        observation weight ``eta`` is fixed to ``100 * lmbda``.
    gamma : float
        Weight of the squared Laplacian spectrum in the ``prox`` denominator.
    tau : int
        Half-width of the Laplacian kernel built by ``laplacian``.
    maxiter : int, optional
        Number of iterations (default 50).

    Returns
    -------
    numpy.ndarray
        The ``x`` produced by the last iteration. When ``maxiter <= 0`` no
        iteration runs and a copy of ``y`` is returned.

    Notes
    -----
    Every 10th iteration prints the iteration number, then the MAPE and
    RMSE of ``x`` on the held-out positions (``y_true`` non-zero while
    ``y`` is zero), followed by a blank line.
    """
    eta = 100 * lmbda
    # Scalar length of the series (the kernel and the FFTs are 1-D).
    T = y.shape[0]

    # Observed entries drive the z-update; held-out entries are only scored.
    pos_train = np.where(y != 0)
    y_train = y[pos_train]
    pos_test = np.where((y_true != 0) & (y == 0))
    y_test = y_true[pos_test]

    z = y.copy()
    w = y.copy()
    # Defined up front so that `return x` is valid even if the loop body
    # never runs (maxiter <= 0); it is overwritten on the first iteration.
    x = y.copy()

    # Loop-invariant frequency-domain denominator used by prox.
    denominator = lmbda + gamma * np.fft.fft(laplacian(T, tau)) ** 2

    show_iter = 10
    for it in range(maxiter):
        x = prox(z, w, lmbda, denominator)
        z = update_z(y_train, pos_train, x, w, lmbda, eta)
        w = update_w(x, z, w, lmbda)
        if (it + 1) % show_iter == 0:
            print(it + 1)
            print(compute_mape(y_test, x[pos_test]))
            print(compute_rmse(y_test, x[pos_test]))
            print()
    return x
import time

import numpy as np

# Fixed seed so the random missing-data mask below is reproducible.
np.random.seed(1000)

# Load the files pems-w1 .. pems-w4 and join them along axis 1.
# np.load on an .npz returns an NpzFile that keeps the archive open, so it
# is used as a context manager to close the handle once the array is read.
weekly_mats = []
for t in range(1, 5):
    with np.load('../datasets/California-data-set/pems-w{}.npz'.format(t)) as npz_file:
        weekly_mats.append(npz_file['arr_0'])
# One concatenate instead of repeated np.append avoids re-copying the
# growing matrix on every file.
dense_mat = np.concatenate(weekly_mats, axis = 1)
del weekly_mats
dim1, dim2 = dense_mat.shape

# Each entry is kept (mask 1) or zeroed (mask 0) independently; an entry is
# zeroed when its uniform draw falls below missing_rate.
missing_rate = 0.3
sparse_mat = dense_mat * np.round(np.random.rand(dim1, dim2) + 0.5 - missing_rate)

# Flatten both matrices row by row into 1-D series for LCR.
dense_y = dense_mat.reshape(-1, order = 'C')
sparse_y = sparse_mat.reshape(-1, order = 'C')
del dense_mat, sparse_mat

start = time.time()
T = sparse_y.shape[0]
# Hyper-parameters: lmbda scales with the series length, gamma with lmbda.
lmbda = 5e-6 * T
gamma = 5 * lmbda
tau = 1
maxiter = 100
x = LCR(dense_y, sparse_y, lmbda, gamma, tau, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
10 0.021298833688882 1.852869038818841 20 0.017948783075887268 1.6577517011378626 30 0.017114682668579725 1.6092986944925582 40 0.01671932524943785 1.5863398179810466 50 0.016481650254625495 1.5726194152070325 60 0.016321147647357398 1.563458923784471 70 0.01620372129667088 1.556882966067288 80 0.016114316636935536 1.5519306373935904 90 0.016044170575042718 1.5480913071124722 100 0.015988652943412708 1.545068696492713 Running time: 2145 seconds.
import time

import numpy as np

# Fixed seed so the random missing-data mask below is reproducible.
np.random.seed(1000)

# Load the files pems-w1 .. pems-w4 and join them along axis 1.
# np.load on an .npz returns an NpzFile that keeps the archive open, so it
# is used as a context manager to close the handle once the array is read.
weekly_mats = []
for t in range(1, 5):
    with np.load('../datasets/California-data-set/pems-w{}.npz'.format(t)) as npz_file:
        weekly_mats.append(npz_file['arr_0'])
# One concatenate instead of repeated np.append avoids re-copying the
# growing matrix on every file.
dense_mat = np.concatenate(weekly_mats, axis = 1)
del weekly_mats
dim1, dim2 = dense_mat.shape

# Each entry is kept (mask 1) or zeroed (mask 0) independently; an entry is
# zeroed when its uniform draw falls below missing_rate.
missing_rate = 0.5
sparse_mat = dense_mat * np.round(np.random.rand(dim1, dim2) + 0.5 - missing_rate)

# Flatten both matrices row by row into 1-D series for LCR.
dense_y = dense_mat.reshape(-1, order = 'C')
sparse_y = sparse_mat.reshape(-1, order = 'C')
del dense_mat, sparse_mat

start = time.time()
T = sparse_y.shape[0]
# Hyper-parameters: lmbda scales with the series length, gamma with lmbda.
lmbda = 5e-6 * T
gamma = 5 * lmbda
tau = 1
maxiter = 100
x = LCR(dense_y, sparse_y, lmbda, gamma, tau, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
10 0.03205266864755385 2.526811037440527 20 0.020649498260034923 1.8508015663813164 30 0.01940574128550898 1.7871697951431336 40 0.018907996821007276 1.7630546251803738 50 0.018645027703456687 1.7506494428170742 60 0.018484805148232853 1.7432115205832388 70 0.018375600450113792 1.7381723089706416 80 0.018296552342510687 1.7345568525579949 90 0.01823630066557636 1.731779686149809 100 0.018188937933787473 1.7296167765785124 Running time: 2128 seconds.
import time

import numpy as np

# Fixed seed so the random missing-data mask below is reproducible.
np.random.seed(1000)

# Load the files pems-w1 .. pems-w4 and join them along axis 1.
# np.load on an .npz returns an NpzFile that keeps the archive open, so it
# is used as a context manager to close the handle once the array is read.
weekly_mats = []
for t in range(1, 5):
    with np.load('../datasets/California-data-set/pems-w{}.npz'.format(t)) as npz_file:
        weekly_mats.append(npz_file['arr_0'])
# One concatenate instead of repeated np.append avoids re-copying the
# growing matrix on every file.
dense_mat = np.concatenate(weekly_mats, axis = 1)
del weekly_mats
dim1, dim2 = dense_mat.shape

# Each entry is kept (mask 1) or zeroed (mask 0) independently; an entry is
# zeroed when its uniform draw falls below missing_rate.
missing_rate = 0.7
sparse_mat = dense_mat * np.round(np.random.rand(dim1, dim2) + 0.5 - missing_rate)

# Flatten both matrices row by row into 1-D series for LCR.
dense_y = dense_mat.reshape(-1, order = 'C')
sparse_y = sparse_mat.reshape(-1, order = 'C')
del dense_mat, sparse_mat

start = time.time()
T = sparse_y.shape[0]
# Hyper-parameters: lmbda scales with the series length, gamma with lmbda.
lmbda = 5e-6 * T
gamma = 5 * lmbda
tau = 1
maxiter = 100
x = LCR(dense_y, sparse_y, lmbda, gamma, tau, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
10 0.059642983892190415 4.355813604377682 20 0.02992763476694469 2.4474182835362432 30 0.024791492864814007 2.1726790058834635 40 0.023762968708682115 2.1273407746980464 50 0.023379944819814615 2.112732710483295 60 0.023185447502839084 2.105694987693853 70 0.023073197796444175 2.1020125743685907 80 0.023000050147720174 2.099657321145785 90 0.02295082541491914 2.0981183684572837 100 0.02291409240026881 2.096966697436104 Running time: 2116 seconds.
import time

import numpy as np

# Fixed seed so the random missing-data mask below is reproducible.
np.random.seed(1000)

# Load the files pems-w1 .. pems-w4 and join them along axis 1.
# np.load on an .npz returns an NpzFile that keeps the archive open, so it
# is used as a context manager to close the handle once the array is read.
weekly_mats = []
for t in range(1, 5):
    with np.load('../datasets/California-data-set/pems-w{}.npz'.format(t)) as npz_file:
        weekly_mats.append(npz_file['arr_0'])
# One concatenate instead of repeated np.append avoids re-copying the
# growing matrix on every file.
dense_mat = np.concatenate(weekly_mats, axis = 1)
del weekly_mats
dim1, dim2 = dense_mat.shape

# Each entry is kept (mask 1) or zeroed (mask 0) independently; an entry is
# zeroed when its uniform draw falls below missing_rate.
missing_rate = 0.9
sparse_mat = dense_mat * np.round(np.random.rand(dim1, dim2) + 0.5 - missing_rate)

# Flatten both matrices row by row into 1-D series for LCR.
dense_y = dense_mat.reshape(-1, order = 'C')
sparse_y = sparse_mat.reshape(-1, order = 'C')
del dense_mat, sparse_mat

start = time.time()
T = sparse_y.shape[0]
# Hyper-parameters: lmbda scales with the series length, gamma with lmbda.
lmbda = 5e-6 * T
gamma = 5 * lmbda
tau = 1
maxiter = 100
x = LCR(dense_y, sparse_y, lmbda, gamma, tau, maxiter)
end = time.time()
print('Running time: %d seconds.'%(end - start))
10 0.16680356645498787 11.496445382941689 20 0.06333774897527557 4.611538576003319 30 0.04873960322905492 3.66901067254967 40 0.04249083199551347 3.3643225678770525 50 0.040315592284164425 3.2592584203432566 60 0.039399367029591396 3.2169776061586437 70 0.03898036501397407 3.1994176232162332 80 0.038772536047528894 3.1911225871906406 90 0.03865151926864036 3.186191345330333 100 0.038580167465265505 3.1832566522439194 Running time: 2120 seconds.