def distance(a, b):
    """Return the Euclidean distance between two series.

    Args:
        a: First series (NumPy array or any array-like of real numbers).
        b: Second series, with a shape that broadcasts against ``a``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    # Convert to float before subtracting: this accepts plain lists/tuples
    # and prevents small or unsigned integer dtypes from wrapping around.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return np.sqrt(np.sum(diff ** 2))
def distopro(a):
    """Return the selection probabilities for one row of sorted distances.

    Only the number of entries in ``a`` is used; the distance values
    themselves do not influence the result. The probabilities come from a
    fixed table, increase from the first to the last position and add up
    to 1.

    Args:
        a: Sequence of distances; its length must be 3, 4 or 5.

    Returns:
        A 1-D float array with ``len(a)`` probabilities.

    Raises:
        ValueError: If ``len(a)`` is not 3, 4 or 5.
    """
    tables = {
        3: (0.2, 0.3, 0.5),
        4: (0.1, 0.2, 0.3, 0.4),
        5: (0.04, 0.12, 0.2, 0.28, 0.36),
    }
    count = len(a)
    if count not in tables:
        # Returning a table of the wrong length would only fail later, when
        # the caller tries to store it in a row of a different size.
        raise ValueError(
            f"unsupported number of neighbours: {count} (expected 3, 4 or 5)"
        )
    return np.array(tables[count])
def transform(data, window_size, k):
    """Build the neighbour and probability matrices for a set of series.

    The distance from series ``i`` to series ``j`` is the Euclidean distance
    between the last ``window_size`` samples of ``i`` and the first
    ``window_size`` samples of ``j``. For every series the ``k`` series with
    the smallest such distance are kept.

    Args:
        data: 2-D array; the first dimension indexes the series and the
            second dimension holds the samples of each series.
        window_size: Number of samples compared at the end of one series and
            at the start of the other.
        k: Number of nearest neighbours kept per series. The probability
            table provided by ``distopro`` currently covers 3, 4 and 5.

    Returns:
        A tuple ``(distance_matrix, subdistance_matrix, relation_matrix,
        probability_matrix)``:

        * ``distance_matrix`` -- float array, shape (n, n), all distances.
        * ``subdistance_matrix`` -- float array, shape (n, k), distances to
          the kept neighbours, ordered from farthest to nearest.
        * ``relation_matrix`` -- int array, shape (n, k), indices of the kept
          neighbours in the same order.
        * ``probability_matrix`` -- float array, shape (n, k), the selection
          probability of each kept neighbour.

    Raises:
        ValueError: If ``data`` is not 2-D, ``window_size`` does not fit in
            a series, or ``k`` exceeds the number of series.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("data must be a 2-D array of shape (series, samples)")
    num_series, series_length = data.shape
    if not 0 <= window_size <= series_length:
        raise ValueError(
            f"window_size must be between 0 and {series_length}, got {window_size}"
        )
    if k > num_series:
        raise ValueError(
            f"k ({k}) cannot exceed the number of series ({num_series})"
        )

    tails = data[:, series_length - window_size:]
    heads = data[:, :window_size]
    distance_matrix = np.ones([num_series, num_series], dtype=float)
    for i in range(num_series):
        for j in range(num_series):
            distance_matrix[i][j] = distance(tails[i], heads[j])

    relation_matrix = np.ones([num_series, k], dtype=int)
    subdistance_matrix = np.ones([num_series, k], dtype=float)
    probability_matrix = np.ones([num_series, k], dtype=float)
    for i in range(num_series):
        # Ascending argsort puts the k nearest series first; reversing them
        # places the nearest one last, where distopro's table holds the
        # largest probability. The cut is made on the number of series, not
        # on the series length.
        nearest = distance_matrix[i].argsort()[:k][::-1]
        relation_matrix[i] = nearest
        subdistance_matrix[i] = distance_matrix[i][nearest]
        probability_matrix[i] = distopro(subdistance_matrix[i])
    return distance_matrix, subdistance_matrix, relation_matrix, probability_matrix
#print(transform(np.array([[0,1,1,1],[1,0,1,1],[1,1,0,1],[1,1,1,0]]), 2, 3))
def next_step(relation_array, probability_array):
    """Randomly pick the ID of the next series.

    Args:
        relation_array: Candidate series IDs for the current series.
        probability_array: Selection probability of each candidate, in the
            same order as ``relation_array``; expected to add up to 1.

    Returns:
        The entry of ``relation_array`` whose cumulative-probability interval
        contains a number drawn uniformly from ``[0, 1)``.

    Raises:
        ValueError: If ``probability_array`` is empty.
    """
    if len(probability_array) == 0:
        raise ValueError("probability_array must not be empty")

    value = random.random()
    cumulative = 0.0
    for index, probability in enumerate(probability_array):
        cumulative += probability
        # Half-open intervals [lower, upper): a draw of exactly 0.0 belongs
        # to the first candidate with non-zero probability.
        if value < cumulative:
            return relation_array[index]
    # Rounding can leave the cumulative sum slightly below 1; give the
    # remaining sliver to the last candidate instead of returning None.
    return relation_array[len(probability_array) - 1]
def random_walk(relation_matrix, probability_matrix, length):
    """Generate a random sequence of series IDs.

    The walk starts at series 0. Each following ID is drawn with
    ``next_step`` from the neighbour row and probability row of the current
    ID.

    Args:
        relation_matrix: Row ``i`` holds the candidate successors of series
            ``i``.
        probability_matrix: Row ``i`` holds the selection probability of each
            candidate in ``relation_matrix[i]``.
        length: Number of IDs in the returned sequence.

    Returns:
        A 1-D array with ``length`` series IDs (empty when ``length <= 0``).
    """
    if length <= 0:
        return np.array([], dtype=int)

    current = 0
    # The start ID is part of the walk, so only length - 1 steps are drawn.
    seq = [current]
    for _ in range(length - 1):
        current = next_step(relation_matrix[current], probability_matrix[current])
        seq.append(current)
    return np.array(seq)
# Script driver: build the matrices with a window of 100 samples and 5
# neighbours per series, then print a random walk of 10 series IDs.
# a = distance matrix, b = neighbour distances, c = relation matrix,
# d = probability matrix (the order returned by transform).
# NOTE(review): `data` is not defined in the visible part of this file --
# confirm it is loaded before this point.
a,b,c,d=transform(data, 100, 5)
print(random_walk( c, d, 10))