# Importing Libraries
import numpy as np
import pandas as pd
import random,math
import matplotlib.pyplot as plt
# Generating random numbers between zero and one from uniform
# distribution with the given dimension
def random_point_gen(dimension):
    """Return a random point in the unit hypercube.

    Args:
        dimension: Number of coordinates to generate.

    Returns:
        A list of ``dimension`` floats, each drawn independently from the
        uniform distribution on [0, 1) via ``random.random()``.
    """
    return [random.random() for _ in range(dimension)]
# Euclidean distance (2-norm) between two points: the square root of the
# sum of squared coordinate differences
def distance(a, b):
    """Return the Euclidean (2-norm) distance between points ``a`` and ``b``.

    Args:
        a: Iterable of numeric coordinates for the first point.
        b: Iterable of numeric coordinates for the second point.

    Returns:
        The square root of the sum of squared coordinate differences.

    Raises:
        ValueError: If the two points do not have the same number of
            coordinates.
    """
    # Materialise both inputs so their lengths can be compared; zip() alone
    # would silently drop the extra coordinates of the longer point.
    a, b = list(a), list(b)
    if len(a) != len(b):
        raise ValueError(
            "points must have the same dimension: "
            "got {} and {}".format(len(a), len(b))
        )
    sum_of_sqrs = sum((a_i - b_i) ** 2 for a_i, b_i in zip(a, b))
    return math.sqrt(sum_of_sqrs)
# Calculating the distances
def random_distances_comparison(dimension, number_pairs):
    """Return distances between independently sampled random point pairs.

    Args:
        dimension: Number of coordinates of every sampled point.
        number_pairs: How many point pairs to sample.

    Returns:
        A list of ``number_pairs`` floats; each is the ``distance`` between
        two fresh points produced by ``random_point_gen(dimension)``.
    """
    return [
        distance(random_point_gen(dimension), random_point_gen(dimension))
        for _ in range(number_pairs)
    ]
def mean(x):
    """Return the arithmetic mean of the values in ``x``.

    Args:
        x: A sized collection of numbers (must support ``len``).

    Returns:
        ``sum(x) / len(x)``.

    Raises:
        ZeroDivisionError: If ``x`` is empty.
    """
    return sum(x) / len(x)
# Dimensions in range of 1 to 200 with interval of 5
dimensions = range(1, 201, 5)

# Seed once so the sampled points (and therefore the averages) are
# reproducible from run to run.
random.seed(34)

# One average distance per dimension, in the same order as `dimensions`.
avg_distances = []
for dims in dimensions:
    distances = random_distances_comparison(dims, 1000)
    avg_distance = mean(distances)  # compute once, reuse below
    avg_distances.append(avg_distance)
    print(dims, avg_distance)

# Build the frame in a single step from the collected results. The previous
# version preallocated an uninitialised 20-row array for 40 dimensions and
# relied on row-by-row .loc enlargement for the remainder.
# dtype=float keeps both columns float64, as they were before.
dist = pd.DataFrame(
    {"Dimension": list(dimensions), "Avg_Distance": avg_distances},
    dtype=float,
)
# Output of the loop above (dimension, average distance):
# 1 0.3304377974569266
# 6 0.9718366726979907
# 11 1.3428297800724869
# 16 1.5916756747360146
# 21 1.8606501665486392
# 26 2.05939278034058
# 31 2.2575232751731726
# 36 2.4368760200852035
# 41 2.620080307069328
# 46 2.7625945084356123
# 51 2.89338869628594
# 56 3.04103478287079
# 61 3.1837540168385114
# 66 3.2946307081057804
# 71 3.4335738927751285
# 76 3.5609023888328744
# 81 3.6705963579722676
# 86 3.7747621910195295
# 91 3.885202293008529
# 96 3.9871994289025343
# 101 4.091623861134935
# 106 4.194797436760151
# 111 4.299946953926221
# 116 4.392107797538249
# 121 4.500000117577843
# 126 4.569193064205523
# 131 4.670386854961063
# 136 4.743869381556361
# 141 4.8403658613650995
# 146 4.916156206138143
# 151 5.011904217530445
# 156 5.097331764593295
# 161 5.15738876580636
# 166 5.251041104914
# 171 5.325070508861497
# 176 5.422322753425231
# 181 5.48791272897287
# 186 5.565523375530461
# 191 5.6312761071627975
# 196 5.715461971073279
# Plot the average pairwise distance against the number of dimensions.
# The line carries a label because plt.legend() needs at least one labelled
# artist; without it matplotlib warns and draws an empty legend.
plt.plot(dist["Dimension"], dist["Avg_Distance"], label="Avg. Distance")
plt.title("Average Distance with Number of Dimensions for 1k Observations")
plt.xlabel('Dimensions')
plt.ylabel('Avg. Distance')
plt.legend(loc='best')
plt.show()