In [7]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Comparing volumes of hyperspheres

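Here we study how the volume of a hypersphere behaves as the number of dimensions grows. For reference, the volume of an ordinary sphere of radius $r$ in three dimensions is: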

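$$ v = \frac{4}{3} \pi r^3 $$

More generally, the volume of a $d$-dimensional hypersphere is proportional to $r^d$, so the fraction of a unit hypersphere's volume that lies within a thin shell of thickness $\epsilon$ below its surface is

$$ 1 - (1 - \epsilon)^d $$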
In [2]:
def vol_ratio(epsilon, dimension):
    """Return ``1 - (1 - epsilon)**dimension``.

    In the context of this notebook (comparing hypersphere volumes) this is
    presumably the fraction of a unit hypersphere's volume that lies in the
    outer shell of thickness ``epsilon``: the inner sphere of radius
    ``1 - epsilon`` holds ``(1 - epsilon)**dimension`` of the total volume,
    and the shell holds the remainder.

    Parameters
    ----------
    epsilon : scalar or numpy array
        Shell thickness relative to the radius.
    dimension : scalar or numpy array
        Number of dimensions; arrays are handled elementwise by the
        arithmetic operators.

    Returns
    -------
    Same kind as the inputs (scalar in, scalar out; array in, array out).
    """
    inner_fraction = (1 - epsilon) ** dimension
    return 1 - inner_fraction
In [3]:
# Ratio for epsilon = 0.1 in 2 dimensions.
vol_ratio(epsilon=0.1, dimension=2)
Out[3]:
0.18999999999999995
In [4]:
# Same epsilon = 0.1, now in 20 dimensions.
vol_ratio(epsilon=0.1, dimension=20)
Out[4]:
0.8784233454094307
In [6]:
# Pure-Python evaluation: call vol_ratio once per dimension with epsilon = 0.1.
# A comprehension builds the list directly, and its iteration variable stays
# local instead of becoming a notebook-level name.
dims = [1, 2, 5, 10, 20, 100, 1000]
ratios = [vol_ratio(0.1, d) for d in dims]
ratios
Out[6]:
[0.09999999999999998,
 0.18999999999999995,
 0.40950999999999993,
 0.6513215599,
 0.8784233454094307,
 0.9999734386011124,
 1.0]
In [8]:
# The same dimensions as a NumPy array, so that vol_ratio can be applied to
# all of them in a single call.
dims = np.array([1, 2, 5, 10, 20, 100, 1000])
dims
Out[8]:
array([   1,    2,    5,   10,   20,  100, 1000])
In [9]:
# Vectorized evaluation: the arithmetic inside vol_ratio is applied
# elementwise, giving one ratio per entry of dims.
ratios = vol_ratio(0.1, dims)
ratios
Out[9]:
array([0.1       , 0.19      , 0.40951   , 0.65132156, 0.87842335,
       0.99997344, 1.        ])
In [14]:
# Plot the ratio for a thin shell (epsilon = 0.01) over dimensions 1..999.
dims = np.arange(1, 1000)
# Labels are set first so the plot call stays the cell's last expression
# (its return value is what the cell displays).
plt.xlabel('Dimension')
plt.ylabel('vol_ratio(0.01, dimension)')
plt.title('Volume ratio vs. dimension (epsilon = 0.01)')
plt.plot(dims, vol_ratio(0.01, dims))
Out[14]:
[<matplotlib.lines.Line2D at 0x7f6716e7e438>]

Consider the distribution of randomly generated points in high-dimensional spaces

In [22]:
# Single draw from the uniform distribution on [-1, 1).
np.random.uniform(-1,1)
Out[22]:
-0.2945850556693055
In [62]:
# Draw n_samples points uniformly from the cube [-1, 1)^dim and measure how
# far each one lies from the origin.
np.random.seed(0)  # fixed seed so that re-running the notebook yields the same sample
dim = 5000
n_samples = 10000
data = np.random.uniform(-1, 1, size=(n_samples, dim))  # one point per row
# Euclidean norm of every row, divided by sqrt(dim). Since every coordinate
# has magnitude at most 1, the normalized distance always lies in [0, 1].
dists = np.linalg.norm(data, axis=1) / np.sqrt(dim)
In [63]:
# Histogram (100 bins) of the normalized distances from the origin.
plt.hist(dists, bins=100)
plt.xlim(0, 1)  # normalized distances cannot exceed 1
plt.xlabel('Distance from origin / sqrt(dim)')
plt.ylabel('Number of points')
plt.title('Dim = {}'.format(dim))
plt.grid(True);
In [27]:
# Scatter of the first two coordinates of every sampled point; the 'o' format
# string draws circle markers with no connecting line.
plt.plot(data[:, 0], data[:, 1], 'o')
Out[27]:
[<matplotlib.lines.Line2D at 0x7f67171d8128>]
In [69]:
# Grid of histograms showing how the normalized distance from the origin is
# distributed as the dimension increases (one subplot per dimension).
n_rows, n_cols = 5, 5
dim_step = 10
n_samples = 10000
# One dimension per subplot: 1, 11, 21, ..., 241.
grid_dims = np.arange(1, n_rows * n_cols * dim_step + 1, dim_step)

fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 12))
# NOTE: the loop rebinds the notebook-level names dim, data and dists on
# every iteration; after the cell they hold the values of the last subplot.
for ax, dim in zip(axes.flat, grid_dims):
    data = np.random.uniform(-1, 1, size=(n_samples, dim))
    dists = np.sqrt(np.sum(data**2, axis=1))
    dists = dists / np.sqrt(dim)  # normalize so every panel shares the [0, 1] range
    ax.hist(dists, 100)
    ax.set_xlim(0, 1)
    ax.grid(True)
    ax.set_title('Dim = {}'.format(dim))
fig.tight_layout()
In [ ]: