import numpy as np
from scipy.spatial.distance import cdist
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors as skNearestNeighbors
class NearestNeighbors():
    """Brute-force unsupervised nearest-neighbour search.

    Every query computes the full pairwise distance matrix between the
    query points and the training points with
    ``scipy.spatial.distance.cdist`` (default metric, i.e. Euclidean).

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of neighbours returned by :meth:`kneighbors` when the call
        does not override it.
    radius : float, default=1.0
        Search radius used by :meth:`radius_neighbors` when the call does
        not override it.
    """

    def __init__(self, n_neighbors=5, radius=1.0):
        self.n_neighbors = n_neighbors
        self.radius = radius

    def fit(self, X):
        """Store the training points ``X`` and return ``self``."""
        self._fit_X = X
        return self

    @staticmethod
    def _as_object_array(items):
        """Pack ``items`` into a 1-D object array, one entry per item."""
        packed = np.empty(len(items), dtype=object)
        # Fill element by element so that rows which happen to share a
        # length are stored as separate arrays, never merged into 2-D.
        for pos, item in enumerate(items):
            packed[pos] = item
        return packed

    def kneighbors(self, X, n_neighbors=None):
        """Find the closest training points for each query point.

        Parameters
        ----------
        X : array-like of shape (n_queries, n_features)
            Query points.
        n_neighbors : int, optional
            Number of neighbours per query; defaults to ``self.n_neighbors``.

        Returns
        -------
        dist : ndarray of shape (n_queries, n_neighbors)
            Distances to the neighbours, in ascending order per row.
        neigh_ind : ndarray of shape (n_queries, n_neighbors)
            Indices of those neighbours in the training data.
        """
        if n_neighbors is None:
            n_neighbors = self.n_neighbors
        dist_mat = cdist(X, self._fit_X)
        # A stable sort makes the order of equidistant neighbours
        # well-defined: the lower training index always comes first.
        order = np.argsort(dist_mat, axis=1, kind="stable")
        neigh_ind = order[:, :n_neighbors]
        dist = np.take_along_axis(dist_mat, neigh_ind, axis=1)
        return dist, neigh_ind

    def radius_neighbors(self, X, radius=None):
        """Find all training points within ``radius`` of each query point.

        Parameters
        ----------
        X : array-like of shape (n_queries, n_features)
            Query points.
        radius : float, optional
            Inclusive distance threshold; defaults to ``self.radius``.

        Returns
        -------
        dist : ndarray of shape (n_queries,), dtype=object
            ``dist[i]`` holds the distances from query ``i`` to its
            neighbours.
        neigh_ind : ndarray of shape (n_queries,), dtype=object
            ``neigh_ind[i]`` holds the matching training indices, in
            ascending index order (not sorted by distance).
        """
        if radius is None:
            radius = self.radius
        dist_mat = cdist(X, self._fit_X)
        neigh_ind_list = [np.where(row <= radius)[0] for row in dist_mat]
        dist_list = [row[idx] for row, idx in zip(dist_mat, neigh_ind_list)]
        # Rows generally differ in length, hence object arrays of arrays.
        dist = self._as_object_array(dist_list)
        neigh_ind = self._as_object_array(neigh_ind_list)
        return dist, neigh_ind
# Check the brute-force NearestNeighbors defined above against
# scikit-learn's implementation on the standardized breast-cancer features.
X, _ = load_breast_cancer(return_X_y=True)
X = StandardScaler().fit_transform(X)
neigh1, neigh2 = NearestNeighbors().fit(X), skNearestNeighbors().fit(X)

# k-nearest neighbours: distances and indices must agree row for row.
dist1, neigh_ind1 = neigh1.kneighbors(X)
dist2, neigh_ind2 = neigh2.kneighbors(X)
assert np.allclose(dist1, dist2)
assert np.array_equal(neigh_ind1, neigh_ind2)

# Radius neighbours: each row is reordered by distance on both sides
# before the comparison, so only the set of neighbours has to match.
dist1, neigh_ind1 = neigh1.radius_neighbors(X, radius=5)
dist2, neigh_ind2 = neigh2.radius_neighbors(X, radius=5)
for own_d, ref_d, own_i, ref_i in zip(dist1, dist2, neigh_ind1, neigh_ind2):
    own_order, ref_order = np.argsort(own_d), np.argsort(ref_d)
    assert np.allclose(own_d[own_order], ref_d[ref_order])
    assert np.array_equal(own_i[own_order], ref_i[ref_order])