import numpy as np
from scipy.stats import mode
from scipy.spatial.distance import cdist
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as skKNeighborsClassifier
class KNeighborsClassifier:
    """Brute-force k-nearest-neighbours classifier with uniform voting.

    Distances are computed with ``scipy.spatial.distance.cdist`` using its
    default (Euclidean) metric, and every neighbour contributes one vote.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of nearest training samples that vote for each query row.
        If it exceeds the number of training samples, all of them vote.
    """

    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        """Memorise the training data.

        Parameters
        ----------
        X : array-like, 2-D
            Training samples, one per row.
        y : array-like, 1-D
            Class label of each training sample.

        Returns
        -------
        self
        """
        # Convert once so later methods can rely on ndarray behaviour even
        # when the caller passes nested lists.
        self._fit_X = np.asarray(X)
        # classes_ holds the sorted unique labels; _y holds, for each
        # training sample, the index of its label inside classes_.
        self.classes_, self._y = np.unique(y, return_inverse=True)
        return self

    def _neighbor_labels(self, X):
        """Return encoded labels of the nearest training samples per query row."""
        dist_mat = cdist(X, self._fit_X)
        neigh_ind = np.argsort(dist_mat, axis=1)[:, :self.n_neighbors]
        return self._y[neigh_ind]

    def _vote_counts(self, X):
        """Return an (n_queries, n_classes) array of neighbour votes per class."""
        # np.asarray makes list input work; the original read ``X.shape``
        # directly and therefore failed on anything that was not an ndarray.
        X = np.asarray(X)
        neigh_labels = self._neighbor_labels(X)
        n_queries = neigh_labels.shape[0]
        votes = np.zeros((n_queries, len(self.classes_)))
        rows = np.arange(n_queries)
        # One column per neighbour rank: within a column every row index is
        # unique, so the fancy-indexed += cannot drop repeated increments.
        for labels_at_rank in neigh_labels.T:
            votes[rows, labels_at_rank] += 1
        return votes

    def predict(self, X):
        """Predict the majority class among the nearest neighbours.

        Parameters
        ----------
        X : array-like, 2-D
            Query samples, one per row.

        Returns
        -------
        numpy.ndarray
            Predicted label for each query row, taken from ``classes_``.
            When several classes tie, the one that comes first in
            ``classes_`` is returned.
        """
        # argmax returns the first maximum, i.e. the lowest class index on a
        # tie, which is the same choice the statistical mode of the encoded
        # labels would make.
        return self.classes_[np.argmax(self._vote_counts(X), axis=1)]

    def predict_proba(self, X):
        """Estimate class probabilities as the fraction of neighbour votes.

        Parameters
        ----------
        X : array-like, 2-D
            Query samples, one per row.

        Returns
        -------
        numpy.ndarray
            Array with one row per query and one column per entry of
            ``classes_``; each row sums to 1.
        """
        proba = self._vote_counts(X)
        proba /= np.sum(proba, axis=1)[:, np.newaxis]
        return proba
# Self-check: the hand-written classifier must reproduce scikit-learn's
# results on the breast-cancer dataset.
X, y = load_breast_cancer(return_X_y=True)
# Standardise the features before any distances are computed.
X = StandardScaler().fit_transform(X)

# Fit both implementations with their default settings on the same data.
clf1 = KNeighborsClassifier()
clf1 = clf1.fit(X, y)
clf2 = skKNeighborsClassifier()
clf2 = clf2.fit(X, y)

# Class-probability estimates must agree up to floating-point tolerance.
prob1, prob2 = clf1.predict_proba(X), clf2.predict_proba(X)
assert np.allclose(prob1, prob2)

# Hard predictions must match exactly.
pred1, pred2 = clf1.predict(X), clf2.predict(X)
assert np.array_equal(pred1, pred2)