class DBSCAN:
    """Density-based clustering (DBSCAN) over a dense pairwise-distance matrix.

    A sample is a *core sample* when at least ``min_samples`` samples
    (itself included, since its self-distance is 0) lie within ``eps`` of it.
    Clusters are grown from core samples; samples not reachable from any
    core sample keep the noise label ``-1``.

    Parameters
    ----------
    eps : float, default 0.5
        Neighbourhood radius; a sample ``j`` is a neighbour of ``i`` when
        ``dist(i, j) <= eps``.
    min_samples : int, default 5
        Minimum neighbourhood size (including the sample itself) for a
        sample to count as a core sample.

    Attributes
    ----------
    core_sample_indices_ : ndarray of int
        Indices of the core samples, set by :meth:`fit`.
    labels_ : ndarray of int
        Cluster id per sample (``0, 1, ...``) or ``-1`` for noise, set by
        :meth:`fit`.
    """

    def __init__(self, eps=0.5, min_samples=5):
        self.eps = eps
        self.min_samples = min_samples

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result on the estimator.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to cluster. Distances come from ``pdist`` with its
            default metric.

        Returns
        -------
        self
            The fitted estimator, with ``labels_`` and
            ``core_sample_indices_`` populated.

        Notes
        -----
        The full ``n_samples x n_samples`` distance matrix is materialised,
        so memory grows quadratically with the number of samples.
        """
        X = np.asarray(X)
        n_samples = X.shape[0]

        # squareform() turns an empty condensed vector into a 1x1 matrix,
        # which would invent a phantom sample; handle "no data" explicitly.
        if n_samples == 0:
            self.core_sample_indices_ = np.array([], dtype=int)
            self.labels_ = np.full(0, -1)
            return self

        dist = squareform(pdist(X))

        # Neighbourhoods generally differ in size, so keep them in a list:
        # wrapping ragged index arrays in np.array() is an error on
        # current NumPy releases.
        neighbors = [np.flatnonzero(row <= self.eps) for row in dist]
        n_neighbors = np.array([len(idx) for idx in neighbors])
        core_samples = n_neighbors >= self.min_samples

        labels = np.full(n_samples, -1)
        label_num = 0
        for seed in range(n_samples):
            # Only an unlabelled core sample can start a new cluster.
            if labels[seed] != -1 or not core_samples[seed]:
                continue

            # Depth-first expansion. A sample is labelled the moment it is
            # pushed, so it enters the stack at most once and no linear
            # "already on the stack?" scan is needed.
            labels[seed] = label_num
            stack = [seed]
            while stack:
                cur = stack.pop()
                # Border (non-core) samples join the cluster but do not
                # extend it further.
                if not core_samples[cur]:
                    continue
                for neighbor in neighbors[cur]:
                    # A sample already claimed (by this or an earlier
                    # cluster) keeps its first label.
                    if labels[neighbor] == -1:
                        labels[neighbor] = label_num
                        stack.append(neighbor)
            label_num += 1

        self.core_sample_indices_ = np.flatnonzero(core_samples)
        self.labels_ = labels
        return self

    def fit_predict(self, X):
        """Fit on ``X`` and return the per-sample cluster labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to cluster.

        Returns
        -------
        ndarray of int
            The ``labels_`` array computed by :meth:`fit` (``-1`` = noise).
        """
        return self.fit(X).labels_