#!/usr/bin/env python
# coding: utf-8

# Notebook export: k-means clustering of the coordinate columns found in
# "wykaz-zt-polski.xls", followed by a silhouette-score sweep over every
# cluster count from 2 up to len(znaczki) - 1.
#
# Bare expressions such as ``znaczki.head()`` are kept from the notebook;
# they only display a value when run as notebook cells.

import operator

import pandas as pd
import pylab
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# In[1]:

# Skip the first four spreadsheet rows, rename the two coordinate columns
# ("GPS" and the unnamed column after it) and cast both to float.
znaczki = (
    pd.read_excel("../input/wykaz-zt-polski.xls", skiprows=4)
    .rename(columns={"GPS": "lat", "Unnamed: 5": "lon"})
    .astype({"lat": float, "lon": float})
)

# In[2]:

# Initial clustering with a fixed number of groups.
coordinates = znaczki[["lat", "lon"]]
n_clusters = 12
kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(coordinates)
znaczki["grupa"] = kmeans.labels_
znaczki.head()

# In[13]:

# code 1: silhouette score of the 12-cluster assignment.
silhouette_score(X=coordinates, labels=znaczki["grupa"])

# In[50]:

# code 2: refit k-means for every cluster count and record its silhouette
# score. Note that each pass overwrites znaczki["grupa"], n_clusters and
# kmeans, so after the loop they describe the last cluster count tried.
silhouette_averages = []
for ilosc_grup in range(2, len(znaczki)):
    n_clusters = ilosc_grup
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(coordinates)
    znaczki["grupa"] = kmeans.labels_
    sil_sco = silhouette_score(X=coordinates, labels=znaczki["grupa"])
    silhouette_averages.append(sil_sco)
    # Progress report on every tenth cluster count.
    if ilosc_grup % 10 == 0:
        print(ilosc_grup, sil_sco)

# In[51]:

# In[52]:

# x axis is the cluster count: list index 0 corresponds to 2 clusters.
pylab.plot(
    range(2, len(silhouette_averages) + 2),
    silhouette_averages,
)

# In[53]:

# Locate the worst and best scores; "+ 2" converts a list index back into
# the cluster count it was computed for.
score_of = operator.itemgetter(1)
indexed_scores = list(enumerate(silhouette_averages))
min_index, min_value = min(indexed_scores, key=score_of)
max_index, max_value = max(indexed_scores, key=score_of)
print(min_index + 2, min_value)
print(max_index + 2, max_value)

# In[54]:

len(znaczki)

# In[ ]: