#!/usr/bin/env python
# coding: utf-8

# Clustering the Iris Dataset with the Agglomerative (Hierarchical) Clustering Algorithm

# #

# In[1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering

# The `%matplotlib inline` magic only exists inside IPython/Jupyter. Guard it
# so the file also runs as a plain Python script, where `get_ipython` is not
# defined and the call would otherwise raise NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# Loading the required file

# In[2]:
# Default dataset location (kept from the original notebook). Set the
# IRIS_CSV environment variable to point the script at a different copy.
DEFAULT_CSV_PATH = 'C:\\Users\\ceakn\\Desktop\\iris.csv'
data = pd.read_csv(os.environ.get('IRIS_CSV', DEFAULT_CSV_PATH))


# In[3]:
# Original author's note: the table shows the species together with the
# petal and sepal measurements.
# Printed explicitly because a bare expression shows nothing in a script.
print(data.head())


# Let's check whether there are any missing values.

# In[4]:
# Number of rows that contain at least one null value.
rows_with_missing = data.isnull().any(axis=1).sum()
print(rows_with_missing)


# Let's select the features to be used.

# In[5]:
print(data.columns)


# In[6]:
features = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']


# In[7]:
# Work on a copy so the loaded DataFrame is left untouched.
X = data[features].copy()


# In[8]:
print(X.head())


# In[9]:
# Switch from a DataFrame to its underlying array; the plotting code below
# indexes X positionally (X[mask, column]).
X = X.values


# In[10]:
print(type(X))


# Plotting the dendrogram with SciPy

# In[11]:
plt.figure(figsize=(25, 12))
# Ward linkage over the selected feature columns.
dendo = dendrogram(linkage(X, method='ward'))


# In[12]:
agg = AgglomerativeClustering(n_clusters=3, linkage='ward')
# One integer cluster label per row of X.
iris_predict = agg.fit_predict(X)


# In[13]:
plt.figure(figsize=(25, 10))
# One colour per cluster; keep the length in sync with n_clusters above.
color = ['red', 'blue', 'yellow']
for i, cluster_color in enumerate(color):
    in_cluster = iris_predict == i
    # Only the first two selected features (columns 0 and 1) are plotted.
    plt.scatter(
        X[in_cluster, 0],
        X[in_cluster, 1],
        s=100,
        c=cluster_color,
        label='Cluster - {:d}'.format(i + 1),
    )
plt.legend()
plt.show()