#!/usr/bin/env python
# coding: utf-8

# #MULTI CLASS CLASSIFICATION
#
# Notebook export: loads the iris dataset, plots it, then compares a
# logistic-regression classifier against k-nearest-neighbours classifiers
# (k = 1..20) on a single held-out test split.

# In[1]:

# Data Imports
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Plot imports
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
# nbconvert's rendering of the `%matplotlib inline` magic; `get_ipython` is
# only defined when this file is executed by IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:

from sklearn import linear_model
from sklearn.datasets import load_iris


# In[3]:

iris = load_iris()


# In[4]:

# Feature matrix and integer class labels as shipped with the dataset.
X = iris.data
Y = iris.target


# In[5]:

# Single-argument print(...) behaves the same on Python 2 and Python 3;
# the original `print x` statement form is a SyntaxError on Python 3.
print(iris.DESCR)


# In[6]:

iris_data = DataFrame(
    X,
    columns=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'],
)


# In[7]:

iris_target = DataFrame(Y, columns=['Species'])


# In[10]:

def flower(num):
    """Return the species name for an integer class label.

    0 maps to 'Setosa' and 1 to 'Versicolour'; every other value maps to
    'Virginica'.
    """
    if num == 0:
        return 'Setosa'
    elif num == 1:
        return 'Versicolour'
    else:
        return 'Virginica'


# In[11]:

# Replace the numeric labels with readable names for plotting.
iris_target['Species'] = iris_target['Species'].apply(flower)


# In[13]:

iris_target.tail()


# In[14]:

# NOTE: this rebinds `iris` from the loaded dataset object to a DataFrame
# holding the features and the species column side by side. X and Y were
# extracted above, so the modelling cells below are unaffected.
iris = pd.concat([iris_data, iris_target], axis=1)


# In[15]:

iris.head()


# In[16]:

# NOTE(review): seaborn 0.9 renamed the `size` keyword to `height`; the call
# is kept as written for the seaborn version this notebook targeted.
# Confirm the installed version before switching the keyword.
sns.pairplot(iris, hue='Species', size=2)


# In[17]:

# NOTE(review): seaborn 0.9 renamed `factorplot` to `catplot` (and `size` to
# `height`). Kept as written; when migrating, verify which `kind` reproduces
# the intended figure.
sns.factorplot('Petal Length', data=iris, hue='Species', size=10)


# In[19]:

from sklearn.linear_model import LogisticRegression

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; prefer the current location and fall back for old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split


# In[20]:

logreg = LogisticRegression()

# test_size=0.4 holds out 40% of the samples for testing; the fixed
# random_state keeps the split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.4, random_state=3
)


# In[21]:

logreg.fit(X_train, Y_train)


# In[22]:

from sklearn import metrics


# In[23]:

Y_pred = logreg.predict(X_test)


# In[24]:

# Accuracy of logistic regression on the held-out split.
print(metrics.accuracy_score(Y_test, Y_pred))


# In[25]:

from sklearn.neighbors import KNeighborsClassifier


# In[27]:

knn = KNeighborsClassifier(n_neighbors=6)


# In[28]:

knn.fit(X_train, Y_train)


# In[29]:

Y_pred = knn.predict(X_test)


# In[30]:

# Accuracy of k-NN with k = 6.
print(metrics.accuracy_score(Y_test, Y_pred))


# In[31]:

knn = KNeighborsClassifier(n_neighbors=1)


# In[32]:

knn.fit(X_train, Y_train)


# In[33]:

Y_pred = knn.predict(X_test)


# In[34]:

# Accuracy of k-NN with k = 1.
print(metrics.accuracy_score(Y_test, Y_pred))


# In[35]:

# Sweep k from 1 to 20 inclusive, recording test accuracy for each value.
k_range = range(1, 21)
accuracy = []


# In[36]:

for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, Y_train)
    Y_pred = knn.predict(X_test)
    accuracy.append(metrics.accuracy_score(Y_test, Y_pred))


# In[37]:

plt.plot(k_range, accuracy)
plt.xlabel('K value')
plt.ylabel('Testing Accuracy')


# In[ ]: