#!/usr/bin/env python
# coding: utf-8

# # Example-1 (Comparison of three different classifiers)

# A comparison of three classifiers in `scikit-learn` on the iris dataset.
#
# The iris dataset is a classic and very easy multi-class classification dataset.

# ## Install scikit-learn

# In[1]:


import os
import subprocess
import sys

# Install scikit-learn with the same interpreter that runs this script.
# An argument list (no shell) is used instead of the IPython-only
# `get_ipython().system(...)` so the file also runs under plain `python`.
# check=False: a failed install is not fatal here; it will surface as an
# ImportError at the scikit-learn import below.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "scikit-learn"],
    check=False,
)

# Directory that receives every report written by this example.
OUTPUT_DIR = "Example1_Files"
os.makedirs(OUTPUT_DIR, exist_ok=True)


# ## Load dataset

# In[2]:


# These imports intentionally come after the install step above.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from pycm import ConfusionMatrix

iris = datasets.load_iris()
X = iris.data
y = iris.target
# Fixed random_state so the train/test split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)


# ## Classifier 1 (C-Support vector)

# In[3]:


from sklearn import svm

classifier_1 = svm.SVC(kernel='linear', C=0.01)


# In[4]:


y_pred_1 = classifier_1.fit(X_train, y_train).predict(X_test)


# In[5]:


cm1 = ConfusionMatrix(y_test, y_pred_1)
cm1.print_matrix()


# In[6]:


cm1.print_normalized_matrix()


# The notebook displayed the following values as cell output; in a script a
# bare expression is discarded, so they are printed explicitly.

# In[7]:


print(cm1.Kappa)


# In[8]:


print(cm1.Overall_ACC)


# In[9]:


print(cm1.SOA1)  # Landis and Koch benchmark


# In[10]:


print(cm1.SOA2)  # Fleiss’ benchmark


# In[11]:


print(cm1.SOA3)  # Altman’s benchmark


# In[12]:


print(cm1.SOA4)  # Cicchetti’s benchmark


# In[13]:


cm1.save_html(os.path.join(OUTPUT_DIR, "cm1"))


# Open File

# ## Classifier 2 (Decision tree)

# In[14]:


from sklearn.tree import DecisionTreeClassifier

classifier_2 = DecisionTreeClassifier(max_depth=5)


# In[15]:


y_pred_2 = classifier_2.fit(X_train, y_train).predict(X_test)


# In[16]:


cm2 = ConfusionMatrix(y_test, y_pred_2)
cm2.print_matrix()


# In[17]:


cm2.print_normalized_matrix()


# In[18]:


print(cm2.Kappa)


# In[19]:


print(cm2.Overall_ACC)


# In[20]:


print(cm2.SOA1)  # Landis and Koch benchmark


# In[21]:


print(cm2.SOA2)  # Fleiss’ benchmark


# In[22]:


print(cm2.SOA3)  # Altman’s benchmark


# In[23]:


print(cm2.SOA4)  # Cicchetti’s benchmark


# In[24]:


cm2.save_html(os.path.join(OUTPUT_DIR, "cm2"))


# Open File

# ## Classifier 3 (AdaBoost)

# In[25]:


from sklearn.ensemble import AdaBoostClassifier

classifier_3 = AdaBoostClassifier()


# In[26]:


y_pred_3 = classifier_3.fit(X_train, y_train).predict(X_test)


# In[27]:


cm3 = ConfusionMatrix(y_test, y_pred_3)
cm3.print_matrix()


# In[28]:


cm3.print_normalized_matrix()


# In[29]:


print(cm3.Kappa)


# In[30]:


print(cm3.Overall_ACC)


# In[31]:


print(cm3.SOA1)  # Landis and Koch benchmark


# In[32]:


print(cm3.SOA2)  # Fleiss’ benchmark


# In[33]:


print(cm3.SOA3)  # Altman’s benchmark


# In[34]:


print(cm3.SOA4)  # Cicchetti’s benchmark


# In[35]:


cm3.save_html(os.path.join(OUTPUT_DIR, "cm3"))


# Open File

# ## How to compare classifiers?

# In[36]:


from pycm import Compare

# Keys are the display names used for each confusion matrix in the report.
cp = Compare({"C-Support vector": cm1, "Decision tree": cm2, "AdaBoost": cm3})
print(cp)


# In[37]:


cp.save_report(os.path.join(OUTPUT_DIR, "cp"))


# Open File