Please cite us if you use the software

Example-1 (Comparison of three different classifiers)

A comparison of three classifiers in scikit-learn on the iris dataset. The iris dataset is a classic and very easy multi-class classification dataset.

Install scikit-learn

In [1]:
import sys
import os
!{sys.executable} -m pip -q -q install scikit-learn
if "Example1_Files" not in os.listdir():
    os.mkdir("Example1_Files")

Load dataset

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from pycm import ConfusionMatrix
# Load the iris data set and pull out the feature matrix and class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Hold out a test split; the fixed seed keeps the split identical across runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=0)

Classifier 1 (C-Support vector)

In [3]:
from sklearn import svm
# Support-vector classifier with a linear kernel and C=0.01.
classifier_1 = svm.SVC(C=0.01, kernel="linear")
In [4]:
# Train on the training split, then label the held-out test samples.
classifier_1.fit(X_train, y_train)
y_pred_1 = classifier_1.predict(X_test)
In [5]:
# Confusion matrix of the true test labels (rows) against the SVC
# predictions (columns), printed as raw counts.
cm1 = ConfusionMatrix(actual_vector=y_test, predict_vector=y_pred_1)
cm1.print_matrix()
Predict  0        1        2        
Actual
0        13       0        0        

1        0        10       6        

2        0        0        9        


In [6]:
# Same matrix with every row scaled to sum to 1, i.e. the share of each
# actual class that was assigned to each predicted class.
cm1.print_normalized_matrix()
Predict     0           1           2           
Actual
0           1.0         0.0         0.0         

1           0.0         0.625       0.375       

2           0.0         0.0         1.0         


In [7]:
# Cohen's kappa: agreement between predicted and actual labels after
# discounting the agreement expected by chance.
cm1.Kappa 
Out[7]:
0.7673469387755101
In [8]:
# Overall accuracy: correctly classified test samples (matrix diagonal)
# divided by the total number of test samples.
cm1.Overall_ACC
Out[8]:
0.8421052631578947
In [9]:
cm1.SOA1  # Landis and Koch benchmark: qualitative label for the strength of agreement (kappa)
Out[9]:
'Substantial'
In [10]:
cm1.SOA2  # Fleiss' benchmark: qualitative label for the strength of agreement (kappa)
Out[10]:
'Excellent'
In [11]:
cm1.SOA3  # Altman's benchmark: qualitative label for the strength of agreement (kappa)
Out[11]:
'Good'
In [12]:
cm1.SOA4  # Cicchetti's benchmark: qualitative label for the strength of agreement (kappa)
Out[12]:
'Excellent'
In [13]:
# Export this confusion matrix as an HTML report (Example1_Files/cm1.html).
# The returned dict, shown as the cell output, holds the written path under
# 'Message' and a success flag under 'Status'.
cm1.save_html(os.path.join("Example1_Files","cm1"))
Out[13]:
{'Message': 'D:\\For Asus Laptop\\projects\\pycm\\Document\\Example1_Files\\cm1.html',
 'Status': True}

Classifier 2 (Decision tree)

In [14]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree limited to depth 5. random_state is pinned because
# scikit-learn shuffles the candidate features at every split, so tied
# splits can otherwise yield a different tree (and different predictions)
# from one run to the next.
classifier_2 = DecisionTreeClassifier(max_depth=5, random_state=0)
In [15]:
# Train on the training split, then label the held-out test samples.
classifier_2.fit(X_train, y_train)
y_pred_2 = classifier_2.predict(X_test)
In [16]:
# Confusion matrix of the true test labels (rows) against the decision-tree
# predictions (columns), printed as raw counts.
cm2 = ConfusionMatrix(actual_vector=y_test, predict_vector=y_pred_2)
cm2.print_matrix()
Predict  0        1        2        
Actual
0        13       0        0        

1        0        15       1        

2        0        0        9        


In [17]:
# Same matrix with every row scaled to sum to 1, i.e. the share of each
# actual class that was assigned to each predicted class.
cm2.print_normalized_matrix()
Predict      0            1            2            
Actual
0            1.0          0.0          0.0          

1            0.0          0.9375       0.0625       

2            0.0          0.0          1.0          


In [18]:
# Cohen's kappa: agreement between predicted and actual labels after
# discounting the agreement expected by chance.
cm2.Kappa 
Out[18]:
0.95978835978836
In [19]:
# Overall accuracy: correctly classified test samples (matrix diagonal)
# divided by the total number of test samples.
cm2.Overall_ACC
Out[19]:
0.9736842105263158
In [20]:
cm2.SOA1  # Landis and Koch benchmark: qualitative label for the strength of agreement (kappa)
Out[20]:
'Almost Perfect'
In [21]:
cm2.SOA2  # Fleiss' benchmark: qualitative label for the strength of agreement (kappa)
Out[21]:
'Excellent'
In [22]:
cm2.SOA3  # Altman's benchmark: qualitative label for the strength of agreement (kappa)
Out[22]:
'Very Good'
In [23]:
cm2.SOA4  # Cicchetti's benchmark: qualitative label for the strength of agreement (kappa)
Out[23]:
'Excellent'
In [24]:
# Export this confusion matrix as an HTML report (Example1_Files/cm2.html).
# The returned dict, shown as the cell output, holds the written path under
# 'Message' and a success flag under 'Status'.
cm2.save_html(os.path.join("Example1_Files","cm2"))
Out[24]:
{'Message': 'D:\\For Asus Laptop\\projects\\pycm\\Document\\Example1_Files\\cm2.html',
 'Status': True}

Classifier 3 (AdaBoost)

In [25]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with default settings. random_state seeds the base estimators so
# that a fresh "Restart & Run All" reproduces the same predictions.
classifier_3 = AdaBoostClassifier(random_state=0)
In [26]:
# Train on the training split, then label the held-out test samples.
classifier_3.fit(X_train, y_train)
y_pred_3 = classifier_3.predict(X_test)
In [27]:
# Confusion matrix of the true test labels (rows) against the AdaBoost
# predictions (columns), printed as raw counts.
cm3 = ConfusionMatrix(actual_vector=y_test, predict_vector=y_pred_3)
cm3.print_matrix()
Predict  0        1        2        
Actual
0        13       0        0        

1        0        15       1        

2        0        3        6        


In [28]:
# Same matrix with every row scaled to sum to 1, i.e. the share of each
# actual class that was assigned to each predicted class.
cm3.print_normalized_matrix()
Predict       0             1             2             
Actual
0             1.0           0.0           0.0           

1             0.0           0.9375        0.0625        

2             0.0           0.33333       0.66667       


In [29]:
# Cohen's kappa: agreement between predicted and actual labels after
# discounting the agreement expected by chance.
cm3.Kappa 
Out[29]:
0.8354978354978355
In [30]:
# Overall accuracy: correctly classified test samples (matrix diagonal)
# divided by the total number of test samples.
cm3.Overall_ACC
Out[30]:
0.8947368421052632
In [31]:
cm3.SOA1  # Landis and Koch benchmark: qualitative label for the strength of agreement (kappa)
Out[31]:
'Almost Perfect'
In [32]:
cm3.SOA2  # Fleiss' benchmark: qualitative label for the strength of agreement (kappa)
Out[32]:
'Excellent'
In [33]:
cm3.SOA3  # Altman's benchmark: qualitative label for the strength of agreement (kappa)
Out[33]:
'Very Good'
In [34]:
cm3.SOA4  # Cicchetti's benchmark: qualitative label for the strength of agreement (kappa)
Out[34]:
'Excellent'
In [35]:
# Export this confusion matrix as an HTML report (Example1_Files/cm3.html).
# The returned dict, shown as the cell output, holds the written path under
# 'Message' and a success flag under 'Status'.
cm3.save_html(os.path.join("Example1_Files","cm3"))
Out[35]:
{'Message': 'D:\\For Asus Laptop\\projects\\pycm\\Document\\Example1_Files\\cm3.html',
 'Status': True}

How to compare classifiers?

In [36]:
from pycm import Compare

# Rank the three confusion matrices against each other; the dictionary keys
# are the names shown in the printed comparison table.
cp = Compare(
    {
        "C-Support vector": cm1,
        "Decision tree": cm2,
        "AdaBoost": cm3,
    }
)
print(cp)
Best : Decision tree

Rank   Name                Class-Score    Overall-Score
1      Decision tree       10.0           6.0
2      AdaBoost            8.7            5.8
3      C-Support vector    8.0            5.43333

In [37]:
# Write the comparison report to Example1_Files/cp.comp. The returned dict,
# shown as the cell output, holds the written path under 'Message' and a
# success flag under 'Status'.
cp.save_report(os.path.join("Example1_Files","cp"))
Out[37]:
{'Message': 'D:\\For Asus Laptop\\projects\\pycm\\Document\\Example1_Files\\cp.comp',
 'Status': True}