#!/usr/bin/env python
# coding: utf-8
# # Example-1 (Comparison of three different classifiers)
# A comparison of three classifiers in `scikit-learn` on the iris dataset.
# The iris dataset is a classic and very easy multi-class classification dataset.
# ## Install scikit-learn
# In[1]:
import sys
import os
import subprocess

# Install scikit-learn with the pip of the interpreter running this script.
# Going through ``subprocess`` rather than IPython's ``get_ipython()`` keeps
# the script runnable under plain ``python`` (see the shebang) as well as
# inside a notebook kernel. A failed install is deliberately not fatal
# (``check=False``), matching the previous best-effort behaviour.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "scikit-learn"],
    check=False,
)
# Create the output directory that the reports are written to.
# ``exist_ok=True`` makes this idempotent without a separate, race-prone
# "is it already listed in the current directory?" check.
os.makedirs("Example1_Files", exist_ok=True)
# ## Load dataset
# In[2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from pycm import ConfusionMatrix
# Load the iris dataset and pull out its feature matrix and target labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Split into train and test parts; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)
# ## Classifier 1 (C-Support vector)
# In[3]:
from sklearn import svm
# Support vector classifier with a linear kernel and C=0.01.
classifier_1 = svm.SVC(C=0.01, kernel='linear')
# In[4]:
# Fit on the training split, then predict labels for the test split.
classifier_1.fit(X_train, y_train)
y_pred_1 = classifier_1.predict(X_test)
# In[5]:
# Confusion matrix of the true test labels against the predictions.
cm1 = ConfusionMatrix(actual_vector=y_test, predict_vector=y_pred_1)
cm1.print_matrix()
# In[6]:
cm1.print_normalized_matrix()
# In[7]:
# The bare attribute expressions below are notebook cells: the value is
# displayed when run interactively.
cm1.Kappa
# In[8]:
cm1.Overall_ACC
# In[9]:
cm1.SOA1  # Landis and Koch benchmark
# In[10]:
cm1.SOA2  # Fleiss’ benchmark
# In[11]:
cm1.SOA3  # Altman’s benchmark
# In[12]:
cm1.SOA4  # Cicchetti’s benchmark
# In[13]:
# Export the matrix as an HTML report inside the Example1_Files directory.
cm1.save_html(os.path.join("Example1_Files", "cm1"))
# Open File
# ## Classifier 2 (Decision tree)
# In[14]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree limited to depth 5. The seed is fixed so the example is
# reproducible: scikit-learn documents that features are randomly permuted
# at each split, so equally good splits may otherwise be chosen differently
# from run to run.
classifier_2 = DecisionTreeClassifier(max_depth=5, random_state=0)
# In[15]:
# Fit on the training split, then predict labels for the test split.
y_pred_2 = classifier_2.fit(X_train, y_train).predict(X_test)
# In[16]:
# Confusion matrix of the true test labels against the predictions.
cm2 = ConfusionMatrix(y_test, y_pred_2)
cm2.print_matrix()
# In[17]:
cm2.print_normalized_matrix()
# In[18]:
# The bare attribute expressions below are notebook cells: the value is
# displayed when run interactively.
cm2.Kappa
# In[19]:
cm2.Overall_ACC
# In[20]:
cm2.SOA1  # Landis and Koch benchmark
# In[21]:
cm2.SOA2  # Fleiss’ benchmark
# In[22]:
cm2.SOA3  # Altman’s benchmark
# In[23]:
cm2.SOA4  # Cicchetti’s benchmark
# In[24]:
# Export the matrix as an HTML report inside the Example1_Files directory.
cm2.save_html(os.path.join("Example1_Files", "cm2"))
# Open File
# ## Classifier 3 (AdaBoost)
# In[25]:
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost with its default settings. The seed is fixed so the example is
# reproducible across runs, in line with the seeded train/test split.
classifier_3 = AdaBoostClassifier(random_state=0)
# In[26]:
# Fit on the training split, then predict labels for the test split.
y_pred_3 = classifier_3.fit(X_train, y_train).predict(X_test)
# In[27]:
# Confusion matrix of the true test labels against the predictions.
cm3 = ConfusionMatrix(y_test, y_pred_3)
cm3.print_matrix()
# In[28]:
cm3.print_normalized_matrix()
# In[29]:
# The bare attribute expressions below are notebook cells: the value is
# displayed when run interactively.
cm3.Kappa
# In[30]:
cm3.Overall_ACC
# In[31]:
cm3.SOA1  # Landis and Koch benchmark
# In[32]:
cm3.SOA2  # Fleiss’ benchmark
# In[33]:
cm3.SOA3  # Altman’s benchmark
# In[34]:
cm3.SOA4  # Cicchetti’s benchmark
# In[35]:
# Export the matrix as an HTML report inside the Example1_Files directory.
cm3.save_html(os.path.join("Example1_Files", "cm3"))
# Open File
# ## How to compare classifiers?
# In[36]:
from pycm import Compare
# Build a comparison of the three confusion matrices, keyed by a display
# name for each classifier, and print it.
cp = Compare(
    {
        "C-Support vector": cm1,
        "Decision tree": cm2,
        "AdaBoost": cm3,
    }
)
print(cp)
# In[37]:
# Write the comparison report into the Example1_Files directory.
cp.save_report(os.path.join("Example1_Files", "cp"))
# Open File