from pycm import *
y_actu = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
cm = ConfusionMatrix(y_actu, y_pred)
cm
pycm.ConfusionMatrix(classes: [0, 1, 2])
cm.actual_vector
[2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
cm.predict_vector
[0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
cm.classes
[0, 1, 2]
cm.class_stat
{'ACC': {0: 0.8333333333333334, 1: 0.75, 2: 0.5833333333333334}, 'BM': {0: 0.7777777777777777, 1: 0.2222222222222221, 2: 0.16666666666666652}, 'DOR': {0: 'None', 1: 3.999999999999998, 2: 1.9999999999999998}, 'ERR': {0: 0.16666666666666663, 1: 0.25, 2: 0.41666666666666663}, 'F0.5': {0: 0.6521739130434783, 1: 0.45454545454545453, 2: 0.5769230769230769}, 'F1': {0: 0.75, 1: 0.4, 2: 0.5454545454545454}, 'F2': {0: 0.8823529411764706, 1: 0.35714285714285715, 2: 0.5172413793103449}, 'FDR': {0: 0.4, 1: 0.5, 2: 0.4}, 'FN': {0: 0, 1: 2, 2: 3}, 'FNR': {0: 0.0, 1: 0.6666666666666667, 2: 0.5}, 'FOR': {0: 0.0, 1: 0.19999999999999996, 2: 0.4285714285714286}, 'FP': {0: 2, 1: 1, 2: 2}, 'FPR': {0: 0.2222222222222222, 1: 0.11111111111111116, 2: 0.33333333333333337}, 'G': {0: 0.7745966692414834, 1: 0.408248290463863, 2: 0.5477225575051661}, 'LR+': {0: 4.5, 1: 2.9999999999999987, 2: 1.4999999999999998}, 'LR-': {0: 0.0, 1: 0.7500000000000001, 2: 0.75}, 'MCC': {0: 0.6831300510639732, 1: 0.25819888974716115, 2: 0.1690308509457033}, 'MK': {0: 0.6000000000000001, 1: 0.30000000000000004, 2: 0.17142857142857126}, 'N': {0: 9, 1: 9, 2: 6}, 'NPV': {0: 1.0, 1: 0.8, 2: 0.5714285714285714}, 'P': {0: 3, 1: 3, 2: 6}, 'POP': {0: 12, 1: 12, 2: 12}, 'PPV': {0: 0.6, 1: 0.5, 2: 0.6}, 'PRE': {0: 0.25, 1: 0.25, 2: 0.5}, 'RACC': {0: 0.10416666666666667, 1: 0.041666666666666664, 2: 0.20833333333333334}, 'TN': {0: 7, 1: 8, 2: 4}, 'TNR': {0: 0.7777777777777778, 1: 0.8888888888888888, 2: 0.6666666666666666}, 'TON': {0: 7, 1: 10, 2: 7}, 'TOP': {0: 5, 1: 2, 2: 5}, 'TP': {0: 3, 1: 1, 2: 3}, 'TPR': {0: 1.0, 1: 0.3333333333333333, 2: 0.5}}
cm.overall_stat
{'Kappa': 0.35483870967741943, 'Overall_ACC': 0.5833333333333334, 'Overall_RACC': 0.3541666666666667, 'PPV_Macro': 0.5666666666666668, 'PPV_Micro': 0.5833333333333334, 'Strength_Of_Agreement(Altman)': 'Fair', 'Strength_Of_Agreement(Fleiss)': 'Poor', 'Strength_Of_Agreement(Landis and Koch)': 'Fair', 'TPR_Macro': 0.611111111111111, 'TPR_Micro': 0.5833333333333334}
cm.table
{0: {0: 3, 1: 0, 2: 0}, 1: {0: 0, 1: 1, 2: 2}, 2: {0: 2, 1: 1, 2: 3}}
cm.TP
{0: 3, 1: 1, 2: 3}
cm.TN
{0: 7, 1: 8, 2: 4}
cm.FP
{0: 2, 1: 1, 2: 2}
cm.FN
{0: 0, 1: 2, 2: 3}
cm.P
{0: 3, 1: 3, 2: 6}
cm.N
{0: 9, 1: 9, 2: 6}
cm.TOP
{0: 5, 1: 2, 2: 5}
cm.TON
{0: 7, 1: 10, 2: 7}
cm.POP
{0: 12, 1: 12, 2: 12}
cm.TPR
{0: 1.0, 1: 0.3333333333333333, 2: 0.5}
cm.TNR
{0: 0.7777777777777778, 1: 0.8888888888888888, 2: 0.6666666666666666}
cm.PPV
{0: 0.6, 1: 0.5, 2: 0.6}
cm.NPV
{0: 1.0, 1: 0.8, 2: 0.5714285714285714}
cm.FNR
{0: 0.0, 1: 0.6666666666666667, 2: 0.5}
cm.FPR
{0: 0.2222222222222222, 1: 0.11111111111111116, 2: 0.33333333333333337}
cm.PPV
{0: 0.6, 1: 0.5, 2: 0.6}
cm.FOR
{0: 0.0, 1: 0.19999999999999996, 2: 0.4285714285714286}
cm.ACC
{0: 0.8333333333333334, 1: 0.75, 2: 0.5833333333333334}
cm.ERR
{0: 0.16666666666666663, 1: 0.25, 2: 0.41666666666666663}
cm.F1
{0: 0.75, 1: 0.4, 2: 0.5454545454545454}
cm.F05
{0: 0.6521739130434783, 1: 0.45454545454545453, 2: 0.5769230769230769}
cm.F2
{0: 0.8823529411764706, 1: 0.35714285714285715, 2: 0.5172413793103449}
cm.F_beta(Beta=4)
{0: 0.9622641509433962, 1: 0.34, 2: 0.504950495049505}
cm.MCC
{0: 0.6831300510639732, 1: 0.25819888974716115, 2: 0.1690308509457033}
cm.BM
{0: 0.7777777777777777, 1: 0.2222222222222221, 2: 0.16666666666666652}
cm.MK
{0: 0.6000000000000001, 1: 0.30000000000000004, 2: 0.17142857142857126}
cm.PLR
{0: 4.5, 1: 2.9999999999999987, 2: 1.4999999999999998}
cm.NLR
{0: 0.0, 1: 0.7500000000000001, 2: 0.75}
cm.DOR
{0: 'None', 1: 3.999999999999998, 2: 1.9999999999999998}
cm.PRE
{0: 0.25, 1: 0.25, 2: 0.5}
cm.G
{0: 0.7745966692414834, 1: 0.408248290463863, 2: 0.5477225575051661}
cm.RACC
{0: 0.10416666666666667, 1: 0.041666666666666664, 2: 0.20833333333333334}
cm.Kappa
0.35483870967741943
Kappa | Strength of Agreement |
< 0 | Poor |
0 – 0.20 | Slight |
0.21 – 0.40 | Fair |
0.41 – 0.60 | Moderate |
0.61 – 0.80 | Substantial |
0.81 – 1.00 | Almost perfect |
cm.SOA1
'Fair'
Kappa | Strength of Agreement |
< 0.40 | Poor |
0.40 – 0.75 | Intermediate to Good |
More than 0.75 | Excellent |
cm.SOA2
'Poor'
Kappa | Strength of Agreement |
< 0.20 | Poor |
0.21 – 0.40 | Fair |
0.41 – 0.60 | Moderate |
0.61 – 0.80 | Good |
0.81 – 1.00 | Very Good |
cm.SOA3
'Fair'
cm.Overall_ACC
0.5833333333333334
cm.Overall_RACC
0.3541666666666667
cm.PPV_Micro
0.5833333333333334
cm.TPR_Micro
0.5833333333333334
cm.PPV_Macro
0.5666666666666668
cm.TPR_Macro
0.611111111111111
print(cm)
Predict 0 1 2 Actual 0 3 0 0 1 0 1 2 2 2 1 3 Overall Statistics : Kappa 0.35484 Overall_ACC 0.58333 Overall_RACC 0.35417 PPV_Macro 0.56667 PPV_Micro 0.58333 Strength_Of_Agreement(Altman) Fair Strength_Of_Agreement(Fleiss) Poor Strength_Of_Agreement(Landis and Koch) Fair TPR_Macro 0.61111 TPR_Micro 0.58333 Class Statistics : Classes 0 1 2 ACC(accuracy) 0.83333 0.75 0.58333 BM(Informedness or Bookmaker Informedness) 0.77778 0.22222 0.16667 DOR(Diagnostic odds ratio) None 4.0 2.0 ERR(Error Rate) 0.16667 0.25 0.41667 F0.5(F0.5 Score) 0.65217 0.45455 0.57692 F1(F1 Score - harmonic mean of precision and sensitivity) 0.75 0.4 0.54545 F2(F2 Score) 0.88235 0.35714 0.51724 FDR(false discovery rate) 0.4 0.5 0.4 FN(false negative/miss/Type II error) 0 2 3 FNR(miss rate or false negative rate) 0.0 0.66667 0.5 FOR(false omission rate) 0.0 0.2 0.42857 FP(false positive/Type I error/false alarm) 2 1 2 FPR(fall-out or false positive rate) 0.22222 0.11111 0.33333 G(G-measure geometric mean of precision and sensitivity) 0.7746 0.40825 0.54772 LR+(Positive likelihood ratio) 4.5 3.0 1.5 LR-(Negative likelihood ratio) 0.0 0.75 0.75 MCC(Matthews correlation coefficient) 0.68313 0.2582 0.16903 MK(Markedness) 0.6 0.3 0.17143 N(Condition negative) 9 9 6 NPV(negative predictive value) 1.0 0.8 0.57143 P(Condition positive) 3 3 6 POP(Population) 12 12 12 PPV(precision or positive predictive value) 0.6 0.5 0.6 PRE(Prevalence) 0.25 0.25 0.5 RACC(Random Accuracy) 0.10417 0.04167 0.20833 TN(true negative/correct rejection) 7 8 4 TNR(specificity or true negative rate) 0.77778 0.88889 0.66667 TON(Test outcome negative) 7 10 7 TOP(Test outcome positive) 5 2 5 TP(true positive/hit) 3 1 3 TPR(sensitivity, recall, hit rate, or true positive rate) 1.0 0.33333 0.5
cm.matrix()
Predict 0 1 2 Actual 0 3 0 0 1 0 1 2 2 2 1 3
cm.normalized_matrix()
Predict 0 1 2 Actual 0 1.0 0.0 0.0 1 0.0 0.33333 0.66667 2 0.33333 0.16667 0.5
cm.stat()
Overall Statistics : Kappa 0.35484 Overall_ACC 0.58333 Overall_RACC 0.35417 PPV_Macro 0.56667 PPV_Micro 0.58333 Strength_Of_Agreement(Altman) Fair Strength_Of_Agreement(Fleiss) Poor Strength_Of_Agreement(Landis and Koch) Fair TPR_Macro 0.61111 TPR_Micro 0.58333 Class Statistics : Classes 0 1 2 ACC(accuracy) 0.83333 0.75 0.58333 BM(Informedness or Bookmaker Informedness) 0.77778 0.22222 0.16667 DOR(Diagnostic odds ratio) None 4.0 2.0 ERR(Error Rate) 0.16667 0.25 0.41667 F0.5(F0.5 Score) 0.65217 0.45455 0.57692 F1(F1 Score - harmonic mean of precision and sensitivity) 0.75 0.4 0.54545 F2(F2 Score) 0.88235 0.35714 0.51724 FDR(false discovery rate) 0.4 0.5 0.4 FN(false negative/miss/Type II error) 0 2 3 FNR(miss rate or false negative rate) 0.0 0.66667 0.5 FOR(false omission rate) 0.0 0.2 0.42857 FP(false positive/Type I error/false alarm) 2 1 2 FPR(fall-out or false positive rate) 0.22222 0.11111 0.33333 G(G-measure geometric mean of precision and sensitivity) 0.7746 0.40825 0.54772 LR+(Positive likelihood ratio) 4.5 3.0 1.5 LR-(Negative likelihood ratio) 0.0 0.75 0.75 MCC(Matthews correlation coefficient) 0.68313 0.2582 0.16903 MK(Markedness) 0.6 0.3 0.17143 N(Condition negative) 9 9 6 NPV(negative predictive value) 1.0 0.8 0.57143 P(Condition positive) 3 3 6 POP(Population) 12 12 12 PPV(precision or positive predictive value) 0.6 0.5 0.6 PRE(Prevalence) 0.25 0.25 0.5 RACC(Random Accuracy) 0.10417 0.04167 0.20833 TN(true negative/correct rejection) 7 8 4 TNR(specificity or true negative rate) 0.77778 0.88889 0.66667 TON(Test outcome negative) 7 10 7 TOP(Test outcome positive) 5 2 5 TP(true positive/hit) 3 1 3 TPR(sensitivity, recall, hit rate, or true positive rate) 1.0 0.33333 0.5
cm.save_stat("cm1")
{'Message': 'C:\\Users\\Sepkjaer\\Desktop\\JupyterNotebooks\\cm1.pycm', 'Status': True}
cm.save_stat("cm1asdasd/")
{'Message': "[Errno 2] No such file or directory: 'cm1asdasd/.pycm'", 'Status': False}
cm2=ConfusionMatrix(y_actu, 2)
--------------------------------------------------------------------------- pycmError Traceback (most recent call last) <ipython-input-59-572bf15e689d> in <module>() ----> 1 cm2=ConfusionMatrix(y_actu, 2) ~\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pycm\pycm.py in __init__(self, actual_vector, predict_vector) 11 def __init__(self,actual_vector,predict_vector): 12 if not isinstance(actual_vector,list) or not isinstance(predict_vector,list): ---> 13 raise pycmError("Input Vectors Must Be List") 14 if len(actual_vector)!=len(predict_vector): 15 raise pycmError("Input Vectors Must Be The Same Length") pycmError: Input Vectors Must Be List
cm3=ConfusionMatrix(y_actu, [1,2,3])
--------------------------------------------------------------------------- pycmError Traceback (most recent call last) <ipython-input-60-fe0a030b981a> in <module>() ----> 1 cm3=ConfusionMatrix(y_actu, [1,2,3]) ~\AppData\Local\Programs\Python\Python35-32\lib\site-packages\pycm\pycm.py in __init__(self, actual_vector, predict_vector) 13 raise pycmError("Input Vectors Must Be List") 14 if len(actual_vector)!=len(predict_vector): ---> 15 raise pycmError("Input Vectors Must Be The Same Length") 16 matrix_param=matrix_params_calc(actual_vector,predict_vector) 17 self.actual_vector=actual_vector pycmError: Input Vectors Must Be The Same Length
1- Landis JR, Koch GG. The measurement of observer agreement for categorical data. Biometrics 1977; 33:159–174
2- Powers, D. M. W. (2011). Evaluation: From Precision, Recall and F-Measure to ROC, Informedness, Markedness and Correlation. Journal of Machine Learning Technologies.
3- C. Sammut, G. Webb, Encyclopedia of Machine Learning. Springer, 2011. Springer reference.
4- Fleiss, J. L. (1971). Measuring nominal scale agreement among many raters. Psychological Bulletin, 76(5), 378-382. http://dx.doi.org/10.1037/h0031619
5- Altman D.G. 1991. Practical Statistics for Medical Research. Chapman and Hall, London.