In [1]:
# Load the breast-cancer dataset shipped with scikit-learn. The returned object
# supplies the feature matrix (`bc.data`), the column names (`bc.feature_names`)
# and the labels (`bc.target`) used by the cells below.
from sklearn.datasets import load_breast_cancer
bc = load_breast_cancer()
In [2]:
import pandas as pd

# Wrap the raw feature matrix in a DataFrame with named columns so that later
# steps (and the learned rules) can refer to features by name.
bc_df = pd.DataFrame(data=bc.data, columns=bc.feature_names)

# Preview the first five rows.
bc_df.head(n=5)
Out[2]:
mean radius mean texture mean perimeter mean area mean smoothness mean compactness mean concavity mean concave points mean symmetry mean fractal dimension ... worst radius worst texture worst perimeter worst area worst smoothness worst compactness worst concavity worst concave points worst symmetry worst fractal dimension
0 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 0.07871 ... 25.38 17.33 184.60 2019.0 0.1622 0.6656 0.7119 0.2654 0.4601 0.11890
1 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 0.05667 ... 24.99 23.41 158.80 1956.0 0.1238 0.1866 0.2416 0.1860 0.2750 0.08902
2 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 0.05999 ... 23.57 25.53 152.50 1709.0 0.1444 0.4245 0.4504 0.2430 0.3613 0.08758
3 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 0.2597 0.09744 ... 14.91 26.50 98.87 567.7 0.2098 0.8663 0.6869 0.2575 0.6638 0.17300
4 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 0.1809 0.05883 ... 22.54 16.67 152.20 1575.0 0.1374 0.2050 0.4000 0.1625 0.2364 0.07678

5 rows × 30 columns

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    bc_df,
    bc.target,
    test_size=0.2,
    random_state=31,
)

# Sanity-check the split: one shape per line, train pair first, then test pair.
print(*(part.shape for part in (X_train, Y_train, X_test, Y_test)), sep='\n')
(455, 30)
(455,)
(114, 30)
(114,)
In [4]:
from aix360.algorithms.rbm import FeatureBinarizer

# Convert the numeric features into binary threshold indicators. The binarizer
# is fitted on the training rows only and then re-applied unchanged to the
# test rows, so both sets share the same thresholds.
# NOTE(review): the displayed output carries both `<=` and `>` columns per
# threshold, presumably because of negations=True - confirm in the aix360 docs.
fb = FeatureBinarizer(negations=True)
X_train_fb = fb.fit_transform(X_train)
X_test_fb = fb.transform(X_test)

# Inspect the binarized columns derived from 'mean radius' for the first
# eight training rows (positional slice; the row labels are shuffled).
X_train_fb['mean radius'].iloc[:8]
Out[4]:
operation <= >
value 10.254 11.328 11.942 12.604 13.270 14.142 15.058 17.026 19.324 10.254 11.328 11.942 12.604 13.270 14.142 15.058 17.026 19.324
468 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0
179 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0
114 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0
35 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 0
88 0 0 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0
507 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0
213 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0
134 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0
In [5]:
from aix360.algorithms.rbm import BRCGExplainer, BooleanRuleCG
In [6]:
# Build the Boolean-rule model and wrap it in the BRCG explainer, which is the
# object the following cells call fit / predict / explain on.
# NOTE(review): silent=True presumably suppresses training progress output -
# confirm against the aix360 documentation.
boolean_model = BooleanRuleCG(silent=True)
explainer = BRCGExplainer(boolean_model)
# Train on the binarized training features and their labels.
explainer.fit(X_train_fb, Y_train)
In [7]:
# Predict labels for the held-out test rows; the input is the test set passed
# through the same fitted binarizer that produced the training features.
Y_pred = explainer.predict(X_test_fb)
In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the test-set predictions against the true labels.
# NOTE(review): precision, recall and F1 are called with sklearn's defaults,
# which treat label 1 as the positive class - confirm which diagnosis
# bc.target encodes as 1 (see bc.target_names) before interpreting them.
accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)

print(f'Accuracy = {accuracy}')
print(f'Precision = {precision}')
print(f'Recall = {recall}')
print(f'F1 = {f1}')
Accuracy = 0.9298245614035088
Precision = 0.9538461538461539
Recall = 0.9253731343283582
F1 = 0.9393939393939394
In [9]:
# Retrieve the learned rule set and print it in human-readable form.
explanation = explainer.explain()

# The 'isCNF' flag decides which class the listed rules predict: when it is
# set, a satisfied rule means Y=0; otherwise a satisfied rule means Y=1.
if explanation['isCNF']:
    header = 'Predict Y=0 if ANY of the following rules are satisfied, otherwise Y=1:'
else:
    header = 'Predict Y=1 if ANY of the following rules are satisfied, otherwise Y=0:'

print(header)
print()
# One bullet per rule.
for clause in explanation['rules']:
    print(f'  - {clause}')
Predict Y=1 if ANY of the following rules are satisfied, otherwise Y=0:

  - compactness error > 0.01 AND worst concavity <= 0.22 AND worst symmetry <= 0.28
  - mean texture <= 15.46 AND mean concavity <= 0.15 AND area error <= 54.16
  - fractal dimension error > 0.00 AND worst area <= 680.60 AND worst concave points <= 0.18
  - mean concave points <= 0.05 AND perimeter error <= 3.80 AND worst area <= 930.88 AND worst smoothness <= 0.16
In [ ]: