# scikit-learn removed load_boston in version 1.2 (deprecated in 1.0 over
# ethical concerns about the engineered 'B' feature), so the original import
# fails on current sklearn.  Load the raw data directly from the CMU StatLib
# archive instead — the approach sklearn's own deprecation notice recommends —
# and expose it under the same attribute names (.data, .target, .feature_names)
# so the rest of this script is unchanged.
from types import SimpleNamespace

import numpy as np
import pandas as pd

DATA_URL = "http://lib.stat.cmu.edu/datasets/boston"
# Each record spans two physical lines in the raw file: 11 values, then 3.
raw_df = pd.read_csv(DATA_URL, sep=r"\s+", skiprows=22, header=None)
boston = SimpleNamespace(
    data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
    target=raw_df.values[1::2, 2],  # MEDV: median home value in $1000s
    feature_names=np.array(
        ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
         "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"]
    ),
)
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df.head()
CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD | TAX | PTRATIO | B | LSTAT | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.00632 | 18.0 | 2.31 | 0.0 | 0.538 | 6.575 | 65.2 | 4.0900 | 1.0 | 296.0 | 15.3 | 396.90 | 4.98 |
1 | 0.02731 | 0.0 | 7.07 | 0.0 | 0.469 | 6.421 | 78.9 | 4.9671 | 2.0 | 242.0 | 17.8 | 396.90 | 9.14 |
2 | 0.02729 | 0.0 | 7.07 | 0.0 | 0.469 | 7.185 | 61.1 | 4.9671 | 2.0 | 242.0 | 17.8 | 392.83 | 4.03 |
3 | 0.03237 | 0.0 | 2.18 | 0.0 | 0.458 | 6.998 | 45.8 | 6.0622 | 3.0 | 222.0 | 18.7 | 394.63 | 2.94 |
4 | 0.06905 | 0.0 | 2.18 | 0.0 | 0.458 | 7.147 | 54.2 | 6.0622 | 3.0 | 222.0 | 18.7 | 396.90 | 5.33 |
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; a fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    boston_df, boston.target, test_size=0.25, random_state=31
)
# Sanity-check the split sizes (prints one shape tuple per line, same order
# as the original four print statements).
for split in (X_train, Y_train, X_test, Y_test):
    print(split.shape)
(379, 13) (379,) (127, 13) (127,)
from aix360.algorithms.rbm import FeatureBinarizer

# Binarize each numeric column into threshold indicator features; with
# negations=True the complementary '>' columns are emitted alongside '<='
# (visible in the output below, where CRIM gets both operation groups).
binarizer = FeatureBinarizer(negations=True)
X_train_fb = binarizer.fit_transform(X_train)  # learn thresholds on train only
X_test_fb = binarizer.transform(X_test)        # reuse the train thresholds
X_train_fb['CRIM'][:10]
operation | <= | > | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
value | 0.037314 | 0.059438 | 0.091626 | 0.141662 | 0.229270 | 0.489478 | 1.374682 | 4.849966 | 9.842102 | 0.037314 | 0.059438 | 0.091626 | 0.141662 | 0.229270 | 0.489478 | 1.374682 | 4.849966 | 9.842102 |
90 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
434 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
384 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
175 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
22 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |
54 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
62 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
320 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
199 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
273 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
from aix360.algorithms.rbm import GLRMExplainer, LinearRuleRegression
from sklearn.metrics import (
    explained_variance_score,
    max_error,
    mean_absolute_error,
    r2_score,
)

# Fit a Generalized Linear Rule Model: a linear regression over the binarized
# rule features, wrapped in an explainer that can report the learned rules.
linear_model = LinearRuleRegression()
explainer = GLRMExplainer(linear_model)
explainer.fit(X_train_fb, Y_train)
Y_pred = explainer.predict(X_test_fb)

# Held-out regression quality on the test split.
print(f'R2 Score = {r2_score(Y_test, Y_pred)}')
print(f'Explained Variance = {explained_variance_score(Y_test, Y_pred)}')
print(f'Mean abs. error = {mean_absolute_error(Y_test, Y_pred)}')
print(f'Max error = {max_error(Y_test, Y_pred)}')
R2 Score = 0.8249087941706609 Explained Variance = 0.8289797066674642 Mean abs. error = 2.754048495903457 Max error = 11.399762504643611
# Display the fitted rule set: each row pairs a binarized rule (e.g.
# "NOX <= 0.66") with its linear-model coefficient, as shown in the
# table below — larger-magnitude coefficients contribute more to the
# predicted price.
explainer.explain()
rule | coefficient | |
---|---|---|
0 | (intercept) | 21.944 |
1 | NOX <= 0.66 | 6.31439 |
2 | RM <= 7.16 AND DIS > 1.62 | -5.78581 |
3 | LSTAT <= 4.66 | 5.4635 |
4 | DIS <= 3.32 AND RAD > 2.00 AND B > 295.98 AND ... | 4.80349 |
5 | CHAS not AND PTRATIO > 16.10 | -3.9513 |
6 | RM <= 7.16 AND RAD <= 6.00 | -3.35462 |
7 | TAX > 293.00 AND LSTAT > 4.66 | -2.96268 |
8 | LSTAT <= 15.03 | 2.84329 |
9 | INDUS > 4.05 AND LSTAT > 4.66 | -2.56005 |
10 | DIS <= 7.24 AND RAD > 2.00 AND PTRATIO <= 20.9... | 2.51755 |
11 | LSTAT <= 9.48 | 2.50064 |
12 | CRIM <= 9.84 AND DIS <= 4.64 AND RAD > 1.00 AN... | 2.28817 |
13 | LSTAT <= 17.60 | 1.91807 |
14 | TAX > 330.00 AND LSTAT > 4.66 | -1.80772 |
15 | CRIM <= 9.84 AND CRIM > 0.06 AND PTRATIO <= 20.90 | 1.79055 |
16 | LSTAT <= 6.25 | 1.66632 |
17 | RM <= 7.16 AND B > 380.27 | -1.61764 |
18 | LSTAT <= 11.12 | 1.59846 |
19 | RAD > 2.00 AND LSTAT <= 22.79 | 1.24117 |
20 | RM <= 7.16 | -1.20717 |
21 | CHAS not AND RM <= 7.16 | 1.19303 |
22 | RM <= 6.51 | -1.13602 |
23 | CRIM <= 9.84 AND DIS <= 3.95 AND TAX <= 666.00... | 1.0065 |
24 | CRIM <= 9.84 AND RAD > 1.00 AND LSTAT <= 22.79 | 0.972651 |
25 | DIS <= 3.95 AND LSTAT <= 22.79 | -0.919471 |
26 | RM <= 6.74 | -0.795374 |
27 | PTRATIO <= 19.52 | 0.795212 |
28 | NOX <= 0.66 AND PTRATIO <= 20.90 AND LSTAT <= ... | -0.775433 |
29 | RAD > 4.00 AND LSTAT <= 22.79 | -0.632437 |
30 | B <= 391.27 AND LSTAT <= 22.79 | 0.479933 |
31 | LSTAT <= 7.58 | 0.43504 |
32 | LSTAT <= 13.14 | 0.178565 |
%matplotlib inline
fb = FeatureBinarizer(negations=True)
figs, _ = explainer.visualize(boston_df, fb)