%load_ext autoreload
%autoreload 2
import sys
sys.path.append("../")
from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from sklearn.metrics import accuracy_score
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult, load_preproc_data_compas, load_preproc_data_german
from aif360.algorithms.inprocessing.meta_fair_classifier import MetaFairClassifier
from aif360.algorithms.inprocessing.celisMeta.utils import getStats
from IPython.display import Markdown, display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
display(Markdown("### Meta-Algorithm for fair classification."))
# Fix: "have to specified" -> "have to be specified" in the displayed text.
display(Markdown("The fairness metrics to be optimized have to be specified as \"input\". Currently we can handle the following fairness metrics."))
display(Markdown("Statistical Rate, False Positive Rate, True Positive Rate, False Negative Rate, True Negative Rate,"))
display(Markdown("Accuracy Rate, False Discovery Rate, False Omission Rate, Positive Predictive Rate, Negative Predictive Rate."))
display(Markdown("#### -----------------------------"))
display(Markdown("The example below considers the case of False Discovery Parity."))
The fairness metrics to be optimized have to be specified as "input". Currently we can handle the following fairness metrics.
Statistical Rate, False Positive Rate, True Positive Rate, False Negative Rate, True Negative Rate,
Accuracy Rate, False Discovery Rate, False Omission Rate, Positive Predictive Rate, Negative Predictive Rate.
The example below considers the case of False Discovery Parity.
# Designate "sex" as the protected attribute: 1 = privileged, 0 = unprivileged.
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

# Load the preprocessed Adult dataset and hold out 30% for testing.
dataset_orig = load_preproc_data_adult()
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)
def _describe(title, *values):
    """Render a Markdown heading, then print the associated value(s)."""
    display(Markdown(title))
    print(*values)

# Summarize the training split: shape, label encoding, protected attributes.
_describe("#### Training Dataset shape", dataset_orig_train.features.shape)
_describe("#### Favorable and unfavorable labels",
          dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)
_describe("#### Protected attribute names",
          dataset_orig_train.protected_attribute_names)
_describe("#### Privileged and unprivileged protected attribute values",
          dataset_orig_train.privileged_protected_attributes,
          dataset_orig_train.unprivileged_protected_attributes)
_describe("#### Dataset feature names", dataset_orig_train.feature_names)
(34189, 18)
(1.0, 0.0)
['sex', 'race']
([array([1.]), array([1.])], [array([0.]), array([0.])])
['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']
# NOTE(review): this cell duplicates the dataset summary printed just above.
train = dataset_orig_train
display(Markdown("#### Training Dataset shape"))
print(train.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(train.favorable_label, train.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(train.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(train.privileged_protected_attributes,
      train.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(train.feature_names)
(34189, 18)
(1.0, 0.0)
['sex', 'race']
([array([1.]), array([1.])], [array([0.]), array([0.])])
['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']
# Baseline fairness of the raw splits: difference in mean outcomes between
# the unprivileged and privileged groups.
for split_name, split in (("Train", dataset_orig_train), ("Test", dataset_orig_test)):
    metric = BinaryLabelDatasetMetric(split,
                                      unprivileged_groups=unprivileged_groups,
                                      privileged_groups=privileged_groups)
    print("%s set: Difference in mean outcomes between unprivileged and privileged groups = %f"
          % (split_name, metric.mean_difference()))
# Scale features with MaxAbsScaler: fit on the training split, apply to test.
# (Renamed the local from "min_max_scaler" — it is a MaxAbsScaler, not MinMax.)
scaler = MaxAbsScaler()
dataset_orig_train.features = scaler.fit_transform(dataset_orig_train.features)
dataset_orig_test.features = scaler.transform(dataset_orig_test.features)

# Scaling only touches features, so the group label statistics are unchanged.
for split_name, split in (("Train", dataset_orig_train), ("Test", dataset_orig_test)):
    scaled_metric = BinaryLabelDatasetMetric(split,
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
    print("%s set: Difference in mean outcomes between unprivileged and privileged groups = %f"
          % (split_name, scaled_metric.mean_difference()))
Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.193944 Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.195913 Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.193944 Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.195913
# Baseline: train the meta-fair classifier with no fairness constraint (tau=0)
# on the "sex" attribute. fit() returns the fitted classifier itself.
biased_model = MetaFairClassifier(tau=0, sensitive_attr="sex").fit(dataset_orig_train)
<aif360.algorithms.inprocessing.meta_fair_classifier.MetaFairClassifer at 0x110c21110>
# Score the unconstrained model on the test split and record its FDR ratio.
dataset_bias_test = biased_model.predict(dataset_orig_test)

# getStats expects labels in {+1, -1}: +1 marks the favorable label.
fav = dataset_orig_train.favorable_label
preds = [1 if label == fav else -1 for label in list(dataset_bias_test.labels)]
y_test = np.array([1 if row == [fav] else -1 for row in dataset_orig_test.labels])
x_control_test = pd.DataFrame(data=dataset_orig_test.features,
                              columns=dataset_orig_test.feature_names)["sex"]
acc, sr, unconstrainedFDR = getStats(y_test, preds, x_control_test)
print(unconstrainedFDR)
('Accuracy : ', 3148, 14653, 0.7851634477581383) ('SR tau : ', 0.5128381178595508) ('FPR tau : ', 0.7945499159671334) ('FNR tau : ', 0.910501272336843) ('TPR tau : ', 0.7721613485851896) ('TNR tau : ', 0.986749402037707) ('AR tau : ', 0.8525978220135617) ('FDR tau : ', 0.5030017152658662) ('FOR tau : ', 0.3717552930362757) ('PPR tau : ', 0.5485001947798986) ('NPR tau : ', 0.827615343560593) 0.503001715266
# Train the constrained ("debiased") classifier: tau sets the strength of the
# False Discovery Rate constraint on the "sex" attribute.
tau = 0.8
debiased_model = MetaFairClassifier(tau=tau, sensitive_attr="sex").fit(dataset_orig_train)
('Training Accuracy: ', 0.7350317353534763, ', Training gamma: ', 0.672899406837947)
<aif360.algorithms.inprocessing.meta_fair_classifier.MetaFairClassifer at 0x110c27c10>
# Apply the debiased model to both splits and check the mean-outcome gap.
dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
dataset_debiasing_test = debiased_model.predict(dataset_orig_test)

display(Markdown("#### Model - with debiasing - dataset metrics"))
for split_name, predicted in (("Train", dataset_debiasing_train),
                              ("Test", dataset_debiasing_test)):
    debiasing_metric = BinaryLabelDatasetMetric(predicted,
                                                unprivileged_groups=unprivileged_groups,
                                                privileged_groups=privileged_groups)
    print("%s set: Difference in mean outcomes between unprivileged and privileged groups = %f"
          % (split_name, debiasing_metric.mean_difference()))
Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.201319 Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.195210
display(Markdown("#### Model - with debiasing - classification metrics"))
# Compare ground truth against the debiased predictions on the test split.
cm = ClassificationMetric(dataset_orig_test,
                          dataset_debiasing_test,
                          unprivileged_groups=unprivileged_groups,
                          privileged_groups=privileged_groups)
print("Test set: Classification accuracy = %f" % cm.accuracy())
# Balanced accuracy is the mean of the true positive and true negative rates.
bal_acc_debiasing_test = 0.5 * (cm.true_positive_rate() + cm.true_negative_rate())
print("Test set: Balanced classification accuracy = %f" % bal_acc_debiasing_test)
print("Test set: Disparate impact = %f" % cm.disparate_impact())
print("Test set: Equal opportunity difference = %f" % cm.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % cm.average_odds_difference())
print("Test set: Theil_index = %f" % cm.theil_index())
Test set: Classification accuracy = 0.731932 Test set: Balanced classification accuracy = 0.716763 Test set: Disparate impact = 0.539856 Test set: Equal opportunity difference = -0.120467 Test set: Average odds difference = -0.117636 Test set: Theil_index = 0.128652
### Testing
# The constrained model's FDR ratio must not fall below the unconstrained one.
fav = dataset_orig_train.favorable_label
predictions = [1 if y == fav else -1 for y in list(dataset_debiasing_test.labels)]
y_test = np.array([1 if y == [fav] else -1 for y in dataset_orig_test.labels])
x_control_test = pd.DataFrame(data=dataset_orig_test.features,
                              columns=dataset_orig_test.feature_names)["sex"]
acc, sr, fdr = getStats(y_test, predictions, x_control_test)
print(fdr, unconstrainedFDR)
assert fdr >= unconstrainedFDR
('Accuracy : ', 3928, 14653, 0.7319320275711458) ('SR tau : ', 0.5398556890759312) ('FPR tau : ', 0.6157226437750696) ('FNR tau : ', 0.7093999136230463) ('TPR tau : ', 0.8293479564733099) ('TNR tau : ', 0.8593163406441414) ('AR tau : ', 0.8892945217528149) ('FDR tau : ', 0.6832866118898019) ('FOR tau : ', 0.3834976405176844) ('PPR tau : ', 0.5596391928376183) ('NPR tau : ', 0.8967236467236467) (0.6832866118898019, 0.5030017152658662)
# Repeat the unconstrained baseline, now with "race" as the protected attribute.
biased_model = MetaFairClassifier(tau=0, sensitive_attr="race")
biased_model.fit(dataset_orig_train)
dataset_bias_test = biased_model.predict(dataset_orig_test)

# Convert to the {+1, -1} encoding used by celisMeta.utils.getStats.
fav = dataset_orig_train.favorable_label
predictions = [1 if y == fav else -1 for y in list(dataset_bias_test.labels)]
y_test = np.array([1 if y == [fav] else -1 for y in dataset_orig_test.labels])
x_control_test = pd.DataFrame(data=dataset_orig_test.features,
                              columns=dataset_orig_test.feature_names)["race"]
acc, sr, unconstrainedFDR = getStats(y_test, predictions, x_control_test)
display(Markdown("#### Running the algorithm for different tau values"))

# Sweep the fairness-constraint strength tau and record, for each setting,
# the test accuracy, the FDR ratio, and the statistical rate.
# Fix: the notebook export lost the loop-body indentation (L160-171 sat at
# column 0 after the `for` header, a syntax error); it is restored here.
accuracies, false_discovery_rates, statistical_rates = [], [], []
s_attr = "race"
# Converting to form used by celisMeta.utils file
y_test = np.array([1 if y == [dataset_orig_train.favorable_label] else -1
                   for y in dataset_orig_test.labels])
x_control_test = pd.DataFrame(data=dataset_orig_test.features,
                              columns=dataset_orig_test.feature_names)[s_attr]

all_tau = np.linspace(0.1, 0.9, 9)
for tau in all_tau:
    print("Tau: %.2f" % tau)
    debiased_model = MetaFairClassifier(tau=tau, sensitive_attr=s_attr)
    debiased_model.fit(dataset_orig_train)

    dataset_debiasing_test = debiased_model.predict(dataset_orig_test)
    predictions = dataset_debiasing_test.labels
    predictions = [1 if y == dataset_orig_train.favorable_label else -1 for y in predictions]
    acc, sr, fdr = getStats(y_test, predictions, x_control_test)

    ## Testing: the achieved FDR ratio must satisfy the requested constraint,
    ## or at least not fall below the unconstrained baseline.
    assert (tau < unconstrainedFDR) or (fdr >= unconstrainedFDR)

    accuracies.append(acc)
    false_discovery_rates.append(fdr)
    statistical_rates.append(sr)
Tau: 0.10 ('Training Accuracy: ', 0.59007283044254, ', Training gamma: ', 0.8471557184765197) ('Accuracy : ', 6015, 14653, 0.5895038558656931) ('SR tau : ', 0.8607089248858592) ('FPR tau : ', 0.8957864358026685) ('FNR tau : ', 0.9194857234907978) ('TPR tau : ', 0.9919093179930415) ('TNR tau : ', 0.8974125546638897) ('AR tau : ', 0.9991230759162755) ('FDR tau : ', 0.8457246400235614) ('FOR tau : ', 0.5351545846135752) ('PPR tau : ', 0.6980432406212983) ('NPR tau : ', 0.972325603734294) Tau: 0.20 ('Training Accuracy: ', 0.7089122232296938, ', Training gamma: ', 0.8560547557579788) ('Accuracy : ', 4219, 14653, 0.7120726131167678) ('SR tau : ', 0.6866930102717664) ('FPR tau : ', 0.6726467708167688) ('FNR tau : ', 0.9258698009067614) ('TPR tau : ', 0.9788702965603217) ('TNR tau : ', 0.8609042092761701) ('AR tau : ', 0.9016318454549019) ('FDR tau : ', 0.8985716754370806) ('FOR tau : ', 0.5212835077229696) ('PPR tau : ', 0.8634340785883854) ('NPR tau : ', 0.9509873699572469) Tau: 0.30 ('Training Accuracy: ', 0.7305566117757174, ', Training gamma: ', 0.8652540142403449) ('Accuracy : ', 3971, 14653, 0.7289974749198116) ('SR tau : ', 0.6378556299285792) ('FPR tau : ', 0.6258949415833267) ('FNR tau : ', 0.8021281808953761) ('TPR tau : ', 0.9157061270745418) ('TNR tau : ', 0.8661540136913607) ('AR tau : ', 0.9010399037361635) ('FDR tau : ', 0.8970117068060988) ('FOR tau : ', 0.6040206475603341) ('PPR tau : ', 0.8695616726701354) ('NPR tau : ', 0.9531107873071419) Tau: 0.40 ('Training Accuracy: ', 0.6383339670654304, ', Training gamma: ', 0.8874069404811007) ('Accuracy : ', 5277, 14653, 0.6398689688118474) ('SR tau : ', 0.6738297875613554) ('FPR tau : ', 0.644122920953404) ('FNR tau : ', 0.7865711339087011) ('TPR tau : ', 0.9673632005976219) ('TNR tau : ', 0.7621665735103976) ('AR tau : ', 0.8627152073258121) ('FDR tau : ', 0.9207857965052172) ('FOR tau : ', 0.5333710407239819) ('PPR tau : ', 0.869572944869857) ('NPR tau : ', 0.9685594512195121) Tau: 0.50 ('Training Accuracy: 
', 0.6278920120506596, ', Training gamma: ', 0.8424560564810398) ('Accuracy : ', 5474, 14653, 0.6264246229441071) ('SR tau : ', 0.8298481425555508) ('FPR tau : ', 0.8569955013034397) ('FNR tau : ', 0.928239074324443) ('TPR tau : ', 0.9916079407319798) ('TNR tau : ', 0.8873302430084463) ('AR tau : ', 0.9673097194084582) ('FDR tau : ', 0.8523093321100546) ('FOR tau : ', 0.4555634964843873) ('PPR tau : ', 0.7360851226839791) ('NPR tau : ', 0.9639178758413839) Tau: 0.60 ('Training Accuracy: ', 0.688964286758899, ', Training gamma: ', 0.8364392682037156) ('Accuracy : ', 4525, 14653, 0.6911895175049478) ('SR tau : ', 0.7999629846862536) ('FPR tau : ', 0.8174527554362845) ('FNR tau : ', 0.7909665888208081) ('TPR tau : ', 0.953958901547282) ('TNR tau : ', 0.9070827451204897) ('AR tau : ', 0.9394597060776729) ('FDR tau : ', 0.8613593842228706) ('FOR tau : ', 0.4055043530080791) ('PPR tau : ', 0.7937196009266697) ('NPR tau : ', 0.9433076267447764) Tau: 0.70 ('Training Accuracy: ', 0.758694316885548, ', Training gamma: ', 0.8794270410853803) ('Accuracy : ', 3569, 14653, 0.7564321299392616) ('SR tau : ', 0.6227876622165098) ('FPR tau : ', 0.5866903792182638) ('FNR tau : ', 0.9190407482450215) ('TPR tau : ', 0.9560780895648338) ('TNR tau : ', 0.890147909980094) ('AR tau : ', 0.896234124640508) ('FDR tau : ', 0.9343469954055406) ('FOR tau : ', 0.5509667897652915) ('PPR tau : ', 0.9298652703704154) ('NPR tau : ', 0.9372291956457304) Tau: 0.80 ('Training Accuracy: ', 0.7235953084325368, ', Training gamma: ', 0.8054119984862806) ('Accuracy : ', 4059, 14653, 0.7229918787961509) ('SR tau : ', 0.854029993599877) ('FPR tau : ', 0.8938353737389849) ('FNR tau : ', 0.6857428917603186) ('TPR tau : ', 0.9027407287653024) ('TNR tau : ', 0.9581570773154029) ('AR tau : ', 0.9535764486010664) ('FDR tau : ', 0.8409937049267278) ('FOR tau : ', 0.37742963089855464) ('PPR tau : ', 0.7856525093953137) ('NPR tau : ', 0.9281236852587971) Tau: 0.90 ('Training Accuracy: ', 0.7241217935593319, ', 
Training gamma: ', 0.8099147766891792) ('Accuracy : ', 4051, 14653, 0.7235378420801202) ('SR tau : ', 0.8381936758377602) ('FPR tau : ', 0.8732228179504548) ('FNR tau : ', 0.7206111743921992) ('TPR tau : ', 0.9125888094427421) ('TNR tau : ', 0.9504356649707367) ('AR tau : ', 0.9489248561688661) ('FDR tau : ', 0.8448827898766063) ('FOR tau : ', 0.39272980229352533) ('PPR tau : ', 0.791857698925242) ('NPR tau : ', 0.9297660413700446)
display(Markdown("### Plot of accuracy and output fairness vs input constraint (tau)"))
# Fix: use raw strings for the LaTeX labels — in a normal string "\g" is an
# invalid escape sequence (DeprecationWarning, SyntaxWarning in 3.12+).
display(Markdown(r"#### Output fairness is represented by $\gamma_{fdr}$, which is the ratio of false discovery rate of different sensitive attribute values."))

fig, ax1 = plt.subplots(figsize=(13, 7))

# Left axis: accuracy vs tau.
ax1.plot(all_tau, accuracies, color='r')
ax1.set_title(r'Accuracy and $\gamma_{fdr}$ vs Tau', fontsize=16, fontweight='bold')
ax1.set_xlabel('Input Tau', fontsize=16, fontweight='bold')
ax1.set_ylabel('Accuracy', color='r', fontsize=16, fontweight='bold')
ax1.xaxis.set_tick_params(labelsize=14)
ax1.yaxis.set_tick_params(labelsize=14)

# Right axis: FDR ratio vs tau, sharing the same x axis.
ax2 = ax1.twinx()
ax2.plot(all_tau, false_discovery_rates, color='b')
ax2.set_ylabel(r'$\gamma_{fdr}$', color='b', fontsize=16, fontweight='bold')
ax2.yaxis.set_tick_params(labelsize=14)
ax2.grid(True)
# #
# References:
# Celis, L. E., Huang, L., Keswani, V., & Vishnoi, N. K. (2018).
# "Classification with Fairness Constraints: A Meta-Algorithm with Provable Guarantees."