#!/usr/bin/env python
# coding: utf-8

# In[ ]:

import pandas as pd
import numpy as np
np.random.seed(42)

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.utils.class_weight import compute_class_weight

from keras.layers import Input, Dense, Dropout
from keras.models import Model

# In[ ]:

get_ipython().system('ls ../input')

# In[ ]:

path = '../input/adult.csv'
input_data = pd.read_csv(path, na_values="?")

# In[ ]:

input_data.head()

# In[ ]:

# Keep only the 'White' and 'Black' race groups
input_data = input_data[input_data['race'].isin(['White', 'Black'])]

# In[ ]:

input_data.head()

# In[ ]:

# Sensitive attributes; we identify 'race' and 'gender' as sensitive attributes
sensitive_attribs = ['race', 'gender']
A = input_data[sensitive_attribs]
A = pd.get_dummies(A, drop_first=True)
A.columns = sensitive_attribs

# In[ ]:

A.head()

# In[ ]:

# Target: 1 if the person earns more than 50K, 0 otherwise
y = (input_data['income'] == '>50K').astype(int)

# In[ ]:

# Features: everything except the target and the sensitive attributes
X = input_data.drop(labels=['income', 'race', 'gender'], axis=1)
X = X.fillna('Unknown')
X = pd.get_dummies(X, drop_first=True)

# In[ ]:

# Split into train/test set
X_train, X_test, y_train, y_test, A_train, A_test = train_test_split(
    X, y, A, test_size=0.5, stratify=y, random_state=7)

# Standardize the data
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

# In[ ]:

def p_rule(y_pred, a_values, threshold=0.5):
    # P%-rule: ratio of positive-prediction rates between the two groups defined
    # by a binary sensitive attribute, expressed as a percentage (100 = parity).
    y_a_1 = (y_pred[a_values == 1] > threshold) if threshold else y_pred[a_values == 1]
    y_a_0 = (y_pred[a_values == 0] > threshold) if threshold else y_pred[a_values == 0]
    odds = y_a_1.mean() / y_a_0.mean()
    return np.min([odds, 1 / odds]) * 100

# In[ ]:

def make_trainable_fn(net):
    # Produces a function that makes a network trainable or not
    def make_trainable(flag):
        # Loop over layers and set their trainability
        net.trainable = flag
        for layer in net.layers:
            layer.trainable = flag
    return make_trainable

# In[ ]:

def compute_class_weights(data_set):
    # Balanced class weights for each column of a (possibly multi-column) binary target
    class_values = [0, 1]
    class_weights = []
    if len(data_set.shape) == 1:
        balanced_weights = compute_class_weight('balanced',
                                                classes=np.array(class_values),
                                                y=data_set)
        class_weights.append(dict(zip(class_values, balanced_weights)))
    else:
        n_attr = data_set.shape[1]
        for attr_idx in range(n_attr):
            balanced_weights = compute_class_weight('balanced',
                                                    classes=np.array(class_values),
                                                    y=np.array(data_set)[:, attr_idx])
            class_weights.append(dict(zip(class_values, balanced_weights)))
    return class_weights

# In[ ]:

def compute_target_class_weights(y):
    # Balanced class weights for a single binary target
    class_values = [0, 1]
    balanced_weights = compute_class_weight('balanced',
                                            classes=np.array(class_values),
                                            y=y)
    class_weights = {'y': dict(zip(class_values, balanced_weights))}
    return class_weights

# In[ ]:

n_features = X_train.shape[1]
n_sensitive = A_train.shape[1]
lambdas = [130., 30.]
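# In[ ]:

# A short note on the setup below (added explanation, not from the original
# notebook): the classifier predicts income, while an adversary tries to
# recover the sensitive attributes from the classifier's output. In the
# combined model the adversary's losses receive the negative weights -lambda,
# so minimizing the combined loss
#
#   L = L_clf(y, y_hat) - lambda_race * L_adv_race - lambda_gender * L_adv_gender
#
# keeps the classifier accurate while pushing its output to be as uninformative
# as possible about race and gender. The lambdas above (130. for race, 30. for
# gender) control how strongly each attribute is protected.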
# In[ ]:

clf_inputs = Input(shape=(n_features,))  # Classifier input = all features

############### Create CLF net ########################
x = Dense(32, activation='relu')(clf_inputs)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)
x = Dropout(0.2)(x)
outputs = Dense(1, activation='sigmoid', name='y')(x)
clf_net = Model(inputs=[clf_inputs], outputs=[outputs])
#######################################################

# In[ ]:

adv_inputs = Input(shape=(1,))  # Adversary input = classifier output (one number)

############## Create ADV net #########################
x = Dense(32, activation='relu')(adv_inputs)
x = Dense(32, activation='relu')(x)
x = Dense(32, activation='relu')(x)
outputs = [Dense(1, activation='sigmoid')(x) for _ in range(n_sensitive)]
adv_net = Model(inputs=[adv_inputs], outputs=outputs)
#######################################################

# In[ ]:

############## Create train switches #################
trainable_clf_net = make_trainable_fn(clf_net)  # Function to make classifier trainable
trainable_adv_net = make_trainable_fn(adv_net)  # Function to make adversary trainable
######################################################

# In[ ]:

# Compile the stand-alone classifier
clf = clf_net
trainable_clf_net(True)
clf.compile(loss='binary_crossentropy', optimizer='adam')

# In[ ]:

# Create the combined classifier-adversary network
adv_out = adv_net(clf_net(clf_inputs))
clf_out = clf_net(clf_inputs)
clf_w_adv = Model(inputs=[clf_inputs], outputs=[clf_out] + adv_out)

# The adversary is not trainable, the classifier is
trainable_clf_net(True)
trainable_adv_net(False)

# Create a weighted loss over all sensitive variables:
# the adversary losses enter with negative weights -lambda
loss_weights = [1.] + [-lambda_param for lambda_param in lambdas]

# Compile the combined network
clf_w_adv.compile(loss='binary_crossentropy', loss_weights=loss_weights, optimizer='adam')

# In[ ]:

# Compile the adversary with the classifier's output as its input
adv = Model(inputs=[clf_inputs], outputs=adv_net(clf_net(clf_inputs)))

# Classifier is not trainable, adversary is
trainable_clf_net(False)
trainable_adv_net(True)
adv.compile(loss='binary_crossentropy', optimizer='adam')

# In[ ]:

# Pre-train the classifier alone
trainable_clf_net(True)
clf.fit(X_train.values, y_train.values, epochs=10)

# In[ ]:

# Pre-train the adversary on the (frozen) classifier's outputs
trainable_clf_net(False)
trainable_adv_net(True)
class_weight_adv = compute_class_weights(A_train)
adv.fit(X_train.values,
        np.hsplit(A_train.values, A_train.shape[1]),
        class_weight=class_weight_adv,
        epochs=10)

# In[ ]:

y_pred = clf.predict(X_test)

# In[ ]:

# P%-rule of the plain classifier for each sensitive attribute
for sens in A_test.columns:
    pr = p_rule(y_pred, A_test[sens])
    print(sens, pr)

# In[ ]:

acc = accuracy_score(y_test, (y_pred > 0.5)) * 100
print('Clf acc: {:.2f}'.format(acc))

# In[ ]:

n_iter = 250
batch_size = 128
n_sensitive = A_train.shape[1]
class_weight_clf_w_adv = [{0: 1., 1: 1.}] + class_weight_adv
val_metrics = pd.DataFrame()
fairness_metrics = pd.DataFrame()

for idx in range(n_iter):  # Train for n_iter epochs
    # Train the adversary
    trainable_clf_net(False)
    trainable_adv_net(True)
    adv.fit(X_train.values,
            np.hsplit(A_train.values, A_train.shape[1]),
            batch_size=batch_size,
            class_weight=class_weight_adv,
            epochs=1,
            verbose=0)

    # Train the classifier:
    # make the classifier trainable and the adversary untrainable
    trainable_clf_net(True)
    trainable_adv_net(False)
    # Sample a batch
    indices = np.random.permutation(len(X_train))[:batch_size]
    # Train on the batch
    clf_w_adv.train_on_batch(X_train.values[indices],
                             [y_train.values[indices]] + np.hsplit(A_train.values[indices], n_sensitive),
                             class_weight=class_weight_clf_w_adv)

    # Make validation data predictions
    y_pred = pd.Series(clf.predict(X_test).ravel(), index=y_test.index)
    roc_auc = roc_auc_score(y_test, y_pred)
    acc = accuracy_score(y_test, (y_pred > 0.5)) * 100
    # Record ROC AUC and accuracy
    val_metrics.loc[idx, 'ROC AUC'] = roc_auc
    val_metrics.loc[idx, 'Accuracy'] = acc
    # Record the p%-rule for each sensitive attribute
    for sensitive_attr in A_test.columns:
        fairness_metrics.loc[idx, sensitive_attr] = p_rule(y_pred, A_test[sensitive_attr])
    print('Epoch: {}, Accuracy: {:.2f}, Race P: {:.2f}, Gender P: {:.2f}'.format(
        idx, acc,
        fairness_metrics.loc[idx, 'race'],
        fairness_metrics.loc[idx, 'gender']))
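# In[ ]:

# Final check of the adversarially trained classifier (added cell, mirroring
# the evaluation of the plain classifier above; not part of the original
# notebook).
y_pred_fair = clf.predict(X_test)
print('Clf acc after adversarial training: {:.2f}'.format(
    accuracy_score(y_test, (y_pred_fair > 0.5)) * 100))
for sens in A_test.columns:
    print(sens, p_rule(y_pred_fair, A_test[sens]))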
# In[ ]:

# Adversarial training on the train set, validated on the test set
# vm, fm = fit(X_train, y_train, A_train, validation_data=(X_test, y_test, A_test), n_iter=200)

# In[ ]:

plt.figure(figsize=(10, 7))
plt.xlabel('Epochs')
plt.plot(val_metrics['Accuracy'], label='Accuracy')
plt.plot(val_metrics['ROC AUC'] * 100, label='ROC AUC')
plt.plot(fairness_metrics['race'], label='Race')
plt.plot(fairness_metrics['gender'], label='Gender')
plt.legend()

# In[ ]:
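# In[ ]:

# Reading the curves above: the p%-rule is commonly judged against the 80%
# threshold derived from the four-fifths rule, so values of 80 or more are
# usually treated as satisfying it. A small summary of the final epoch
# (added cell, not part of the original notebook):
final_p = fairness_metrics.iloc[-1]
for attr, value in final_p.items():
    print('{}: p%-rule = {:.2f} ({} 80%)'.format(
        attr, value, 'above' if value >= 80 else 'below'))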