#!/usr/bin/env python
# coding: utf-8
#
#
# Machine Learning Using Python (MEAFA Workshop)
# Lesson 10: Neural Networks (Regression)
#
#
# Credit Card Data
# Neural Networks
# Model Evaluation
#
#
# This notebook relies on the following imports and settings. We will load new functions and libraries in context to make clear what we are using them for.
# In[1]:
# Packages
import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore') # this is to clear the warnings from this page, usually we should leave them on
# In[2]:
# Plot settings
sns.set_context('notebook') # optimise figures for notebook display
sns.set_style('ticks') # set default plot style
colours = ['#1F77B4', '#FF7F0E', '#2CA02C', '#DB2728', '#9467BD', '#8C564B', '#E377C2','#7F7F7F', '#BCBD22', '#17BECF']
crayon = ['#4E79A7','#F28E2C','#E15759','#76B7B2','#59A14F', '#EDC949','#AF7AA1','#FF9DA7','#9C755F','#BAB0AB']
sns.set_palette(colours) # set custom color scheme
# `get_ipython` only exists inside an IPython/Jupyter session; guard the magic
# so that running this file as a plain script does not stop with a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # not running under IPython: keep matplotlib's default backend
plt.rcParams['figure.figsize'] = (9, 6)
# In[3]:
# Methods
from sklearn.linear_model import LinearRegression
# Model selection and evaluation tools
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
# Data processing
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# ## Credit Card Data
#
# In[4]:
# Read the credit card data set, using the 'Obs' column as the row index
data = pd.read_csv(
    'Datasets/Credit.csv',
    index_col='Obs',
)
data.head(10)  # preview the first ten rows
# In[5]:
from sklearn.model_selection import train_test_split
# Randomly partition the row labels: 70% for training, the remainder for testing
index_train, index_test = train_test_split(
    np.array(data.index),
    train_size=0.7,
    random_state=10,
)
# Materialise independent copies of each partition
train = data.loc[index_train].copy()
test = data.loc[index_test].copy()
# In[6]:
def prepare_data(df):
    """Return a numeric-only copy of the credit data ready for modelling.

    Encodes the categorical columns as 0/1 dummy variables, keeps only the
    numeric columns and drops ``Rating``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns ``Gender``, ``Student``,
        ``Married``, ``Ethnicity`` and ``Rating``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the caller's ``df`` is left unmodified.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    df = df.copy()  # work on a copy so the caller's frame is never mutated

    # Strip surrounding whitespace before comparing, so both ' Male' (the
    # padded spelling this function originally matched) and 'Male' are encoded.
    df['Male'] = (df['Gender'].str.strip() == 'Male').astype(int)
    df['Student'] = (df['Student'] == 'Yes').astype(int)
    df['Married'] = (df['Married'] == 'Yes').astype(int)
    df['Caucasian'] = (df['Ethnicity'] == 'Caucasian').astype(int)
    df['Asian'] = (df['Ethnicity'] == 'Asian').astype(int)

    # Keep numeric columns only; selecting by "number" also discards text
    # columns stored with pandas' dedicated string dtype, not just 'object'.
    df = df.select_dtypes(include='number')
    return df.drop('Rating', axis=1)  # collinear with limit
# Run the same preprocessing on both partitions
train, test = prepare_data(train), prepare_data(test)
train.head()  # preview the processed training data
# In[7]:
# Constructing the response vector and design matrix (matrix of predictor values)
response = 'Balance'

# Every column except the response is used as a predictor
predictors = train.columns.tolist()
predictors.remove(response)

y_train = train[response].copy()
y_test = test[response].copy()

# Standardise the predictors; the scaler is fitted on the training data only
# and the same transformation is then applied to the test data
scaler = StandardScaler()
X_train = scaler.fit_transform(train[predictors].copy())
X_test = scaler.transform(test[predictors].values)
X_test.shape
# ## Neural Networks
# In[8]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Activation
from keras.wrappers.scikit_learn import KerasRegressor
# ### Single Layer Perceptron
# In[9]:
# Single hidden layer with 24 ReLU units, followed by one output unit with no
# activation specified.
slp = Sequential()
# `kernel_initializer` is the Keras 2 name of the old `init` argument; the
# input width is read from the design matrix instead of being hard-coded.
slp.add(Dense(24, input_dim=X_train.shape[1], kernel_initializer='uniform', activation='relu'))
slp.add(Dense(1))
slp.compile(loss='mse', optimizer='adam')
slp.fit(X_train, y_train, epochs=15000, verbose=0)
# ### Using the Scikit-Learn Wrapper
# In[10]:
def build_model(input_dim=10):
    """Build and compile the single-hidden-layer regression network.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. Defaults to 10, the value that was
        previously hard-coded.

    Returns
    -------
    Sequential
        Model with a 24-unit ReLU hidden layer and a single output unit,
        compiled with mean squared error loss and the Adam optimiser.
    """
    model = Sequential()
    # `kernel_initializer` is the Keras 2 name of the old `init` argument
    model.add(Dense(24, input_dim=input_dim, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    return model


# Wrap the builder so the network can be used with scikit-learn tools
estimator = KerasRegressor(build_fn=build_model, epochs=1000, verbose=0)
# 5-fold cross-validation; scores are negated mean squared errors
cross_val_score(estimator, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
# ### Dropout
# In[11]:
# Same architecture as the single layer perceptron, with dropout applied to
# the output of the hidden layer.
nn = Sequential()
# `kernel_initializer` is the Keras 2 name of the old `init` argument; the
# input width is read from the design matrix instead of being hard-coded.
nn.add(Dense(24, input_dim=X_train.shape[1], kernel_initializer='uniform', activation='relu'))
nn.add(Dropout(0.2)) # adding 20% dropout to the hidden layers
nn.add(Dense(1))
nn.compile(loss='mse', optimizer='adam')
nn.fit(X_train, y_train, epochs=100, verbose=0) # few epochs just for illustration
# ## Model evaluation
#
# In[12]:
# Benchmark: ordinary least squares on the standardised predictors
ols = LinearRegression()
ols.fit(X_train, y_train)

# Results table: one row per model, one column per test-set metric
columns = ['RMSE', 'R-Squared', 'MAE']
rows = ['Linear Regression', 'Neural ']
results = pd.DataFrame(0.0, columns=columns, index=rows)

# Fitted models, in the same order as the table rows
methods = [ols, slp]

# Compute test predictions and fill in each model's row of metrics
for i, method in enumerate(methods):
    y_pred = method.predict(X_test)
    results.iloc[i, :] = [
        np.sqrt(mean_squared_error(y_test, y_pred)),
        r2_score(y_test, y_pred),
        mean_absolute_error(y_test, y_pred),
    ]

results.round(2)