#!/usr/bin/env python
# coding: utf-8

# Machine Learning Using Python (MEAFA Workshop)

# Lesson 10: Neural Networks (Regression)

# Contents:
# - Credit Card Data
# - Neural Networks
# - Model Evaluation
#
# This notebook relies on the following imports and settings. We will load new functions and libraries in context to make clear what we are using them for.

# In[1]:


# Packages
import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')  # this is to clear the warnings from this page, usually we should leave them on


# In[2]:


# Plot settings
sns.set_context('notebook')  # optimise figures for notebook display
sns.set_style('ticks')  # set default plot style
colours = ['#1F77B4', '#FF7F0E', '#2CA02C', '#DB2728', '#9467BD', '#8C564B', '#E377C2', '#7F7F7F', '#BCBD22', '#17BECF']
crayon = ['#4E79A7', '#F28E2C', '#E15759', '#76B7B2', '#59A14F', '#EDC949', '#AF7AA1', '#FF9DA7', '#9C755F', '#BAB0AB']
sns.set_palette(colours)  # set custom color scheme
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # get_ipython only exists inside IPython/Jupyter; when the file is run as
    # a plain script there is no inline backend to select.
    pass
plt.rcParams['figure.figsize'] = (9, 6)


# In[3]:


# Methods
from sklearn.linear_model import LinearRegression

# Model selection and evaluation tools
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Data processing
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


# ## Credit Card Data
#

# In[4]:


data = pd.read_csv('Datasets/Credit.csv', index_col='Obs')
data.head(10)


# In[5]:


from sklearn.model_selection import train_test_split

# Randomly split indexes
index_train, index_test = train_test_split(np.array(data.index), train_size=0.7, random_state=10)

# Write training and test sets
train = data.loc[index_train, :].copy()
test = data.loc[index_test, :].copy()


# In[6]:


def prepare_data(df):
    """Return a numeric-only copy of *df* with dummy-coded categoricals.

    Adds 0/1 indicator columns ``Male``, ``Caucasian`` and ``Asian``, recodes
    ``Student`` and ``Married`` to 1 where the value is 'Yes' and 0 otherwise,
    then drops every non-numeric column and the ``Rating`` column.

    The input frame is left unmodified; the prepared frame is returned.
    """
    df = df.copy()  # work on a copy so the caller's frame is not mutated

    # The notebook originally compared against ' Male' (with a leading space);
    # stripping first matches the label with or without that padding.
    df['Male'] = (df['Gender'].str.strip() == 'Male').astype(int)  # create dummy variable for gender
    df['Student'] = (df['Student'] == 'Yes').astype(int)
    df['Married'] = (df['Married'] == 'Yes').astype(int)
    df['Caucasian'] = (df['Ethnicity'] == 'Caucasian').astype(int)
    df['Asian'] = (df['Ethnicity'] == 'Asian').astype(int)

    # Keep numeric columns only. Selecting on "number" rather than excluding
    # "object" also drops text columns stored with a dedicated string dtype.
    df = df.select_dtypes(include='number')  # discards the columns that are not numerical
    df = df.drop('Rating', axis=1)  # collinear with limit
    return df


train = prepare_data(train)
test = prepare_data(test)

train.head()


# In[7]:


# Constructing response vector and design matrix (matrix of predictor values)
response = 'Balance'
predictors = list(train.columns.values)
predictors.remove(response)

y_train = train[response].copy()
y_test = test[response].copy()

X_train = train[predictors].copy()  # selects the variables in the predictor list

scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
# Pass a DataFrame (not .values) so the input type matches what the scaler
# was fitted on.
X_test = scaler.transform(test[predictors])

# Width of the design matrix; used as the input size of every network below
# instead of a hard-coded constant.
n_features = X_train.shape[1]

X_test.shape


# ## Neural Networks

# In[8]:


from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Activation
from keras.wrappers.scikit_learn import KerasRegressor


# ### Single Layer Perceptron

# In[9]:


slp = Sequential()
# kernel_initializer is the Keras 2 name of the Keras 1 `init` argument.
slp.add(Dense(24, input_dim=n_features, kernel_initializer='uniform', activation='relu'))
slp.add(Dense(1))
slp.compile(loss='mse', optimizer='adam')
slp.fit(X_train, y_train, epochs=15000, verbose=0)


# ### Using the Scikit-Learn Wrapper

# In[10]:


def build_model():
    """Build and compile a one-hidden-layer regression network.

    The network has a 24-unit ReLU hidden layer and a single linear output
    unit, compiled with mean squared error loss and the Adam optimizer. The
    input size is taken from the module-level ``n_features``.
    """
    model = Sequential()
    model.add(Dense(24, input_dim=n_features, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    return model


estimator = KerasRegressor(build_fn=build_model, epochs=1000, verbose=0)
cross_val_score(estimator, X_train, y_train, cv=5, scoring='neg_mean_squared_error')


# ### Dropout

# In[11]:


nn = Sequential()
nn.add(Dense(24, input_dim=n_features, kernel_initializer='uniform', activation='relu'))
nn.add(Dropout(0.2))  # adding 20% dropout to the hidden layers
nn.add(Dense(1))
nn.compile(loss='mse', optimizer='adam')
nn.fit(X_train, y_train, epochs=100, verbose=0)  # few epochs just for illustration


# ## Model evaluation
#
# In[12]:


# Benchmark
ols = LinearRegression()
ols.fit(X_train, y_train)

# Initialise table
columns = ['RMSE', 'R-Squared', 'MAE']
rows = ['Linear Regression', 'Neural Network']
results = pd.DataFrame(0.0, columns=columns, index=rows)

# List algorithms (same order as the row labels above)
methods = [ols, slp]

# Compute test predictions and metrics
for label, method in zip(rows, methods):
    # Flatten the predictions so every model yields a 1-D vector aligned
    # with y_test, whatever output shape its predict() uses.
    y_pred = np.ravel(method.predict(X_test))

    results.loc[label, 'RMSE'] = np.sqrt(mean_squared_error(y_test, y_pred))
    results.loc[label, 'R-Squared'] = r2_score(y_test, y_pred)
    results.loc[label, 'MAE'] = mean_absolute_error(y_test, y_pred)

results.round(2)