#!/usr/bin/env python
# coding: utf-8

# In[1]:

import pandas as pd
import numpy as np
np.random.seed(2017)  # important to set the seed before importing keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.optimizers import SGD
import matplotlib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns


# In[2]:

get_ipython().run_line_magic('matplotlib', 'inline')


# In[3]:

sns.set()
sns.set_style('ticks')


# In[4]:

# load the dataset
df = pd.read_csv("HR_comma_sep.csv")


# In[5]:

# preview the data
df.head()


# In[6]:

df.sales.value_counts()


# In[7]:

df.rename(columns={'sales': 'department'}, inplace=True)


# In[8]:

# let's convert the two categorical variables 'department' and 'salary' into dummy variables for modelling


# In[9]:

df = pd.get_dummies(df, columns=['department', 'salary'])


# In[10]:

x, y = df.drop('left', axis=1).values, df.left.values


# In[ ]:


# In[11]:

# let's do a training-test split for validation later on
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=2017)


# In[12]:

# let's convert our output variable into categorical (one-hot) format for keras
num_classes = np.max(y_train) + 1
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)


# In[13]:

x_train.shape


# In[14]:

n_cols = x_train.shape[1]


# In[ ]:


# In[15]:

# set up the early-stopping monitor
early_stopping_monitor = EarlyStopping(patience=5)


# # Baseline Model

# In[16]:

# configure our neural net
np.random.seed(2017)  # important to set the seed for reproducibility
model = Sequential()
model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
model.add(Dense(2, activation='softmax'))

# compile
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# fit
history = model.fit(x_train, y_train, epochs=100, verbose=1, validation_split=0.2,
                    callbacks=[early_stopping_monitor], shuffle=False)
model.summary()

# plot training and validation loss
plt.plot(history.history['loss'], 'r', label='training')
plt.plot(history.history['val_loss'], 'b', label='validation')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()

# plot training and validation accuracy
plt.figure(figsize=(8, 7))
plt.plot(history.history['acc'], 'r', label='training')
plt.plot(history.history['val_acc'], 'b', label='validation')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()

# we see that our baseline model reaches quite good accuracy after only about 20 epochs;
# let's evaluate it on the hold-out data
model.evaluate(x_test, y_test)


# In[ ]:
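# In[ ]:

# (added sketch, not in the original notebook) model.evaluate only reports the overall loss and
# accuracy; a confusion matrix and per-class report on the hold-out set are a useful extra sanity
# check, especially if the 'left' classes are imbalanced. This assumes scikit-learn's metrics
# module is available and that y_test is still in its one-hot (to_categorical) form.
from sklearn.metrics import classification_report, confusion_matrix

y_pred = np.argmax(model.predict(x_test), axis=1)  # predicted class labels from the softmax outputs
y_true = np.argmax(y_test, axis=1)                 # recover integer labels from the one-hot encoding
print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred, target_names=['stayed', 'left']))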
# In[17]:

# let's increase the number of nodes in the hidden layer and repeat


# # Model 2: Increasing the number of nodes in the hidden layer

# In[18]:

# configure our neural net
np.random.seed(2017)  # important to set the seed for reproducibility
model = Sequential()
model.add(Dense(100, activation='relu', input_shape=(n_cols,)))  # increased the hidden layer from 50 to 100 nodes
model.add(Dense(2, activation='softmax'))

# compile
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# fit
history = model.fit(x_train, y_train, epochs=100, verbose=1, validation_split=0.2,
                    callbacks=[early_stopping_monitor], shuffle=False)
model.summary()

# plot training and validation loss
plt.plot(history.history['loss'], 'r', label='training')
plt.plot(history.history['val_loss'], 'b', label='validation')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()

# plot training and validation accuracy
plt.figure(figsize=(8, 7))
plt.plot(history.history['acc'], 'r', label='training')
plt.plot(history.history['val_acc'], 'b', label='validation')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()

# this model also reaches quite good accuracy after only about 20 epochs;
# let's evaluate it on the hold-out data
print("Model evaluation on test dataset [loss, accuracy]:", model.evaluate(x_test, y_test))


# In[ ]:


# In[19]:

# we see that increasing the number of nodes in the hidden layer didn't help, so let's move on to Model 3


# # Model 3: Adding more hidden layers

# In[20]:

# configure our neural net
np.random.seed(2018)  # important to set the seed for reproducibility
model = Sequential()
model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
model.add(Dense(50, activation='relu'))
model.add(Dense(2, activation='softmax'))

# compile
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# fit
history = model.fit(x_train, y_train, epochs=20, verbose=1, validation_split=0.2,
                    callbacks=[early_stopping_monitor], shuffle=False)
model.summary()

# plot training and validation loss
plt.plot(history.history['loss'], 'r', label='training')
plt.plot(history.history['val_loss'], 'b', label='validation')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()

# plot training and validation accuracy
plt.figure(figsize=(8, 7))
plt.plot(history.history['acc'], 'r', label='training')
plt.plot(history.history['val_acc'], 'b', label='validation')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()

# this model reaches quite good accuracy within its 20 epochs;
# let's evaluate it on the hold-out data
print("Model evaluation on test dataset [loss, accuracy]:", model.evaluate(x_test, y_test))


# In[21]:

# adding a second hidden layer certainly helped, which leads us to our next model, where we make the network even deeper


# # Model 4: Making the model deeper

# In[22]:

# configure our neural net
np.random.seed(2019)
model = Sequential()
model.add(Dense(50, activation='relu', input_shape=(n_cols,)))
model.add(Dense(50, activation='relu'))
model.add(Dense(50, activation='relu'))  # third hidden layer to make the network deeper
model.add(Dense(2, activation='softmax'))

# compile
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# fit
history = model.fit(x_train, y_train, epochs=50, verbose=1, validation_split=0.2,
                    callbacks=[early_stopping_monitor], shuffle=False)
model.summary()

# plot training and validation loss
plt.plot(history.history['loss'], 'r', label='training')
plt.plot(history.history['val_loss'], 'b', label='validation')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()

# plot training and validation accuracy
plt.figure(figsize=(8, 7))
plt.plot(history.history['acc'], 'r', label='training')
plt.plot(history.history['val_acc'], 'b', label='validation')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()

# this deeper model also reaches quite good accuracy well within its training run;
# let's evaluate it on the hold-out data
print("Model evaluation on test dataset [loss, accuracy]:", model.evaluate(x_test, y_test))


# In[ ]:
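# In[ ]:

# (added sketch, not in the original notebook) rather than repeating a near-identical cell for each
# architecture, a small helper can build and fit a network for any list of hidden-layer sizes and
# return its history plus hold-out score, so the candidates above can be compared on one plot.
# `build_and_evaluate` and `hidden_layers` are illustrative names, not part of the original code.
def build_and_evaluate(hidden_layers, epochs=50):
    np.random.seed(2017)  # keep runs roughly comparable
    m = Sequential()
    m.add(Dense(hidden_layers[0], activation='relu', input_shape=(n_cols,)))
    for units in hidden_layers[1:]:
        m.add(Dense(units, activation='relu'))
    m.add(Dense(2, activation='softmax'))
    m.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    h = m.fit(x_train, y_train, epochs=epochs, verbose=0, validation_split=0.2,
              callbacks=[early_stopping_monitor], shuffle=False)
    return h, m.evaluate(x_test, y_test, verbose=0)

# compare the validation-loss curves of the four architectures tried above
plt.figure(figsize=(8, 7))
for layers in [[50], [100], [50, 50], [50, 50, 50]]:
    h, score = build_and_evaluate(layers)
    plt.plot(h.history['val_loss'], label='%s (test acc %.3f)' % (layers, score[1]))
plt.xlabel('epochs')
plt.ylabel('validation loss')
plt.legend()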