import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# random seed
# Reproducibility setup for TF1.x-era Keras: the hash seed, numpy/random
# seeds, the single-threaded session config and the graph-level TF seed must
# all be set before any model is built.
import tensorflow as tf
import random as rn
import os
os.environ['PYTHONHASHSEED'] = '0'
random_n = 123
np.random.seed(random_n)
rn.seed(random_n)
# Single-threaded ops: multi-threading is a source of run-to-run nondeterminism.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
from keras import backend as K
tf.set_random_seed(random_n)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
# PassengerId (first CSV column) becomes the index of both frames.
train = pd.read_csv('train.csv', index_col=0)
test = pd.read_csv('test.csv', index_col=0)
/home/mirrornerror/.pyenv/versions/anaconda3-5.1.0/envs/py36/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`. from ._conv import register_converters as _register_converters Using TensorFlow backend.
# Peek at the raw training data.
train.head()
Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||||
1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
# Combine train (minus the target) and test into one frame so that all
# preprocessing / imputation is applied consistently to both.
train_tmp = train.drop(columns=['Survived', 'Ticket'])
test_tmp = test.drop(columns=['Ticket'])
df = pd.concat([train_tmp, test_tmp])
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1309 entries, 1 to 1309 Data columns (total 9 columns): Pclass 1309 non-null int64 Name 1309 non-null object Sex 1309 non-null object Age 1046 non-null float64 SibSp 1309 non-null int64 Parch 1309 non-null int64 Fare 1308 non-null float64 Cabin 295 non-null object Embarked 1307 non-null object dtypes: float64(2), int64(3), object(4) memory usage: 102.3+ KB
# Name to Title
# Extract the honorific from each name, e.g. "Braund, Mr. Owen Harris" -> "Mr".
# Fixes: raw string (the original ' ([A-Za-z]+)\..' is a non-raw string with an
# escape, and its stray trailing '.' required a character AFTER the period, so a
# title ending the string would silently become NaN); expand=False returns a
# Series, which is what a single capture group assigned to one column needs.
df = df.assign(Title=df.Name.str.extract(r' ([A-Za-z]+)\.', expand=False))
title_list = df.Title.unique()
print(title_list)
['Mr' 'Mrs' 'Miss' 'Master' 'Don' 'Rev' 'Dr' 'Mme' 'Ms' 'Major' 'Lady' 'Sir' 'Mlle' 'Col' 'Capt' 'Countess' 'Jonkheer' 'Dona']
# Title to Number(0-17): integer-code each title in order of first appearance.
title_codes = {title: code for code, title in enumerate(df.Title.unique())}
df.Title = df.Title.map(title_codes)
# The raw name is no longer needed once the title has been extracted.
df = df.drop(columns=['Name'])
df.head()
Pclass | Sex | Age | SibSp | Parch | Fare | Cabin | Embarked | Title | |
---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||
1 | 3 | male | 22.0 | 1 | 0 | 7.2500 | NaN | S | 0 |
2 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C85 | C | 1 |
3 | 3 | female | 26.0 | 0 | 0 | 7.9250 | NaN | S | 2 |
4 | 1 | female | 35.0 | 1 | 0 | 53.1000 | C123 | S | 1 |
5 | 3 | male | 35.0 | 0 | 0 | 8.0500 | NaN | S | 0 |
# Binary-encode sex, then reduce Cabin to its deck letter and integer-code it.
df.Sex = df.Sex.replace({'male': 0, 'female': 1})
# Keep only the first character (the deck); NaN stays NaN here.
df = df.assign(Cabin=df.Cabin.str[0])
cabin_list = df.Cabin.unique()
# Reuse cabin_list instead of recomputing df.Cabin.str[0].unique() twice —
# Cabin is already a single character after the assign above, so the extra
# .str[0] in the original was redundant. NaN is first in cabin_list, so
# missing cabins map to 0.
df.Cabin = df.Cabin.replace(cabin_list, np.arange(len(cabin_list)))
print(cabin_list)
print(df.Cabin.unique())
[nan 'C' 'E' 'G' 'D' 'A' 'B' 'F' 'T'] [0 1 2 3 4 5 6 7 8]
# Inspect the distinct embarkation values (includes NaN).
df.Embarked.unique()
array(['S', 'C', 'Q', nan], dtype=object)
# Integer-code the embarkation port; missing values stay NaN (imputed later).
df.Embarked = df.Embarked.map({'S': 0, 'C': 1, 'Q': 2})
Z = (x - x.mean()) / x.std()
N = (x - x.min()) / (x.max() - x.min())
Note: sklearn.preprocessing.MinMaxScaler raises an error when the data contains nulls, so the hand-rolled scaling functions below are used instead.
# Normalize Function
def normalize(df_col):
    """Min-max scale a Series linearly onto [0, 1]; NaNs pass through."""
    span = df_col.max() - df_col.min()
    return (df_col - df_col.min()) / span
# Standardization(zscore)
def zscore(df_col):
    """Standardize a Series to zero mean, unit sample std; NaNs pass through."""
    centered = df_col - df_col.mean()
    return centered / df_col.std()
# Standardize the continuous/ordinal features (each column independently).
for feature in ['Age', 'Fare', 'SibSp', 'Parch', 'Title']:
    df[feature] = zscore(df[feature])
# Alternative: min-max scaling instead of z-scoring.
# df.Age = normalize(df.Age)
# df.Fare = normalize(df.Fare)
# for col in df.columns:
#     df[col] = normalize(df[col])
df.describe()
Pclass | Sex | Age | SibSp | Parch | Fare | Cabin | Embarked | Title | |
---|---|---|---|---|---|---|---|---|---|
count | 1309.000000 | 1309.000000 | 1.046000e+03 | 1.309000e+03 | 1.309000e+03 | 1.308000e+03 | 1309.000000 | 1307.000000 | 1.309000e+03 |
mean | 2.294882 | 0.355997 | 9.488904e-17 | 2.178887e-16 | -5.920059e-17 | -6.049357e-16 | 0.786860 | 0.394797 | 5.343319e-17 |
std | 0.837836 | 0.478997 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.000000e+00 | 1.794388 | 0.653817 | 1.000000e+00 |
min | 1.000000 | 0.000000 | -2.061342e+00 | -4.789037e-01 | -4.448295e-01 | -6.432832e-01 | 0.000000 | 0.000000 | -5.418264e-01 |
25% | 2.000000 | 0.000000 | -6.161683e-01 | -4.789037e-01 | -4.448295e-01 | -4.907329e-01 | 0.000000 | 0.000000 | -5.418264e-01 |
50% | 3.000000 | 0.000000 | -1.305123e-01 | -4.789037e-01 | -4.448295e-01 | -3.640217e-01 | 0.000000 | 0.000000 | -5.418264e-01 |
75% | 3.000000 | 1.000000 | 6.326615e-01 | 4.811039e-01 | -4.448295e-01 | -3.903654e-02 | 0.000000 | 1.000000 | 6.481916e-01 |
max | 3.000000 | 1.000000 | 3.477218e+00 | 7.201157e+00 | 9.953060e+00 | 9.255140e+00 | 8.000000 | 2.000000 | 9.573327e+00 |
# Drop Cabin if the result gets better
#df = df.drop(['Cabin'], axis=1)
# Snapshot the full frame (rows with missing values included) before the
# fully-observed subset is carved out of df below; fill_data writes its
# imputed values back into df0.
df0 = df.copy()
df0.info()
# Keep only fully-observed rows in df (training data for the imputation
# models). NOTE(review): each *_null frame is taken from the ALREADY filtered
# df, so Embarked_null/Fare_null exclude rows that also lack Age; the *_null
# frames are not used afterwards — fill_data locates missing rows via df0.
Age_null = df[df.Age.isnull()]
df = df[df.Age.notnull()]
Embarked_null = df[df.Embarked.isnull()]
df = df[df.Embarked.notnull()]
Fare_null = df[df.Fare.isnull()]
df = df[df.Fare.notnull()]
print(df.shape)
df.info()
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout, BatchNormalization
import keras
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# Seeded Glorot initializer shared by every Dense layer for reproducibility.
initializer = keras.initializers.glorot_uniform(seed=random_n)
# model for Fare, Embarked, Age
def fill_data(col):
    """Train a small net on the fully-observed rows, then impute the missing
    values of `col` in df0 (in place).

    'Embarked' is treated as multi-class classification (softmax over the
    distinct values); 'Fare' and 'Age' as regression (mse). Uses the
    module-level globals df (fully-observed rows), df0 (all rows),
    initializer and random_n. Side effects: writes checkpoint_<col>.hdf5,
    updates df0, and (for 'Embarked' only) shows a training plot.
    """
    n_cols = len(df.columns) - 1      # features = every column except `col`
    num = len(df[col].unique())       # class count (used in the Embarked branch)
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(n_cols,),
                    kernel_initializer=initializer))
    model.add(Dropout(0.5, seed=random_n))
    if col == 'Embarked':
        model.add(Dense(num, activation='softmax', kernel_initializer=initializer))
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
                      metrics=['acc'])
    else:  # 'Fare', 'Age' -> regression head
        model.add(Dense(1, activation='relu', kernel_initializer=initializer))
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    data = df.drop([col], axis=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3,
                                  min_lr=0.000001, verbose=1)
    checkpointer = ModelCheckpoint(filepath='checkpoint_' + col + '.hdf5',
                                   verbose=1, save_best_only=True)
    early_stopping = EarlyStopping(patience=10, verbose=1)
    epochs = 300
    hist = model.fit(data, df[col],
                     epochs=epochs,
                     batch_size=32,
                     verbose=1,
                     validation_split=0.1,
                     callbacks=[reduce_lr, early_stopping, checkpointer])
    # Predict the missing entries using the best (checkpointed) weights.
    null_data = df0[df0[col].isnull()]
    null_data = null_data.drop([col], axis=1)
    model.load_weights('checkpoint_' + col + '.hdf5')
    pred = model.predict(null_data)
    if col == 'Embarked':
        pred = pred.argmax(axis=1)    # class index from softmax probabilities
        # Plot only in the classification branch: hist.history has 'acc' here
        # but not in the regression runs (their metric is 'mae').
        plt.plot(hist.history['acc'], 'b-', label='acc')
        plt.plot(hist.history['loss'], 'r-', label='loss')
        plt.xlabel('epochs')
        plt.legend()
        plt.show()
    # Vectorized write-back (replaces the original per-row enumerate loop).
    pred = pred.reshape(-1, )
    idx = df0[df0[col].isnull()].index.values
    df0.loc[idx, col] = pred
# Impute the two missing Embarked values (PassengerIds 62 and 830).
fill_data('Embarked') # id:62,830
Train on 938 samples, validate on 105 samples Epoch 1/300 938/938 [==============================] - 0s 344us/step - loss: 0.8662 - acc: 0.6205 - val_loss: 0.7452 - val_acc: 0.7524 Epoch 00001: val_loss improved from inf to 0.74523, saving model to checkpoint_Embarked.hdf5 Epoch 2/300 938/938 [==============================] - 0s 76us/step - loss: 0.7523 - acc: 0.7313 - val_loss: 0.7328 - val_acc: 0.7429 Epoch 00002: val_loss improved from 0.74523 to 0.73280, saving model to checkpoint_Embarked.hdf5 Epoch 3/300 938/938 [==============================] - 0s 74us/step - loss: 0.7292 - acc: 0.7495 - val_loss: 0.7240 - val_acc: 0.7333 Epoch 00003: val_loss improved from 0.73280 to 0.72398, saving model to checkpoint_Embarked.hdf5 Epoch 4/300 938/938 [==============================] - 0s 82us/step - loss: 0.7211 - acc: 0.7441 - val_loss: 0.7141 - val_acc: 0.7333 Epoch 00004: val_loss improved from 0.72398 to 0.71408, saving model to checkpoint_Embarked.hdf5 Epoch 5/300 938/938 [==============================] - 0s 84us/step - loss: 0.6862 - acc: 0.7601 - val_loss: 0.7117 - val_acc: 0.7333 Epoch 00005: val_loss improved from 0.71408 to 0.71174, saving model to checkpoint_Embarked.hdf5 Epoch 6/300 938/938 [==============================] - 0s 74us/step - loss: 0.6646 - acc: 0.7537 - val_loss: 0.7110 - val_acc: 0.7333 Epoch 00006: val_loss improved from 0.71174 to 0.71097, saving model to checkpoint_Embarked.hdf5 Epoch 7/300 938/938 [==============================] - 0s 75us/step - loss: 0.6576 - acc: 0.7655 - val_loss: 0.7034 - val_acc: 0.7429 Epoch 00007: val_loss improved from 0.71097 to 0.70335, saving model to checkpoint_Embarked.hdf5 Epoch 8/300 938/938 [==============================] - 0s 69us/step - loss: 0.6594 - acc: 0.7655 - val_loss: 0.7016 - val_acc: 0.7429 Epoch 00008: val_loss improved from 0.70335 to 0.70159, saving model to checkpoint_Embarked.hdf5 Epoch 9/300 938/938 [==============================] - 0s 73us/step - loss: 0.6650 - acc: 0.7644 - val_loss: 
0.7008 - val_acc: 0.7429 Epoch 00009: val_loss improved from 0.70159 to 0.70081, saving model to checkpoint_Embarked.hdf5 Epoch 10/300 938/938 [==============================] - 0s 73us/step - loss: 0.6344 - acc: 0.7665 - val_loss: 0.7043 - val_acc: 0.7429 Epoch 00010: val_loss did not improve from 0.70081 Epoch 11/300 938/938 [==============================] - 0s 72us/step - loss: 0.6337 - acc: 0.7665 - val_loss: 0.6979 - val_acc: 0.7429 Epoch 00011: val_loss improved from 0.70081 to 0.69788, saving model to checkpoint_Embarked.hdf5 Epoch 12/300 938/938 [==============================] - 0s 74us/step - loss: 0.6329 - acc: 0.7601 - val_loss: 0.6970 - val_acc: 0.7429 Epoch 00012: val_loss improved from 0.69788 to 0.69701, saving model to checkpoint_Embarked.hdf5 Epoch 13/300 938/938 [==============================] - 0s 75us/step - loss: 0.6176 - acc: 0.7708 - val_loss: 0.6976 - val_acc: 0.7429 Epoch 00013: val_loss did not improve from 0.69701 Epoch 14/300 938/938 [==============================] - 0s 79us/step - loss: 0.6086 - acc: 0.7655 - val_loss: 0.6951 - val_acc: 0.7429 Epoch 00014: val_loss improved from 0.69701 to 0.69508, saving model to checkpoint_Embarked.hdf5 Epoch 15/300 938/938 [==============================] - 0s 75us/step - loss: 0.6289 - acc: 0.7719 - val_loss: 0.6965 - val_acc: 0.7429 Epoch 00015: val_loss did not improve from 0.69508 Epoch 16/300 938/938 [==============================] - 0s 73us/step - loss: 0.6104 - acc: 0.7687 - val_loss: 0.6972 - val_acc: 0.7429 Epoch 00016: val_loss did not improve from 0.69508 Epoch 17/300 938/938 [==============================] - 0s 72us/step - loss: 0.6014 - acc: 0.7772 - val_loss: 0.6946 - val_acc: 0.7333 Epoch 00017: val_loss improved from 0.69508 to 0.69457, saving model to checkpoint_Embarked.hdf5 Epoch 18/300 938/938 [==============================] - 0s 73us/step - loss: 0.6165 - acc: 0.7665 - val_loss: 0.6978 - val_acc: 0.7429 Epoch 00018: val_loss did not improve from 0.69457 Epoch 19/300 
938/938 [==============================] - 0s 74us/step - loss: 0.6167 - acc: 0.7644 - val_loss: 0.6933 - val_acc: 0.7333 Epoch 00019: val_loss improved from 0.69457 to 0.69334, saving model to checkpoint_Embarked.hdf5 Epoch 20/300 938/938 [==============================] - 0s 75us/step - loss: 0.6039 - acc: 0.7708 - val_loss: 0.6966 - val_acc: 0.7429 Epoch 00020: val_loss did not improve from 0.69334 Epoch 21/300 938/938 [==============================] - 0s 72us/step - loss: 0.6079 - acc: 0.7740 - val_loss: 0.6931 - val_acc: 0.7429 Epoch 00021: val_loss improved from 0.69334 to 0.69311, saving model to checkpoint_Embarked.hdf5 Epoch 22/300 938/938 [==============================] - 0s 73us/step - loss: 0.6071 - acc: 0.7697 - val_loss: 0.6897 - val_acc: 0.7333 Epoch 00022: val_loss improved from 0.69311 to 0.68972, saving model to checkpoint_Embarked.hdf5 Epoch 23/300 938/938 [==============================] - 0s 69us/step - loss: 0.6012 - acc: 0.7687 - val_loss: 0.6914 - val_acc: 0.7333 Epoch 00023: val_loss did not improve from 0.68972 Epoch 24/300 938/938 [==============================] - 0s 77us/step - loss: 0.5974 - acc: 0.7804 - val_loss: 0.6913 - val_acc: 0.7333 Epoch 00024: val_loss did not improve from 0.68972 Epoch 25/300 938/938 [==============================] - 0s 77us/step - loss: 0.5967 - acc: 0.7761 - val_loss: 0.6933 - val_acc: 0.7333 Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026. 
Epoch 00025: val_loss did not improve from 0.68972 Epoch 26/300 938/938 [==============================] - 0s 86us/step - loss: 0.5963 - acc: 0.7751 - val_loss: 0.6924 - val_acc: 0.7333 Epoch 00026: val_loss did not improve from 0.68972 Epoch 27/300 938/938 [==============================] - 0s 94us/step - loss: 0.5949 - acc: 0.7729 - val_loss: 0.6920 - val_acc: 0.7333 Epoch 00027: val_loss did not improve from 0.68972 Epoch 28/300 938/938 [==============================] - 0s 79us/step - loss: 0.5952 - acc: 0.7783 - val_loss: 0.6918 - val_acc: 0.7333 Epoch 00028: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05. Epoch 00028: val_loss did not improve from 0.68972 Epoch 29/300 938/938 [==============================] - 0s 87us/step - loss: 0.6045 - acc: 0.7665 - val_loss: 0.6918 - val_acc: 0.7333 Epoch 00029: val_loss did not improve from 0.68972 Epoch 30/300 938/938 [==============================] - 0s 83us/step - loss: 0.5944 - acc: 0.7719 - val_loss: 0.6917 - val_acc: 0.7333 Epoch 00030: val_loss did not improve from 0.68972 Epoch 31/300 938/938 [==============================] - 0s 65us/step - loss: 0.6018 - acc: 0.7740 - val_loss: 0.6916 - val_acc: 0.7333 Epoch 00031: ReduceLROnPlateau reducing learning rate to 8.000000525498762e-06. Epoch 00031: val_loss did not improve from 0.68972 Epoch 32/300 938/938 [==============================] - 0s 75us/step - loss: 0.5872 - acc: 0.7697 - val_loss: 0.6916 - val_acc: 0.7333 Epoch 00032: val_loss did not improve from 0.68972 Epoch 00032: early stopping
# Impute the single missing Fare value (PassengerId 1044, in the test set).
fill_data('Fare') # id:1044
Train on 938 samples, validate on 105 samples Epoch 1/300 938/938 [==============================] - 0s 253us/step - loss: 8.9094 - mean_absolute_error: 2.5688 - val_loss: 5.5439 - val_mean_absolute_error: 1.9672 Epoch 00001: val_loss improved from inf to 5.54393, saving model to checkpoint_Fare.hdf5 Epoch 2/300 938/938 [==============================] - 0s 73us/step - loss: 4.6612 - mean_absolute_error: 1.8467 - val_loss: 3.4742 - val_mean_absolute_error: 1.4843 Epoch 00002: val_loss improved from 5.54393 to 3.47424, saving model to checkpoint_Fare.hdf5 Epoch 3/300 938/938 [==============================] - 0s 72us/step - loss: 2.7566 - mean_absolute_error: 1.3448 - val_loss: 2.4206 - val_mean_absolute_error: 1.1232 Epoch 00003: val_loss improved from 3.47424 to 2.42062, saving model to checkpoint_Fare.hdf5 Epoch 4/300 938/938 [==============================] - 0s 80us/step - loss: 1.7961 - mean_absolute_error: 0.9854 - val_loss: 1.8958 - val_mean_absolute_error: 0.8475 Epoch 00004: val_loss improved from 2.42062 to 1.89583, saving model to checkpoint_Fare.hdf5 Epoch 5/300 938/938 [==============================] - 0s 91us/step - loss: 1.3640 - mean_absolute_error: 0.7712 - val_loss: 1.7291 - val_mean_absolute_error: 0.7299 Epoch 00005: val_loss improved from 1.89583 to 1.72906, saving model to checkpoint_Fare.hdf5 Epoch 6/300 938/938 [==============================] - 0s 96us/step - loss: 1.2310 - mean_absolute_error: 0.6862 - val_loss: 1.6974 - val_mean_absolute_error: 0.7071 Epoch 00006: val_loss improved from 1.72906 to 1.69739, saving model to checkpoint_Fare.hdf5 Epoch 7/300 938/938 [==============================] - 0s 90us/step - loss: 1.1849 - mean_absolute_error: 0.6521 - val_loss: 1.6919 - val_mean_absolute_error: 0.7020 Epoch 00007: val_loss improved from 1.69739 to 1.69192, saving model to checkpoint_Fare.hdf5 Epoch 8/300 938/938 [==============================] - 0s 90us/step - loss: 1.1712 - mean_absolute_error: 0.6380 - val_loss: 1.6902 - 
val_mean_absolute_error: 0.6990 Epoch 00008: val_loss improved from 1.69192 to 1.69016, saving model to checkpoint_Fare.hdf5 Epoch 9/300 938/938 [==============================] - 0s 72us/step - loss: 1.1354 - mean_absolute_error: 0.6196 - val_loss: 1.6893 - val_mean_absolute_error: 0.6972 Epoch 00009: val_loss improved from 1.69016 to 1.68933, saving model to checkpoint_Fare.hdf5 Epoch 10/300 938/938 [==============================] - 0s 75us/step - loss: 1.1181 - mean_absolute_error: 0.6091 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00010: val_loss improved from 1.68933 to 1.68909, saving model to checkpoint_Fare.hdf5 Epoch 11/300 938/938 [==============================] - 0s 70us/step - loss: 1.1126 - mean_absolute_error: 0.6037 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00011: val_loss did not improve from 1.68909 Epoch 12/300 938/938 [==============================] - 0s 75us/step - loss: 1.1241 - mean_absolute_error: 0.6080 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00012: val_loss did not improve from 1.68909 Epoch 13/300 938/938 [==============================] - 0s 76us/step - loss: 1.1258 - mean_absolute_error: 0.6088 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026. 
Epoch 00013: val_loss did not improve from 1.68909 Epoch 14/300 938/938 [==============================] - 0s 65us/step - loss: 1.1162 - mean_absolute_error: 0.6043 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00014: val_loss did not improve from 1.68909 Epoch 15/300 938/938 [==============================] - 0s 74us/step - loss: 1.1207 - mean_absolute_error: 0.6061 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00015: val_loss did not improve from 1.68909 Epoch 16/300 938/938 [==============================] - 0s 76us/step - loss: 1.1153 - mean_absolute_error: 0.6049 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00016: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05. Epoch 00016: val_loss did not improve from 1.68909 Epoch 17/300 938/938 [==============================] - 0s 81us/step - loss: 1.1196 - mean_absolute_error: 0.6068 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00017: val_loss did not improve from 1.68909 Epoch 18/300 938/938 [==============================] - 0s 88us/step - loss: 1.1197 - mean_absolute_error: 0.6043 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00018: val_loss did not improve from 1.68909 Epoch 19/300 938/938 [==============================] - 0s 84us/step - loss: 1.1266 - mean_absolute_error: 0.6088 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00019: ReduceLROnPlateau reducing learning rate to 8.000000525498762e-06. Epoch 00019: val_loss did not improve from 1.68909 Epoch 20/300 938/938 [==============================] - 0s 75us/step - loss: 1.1196 - mean_absolute_error: 0.6043 - val_loss: 1.6891 - val_mean_absolute_error: 0.6967 Epoch 00020: val_loss did not improve from 1.68909 Epoch 00020: early stopping
# Impute the 263 missing Age values (first few ids listed in the comment).
fill_data('Age') # id: 6,18,20,27,29,30
Train on 938 samples, validate on 105 samples Epoch 1/300 938/938 [==============================] - 0s 341us/step - loss: 7.6284 - mean_absolute_error: 2.3602 - val_loss: 4.4499 - val_mean_absolute_error: 1.8288 Epoch 00001: val_loss improved from inf to 4.44989, saving model to checkpoint_Age.hdf5 Epoch 2/300 938/938 [==============================] - 0s 82us/step - loss: 3.8868 - mean_absolute_error: 1.6588 - val_loss: 2.4728 - val_mean_absolute_error: 1.3419 Epoch 00002: val_loss improved from 4.44989 to 2.47278, saving model to checkpoint_Age.hdf5 Epoch 3/300 938/938 [==============================] - 0s 81us/step - loss: 2.2828 - mean_absolute_error: 1.2684 - val_loss: 1.5693 - val_mean_absolute_error: 1.0532 Epoch 00003: val_loss improved from 2.47278 to 1.56933, saving model to checkpoint_Age.hdf5 Epoch 4/300 938/938 [==============================] - 0s 77us/step - loss: 1.5540 - mean_absolute_error: 1.0170 - val_loss: 1.1551 - val_mean_absolute_error: 0.8729 Epoch 00004: val_loss improved from 1.56933 to 1.15514, saving model to checkpoint_Age.hdf5 Epoch 5/300 938/938 [==============================] - 0s 80us/step - loss: 1.1987 - mean_absolute_error: 0.8832 - val_loss: 1.0211 - val_mean_absolute_error: 0.8047 Epoch 00005: val_loss improved from 1.15514 to 1.02112, saving model to checkpoint_Age.hdf5 Epoch 6/300 938/938 [==============================] - 0s 75us/step - loss: 1.0803 - mean_absolute_error: 0.8229 - val_loss: 0.9843 - val_mean_absolute_error: 0.7832 Epoch 00006: val_loss improved from 1.02112 to 0.98428, saving model to checkpoint_Age.hdf5 Epoch 7/300 938/938 [==============================] - 0s 79us/step - loss: 1.0654 - mean_absolute_error: 0.8107 - val_loss: 0.9686 - val_mean_absolute_error: 0.7786 Epoch 00007: val_loss improved from 0.98428 to 0.96857, saving model to checkpoint_Age.hdf5 Epoch 8/300 938/938 [==============================] - 0s 82us/step - loss: 1.0416 - mean_absolute_error: 0.8012 - val_loss: 0.9594 - 
val_mean_absolute_error: 0.7756 Epoch 00008: val_loss improved from 0.96857 to 0.95938, saving model to checkpoint_Age.hdf5 Epoch 9/300 938/938 [==============================] - 0s 74us/step - loss: 1.0249 - mean_absolute_error: 0.7952 - val_loss: 0.9534 - val_mean_absolute_error: 0.7736 Epoch 00009: val_loss improved from 0.95938 to 0.95341, saving model to checkpoint_Age.hdf5 Epoch 10/300 938/938 [==============================] - 0s 76us/step - loss: 1.0173 - mean_absolute_error: 0.7899 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00010: val_loss improved from 0.95341 to 0.95071, saving model to checkpoint_Age.hdf5 Epoch 11/300 938/938 [==============================] - 0s 80us/step - loss: 1.0108 - mean_absolute_error: 0.7877 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00011: val_loss did not improve from 0.95071 Epoch 12/300 938/938 [==============================] - 0s 80us/step - loss: 1.0022 - mean_absolute_error: 0.7844 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00012: val_loss did not improve from 0.95071 Epoch 13/300 938/938 [==============================] - 0s 75us/step - loss: 1.0092 - mean_absolute_error: 0.7845 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00013: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026. 
Epoch 00013: val_loss did not improve from 0.95071 Epoch 14/300 938/938 [==============================] - 0s 73us/step - loss: 0.9999 - mean_absolute_error: 0.7827 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00014: val_loss did not improve from 0.95071 Epoch 15/300 938/938 [==============================] - 0s 68us/step - loss: 1.0042 - mean_absolute_error: 0.7825 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00015: val_loss did not improve from 0.95071 Epoch 16/300 938/938 [==============================] - 0s 70us/step - loss: 1.0074 - mean_absolute_error: 0.7871 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00016: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05. Epoch 00016: val_loss did not improve from 0.95071 Epoch 17/300 938/938 [==============================] - 0s 76us/step - loss: 1.0043 - mean_absolute_error: 0.7832 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00017: val_loss did not improve from 0.95071 Epoch 18/300 938/938 [==============================] - 0s 73us/step - loss: 1.0017 - mean_absolute_error: 0.7830 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00018: val_loss did not improve from 0.95071 Epoch 19/300 938/938 [==============================] - 0s 72us/step - loss: 1.0063 - mean_absolute_error: 0.7838 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00019: ReduceLROnPlateau reducing learning rate to 8.000000525498762e-06. Epoch 00019: val_loss did not improve from 0.95071 Epoch 20/300 938/938 [==============================] - 0s 74us/step - loss: 1.0131 - mean_absolute_error: 0.7872 - val_loss: 0.9507 - val_mean_absolute_error: 0.7725 Epoch 00020: val_loss did not improve from 0.95071 Epoch 00020: early stopping
#df0 = df0.drop(['Title'], axis=1)
# Split the combined, preprocessed frame back into the original
# train (first 891 rows) and test (remaining rows) portions.
train0 = df0.iloc[:891].copy()
test0 = df0.iloc[891:].copy()
train0.head()
Pclass | Sex | Age | SibSp | Parch | Fare | Cabin | Embarked | Title | |
---|---|---|---|---|---|---|---|---|---|
PassengerId | |||||||||
1 | 3 | 0 | -0.546789 | 0.481104 | -0.444829 | -0.503210 | 0 | 0.0 | -0.541826 |
2 | 1 | 1 | 0.563282 | 0.481104 | -0.444829 | 0.733941 | 1 | 1.0 | 0.053183 |
3 | 3 | 1 | -0.269271 | -0.478904 | -0.444829 | -0.490169 | 0 | 0.0 | 0.648192 |
4 | 1 | 1 | 0.355144 | 0.481104 | -0.444829 | 0.382632 | 1 | 0.0 | 0.053183 |
5 | 3 | 0 | 0.355144 | -0.478904 | -0.444829 | -0.487754 | 0 | 0.0 | -0.541826 |
# Binary classifier over the engineered features:
# one ReLU hidden layer with dropout, softmax over the two Survived classes.
df0_cols = len(df0.columns)

model = Sequential()
model.add(Dense(64, input_shape=(df0_cols,), activation='relu',
                kernel_initializer=initializer))
model.add(Dropout(0.5, seed=random_n))
model.add(Dense(2, kernel_initializer=initializer, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
              metrics=['acc'])

# Callbacks: decay LR on val_loss plateau, checkpoint best weights,
# and stop once val_loss stalls for 10 epochs.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3,
                              min_lr=0.000001, verbose=1)
checkpointer = ModelCheckpoint(filepath='checkpoint_final.hdf5',
                               save_best_only=True, verbose=1)
early_stopping = EarlyStopping(patience=10, verbose=1)

epochs = 300
hist = model.fit(train0, train.Survived,
                 batch_size=5,
                 epochs=epochs,
                 validation_split=0.2,
                 callbacks=[reduce_lr, early_stopping, checkpointer],
                 verbose=1)

# Reload the best (lowest val_loss) weights before predicting on the test set.
model.load_weights('checkpoint_final.hdf5')
pred = model.predict(test0)
Train on 712 samples, validate on 179 samples Epoch 1/300 712/712 [==============================] - 1s 915us/step - loss: 0.6920 - acc: 0.6236 - val_loss: 0.4706 - val_acc: 0.7989 Epoch 00001: val_loss improved from inf to 0.47056, saving model to checkpoint_final.hdf5 Epoch 2/300 712/712 [==============================] - 0s 466us/step - loss: 0.5920 - acc: 0.7233 - val_loss: 0.4519 - val_acc: 0.8045 Epoch 00002: val_loss improved from 0.47056 to 0.45195, saving model to checkpoint_final.hdf5 Epoch 3/300 712/712 [==============================] - 0s 579us/step - loss: 0.5272 - acc: 0.7598 - val_loss: 0.4405 - val_acc: 0.7933 Epoch 00003: val_loss improved from 0.45195 to 0.44048, saving model to checkpoint_final.hdf5 Epoch 4/300 712/712 [==============================] - 0s 495us/step - loss: 0.5220 - acc: 0.7612 - val_loss: 0.4308 - val_acc: 0.7877 Epoch 00004: val_loss improved from 0.44048 to 0.43079, saving model to checkpoint_final.hdf5 Epoch 5/300 712/712 [==============================] - 0s 446us/step - loss: 0.5136 - acc: 0.7640 - val_loss: 0.4141 - val_acc: 0.8101 Epoch 00005: val_loss improved from 0.43079 to 0.41408, saving model to checkpoint_final.hdf5 Epoch 6/300 712/712 [==============================] - 0s 564us/step - loss: 0.5008 - acc: 0.7921 - val_loss: 0.4178 - val_acc: 0.8156 Epoch 00006: val_loss did not improve from 0.41408 Epoch 7/300 712/712 [==============================] - 0s 457us/step - loss: 0.4654 - acc: 0.7935 - val_loss: 0.4178 - val_acc: 0.8045 Epoch 00007: val_loss did not improve from 0.41408 Epoch 8/300 712/712 [==============================] - 0s 456us/step - loss: 0.4761 - acc: 0.7907 - val_loss: 0.4091 - val_acc: 0.8156 Epoch 00008: val_loss improved from 0.41408 to 0.40911, saving model to checkpoint_final.hdf5 Epoch 9/300 712/712 [==============================] - 0s 510us/step - loss: 0.4743 - acc: 0.8048 - val_loss: 0.4105 - val_acc: 0.8101 Epoch 00009: val_loss did not improve from 0.40911 Epoch 10/300 712/712 
[==============================] - 0s 450us/step - loss: 0.4678 - acc: 0.7949 - val_loss: 0.3936 - val_acc: 0.8101 Epoch 00010: val_loss improved from 0.40911 to 0.39362, saving model to checkpoint_final.hdf5 Epoch 11/300 712/712 [==============================] - 0s 470us/step - loss: 0.4531 - acc: 0.8132 - val_loss: 0.4029 - val_acc: 0.8212 Epoch 00011: val_loss did not improve from 0.39362 Epoch 12/300 712/712 [==============================] - 0s 483us/step - loss: 0.4576 - acc: 0.8076 - val_loss: 0.3877 - val_acc: 0.8156 Epoch 00012: val_loss improved from 0.39362 to 0.38769, saving model to checkpoint_final.hdf5 Epoch 13/300 712/712 [==============================] - 0s 468us/step - loss: 0.4707 - acc: 0.8034 - val_loss: 0.3819 - val_acc: 0.8212 Epoch 00013: val_loss improved from 0.38769 to 0.38185, saving model to checkpoint_final.hdf5 Epoch 14/300 712/712 [==============================] - 0s 464us/step - loss: 0.4626 - acc: 0.8034 - val_loss: 0.3824 - val_acc: 0.8212 Epoch 00014: val_loss did not improve from 0.38185 Epoch 15/300 712/712 [==============================] - 0s 465us/step - loss: 0.4725 - acc: 0.7935 - val_loss: 0.3826 - val_acc: 0.8212 Epoch 00015: val_loss did not improve from 0.38185 Epoch 16/300 712/712 [==============================] - 0s 458us/step - loss: 0.4554 - acc: 0.8104 - val_loss: 0.3769 - val_acc: 0.8268 Epoch 00016: val_loss improved from 0.38185 to 0.37686, saving model to checkpoint_final.hdf5 Epoch 17/300 712/712 [==============================] - 0s 461us/step - loss: 0.4565 - acc: 0.8104 - val_loss: 0.3707 - val_acc: 0.8380 Epoch 00017: val_loss improved from 0.37686 to 0.37075, saving model to checkpoint_final.hdf5 Epoch 18/300 712/712 [==============================] - 0s 462us/step - loss: 0.4378 - acc: 0.8132 - val_loss: 0.3780 - val_acc: 0.8324 Epoch 00018: val_loss did not improve from 0.37075 Epoch 19/300 712/712 [==============================] - 0s 457us/step - loss: 0.4452 - acc: 0.8076 - val_loss: 0.3650 - 
val_acc: 0.8380 Epoch 00019: val_loss improved from 0.37075 to 0.36499, saving model to checkpoint_final.hdf5 Epoch 20/300 712/712 [==============================] - 0s 462us/step - loss: 0.4525 - acc: 0.8076 - val_loss: 0.3754 - val_acc: 0.8436 Epoch 00020: val_loss did not improve from 0.36499 Epoch 21/300 712/712 [==============================] - 0s 461us/step - loss: 0.4346 - acc: 0.8174 - val_loss: 0.3658 - val_acc: 0.8380 Epoch 00021: val_loss did not improve from 0.36499 Epoch 22/300 712/712 [==============================] - 0s 456us/step - loss: 0.4429 - acc: 0.7963 - val_loss: 0.3620 - val_acc: 0.8324 Epoch 00022: val_loss improved from 0.36499 to 0.36197, saving model to checkpoint_final.hdf5 Epoch 23/300 712/712 [==============================] - 0s 451us/step - loss: 0.4420 - acc: 0.8048 - val_loss: 0.3654 - val_acc: 0.8380 Epoch 00023: val_loss did not improve from 0.36197 Epoch 24/300 712/712 [==============================] - 0s 459us/step - loss: 0.4237 - acc: 0.8188 - val_loss: 0.3661 - val_acc: 0.8324 Epoch 00024: val_loss did not improve from 0.36197 Epoch 25/300 712/712 [==============================] - 0s 461us/step - loss: 0.4378 - acc: 0.8174 - val_loss: 0.3621 - val_acc: 0.8380 Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026. 
Epoch 00025: val_loss did not improve from 0.36197 Epoch 26/300 712/712 [==============================] - 0s 447us/step - loss: 0.4346 - acc: 0.8132 - val_loss: 0.3625 - val_acc: 0.8380 Epoch 00026: val_loss did not improve from 0.36197 Epoch 27/300 712/712 [==============================] - 0s 474us/step - loss: 0.4328 - acc: 0.8188 - val_loss: 0.3624 - val_acc: 0.8380 Epoch 00027: val_loss did not improve from 0.36197 Epoch 28/300 712/712 [==============================] - 0s 460us/step - loss: 0.4285 - acc: 0.8174 - val_loss: 0.3616 - val_acc: 0.8380 Epoch 00028: val_loss improved from 0.36197 to 0.36160, saving model to checkpoint_final.hdf5 Epoch 29/300 712/712 [==============================] - 0s 462us/step - loss: 0.4219 - acc: 0.8202 - val_loss: 0.3602 - val_acc: 0.8380 Epoch 00029: val_loss improved from 0.36160 to 0.36017, saving model to checkpoint_final.hdf5 Epoch 30/300 712/712 [==============================] - 0s 462us/step - loss: 0.4244 - acc: 0.8188 - val_loss: 0.3642 - val_acc: 0.8324 Epoch 00030: val_loss did not improve from 0.36017 Epoch 31/300 712/712 [==============================] - 0s 459us/step - loss: 0.4344 - acc: 0.8034 - val_loss: 0.3621 - val_acc: 0.8380 Epoch 00031: val_loss did not improve from 0.36017 Epoch 32/300 712/712 [==============================] - 0s 469us/step - loss: 0.4240 - acc: 0.8174 - val_loss: 0.3607 - val_acc: 0.8380 Epoch 00032: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05. 
Epoch 00032: val_loss did not improve from 0.36017 Epoch 33/300 712/712 [==============================] - 0s 459us/step - loss: 0.4373 - acc: 0.8104 - val_loss: 0.3609 - val_acc: 0.8380 Epoch 00033: val_loss did not improve from 0.36017 Epoch 34/300 712/712 [==============================] - 0s 485us/step - loss: 0.4190 - acc: 0.8188 - val_loss: 0.3609 - val_acc: 0.8380 Epoch 00034: val_loss did not improve from 0.36017 Epoch 35/300 712/712 [==============================] - 0s 456us/step - loss: 0.4233 - acc: 0.8230 - val_loss: 0.3610 - val_acc: 0.8380 Epoch 00035: ReduceLROnPlateau reducing learning rate to 8.000000525498762e-06. Epoch 00035: val_loss did not improve from 0.36017 Epoch 36/300 712/712 [==============================] - 0s 462us/step - loss: 0.4290 - acc: 0.8244 - val_loss: 0.3610 - val_acc: 0.8380 Epoch 00036: val_loss did not improve from 0.36017 Epoch 37/300 712/712 [==============================] - 0s 465us/step - loss: 0.4351 - acc: 0.8146 - val_loss: 0.3610 - val_acc: 0.8380 Epoch 00037: val_loss did not improve from 0.36017 Epoch 38/300 712/712 [==============================] - 0s 472us/step - loss: 0.4340 - acc: 0.8174 - val_loss: 0.3611 - val_acc: 0.8380 Epoch 00038: ReduceLROnPlateau reducing learning rate to 1.6000001778593287e-06. Epoch 00038: val_loss did not improve from 0.36017 Epoch 39/300 712/712 [==============================] - 0s 448us/step - loss: 0.4410 - acc: 0.8076 - val_loss: 0.3611 - val_acc: 0.8380 Epoch 00039: val_loss did not improve from 0.36017 Epoch 00039: early stopping
# print(model.metrics_names)
# Plot per-epoch training accuracy (blue) and loss (red) from the fit history.
for metric, fmt in (('acc', 'b-'), ('loss', 'r-')):
    plt.plot(hist.history[metric], fmt, label=metric)
plt.xlabel('epochs')
plt.legend()
plt.show()
# Take the argmax over the two softmax outputs as the 0/1 Survived prediction.
result = pred.argmax(axis=1)

# Compare to the previous result, if one exists. On the very first run there
# is no 'submission.csv' yet; skip the comparison instead of crashing.
try:
    prev = pd.read_csv('submission.csv', index_col=0)
    print('Diff: ', np.sum(prev.Survived.values != result))
except FileNotFoundError:
    print('Diff: no previous submission.csv to compare against')
print('Survived: ', result.sum())

# submission data to csv file (PassengerId, Survived; no index column)
submission = pd.DataFrame({'PassengerId': test.index, 'Survived': result})
submission.to_csv('submission.csv', index=False)
Diff: 0 Survived: 160
result
array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0])