Building Deep Learning Applications with Keras 2.0

source: Lynda.com

preprocess data

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
In [2]:
training_data_df = pd.read_csv("Exercise Files/03/sales_data_training.csv")

test_data_df = pd.read_csv("Exercise Files/03/sales_data_test.csv")
In [3]:
scaler = MinMaxScaler(feature_range=(0,1))
In [4]:
scaled_training = scaler.fit_transform(training_data_df)
scaled_testing = scaler.transform(test_data_df)
In [5]:
# Print out the adjustment that the scaler applied to the total_earnings column of data
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}".format(scaler.scale_[8], scaler.min_[8]))
Note: total_earnings values were scaled by multiplying by 0.0000036968 and adding -0.115913
In [6]:
# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training, columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(scaled_testing, columns=test_data_df.columns.values)

# Save scaled data dataframes to new CSV files
# scaled_training_df.to_csv("03/sales_data_training_scaled.csv", index=False)
# scaled_testing_df.to_csv("03/sales_data_test_scaled.csv", index=False)

create model

In [7]:
from keras import backend as K
print(K.backend())
C:\Users\rstancut\AppData\Local\Continuum\anaconda2\envs\py35_b\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
tensorflow
In [8]:
from keras.models import Sequential
from keras.layers import *
In [9]:
# training_data_df = pd.read_csv("sales_data_training_scaled.csv")
training_data_df = scaled_training_df.copy()

X = training_data_df.drop('total_earnings', axis=1).values
Y = training_data_df[['total_earnings']].values

# Define the model
model = Sequential()
model.add(Dense(50, input_dim=9, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss="mean_squared_error", optimizer="adam")

train model

In [10]:
model.fit(
    X,
    Y,
    epochs=50,
    shuffle=True,
    verbose=2
)
Epoch 1/50
 - 0s - loss: 0.0157
Epoch 2/50
 - 0s - loss: 0.0011
Epoch 3/50
 - 0s - loss: 4.9348e-04
Epoch 4/50
 - 0s - loss: 3.2267e-04
Epoch 5/50
 - 0s - loss: 2.2482e-04
Epoch 6/50
 - 0s - loss: 1.5807e-04
Epoch 7/50
 - 0s - loss: 1.4252e-04
Epoch 8/50
 - 0s - loss: 1.0628e-04
Epoch 9/50
 - 0s - loss: 1.0160e-04
Epoch 10/50
 - 0s - loss: 7.4911e-05
Epoch 11/50
 - 0s - loss: 6.9303e-05
Epoch 12/50
 - 0s - loss: 5.8302e-05
Epoch 13/50
 - 0s - loss: 5.3947e-05
Epoch 14/50
 - 0s - loss: 4.8728e-05
Epoch 15/50
 - 0s - loss: 5.8212e-05
Epoch 16/50
 - 0s - loss: 6.9634e-05
Epoch 17/50
 - 0s - loss: 4.0381e-05
Epoch 18/50
 - 0s - loss: 3.4131e-05
Epoch 19/50
 - 0s - loss: 3.7760e-05
Epoch 20/50
 - 0s - loss: 5.5089e-05
Epoch 21/50
 - 0s - loss: 3.1485e-05
Epoch 22/50
 - 0s - loss: 2.9684e-05
Epoch 23/50
 - 0s - loss: 2.8411e-05
Epoch 24/50
 - 0s - loss: 3.4527e-05
Epoch 25/50
 - 0s - loss: 2.6026e-05
Epoch 26/50
 - 0s - loss: 2.3656e-05
Epoch 27/50
 - 0s - loss: 2.1731e-05
Epoch 28/50
 - 0s - loss: 2.5504e-05
Epoch 29/50
 - 0s - loss: 2.4802e-05
Epoch 30/50
 - 0s - loss: 2.1677e-05
Epoch 31/50
 - 0s - loss: 3.3145e-05
Epoch 32/50
 - 0s - loss: 2.1167e-05
Epoch 33/50
 - 0s - loss: 2.1312e-05
Epoch 34/50
 - 0s - loss: 2.1827e-05
Epoch 35/50
 - 0s - loss: 2.5391e-05
Epoch 36/50
 - 0s - loss: 2.7949e-05
Epoch 37/50
 - 0s - loss: 2.2627e-05
Epoch 38/50
 - 0s - loss: 2.0507e-05
Epoch 39/50
 - 0s - loss: 1.8280e-05
Epoch 40/50
 - 0s - loss: 2.1268e-05
Epoch 41/50
 - 0s - loss: 3.3388e-05
Epoch 42/50
 - 0s - loss: 2.9092e-05
Epoch 43/50
 - 0s - loss: 2.6891e-05
Epoch 44/50
 - 0s - loss: 2.7179e-05
Epoch 45/50
 - 0s - loss: 2.2383e-05
Epoch 46/50
 - 0s - loss: 3.5679e-05
Epoch 47/50
 - 0s - loss: 3.4162e-05
Epoch 48/50
 - 0s - loss: 3.5844e-05
Epoch 49/50
 - 0s - loss: 3.7718e-05
Epoch 50/50
 - 0s - loss: 4.0113e-05
Out[10]:
<keras.callbacks.History at 0x2b7d74efc88>
In [11]:
test_data_df = scaled_testing_df.copy()

X_test = test_data_df.drop('total_earnings', axis=1).values
Y_test = test_data_df[['total_earnings']].values
In [12]:
test_error_rate = model.evaluate(X_test, Y_test, verbose=0)
print("The mean squared error (MSE) for the test data set is: {}".format(test_error_rate))
The mean squared error (MSE) for the test data set is: 0.00010563843767158687

predict

In [13]:
X = pd.read_csv("04/proposed_new_product.csv").values
In [14]:
prediction = model.predict(X)
In [15]:
# Grab just the first element of the first prediction (since that's the only have one)
prediction = prediction[0][0]
In [16]:
# Re-scale the data from the 0-to-1 range back to dollars
# These constants are from when the data was originally scaled down to the 0-to-1 range
prediction = prediction + 0.1159
prediction = prediction / 0.0000036968

print("Earnings Prediction for Proposed Product - ${}".format(prediction))
Earnings Prediction for Proposed Product - $264798.70252499875

save model

In [17]:
model.save("trained_model.h5")
print("Model saved to disk")
Model saved to disk

load model

In [18]:
from keras.models import load_model
In [19]:
model_disk = load_model("trained_model.h5")
In [20]:
X = pd.read_csv("Exercise Files/04/proposed_new_product.csv").values
prediction = model_disk.predict(X)

# Grab just the first element of the first prediction (since that's the only have one)
prediction = prediction[0][0]

# Re-scale the data from the 0-to-1 range back to dollars
# These constants are from when the data was originally scaled down to the 0-to-1 range
prediction = prediction + 0.1159
prediction = prediction / 0.0000036968

print("Earnings Prediction for Proposed Product - ${}".format(prediction))
Earnings Prediction for Proposed Product - $264798.70252499875