import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Load the raw training and test data from CSV.
training_data_df = pd.read_csv("Exercise Files/03/sales_data_training.csv")
test_data_df = pd.read_csv("Exercise Files/03/sales_data_test.csv")

# Scale every column to the 0-to-1 range. The scaler is fit on the training
# data only; the identical transform is then applied to the test data so both
# sets share the same scaling parameters.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_training = scaler.fit_transform(training_data_df)
scaled_testing = scaler.transform(test_data_df)

# Print out the adjustment that the scaler applied to the total_earnings column.
# Look the column index up by name instead of hard-coding position 8, so this
# report stays correct if the CSV column order ever changes.
earnings_col = training_data_df.columns.get_loc('total_earnings')
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}".format(
    scaler.scale_[earnings_col], scaler.min_[earnings_col]))
# Output: Note: total_earnings values were scaled by multiplying by 0.0000036968 and adding -0.115913
# Wrap the scaled NumPy arrays back into DataFrames, re-attaching the original
# column names so the scaled data stays self-describing.
train_columns = training_data_df.columns.values
test_columns = test_data_df.columns.values
scaled_training_df = pd.DataFrame(scaled_training, columns=train_columns)
scaled_testing_df = pd.DataFrame(scaled_testing, columns=test_columns)

# Save scaled data dataframes to new CSV files
# scaled_training_df.to_csv("03/sales_data_training_scaled.csv", index=False)
# scaled_testing_df.to_csv("03/sales_data_test_scaled.csv", index=False)
# Sanity check: print which computational backend Keras is configured to use
# (the pasted output below shows "tensorflow").
from keras import backend as K
print(K.backend())
# Output: C:\Users\rstancut\AppData\Local\Continuum\anaconda2\envs\py35_b\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`. from ._conv import register_converters as _register_converters Using TensorFlow backend.
# Output: tensorflow
from keras.models import Sequential
from keras.layers import *

# Use the scaled training data prepared above rather than re-reading from disk.
# training_data_df = pd.read_csv("sales_data_training_scaled.csv")
training_data_df = scaled_training_df.copy()

# Split features (X) from the regression target (Y = total_earnings).
X = training_data_df.drop('total_earnings', axis=1).values
Y = training_data_df[['total_earnings']].values

# Define the model: a small fully-connected regression network.
# Derive the input dimension from the data instead of hard-coding 9, so the
# model definition stays valid if feature columns are ever added or removed.
model = Sequential()
model.add(Dense(50, input_dim=X.shape[1], activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss="mean_squared_error", optimizer="adam")

# Train for 50 passes over the shuffled training data.
model.fit(
    X,
    Y,
    epochs=50,
    shuffle=True,
    verbose=2
)
# Output: Epoch 1/50 - 0s - loss: 0.0157 Epoch 2/50 - 0s - loss: 0.0011 Epoch 3/50 - 0s - loss: 4.9348e-04 Epoch 4/50 - 0s - loss: 3.2267e-04 Epoch 5/50 - 0s - loss: 2.2482e-04 Epoch 6/50 - 0s - loss: 1.5807e-04 Epoch 7/50 - 0s - loss: 1.4252e-04 Epoch 8/50 - 0s - loss: 1.0628e-04 Epoch 9/50 - 0s - loss: 1.0160e-04 Epoch 10/50 - 0s - loss: 7.4911e-05 Epoch 11/50 - 0s - loss: 6.9303e-05 Epoch 12/50 - 0s - loss: 5.8302e-05 Epoch 13/50 - 0s - loss: 5.3947e-05 Epoch 14/50 - 0s - loss: 4.8728e-05 Epoch 15/50 - 0s - loss: 5.8212e-05 Epoch 16/50 - 0s - loss: 6.9634e-05 Epoch 17/50 - 0s - loss: 4.0381e-05 Epoch 18/50 - 0s - loss: 3.4131e-05 Epoch 19/50 - 0s - loss: 3.7760e-05 Epoch 20/50 - 0s - loss: 5.5089e-05 Epoch 21/50 - 0s - loss: 3.1485e-05 Epoch 22/50 - 0s - loss: 2.9684e-05 Epoch 23/50 - 0s - loss: 2.8411e-05 Epoch 24/50 - 0s - loss: 3.4527e-05 Epoch 25/50 - 0s - loss: 2.6026e-05 Epoch 26/50 - 0s - loss: 2.3656e-05 Epoch 27/50 - 0s - loss: 2.1731e-05 Epoch 28/50 - 0s - loss: 2.5504e-05 Epoch 29/50 - 0s - loss: 2.4802e-05 Epoch 30/50 - 0s - loss: 2.1677e-05 Epoch 31/50 - 0s - loss: 3.3145e-05 Epoch 32/50 - 0s - loss: 2.1167e-05 Epoch 33/50 - 0s - loss: 2.1312e-05 Epoch 34/50 - 0s - loss: 2.1827e-05 Epoch 35/50 - 0s - loss: 2.5391e-05 Epoch 36/50 - 0s - loss: 2.7949e-05 Epoch 37/50 - 0s - loss: 2.2627e-05 Epoch 38/50 - 0s - loss: 2.0507e-05 Epoch 39/50 - 0s - loss: 1.8280e-05 Epoch 40/50 - 0s - loss: 2.1268e-05 Epoch 41/50 - 0s - loss: 3.3388e-05 Epoch 42/50 - 0s - loss: 2.9092e-05 Epoch 43/50 - 0s - loss: 2.6891e-05 Epoch 44/50 - 0s - loss: 2.7179e-05 Epoch 45/50 - 0s - loss: 2.2383e-05 Epoch 46/50 - 0s - loss: 3.5679e-05 Epoch 47/50 - 0s - loss: 3.4162e-05 Epoch 48/50 - 0s - loss: 3.5844e-05 Epoch 49/50 - 0s - loss: 3.7718e-05 Epoch 50/50 - 0s - loss: 4.0113e-05
# Output: <keras.callbacks.History at 0x2b7d74efc88>
# Evaluate the trained model against the held-out (already-scaled) test set.
test_data_df = scaled_testing_df.copy()
feature_frame = test_data_df.drop('total_earnings', axis=1)
target_frame = test_data_df[['total_earnings']]
X_test = feature_frame.values
Y_test = target_frame.values
test_error_rate = model.evaluate(X_test, Y_test, verbose=0)
print("The mean squared error (MSE) for the test data set is: {}".format(test_error_rate))
# Output: The mean squared error (MSE) for the test data set is: 0.00010563843767158687
# Load the features for the proposed product. Use the same "Exercise Files"
# path prefix as every other CSV read in this script — the bare "04/..." path
# was inconsistent and would fail when run from the project root.
X = pd.read_csv("Exercise Files/04/proposed_new_product.csv").values
prediction = model.predict(X)

# Grab just the first element of the first prediction (since we only have one).
prediction = prediction[0][0]

# Re-scale the prediction from the 0-to-1 range back to dollars by inverting
# the exact MinMaxScaler transform (scaled = x * scale_ + min_), instead of
# relying on the hand-copied, rounded constants 0.1159 / 0.0000036968.
earnings_col = training_data_df.columns.get_loc('total_earnings')
prediction = prediction - scaler.min_[earnings_col]
prediction = prediction / scaler.scale_[earnings_col]
print("Earnings Prediction for Proposed Product - ${}".format(prediction))
# Output: Earnings Prediction for Proposed Product - $264798.70252499875
# Persist the trained model (architecture + learned weights) to a single HDF5
# file so it can be reloaded later without retraining.
model.save("trained_model.h5")
print("Model saved to disk")
# Output: Model saved to disk
from keras.models import load_model

# Reload the saved model from disk; it should reproduce exactly the same
# prediction as the in-memory model it was saved from.
model_disk = load_model("trained_model.h5")

X = pd.read_csv("Exercise Files/04/proposed_new_product.csv").values
prediction = model_disk.predict(X)

# Grab just the first element of the first prediction (since we only have one).
prediction = prediction[0][0]

# Re-scale the prediction from the 0-to-1 range back to dollars by inverting
# the exact MinMaxScaler transform (scaled = x * scale_ + min_), instead of
# relying on the hand-copied, rounded constants 0.1159 / 0.0000036968.
earnings_col = training_data_df.columns.get_loc('total_earnings')
prediction = prediction - scaler.min_[earnings_col]
prediction = prediction / scaler.scale_[earnings_col]
print("Earnings Prediction for Proposed Product - ${}".format(prediction))
# Output: Earnings Prediction for Proposed Product - $264798.70252499875