#!/usr/bin/env python
# coding: utf-8

# *Python Machine Learning 3rd Edition* by [Sebastian Raschka](https://sebastianraschka.com) & [Vahid Mirjalili](http://vahidmirjalili.com), Packt Publishing Ltd. 2019
#
# Code Repository: https://github.com/rasbt/python-machine-learning-book-3rd-edition
#
# Code License: [MIT License](https://github.com/rasbt/python-machine-learning-book-3rd-edition/blob/master/LICENSE.txt)

# # Chapter 14: Going Deeper -- the Mechanics of TensorFlow (Part 2/3)

# Note that the optional watermark extension is a small IPython notebook plugin that I developed to make the code reproducible. You can just skip the following line(s).

# In[1]:

get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', '-a "Sebastian Raschka & Vahid Mirjalili" -u -d -p numpy,scipy,matplotlib,tensorflow')


# In[2]:

import numpy as np
import tensorflow as tf
import pandas as pd
from IPython.display import Image


# ## TensorFlow Estimators
#
# ##### Steps for using pre-made estimators
#
# * **Step 1:** Define the input function for importing the data
# * **Step 2:** Define the feature columns to bridge between the estimator and the data
# * **Step 3:** Instantiate an estimator or convert a Keras model to an estimator (a sketch of the Keras conversion route appears at the end of this notebook)
# * **Step 4:** Use the estimator: train(), evaluate(), and predict()

# In[3]:

tf.random.set_seed(1)
np.random.seed(1)


# ### Working with feature columns
#
# * See definition: https://developers.google.com/machine-learning/glossary/#feature_columns
# * Documentation: https://www.tensorflow.org/api_docs/python/tf/feature_column

# In[4]:

Image(filename='images/02.png', width=700)


# In[5]:

dataset_path = tf.keras.utils.get_file(
    "auto-mpg.data",
    ("http://archive.ics.uci.edu/ml/machine-learning-databases"
     "/auto-mpg/auto-mpg.data"))

column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower',
                'Weight', 'Acceleration', 'ModelYear', 'Origin']

df = pd.read_csv(dataset_path, names=column_names,
                 na_values='?', comment='\t',
                 sep=' ', skipinitialspace=True)

df.tail()


# In[6]:

# check for and drop rows with missing values
print(df.isna().sum())

df = df.dropna()
df = df.reset_index(drop=True)
df.tail()


# In[7]:

import sklearn
import sklearn.model_selection

df_train, df_test = sklearn.model_selection.train_test_split(df, train_size=0.8)
train_stats = df_train.describe().transpose()
train_stats


# In[8]:

# standardize the numeric columns using the training-set statistics
numeric_column_names = ['Cylinders', 'Displacement',
                        'Horsepower', 'Weight',
                        'Acceleration']

df_train_norm, df_test_norm = df_train.copy(), df_test.copy()

for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std = train_stats.loc[col_name, 'std']
    df_train_norm.loc[:, col_name] = (df_train_norm.loc[:, col_name] - mean)/std
    df_test_norm.loc[:, col_name] = (df_test_norm.loc[:, col_name] - mean)/std

df_train_norm.tail()


# #### Numeric Columns

# In[9]:

numeric_features = []

for col_name in numeric_column_names:
    numeric_features.append(tf.feature_column.numeric_column(key=col_name))

numeric_features


# In[10]:

# bucketize the model year into four intervals:
# (-inf, 73), [73, 76), [76, 79), [79, inf)
feature_year = tf.feature_column.numeric_column(key='ModelYear')

bucketized_features = []
bucketized_features.append(
    tf.feature_column.bucketized_column(
        source_column=feature_year,
        boundaries=[73, 76, 79]))

print(bucketized_features)


# In[11]:

# one-hot encode the categorical 'Origin' column (vocabulary: 1, 2, 3)
feature_origin = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Origin',
    vocabulary_list=[1, 2, 3])

categorical_indicator_features = []
categorical_indicator_features.append(
    tf.feature_column.indicator_column(feature_origin))

print(categorical_indicator_features)
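
# As a quick sanity check of the columns defined above, `tf.keras.layers.DenseFeatures`
# maps a dict of raw input tensors through a list of feature columns and returns a
# single dense tensor. The following cell is a minimal sketch: the layer and the
# example row (`example_input`) are illustrative additions with made-up values, and
# the numeric columns are assumed to be already standardized.

dense_features_layer = tf.keras.layers.DenseFeatures(
    numeric_features + bucketized_features + categorical_indicator_features)

# one hypothetical example (batch of size 1)
example_input = {
    'Cylinders': tf.constant([[0.5]]),
    'Displacement': tf.constant([[0.3]]),
    'Horsepower': tf.constant([[-0.2]]),
    'Weight': tf.constant([[0.1]]),
    'Acceleration': tf.constant([[-0.4]]),
    'ModelYear': tf.constant([[75.0]]),
    'Origin': tf.constant([[2]], dtype=tf.int64),
}

# expected width: 5 numeric values + 4 ModelYear buckets + 3 Origin indicators = 12
print(dense_features_layer(example_input))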
# ### Machine learning with pre-made Estimators

# In[12]:

def train_input_fn(df_train, batch_size=8):
    df = df_train.copy()
    train_x, train_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices((dict(train_x), train_y))

    # shuffle, repeat, and batch the examples
    return dataset.shuffle(1000).repeat().batch(batch_size)

## inspection
ds = train_input_fn(df_train_norm)
batch = next(iter(ds))
print('Keys:', batch[0].keys())
print('Batch Model Years:', batch[0]['ModelYear'])


# In[13]:

all_feature_columns = (numeric_features +
                       bucketized_features +
                       categorical_indicator_features)

print(all_feature_columns)


# In[14]:

regressor = tf.estimator.DNNRegressor(
    feature_columns=all_feature_columns,
    hidden_units=[32, 10],
    model_dir='models/autompg-dnnregressor/')


# In[15]:

EPOCHS = 1000
BATCH_SIZE = 8
total_steps = EPOCHS * int(np.ceil(len(df_train) / BATCH_SIZE))
print('Training Steps:', total_steps)

regressor.train(
    input_fn=lambda: train_input_fn(df_train_norm, batch_size=BATCH_SIZE),
    steps=total_steps)


# In[16]:

# reload the trained model from its checkpoint directory
reloaded_regressor = tf.estimator.DNNRegressor(
    feature_columns=all_feature_columns,
    hidden_units=[32, 10],
    warm_start_from='models/autompg-dnnregressor/',
    model_dir='models/autompg-dnnregressor/')


# In[17]:

def eval_input_fn(df_test, batch_size=8):
    df = df_test.copy()
    test_x, test_y = df, df.pop('MPG')
    dataset = tf.data.Dataset.from_tensor_slices((dict(test_x), test_y))
    return dataset.batch(batch_size)

eval_results = reloaded_regressor.evaluate(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

for key in eval_results:
    print('{:15s} {}'.format(key, eval_results[key]))

print('Average-Loss {:.4f}'.format(eval_results['average_loss']))


# In[18]:

pred_res = regressor.predict(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

print(next(iter(pred_res)))


# #### Boosted Tree Regressor

# In[19]:

boosted_tree = tf.estimator.BoostedTreesRegressor(
    feature_columns=all_feature_columns,
    n_batches_per_layer=20,
    n_trees=200)

# no `steps` argument is needed here: the boosted-trees estimator stops
# training on its own once the specified number of trees has been built
boosted_tree.train(
    input_fn=lambda: train_input_fn(df_train_norm, batch_size=BATCH_SIZE))

eval_results = boosted_tree.evaluate(
    input_fn=lambda: eval_input_fn(df_test_norm, batch_size=8))

print(eval_results)
print('Average-Loss {:.4f}'.format(eval_results['average_loss']))


# ---
#
# Readers may ignore the next cell.

# In[20]:

get_ipython().system(' python ../.convert_notebook_to_script.py --input ch14_part2.ipynb --output ch14_part2.py')
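
# ---
#
# As mentioned in Step 3 at the top of this notebook, a compiled Keras model can also
# be converted into an Estimator via `tf.keras.estimator.model_to_estimator`. The cell
# below is a minimal sketch of that route, not a replication of the regressors above:
# for simplicity it uses only the five standardized numeric features, packed into a
# single tensor. The input name ('input-features'), the helper
# `numeric_train_input_fn`, the layer sizes, and the model_dir are hypothetical choices.

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(len(numeric_column_names),), name='input-features'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer='adam', loss='mse')

def numeric_train_input_fn(df, batch_size=8):
    df = df.copy()
    labels = df.pop('MPG').values.astype('float32')
    # pack the numeric columns into one (n_examples, 5) array; the dict key
    # must match the name of the Keras model's input layer
    features = df[numeric_column_names].values.astype('float32')
    dataset = tf.data.Dataset.from_tensor_slices(
        ({'input-features': features}, labels))
    return dataset.shuffle(1000).repeat().batch(batch_size)

keras_estimator = tf.keras.estimator.model_to_estimator(
    keras_model=model,
    model_dir='models/autompg-keras-estimator/')

keras_estimator.train(
    input_fn=lambda: numeric_train_input_fn(df_train_norm, batch_size=BATCH_SIZE),
    steps=total_steps)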