from __future__ import absolute_import, division, print_function
import numpy as np
import tensorflow as tf
# Shorthand alias used throughout this notebook-style script.
keras = tf.keras
# Written against TF 1.12 / tf.keras 2.1.6-tf (versions printed below).
print(tf.__version__)
print(tf.keras.__version__)
1.12.0 2.1.6-tf
https://www.tensorflow.org/guide/keras#build_a_simple_model
In Keras, you assemble layers to build models. A model is (usually) a graph of layers. The most common type of model is a stack of layers: the tf.keras.Sequential
model.
## To build a simple, fully-connected network (i.e. multi-layer perceptron)
# If you specify the input shape, the model gets built continuously, as you are adding layers.
# Note that when using this delayed-build pattern (no input shape specified),
# the model doesn't have any weights until the first call,
# to a training/evaluation method (since it isn't yet built)
# Delayed-build pattern: no input shape is given, so the model owns no
# variables and adds no ops to the default graph until it is first called
# on data (or until a training/evaluation method triggers the build).
model = keras.Sequential([
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax'),
])
# Both the variable collection and the op list are still empty here.
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
print(tf.get_default_graph().get_operations())
del model
[] []
There are many tf.keras.layers available with some common constructor parameters:
activation
: Set the activation function for the layer. This parameter is specified by the name of a built-in function or as a callable object. By default, no activation is applied.
kernel_initializer
and bias_initializer
: The initialization schemes that create the layer's weights (kernel and bias). This parameter is a name or a callable object. This defaults to the "Glorot uniform"
initializer.kernel_regularizer
and bias_regularizer
: The regularization schemes that apply to the layer's weights (kernel and bias), such as L1 or L2 regularization. By default, no regularization is applied.
The following instantiates tf.keras.layers.Dense
layers using constructor arguments:
keras.backend.clear_session()
tf.reset_default_graph()
# Common Dense constructor parameters, demonstrated one at a time.  Each
# call below only constructs a layer object; nothing touches the graph
# until a layer is called on a tensor.
for dense_kwargs in (
    dict(activation='sigmoid'),                                  # activation by name
    dict(activation=tf.sigmoid),                                 # activation by callable
    dict(kernel_regularizer=tf.keras.regularizers.l1(0.01)),     # L1 on the kernel matrix
    dict(bias_regularizer=tf.keras.regularizers.l2(0.01)),       # L2 on the bias vector
    dict(kernel_initializer='orthogonal'),                       # orthogonal kernel init
    dict(bias_initializer=tf.keras.initializers.constant(2.0)),  # bias initialized to 2.0
):
    keras.layers.Dense(64, **dense_kwargs)
<tensorflow.python.keras.layers.core.Dense at 0x7ff1e70874a8>
# Constructing layers above registered nothing in the graph:
# both the variable collection and the op list are still empty.
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
print(tf.get_default_graph().get_operations())
[] []
https://www.tensorflow.org/guide/keras?hl=ko#train_and_evaluate
After the model is constructed, configure its learning process by calling the compile
method, tf.keras.Model.compile
or tf.keras.Sequential.compile
takes three important arguments.
optimizer
: This object specifies the training procedure. Pass it optimizer instances from the tf.train
module, such as tf.train.AdamOptimizer
, tf.train.RMSPropOptimizer
, or tf.train.GradientDescentOptimizer
.loss
: The function to minimize during optimization. Common choices include mean square error (mse
), categorical_crossentropy
, and binary_crossentropy
. Loss functions are specified by name or by passing a callable object from the tf.keras.losses
module.metrics
: Used to monitor training. These are string names or callables from the tf.keras.metrics
module.
keras.backend.clear_session()
tf.reset_default_graph()
# Giving the first layer an input_shape builds the model eagerly: variables
# and graph ops exist as soon as layers are added, before any training call.
model = keras.Sequential()
model.add(keras.layers.Dense(units=64, activation='relu', input_shape=(32,)))
for n_units, act in ((64, 'relu'), (10, 'softmax')):
    model.add(keras.layers.Dense(units=n_units, activation=act))
# Non-empty now: kernel/bias variables for all three Dense layers exist.
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
print(tf.get_default_graph().get_operations())
[<tf.Variable 'dense/kernel:0' shape=(32, 64) dtype=float32>, <tf.Variable 'dense/bias:0' shape=(64,) dtype=float32>, <tf.Variable 'dense_1/kernel:0' shape=(64, 64) dtype=float32>, <tf.Variable 'dense_1/bias:0' shape=(64,) dtype=float32>, <tf.Variable 'dense_2/kernel:0' shape=(64, 10) dtype=float32>, <tf.Variable 'dense_2/bias:0' shape=(10,) dtype=float32>] [<tf.Operation 'dense_input' type=Placeholder>, <tf.Operation 'dense/kernel/Initializer/random_uniform/shape' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/min' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/max' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/RandomUniform' type=RandomUniform>, <tf.Operation 'dense/kernel/Initializer/random_uniform/sub' type=Sub>, <tf.Operation 'dense/kernel/Initializer/random_uniform/mul' type=Mul>, <tf.Operation 'dense/kernel/Initializer/random_uniform' type=Add>, <tf.Operation 'dense/kernel' type=VarHandleOp>, <tf.Operation 'dense/kernel/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense/kernel/Assign' type=AssignVariableOp>, <tf.Operation 'dense/kernel/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense/bias/Initializer/zeros' type=Const>, <tf.Operation 'dense/bias' type=VarHandleOp>, <tf.Operation 'dense/bias/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense/bias/Assign' type=AssignVariableOp>, <tf.Operation 'dense/bias/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense/MatMul/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense/MatMul' type=MatMul>, <tf.Operation 'dense/BiasAdd/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense/BiasAdd' type=BiasAdd>, <tf.Operation 'dense/Relu' type=Relu>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/shape' type=Const>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/min' type=Const>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/max' type=Const>, 
<tf.Operation 'dense_1/kernel/Initializer/random_uniform/RandomUniform' type=RandomUniform>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/sub' type=Sub>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/mul' type=Mul>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform' type=Add>, <tf.Operation 'dense_1/kernel' type=VarHandleOp>, <tf.Operation 'dense_1/kernel/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense_1/kernel/Assign' type=AssignVariableOp>, <tf.Operation 'dense_1/kernel/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_1/bias/Initializer/zeros' type=Const>, <tf.Operation 'dense_1/bias' type=VarHandleOp>, <tf.Operation 'dense_1/bias/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense_1/bias/Assign' type=AssignVariableOp>, <tf.Operation 'dense_1/bias/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_1/MatMul/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_1/MatMul' type=MatMul>, <tf.Operation 'dense_1/BiasAdd/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_1/BiasAdd' type=BiasAdd>, <tf.Operation 'dense_1/Relu' type=Relu>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/shape' type=Const>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/min' type=Const>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/max' type=Const>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/RandomUniform' type=RandomUniform>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/sub' type=Sub>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/mul' type=Mul>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform' type=Add>, <tf.Operation 'dense_2/kernel' type=VarHandleOp>, <tf.Operation 'dense_2/kernel/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense_2/kernel/Assign' type=AssignVariableOp>, <tf.Operation 'dense_2/kernel/Read/ReadVariableOp' type=ReadVariableOp>, 
<tf.Operation 'dense_2/bias/Initializer/zeros' type=Const>, <tf.Operation 'dense_2/bias' type=VarHandleOp>, <tf.Operation 'dense_2/bias/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense_2/bias/Assign' type=AssignVariableOp>, <tf.Operation 'dense_2/bias/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_2/MatMul/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_2/MatMul' type=MatMul>, <tf.Operation 'dense_2/BiasAdd/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_2/BiasAdd' type=BiasAdd>, <tf.Operation 'dense_2/Softmax' type=Softmax>]
# Compile
# Configure training: Adam at lr=0.001, one-hot cross-entropy loss,
# accuracy reported by fit/evaluate.
model.compile(optimizer=tf.train.AdamOptimizer(0.001),
loss='categorical_crossentropy',
metrics=['accuracy'])
The following shows a few examples of configuring a model for training:
# Configure a model for mean-squared error regression.
model.compile(optimizer=tf.train.AdamOptimizer(0.01),
loss='mse', # mean squared error
metrics=['mae']) # mean absolute error
# Configure a model for categorical classification.
# Note: recompiling replaces the previous optimizer/loss/metrics setup.
model.compile(optimizer=tf.train.RMSPropOptimizer(0.01),
loss=keras.losses.categorical_crossentropy,
metrics=[keras.metrics.categorical_accuracy])
# Drop the model; the next cell rebuilds from scratch.
del model
# Reset keras' global state and the default TF graph before the next demo.
keras.backend.clear_session()
tf.reset_default_graph()
# Numpy dataset: random features in [0, 1), integer class labels in [0, 10).
def _random_split(num_examples):
    """Return a (features, labels) pair of float32/int32 arrays."""
    feats = np.random.random((num_examples, 32)).astype(np.float32)
    labels = np.random.randint(low=0, high=10, size=num_examples).astype(np.int32)
    return feats, labels

tr_data, tr_label = _random_split(1000)
val_data, val_label = _random_split(100)
tst_data, tst_label = _random_split(100)
print(tr_data.dtype, tr_label.dtype)
float32 int32
# Create a model
model = keras.Sequential()
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dense(units=10, activation='softmax'))
# sparse_categorical_crossentropy: labels are integer class ids, not one-hot.
model.compile(optimizer=tf.train.GradientDescentOptimizer(.01),
loss=keras.losses.sparse_categorical_crossentropy,
metrics=['accuracy'])
# Train on the in-memory numpy arrays for 5 epochs, batch size 32.
model.fit(x=tr_data, y=tr_label, epochs=5, batch_size=32, validation_data=(val_data, val_label))
Train on 1000 samples, validate on 100 samples Epoch 1/5 1000/1000 [==============================] - 1s 692us/step - loss: 2.3259 - acc: 0.1010 - val_loss: 2.3399 - val_acc: 0.0800 Epoch 2/5 1000/1000 [==============================] - 0s 48us/step - loss: 2.3171 - acc: 0.1030 - val_loss: 2.3322 - val_acc: 0.0800 Epoch 3/5 1000/1000 [==============================] - 0s 40us/step - loss: 2.3123 - acc: 0.1080 - val_loss: 2.3265 - val_acc: 0.0700 Epoch 4/5 1000/1000 [==============================] - 0s 43us/step - loss: 2.3083 - acc: 0.1140 - val_loss: 2.3239 - val_acc: 0.0600 Epoch 5/5 1000/1000 [==============================] - 0s 46us/step - loss: 2.3055 - acc: 0.1150 - val_loss: 2.3202 - val_acc: 0.0600
<tensorflow.python.keras.callbacks.History at 0x7ff1e94e6128>
# Evaluate and predict
# metrics_names labels the values returned by evaluate() (here loss + acc).
print(model.metrics_names)
print(model.evaluate(x=tst_data, y=tst_label))
# predict returns per-class probabilities: shape (num_examples, 10).
print(model.predict(x=tst_data).shape)
del model
['loss', 'acc'] 100/100 [==============================] - 0s 34us/step [2.2931285667419434, 0.1] (100, 10)
Pass a tf.data.Dataset
instance to the fit
, evaluate
, predict
method.
When passing tf.data.Dataset
instance to model.fit
method which is instantiated by tf.keras.Sequential
, tf.keras.Model
,
subclassing tf.keras.Model
, passing metrics
argument to 'accuracy'
in model.compile
method provokes TypeError
# Clearing state between cells matters: stale sessions/graphs from the
# previous model would otherwise leak into the next one.
keras.backend.clear_session() # very important!
tf.reset_default_graph()
print(tf.get_default_graph().get_operations())
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
[] []
# tf.data.Dataset instance
# Training set: 1000 examples, batched to 32, repeated indefinitely
# (fit() bounds each epoch via steps_per_epoch).
tr_data = np.random.random((1000, 32)).astype(np.float32)
tr_label = np.random.randint(low=0, high=10, size = 1000).astype(np.int32)
tr_dataset = tf.data.Dataset.from_tensor_slices((tr_data, tr_label))
tr_dataset = tr_dataset.batch(batch_size=32)
tr_dataset = tr_dataset.repeat()
# Validation set: one batch of 100, repeated.
val_data = np.random.random((100, 32)).astype(np.float32)
val_label = np.random.randint(low=0, high=10, size = 100).astype(np.int32)
val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_label))
val_dataset = val_dataset.batch(batch_size=100).repeat()
# Test set: a single batch of 100 (not repeated).
tst_data = np.random.random((100, 32)).astype(np.float32)
tst_label = np.random.randint(low=0, high=10, size = 100).astype(np.int32)
tst_dataset = tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
tst_dataset = tst_dataset.batch(batch_size=100)
print(tr_dataset.output_types)
(tf.float32, tf.int32)
# Training
model = keras.Sequential()
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dense(units=10, activation='softmax'))
# No metrics here: per the note earlier in this file, metrics=['accuracy']
# combined with a tf.data.Dataset input raises TypeError in this TF version.
model.compile(optimizer=tf.train.GradientDescentOptimizer(.01),
loss=keras.losses.sparse_categorical_crossentropy)
# steps_per_epoch is required for a repeated Dataset: 1000 // 32 = 31 steps.
model.fit(tr_dataset, epochs = 5, steps_per_epoch = 1000 // 32,
validation_data = val_dataset, validation_steps = 1)
Epoch 1/5 31/31 [==============================] - 0s 3ms/step - loss: 2.3375 - val_loss: 2.3473 Epoch 2/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3215 - val_loss: 2.3414 Epoch 3/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3140 - val_loss: 2.3372 Epoch 4/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3098 - val_loss: 2.3354 Epoch 5/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3070 - val_loss: 2.3337
<tensorflow.python.keras.callbacks.History at 0x7ff1e4109d30>
# Evaluate and predict
print(model.metrics_names)
# With a Dataset input, `steps` sets how many batches to draw.
print(model.evaluate(tst_dataset, steps = 1))
print(model.predict(tst_dataset, steps = 1).shape)
del model
['loss'] 1/1 [==============================] - 0s 653us/step 2.3561947345733643 (32, 10)
https://www.tensorflow.org/guide/keras?hl=ko#build_advanced_models
The tf.keras.Sequential
model is a simple stack of layers that cannot represent arbitrary models. Use the Keras functional API to build complex model topologies such as:
Building a model with the functional API works like this:
tf.keras.Model
instance.Sequential
model.The following example uses the functional API to build a simple, fully-connected network:
# Clear
keras.backend.clear_session()
tf.reset_default_graph()
# Random training data wrapped as a repeated, batched Dataset.
data = np.random.random((1000, 32)).astype(np.float32)
label = np.random.randint(low=0, high=10, size = 1000).astype(np.int32)
dataset = tf.data.Dataset.from_tensor_slices((data, label))
dataset = dataset.batch(batch_size=32).repeat()
print(dataset.output_types)
inputs = tf.keras.Input(shape=(32,)) # Returns a placeholder tensor
print(inputs, type(inputs))
# A layer instance is callable on a tensor, and returns a tensor.
x = keras.layers.Dense(64, activation='relu')(inputs)
x = keras.layers.Dense(64, activation='relu')(x)
predictions = keras.layers.Dense(10, activation='softmax')(x)
# Instantiate the model given inputs and outputs
model = keras.Model(inputs = inputs, outputs = predictions)
# Unlike the delayed-build Sequential, the functional model's variables
# and ops already exist at this point.
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
print(tf.get_default_graph().get_operations())
# The compile step specifies the training configuration.
model.compile(optimizer=tf.train.RMSPropOptimizer(.001),
loss=keras.losses.sparse_categorical_crossentropy)
# Trains for 5 epochs
model.fit(dataset, epochs=5, steps_per_epoch = 1000//32)
del model
(tf.float32, tf.int32) Tensor("input_1:0", shape=(?, 32), dtype=float32) <class 'tensorflow.python.framework.ops.Tensor'> [<tf.Variable 'dense/kernel:0' shape=(32, 64) dtype=float32>, <tf.Variable 'dense/bias:0' shape=(64,) dtype=float32>, <tf.Variable 'dense_1/kernel:0' shape=(64, 64) dtype=float32>, <tf.Variable 'dense_1/bias:0' shape=(64,) dtype=float32>, <tf.Variable 'dense_2/kernel:0' shape=(64, 10) dtype=float32>, <tf.Variable 'dense_2/bias:0' shape=(10,) dtype=float32>] [<tf.Operation 'tensors/component_0' type=Const>, <tf.Operation 'tensors/component_1' type=Const>, <tf.Operation 'batch_size' type=Const>, <tf.Operation 'drop_remainder' type=Const>, <tf.Operation 'count' type=Const>, <tf.Operation 'input_1' type=Placeholder>, <tf.Operation 'dense/kernel/Initializer/random_uniform/shape' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/min' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/max' type=Const>, <tf.Operation 'dense/kernel/Initializer/random_uniform/RandomUniform' type=RandomUniform>, <tf.Operation 'dense/kernel/Initializer/random_uniform/sub' type=Sub>, <tf.Operation 'dense/kernel/Initializer/random_uniform/mul' type=Mul>, <tf.Operation 'dense/kernel/Initializer/random_uniform' type=Add>, <tf.Operation 'dense/kernel' type=VarHandleOp>, <tf.Operation 'dense/kernel/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense/kernel/Assign' type=AssignVariableOp>, <tf.Operation 'dense/kernel/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense/bias/Initializer/zeros' type=Const>, <tf.Operation 'dense/bias' type=VarHandleOp>, <tf.Operation 'dense/bias/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense/bias/Assign' type=AssignVariableOp>, <tf.Operation 'dense/bias/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense/MatMul/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense/MatMul' type=MatMul>, <tf.Operation 
'dense/BiasAdd/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense/BiasAdd' type=BiasAdd>, <tf.Operation 'dense/Relu' type=Relu>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/shape' type=Const>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/min' type=Const>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/max' type=Const>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/RandomUniform' type=RandomUniform>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/sub' type=Sub>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform/mul' type=Mul>, <tf.Operation 'dense_1/kernel/Initializer/random_uniform' type=Add>, <tf.Operation 'dense_1/kernel' type=VarHandleOp>, <tf.Operation 'dense_1/kernel/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense_1/kernel/Assign' type=AssignVariableOp>, <tf.Operation 'dense_1/kernel/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_1/bias/Initializer/zeros' type=Const>, <tf.Operation 'dense_1/bias' type=VarHandleOp>, <tf.Operation 'dense_1/bias/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense_1/bias/Assign' type=AssignVariableOp>, <tf.Operation 'dense_1/bias/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_1/MatMul/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_1/MatMul' type=MatMul>, <tf.Operation 'dense_1/BiasAdd/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_1/BiasAdd' type=BiasAdd>, <tf.Operation 'dense_1/Relu' type=Relu>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/shape' type=Const>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/min' type=Const>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/max' type=Const>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/RandomUniform' type=RandomUniform>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/sub' type=Sub>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform/mul' 
type=Mul>, <tf.Operation 'dense_2/kernel/Initializer/random_uniform' type=Add>, <tf.Operation 'dense_2/kernel' type=VarHandleOp>, <tf.Operation 'dense_2/kernel/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense_2/kernel/Assign' type=AssignVariableOp>, <tf.Operation 'dense_2/kernel/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_2/bias/Initializer/zeros' type=Const>, <tf.Operation 'dense_2/bias' type=VarHandleOp>, <tf.Operation 'dense_2/bias/IsInitialized/VarIsInitializedOp' type=VarIsInitializedOp>, <tf.Operation 'dense_2/bias/Assign' type=AssignVariableOp>, <tf.Operation 'dense_2/bias/Read/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_2/MatMul/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_2/MatMul' type=MatMul>, <tf.Operation 'dense_2/BiasAdd/ReadVariableOp' type=ReadVariableOp>, <tf.Operation 'dense_2/BiasAdd' type=BiasAdd>, <tf.Operation 'dense_2/Softmax' type=Softmax>] Epoch 1/5 31/31 [==============================] - 0s 5ms/step - loss: 2.3502 Epoch 2/5 31/31 [==============================] - 0s 2ms/step - loss: 2.3298 Epoch 3/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3133 Epoch 4/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3034 Epoch 5/5 31/31 [==============================] - 0s 1ms/step - loss: 2.2969
Build a fully-customizable model by subclassing tf.keras.Model
and defining your own forward pass. Create layers in the __init__
method and set them as attributes of the class instance. Define the forward pass in the call method.
Model subclassing is particularly useful when eager execution is enabled since the forward pass can be written imperatively.
# Clear
keras.backend.clear_session()
tf.reset_default_graph()
# Both prints should show empty lists after the reset.
print(tf.get_default_graph().get_operations())
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
[] []
# Subclassing tf.keras.Model
# FIX: the notebook export lost all indentation here, which is a syntax
# error in Python; canonical class-body indentation is restored.
class MLP(keras.Model):
    """Two-layer perceptron: one ReLU hidden layer, softmax output.

    Layers are created in __init__ and stored as attributes; the forward
    pass is defined in call() (the tf.keras.Model subclassing pattern).
    """

    def __init__(self, hidden_dim, num_classes):
        super(MLP, self).__init__()
        # Define your layers here.
        self.hidden_layer = keras.layers.Dense(units=hidden_dim, activation='relu')
        self.output_layer = keras.layers.Dense(units=num_classes, activation='softmax')

    def call(self, inputs):
        # Forward pass: inputs -> hidden ReLU features -> class probabilities.
        hidden = self.hidden_layer(inputs)
        score = self.output_layer(hidden)
        return score
# Instantiate the MLP class
mlp = MLP(hidden_dim=100, num_classes=10)
# The compile step specifies the training configuration.
# No metrics argument: with Dataset inputs it raises TypeError (see the
# note earlier in this file).
mlp.compile(optimizer=tf.train.RMSPropOptimizer(.001),
loss=keras.losses.sparse_categorical_crossentropy)
# tf.data.Dataset instance
# Train: 1000 examples, batch 32, repeated; val: one repeated batch of 100;
# test: a single non-repeated batch of 100.
tr_data = np.random.random((1000, 32)).astype(np.float32)
tr_label = np.random.randint(low=0, high=10, size = 1000).astype(np.int32)
tr_dataset = tf.data.Dataset.from_tensor_slices((tr_data, tr_label))
tr_dataset = tr_dataset.batch(batch_size=32)
tr_dataset = tr_dataset.repeat()
val_data = np.random.random((100, 32)).astype(np.float32)
val_label = np.random.randint(low=0, high=10, size = 100).astype(np.int32)
val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_label))
val_dataset = val_dataset.batch(batch_size=100).repeat()
tst_data = np.random.random((100, 32)).astype(np.float32)
tst_label = np.random.randint(low=0, high=10, size = 100).astype(np.int32)
tst_dataset = tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
tst_dataset = tst_dataset.batch(batch_size=100)
print(tr_dataset.output_types)
(tf.float32, tf.int32)
# Trains for 5 epochs
# steps_per_epoch bounds each epoch since tr_dataset repeats forever.
mlp.fit(tr_dataset, epochs=5, steps_per_epoch=1000//32, validation_data = val_dataset, validation_steps=1)
del mlp
Epoch 1/5 31/31 [==============================] - 0s 5ms/step - loss: 2.3889 - val_loss: 2.2315 Epoch 2/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3525 - val_loss: 2.2768 Epoch 3/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3126 - val_loss: 2.3388 Epoch 4/5 31/31 [==============================] - 0s 1ms/step - loss: 2.2994 - val_loss: 2.3531 Epoch 5/5 31/31 [==============================] - 0s 1ms/step - loss: 2.2890 - val_loss: 2.3579
Reading https://www.tensorflow.org/guide/keras?hl=ko#custom_layers
https://www.tensorflow.org/guide/keras?hl=ko#callbacks
A callback is an object passed to a model to customize and extend its behavior during training. You can write your own custom callback, or use the built-in tf.keras.callbacks
that include:
tf.keras.callbacks.ModelCheckpoint
: Save checkpoints of your model at regular intervals.tf.keras.callbacks.LearningRateScheduler
: Dynamically change the learning rate.tf.keras.callbacks.EarlyStopping
: Interrupt training when validation performance has stopped improving.tf.keras.callbacks.TensorBoard
: Monitor the model's behavior using TensorBoard.To use a tf.keras.callbacks.Callback
, pass it to the model's fit
method:
# Clear
keras.backend.clear_session()
tf.reset_default_graph()
# Both prints should show empty lists after the reset.
print(tf.get_default_graph().get_operations())
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
[] []
# tf.data.Dataset instance
# Same layout as before: repeated train (batch 32), repeated val (batch 100),
# single test batch of 100.
tr_data = np.random.random((1000, 32)).astype(np.float32)
tr_label = np.random.randint(low=0, high=10, size = 1000).astype(np.int32)
tr_dataset = tf.data.Dataset.from_tensor_slices((tr_data, tr_label))
tr_dataset = tr_dataset.batch(batch_size=32)
tr_dataset = tr_dataset.repeat()
val_data = np.random.random((100, 32)).astype(np.float32)
val_label = np.random.randint(low=0, high=10, size = 100).astype(np.int32)
val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_label))
val_dataset = val_dataset.batch(batch_size=100).repeat()
tst_data = np.random.random((100, 32)).astype(np.float32)
tst_label = np.random.randint(low=0, high=10, size = 100).astype(np.int32)
tst_dataset = tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
tst_dataset = tst_dataset.batch(batch_size=100)
print(tr_dataset.output_types)
(tf.float32, tf.int32)
# Creating "callback" object
# Callbacks take effect when passed to Model.fit's `callbacks` argument.
callbacks = [
# Interrupt training if `val_loss` stops improving for over 2 epochs
keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'),
# Write TensorBoard logs to `./logs` directory
keras.callbacks.TensorBoard(log_dir='./logs')
]
# Training
model = keras.Sequential()
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dense(units=64, activation='relu'))
model.add(keras.layers.Dense(units=10, activation='softmax'))
model.compile(optimizer=tf.train.GradientDescentOptimizer(.01),
              loss=keras.losses.sparse_categorical_crossentropy)
# BUG FIX: `callbacks` is a parameter of Model.fit, not Model.compile.
# Passing it to compile() meant EarlyStopping/TensorBoard never ran; it
# now goes to fit() so the callbacks actually fire during training.
model.fit(tr_dataset, epochs=5, steps_per_epoch=1000 // 32,
          validation_data=val_dataset, validation_steps=1,
          callbacks=callbacks)
del model
Epoch 1/5 31/31 [==============================] - 0s 3ms/step - loss: 2.3456 - val_loss: 2.3992 Epoch 2/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3280 - val_loss: 2.3774 Epoch 3/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3194 - val_loss: 2.3638 Epoch 4/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3141 - val_loss: 2.3551 Epoch 5/5 31/31 [==============================] - 0s 1ms/step - loss: 2.3105 - val_loss: 2.3495
https://www.tensorflow.org/guide/keras?hl=ko#save_and_restore
Save and load the weights of a model using tf.keras.Model.save_weights
:
# Clear
keras.backend.clear_session()
tf.reset_default_graph()
# Both prints should show empty lists after the reset.
print(tf.get_default_graph().get_operations())
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
[] []
# Subclassing tf.keras.Model
# FIX: the notebook export lost all indentation here, which is a syntax
# error in Python; canonical class-body indentation is restored.
class MLP(keras.Model):
    """Two-layer perceptron used for the save/restore demo below."""

    def __init__(self, hidden_dim, num_classes):
        super(MLP, self).__init__()
        # Define your layers here.
        self.hidden_layer = keras.layers.Dense(units=hidden_dim, activation='relu')
        self.output_layer = keras.layers.Dense(units=num_classes, activation='softmax')

    def call(self, inputs):
        # Forward pass: inputs -> hidden ReLU features -> class probabilities.
        hidden = self.hidden_layer(inputs)
        score = self.output_layer(hidden)
        return score
# Instantiate the MLP class
mlp = MLP(hidden_dim=100, num_classes=10)
# The compile step specifies the training configuration.
mlp.compile(optimizer=tf.train.GradientDescentOptimizer(.001),
loss=keras.losses.sparse_categorical_crossentropy)
# tf.data.Dataset instance
tr_data = np.random.random((1000, 32)).astype(np.float32)
tr_label = np.random.randint(low=0, high=10, size = 1000).astype(np.int32)
tr_dataset = tf.data.Dataset.from_tensor_slices((tr_data, tr_label))
tr_dataset = tr_dataset.batch(batch_size=100)
tr_dataset = tr_dataset.repeat()
val_data = np.random.random((100, 32)).astype(np.float32)
val_label = np.random.randint(low=0, high=10, size = 100).astype(np.int32)
val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_label))
val_dataset = val_dataset.batch(batch_size=100).repeat()
# Test inputs are all-ones constants (not random) so predictions are
# reproducible before and after restoring the saved weights.
tst_data = np.ones((100,32), dtype=np.float32)
tst_label = np.ones((100,), dtype=np.int32)
tst_dataset = tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
tst_dataset = tst_dataset.batch(batch_size=100)
print(tr_dataset.output_types)
print(tst_dataset.output_types)
(tf.float32, tf.int32) (tf.float32, tf.int32)
# Trains for 5 epochs
mlp.fit(x=tr_data, y=tr_label, epochs=5, batch_size=100,
validation_data=(val_data, val_label))
# mlp.fit(tr_dataset, epochs=5, steps_per_epoch=1000//100,
# validation_data=val_dataset, validation_steps=1)
# Persist the trained weights (TF checkpoint format) for the restore demo.
mlp.save_weights('../graphs/lecture05/keras/mlp')
# Record argmax predictions on the fixed test inputs to compare after restore.
y_before = np.argmax(mlp.predict(x=tst_data), axis = -1)
print(mlp.evaluate(x=tst_data, y=tst_label))
# with keras.backend.get_session() as sess:
# before = sess.run(mlp.variables)
del mlp
Train on 1000 samples, validate on 100 samples Epoch 1/5 1000/1000 [==============================] - 0s 112us/step - loss: 2.3423 - val_loss: 2.3729 Epoch 2/5 1000/1000 [==============================] - 0s 15us/step - loss: 2.3414 - val_loss: 2.3719 Epoch 3/5 1000/1000 [==============================] - 0s 15us/step - loss: 2.3404 - val_loss: 2.3710 Epoch 4/5 1000/1000 [==============================] - 0s 18us/step - loss: 2.3395 - val_loss: 2.3701 Epoch 5/5 1000/1000 [==============================] - 0s 14us/step - loss: 2.3387 - val_loss: 2.3692 100/100 [==============================] - 0s 34us/step 1.9046856212615966
# Clear
keras.backend.clear_session()
tf.reset_default_graph()
# Both prints should show empty lists after the reset.
print(tf.get_default_graph().get_operations())
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
[] []
# Restore
## Instantiate the MLP class
tst_model = MLP(hidden_dim=100, num_classes=10)
tst_model.compile(optimizer=tf.train.GradientDescentOptimizer(.001),
loss=keras.losses.sparse_categorical_crossentropy)
# tst_model.build(input_shape=tf.TensorShape(([None,32])))
# NOTE(review): the model is not built yet here; load_weights appears to
# defer restoration until the variables are created — confirm for this TF version.
tst_model.load_weights('../graphs/lecture05/keras/mlp')
<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x7ff19c15fc18>
# Rebuild the same constant test batch used before saving.
tst_data = np.ones((100,32), dtype=np.float32)
tst_label = np.ones((100,), dtype=np.int32)
tst_dataset = tf.data.Dataset.from_tensor_slices((tst_data, tst_label))
tst_dataset = tst_dataset.batch(batch_size=100)
# Predictions from the restored model on the same fixed inputs.
y_after = np.argmax(tst_model.predict(tst_dataset, steps = 1), axis = -1)
print(tst_model.evaluate(tst_dataset, steps = 1))
1/1 [==============================] - 0s 10ms/step 1.904685616493225
# equal
# 1.0 means every argmax prediction matches the pre-save model,
# i.e. the weights were restored exactly.
np.mean(y_before == y_after)
1.0
Reading https://www.tensorflow.org/guide/keras?hl=ko#configuration_only
Reading https://www.tensorflow.org/guide/keras?hl=ko#entire_model
Reading https://www.tensorflow.org/guide/keras?hl=ko#eager_execution
Reading https://www.tensorflow.org/guide/keras?hl=ko#distribution