# Install TensorFlow 2.0
!pip install -q tensorflow==2.0.0
import tensorflow as tf
# This magic command ensures that Matplotlib plots are rendered inline in the notebook
%matplotlib inline
# Import some utility modules from sklearn
from sklearn import datasets, preprocessing, model_selection
# Load a simple regression dataset
boston = datasets.load_boston()
# The dataset is a dictionary
boston.keys()
dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])
# Take the input matrix and the output vector
X = boston['data']
y = boston['target']
# Inspect the shape of the matrix:
# X.shape[0]: number of examples
# X.shape[1]: number of features
X.shape
(506, 13)
y[0:10]
array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9])
y.shape
(506,)
# Reshape y into a column vector so its shape is compatible with the model outputs
y = y.reshape(-1, 1)
# Normalize the data
X = preprocessing.scale(X)
y = preprocessing.scale(y)
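# Quick sanity check (illustrative): scale() standardizes each column to zero mean
# and unit variance, which we can verify directly on the scaled inputs
X.mean(axis=0).round(6), X.std(axis=0).round(6)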
# Split the data into training and test portions
Xtrain, Xtest, ytrain, ytest = \
model_selection.train_test_split(X, y)
# Define a utility function to initialize the model variables
# Note: we initialize them as small pseudo-random numbers
def model_init(n_features):
    w = tf.Variable(
        tf.random.normal(shape=(n_features, 1), stddev=0.01, dtype=tf.float64),
    )
    b = tf.Variable(0.0, dtype=tf.float64)
    return w, b
w, b = model_init(13)
# Compute the linear model outputs
def linear_model(x):
    return tf.matmul(x, w) + b
linear_model(Xtrain[0:3])
<tf.Tensor: id=24, shape=(3, 1), dtype=float64, numpy= array([[0.0190547 ], [0.03325769], [0.03834291]])>
# A function to compute the squared loss
def squared_loss(ytrue, ypredicted):
    return tf.reduce_mean((ytrue - ypredicted)**2.0)
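# A minimal worked check (values chosen only for illustration): with targets [1, 2]
# and predictions [0, 4], the squared errors are 1 and 4, so the mean should be 2.5
squared_loss(tf.constant([[1.0], [2.0]], dtype=tf.float64),
             tf.constant([[0.0], [4.0]], dtype=tf.float64))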
squared_loss(ytrain, linear_model(Xtrain))
<tf.Tensor: id=1452644, shape=(), dtype=float64, numpy=2722.854309248313>
# Optimization phase
for i in range(1000):
    # Step 1: compute the loss
    with tf.GradientTape() as tape:
        ypredicted = linear_model(Xtrain)
        l = squared_loss(ytrain, ypredicted)
    # Step 2: compute the gradients
    g = tape.gradient(l, [w, b])
    # Step 3: do a gradient descent step
    w.assign_sub(0.01 * g[0])  # w = w - \eta * gradient of l w.r.t. w
    b.assign_sub(0.01 * g[1])  # b = b - \eta * gradient of l w.r.t. b
w
<tf.Variable 'Variable:0' shape=(13, 1) dtype=float64, numpy= array([[-0.05481841], [ 0.09820639], [-0.02990333], [ 0.04727738], [-0.19264479], [ 0.30978357], [-0.02967521], [-0.3383542 ], [ 0.19877823], [-0.16600745], [-0.22601543], [ 0.0687544 ], [-0.37885501]])>
# Check the training loss has decreased
squared_loss(ytrain, linear_model(Xtrain))
<tf.Tensor: id=36048, shape=(), dtype=float64, numpy=0.2557999458656836>
# Check the test loss (almost no overfitting in this scenario)
squared_loss(ytest, linear_model(Xtest))
<tf.Tensor: id=36059, shape=(), dtype=float64, numpy=0.27910240485065635>
# Check that the solution is close to optimal by comparing it with the closed-form solution to the least-squares problem:
# (X.T * X)^-1 * X.T * y
# Note: in realistic situations there are faster and more numerically stable ways of doing this computation.
XX = tf.matmul(X.transpose(), X)
Xy = tf.matmul(X.transpose(), y)
wopt = tf.matmul(tf.linalg.inv(XX), Xy)
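# One such alternative (a minimal sketch): tf.linalg.lstsq solves the same
# least-squares problem without an explicit matrix inverse
wopt_lstsq = tf.linalg.lstsq(tf.constant(X), tf.constant(y))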
import matplotlib.pyplot as plt
# Plot the difference between the closed-form weights and the learned ones;
# the dashed red lines mark a +/-0.1 band around zero
plt.plot(wopt - w)
plt.plot([0, 12], [0.1, 0.1], '--r')
plt.plot([0, 12], [-0.1, -0.1], '--r')
[<matplotlib.lines.Line2D at 0x7fb654f77ba8>]
# Now move to a binary classification problem: the breast cancer dataset
breastcancer = datasets.load_breast_cancer()
X = breastcancer['data']
y = breastcancer['target']
# We do not need to scale the 0/1 y vector
X = preprocessing.scale(X)
X.shape
(569, 30)
y[0:50]
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1])
y.dtype
dtype('int64')
# For compatibility with some TF functions, we need np.int32 values
import numpy as np
y = y.reshape(-1, 1).astype(np.int32)
# Split the data, this time stratifying on y so that train and test keep the same class balance
Xtrain, Xtest, ytrain, ytest = \
model_selection.train_test_split(X, y, stratify=y)
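# Quick check (illustrative): stratification should keep the fraction of positive
# labels roughly the same in both portions
ytrain.mean(), ytest.mean()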
w, b = model_init(X.shape[1])
# Simply wrap the output of a linear model in a sigmoid
logreg_model = lambda x: tf.sigmoid(linear_model(x))
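# The sigmoid squashes any real value into (0, 1), so the output can be read as the
# probability of the positive class; e.g. an input of 0 maps to exactly 0.5
tf.sigmoid(tf.constant([-2.0, 0.0, 2.0], dtype=tf.float64))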
# tf.keras has a lot of pre-made objects and functions to simplify code development
from tensorflow.keras import losses, optimizers, metrics
# Define a loss
loss = losses.BinaryCrossentropy()
# loss = losses.MeanSquaredError()
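# A minimal worked check (illustrative values): for a positive example predicted
# with probability 0.9 and a negative one predicted with probability 0.2, the
# binary cross-entropy is -(log(0.9) + log(0.8)) / 2, roughly 0.16
loss(tf.constant([[1.0], [0.0]]), tf.constant([[0.9], [0.2]]))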
# Define an optimizer
# opt = optimizers.SGD(learning_rate=0.01)
opt = optimizers.RMSprop(learning_rate=0.01)
ypredicted = logreg_model(Xtrain)
ypredicted[0:10]
<tf.Tensor: id=1452673, shape=(10, 1), dtype=float64, numpy= array([[0.46999973], [0.4916889 ], [0.46146135], [0.49145486], [0.50755118], [0.48121933], [0.47787075], [0.48504932], [0.50930721], [0.5083554 ]])>
# Metrics accumulate state across one or more update_state() calls and report it with result()
acc = metrics.BinaryAccuracy()
acc.update_state(ytrain, ypredicted)
acc.result()
<tf.Tensor: id=1452704, shape=(), dtype=float32, numpy=0.46478873>
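# Because the metric accumulates, we can feed it in batches and still get the overall
# accuracy; reset_states() clears the running counts before a fresh computation
# (a minimal sketch of the accumulate/reset cycle):
acc.reset_states()
acc.update_state(ytrain[:100], ypredicted[:100])
acc.update_state(ytrain[100:], ypredicted[100:])
acc.result()  # same value as updating with the full training set at once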
all_accuracies = []
for i in range(1000):
with tf.GradientTape() as tape:
ypredicted = logreg_model(Xtrain)
l = loss(ytrain, ypredicted)
g = tape.gradient(l, [w, b])
opt.apply_gradients([(g[0], w), (g[1], b)]) # This time, we let the optimizer do this step
# Note: the input is a list of pairs (gradient, variable)
# Compute the train accuracy at every iteration
acc.reset_states()
acc.update_state(ytrain, ypredicted)
all_accuracies.append(acc.result())
import matplotlib.pyplot as plt
plt.plot(all_accuracies)
[<matplotlib.lines.Line2D at 0x7fb654efe3c8>]
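# Finally, evaluate the trained classifier on the held-out test portion
# (a sketch following the same metric pattern used above):
acc.reset_states()
acc.update_state(ytest, logreg_model(Xtest))
acc.result()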