Notebook

Copyright 2017 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

In [ ]:

import os
import time
import tensorflow as tf
import numpy as np

generate some noisy training and test data. (based on Jake's example on day 2)

In [ ]:

ncomponents = 1
slopes_true = np.random.uniform(0, 1, ncomponents)
intercepts_true = np.random.uniform(0, 1, ncomponents)
component_fractionalprobs = np.random.dirichlet(np.arange(1., ncomponents+1.))
print('Slopes:', slopes_true)
print('Intercepts:', intercepts_true)
print('Fractional probabilities:', component_fractionalprobs)

In [ ]:

ndatapoints = 100
ndatapoints_test = 20

xis_true = np.random.uniform(0, 1, ndatapoints)
xis_test_true = np.random.uniform(0, 1, ndatapoints_test)
x_grid = np.linspace(0, 1, 100)

numberpercomponent = np.random.multinomial(ndatapoints, component_fractionalprobs)
numberpercomponent_test = np.random.multinomial(ndatapoints_test, component_fractionalprobs)
allocations = np.concatenate([np.repeat(i, nb).astype(int) 
                              for i, nb in enumerate(numberpercomponent)])
allocations_test = np.concatenate([np.repeat(i, nb).astype(int) 
                              for i, nb in enumerate(numberpercomponent_test)])
np.random.shuffle(allocations)
np.random.shuffle(allocations_test)


def model_linear(xs, slope, intercept): return xs * slope + intercept
yis_true = model_linear(xis_true, slopes_true[allocations], intercepts_true[allocations])
yis_test_true = model_linear(xis_test_true, slopes_true[allocations_test], intercepts_true[allocations_test])

In [ ]:

sigma_yis = np.repeat(0.1, ndatapoints) * np.random.uniform(0.5, 2.0, ndatapoints)
yis_noisy = yis_true + np.random.randn(ndatapoints) * sigma_yis

sigma_yis_test = np.repeat(0.1, ndatapoints_test) * np.random.uniform(0.5, 2.0, ndatapoints_test)
yis_test_noisy = yis_test_true + np.random.randn(ndatapoints_test) * sigma_yis_test

Beginning the TensorFlow part.

In [ ]:

# define a utility function for generating a new directory in which to save 
# model information, so multiple training runs don't stomp on each other.
def get_new_path(name=""):
    base="/tmp/tfmodels/linear"
    logpath = os.path.join(base, name + "_" + str(int(time.time())))
    print("Logging to {}".format(logpath))
    return logpath

# Declare list of features. We only have one numeric feature. There are many
# other types of columns that are more complicated and useful.
feature_columns = [tf.feature_column.numeric_column("x", shape=[1])]

In [ ]:

# An estimator is the front end to invoke training (fitting) and evaluation
# (inference). There are many predefined types like linear regression,
# linear classification, and many neural network classifiers and regressors.
# The following code provides an estimator that does linear regression.
estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns, model_dir=get_new_path('lr'))

In [ ]:

# TensorFlow provides many helper methods to read and set up data sets.
# Here we use two data sets: one for training and one for evaluation
# We have to tell the function how many batches
# of data (num_epochs) we want and how big each batch should be.

# x_train = np.array([1., 2., 3., 4.])
# y_train = np.array([0., -1., -2., -3.])
# x_eval = np.array([2., 5., 8., 1.])
# y_eval = np.array([-1.01, -4.1, -7, 0.])

x_train = xis_true
y_train = yis_noisy
x_eval = xis_test_true
y_eval = yis_test_true

input_fn = tf.estimator.inputs.numpy_input_fn(
    {"x": x_train}, y_train, batch_size=4, num_epochs=None, shuffle=True)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    {"x": x_train}, y_train, batch_size=4, shuffle=False)
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    {"x": x_eval}, y_eval, batch_size=4, shuffle=False)

In [ ]:

# Train the model.
estimator.train(input_fn=input_fn, steps=2000)

In [ ]:

# evaluate how well our model did.
train_metrics = estimator.evaluate(input_fn=train_input_fn)
eval_metrics = estimator.evaluate(input_fn=eval_input_fn)
print("train metrics: %r"% train_metrics)
print("eval metrics: %r"% eval_metrics)

In [ ]: