#!/usr/bin/env python
# coding: utf-8
#
#
#
#
#
#
#
#
#
#
#
#
#
# Tensorflow
# 08 - Lesson
#
#
#
#
# # Deep Learning Using Tensorflow
# In this lesson you are going to try to create a model that surpasses the one you created in Lesson 07 by using a ***Deep Learning*** NN.
# > What is deep learning?
#
# This is just a term to describe neural networks that are composed of more than one hidden layer. The good news is that it is pretty easy to go from one to many hidden layers and your predictions should also improve.
# # Let's Get to Work!
# In[1]:
# import libraries
import tensorflow as tf
import pandas as pd
import numpy as np
import sys
import datetime
import matplotlib.pyplot as plt
# Apply matplotlib's 'ggplot' style sheet to the figures created below.
plt.style.use('ggplot') # use this plot style
# Notebook-export form of the `%matplotlib inline` magic. NOTE(review):
# get_ipython is not imported anywhere in this file, so this line presumably
# only works when the script runs under IPython/Jupyter -- confirm before
# running it with a plain python interpreter.
get_ipython().run_line_magic('matplotlib', 'inline')
# In[2]:
# Report the interpreter and library versions this notebook is running on.
# tf.__version__ is used rather than tf.VERSION: the latter is a TF 1.x-only
# alias, whereas __version__ is exposed by every TensorFlow release.
print('Python version ' + sys.version)
print('Tensorflow version ' + tf.__version__)
print('Pandas version ' + pd.__version__)
print('Numpy version ' + np.__version__)
# # Function to model
#
# y = a * x^2 + b * x + c
#
# In[3]:
# Draw 1000 random samples as a single float32 column, then shuffle the
# rows in place.
pool = np.random.rand(1000, 1).astype(np.float32)
np.random.shuffle(pool)

# Number of rows held out for testing (15% of the 1000 samples).
sample = int(1000 * 0.15)

# The first `sample` rows become the test set; everything after them is
# the training set (the remaining 85%).
test_x, train_x = pool[:sample], pool[sample:]

print('Testing data points: ' + str(test_x.shape))
print('Training data points: ' + str(train_x.shape))

# Compute the targets from y = a * x^2 + b * x + c with a=2, b=3 and c=5,
# applying the same expression to both splits.
test_y, train_y = (
    2.0 * part**2 + 3.0 * part + 5
    for part in (test_x, train_x)
)
# In[4]:
# Place the training inputs and targets side by side as columns 'x' and 'y'.
df = pd.DataFrame(np.column_stack((train_x, train_y)), columns=['x', 'y'])
df.head()
# In[5]:
# Summary statistics of the training data.
df.describe()
# In[6]:
# Scatter of the curve the network is asked to learn.
df.plot.scatter(figsize=(15, 5), x='x', y='y')
# # Helper Functions
#
# Make a function that will help you create layers easily
# In[7]:
def add_layer(inputs, in_size, out_size, activation_function=None):
    """Append one fully connected layer to the graph and return its output.

    Args:
        inputs: tensor that is matrix-multiplied by the layer's weights, so
            its last dimension has to be ``in_size``.
        in_size: number of values each sample carries into the layer.
        out_size: number of neurons (outputs per sample) in the layer.
        activation_function: optional callable applied to the linear
            result; when None the linear result is returned as is.

    Returns:
        The layer's output tensor: ``inputs @ weights + biases``, passed
        through ``activation_function`` if one was given.
    """
    # Weight matrix of shape [in_size, out_size], initialised from a
    # truncated normal distribution (mean 0.1, stddev 0.1).
    layer_weights = tf.Variable(
        tf.truncated_normal([in_size, out_size], mean=0.1, stddev=0.1))
    # One bias per output neuron, initialised the same way.
    layer_biases = tf.Variable(
        tf.truncated_normal([out_size], mean=0.1, stddev=0.1))

    # Linear part of the layer; one row per sample, out_size columns.
    linear = tf.matmul(inputs, layer_weights) + layer_biases

    if activation_function is not None:
        return activation_function(linear)
    return linear
# # Model your Graph
#
# Start to use W (for weight) and b (for bias) when setting up your variables. Aside from adding your ReLU activation function, it is a good idea to use Tensorflow's ***matrix multiplication function (matmul)*** as shown below.
#
# > The ? in the printed shape just means that this dimension can be of any size.
# For the shape parameter, you can think of it like this...
#
# > shape = [how many data points do you have, how many features does each data point have]
#
# For this lesson, since we are doing a simple regression, we only have one feature (x). We use the ***None*** keyword so that we are not restricted in the number of samples we can feed to our model. This will become more important when you learn about training using batches in a future lesson.
# In[8]:
# Number of neurons in each hidden layer; tune it here.
hidden_size = 100

# Graph inputs. In shape=[None, 1] the leading None leaves the number of
# samples open and the trailing 1 is the single value carried per sample
# (the feature for x, the target for y).
x = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="01_x")
y = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="01_y")

print("shape of x and y:")
print(x.shape, y.shape)
# Note that the output of one layer becomes the input of the next layer.
# In[9]:
# Two ReLU hidden layers chained together: h1 consumes the placeholder x
# (one value per sample) and h2 consumes h1.
h1 = add_layer(x, in_size=1, out_size=hidden_size,
               activation_function=tf.nn.relu)
h2 = add_layer(h1, in_size=hidden_size, out_size=hidden_size,
               activation_function=tf.nn.relu)

print("shape of hidden layers:")
print(h1.shape, h2.shape)
# In[10]:
# Output layer: no activation function is passed, so pred is the plain
# linear combination of h2 reduced to one value per sample.
pred = add_layer(h2, in_size=hidden_size, out_size=1)

print("shape of output layer:")
print(pred.shape)
# In[11]:
# Objective: the mean of the squared differences between prediction and
# target.
loss = tf.reduce_mean(tf.square(pred - y))

# Gradient descent with a fixed learning rate of 0.003; `train` is the op
# that minimises the loss when it is run.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.003)
train = optimizer.minimize(loss)
# # How Good is Your model?
#
# Set up the following variables to calculate the accuracy rate of your model. You will do that shortly.
# In[12]:
# A prediction counts as correct when it rounds to the same value as its
# target; accuracy is the mean of those hits cast to float32.
correct_prediction = tf.equal(tf.round(pred), tf.round(y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))
# # Training Time!
#
# The best score I was able to obtain was a ***~98% accuracy*** using a LR of 0.003 and iterating 3,000 times. As you can see adding additional layers made a big difference.
# In[13]:
# Op that initialises every variable in the graph.
init = tf.global_variables_initializer()

# (step, training loss) snapshots collected while training
t = []

# The whole training / test set is fed on every step, so the feed
# dictionaries never change: build them once instead of on each iteration.
# This also keeps them defined for the accuracy report below even if the
# number of steps is ever set to zero.
train_data = {x: train_x, y: train_y}
test_data = {x: test_x, y: test_y}

# Run your graph
with tf.Session() as sess:

    # initialize variables
    sess.run(init)

    # Fit the function.
    for step in range(3000):

        # Run one optimisation step and fetch the loss alongside it. The
        # `train` op itself yields None from sess.run, so only the first
        # fetched value (the loss) is kept.
        train_loss = sess.run([loss, train], feed_dict=train_data)[0]

        # record and print the loss every 200 iterations
        if step % 200 == 0:
            t.append((step, train_loss))
            print("Training loss at step %d: %f" % (step, train_loss))

    # here is where you see how good of a Data Scientist you are
    print("Accuracy on the Training Set:",
          sess.run(accuracy, feed_dict=train_data))
    print("Accuracy on the Test Set:",
          sess.run(accuracy, feed_dict=test_data))

    # capture predictions on test data (must happen while the session,
    # and with it the trained variables, is still open)
    test_results = sess.run(pred, feed_dict={x: test_x})
    df_final = pd.DataFrame({'test_x': test_x[:, 0],
                             'pred': test_results[:, 0]})

# Training-loss history for plotting; only the training loss is tracked
# here, no validation loss.
df_loss = pd.DataFrame(t, columns=['step', 'train_loss'])
# In[14]:
# A single wide axes shared by the target data and the model's predictions.
fig, axes = plt.subplots(figsize=(15, 5))

# Red points: the training data, i.e. the curve we are trying to model.
df.plot.scatter(ax=axes, x='x', y='y', color='red')
# Semi-transparent points: what the trained model predicts for the test inputs.
df_final.plot.scatter(ax=axes, x='test_x', y='pred', alpha=0.3)

# Title, axis labels and legend.
axes.set_title('target vs pred', fontsize=20)
axes.set_xlabel('x', fontsize=15)
axes.set_ylabel('y', fontsize=15)
axes.legend(["target", "pred"], loc='best')
# In[15]:
# Recorded training loss against the step number, on a logarithmic y axis.
df_loss.set_index('step').plot(figsize=(15, 5), logy=True)
# # Your Turn
#
# > Try different numbers of neurons, learning rates, and iterations. You will notice that changing any one of these parameters may require you to change another one. Try to get a feel for this model and how it is affected by tweaking it. If you are bold enough, try adding even more layers and see what happens. In the next lesson we are going to talk about Validation.
# This tutorial was created by HEDARO