In [1]:
from IPython.display import Image

import numpy as np
np.random.seed(1)

In [2]:
Image(filename='and-or.png')

Out[2]:
[image: and-or.png]
In [3]:
or_input = np.array([[0,0], [0,1], [1,0], [1,1]])
or_output = np.array([[0,1,1,1]]).T

In [4]:
or_input

Out[4]:
array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]])
In [5]:
or_output

Out[5]:
array([[0],
       [1],
       [1],
       [1]])
In [6]:
def sigmoid(x): # Returns values that range between 0 and 1.
    # BTW, this is pretty fun stuff: https://www.google.com.sg/#q=1/(1%2Bexp(-x))
    return 1/(1+np.exp(-x))

def sigmoid_derivative(x): # Note: expects x to already be a sigmoid output, since sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).
    return x*(1-x)

In [7]:
sigmoid(np.array([2.5, 0.32, -1.42]))

Out[7]:
array([ 0.92414182,  0.57932425,  0.19466158])
In [9]:
sigmoid_derivative(np.array([2.5, 0.32, -2.42]))

Out[9]:
array([-3.75  ,  0.2176, -8.2764])
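Those values look strange for the derivative of a function whose slope is always positive; that's because sigmoid_derivative expects values that have already been squashed by sigmoid. A minimal sketch of the intended usage (the variable name s is illustrative, not from the original notebook):

s = sigmoid(np.array([2.5, 0.32, -2.42]))  # squash first
s * (1 - s)                                # a proper sigmoid slope: all values in (0, 0.25]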
In [10]:
def cost(predicted, truth):
    return truth - predicted


In [11]:
gold = np.array([0.5, 1.2, 9.8])
pred = np.array([0.6, 1.0, 10.0])
cost(pred, gold)

Out[11]:
array([-0.1,  0.2, -0.2])
In [12]:
gold = np.array([0.5, 1.2, 9.8])
pred = np.array([9.3, 4.0, 99.0])
cost(pred, gold)

Out[12]:
array([ -8.8,  -2.8, -89.2])
In [13]:
num_data, input_dim = or_input.shape
output_dim = len(or_output.T)

# Initialize weights for the input layer, aka "syn0", syn is short for synapse.
syn0 = np.random.random((input_dim, output_dim))

In [14]:
num_epochs = 10000
learning_rate = 1.0

X = or_input
y = or_output

for _ in range(num_epochs):
    # forward propagation.
    l0 = X
    l1 = sigmoid(np.dot(l0, syn0))

    # how much did we miss?
    l1_error = cost(l1, y)

    # back propagation.
    # multiply how much we missed by the
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * sigmoid_derivative(l1)

    # update weights
    syn0 += learning_rate * np.dot(l0.T, l1_delta)

In [15]:
l1

Out[15]:
array([[ 0.5       ],
       [ 0.99283162],
       [ 0.99282994],
       [ 0.99994786]])
In [16]:
[int(l > 0.5) for l in l1]

Out[16]:
[0, 1, 1, 1]
In [17]:
or_output

Out[17]:
array([[0],
       [1],
       [1],
       [1]])

Do the same but with 1 more hidden layer

In [18]:
def sigmoid(x): # Returns values that range between 0 and 1.
    # BTW, this is pretty fun stuff: https://www.google.com.sg/#q=1/(1%2Bexp(-x))
    return 1/(1+np.exp(-x)) # ... # YOUR CODE HERE

def sigmoid_derivative(x): # Note: expects x to already be a sigmoid output.
    return x*(1-x) # ... # YOUR CODE HERE

# Cost function.
def cost(predicted, truth):
    return truth - predicted

X = or_input = np.array([[0,0], [0,1], [1,0], [1,1]])
y = or_output = np.array([[0,1,1,1]]).T

# Initialize weights for the input layer, aka "syn0", syn is short for synapse.
num_data, input_dim = or_input.shape
hidden_dim = ... # YOUR CODE HERE
syn0 = ... # YOUR CODE HERE

# Initialize weights for the output layer, aka "syn1".
output_dim = ... # YOUR CODE HERE
syn1 = ... # YOUR CODE HERE

num_epochs = 10000
learning_rate = 1.0

for _ in range(num_epochs):
    # forward propagation.
    l0 = X
    l1 = sigmoid(np.dot(l0, syn0))
    l2 = sigmoid(np.dot(l1, syn1))

    # how much did we miss?
    l2_error = cost(l2, y)

    # Now we back propagate...

    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    l2_delta = l2_error * sigmoid_derivative(l2)

    # how much did each l1 value contribute to the l2 error (according to the weights)?
    l1_error = l2_delta.dot(syn1.T)

    # in what direction is the target l1?
    # were we really sure? if so, don't change too much.
    l1_delta = ... # YOUR CODE HERE

    syn1 += l1.T.dot(l2_delta)
    syn0 += l0.T.dot(l1_delta)

In [19]:
l2 # output layer.

Out[19]:
array([[ 0.01286598],
       [ 0.99285218],
       [ 0.99266375],
       [ 0.99797014]])
In [20]:
l1 # hidden layer.

Out[20]:
array([[ 0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ],
       [ 0.05030027,  0.9370088 ,  0.06046283,  0.88587697,  0.87996499],
       [ 0.0475292 ,  0.93480791,  0.05574036,  0.86846293,  0.84798218],
       [ 0.00263601,  0.99533365,  0.00378448,  0.98086165,  0.9761297 ]])
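For reference, one possible way to fill in the blanks above (a sketch, not necessarily the author's solution; the hidden width of 5 is inferred from the 5 columns of l1 in Out[20]):

hidden_dim = 5
syn0 = np.random.random((input_dim, hidden_dim))
output_dim = len(or_output.T)
syn1 = np.random.random((hidden_dim, output_dim))

# and inside the training loop:
l1_delta = l1_error * sigmoid_derivative(l1)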

We'll drop one data point and use 3 layers

In [21]:
def sigmoid(x): # Returns values that range between 0 and 1.
    # BTW, this is pretty fun stuff: https://www.google.com.sg/#q=1/(1%2Bexp(-x))
    return 1/(1+np.exp(-x)) # ... # YOUR CODE HERE

def sigmoid_derivative(x): # Note: expects x to already be a sigmoid output.
    return x*(1-x) # ... # YOUR CODE HERE

# Cost function.
def cost(predicted, truth):
    return truth - predicted

X = or_input = np.array([[0,0], [0,1], [1,0]])
y = or_output = np.array([[0,1,1]]).T

# Initialize weights for the input layer, aka "syn0", syn is short for synapse.
num_data, input_dim = or_input.shape
hidden_dim_1 = 5 # YOUR CODE HERE
syn0 = ... # YOUR CODE HERE

# Initialize weights for the second hidden layer, aka "syn1".
hidden_dim_2 = 3 # YOUR CODE HERE
syn1 = ... # YOUR CODE HERE

# Initialize weights for the output layer, aka "syn2".
output_dim = ... # YOUR CODE HERE
syn2 = ... # YOUR CODE HERE

num_epochs = 10000
learning_rate = 1.0

for _ in range(num_epochs):
    # forward propagation.
    l0 = X
    l1 = ... # YOUR CODE HERE
    l2 = ... # YOUR CODE HERE
    l3 = ... # YOUR CODE HERE

    # how much did we miss?
    l3_error = ... # YOUR CODE HERE

    # Now we back propagate...

    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    l3_delta = ... # YOUR CODE HERE

    # how much did each l2 value contribute to the l3 error (according to the weights)?
    l2_error = ... # YOUR CODE HERE

    # in what direction is the target l2?
    # were we really sure? if so, don't change too much.
    l2_delta = ... # YOUR CODE HERE

    # how much did each l1 value contribute to the l2 error (according to the weights)?
    l1_error = ... # YOUR CODE HERE

    # in what direction is the target l1?
    # were we really sure? if so, don't change too much.
    l1_delta = ... # YOUR CODE HERE

    syn2 += ... # YOUR CODE HERE
    syn1 += ... # YOUR CODE HERE
    syn0 += ... # YOUR CODE HERE

In [22]:
l3

Out[22]:
array([[ 0.00798033],
       [ 0.99521575],
       [ 0.99523938]])
In [23]:
new_input = np.array([[1,1]])
# apply l1 to new input
_l1 = sigmoid(np.dot(new_input, syn0))
# apply l2 to new input
_l2 = sigmoid(np.dot(_l1, syn1))
# apply l3 (output layer) to new input
prediction = _l3 = sigmoid(np.dot(_l2, syn2))

prediction

Out[23]:
array([[ 0.99821909]])
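Again for reference, here is one possible completion of the 3-layer exercise (a sketch that simply extends the 2-layer pattern; not necessarily the author's solution):

syn0 = np.random.random((input_dim, hidden_dim_1))
syn1 = np.random.random((hidden_dim_1, hidden_dim_2))
output_dim = len(or_output.T)
syn2 = np.random.random((hidden_dim_2, output_dim))

# and inside the training loop:
l1 = sigmoid(np.dot(l0, syn0))
l2 = sigmoid(np.dot(l1, syn1))
l3 = sigmoid(np.dot(l2, syn2))
l3_error = cost(l3, y)
l3_delta = l3_error * sigmoid_derivative(l3)
l2_error = l3_delta.dot(syn2.T)
l2_delta = l2_error * sigmoid_derivative(l2)
l1_error = l2_delta.dot(syn1.T)
l1_delta = l1_error * sigmoid_derivative(l1)
syn2 += l2.T.dot(l3_delta)
syn1 += l1.T.dot(l2_delta)
syn0 += l0.T.dot(l1_delta)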

Now let's do the same in TensorFlow!!!

In [24]:
import tensorflow as tf

In [25]:
# Parameters
learning_rate = 1.0
num_epochs = 10000

# Network Parameters
hidden_dim_1 = 5 # 1st layer number of features
hidden_dim_2 = 3 # 2nd layer number of features
input_dim = 2 # Input dimensions.
output_dim = 1 # Output dimensions.

# tf Graph input
x = tf.placeholder("float", [None, input_dim])
y = ... # YOUR CODE HERE

In [26]:
# Without biases.

weights = {
    'syn0': tf.Variable(tf.random_normal([input_dim, hidden_dim_1])),
    'syn1': tf.Variable(tf.random_normal([hidden_dim_1, hidden_dim_2])),
    'syn2': tf.Variable(tf.random_normal([hidden_dim_2, output_dim]))
}

# Create a model
def multilayer_perceptron(X, weights, biases):
    # Hidden layer 1
    layer_1 = tf.matmul(X, weights['syn0'])
    # Hidden layer 2
    layer_2 = ... # YOUR CODE HERE
    # Output layer
    out_layer = ... # YOUR CODE HERE
    return out_layer
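One way the two blanks above could be filled in, mirroring the layer_1 line (a sketch: plain matrix multiplies, no biases or activations yet):

    layer_2 = tf.matmul(layer_1, weights['syn1'])
    out_layer = tf.matmul(layer_2, weights['syn2'])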

In [27]:
# With biases.
weights = {
    'syn0': tf.Variable(tf.random_normal([input_dim, hidden_dim_1])),
    'syn1': tf.Variable(tf.random_normal([hidden_dim_1, hidden_dim_2])),
    'syn2': tf.Variable(tf.random_normal([hidden_dim_2, output_dim]))
}

biases = {
    'b0': tf.Variable(tf.random_normal([hidden_dim_1])),
    'b1': tf.Variable(tf.random_normal([hidden_dim_2])),
    'b2': tf.Variable(tf.random_normal([output_dim]))
}

# Create a model
def multilayer_perceptron(X, weights, biases):
    # Hidden layer 1
    layer_1 = tf.add(tf.matmul(X, weights['syn0']), biases['b0'])
    # Hidden layer 2
    layer_2 = ... # YOUR CODE HERE
    # Output layer
    out_layer = ... # YOUR CODE HERE
    return out_layer

In [28]:
# With biases.
weights = {
    'syn0': tf.Variable(tf.random_normal([input_dim, hidden_dim_1])),
    'syn1': tf.Variable(tf.random_normal([hidden_dim_1, hidden_dim_2])),
    'syn2': tf.Variable(tf.random_normal([hidden_dim_2, output_dim]))
}

biases = {
    'b0': tf.Variable(tf.random_normal([hidden_dim_1])),
    'b1': tf.Variable(tf.random_normal([hidden_dim_2])),
    'b2': tf.Variable(tf.random_normal([output_dim]))
}

# Create a model
def multilayer_perceptron(X, weights, biases):
    # Hidden layer 1 + sigmoid activation function
    layer_1 = tf.add(tf.matmul(X, weights['syn0']), biases['b0'])
    layer_1 = tf.nn.sigmoid(layer_1)
    # Hidden layer 2 + sigmoid activation function
    layer_2 = ... # YOUR CODE HERE
    layer_2 = ... # YOUR CODE HERE
    # Output layer
    out_layer = ... # YOUR CODE HERE
    out_layer = ... # YOUR CODE HERE
    return out_layer
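A sketch of one way to complete the remaining layers, following the same add-then-squash pattern as layer_1 (applying tf.nn.sigmoid to the output layer keeps this comparable to the numpy version):

    layer_2 = tf.add(tf.matmul(layer_1, weights['syn1']), biases['b1'])
    layer_2 = tf.nn.sigmoid(layer_2)
    out_layer = tf.add(tf.matmul(layer_2, weights['syn2']), biases['b2'])
    out_layer = tf.nn.sigmoid(out_layer)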

In [29]:
# Construct model
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
cost = tf.sub(y, pred)
# Or you can use a fancier cost, like:
##tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
init = tf.initialize_all_variables()
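Note that the training cell in In [31] below runs an optimizer op that is never defined in the notebook; presumably it was created along these lines (an assumption on our part, using the plain gradient-descent optimizer from the same TF 0.x API as tf.sub and tf.initialize_all_variables):

# Hypothetical: the notebook never shows this line.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)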

In [30]:
np.array([[0,1,1]]).T

Out[30]:
array([[0],
       [1],
       [1]])
In [31]:
or_input = np.array([[0.,0.], [0.,1.], [1.,0.]])
or_output = np.array([[0.,1.,1.]]).T

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(num_epochs):
        batch_x, batch_y = or_input, or_output # Loop over all data points.
        # Run optimization op (backprop) and cost op (to get loss value)
        _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
        #print(c)

    # Now let's test it on the unknown dataset.
    new_inputs = np.array([[1.,1.], [1.,0.]])
    feed_dict = {x: new_inputs}
    predictions = sess.run(pred, feed_dict)
    print(predictions)

[[ 0.99999201]
 [ 0.99999034]]


Now let's do the same in TensorFlow Learn (aka skflow)!!!

In [32]:
from tensorflow.contrib import learn

In [33]:
classifier = learn.DNNClassifier(hidden_units=[5, 3], n_classes=2)

In [34]:
or_input = np.array([[0.,0.], [0.,1.], [1.,0.]])
or_output = np.array([[0,1,1]]).T

classifier.fit(or_input, or_output, steps=0.05, batch_size=3)

/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py:1197: VisibleDeprecationWarning: converting an array with ndim > 0 to an index will result in an error in the future
result_shape.insert(dim, 1)

Out[34]:
DNNClassifier()
In [35]:
classifier.predict(np.array([ [1., 1.], [1., 0.] , [0., 0.] , [0., 1.]]))

Out[35]:
array([0, 0, 1, 0])
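With steps=0.05 the classifier has barely trained at all, so these predictions are effectively random and get OR wrong; presumably that is the point of the next exercise.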

Now try it with steps=1000

In [37]:
from tensorflow.contrib import learn
classifier = ... # YOUR CODE HERE

or_input = np.array([[0.,0.], [0.,1.], [1.,0.]])
or_output = np.array([[0,1,1]]).T

classifier.fit(...) # YOUR CODE HERE
classifier.predict(np.array([ [1., 1.], [1., 0.] , [0., 0.] , [0., 1.]]))

/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py:1197: VisibleDeprecationWarning: converting an array with ndim > 0 to an index will result in an error in the future
result_shape.insert(dim, 1)

Out[37]:
array([1, 1, 0, 1])
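For reference, one possible completion (a sketch: steps=1000 as the heading asks, with the other arguments kept from In [34]):

classifier = learn.DNNClassifier(hidden_units=[5, 3], n_classes=2)
classifier.fit(or_input, or_output, steps=1000, batch_size=3)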