#!/usr/bin/env python
# coding: utf-8

# In[1]:

from IPython.display import Image
import numpy as np
np.random.seed(1)


# In[2]:

Image(filename='and-or.png')


# In[3]:

or_input = np.array([[0,0], [0,1], [1,0], [1,1]])
or_output = np.array([[0,1,1,1]]).T


# In[4]:

or_input


# In[5]:

or_output


# In[6]:

def sigmoid(x):
    # Returns values that range between 0 and 1.
    # BTW, this is pretty fun stuff: https://www.google.com.sg/#q=1/(1%2Bexp(-x))
    return 1/(1+np.exp(-x))

def sigmoid_derivative(x):
    # Derivative of the sigmoid, assuming x is already a sigmoid output, i.e. x = sigmoid(z).
    return x*(1-x)


# In[7]:

sigmoid(np.array([2.5, 0.32, -1.42]))


# In[8]:

sigmoid_derivative(np.array([2.5, 0.32, -1.42]))


# In[9]:

sigmoid_derivative(np.array([2.5, 0.32, -2.42]))


# In[10]:

def cost(predicted, truth):
    return truth - predicted


# In[11]:

gold = np.array([0.5, 1.2, 9.8])
pred = np.array([0.6, 1.0, 10.0])
cost(pred, gold)


# In[12]:

gold = np.array([0.5, 1.2, 9.8])
pred = np.array([9.3, 4.0, 99.0])
cost(pred, gold)


# In[13]:

num_data, input_dim = or_input.shape
output_dim = len(or_output.T)
# Initialize weights for the input layer, aka "syn0" (syn is short for synapse).
syn0 = np.random.random((input_dim, output_dim))


# In[14]:

num_epochs = 10000
learning_rate = 1.0

X = or_input
y = or_output

for _ in range(num_epochs):
    # Forward propagation.
    l0 = X
    l1 = sigmoid(np.dot(l0, syn0))

    # How much did we miss?
    l1_error = cost(l1, y)

    # Back propagation:
    # multiply how much we missed by the
    # slope of the sigmoid at the values in l1.
    l1_delta = l1_error * sigmoid_derivative(l1)

    # Update weights.
    syn0 += learning_rate * np.dot(l0.T, l1_delta)


# In[15]:

l1


# In[16]:

[int(l > 0.5) for l in l1]


# In[17]:

or_output


# Do the same but with 1 more hidden layer
# ====

# In[18]:

def sigmoid(x):
    # Returns values that range between 0 and 1.
    # BTW, this is pretty fun stuff: https://www.google.com.sg/#q=1/(1%2Bexp(-x))
    return 1/(1+np.exp(-x)) # ...
    # YOUR CODE HERE

def sigmoid_derivative(x):
    # Derivative of the sigmoid, assuming x is already a sigmoid output, i.e. x = sigmoid(z).
    return x*(1-x) # ...
    # YOUR CODE HERE

# Cost function.
def cost(predicted, truth):
    return truth - predicted

X = or_input = np.array([[0,0], [0,1], [1,0], [1,1]])
y = or_output = np.array([[0,1,1,1]]).T

# Initialize weights for the input layer, aka "syn0" (syn is short for synapse).
num_data, input_dim = or_input.shape
hidden_dim = ... # YOUR CODE HERE
syn0 = ... # YOUR CODE HERE

# Initialize weights for the first hidden layer, aka "syn1".
output_dim = ... # YOUR CODE HERE
syn1 = ... # YOUR CODE HERE

num_epochs = 10000
learning_rate = 1.0
cost = cost

for _ in range(num_epochs):
    # Forward propagation.
    l0 = X
    l1 = sigmoid(np.dot(l0, syn0))
    l2 = sigmoid(np.dot(l1, syn1))

    # How much did we miss?
    l2_error = cost(l2, y)

    # Now we back propagate...
    # In what direction is the target value?
    # Were we really sure? If so, don't change too much.
    l2_delta = l2_error * sigmoid_derivative(l2)

    # How much did each l1 value contribute to the l2 error (according to the weights)?
    l1_error = l2_delta.dot(syn1.T)

    # In what direction is the target l1?
    # Were we really sure? If so, don't change too much.
    l1_delta = ... # YOUR CODE HERE

    syn1 += l1.T.dot(l2_delta)
    syn0 += l0.T.dot(l1_delta)


# In[19]:

l2 # output layer.


# In[20]:

l1 # hidden layer.
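
# A reference sketch of one possible completion of the exercise above (not the
# only valid answer; the hidden width of 4 is an assumption, any small width
# works). It reuses sigmoid() and sigmoid_derivative() defined earlier and
# keeps the same learning rule as the single-layer network.
def two_hidden_layer_or_sketch(hidden_dim=4, num_epochs=10000):
    X = np.array([[0,0], [0,1], [1,0], [1,1]])
    y = np.array([[0,1,1,1]]).T
    input_dim, output_dim = X.shape[1], y.shape[1]
    syn0 = np.random.random((input_dim, hidden_dim))   # input -> hidden weights
    syn1 = np.random.random((hidden_dim, output_dim))  # hidden -> output weights
    for _ in range(num_epochs):
        l1 = sigmoid(np.dot(X, syn0))           # hidden layer
        l2 = sigmoid(np.dot(l1, syn1))          # output layer
        l2_error = y - l2                       # how much did we miss?
        l2_delta = l2_error * sigmoid_derivative(l2)
        l1_error = l2_delta.dot(syn1.T)         # blame pushed back through syn1
        l1_delta = l1_error * sigmoid_derivative(l1)
        syn1 += l1.T.dot(l2_delta)
        syn0 += X.T.dot(l1_delta)
    return l2

# two_hidden_layer_or_sketch()  # should come out close to [0, 1, 1, 1].T
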
# Now let's make it even more challenging,
# ====
# we'll drop one data point and use 3 layers
# ====

# In[21]:

def sigmoid(x):
    # Returns values that range between 0 and 1.
    # BTW, this is pretty fun stuff: https://www.google.com.sg/#q=1/(1%2Bexp(-x))
    return 1/(1+np.exp(-x)) # ...
    # YOUR CODE HERE

def sigmoid_derivative(x):
    # Derivative of the sigmoid, assuming x is already a sigmoid output, i.e. x = sigmoid(z).
    return x*(1-x) # ...
    # YOUR CODE HERE

# Cost function.
def cost(predicted, truth):
    return truth - predicted

X = or_input = np.array([[0,0], [0,1], [1,0]])
y = or_output = np.array([[0,1,1]]).T

# Initialize weights for the input layer, aka "syn0" (syn is short for synapse).
num_data, input_dim = or_input.shape
hidden_dim_1 = 5 # YOUR CODE HERE
syn0 = ... # YOUR CODE HERE

# Initialize weights for the first hidden layer, aka "syn1".
hidden_dim_2 = 3 # YOUR CODE HERE
syn1 = ... # YOUR CODE HERE

# Initialize weights for the second hidden layer, aka "syn2".
output_dim = ... # YOUR CODE HERE
syn2 = ... # YOUR CODE HERE

num_epochs = 10000
learning_rate = 1.0
cost = cost

for _ in range(num_epochs):
    # Forward propagation.
    l0 = X
    l1 = ... # YOUR CODE HERE
    l2 = ... # YOUR CODE HERE
    l3 = ... # YOUR CODE HERE

    # How much did we miss?
    l3_error = ... # YOUR CODE HERE

    # Now we back propagate...
    # In what direction is the target value?
    # Were we really sure? If so, don't change too much.
    l3_delta = ... # YOUR CODE HERE

    # How much did each l2 value contribute to the l3 error (according to the weights)?
    l2_error = ... # YOUR CODE HERE

    # In what direction are the l2 values changing?
    l2_delta = ... # YOUR CODE HERE

    # How much did each l1 value contribute to the l2 error (according to the weights)?
    l1_error = ... # YOUR CODE HERE

    # In what direction is the target l1?
    # Were we really sure? If so, don't change too much.
    l1_delta = ... # YOUR CODE HERE

    syn2 += ... # YOUR CODE HERE
    syn1 += ... # YOUR CODE HERE
    syn0 += ... # YOUR CODE HERE


# In[22]:

l3


# In[23]:

new_input = np.array([[1,1]])
# Apply l1 to the new input.
_l1 = sigmoid(np.dot(new_input, syn0))
# Apply l2 to the new input.
_l2 = sigmoid(np.dot(_l1, syn1))
# Apply l3 (the output layer) to the new input.
prediction = _l3 = sigmoid(np.dot(_l2, syn2))
prediction


# Now let's do the same in tensorflow!!!
# ====

# In[24]:

import tensorflow as tf


# In[25]:

# Parameters
learning_rate = 1.0
num_epochs = 10000

# Network Parameters
hidden_dim_1 = 5 # 1st layer number of features
hidden_dim_2 = 3 # 2nd layer number of features
input_dim = 2    # Input dimensions.
output_dim = 1   # Output dimensions.

# tf Graph input
x = tf.placeholder("float", [None, input_dim])
y = ... # YOUR CODE HERE


# In[26]:

# Without biases.
weights = {
    'syn0': tf.Variable(tf.random_normal([input_dim, hidden_dim_1])),
    'syn1': tf.Variable(tf.random_normal([hidden_dim_1, hidden_dim_2])),
    'syn2': tf.Variable(tf.random_normal([hidden_dim_2, output_dim]))
}

# Create a model
def multilayer_perceptron(X, weights, biases):
    # Hidden layer 1
    layer_1 = tf.matmul(X, weights['syn0'])
    # Hidden layer 2
    layer_2 = ... # YOUR CODE HERE
    # Output layer
    out_layer = ... # YOUR CODE HERE
    return out_layer


# In[27]:

# With biases.
weights = {
    'syn0': tf.Variable(tf.random_normal([input_dim, hidden_dim_1])),
    'syn1': tf.Variable(tf.random_normal([hidden_dim_1, hidden_dim_2])),
    'syn2': tf.Variable(tf.random_normal([hidden_dim_2, output_dim]))
}

biases = {
    'b0': tf.Variable(tf.random_normal([hidden_dim_1])),
    'b1': tf.Variable(tf.random_normal([hidden_dim_2])),
    'b2': tf.Variable(tf.random_normal([output_dim]))
}

# Create a model
def multilayer_perceptron(X, weights, biases):
    # Hidden layer 1
    layer_1 = tf.add(tf.matmul(X, weights['syn0']), biases['b0'])
    # Hidden layer 2
    layer_2 = ... # YOUR CODE HERE
    # Output layer
    out_layer = ... # YOUR CODE HERE
    return out_layer
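
# One possible completion of the two model-building exercises above (a sketch,
# not the only answer). This fills in the "with biases" version; for the
# "without biases" version, drop the tf.add(..., biases[...]) wrappers and
# keep only the tf.matmul calls.
def multilayer_perceptron_sketch(X, weights, biases):
    # Hidden layer 1
    layer_1 = tf.add(tf.matmul(X, weights['syn0']), biases['b0'])
    # Hidden layer 2
    layer_2 = tf.add(tf.matmul(layer_1, weights['syn1']), biases['b1'])
    # Output layer
    out_layer = tf.add(tf.matmul(layer_2, weights['syn2']), biases['b2'])
    return out_layer
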
# In[28]:

# With biases and sigmoid activations.
weights = {
    'syn0': tf.Variable(tf.random_normal([input_dim, hidden_dim_1])),
    'syn1': tf.Variable(tf.random_normal([hidden_dim_1, hidden_dim_2])),
    'syn2': tf.Variable(tf.random_normal([hidden_dim_2, output_dim]))
}

biases = {
    'b0': tf.Variable(tf.random_normal([hidden_dim_1])),
    'b1': tf.Variable(tf.random_normal([hidden_dim_2])),
    'b2': tf.Variable(tf.random_normal([output_dim]))
}

# Create a model
def multilayer_perceptron(X, weights, biases):
    # Hidden layer 1 + sigmoid activation function
    layer_1 = tf.add(tf.matmul(X, weights['syn0']), biases['b0'])
    layer_1 = tf.nn.sigmoid(layer_1)
    # Hidden layer 2 + sigmoid activation function
    layer_2 = ... # YOUR CODE HERE
    layer_2 = ... # YOUR CODE HERE
    # Output layer
    out_layer = ... # YOUR CODE HERE
    out_layer = ... # YOUR CODE HERE
    return out_layer


# In[29]:

# Construct the model
pred = multilayer_perceptron(x, weights, biases)

# Define the loss and optimizer
cost = tf.sub(y, pred)
# Or you can use a fancier cost, like:
##tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
init = tf.initialize_all_variables()


# In[ ]:




# In[30]:

np.array([[0,1,1]]).T


# In[31]:

or_input = np.array([[0.,0.], [0.,1.], [1.,0.]])
or_output = np.array([[0.,1.,1.]]).T

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(num_epochs):
        batch_x, batch_y = or_input, or_output # Loop over all data points.
        # Run the optimization op (backprop) and the cost op (to get the loss value).
        _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
        #print(c)

    # Now let's test it on the unknown dataset.
    new_inputs = np.array([[1.,1.], [1.,0.]])
    feed_dict = {x: new_inputs}
    predictions = sess.run(pred, feed_dict)
    print(predictions)


# Now let's do the same in tensorflow learn (aka skflow)!!!
# ====

# In[32]:

from tensorflow.contrib import learn


# In[33]:

classifier = learn.DNNClassifier(hidden_units=[5, 3], n_classes=2)


# In[34]:

or_input = np.array([[0.,0.], [0.,1.], [1.,0.]])
or_output = np.array([[0,1,1]]).T
classifier.fit(or_input, or_output, steps=0.05, batch_size=3)


# In[35]:

classifier.predict(np.array([[1., 1.], [1., 0.], [0., 0.], [0., 1.]]))


# In[ ]:




# Now try it with `steps=1000`
# ====

# In[37]:

from tensorflow.contrib import learn

classifier = ... # YOUR CODE HERE

or_input = np.array([[0.,0.], [0.,1.], [1.,0.]])
or_output = np.array([[0,1,1]]).T

classifier.fit(...)
... # YOUR CODE HERE

classifier.predict(np.array([[1., 1.], [1., 0.], [0., 0.], [0., 1.]]))


# In[ ]:
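
# One possible completion of the final exercise (a sketch, not the definitive
# answer): reuse the same DNNClassifier setup from In[33]-In[35], but train
# with steps=1000 as the heading suggests. batch_size=3 simply mirrors the
# earlier cell and is an assumption, not a requirement.
classifier = learn.DNNClassifier(hidden_units=[5, 3], n_classes=2)

or_input = np.array([[0.,0.], [0.,1.], [1.,0.]])
or_output = np.array([[0,1,1]]).T
classifier.fit(or_input, or_output, steps=1000, batch_size=3)

classifier.predict(np.array([[1., 1.], [1., 0.], [0., 0.], [0., 1.]]))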