#!/usr/bin/env python
# coding: utf-8

# # Gradient Descent

# ## 1. Gradient

# In[57]:


import numpy as np
import matplotlib.pylab as plt


# In[58]:


def numerical_diff(f, x):
    h = 1e-4  # 0.0001
    return (f(x+h) - f(x-h)) / (2*h)

def function_1(x):
    return 0.01*x**2 + 0.1*x

def tangent_line(f, x):
    d = numerical_diff(f, x)
    return lambda t: d * (t - x) + f(x)

x = np.arange(0.0, 20.0, 0.1)
y = function_1(x)
plt.xlabel("x")
plt.ylabel("f(x)")

tf = tangent_line(function_1, 5)
y2 = tf(x)

plt.plot(x, y)
plt.plot(x, y2)
plt.show()


# ## 2. Implement Gradient Descent in Python
# - https://towardsdatascience.com/implement-gradient-descent-in-python-9b93ed7108d1

# In[103]:


f = lambda x: (x + 5)**2

from IPython.display import Image
Image(url="https://cdn-images-1.medium.com/max/800/1*5-56UEwcZHgzqIAtlnsLog.png", width=300, height=300)


# - Step 1: Initialize parameters

# In[108]:


f = lambda x: (x + 5)**2

cur_x = 3                   # the algorithm starts at x = 3
learning_rate = 0.1         # learning rate
precision = 0.000001        # tells us when to stop the algorithm
previous_step_size = 1      # initialized larger than precision so the loop starts
max_iters = 10000           # maximum number of iterations
iters = 0                   # iteration counter

df = lambda x: 2 * (x + 5)  # gradient (derivative) of our function


# - Step 2: Run a loop to perform gradient descent
# - Stop the loop when the difference between x values from two consecutive iterations is less than 0.000001, or when the number of iterations exceeds 10,000

# In[109]:


print("Iteration: {0:2d} - X is {1:8.5f}".format(0, cur_x))
while previous_step_size > precision and iters < max_iters:
    prev_x = cur_x                                # store the current x value in prev_x
    cur_x = cur_x - learning_rate * df(prev_x)    # gradient descent step
    previous_step_size = abs(cur_x - prev_x)      # change in x
    iters = iters + 1                             # iteration count
    print("Iteration: {0:2d} - X is {1:8.5f}".format(iters, cur_x))  # print iterations

print("The local minimum occurs at {0:8.5f}".format(cur_x))


# ## 3. Implement 2D Gradient Descent in Python

# In[110]:


f = lambda x, y: x ** 2 + y ** 2 + 10

cur_x = 3                   # the algorithm starts at x = 3
cur_y = -3                  # the algorithm starts at y = -3
learning_rate = 0.1         # learning rate
precision = 0.000001        # tells us when to stop the algorithm
previous_step_size = 1      # initialized larger than precision so the loop starts
max_iters = 10000           # maximum number of iterations
iters = 0                   # iteration counter

df_x = lambda x, y: 2 * x   # partial derivative of our function with respect to x
df_y = lambda x, y: 2 * y   # partial derivative of our function with respect to y


# In[111]:


print("Iteration: {0:2d} - X is {1:8.5f}, Y is {2:8.5f}".format(0, cur_x, cur_y))
while previous_step_size > precision and iters < max_iters:
    prev_x = cur_x                                            # store the current x value in prev_x
    prev_y = cur_y                                            # store the current y value in prev_y
    cur_x = cur_x - learning_rate * df_x(prev_x, prev_y)      # gradient descent step for x
    cur_y = cur_y - learning_rate * df_y(prev_x, prev_y)      # gradient descent step for y
    previous_step_size = abs(cur_x - prev_x) + abs(cur_y - prev_y)  # change in x and y
    iters = iters + 1                                         # iteration count
    print("Iteration: {0:2d} - X is {1:8.5f}, Y is {2:8.5f}".format(iters, cur_x, cur_y))  # print iterations

print("The local minimum occurs at {0:8.5f}, {1:8.5f}".format(cur_x, cur_y))


# In[ ]:
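

# - A quick check (not part of the original notebook): for function_1(x) = 0.01x**2 + 0.1x the analytic
#   derivative is 0.02x + 0.1, so the slope at x = 5 should be 0.2. The centered difference from Section 1
#   should match this closely.

# In[ ]:


analytic = lambda x: 0.02 * x + 0.1                  # exact derivative of function_1
print("numerical:", numerical_diff(function_1, 5))   # ~0.2 (centered difference, h = 1e-4)
print("analytic :", analytic(5))                     # 0.2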
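

# - A minimal sketch (not from the linked article): the descent loop of Section 2 can reuse the
#   `numerical_diff` helper from Section 1 in place of the hand-written derivative `df`. The helper name
#   `gd_numerical` is illustrative; the learning rate and stopping rule mirror the earlier cells.

# In[ ]:


def gd_numerical(f, x0, learning_rate=0.1, precision=1e-6, max_iters=10000):
    """Gradient descent on a 1-D function using the centered-difference derivative."""
    cur_x = x0
    previous_step_size = 1.0
    iters = 0
    while previous_step_size > precision and iters < max_iters:
        prev_x = cur_x
        cur_x = cur_x - learning_rate * numerical_diff(f, prev_x)  # numerical gradient step
        previous_step_size = abs(cur_x - prev_x)                   # change in x
        iters += 1
    return cur_x, iters

x_min, n = gd_numerical(lambda x: (x + 5)**2, x0=3)
print("Numerical-gradient minimum: {0:8.5f} after {1:d} iterations".format(x_min, n))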
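

# - The 2-D loop in Section 3 can also be written once for a point stored in a NumPy array, with the
#   gradient supplied as a function. This is a sketch under the same learning rate and stopping rule as
#   above; `gd_vector` and `grad_f` are illustrative names, not part of the original code.

# In[ ]:


def gd_vector(grad, x0, learning_rate=0.1, precision=1e-6, max_iters=10000):
    """Gradient descent on a point stored as a NumPy array."""
    cur = np.asarray(x0, dtype=float)
    previous_step_size = 1.0
    iters = 0
    while previous_step_size > precision and iters < max_iters:
        prev = cur.copy()
        cur = cur - learning_rate * grad(prev)          # vectorized descent step
        previous_step_size = np.abs(cur - prev).sum()   # change in x and y, as in Section 3
        iters += 1
    return cur, iters

grad_f = lambda p: 2 * p  # gradient of f(x, y) = x**2 + y**2 + 10
point, n = gd_vector(grad_f, [3, -3])
print("The local minimum occurs at {0:8.5f}, {1:8.5f}".format(point[0], point[1]))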