#!/usr/bin/env python
# coding: utf-8

# # Gradient descent

# In[1]:


import numpy as np


# In[2]:


# From cost_function.ipynb
def costFunction(X, y, theta):
    m = len(y)
    hypothesis = X @ theta
    err = hypothesis - y
    # Mean squared error scaled by 1/2: J(theta) = (1 / 2m) * sum(err^2)
    return (1 / (2 * m)) * (np.transpose(err) @ err).item((0, 0))


def gradientDescent(X, y, theta, alpha, num_iters):
    m = len(y)
    J_history = np.zeros((num_iters, 1))

    for i in range(num_iters):
        hypothesis = X @ theta
        err = hypothesis - y
        # Vectorised update: theta := theta - alpha * (1/m) * X^T (X theta - y)
        theta = theta - alpha * (1 / m) * np.transpose(np.transpose(err) @ X)
        # Record the cost after each update so we can inspect convergence later
        J_history[i, 0] = costFunction(X, y, theta)

    return (theta, J_history)


# Let's run it against some data:

# In[3]:


actual_theta = np.array([
    [100],
    [40],
])

X = np.array([
    [1, 0.8],
    [1, 2.3],
    [1, 1.6],
])

y = X @ actual_theta

theta = np.array([
    [0],
    [0],
])

alpha = 0.5      # learning rate of 0.5
num_iters = 200  # 200 iterations

(theta, history) = gradientDescent(X, y, theta, alpha, num_iters)

print("Actual theta_0:", actual_theta.item(0, 0), " Gradient descent theta_0:", theta.item(0, 0))
print("Actual theta_1:", actual_theta.item(1, 0), " Gradient descent theta_1:", theta.item(1, 0))


# Just to ensure that the error really is decreasing with each run, let's plot the cost history across runs.

# In[4]:


from matplotlib import pyplot as plt

fig = plt.figure()
x = np.linspace(0, num_iters - 1, num_iters)
plt.plot(x, history, 'r')
plt.show()
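

# As a sanity check (a minimal sketch, not part of the original exercise), we can compare the
# gradient-descent result against the direct least-squares solution of X theta = y, which for
# full-rank X is the same as the normal-equation solution theta = (X^T X)^{-1} X^T y. This
# assumes the `X`, `y`, and `theta` defined above; the two estimates should agree closely.

# In[5]:


theta_exact, *_ = np.linalg.lstsq(X, y, rcond=None)

print("Least-squares theta_0:", theta_exact.item(0, 0), " Gradient descent theta_0:", theta.item(0, 0))
print("Least-squares theta_1:", theta_exact.item(1, 0), " Gradient descent theta_1:", theta.item(1, 0))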