#!/usr/bin/env python
# coding: utf-8

# # Gradient descent

# In[1]:


import numpy as np


# In[2]:


# From cost_function.ipynb
def costFunction(X, y, theta):
    m = len(y)
    hypothesis = X @ theta
    err = hypothesis - y
    # Mean squared error scaled by 1/2: J(theta) = (1 / 2m) * sum(err^2)
    return (1 / (2 * m)) * (np.transpose(err) @ err).item((0, 0))


def gradientDescent(X, y, theta, alpha, num_iters):
    m = len(y)
    J_history = np.zeros((num_iters, 1))

    for i in range(num_iters):
        hypothesis = X @ theta
        err = hypothesis - y
        # Vectorised update: theta := theta - alpha * (1/m) * X^T (X theta - y)
        theta = theta - alpha * (1 / m) * np.transpose(np.transpose(err) @ X)
        # Record the cost after each update so we can inspect convergence later
        J_history[i, 0] = costFunction(X, y, theta)

    return (theta, J_history)


# Let's run it against some data:

# In[3]:


actual_theta = np.array([
    [100],
    [40],
])

X = np.array([
    [1, 0.8],
    [1, 2.3],
    [1, 1.6],
])

y = X @ actual_theta

theta = np.array([
    [0],
    [0],
])

alpha = 0.5      # learning rate of 0.5
num_iters = 200  # 200 iterations

(theta, history) = gradientDescent(X, y, theta, alpha, num_iters)

print("Actual theta_0:", actual_theta.item(0, 0), " Gradient descent theta_0:", theta.item(0, 0))
print("Actual theta_1:", actual_theta.item(1, 0), " Gradient descent theta_1:", theta.item(1, 0))


# Just to ensure that the error really is decreasing with each run, let's plot the cost history across runs.

# In[4]:


from matplotlib import pyplot as plt

fig = plt.figure()
x = np.linspace(0, num_iters - 1, num_iters)
plt.plot(x, history, 'r')
plt.show()
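

# As a sanity check (a minimal sketch, not part of the original exercise), we can compare the
# gradient-descent result against the direct least-squares solution of X theta = y, which for
# full-rank X is the same as the normal-equation solution theta = (X^T X)^{-1} X^T y. This
# assumes the `X`, `y`, and `theta` defined above; the two estimates should agree closely.

# In[5]:


theta_exact, *_ = np.linalg.lstsq(X, y, rcond=None)

print("Least-squares theta_0:", theta_exact.item(0, 0), " Gradient descent theta_0:", theta.item(0, 0))
print("Least-squares theta_1:", theta_exact.item(1, 0), " Gradient descent theta_1:", theta.item(1, 0))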