Gradient Descent from scratch

In [1]:

!pip3 install plotly

Requirement already satisfied: plotly in /usr/local/lib/python3.6/dist-packages (4.5.0)
Requirement already satisfied: retrying>=1.3.3 in /usr/local/lib/python3.6/dist-packages (from plotly) (1.3.3)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from plotly) (1.14.0)

In [2]:

from random import randint
from typing import List
from plotly import graph_objects as go

In [3]:

# Objective function that Gradient Descent is run on in this notebook.
# See: https://en.wikipedia.org/wiki/Paraboloid or https://www.wolframalpha.com/input/?i=x%5E2+%2B+y%5E2
def paraboloid(x: float, y: float) -> float:
    """Return the height ``z = x^2 + y^2`` of the paraboloid above the point (x, y).

    Args:
        x: Coordinate along the first axis.
        y: Coordinate along the second axis.

    Returns:
        The sum of the squares of ``x`` and ``y``.
    """
    return pow(x, 2) + pow(y, 2)

In [4]:

# Build a regular grid of sample points and evaluate the paraboloid on it.
# The grid is only needed for the surface plot; Gradient Descent itself does not use it.
xs_start = ys_start = -10
xs_stop = ys_stop = 11  # exclusive upper bound, as consumed by `range`
xs_step = ys_step = 1

xs: List[float] = list(range(xs_start, xs_stop, xs_step))
ys: List[float] = list(range(ys_start, ys_stop, ys_step))

# One row per x value; each row holds the function value for every y value.
zs: List[List[float]] = [[paraboloid(x, y) for y in ys] for x in xs]

print(f'xs: {xs}\n')
print(f'ys: {ys}\n')
# Only the first five rows are shown to keep the output readable.
print(f'zs: {zs[:5]} ...\n')

xs: [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

ys: [-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

zs: [[200, 181, 164, 149, 136, 125, 116, 109, 104, 101, 100, 101, 104, 109, 116, 125, 136, 149, 164, 181, 200], [181, 162, 145, 130, 117, 106, 97, 90, 85, 82, 81, 82, 85, 90, 97, 106, 117, 130, 145, 162, 181], [164, 145, 128, 113, 100, 89, 80, 73, 68, 65, 64, 65, 68, 73, 80, 89, 100, 113, 128, 145, 164], [149, 130, 113, 98, 85, 74, 65, 58, 53, 50, 49, 50, 53, 58, 65, 74, 85, 98, 113, 130, 149], [136, 117, 100, 85, 72, 61, 52, 45, 40, 37, 36, 37, 40, 45, 52, 61, 72, 85, 100, 117, 136]] ...

In [5]:

# Render the sampled grid as an interactive 3D surface.
# NOTE(review): plotly's Surface appears to treat each row of `z` as one y value,
# while `zs` is built with x as the outer index. The two agree here only because
# the paraboloid is symmetric in x and y -- confirm before plotting another function.
surface = go.Surface(x=xs, y=ys, z=zs, colorscale='Viridis')
fig = go.Figure(surface)
fig.show()

In [6]:

# The gradient is the vector of partial derivatives; it points in the direction
# in which the function increases fastest.
# See: https://www.wolframalpha.com/input/?i=gradient+of+x%5E2+%2B+y%5E2
def compute_gradient(vec: List[float]) -> List[float]:
    """Return the gradient of the paraboloid ``z = x^2 + y^2`` at the point ``vec``.

    Args:
        vec: The position ``[x, y]``; must contain exactly two components.

    Returns:
        ``[2 * x, 2 * y]``, i.e. the partial derivatives of z with respect
        to x and to y.
    """
    assert len(vec) == 2
    # dz/dx = 2 * x and dz/dy = 2 * y, so each component is simply doubled.
    return [2 * vec[axis] for axis in (0, 1)]

In [7]:

# One Gradient Descent update: move from the current position a small distance
# against the gradient (i.e. downhill).
def compute_step(curr_pos: List[float], learning_rate: float) -> List[float]:
    """Return the position reached after one Gradient Descent step.

    Args:
        curr_pos: The current position ``[x, y]``.
        learning_rate: Factor by which the gradient is scaled before it is
            subtracted from the current position.

    Returns:
        A new two-element list; ``curr_pos`` itself is left unmodified.
    """
    gradient: List[float] = compute_gradient(curr_pos)
    # Negating the scaled gradient turns "direction of steepest ascent" into a descent step.
    delta: List[float] = [component * -learning_rate for component in gradient]
    return [coord + change for coord, change in zip(curr_pos, delta)]

In [8]:

# Pick a random starting position on the surface of our Paraboloid
start_pos: List[float]

# Keep sampling until the candidate is not the minimum itself ((0, 0) in our case),
# because starting there would leave Gradient Descent with nothing to do.
while True:
    # `randint` includes BOTH endpoints, whereas the stop values are exclusive
    # bounds (they are fed to `range` when the grid is built). Subtracting 1
    # keeps the starting point inside the sampled / plotted region.
    start_x: float = randint(xs_start, xs_stop - 1)
    start_y: float = randint(ys_start, ys_stop - 1)
    # Only the exact point (0, 0) is the minimum. A point on just one of the
    # axes (e.g. (0, 5)) still has a non-zero gradient and is a valid start,
    # so the candidate is accepted as soon as either coordinate is non-zero.
    if start_x != 0 or start_y != 0:
        start_pos = [start_x, start_y]
        break

# Display the chosen starting position as the cell output
start_pos

Out[8]:

[4, 7]

In [9]:

# Hyperparameters of the descent
epochs: int = 5000             # number of update steps to perform
learning_rate: float = 0.001   # scaling applied to the gradient in every step
log_every: int = 500           # print the progress once per this many epochs

# The current best guess starts out as the randomly chosen starting position
best_pos: List[float] = start_pos

for i in range(epochs):
    next_pos: List[float] = compute_step(best_pos, learning_rate)
    # Print some debug information every once in a while
    if i % log_every == 0:
        print(f'Epoch {i}: {next_pos}')
    # The freshly computed position becomes the input of the next iteration
    best_pos = next_pos

print(f'Best guess for a minimum: {best_pos}')

Epoch 0: [3.992, 6.986]
Epoch 500: [1.4671049293897798, 2.5674336264321123]
Epoch 1000: [0.539177573607161, 0.9435607538125328]
Epoch 1500: [0.19815382666720605, 0.34676919666761064]
Epoch 2000: [0.07282376149321286, 0.12744158261312272]
Epoch 2500: [0.026763551969789107, 0.04683621594713106]
Epoch 3000: [0.009835906568851993, 0.017212836495491053]
Epoch 3500: [0.003614806365776567, 0.006325911140109013]
Epoch 4000: [0.0013284820235521919, 0.0023248435412163435]
Epoch 4500: [0.00048823209553084355, 0.00085440616717898]
Best guess for a minimum: [0.00017979037083174428, 0.00031463314895555317]