#!/usr/bin/env python
# coding: utf-8

# ## Stochastic Gradient Descent for Linear Regression
#
# As described in [these lecture notes](http://cs229.stanford.edu/notes/cs229-notes1.pdf).
#
# 1. First, we generate a dataset from the linear function `f(x) = -x + 4` and add some uniform noise.
# 2. We plot the quadratic loss of the hypothesis `f(x) = ax + b` for `a` and `b` in the [-10, 10) range.
# 3. We run stochastic gradient descent, using the LMS update rule from the lecture notes linked above:
#    for each training example `(x, y)`, update `a := a + α(y − h(x))x` and `b := b + α(y − h(x))`,
#    where `h(x) = ax + b` is the current hypothesis.
#
# Enjoy!

# In[1]:

import random

import matplotlib.pyplot as plt

random.seed(100)

N = 100
xs = []
ys = []

# True parameters of the underlying line f(x) = a*x + b.
a = -1
b = 4

# Sample N points on the line, then perturb both coordinates with
# uniform noise in [-scale/2, scale/2).
scale = 40
for i in range(N):
    x = i
    y = a * x + b
    x += scale * (0.5 - random.random())
    y += scale * (0.5 - random.random())
    xs.append(x)
    ys.append(y)

plt.scatter(xs, ys)
plt.title('Dataset')
plt.show()

# In[2]:

from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection


def calc_error(a, b):
    """Quadratic loss of the hypothesis f(x) = a*x + b over the dataset."""
    error = 0
    for i in range(N):
        predicted = a * xs[i] + b
        error += (predicted - ys[i]) ** 2
    return error


# Evaluate the loss on an integer grid of (a, b) pairs.
px = []
py = []
pz = []
for a in range(-10, 10):
    for b in range(-10, 10):
        px.append(a)
        py.append(b)
        pz.append(calc_error(a, b))

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_trisurf(px, py, pz, linewidth=0.2, color='yellow', shade=False)
ax.set_xlabel('A')
ax.set_ylabel('B')
ax.set_zlabel('Error')
plt.title('Quadratic Loss')
plt.show()

# In[3]:

# Stochastic gradient descent: start from (a, b) = (10, 10) and apply the
# LMS update once per training example, for two passes over the data.
ta = 10
tb = 10
alpha = 0.00001
for j in range(2):
    for i in range(N):
        hy = ta * xs[i] + tb  # current prediction h(x_i)
        ta = ta + alpha * (ys[i] - hy) * xs[i]
        tb = tb + alpha * (ys[i] - hy) * 1.0  # x_0 = 1 for the intercept term

# Draw the fitted line across (and slightly beyond) the range of the data.
x0 = xs[0] - 20
x1 = xs[-1] + 20
y0 = ta * x0 + tb
y1 = ta * x1 + tb
plt.plot([x0, x1], [y0, y1])
plt.scatter(xs, ys)
plt.title('Dataset and fitted line')
plt.show()
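# To see how quickly SGD converges, we can re-run it while recording the
# quadratic loss after each pass over the data. This cell is a small sketch
# built on the `calc_error` helper above; the epoch count of 20 is arbitrary.

# In[4]:

# Re-run SGD from the same starting point, tracking the loss per epoch.
ta2 = 10.0
tb2 = 10.0
losses = []
epochs = 20
for j in range(epochs):
    for i in range(N):
        hy = ta2 * xs[i] + tb2
        ta2 += alpha * (ys[i] - hy) * xs[i]
        tb2 += alpha * (ys[i] - hy)
    losses.append(calc_error(ta2, tb2))

plt.plot(range(1, epochs + 1), losses)
plt.xlabel('Epoch')
plt.ylabel('Quadratic loss')
plt.title('SGD convergence')
plt.show()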
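# As a rough sanity check, we can compare the SGD estimates with the
# closed-form least-squares fit. Here `np.polyfit` stands in for the normal
# equations; it is an assumption of this sketch, not part of the original recipe.

# In[5]:

import numpy as np

# Closed-form degree-1 least-squares fit; polyfit returns [slope, intercept].
ls_a, ls_b = np.polyfit(xs, ys, 1)

print('SGD fit:           a = %.4f, b = %.4f  (loss %.1f)' % (ta, tb, calc_error(ta, tb)))
print('Least-squares fit: a = %.4f, b = %.4f  (loss %.1f)' % (ls_a, ls_b, calc_error(ls_a, ls_b)))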