Agenda: linear regression on the Boston housing data: computing the cost (non-vectorized and vectorized), visualizing the cost surface, solving exactly with the normal equations, and solving with gradient descent.
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import load_boston
boston_data = load_boston()
print(boston_data['DESCR'])
# take the boston data
data = boston_data['data']
# we will only work with two of the features: INDUS and RM
x_input = data[:, [2,5]]
y_target = boston_data['target']
# Individual plots for the two features:
plt.title('Industrialness vs Med House Price')
plt.scatter(x_input[:, 0], y_target)
plt.xlabel('Industrialness')
plt.ylabel('Med House Price')
plt.show()
plt.title('Avg Num Rooms vs Med House Price')
plt.scatter(x_input[:, 1], y_target)
plt.xlabel('Avg Num Rooms')
plt.ylabel('Med House Price')
plt.show()
def cost(w1, w2, b, X, t):
    '''
    Evaluate the cost function in a non-vectorized manner for
    inputs `X` and targets `t`, at weights `w1`, `w2` and `b`.
    '''
    costs = 0
    for i in range(len(t)):
        y_i = w1 * X[i, 0] + w2 * X[i, 1] + b
        t_i = t[i]
        costs += 0.5 * (y_i - t_i) ** 2
    return costs / len(t)
cost(3, 5, 20, x_input, y_target)
cost(3, 5, 0, x_input, y_target)
def cost_vectorized(w1, w2, b, X, t):
    '''
    Evaluate the cost function in a vectorized manner for
    inputs `X` and targets `t`, at weights `w1`, `w2` and `b`.
    '''
    N = len(t)
    w = np.array([w1, w2])
    y = np.dot(X, w) + b * np.ones(N)
    return np.sum((y - t) ** 2) / (2.0 * N)
cost_vectorized(3, 5, 20, x_input, y_target)
cost_vectorized(3, 5, 0, x_input, y_target)
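As a quick sanity check (a small sketch, not part of the original notebook), the two implementations should agree up to floating-point error:
# sanity check: the vectorized and non-vectorized costs should match
assert np.isclose(cost(3, 5, 20, x_input, y_target),
                  cost_vectorized(3, 5, 20, x_input, y_target))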
We'll see below that the vectorized code already runs ~2x faster than the non-vectorized code!
Hopefully this will convince you to vectorize your code whenever possible.
import time
t0 = time.time()
print(cost(4, 5, 20, x_input, y_target))
t1 = time.time()
print(t1 - t0)
t0 = time.time()
print(cost_vectorized(4, 5, 20, x_input, y_target))
t1 = time.time()
print(t1 - t0)
We'll plot the cost for two of our weights, assuming that bias = -22.89831573.
We'll see where that number comes from later.
Notice that the contours are ovals.
w1s = np.arange(-1.0, 0.0, 0.01)
w2s = np.arange(6.0, 10.0, 0.1)
z_cost = []
for w2 in w2s:
    z_cost.append([cost_vectorized(w1, w2, -22.89831573, x_input, y_target) for w1 in w1s])
z_cost = np.array(z_cost)
np.shape(z_cost)
W1, W2 = np.meshgrid(w1s, w2s)
CS = plt.contour(W1, W2, z_cost, 25)
plt.clabel(CS, inline=1, fontsize=10)
plt.title('Costs for various values of w1 and w2 for b = -22.89831573')
plt.xlabel("w1")
plt.ylabel("w2")
plt.plot([-0.33471389], [7.82205511], 'o')  # this is the minimum that we'll find later
plt.show()
Work this out on the board:
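Setting the gradient of the cost to zero gives the normal equations, w* = (X^T X)^{-1} X^T t (with the bias folded into X as an extra all-ones column); this is what solve_exactly below computes.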
# add an extra feature (a column of the input) that is all ones, to absorb the bias
x_in = np.concatenate([x_input, np.ones([np.shape(x_input)[0], 1])], axis=1)
x_in
def solve_exactly(X, t):
    '''
    Solve linear regression exactly (fully vectorized).
    Given `X` - NxD matrix of inputs
          `t` - target outputs
    Returns the optimal weights as a D-dimensional vector
    '''
    N, D = np.shape(X)
    A = np.matmul(X.T, X)
    c = np.dot(X.T, t)
    return np.matmul(np.linalg.inv(A), c)
solve_exactly(x_in, y_target)
# In practice we don't want to code this ourselves; use the built-in solver instead
np.linalg.lstsq(x_in, y_target, rcond=None)
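Note that np.linalg.lstsq returns a tuple (solution, residuals, rank, singular values); a minimal sketch of pulling out just the fitted weights:
# the fitted weights (w1, w2, bias) are the first element of the returned tuple
w_lstsq = np.linalg.lstsq(x_in, y_target, rcond=None)[0]
print(w_lstsq)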
# Vectorized gradient function
def gradfn(weights, X, t):
    '''
    Given `weights` - a current "guess" of what our weights should be
          `X` - matrix of shape (N,D) of input features
          `t` - target y values
    Return gradient of each weight evaluated at the current value
    '''
    N, D = np.shape(X)
    y_pred = np.matmul(X, weights)
    error = y_pred - t
    return np.matmul(np.transpose(X), error) / float(N)
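As an optional sanity check (a small sketch, not in the original notebook), we can compare the analytic gradient against a central finite-difference approximation of the cost:
# finite-difference check of gradfn; full_cost folds the bias into the weight vector
def full_cost(w, X, t):
    return np.sum((np.dot(X, w) - t) ** 2) / (2.0 * len(t))

w_check = np.array([1.0, 1.0, 0.0])
eps = 1e-6
numeric_grad = np.zeros_like(w_check)
for j in range(len(w_check)):
    step = np.zeros_like(w_check)
    step[j] = eps
    numeric_grad[j] = (full_cost(w_check + step, x_in, y_target)
                       - full_cost(w_check - step, x_in, y_target)) / (2 * eps)
print(np.allclose(gradfn(w_check, x_in, y_target), numeric_grad))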
def solve_via_gradient_descent(X, t, print_every=5000,
                               niter=100000, alpha=0.005):
    '''
    Given `X` - matrix of shape (N,D) of input features
          `t` - target y values
    Solves for linear regression weights with gradient descent.
    Return weights after `niter` iterations.
    '''
    N, D = np.shape(X)
    # initialize all the weights to zeros
    w = np.zeros([D])
    for k in range(niter):
        dw = gradfn(w, X, t)
        w = w - alpha * dw
        if k % print_every == 0:
            print('Weight after %d iterations: %s' % (k, str(w)))
    return w
solve_via_gradient_descent(X=x_in, t=y_target)
# For comparison, this was the exact result:
np.linalg.lstsq(x_in, y_target, rcond=None)