## Machine Learning Online Class - Exercise 2: Logistic Regression¶

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
## ==================== Part 1: Plotting ====================
## We start the exercise by first plotting the data to understand the
#  the problem we are working with.
data = np.loadtxt('ex2data1.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
def plot_data(X,y):
pos, neg = y==0, y==1
plt.scatter(X[pos,0], X[pos, 1], marker='+', color='red', label='admitted')
plt.scatter(X[neg,0], X[neg, 1], marker='o', color='blue', label='rejected')
plt.xlabel('exam1 score')
plt.ylabel('exam2 score')
plt.legend()
plot_data(X,y)

In [4]:
## ============ Part 2: Compute Cost and Gradient ============
#  In this part of the exercise, you will implement the cost and gradient
#  for logistic regression. You neeed to complete the code in
#  costFunction.m

# Setup the data matrix appropriately, and add ones for the intercept term
X = data[:, 0:2]
y = data[:, 2]
(m, n) = np.shape(X)
# Add intercept term to x and X_test
X = np.concatenate((np.ones((m, 1)), X), axis=1)
# Initialize fitting parameters
initial_theta = np.zeros(n+1)

# Compute and display initial cost and gradient
from scipy.special import expit
def sigmoid(z):
return expit(z)
#return 1./(1.+np.exp(-z))

def costFunction(theta, X, y):
""" Logistic Regression cost function and Gradient.
Denoting g(z) = 1/(1+exp(-z))
h = g(X theta) (m,)
In:
theta (n+1,) float array
X (m, n+1) float array
y (m, ) binary array
Out:
cost (float): 1/m (-y^T log(h) - (1-y)^T log(1-h))
grad (n+1, ): 1/m X^T(g(X theta)-y)
"""
m = X.shape[0]
h = sigmoid(np.dot(X, theta))
cost = 1./float(m) * (-np.dot(y, np.log(h)) - np.dot((1.-y), np.log(1.-h)))
grad = 1./float(m) * np.dot(X.T, h-y)
return cost, grad
(cost, grad) = costFunction(initial_theta, X, y)

print 'Cost at initial theta (zeros): %f\n'%(cost)
print 'Gradient at initial theta (zeros):'
print grad

Cost at initial theta (zeros): 0.693147

Gradient at initial theta (zeros):
[ -0.1        -12.00921659 -11.26284221]

In [5]:
## ============= Part 3: Optimizing using fminunc  =============
#  In this exercise, you will use a built-in function (fminunc) to find the
#  optimal parameters theta.
from scipy.optimize import minimize

options = {'maxiter':400, 'disp':True}
res = minimize(costFunction, initial_theta, args=(X,y),
method='BFGS', jac=True, options=options)
theta_opt = res['x']
print "theta_opt="
print theta_opt
# Plot decision boundary
def plot_decision_bd(theta, X, y):
x1min, x1max = X[:,1].min(), X[:,1].max()
x2min, x2max = X[:,2].min(), X[:,2].max()
yofx = lambda x1, theta : -1./theta[2] * (theta[0]+theta[1]*x1)
pt1, pt2 = [x1min, yofx(x1min, theta)], [x1max, yofx(x1max, theta)]
plt.plot(pt1, pt2, label='decison_bd')
pos, neg = y==0, y==1
plt.scatter(X[pos,1], X[pos, 2], marker='+', color='red', label='admitted')
plt.scatter(X[neg,1], X[neg, 2], marker='o', color='blue', label='rejected')
plt.xlabel('exam1 score')
plt.ylabel('exam2 score')
#plt.legend()
plot_decision_bd(theta_opt, X, y)

Optimization terminated successfully.
Current function value: 0.203498
Iterations: 23
Function evaluations: 31
Gradient evaluations: 31
theta_opt=
[-25.16133284   0.2062317    0.2014716 ]

/home/blumenta/anaconda/envs/tensorflow/lib/python2.7/site-packages/ipykernel/__main__.py:35: RuntimeWarning: divide by zero encountered in log

In [6]:
## ============== Part 4: Predict and Accuracies ==============
#  After learning the parameters, you'll like to use it to predict the outcomes
#  on unseen data. In this part, you will use the logistic regression model
#  to predict the probability that a student with score 45 on exam 1 and
#  score 85 on exam 2 will be admitted.
#
#  Furthermore, you will compute the training and test set accuracies of
#  our model.
#
#  Your task is to complete the code in predict.m

#  Predict probability for a student with score 45 on exam 1
#  and score 85 on exam 2
def proba_admission(theta_opt, x):
return sigmoid(np.dot(x,theta_opt))
def predict(theta, X):
return np.double(proba_admission(theta, X)>= 0.5)

xtest = np.array([1., 45., 85.]) #test student score
print "Probability of admission : %f"%(proba_admission(theta_opt, xtest))
p = predict(theta_opt, X)
accuracy = np.double(p==y).mean() * 100
print "Training accuracy: %f percent"%accuracy

Probability of admission : 0.776291
Training accuracy: 89.000000 percent


## Exercise 2: Logistic regression with regularization¶

In [8]:
data = np.loadtxt('ex2data2.txt', delimiter=',')
x1, x2, y = data[:,0], data[:,1], data[:,2]
def plot_data2(x1, x2, y):
pos, neg = y==0, y==1
fig = plt.figure()
plt.scatter(x1[pos], x2[pos], marker='+', color='red')
plt.scatter(x1[neg], x2[neg], marker='o', color='blue')
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
return fig
fig = plot_data2(x1,x2,y)


Clearly, the data is not linearly separable so we need to use feature mapping to find a non linear decision boundary.

In [9]:
def map_feature(x1, x2, degree=6):
""" Maps linear to non linear features
In:
x1 (m,) dim array
x2 (m,) dim array
Out:
out (m, ncol) dim array  = (1,x1,x1^2,.., x1^6,x1x2,..,x2^6)
"""
ncol = (degree+1)*(degree+2)/2
out = np.ones((x1.shape[0], ncol))
k=0
for j in range(0, degree+1):
for i in range(0,degree+1-j):
out[:,k] = x2**j * x1**i
k+=1
return out

#Create data matrix
X = map_feature(data[:,0], data[:,1], degree=6)
#Define initial theta:
theta0 = np.zeros(X.shape[1])
#Define regularization parameter
lam = 1.

#Define regularized cost function
def cost_function_reg(theta, X, y, lam):
""" Regularized Logistic Regression cost function and Gradient.
Denoting g(z) = 1/(1+exp(-z))
h = g(X theta) (m,1)
t = theta[1:n+1]
In:
theta (n+1,) float array
X (m, n+1) float array
y (m,) binary array
lam : scalar, regularization parameter
Out:
cost (float): 1/m (-y^T log(h) - (1-y)^T log(1-h)) +lam/2m * t^T t
grad (n+1, 1): 1/m X^T(g(X theta)-y)
"""
cost, grad = costFunction(theta, X, y)
t = theta[1:]
cost += lam/(2*m) * np.dot(t,t)
grad[1:] += lam/m * t
return cost, grad

(cost, grad) = cost_function_reg(theta0, X, y, lam)
print "cost at initial theta: %f"%cost

def learn_reg_logistic(X,y,lam,theta0):
options = {'maxiter':800, 'disp':True}
res = minimize(cost_function_reg, theta0, args=(X,y,lam),
method='BFGS', jac=True, options=options)
return res['x']

#Plotting the decision boundaries for the different regularization parameters
def f(xx1, xx2, theta, degree=6):
ncol = (degree+1)*(degree+2)/2
out = np.zeros_like(xx1)
k=0
for j in range(0, degree+1):
for i in range(0,degree+1-j):
out += theta[k] * xx2**j * xx1**i
k+=1
return out
def plot_nl_decision_bd(theta, lam, x1, x2, y, acc):
x1min, x1max = x1.min(), x1.max()
x2min, x2max = x2.min(), x2.max()
nn = 256
xs, ys = np.linspace(x1min, x1max, nn), np.linspace(x2min, x2max, nn)
xx, yy = np.meshgrid(xs, ys)
zz = f(xx,yy,theta)
fig = plot_data2(x1, x2, y)
C = plt.contour(xx, yy, zz, [0.], linewidth=0.5, colors='black')
plt.title('Lam = %.1f, accuracy = %.1f'%(lam,acc))

##############################################
# Perform Regularized logistic Regression for
# different values of lambda
##############################################
reg_vals = [0., 1., 10., 100.]
for lam in reg_vals:
theta_opt = learn_reg_logistic(X,y,lam, theta0)
p = predict(theta_opt, X)
accuracy = np.double(p==y).mean() * 100
plot_nl_decision_bd(theta_opt, lam, x1, x2, y, accuracy)

cost at initial theta: 0.693147
Optimization terminated successfully.
Current function value: 0.224569
Iterations: 546
Function evaluations: 547
Gradient evaluations: 547
Optimization terminated successfully.
Current function value: 0.539956
Iterations: 45
Function evaluations: 46
Gradient evaluations: 46
Optimization terminated successfully.
Current function value: 0.653251
Iterations: 19
Function evaluations: 20
Gradient evaluations: 20
Optimization terminated successfully.
Current function value: 0.687424
Iterations: 6
Function evaluations: 7
Gradient evaluations: 7