%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
# Exam-scores dataset: two exam scores per applicant plus an
# admitted (1) / rejected (0) label; the file has no header row.
data1 = pd.read_csv('ex2data1.txt', header=None, names=['Exam1', 'Exam2', 'Admitted'])
data1.head()

# Scatter the two classes: rejected as yellow dots, admitted as blue crosses.
rejected = data1[data1['Admitted'] == 0]
admitted = data1[data1['Admitted'] == 1]
plt.figure(figsize=(8, 6))
plt.xlabel('Exam 1')
plt.ylabel('Exam 2')
plt.plot(rejected['Exam1'], rejected['Exam2'], 'yo')
plt.plot(admitted['Exam1'], admitted['Exam2'], 'b+')
plt.show()
from sklearn import linear_model

# L1-regularized logistic regression on the raw exam scores.
# NOTE: sklearn's default solver (lbfgs since 0.22) does not support the
# L1 penalty, so liblinear (the old default) must be requested explicitly.
clf = linear_model.LogisticRegression(penalty='l1', C=1, solver='liblinear')
clf.fit(data1[['Exam1', 'Exam2']].values, data1['Admitted'].values)
# Report training accuracy, learned weights, and intercept.
print(clf.score(data1[['Exam1', 'Exam2']].values, data1['Admitted'].values))
print(clf.coef_)
print(clf.intercept_)
# The linear decision boundary is the set of points where
# w0*x + w1*y + b == 0, i.e. y = -(w0*x + b) / w1.
boundary_x = np.linspace(30, 100, 100)
weights = clf.coef_
bias = clf.intercept_
boundary_y = -(weights[:, 0] * boundary_x + bias) / weights[:, 1]

plt.figure(figsize=(8, 6))
plt.xlabel('Exam 1')
plt.ylabel('Exam 2')
rejected = data1[data1['Admitted'] == 0]
admitted = data1[data1['Admitted'] == 1]
plt.plot(rejected['Exam1'], rejected['Exam2'], 'yo')
plt.plot(admitted['Exam1'], admitted['Exam2'], 'b+')
plt.plot(boundary_x, boundary_y, color='r', label='decision boundary')
plt.show()
# Microchip QA dataset: two test scores per chip and a pass (1) / fail (0)
# label; again no header row in the file.
data2 = pd.read_csv('ex2data2.txt', header=None, names=['Test1', 'Test2', 'y'])
data2.head()

# Scatter the two classes: failing chips as yellow dots, passing as blue crosses.
failed = data2[data2['y'] == 0]
passed = data2[data2['y'] == 1]
plt.figure(figsize=(6, 6))
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.plot(failed['Test1'], failed['Test2'], 'yo')
plt.plot(passed['Test1'], passed['Test2'], 'b+')
plt.show()
from sklearn.preprocessing import PolynomialFeatures

# Expand the two test scores into all monomials up to degree 6, which lets
# a linear classifier learn a non-linear boundary in the original space.
poly = PolynomialFeatures(degree=6)
trans = poly.fit_transform(data2[['Test1', 'Test2']])

# L1-regularized fit; liblinear is required because the default lbfgs
# solver does not support the L1 penalty (sklearn >= 0.22).
clf2 = linear_model.LogisticRegression(penalty='l1', C=1, solver='liblinear')
clf2.fit(trans, data2['y'].values)
# Report training accuracy, learned weights, and intercept.
print(clf2.score(trans, data2['y'].values))
print(clf2.coef_)
print(clf2.intercept_)
# Visualize clf2's decision regions over the input space.
plt.figure(figsize=(6, 6))
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
failed = data2[data2['y'] == 0]
passed = data2[data2['y'] == 1]
plt.plot(failed['Test1'], failed['Test2'], 'yo')
plt.plot(passed['Test1'], passed['Test2'], 'b+')

# Evaluate the classifier on a dense grid (mapping each grid point through
# the same polynomial feature expansion) and draw the boundary contour.
dim = np.linspace(-1, 1.5, 1000)
xx, yy = np.meshgrid(dim, dim)
grid_points = np.c_[xx.ravel(), yy.ravel()]
Z = clf2.predict(poly.transform(grid_points)).reshape(xx.shape)
plt.contour(xx, yy, Z, cmap=plt.cm.Paired)
plt.show()
# Weak regularization (large C): the model overfits and the boundary hugs
# the training points. liblinear is required for the L1 penalty, since the
# default lbfgs solver (sklearn >= 0.22) does not support it.
clf2_over = linear_model.LogisticRegression(penalty='l1', C=1000, solver='liblinear')
clf2_over.fit(trans, data2['y'].values)

plt.figure(figsize=(6, 6))
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.plot(data2[data2['y'] == 0]['Test1'], data2[data2['y'] == 0]['Test2'], 'yo')
plt.plot(data2[data2['y'] == 1]['Test1'], data2[data2['y'] == 1]['Test2'], 'b+')
# Predict over a dense grid and draw the resulting decision boundary.
dim = np.linspace(-1, 1.5, 1000)
xx, yy = np.meshgrid(dim, dim)
Z = clf2_over.predict(poly.transform(np.c_[xx.ravel(), yy.ravel()]))
Z = Z.reshape(xx.shape)
plt.contour(xx, yy, Z, cmap=plt.cm.Paired)
plt.show()
# Strong L2 regularization (tiny C): the model underfits and produces a
# very smooth, conservative decision boundary.
clf2_under = linear_model.LogisticRegression(penalty='l2', C=0.01)
clf2_under.fit(trans, data2['y'].values)

plt.figure(figsize=(6, 6))
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
failed = data2[data2['y'] == 0]
passed = data2[data2['y'] == 1]
plt.plot(failed['Test1'], failed['Test2'], 'yo')
plt.plot(passed['Test1'], passed['Test2'], 'b+')

# Predict over a dense grid and draw the resulting decision boundary.
dim = np.linspace(-1, 1.5, 1000)
xx, yy = np.meshgrid(dim, dim)
grid_points = np.c_[xx.ravel(), yy.ravel()]
Z = clf2_under.predict(poly.transform(grid_points)).reshape(xx.shape)
plt.contour(xx, yy, Z, cmap=plt.cm.Paired)
plt.show()