#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import matplotlib.pyplot as plt import pandas as pd import sklearn # #Visualizing the dataset # In[2]: import scipy.io # In[3]: data = scipy.io.loadmat('ex5data1.mat') # In[4]: plt.figure(figsize=(8, 6)) plt.xlabel('Change in water level(x)') plt.ylabel('Water flowing out of the dam(y)') plt.scatter(data['X'], data['y'], c='r', marker='x') plt.show() # #Learning curve for linear regression # In[5]: from sklearn.learning_curve import learning_curve # In[6]: def plot_learning_curve(estimator, title, X, y, Xval, yval, xlim=None, ylim=None): iterations = range(2, len(X)) train_error = [] val_error = [] for i in iterations: estimator.fit(X[:i], y[:i]) train_error.append(100* (1 - estimator.score(X[:i], y[:i]))) val_error.append(100 * (1- estimator.score(Xval, yval))) plt.figure(figsize=(8, 6)) plt.title(title) plt.plot(iterations, train_error, label="Train") plt.plot(iterations, val_error, label="Cross Validation") plt.ylabel('Error (%)') plt.xlabel('Number of training examples') plt.xlim(xlim) plt.ylim(ylim) plt.legend() return plt # In[7]: from sklearn.linear_model import LinearRegression from sklearn.linear_model import Ridge # In[8]: clf = LinearRegression() clf.fit(data['X'], data['y']) print clf.coef_ print clf.intercept_ # In[9]: plt.figure(figsize=(8, 6)) plt.xlabel('Change in water level(x)') plt.ylabel('Water flowing out of the dam(y)') plt.scatter(data['X'], data['y'], c='r', marker='x') plt.plot(data['X'], clf.predict(data['X'])) plt.show() # In[10]: plot_learning_curve(clf, 'Learning curve for linear regression', data['X'], data['y'], data['Xval'], data['yval']).show() # #Polynomial Regression Learning Curve # ##$\lambda = 0$ # In[11]: from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler clf2 = Pipeline([('poly', PolynomialFeatures(degree=6)), ('zscore', StandardScaler()), ('linear', Ridge(alpha=0.0))]) clf2.fit(data['X'], data['y']) # In[12]: plt.figure(figsize=(8, 6)) plt.xlabel('Change in water level(x)') plt.ylabel('Water flowing out of the dam(y)') plt.scatter(data['X'], data['y'], c='r', marker='x') xx = np.linspace(-60, 50, 100)[:, np.newaxis] plt.plot(xx, clf2.predict(xx)) plt.show() # In[13]: plot_learning_curve(clf2, 'Learning curve for polynomial regression', data['X'], data['y'], data['Xval'], data['yval']).show() # ##$\lambda = 1$ # In[14]: clf3 = Pipeline([('poly', PolynomialFeatures(degree=6)), ('zscore', StandardScaler()), ('linear', Ridge(alpha=1))]) clf3.fit(data['X'], data['y']) print clf3.named_steps['linear'].coef_ # In[15]: plt.figure(figsize=(8, 6)) plt.xlabel('Change in water level(x)') plt.ylabel('Water flowing out of the dam(y)') plt.scatter(data['X'], data['y'], c='r', marker='x') xx = np.linspace(-60, 50, 100)[:, np.newaxis] plt.plot(xx, clf3.predict(xx)) plt.show() # In[16]: plot_learning_curve(clf3, 'Learning curve for polynomial regression', data['X'], data['y'], data['Xval'], data['yval']).show() # ##$\lambda = 100$ # In[17]: clf4 = Pipeline([('poly', PolynomialFeatures(degree=6)), ('zscore', StandardScaler()), ('linear', Ridge(alpha=100))]) clf4.fit(data['X'], data['y']) print clf4.named_steps['linear'].coef_ # In[18]: plt.figure(figsize=(8, 6)) plt.xlabel('Change in water level(x)') plt.ylabel('Water flowing out of the dam(y)') plt.scatter(data['X'], data['y'], c='r', marker='x') xx = np.linspace(-60, 50, 100)[:, np.newaxis] plt.plot(xx, clf4.predict(xx)) plt.show() # In[19]: plot_learning_curve(clf4, 'Learning curve for polynomial regression', data['X'], data['y'], data['Xval'], data['yval']).show() # In[ ]: