#!/usr/bin/env python
# coding: utf-8

# Simple linear regression on one feature (the first column, median income) of
# the California housing dataset, fit three ways: the normal equation solved by
# hand, sklearn's LinearRegression, and LinearRegression without an intercept.
# Each fit is visualized against the training scatter.

# In[1]:

import sklearn
import numpy as np
import matplotlib.pyplot as plt

print(sklearn.__version__, np.__version__)

# In[2]:

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing

# In[3]:

housing = fetch_california_housing()
print(housing.data.shape, housing.target.shape)

# In[4]:

# Keep only the first feature (slice 0:1 preserves the 2-D shape sklearn
# expects) so the fit can be drawn as a line in 2-D.
X_train, X_test, y_train, y_test = train_test_split(
    housing.data[:, 0:1], housing.target, random_state=42
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# normal equation

# In[5]:

# Prepend a column of ones so the bias term is folded into the weight vector.
x0 = np.ones((X_train.shape[0], 1))
X = np.hstack((x0, X_train))
print(X.shape)

# In[6]:

# Solve the normal equation (X^T X) w = X^T y directly.
# np.linalg.solve is numerically more stable (and cheaper) than forming the
# explicit inverse with np.linalg.inv(X.T @ X) and multiplying through.
w = np.linalg.solve(X.T.dot(X), X.T.dot(y_train))
print(w)

# In[7]:

# Draw the fitted line over x in [0, 10]: y = w[0] + w[1] * x.
plt.scatter(X_train, y_train)
plt.plot([0, 10], [w[0], 10 * w[1] + w[0]], 'r')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# basic regression

# In[8]:

lr = LinearRegression()
lr.fit(X_train, y_train)
# print() added: in the notebook the bare expression was echoed by the REPL,
# but as a script its value would be silently discarded.
print(lr.score(X_test, y_test))

# In[9]:

# Recompute R^2 by hand — 1 - SS_res / SS_tot — to confirm it matches
# lr.score above.
y_pred = lr.predict(X_test)
r2 = 1 - ((y_test - y_pred) ** 2).sum() / ((y_test - y_test.mean()) ** 2).sum()
print(r2)

# In[10]:

print(lr.coef_, lr.intercept_)

# In[11]:

plt.scatter(X_train, y_train)
plt.plot([0, 10], [lr.intercept_, 10 * lr.coef_ + lr.intercept_], 'r')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# not fit intercept

# In[12]:

# With fit_intercept=False the line is forced through the origin,
# so intercept_ comes back as 0.0.
lr_no_intercept = LinearRegression(fit_intercept=False)
lr_no_intercept.fit(X_train, y_train)
print(lr_no_intercept.coef_, lr_no_intercept.intercept_)

# In[13]:

plt.scatter(X_train, y_train)
plt.plot(
    [0, 10],
    [lr_no_intercept.intercept_, 10 * lr_no_intercept.coef_ + lr_no_intercept.intercept_],
    'r',
)
plt.xlabel('x')
plt.ylabel('y')
plt.show()