#!/usr/bin/env python
# coding: utf-8

# Simple linear regression on one feature (the first column, median income) of
# the California housing dataset, fit three ways: the normal equation solved by
# hand, sklearn's LinearRegression, and LinearRegression without an intercept.
# Each fit is visualized against the training scatter.

# In[1]:

import sklearn
import numpy as np
import matplotlib.pyplot as plt

print(sklearn.__version__, np.__version__)

# In[2]:

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing

# In[3]:

housing = fetch_california_housing()
print(housing.data.shape, housing.target.shape)

# In[4]:

# Keep only the first feature (slice 0:1 preserves the 2-D shape sklearn
# expects) so the fit can be drawn as a line in 2-D.
X_train, X_test, y_train, y_test = train_test_split(
    housing.data[:, 0:1], housing.target, random_state=42
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# normal equation

# In[5]:

# Prepend a column of ones so the bias term is folded into the weight vector.
x0 = np.ones((X_train.shape[0], 1))
X = np.hstack((x0, X_train))
print(X.shape)

# In[6]:

# Solve the normal equation (X^T X) w = X^T y directly.
# np.linalg.solve is numerically more stable (and cheaper) than forming the
# explicit inverse with np.linalg.inv(X.T @ X) and multiplying through.
w = np.linalg.solve(X.T.dot(X), X.T.dot(y_train))
print(w)

# In[7]:

# Draw the fitted line over x in [0, 10]: y = w[0] + w[1] * x.
plt.scatter(X_train, y_train)
plt.plot([0, 10], [w[0], 10 * w[1] + w[0]], 'r')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# basic regression

# In[8]:

lr = LinearRegression()
lr.fit(X_train, y_train)
# print() added: in the notebook the bare expression was echoed by the REPL,
# but as a script its value would be silently discarded.
print(lr.score(X_test, y_test))

# In[9]:

# Recompute R^2 by hand — 1 - SS_res / SS_tot — to confirm it matches
# lr.score above.
y_pred = lr.predict(X_test)
r2 = 1 - ((y_test - y_pred) ** 2).sum() / ((y_test - y_test.mean()) ** 2).sum()
print(r2)

# In[10]:

print(lr.coef_, lr.intercept_)

# In[11]:

plt.scatter(X_train, y_train)
plt.plot([0, 10], [lr.intercept_, 10 * lr.coef_ + lr.intercept_], 'r')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# not fit intercept

# In[12]:

# With fit_intercept=False the line is forced through the origin,
# so intercept_ comes back as 0.0.
lr_no_intercept = LinearRegression(fit_intercept=False)
lr_no_intercept.fit(X_train, y_train)
print(lr_no_intercept.coef_, lr_no_intercept.intercept_)

# In[13]:

plt.scatter(X_train, y_train)
plt.plot(
    [0, 10],
    [lr_no_intercept.intercept_, 10 * lr_no_intercept.coef_ + lr_no_intercept.intercept_],
    'r',
)
plt.xlabel('x')
plt.ylabel('y')
plt.show()