#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import matplotlib.pyplot as plt import pandas as pd import sklearn # # Linear regression with one variable # In[2]: data1 = pd.read_csv('ex1data1.txt', header=None, names=['x', 'y']) # In[3]: data1.head() # In[4]: plt.figure(figsize=(8, 6)) plt.plot(data1['x'], data1['y'], 'rx') plt.show() # In[5]: from sklearn import linear_model # In[6]: clf = linear_model.LinearRegression() clf.fit(data1[['x']].values, data1['y'].values) print clf.coef_ print clf.intercept_ # In[7]: plt.figure(figsize=(8, 6)) plt.plot(data1['x'], data1['y'], 'rx') plt.plot(data1['x'], clf.predict(data1[['x']])) plt.show() # #Linear regression with multiple variables # In[8]: data2 = pd.read_csv('ex1data2.txt', header=None, names=['size', 'number of bedrooms', 'price'], dtype=float) # In[9]: data2.head() # In[10]: from sklearn import preprocessing # In[11]: scaler = preprocessing.StandardScaler().fit(data2[['size', 'number of bedrooms']].values) features = scaler.transform(data2[['size', 'number of bedrooms']].values) print scaler.mean_, scaler.std_ # In[12]: clf2 = linear_model.LinearRegression() clf2.fit(features, data2['price'].values) print clf2.coef_ print clf2.intercept_ # In[ ]: