#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn


# # Linear regression with one variable

# In[2]:


# Read the single-feature dataset. The file carries no header row, so the
# two columns are labelled 'x' and 'y' explicitly.
data1 = pd.read_csv(
    'ex1data1.txt',
    names=['x', 'y'],
    header=None,
)


# In[3]:


# Preview the first rows to confirm the file was parsed into two columns.
data1.head()


# In[4]:


# Draw the raw observations as red crosses ('rx') on an 8x6 inch figure.
scatter_fig = plt.figure(figsize=(8, 6))
scatter_ax = scatter_fig.gca()
scatter_ax.plot(data1['x'], data1['y'], 'rx')
plt.show()


# In[5]:


from sklearn import linear_model


# In[6]:


# Fit an ordinary least-squares line y ~ x.
# scikit-learn expects a 2-D feature matrix, hence the double brackets
# around 'x'; the target is passed as a 1-D array.
clf = linear_model.LinearRegression()
clf.fit(data1[['x']].values, data1['y'].values)

# Report the learned slope(s) and intercept.
# print() is called with a single argument so the output is the same on
# Python 2 and Python 3 (the bare `print x` statement is a SyntaxError on 3).
print(clf.coef_)
print(clf.intercept_)


# In[7]:


# Overlay the fitted regression line on the raw observations.
plt.figure(figsize=(8, 6))

# Raw observations as red crosses.
plt.plot(data1['x'], data1['y'], 'rx')

# Predict on a plain NumPy array, matching how `clf` was fitted. Passing a
# DataFrame to a model fitted without feature names makes recent
# scikit-learn versions emit a feature-name mismatch warning.
fitted_y = clf.predict(data1[['x']].values)
plt.plot(data1['x'], fitted_y)

plt.show()


# # Linear regression with multiple variables

# In[8]:


# Read the multi-feature dataset. The file carries no header row, so the
# three columns are labelled explicitly, and every column is parsed as float.
data2 = pd.read_csv(
    'ex1data2.txt',
    names=['size', 'number of bedrooms', 'price'],
    header=None,
    dtype=float,
)


# In[9]:


# Preview the first rows to confirm the file was parsed into three columns.
data2.head()


# In[10]:


from sklearn import preprocessing


# In[11]:


# Standardize the two input features before fitting the regression.
# The feature matrix is selected once and reused for fitting and transforming.
raw_features = data2[['size', 'number of bedrooms']].values

scaler = preprocessing.StandardScaler()
features = scaler.fit_transform(raw_features)

# Show the per-feature mean and scaling factor learned by the scaler.
# `scale_` replaces the old `std_` attribute, which was deprecated in
# scikit-learn 0.17 and removed in 0.19 (accessing it raises AttributeError).
# print() is used as a function so the cell runs on Python 3.
print(scaler.mean_, scaler.scale_)


# In[12]:


# Fit an ordinary least-squares model of price on the standardized features.
clf2 = linear_model.LinearRegression()
clf2.fit(features, data2['price'].values)

# Report the learned coefficients (one per standardized feature) and the
# intercept. print() is called with a single argument so the output is the
# same on Python 2 and Python 3 (the bare `print x` statement is a
# SyntaxError on 3).
print(clf2.coef_)
print(clf2.intercept_)


# In[ ]: