#!/usr/bin/env python
# coding: utf-8
"""Fit a linear regression to the Boston housing data and plot the results.

Exported from a Jupyter notebook. The script reads
``data/boston_housing.csv``, fits a scikit-learn ``LinearRegression`` on every
column except ``PRICE``, plots the ``RM`` column against price and the actual
prices against the predicted ones, and prints the mean squared error measured
on the same rows the model was fitted on.
"""

# In[1]:

# `get_ipython` only exists inside an IPython/Jupyter session. Guard the magic
# so the file also runs as a plain script instead of dying with a NameError.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

import numpy as np
import modin.pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LinearRegression

# In[2]:

data = pd.read_csv("data/boston_housing.csv")
# A bare expression is only echoed by a notebook; print it so a script run
# shows the preview as well.
print(data.head())

# In[3]:

# Every column except the target is used as a predictor.
features = data.drop("PRICE", axis=1)
labels = data["PRICE"]
print(type(features))

# In[4]:

lm = LinearRegression()
lm.fit(features, labels)

# In[5]:

plt.scatter(data["RM"], labels)
plt.xlabel("Average number of rooms per dwelling")
plt.ylabel("Housing Price")
plt.title("Relationship between Rooms and Price")
plt.show()

# In[6]:

# Predictions are made on the training rows themselves, so the error computed
# below is a training error, not a held-out estimate.
predicted_prices = lm.predict(features)

# In[7]:

plt.scatter(labels, predicted_prices)
plt.xlabel("Prices")
plt.ylabel("Predicted Prices")
plt.title("Prices versus Predicted Prices")
plt.show()

# In[8]:

# Mean squared error. The vectorized power replaces a per-element
# `apply(lambda x: x ** 2)` and yields the same values.
training_error = ((labels - predicted_prices) ** 2).mean()
print(training_error)

# In[9]:

# Citation: http://bigdata-madesimple.com/how-to-run-linear-regression-in-python-scikit-learn/