import pandas as pd
# Load the mtcars data set from a local CSV file into a DataFrame.
mtcars = pd.read_csv('/home/anshul/data-sets/mtcars.csv')
# R: list the column names of the mtcars data frame.
names(mtcars)
# Python: list the column names as UTF-8 encoded strings.
# A list comprehension is used instead of map() so the names are materialised
# eagerly; on Python 3 map() returns a lazy iterator and nothing would be shown.
[column.encode("utf-8") for column in mtcars.columns]
['Unnamed: 0', 'mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']
from sklearn import linear_model
import numpy as np
# Simple linear regression of mpg on wt with scikit-learn.
y = mtcars["mpg"]
# The estimator is given a 2-D design matrix, so each weight gets its own row.
X = [[weight] for weight in mtcars["wt"].values]
lm = linear_model.LinearRegression()
lm.fit(X, y)
# Estimated slope for wt
print('Coefficients: \n', lm.coef_)
# The mean square error
('Coefficients: \n', array([-5.34447157]))
# R: simple linear regression of mpg on wt.
# attach() puts the data frame's columns on the search path so the formula
# below can refer to them by bare name.
attach(mtcars)
bmodel <- lm(mpg ~ wt)
# Auto-print the fitted call and its coefficients.
bmodel
Call: lm(formula = mpg ~ wt) Coefficients: (Intercept) wt 37.285 -5.344
# Mean squared error of the fitted values: np.mean averages the squared
# residuals, so this figure is an MSE, not a residual sum of squares.
print("Mean squared error: %.2f"
      % np.mean((lm.predict(X) - y) ** 2))
# Score reported by the estimator on the training data: 1 is perfect prediction
print('Variance score: %.2f' % lm.score(X, y))
Residual sum of squares: 8.70 Variance score: 0.75
# R: print the detailed summary of the fitted model (residuals, coefficient
# table, R-squared and F-statistic).
summary(bmodel)
Call: lm(formula = mpg ~ wt) Residuals: Min 1Q Median 3Q Max -4.5432 -2.3647 -0.1252 1.4096 6.8727 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 37.2851 1.8776 19.858 < 2e-16 *** wt -5.3445 0.5591 -9.559 1.29e-10 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 3.046 on 30 degrees of freedom Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446 F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
# R: store the model's predictions in a new column, then plot observed mpg
# against the predicted values.
mtcars$new <- predict(bmodel, mtcars)
d <- mtcars[, c("mpg", "new")]
plot(d)
import matplotlib.pyplot as plt
# Scatter of observed mpg (horizontal axis) against the model's predictions.
plt.scatter(y, lm.predict(X), color="black")
plt.show()
from IPython.display import Image
# Display the image stored at this path.
# NOTE(review): presumably a saved copy of the scatter plot above — confirm.
Image(filename='/home/anshul/lr.png')
# R: multiple regression of mpg on wt, cyl, gear and hp.
# wt is written twice in the formula; the fitted model reports a single wt term.
bmodel <- lm(mpg ~ wt + cyl + gear + wt + hp)
# Auto-print the fitted call and its coefficients.
bmodel
Call: lm(formula = mpg ~ wt + cyl + gear + wt + hp) Coefficients: (Intercept) wt cyl gear hp 36.6895 -3.0226 -0.8126 0.3626 -0.0217
# Multiple regression of mpg on wt, cyl, gear and hp.
# Each predictor is listed exactly once: a repeated "wt" column would make the
# design matrix collinear, and the weight effect would be split across the two
# identical columns instead of being reported as one coefficient.
predictors = ["wt", "cyl", "gear", "hp"]
X = mtcars.loc[:, predictors].values
y = mtcars["mpg"]
lm.fit(X, y)
# The coefficients, in the same order as `predictors`
print('Coefficients: \n', lm.coef_)
('Coefficients: \n', array([-1.51131728, -0.81259895, 0.36259005, -1.51131728, -0.02170211]))
# R: print the detailed summary of the multi-predictor model.
summary(bmodel)
Call: lm(formula = mpg ~ wt + cyl + gear + wt + hp) Residuals: Min 1Q Median 3Q Max -3.4710 -1.7876 -0.6517 1.2362 5.9677 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 36.68953 5.97025 6.145 1.44e-06 *** wt -3.02263 0.85116 -3.551 0.00143 ** cyl -0.81260 0.66320 -1.225 0.23106 gear 0.36259 1.00000 0.363 0.71974 hp -0.02170 0.01574 -1.379 0.17922 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 2.551 on 27 degrees of freedom Multiple R-squared: 0.8439, Adjusted R-squared: 0.8208 F-statistic: 36.49 on 4 and 27 DF, p-value: 1.599e-10
# Mean squared error of the fitted values: np.mean averages the squared
# residuals, so this figure is an MSE, not a residual sum of squares.
print("Mean squared error: %.2f"
      % np.mean((lm.predict(X) - y) ** 2))
# Score reported by the estimator on the training data: 1 is perfect prediction
print('Variance score: %.2f' % lm.score(X, y))
Residual sum of squares: 5.49 Variance score: 0.84
# R: overwrite the prediction column with the current model's predictions,
# then plot observed mpg against them.
mtcars$new <- predict(bmodel, mtcars)
d <- mtcars[, c("mpg", "new")]
plot(d)
# Scatter of observed mpg (horizontal axis) against the model's predictions.
plt.scatter(y, lm.predict(X), color="black")
plt.show()
# Display the image stored at this path.
Image(filename='/home/anshul/lr2.png')
# R: multiple regression of mpg on nine predictors.
# wt is written twice in the formula; the fitted model reports a single wt term.
bmodel <- lm(mpg ~ wt + cyl + gear + wt + hp + carb + disp + drat + vs + qsec)
# Auto-print the fitted call and its coefficients.
bmodel
Call: lm(formula = mpg ~ wt + cyl + gear + wt + hp + carb + disp + drat + vs + qsec) Coefficients: (Intercept) wt cyl gear hp carb 17.59704 -3.93430 -0.44543 1.22953 -0.02022 -0.26242 disp drat vs qsec 0.01275 1.10054 -0.22311 0.57571
# Multiple regression of mpg on nine predictors.
# Each predictor is listed exactly once: a repeated "wt" column would make the
# design matrix collinear, and the weight effect would be split across the two
# identical columns instead of being reported as one coefficient.
predictors = ["wt", "cyl", "gear", "hp", "carb", "disp", "drat", "vs", "qsec"]
X = mtcars.loc[:, predictors].values
y = mtcars["mpg"]
lm.fit(X, y)
# The coefficients, in the same order as `predictors`
print('Coefficients: \n', lm.coef_)
('Coefficients: \n', array([-1.96715177, -0.4454288 , 1.22952745, -1.96715177, -0.0202156 , -0.26242241, 0.01274525, 1.10054141, -0.22310953, 0.57571041]))
# R: print the detailed summary of the nine-predictor model.
summary(bmodel)
Call: lm(formula = mpg ~ wt + cyl + gear + wt + hp + carb + disp + drat + vs + qsec) Residuals: Min 1Q Median 3Q Max -2.9886 -1.6738 -0.3834 0.9796 5.4395 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 17.59704 18.41902 0.955 0.3498 wt -3.93430 1.90734 -2.063 0.0511 . cyl -0.44543 1.02029 -0.437 0.6667 gear 1.22953 1.43393 0.857 0.4004 hp -0.02022 0.02199 -0.919 0.3679 carb -0.26242 0.83653 -0.314 0.7567 disp 0.01275 0.01805 0.706 0.4876 drat 1.10054 1.63356 0.674 0.5075 vs -0.22311 2.08103 -0.107 0.9156 qsec 0.57571 0.71086 0.810 0.4267 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 2.68 on 22 degrees of freedom Multiple R-squared: 0.8596, Adjusted R-squared: 0.8022 F-statistic: 14.97 on 9 and 22 DF, p-value: 1.855e-07
# Mean squared error of the fitted values: np.mean averages the squared
# residuals, so this figure is an MSE, not a residual sum of squares.
print("Mean squared error: %.2f"
      % np.mean((lm.predict(X) - y) ** 2))
# Score reported by the estimator on the training data: 1 is perfect prediction
print('Variance score: %.2f' % lm.score(X, y))
Residual sum of squares: 4.94 Variance score: 0.86
# Scatter of observed mpg (horizontal axis) against the model's predictions.
plt.scatter(y, lm.predict(X), color="black")
plt.show()
# Display the image stored at this path.
Image(filename='/home/anshul/lr3.png')
# R: overwrite the prediction column with the current model's predictions,
# then plot observed mpg against them.
mtcars$new <- predict(bmodel, mtcars)
d <- mtcars[, c("mpg", "new")]
plot(d)