import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy
import sklearn
# feature engineering
from sklearn.preprocessing import OneHotEncoder
#models
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
#metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
# ignore some warnings we don't care about
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# Labels applied to the 17 fields of hour.csv, in positional order.
names = [
    'instant', 'date', 'season', 'year', 'month', 'hour', 'holiday',
    'weekday', 'workingday', 'weathersit', 'temp', 'atemp', 'humidity',
    'windspeed', 'casual', 'registered', 'total',
]

# Load only positions 2..16, i.e. everything except 'instant' and 'date'.
# header=0 combined with names= replaces the file's own header row with the
# labels above.
dataset = pd.read_csv(
    'hour.csv',
    names=names,
    header=0,
    usecols=list(range(2, 17)),
)

# Quick look at the first rows to confirm the load.
print(dataset.head(5))
# Output of print(dataset.head(5)) recorded from the original notebook run:
#
#    season  year  month  hour  holiday  weekday  workingday  weathersit  temp  \
# 0       1     0      1     0        0        6           0           1  0.24
# 1       1     0      1     1        0        6           0           1  0.22
# 2       1     0      1     2        0        6           0           1  0.22
# 3       1     0      1     3        0        6           0           1  0.24
# 4       1     0      1     4        0        6           0           1  0.24
#
#     atemp  humidity  windspeed  casual  registered  total
# 0  0.2879      0.81        0.0       3          13     16
# 1  0.2727      0.80        0.0       8          32     40
# 2  0.2727      0.80        0.0       5          27     32
# 3  0.2879      0.75        0.0       3          10     13
# 4  0.2879      0.75        0.0       0           1      1
# One-hot encode the categorical 'weathersit' column and rebuild the frame as
# [remaining features..., weather<k> indicator columns..., total].

# The encoder expects a 2-D (n_samples, 1) array.
weatherSits = dataset['weathersit'].values.reshape(-1, 1)
# Keep the target aside so it can be re-appended as the last column.
total = dataset['total'].values

#One-Hot Encoding
# Use the encoder's default sparse output and densify it here instead of
# passing sparse=False: that keyword was renamed to sparse_output in
# scikit-learn 1.2 and removed in 1.4, so either spelling pins the script to
# a version range, while .toarray() works on all of them.
OHEr = OneHotEncoder()
OHEd = OHEr.fit_transform(weatherSits).toarray()

#remove unwanted columns
dataset = dataset.drop(columns=['weathersit', 'casual', 'registered', 'total'])

#add new OHE columns and put total back at the end
# One indicator column per category the encoder actually found, named after
# the category value (weather1..weather4 when the values 1-4 are all present).
# Iterating over categories_ avoids an IndexError when a category is absent.
for col_idx, category in enumerate(OHEr.categories_[0]):
    dataset['weather{}'.format(int(category))] = OHEd[:, col_idx]
dataset['total'] = total

print(dataset.head(5))
# Output of print(dataset.head(5)) recorded from the original notebook run:
#
#    season  year  month  hour  holiday  weekday  workingday  temp   atemp  \
# 0       1     0      1     0        0        6           0  0.24  0.2879
# 1       1     0      1     1        0        6           0  0.22  0.2727
# 2       1     0      1     2        0        6           0  0.22  0.2727
# 3       1     0      1     3        0        6           0  0.24  0.2879
# 4       1     0      1     4        0        6           0  0.24  0.2879
#
#    humidity  windspeed  weather1  weather2  weather3  weather4  total
# 0      0.81        0.0       1.0       0.0       0.0       0.0     16
# 1      0.80        0.0       1.0       0.0       0.0       0.0     40
# 2      0.80        0.0       1.0       0.0       0.0       0.0     32
# 3      0.75        0.0       1.0       0.0       0.0       0.0     13
# 4      0.75        0.0       1.0       0.0       0.0       0.0      1
# Split the prepared frame into features / target and hold out a validation set.
array = dataset.values

# 'total' is appended as the last column of the frame, so slice relative to
# the end instead of hard-coding positions 0:15 / 15. This gives the same
# result for the current 16-column layout and stays correct if the number of
# feature columns changes.
X = array[:, :-1]
Y = array[:, -1]

validation_size = 0.20  # fraction of rows held out for validation
seed = 11               # fixed so the split is reproducible between runs

X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=validation_size, random_state=seed
)
# Ordinary least-squares linear regression with default settings.
reg_model = LinearRegression()
reg_model.fit(X_train, Y_train)
# A bare score() call is only displayed in a notebook; print it so the
# validation score (R^2 for scikit-learn regressors) is visible when this runs
# as a script.
print(reg_model.score(X_validation, Y_validation))
# Value recorded in the original notebook output: 0.41800686189447844
# Random forest: 500 trees, each limited to depth 60, with a fixed seed.
reg_model = RandomForestRegressor(max_depth=60, random_state=0, n_estimators=500)
reg_model.fit(X_train, Y_train)
# A bare score() call is only displayed in a notebook; print it so the
# validation score (R^2 for scikit-learn regressors) is visible when this runs
# as a script.
print(reg_model.score(X_validation, Y_validation))
# Value recorded in the original notebook output: 0.9478203512307453
# Elastic-net regularised linear regression with default penalties.
reg_model = ElasticNet(random_state=0)
# The original fit call was corrupted mid-identifier and split across two
# lines; restored to the same (X_train, Y_train) call the other models use.
reg_model.fit(X_train, Y_train)
# A bare score() call is only displayed in a notebook; print it so the
# validation score (R^2 for scikit-learn regressors) is visible when this runs
# as a script.
print(reg_model.score(X_validation, Y_validation))
# Value recorded in the original notebook output: 0.2727850252848387
# Lasso (L1-regularised) linear regression with the default penalty strength.
reg_model = Lasso(random_state=0)
reg_model.fit(X_train, Y_train)
# A bare score() call is only displayed in a notebook; print it so the
# validation score (R^2 for scikit-learn regressors) is visible when this runs
# as a script.
print(reg_model.score(X_validation, Y_validation))
# Value recorded in the original notebook output: 0.4147073584579234
# Ridge (L2-regularised) linear regression with the default penalty strength.
reg_model = Ridge(random_state=0)
reg_model.fit(X_train, Y_train)
# A bare score() call is only displayed in a notebook; print it so the
# validation score (R^2 for scikit-learn regressors) is visible when this runs
# as a script.
print(reg_model.score(X_validation, Y_validation))
# Value recorded in the original notebook output: 0.41804293724431285
# Support-vector regression with explicit C and epsilon; gamma='scale' lets
# scikit-learn derive the kernel coefficient from the training data.
reg_model = SVR(gamma='scale', C=400.0, epsilon=0.2)
reg_model.fit(X_train, Y_train)
# A bare score() call is only displayed in a notebook; print it so the
# validation score (R^2 for scikit-learn regressors) is visible when this runs
# as a script.
print(reg_model.score(X_validation, Y_validation))
# Value recorded in the original notebook output: 0.7816133895418931