#!/usr/bin/env python
# coding: utf-8
"""Fit several scikit-learn regressors on ``hour.csv`` and report their scores.

Pipeline, executed top to bottom at import time:

1. Load ``hour.csv`` (columns 2-16 only, renamed via ``names``).
2. One-hot encode the ``weathersit`` column into ``weather1`` .. ``weatherN``.
3. Drop ``casual`` and ``registered`` and use ``total`` as the target.
4. Hold out 20% of the rows for validation (fixed seed).
5. Fit each regressor on the training rows and print its validation score
   (``score`` on a scikit-learn regressor is the R^2 coefficient).
"""

import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import sklearn

# feature engineering
from sklearn.preprocessing import OneHotEncoder

# models
from sklearn import model_selection
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.svm import SVR

# metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Ignore some warnings we don't care about.
warnings.filterwarnings("ignore", category=FutureWarning)


# --- Load data ---------------------------------------------------------------

names = [
    'instant', 'date', 'season', 'year', 'month', 'hour', 'holiday',
    'weekday', 'workingday', 'weathersit', 'temp', 'atemp', 'humidity',
    'windspeed', 'casual', 'registered', 'total',
]
# header=0 discards the file's own header row in favour of `names`;
# usecols skips the first two columns ('instant' and 'date').
dataset = pd.read_csv(
    'hour.csv', names=names, header=0, usecols=list(range(2, 17)))
print(dataset.head(5))


# --- Feature engineering -----------------------------------------------------

# The encoder expects a 2-D (n_samples, 1) array.
weatherSits = dataset['weathersit'].values.reshape(-1, 1)
total = dataset['total'].values

# One-hot encoding. The encoder's default sparse output is densified with
# .toarray() instead of passing `sparse=False`, because that keyword was
# renamed to `sparse_output` and later removed in newer scikit-learn releases.
OHEr = OneHotEncoder()
OHEd = OHEr.fit_transform(weatherSits).toarray()

# Remove unwanted columns ('total' is re-appended below so it ends up last).
dataset = dataset.drop(columns=['weathersit', 'casual', 'registered', 'total'])

# Add one indicator column per encoded category: weather1, weather2, ...
for position, column in enumerate(OHEd.T, start=1):
    dataset['weather{}'.format(position)] = column
dataset['total'] = total
print(dataset.head(5))


# --- Train / validation split ------------------------------------------------

array = dataset.values
# 'total' was appended last, so every other column is a feature.
X = array[:, :-1]
Y = array[:, -1]
validation_size = 0.20
seed = 11
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X, Y, test_size=validation_size, random_state=seed)


# --- Fit and score each model ------------------------------------------------

models = [
    LinearRegression(),
    RandomForestRegressor(max_depth=60, random_state=0, n_estimators=500),
    ElasticNet(random_state=0),
    Lasso(random_state=0),
    Ridge(random_state=0),
    SVR(gamma='scale', C=400.0, epsilon=0.2),
]

# In a notebook the bare `reg_model.score(...)` expression is displayed
# automatically; as a script the value would be silently discarded, so it is
# printed explicitly here.
for reg_model in models:
    reg_model.fit(X_train, Y_train)
    print('{}: {}'.format(
        type(reg_model).__name__,
        reg_model.score(X_validation, Y_validation)))