import pandas as pd
# Read the Iris measurements from a CSV located relative to the working directory.
iris = pd.read_csv('iris.csv')
# Preview the first rows of the frame (rendered as the cell's output in a notebook).
iris.head()
 | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
# Feature matrix: the four numeric measurement columns, kept as a DataFrame.
x = iris[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
# Target labels as a 1-D Series (single brackets). Selecting with double brackets
# yields a one-column DataFrame, which makes scikit-learn emit a
# DataConversionWarning ("column-vector y was passed") when the model is fitted.
y = iris['species']
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation. No random_state is passed, so the
# partition (and every metric computed from it) varies from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
)
#pip install xgboost
import xgboost as xgb
# Gradient-boosted tree classifier limited to 10 boosting rounds and trees of depth <= 5.
# The keywords must be spelled exactly `n_estimators` and `max_depth`: a misspelled
# keyword is not applied to the model, so the library defaults stay in effect.
xgbc = xgb.XGBClassifier(n_estimators=10, max_depth=5)
# Train on the training partition only; the test partition is kept for evaluation.
xgbc.fit(x_train, y_train)
D:\softwares\Anaconda\lib\site-packages\sklearn\preprocessing\label.py:219: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). y = column_or_1d(y, warn=True) D:\softwares\Anaconda\lib\site-packages\sklearn\preprocessing\label.py:252: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). y = column_or_1d(y, warn=True)
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0, max_dept=5, max_depth=3, min_child_weight=1, missing=None, n_estimator=10, n_estimators=100, n_jobs=1, nthread=None, objective='multi:softprob', random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None, silent=None, subsample=1, verbosity=1)
# Predict a class label for every row of the held-out test features.
y_pred = xgbc.predict(x_test)
from sklearn.metrics import confusion_matrix
# Cross-tabulate true labels against predicted labels for the test partition.
confusion_matrix(y_true=y_test, y_pred=y_pred)
array([[15,  0,  0],
       [ 0, 15,  2],
       [ 0,  0, 13]], dtype=int64)
from sklearn.metrics import accuracy_score
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)
0.9555555555555556
# Rank the features by the fitted model's importance scores, highest first.
# The scores are labelled with the columns the model was actually trained on,
# not a positional slice of the raw frame, so the labels stay correct even if
# the selected feature list or the CSV column order changes.
feature_imp = pd.Series(
    xgbc.feature_importances_,
    index=x_train.columns,
).sort_values(ascending=False)
feature_imp
petal_length    0.849951
petal_width     0.084822
sepal_width     0.040115
sepal_length    0.025112
dtype: float32