In [1]:
import pandas as pd
import numpy as np
from pandas.io.parsers import read_csv
%matplotlib inline

from matplotlib import pyplot as plt
import matplotlib as mpl

import scipy

import xgboost as xgb
In [2]:
# Load the training and validation sets from their on-disk files,
# one DMatrix per split (training first, then validation).
dtrain, dvalidation = (
    xgb.DMatrix(buffer_path)
    for buffer_path in ("train.buffer", "validation.buffer")
)
In [3]:
# Booster configuration: tree depth 7, eta (learning rate) 1, binary
# logistic objective, 4 worker threads, quiet logging.
# NOTE(review): the 'bst:' key prefix and 'silent' look like legacy xgboost
# spellings; confirm the installed version still honours them (newer
# releases document 'max_depth' / 'eta' / 'verbosity').
param = {
    'bst:max_depth': 7,
    'bst:eta': 1,
    'silent': 1,
    'objective': 'binary:logistic',
    'nthread': 4,
}

# The parameters are handed to xgb.train as a list of (name, value) pairs so
# that 'eval_metric' can appear more than once. On Python 3 dict.items() is a
# view that does not support `+=`, so it is materialised as a list first.
plst = list(param.items())
plst += [('eval_metric', 'logloss'), ('eval_metric', 'auc')]
In [4]:
# Datasets to report metrics on during training, as (DMatrix, name) pairs:
# the validation set (named 'eval') first, then the training set ('train').
evallist = list(zip((dvalidation, dtrain), ('eval', 'train')))
In [5]:
# Train for a fixed number of boosting rounds; the metrics for every entry
# of `evallist` are printed after each round.
num_round = 40
bst = xgb.train(
    params=plst,
    dtrain=dtrain,
    num_boost_round=num_round,
    evals=evallist,
)
[0]	eval-logloss:0.253334	eval-auc:0.780535	train-logloss:0.248384	train-auc:0.781785
[1]	eval-logloss:0.207300	eval-auc:0.785617	train-logloss:0.209092	train-auc:0.787094
[2]	eval-logloss:0.197886	eval-auc:0.788099	train-logloss:0.195596	train-auc:0.789908
[3]	eval-logloss:0.196388	eval-auc:0.789676	train-logloss:0.193340	train-auc:0.792229
[4]	eval-logloss:0.195568	eval-auc:0.793677	train-logloss:0.192150	train-auc:0.796082
[5]	eval-logloss:0.195593	eval-auc:0.795120	train-logloss:0.191933	train-auc:0.798321
[6]	eval-logloss:0.195982	eval-auc:0.796112	train-logloss:0.192192	train-auc:0.799354
[7]	eval-logloss:0.195935	eval-auc:0.797171	train-logloss:0.192042	train-auc:0.800681
[8]	eval-logloss:0.195762	eval-auc:0.797791	train-logloss:0.191980	train-auc:0.801628
[9]	eval-logloss:0.195653	eval-auc:0.799015	train-logloss:0.191947	train-auc:0.802798
[10]	eval-logloss:0.195533	eval-auc:0.799382	train-logloss:0.191716	train-auc:0.803155
[11]	eval-logloss:0.195838	eval-auc:0.800146	train-logloss:0.191925	train-auc:0.804124
[12]	eval-logloss:0.195927	eval-auc:0.800358	train-logloss:0.191971	train-auc:0.804732
[13]	eval-logloss:0.195886	eval-auc:0.800860	train-logloss:0.191593	train-auc:0.805456
[14]	eval-logloss:0.196272	eval-auc:0.801299	train-logloss:0.191675	train-auc:0.805989
[15]	eval-logloss:0.196464	eval-auc:0.801970	train-logloss:0.190474	train-auc:0.806655
[16]	eval-logloss:0.196404	eval-auc:0.802252	train-logloss:0.190403	train-auc:0.807044
[17]	eval-logloss:0.196262	eval-auc:0.802891	train-logloss:0.190237	train-auc:0.807700
[18]	eval-logloss:0.196599	eval-auc:0.803300	train-logloss:0.190356	train-auc:0.808317
[19]	eval-logloss:0.196430	eval-auc:0.803652	train-logloss:0.190241	train-auc:0.808611
[20]	eval-logloss:0.196436	eval-auc:0.804174	train-logloss:0.190326	train-auc:0.809188
[21]	eval-logloss:0.196494	eval-auc:0.804444	train-logloss:0.190349	train-auc:0.809547
[22]	eval-logloss:0.196617	eval-auc:0.804801	train-logloss:0.190343	train-auc:0.810022
[23]	eval-logloss:0.196617	eval-auc:0.805046	train-logloss:0.190262	train-auc:0.810345
[24]	eval-logloss:0.196569	eval-auc:0.805390	train-logloss:0.190094	train-auc:0.810903
[25]	eval-logloss:0.196540	eval-auc:0.805762	train-logloss:0.189940	train-auc:0.811338
[26]	eval-logloss:0.196496	eval-auc:0.806066	train-logloss:0.189812	train-auc:0.811782
[27]	eval-logloss:0.196562	eval-auc:0.806010	train-logloss:0.189837	train-auc:0.811808
[28]	eval-logloss:0.196723	eval-auc:0.806168	train-logloss:0.190012	train-auc:0.812114
[29]	eval-logloss:0.197192	eval-auc:0.806515	train-logloss:0.190270	train-auc:0.812461
[30]	eval-logloss:0.197457	eval-auc:0.806607	train-logloss:0.190326	train-auc:0.812706
[31]	eval-logloss:0.197358	eval-auc:0.806797	train-logloss:0.190127	train-auc:0.813055
[32]	eval-logloss:0.197435	eval-auc:0.807031	train-logloss:0.190063	train-auc:0.813546
[33]	eval-logloss:0.197510	eval-auc:0.807228	train-logloss:0.190088	train-auc:0.813798
[34]	eval-logloss:0.197452	eval-auc:0.807467	train-logloss:0.190039	train-auc:0.814018
[35]	eval-logloss:0.197579	eval-auc:0.807707	train-logloss:0.190044	train-auc:0.814438
[36]	eval-logloss:0.197523	eval-auc:0.807915	train-logloss:0.189938	train-auc:0.814649
[37]	eval-logloss:0.197562	eval-auc:0.808144	train-logloss:0.189920	train-auc:0.815010
[38]	eval-logloss:0.197583	eval-auc:0.808244	train-logloss:0.189900	train-auc:0.815211
[39]	eval-logloss:0.197524	eval-auc:0.808328	train-logloss:0.189815	train-auc:0.815430
In [6]:
# Write the trained booster to disk so it can be reloaded later.
bst.save_model(fname="0001.model")
In [7]:
# Run the training set through the booster with pred_leaf=True, which asks
# for leaf assignments instead of the usual predictions.
tr_leafindex = bst.predict(data=dtrain, pred_leaf=True)
In [8]:
# Run the validation set through the booster with pred_leaf=True, which asks
# for leaf assignments instead of the usual predictions.
va_leafindex = bst.predict(data=dvalidation, pred_leaf=True)
In [9]:
# Pull the label vectors back out of the training and validation matrices.
tr_y, va_y = dtrain.get_label(), dvalidation.get_label()
In [10]:
# Save the training-set leaf output as a NumPy array file.
# Note: np.save appends ".npy" when the name lacks it, so this writes "tr.xgb.npy".
np.save(file="tr.xgb", arr=tr_leafindex)
In [11]:
# Save the validation-set leaf output as a NumPy array file.
# Note: np.save appends ".npy" when the name lacks it, so this writes "va.xgb.npy".
np.save(file="va.xgb", arr=va_leafindex)
In [12]:
# Save the label vectors next to the leaf outputs, training split first.
# Note: np.save appends ".npy", giving "tr.label.npy" and "va.label.npy".
np.save(file="tr.label", arr=tr_y)
np.save(file="va.label", arr=va_y)