#!/usr/bin/env python
# coding: utf-8
"""Train an XGBoost binary classifier and export per-tree leaf indices.

Notebook export. The script loads the ``train.buffer`` and
``validation.buffer`` DMatrix files, trains a booster, saves the model,
and writes the leaf-index predictions and labels of both sets with
``np.save``.
"""

# In[1]:

import pandas as pd
import numpy as np
from pandas.io.parsers import read_csv

# The inline-plot magic only exists inside IPython/Jupyter; skip it when the
# file is executed as a plain script so the run does not die on a NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

from matplotlib import pyplot as plt
import matplotlib as mpl
import scipy
import xgboost as xgb


# In[2]:

dtrain = xgb.DMatrix("train.buffer")
dvalidation = xgb.DMatrix("validation.buffer")


# In[3]:

# NOTE(review): the 'bst:'-prefixed keys and 'silent' follow an older XGBoost
# naming scheme; newer releases may not recognise them -- confirm against the
# installed version before relying on max_depth/eta taking effect.
param = {
    'bst:max_depth': 7,
    'bst:eta': 1,
    'silent': 1,
    'objective': 'binary:logistic',
}
param['nthread'] = 4

# A list of (key, value) pairs is used instead of a dict so that
# 'eval_metric' can appear twice. list() is required on Python 3, where
# dict.items() returns a view that does not support `+=` with a list.
plst = list(param.items())
plst += [('eval_metric', 'logloss')]
plst += [('eval_metric', 'auc')]


# In[4]:

# Datasets evaluated during training, with their display names.
evallist = [(dvalidation, 'eval'), (dtrain, 'train')]


# In[5]:

num_round = 40
bst = xgb.train(plst, dtrain, num_round, evallist)


# In[6]:

bst.save_model("0001.model")


# In[7]:

# pred_leaf=True asks the booster for leaf indices rather than scores.
tr_leafindex = bst.predict(dtrain, pred_leaf=True)


# In[8]:

va_leafindex = bst.predict(dvalidation, pred_leaf=True)


# In[9]:

tr_y = dtrain.get_label()
va_y = dvalidation.get_label()


# In[10]:

# np.save appends ".npy" when the name lacks it (per the NumPy docs), so
# these calls are expected to produce "tr.xgb.npy", "va.xgb.npy", etc.
np.save("tr.xgb", tr_leafindex)


# In[11]:

np.save("va.xgb", va_leafindex)


# In[12]:

np.save("tr.label", tr_y)
np.save("va.label", va_y)


# In[ ]: