# Full width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
%load_ext autoreload
%autoreload 2
import math
import os
import subprocess
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display
#
from lib_modeling import *
from lib_feature_engineering import *
# some settings for displaying Pandas results
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.precision', 4)
# None disables column-width truncation; the former -1 sentinel is deprecated
# since pandas 1.0 and rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)
# read the application tables of the train and test splits
data_path = "home-credit-default-risk/application_train.csv"
pdf_train = pd.read_csv(data_path)
data_path = "home-credit-default-risk/application_test.csv"
pdf_test = pd.read_csv(data_path)

# keep the rows tagged 'train' in the tvt table whose SK_ID_CURR also appears
# in application_train, then drop the tag column
pdf_tvt_extend = pd.read_pickle("pdf_tvt_extend.pkl", compression="bz2")
pdf_train_filtered = (
    pdf_tvt_extend.loc[lambda df: df["tvt_code"] == "train"]
    .merge(pdf_train[["SK_ID_CURR"]], on="SK_ID_CURR")
    .drop(columns=["tvt_code"])
)
pdf_train_filtered.head()
SK_ID_CURR | TARGET | |
---|---|---|
0 | 100002 | 1 |
1 | 100003 | 0 |
2 | 100004 | 0 |
3 | 100006 | 0 |
4 | 100007 | 0 |
# load the credit card balance table (not the previous-application table)
data_path = "home-credit-default-risk/credit_card_balance.csv"
pdf_data = pd.read_csv(data_path)
# quick sanity check: dimensions, then the first rows
print(pdf_data.shape)
pdf_data.head(n=5)
(3840312, 23)
SK_ID_PREV | SK_ID_CURR | MONTHS_BALANCE | AMT_BALANCE | AMT_CREDIT_LIMIT_ACTUAL | AMT_DRAWINGS_ATM_CURRENT | AMT_DRAWINGS_CURRENT | AMT_DRAWINGS_OTHER_CURRENT | AMT_DRAWINGS_POS_CURRENT | AMT_INST_MIN_REGULARITY | AMT_PAYMENT_CURRENT | AMT_PAYMENT_TOTAL_CURRENT | AMT_RECEIVABLE_PRINCIPAL | AMT_RECIVABLE | AMT_TOTAL_RECEIVABLE | CNT_DRAWINGS_ATM_CURRENT | CNT_DRAWINGS_CURRENT | CNT_DRAWINGS_OTHER_CURRENT | CNT_DRAWINGS_POS_CURRENT | CNT_INSTALMENT_MATURE_CUM | NAME_CONTRACT_STATUS | SK_DPD | SK_DPD_DEF | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2562384 | 378907 | -6 | 56.970 | 135000 | 0.0 | 877.5 | 0.0 | 877.5 | 1700.325 | 1800.0 | 1800.0 | 0.000 | 0.000 | 0.000 | 0.0 | 1 | 0.0 | 1.0 | 35.0 | Active | 0 | 0 |
1 | 2582071 | 363914 | -1 | 63975.555 | 45000 | 2250.0 | 2250.0 | 0.0 | 0.0 | 2250.000 | 2250.0 | 2250.0 | 60175.080 | 64875.555 | 64875.555 | 1.0 | 1 | 0.0 | 0.0 | 69.0 | Active | 0 | 0 |
2 | 1740877 | 371185 | -7 | 31815.225 | 450000 | 0.0 | 0.0 | 0.0 | 0.0 | 2250.000 | 2250.0 | 2250.0 | 26926.425 | 31460.085 | 31460.085 | 0.0 | 0 | 0.0 | 0.0 | 30.0 | Active | 0 | 0 |
3 | 1389973 | 337855 | -4 | 236572.110 | 225000 | 2250.0 | 2250.0 | 0.0 | 0.0 | 11795.760 | 11925.0 | 11925.0 | 224949.285 | 233048.970 | 233048.970 | 1.0 | 1 | 0.0 | 0.0 | 10.0 | Active | 0 | 0 |
4 | 1891521 | 126868 | -1 | 453919.455 | 450000 | 0.0 | 11547.0 | 0.0 | 11547.0 | 22924.890 | 27000.0 | 27000.0 | 443044.395 | 453919.455 | 453919.455 | 0.0 | 1 | 0.0 | 1.0 | 101.0 | Active | 0 | 0 |
# read the column report that describes credit_card_balance.csv
meta_path = "../02_pandas/reports/report_credit_card_balance.csv"
pdf_meta = pd.read_csv(meta_path)

# binary indicators: 1 when the corresponding DPD counter is positive, else 0
pdf_data["is_DPD"] = pdf_data["SK_DPD"].gt(0).astype(int)
pdf_data["is_DPD_DEF"] = pdf_data["SK_DPD_DEF"].gt(0).astype(int)

# the raw counters are replaced by the indicators above
pdf_data.drop(["SK_DPD", "SK_DPD_DEF"], axis=1, inplace=True)

# names of the attributes whose reported sub_type is 'object' (categorical)
ls_cate = pdf_meta.loc[pdf_meta["sub_type"] == "object", "name"].tolist()
ls_cate
['NAME_CONTRACT_STATUS']
# categorical mapping: column name -> its observed values, most frequent first
# (value_counts ordering; missing values are not listed)
dict_onehot = {
    cate: pdf_data[cate].value_counts().index.tolist()
    for cate in ls_cate
}
%%time
# One-hot encode the categorical columns described by dict_onehot, keyed by SK_ID_CURR.
# NOTE(review): gen_one_hot_feat comes from the wildcard project imports; its exact
# output layout is not visible here (the recorded run printed a shape of (3840312, 8)).
pdf_onehot = gen_one_hot_feat(pdf_data, dict_onehot, main_key="SK_ID_CURR")
print(pdf_onehot.shape)
(3840312, 8) CPU times: user 23 s, sys: 1.73 s, total: 24.8 s Wall time: 11.4 s
%%time
# Aggregate the one-hot columns per SK_ID_CURR with max/sum/mean, then evaluate the
# aggregated features against pdf_train_filtered.
# NOTE(review): agg_common_data / feature_evaluate are project helpers not shown here;
# the displayed result has the columns name, auc, corr and coverage.
pdf_agg01 = agg_common_data(pdf_onehot, ["max", "sum", "mean"], main_key="SK_ID_CURR")
eval_agg01 = feature_evaluate(pdf_train_filtered, pdf_agg01)
display(eval_agg01)
{'NAME_CONTRACT_STATUS_Active': ['max', 'sum', 'mean'], 'NAME_CONTRACT_STATUS_Approved': ['max', 'sum', 'mean'], 'NAME_CONTRACT_STATUS_Completed': ['max', 'sum', 'mean'], 'NAME_CONTRACT_STATUS_Demand': ['max', 'sum', 'mean'], 'NAME_CONTRACT_STATUS_Refused': ['max', 'sum', 'mean'], 'NAME_CONTRACT_STATUS_Sent_proposal': ['max', 'sum', 'mean'], 'NAME_CONTRACT_STATUS_Signed': ['max', 'sum', 'mean']}
After agg: (103558, 21)
name | auc | corr | coverage | |
---|---|---|---|---|
1 | NAME_CONTRACT_STATUS_Active_sum | 0.5549 | -0.0591 | 1.0 |
2 | NAME_CONTRACT_STATUS_Active_mean | 0.5210 | 0.0235 | 1.0 |
7 | NAME_CONTRACT_STATUS_Completed_sum | 0.5155 | -0.0216 | 1.0 |
8 | NAME_CONTRACT_STATUS_Completed_mean | 0.5155 | -0.0235 | 1.0 |
6 | NAME_CONTRACT_STATUS_Completed_max | 0.5154 | -0.0264 | 1.0 |
19 | NAME_CONTRACT_STATUS_Signed_sum | 0.5058 | -0.0066 | 1.0 |
18 | NAME_CONTRACT_STATUS_Signed_max | 0.5058 | -0.0150 | 1.0 |
20 | NAME_CONTRACT_STATUS_Signed_mean | 0.5056 | -0.0053 | 1.0 |
11 | NAME_CONTRACT_STATUS_Sent_proposal_mean | 0.5016 | -0.0127 | 1.0 |
10 | NAME_CONTRACT_STATUS_Sent_proposal_sum | 0.5016 | -0.0126 | 1.0 |
9 | NAME_CONTRACT_STATUS_Sent_proposal_max | 0.5016 | -0.0126 | 1.0 |
0 | NAME_CONTRACT_STATUS_Active_max | 0.5003 | 0.0068 | 1.0 |
13 | NAME_CONTRACT_STATUS_Demand_sum | 0.5001 | 0.0074 | 1.0 |
14 | NAME_CONTRACT_STATUS_Demand_mean | 0.5001 | 0.0065 | 1.0 |
12 | NAME_CONTRACT_STATUS_Demand_max | 0.5001 | 0.0045 | 1.0 |
5 | NAME_CONTRACT_STATUS_Approved_mean | 0.5000 | -0.0028 | 1.0 |
4 | NAME_CONTRACT_STATUS_Approved_sum | 0.5000 | -0.0028 | 1.0 |
3 | NAME_CONTRACT_STATUS_Approved_max | 0.5000 | -0.0028 | 1.0 |
15 | NAME_CONTRACT_STATUS_Refused_max | 0.5000 | 0.0010 | 1.0 |
16 | NAME_CONTRACT_STATUS_Refused_sum | 0.5000 | 0.0010 | 1.0 |
17 | NAME_CONTRACT_STATUS_Refused_mean | 0.5000 | 0.0005 | 1.0 |
CPU times: user 3.29 s, sys: 147 ms, total: 3.44 s Wall time: 1.65 s
# how many one-hot aggregates score an AUC of at most 0.501
eval_agg01[eval_agg01["auc"] <= 0.501].shape
(10, 4)
# keep only the one-hot aggregates whose AUC exceeds 0.501
sel_feat = eval_agg01.loc[eval_agg01["auc"] > 0.501, "name"].tolist()
pdf_agg01 = pdf_agg01.loc[:, sel_feat]
print(pdf_agg01.shape)
(103558, 11)
# integer attributes are taken from the live frame dtypes (not the meta report),
# so the derived is_DPD / is_DPD_DEF indicators are included
# ls_num = pdf_meta.query("sub_type == 'int64'")["name"].tolist()
series_type = pdf_data.dtypes
ls_num = [
    col
    for col, col_type in series_type.items()
    if col_type == "int64" and col not in ("SK_ID_PREV", "SK_ID_CURR")
]
ls_num
['MONTHS_BALANCE', 'AMT_CREDIT_LIMIT_ACTUAL', 'CNT_DRAWINGS_CURRENT', 'is_DPD', 'is_DPD_DEF']
# independent copy holding the two id columns plus the integer attributes
pdf_num = pdf_data.loc[:, ["SK_ID_PREV", "SK_ID_CURR", *ls_num]].copy()
pdf_num.head(n=5)
SK_ID_PREV | SK_ID_CURR | MONTHS_BALANCE | AMT_CREDIT_LIMIT_ACTUAL | CNT_DRAWINGS_CURRENT | is_DPD | is_DPD_DEF | |
---|---|---|---|---|---|---|---|
0 | 2562384 | 378907 | -6 | 135000 | 1 | 0 | 0 |
1 | 2582071 | 363914 | -1 | 45000 | 1 | 0 | 0 |
2 | 1740877 | 371185 | -7 | 450000 | 0 | 0 | 0 |
3 | 1389973 | 337855 | -4 | 225000 | 1 | 0 | 0 |
4 | 1891521 | 126868 | -1 | 450000 | 1 | 0 | 0 |
# flip the sign of MONTHS_BALANCE (the sample rows above show negative values)
pdf_num["MONTHS_BALANCE"] = pdf_num["MONTHS_BALANCE"].mul(-1)
%%time
# Aggregate the integer attributes per SK_ID_CURR (max/min/sum/mean/std) and evaluate
# the aggregated features against pdf_train_filtered.
# NOTE(review): unlike pdf_agg01, no AUC-based selection is applied to pdf_agg02 later
# in this notebook, so features such as is_DPD_min / is_DPD_DEF_min (AUC 0.5000 and
# NaN correlation in the recorded output) are kept -- confirm this is intended.
pdf_agg02 = agg_common_data(pdf_num[["SK_ID_CURR"] + ls_num], ["max", "min", "sum", "mean", "std"], main_key="SK_ID_CURR")
eval_agg02 = feature_evaluate(pdf_train_filtered, pdf_agg02)
display(eval_agg02)
{'AMT_CREDIT_LIMIT_ACTUAL': ['max', 'min', 'sum', 'mean', 'std'], 'CNT_DRAWINGS_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'MONTHS_BALANCE': ['max', 'min', 'sum', 'mean', 'std'], 'is_DPD': ['max', 'min', 'sum', 'mean', 'std'], 'is_DPD_DEF': ['max', 'min', 'sum', 'mean', 'std']}
After agg: (103558, 25)
name | auc | corr | coverage | |
---|---|---|---|---|
24 | CNT_DRAWINGS_CURRENT_std | 0.6257 | 0.1087 | 0.9929 |
23 | CNT_DRAWINGS_CURRENT_mean | 0.6250 | 0.0843 | 1.0000 |
20 | CNT_DRAWINGS_CURRENT_max | 0.6153 | 0.1011 | 1.0000 |
22 | CNT_DRAWINGS_CURRENT_sum | 0.5969 | 0.0504 | 1.0000 |
8 | MONTHS_BALANCE_mean | 0.5627 | -0.0622 | 1.0000 |
5 | MONTHS_BALANCE_max | 0.5606 | -0.0613 | 1.0000 |
7 | MONTHS_BALANCE_sum | 0.5597 | -0.0589 | 1.0000 |
9 | MONTHS_BALANCE_std | 0.5582 | -0.0605 | 0.9929 |
17 | AMT_CREDIT_LIMIT_ACTUAL_sum | 0.5470 | -0.0426 | 1.0000 |
6 | MONTHS_BALANCE_min | 0.5259 | -0.0309 | 1.0000 |
16 | AMT_CREDIT_LIMIT_ACTUAL_min | 0.5183 | 0.0002 | 1.0000 |
19 | AMT_CREDIT_LIMIT_ACTUAL_std | 0.5180 | -0.0143 | 0.9929 |
21 | CNT_DRAWINGS_CURRENT_min | 0.5125 | 0.0290 | 1.0000 |
2 | is_DPD_DEF_sum | 0.5092 | -0.0083 | 1.0000 |
0 | is_DPD_DEF_max | 0.5087 | -0.0130 | 1.0000 |
3 | is_DPD_DEF_mean | 0.5073 | 0.0052 | 1.0000 |
4 | is_DPD_DEF_std | 0.5072 | -0.0021 | 0.9929 |
12 | is_DPD_sum | 0.5066 | -0.0084 | 1.0000 |
15 | AMT_CREDIT_LIMIT_ACTUAL_max | 0.5064 | -0.0113 | 1.0000 |
10 | is_DPD_max | 0.5051 | -0.0071 | 1.0000 |
13 | is_DPD_mean | 0.5032 | 0.0042 | 1.0000 |
14 | is_DPD_std | 0.5029 | 0.0027 | 0.9929 |
18 | AMT_CREDIT_LIMIT_ACTUAL_mean | 0.5003 | -0.0076 | 1.0000 |
1 | is_DPD_DEF_min | 0.5000 | NaN | 1.0000 |
11 | is_DPD_min | 0.5000 | NaN | 1.0000 |
CPU times: user 5.12 s, sys: 213 ms, total: 5.33 s Wall time: 2.14 s
# continuous attributes: names whose reported sub_type is 'float64' in the meta report
ls_con = pdf_meta.loc[pdf_meta["sub_type"] == "float64", "name"].tolist()
ls_con
['AMT_BALANCE', 'AMT_DRAWINGS_ATM_CURRENT', 'AMT_DRAWINGS_CURRENT', 'AMT_DRAWINGS_OTHER_CURRENT', 'AMT_DRAWINGS_POS_CURRENT', 'AMT_INST_MIN_REGULARITY', 'AMT_PAYMENT_CURRENT', 'AMT_PAYMENT_TOTAL_CURRENT', 'AMT_RECEIVABLE_PRINCIPAL', 'AMT_RECIVABLE', 'AMT_TOTAL_RECEIVABLE', 'CNT_DRAWINGS_ATM_CURRENT', 'CNT_DRAWINGS_OTHER_CURRENT', 'CNT_DRAWINGS_POS_CURRENT', 'CNT_INSTALMENT_MATURE_CUM']
# independent copy holding the two id columns plus the continuous attributes
pdf_con = pdf_data.loc[:, ["SK_ID_PREV", "SK_ID_CURR", *ls_con]].copy()
print(pdf_con.shape)
(3840312, 17)
%%time
# Aggregate the continuous attributes per SK_ID_CURR (max/min/sum/mean/std) and evaluate
# the aggregated features against pdf_train_filtered.
# NOTE(review): agg_common_data / feature_evaluate are project helpers not shown here;
# the displayed result has the columns name, auc, corr and coverage.
pdf_agg03 = agg_common_data(pdf_con[["SK_ID_CURR"] + ls_con], ["max", "min", "sum", "mean", "std"], main_key="SK_ID_CURR")
eval_agg03 = feature_evaluate(pdf_train_filtered, pdf_agg03)
display(eval_agg03)
{'AMT_BALANCE': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_DRAWINGS_ATM_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_DRAWINGS_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_DRAWINGS_OTHER_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_DRAWINGS_POS_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_INST_MIN_REGULARITY': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_PAYMENT_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_PAYMENT_TOTAL_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_RECEIVABLE_PRINCIPAL': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_RECIVABLE': ['max', 'min', 'sum', 'mean', 'std'], 'AMT_TOTAL_RECEIVABLE': ['max', 'min', 'sum', 'mean', 'std'], 'CNT_DRAWINGS_ATM_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'CNT_DRAWINGS_OTHER_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'CNT_DRAWINGS_POS_CURRENT': ['max', 'min', 'sum', 'mean', 'std'], 'CNT_INSTALMENT_MATURE_CUM': ['max', 'min', 'sum', 'mean', 'std']}
After agg: (103558, 75)
name | auc | corr | coverage | |
---|---|---|---|---|
3 | AMT_DRAWINGS_CURRENT_mean | 0.6091 | 0.0630 | 1.0000 |
63 | AMT_BALANCE_mean | 0.6077 | 0.0894 | 1.0000 |
53 | AMT_TOTAL_RECEIVABLE_mean | 0.6069 | 0.0887 | 1.0000 |
13 | AMT_RECIVABLE_mean | 0.6069 | 0.0887 | 1.0000 |
18 | AMT_RECEIVABLE_PRINCIPAL_mean | 0.6068 | 0.0882 | 1.0000 |
68 | CNT_DRAWINGS_ATM_CURRENT_mean | 0.6066 | 0.1092 | 0.7030 |
4 | AMT_DRAWINGS_CURRENT_std | 0.6019 | 0.0711 | 0.9929 |
69 | CNT_DRAWINGS_ATM_CURRENT_std | 0.5971 | 0.1073 | 0.6979 |
48 | AMT_INST_MIN_REGULARITY_mean | 0.5925 | 0.0752 | 1.0000 |
60 | AMT_BALANCE_max | 0.5843 | 0.0716 | 1.0000 |
50 | AMT_TOTAL_RECEIVABLE_max | 0.5830 | 0.0708 | 1.0000 |
10 | AMT_RECIVABLE_max | 0.5830 | 0.0708 | 1.0000 |
15 | AMT_RECEIVABLE_PRINCIPAL_max | 0.5825 | 0.0695 | 1.0000 |
58 | AMT_DRAWINGS_ATM_CURRENT_mean | 0.5814 | 0.0623 | 0.7030 |
49 | AMT_INST_MIN_REGULARITY_std | 0.5808 | 0.0700 | 0.9929 |
7 | CNT_DRAWINGS_POS_CURRENT_sum | 0.5790 | 0.0382 | 1.0000 |
45 | AMT_INST_MIN_REGULARITY_max | 0.5786 | 0.0661 | 1.0000 |
67 | CNT_DRAWINGS_ATM_CURRENT_sum | 0.5776 | 0.0499 | 1.0000 |
64 | AMT_BALANCE_std | 0.5754 | 0.0622 | 0.9929 |
54 | AMT_TOTAL_RECEIVABLE_std | 0.5747 | 0.0619 | 0.9929 |
14 | AMT_RECIVABLE_std | 0.5747 | 0.0619 | 0.9929 |
0 | AMT_DRAWINGS_CURRENT_max | 0.5745 | 0.0541 | 1.0000 |
19 | AMT_RECEIVABLE_PRINCIPAL_std | 0.5740 | 0.0608 | 0.9929 |
9 | CNT_DRAWINGS_POS_CURRENT_std | 0.5714 | 0.0738 | 0.6979 |
11 | AMT_RECIVABLE_min | 0.5714 | 0.0649 | 1.0000 |
51 | AMT_TOTAL_RECEIVABLE_min | 0.5714 | 0.0649 | 1.0000 |
23 | AMT_PAYMENT_TOTAL_CURRENT_mean | 0.5702 | 0.0250 | 1.0000 |
8 | CNT_DRAWINGS_POS_CURRENT_mean | 0.5687 | 0.0531 | 0.7030 |
5 | CNT_DRAWINGS_POS_CURRENT_max | 0.5662 | 0.0664 | 0.7030 |
57 | AMT_DRAWINGS_ATM_CURRENT_sum | 0.5662 | 0.0390 | 1.0000 |
61 | AMT_BALANCE_min | 0.5647 | 0.0656 | 1.0000 |
2 | AMT_DRAWINGS_CURRENT_sum | 0.5646 | 0.0256 | 1.0000 |
59 | AMT_DRAWINGS_ATM_CURRENT_std | 0.5627 | 0.0536 | 0.6979 |
42 | AMT_DRAWINGS_POS_CURRENT_sum | 0.5625 | -0.0024 | 1.0000 |
16 | AMT_RECEIVABLE_PRINCIPAL_min | 0.5581 | 0.0645 | 1.0000 |
65 | CNT_DRAWINGS_ATM_CURRENT_max | 0.5573 | 0.0630 | 0.7030 |
62 | AMT_BALANCE_sum | 0.5566 | 0.0205 | 1.0000 |
52 | AMT_TOTAL_RECEIVABLE_sum | 0.5560 | 0.0202 | 1.0000 |
12 | AMT_RECIVABLE_sum | 0.5560 | 0.0202 | 1.0000 |
17 | AMT_RECEIVABLE_PRINCIPAL_sum | 0.5559 | 0.0203 | 1.0000 |
24 | AMT_PAYMENT_TOTAL_CURRENT_std | 0.5552 | 0.0363 | 0.9929 |
43 | AMT_DRAWINGS_POS_CURRENT_mean | 0.5451 | -0.0027 | 0.7030 |
36 | AMT_PAYMENT_CURRENT_min | 0.5433 | 0.0167 | 0.7022 |
47 | AMT_INST_MIN_REGULARITY_sum | 0.5423 | 0.0041 | 1.0000 |
20 | AMT_PAYMENT_TOTAL_CURRENT_max | 0.5422 | 0.0293 | 1.0000 |
44 | AMT_DRAWINGS_POS_CURRENT_std | 0.5390 | 0.0024 | 0.6979 |
71 | CNT_INSTALMENT_MATURE_CUM_min | 0.5369 | -0.0302 | 1.0000 |
74 | CNT_INSTALMENT_MATURE_CUM_std | 0.5305 | -0.0112 | 0.9929 |
38 | AMT_PAYMENT_CURRENT_mean | 0.5301 | 0.0068 | 0.7022 |
35 | AMT_PAYMENT_CURRENT_max | 0.5290 | 0.0003 | 0.7022 |
40 | AMT_DRAWINGS_POS_CURRENT_max | 0.5287 | -0.0090 | 0.7030 |
55 | AMT_DRAWINGS_ATM_CURRENT_max | 0.5252 | 0.0241 | 0.7030 |
22 | AMT_PAYMENT_TOTAL_CURRENT_sum | 0.5248 | -0.0041 | 1.0000 |
37 | AMT_PAYMENT_CURRENT_sum | 0.5237 | -0.0041 | 1.0000 |
70 | CNT_INSTALMENT_MATURE_CUM_max | 0.5213 | -0.0166 | 1.0000 |
73 | CNT_INSTALMENT_MATURE_CUM_mean | 0.5142 | -0.0277 | 1.0000 |
1 | AMT_DRAWINGS_CURRENT_min | 0.5124 | 0.0158 | 1.0000 |
39 | AMT_PAYMENT_CURRENT_std | 0.5111 | 0.0133 | 0.6971 |
6 | CNT_DRAWINGS_POS_CURRENT_min | 0.5096 | 0.0243 | 0.7030 |
41 | AMT_DRAWINGS_POS_CURRENT_min | 0.5093 | -0.0021 | 0.7030 |
72 | CNT_INSTALMENT_MATURE_CUM_sum | 0.5070 | -0.0412 | 1.0000 |
66 | CNT_DRAWINGS_ATM_CURRENT_min | 0.5061 | 0.0269 | 0.7030 |
56 | AMT_DRAWINGS_ATM_CURRENT_min | 0.5061 | 0.0177 | 0.7030 |
21 | AMT_PAYMENT_TOTAL_CURRENT_min | 0.5059 | 0.0047 | 1.0000 |
25 | CNT_DRAWINGS_OTHER_CURRENT_max | 0.5035 | 0.0009 | 0.7030 |
32 | AMT_DRAWINGS_OTHER_CURRENT_sum | 0.5032 | 0.0107 | 1.0000 |
30 | AMT_DRAWINGS_OTHER_CURRENT_max | 0.5031 | 0.0048 | 0.7030 |
28 | CNT_DRAWINGS_OTHER_CURRENT_mean | 0.5030 | 0.0156 | 0.7030 |
27 | CNT_DRAWINGS_OTHER_CURRENT_sum | 0.5029 | 0.0015 | 1.0000 |
29 | CNT_DRAWINGS_OTHER_CURRENT_std | 0.5028 | 0.0125 | 0.6979 |
33 | AMT_DRAWINGS_OTHER_CURRENT_mean | 0.5028 | 0.0110 | 0.7030 |
34 | AMT_DRAWINGS_OTHER_CURRENT_std | 0.5027 | 0.0097 | 0.6979 |
46 | AMT_INST_MIN_REGULARITY_min | 0.5015 | 0.0028 | 1.0000 |
26 | CNT_DRAWINGS_OTHER_CURRENT_min | 0.5000 | -0.0026 | 0.7030 |
31 | AMT_DRAWINGS_OTHER_CURRENT_min | 0.5000 | -0.0024 | 0.7030 |
CPU times: user 9.22 s, sys: 384 ms, total: 9.6 s Wall time: 5.76 s
# how many continuous aggregates score an AUC of at most 0.501
# (inspection only: pdf_agg03 is joined below without any filtering)
eval_agg03[eval_agg03["auc"] <= 0.501].shape
(2, 4)
# combine the three aggregate tables column-wise, index-aligned on pdf_agg01
pdf_feat = pdf_agg01.join(pdf_agg02)
pdf_feat = pdf_feat.join(pdf_agg03)
print(pdf_feat.shape)
(103558, 111)
%%time
fname = "credit_card_balance"
fname = os.path.join("features", "{}.pkl.bz2".format(fname))
pdf_feat.to_pickle(fname, compression="bz2")
print("Store features completed!")
Store features completed! CPU times: user 5.62 s, sys: 130 ms, total: 5.75 s Wall time: 4.22 s