import os, math, subprocess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from IPython.display import display
from lib_feature_engineering import *
# Pandas display settings, so wide / long frames are shown in full in the notebook.
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.precision', 4)
# Never truncate cell contents.  pandas >= 1.0 spells "unlimited" as None
# (the old -1 sentinel was deprecated there and is rejected by later
# releases), while releases before 1.0 only accept an integer -- so try the
# modern value first and fall back to the legacy one.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
def _load_and_preview(csv_path):
    """Read *csv_path* into a DataFrame, print its shape and display its first 5 rows.

    Returns the loaded DataFrame.
    """
    frame = pd.read_csv(csv_path)
    print("(rows, columns)", frame.shape)
    print("First 5 rows")
    display(frame.head(5))
    return frame


# application table: train split
train_path = "home-credit-default-risk/application_train.csv"
pdf_train = _load_and_preview(train_path)

# application table: test split
test_path = "home-credit-default-risk/application_test.csv"
pdf_test = _load_and_preview(test_path)

# meta data report for the train table (displayed in full, no shape/preview header)
meta_path = "../02_pandas/reports/report_application_train.csv"
pdf_meta = pd.read_csv(meta_path)
display(pdf_meta)
('(rows, columns)', (307511, 122)) First 5 rows
SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | NAME_INCOME_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_REGISTRATION | DAYS_ID_PUBLISH | OWN_CAR_AGE | FLAG_MOBIL | FLAG_EMP_PHONE | FLAG_WORK_PHONE | FLAG_CONT_MOBILE | FLAG_PHONE | FLAG_EMAIL | OCCUPATION_TYPE | CNT_FAM_MEMBERS | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | LIVE_REGION_NOT_WORK_REGION | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | LIVE_CITY_NOT_WORK_CITY | ORGANIZATION_TYPE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | APARTMENTS_AVG | BASEMENTAREA_AVG | YEARS_BEGINEXPLUATATION_AVG | YEARS_BUILD_AVG | COMMONAREA_AVG | ELEVATORS_AVG | ENTRANCES_AVG | FLOORSMAX_AVG | FLOORSMIN_AVG | LANDAREA_AVG | LIVINGAPARTMENTS_AVG | LIVINGAREA_AVG | NONLIVINGAPARTMENTS_AVG | NONLIVINGAREA_AVG | APARTMENTS_MODE | BASEMENTAREA_MODE | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_MODE | COMMONAREA_MODE | ELEVATORS_MODE | ENTRANCES_MODE | FLOORSMAX_MODE | FLOORSMIN_MODE | LANDAREA_MODE | LIVINGAPARTMENTS_MODE | LIVINGAREA_MODE | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_MODE | APARTMENTS_MEDI | BASEMENTAREA_MEDI | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BUILD_MEDI | COMMONAREA_MEDI | ELEVATORS_MEDI | ENTRANCES_MEDI | FLOORSMAX_MEDI | FLOORSMIN_MEDI | LANDAREA_MEDI | LIVINGAPARTMENTS_MEDI | LIVINGAREA_MEDI | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAREA_MEDI | FONDKAPREMONT_MODE | HOUSETYPE_MODE | TOTALAREA_MODE | WALLSMATERIAL_MODE | EMERGENCYSTATE_MODE | OBS_30_CNT_SOCIAL_CIRCLE | DEF_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | DAYS_LAST_PHONE_CHANGE | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | 
FLAG_DOCUMENT_7 | FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100002 | 1 | Cash loans | M | N | Y | 0 | 202500.0 | 406597.5 | 24700.5 | 351000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0188 | -9461 | -637 | -3648.0 | -2120 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Laborers | 1.0 | 2 | 2 | WEDNESDAY | 10 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | 0.0830 | 0.2629 | 0.1394 | 0.0247 | 0.0369 | 0.9722 | 0.6192 | 0.0143 | 0.00 | 0.0690 | 0.0833 | 0.1250 | 0.0369 | 0.0202 | 0.0190 | 0.0000 | 0.0000 | 0.0252 | 0.0383 | 0.9722 | 0.6341 | 0.0144 | 0.0000 | 0.0690 | 0.0833 | 0.1250 | 0.0377 | 0.022 | 0.0198 | 0.0 | 0.0 | 0.0250 | 0.0369 | 0.9722 | 0.6243 | 0.0144 | 0.00 | 0.0690 | 0.0833 | 0.1250 | 0.0375 | 0.0205 | 0.0193 | 0.0000 | 0.00 | reg oper account | block of flats | 0.0149 | Stone, brick | No | 2.0 | 2.0 | 2.0 | 2.0 | -1134.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
1 | 100003 | 0 | Cash loans | F | N | N | 0 | 270000.0 | 1293502.5 | 35698.5 | 1129500.0 | Family | State servant | Higher education | Married | House / apartment | 0.0035 | -16765 | -1188 | -1186.0 | -291 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Core staff | 2.0 | 1 | 1 | MONDAY | 11 | 0 | 0 | 0 | 0 | 0 | 0 | School | 0.3113 | 0.6222 | NaN | 0.0959 | 0.0529 | 0.9851 | 0.7960 | 0.0605 | 0.08 | 0.0345 | 0.2917 | 0.3333 | 0.0130 | 0.0773 | 0.0549 | 0.0039 | 0.0098 | 0.0924 | 0.0538 | 0.9851 | 0.8040 | 0.0497 | 0.0806 | 0.0345 | 0.2917 | 0.3333 | 0.0128 | 0.079 | 0.0554 | 0.0 | 0.0 | 0.0968 | 0.0529 | 0.9851 | 0.7987 | 0.0608 | 0.08 | 0.0345 | 0.2917 | 0.3333 | 0.0132 | 0.0787 | 0.0558 | 0.0039 | 0.01 | reg oper account | block of flats | 0.0714 | Block | No | 1.0 | 0.0 | 1.0 | 0.0 | -828.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 100004 | 0 | Revolving loans | M | Y | Y | 0 | 67500.0 | 135000.0 | 6750.0 | 135000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0100 | -19046 | -225 | -4260.0 | -2531 | 26.0 | 1 | 1 | 1 | 1 | 1 | 0 | Laborers | 1.0 | 2 | 2 | MONDAY | 9 | 0 | 0 | 0 | 0 | 0 | 0 | Government | NaN | 0.5559 | 0.7296 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -815.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | 100006 | 0 | Cash loans | F | N | Y | 0 | 135000.0 | 312682.5 | 29686.5 | 297000.0 | Unaccompanied | Working | Secondary / secondary special | Civil marriage | House / apartment | 0.0080 | -19005 | -3039 | -9833.0 | -2437 | NaN | 1 | 1 | 0 | 1 | 0 | 0 | Laborers | 2.0 | 2 | 2 | WEDNESDAY | 17 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | NaN | 0.6504 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 0.0 | 2.0 | 0.0 | -617.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 100007 | 0 | Cash loans | M | N | Y | 0 | 121500.0 | 513000.0 | 21865.5 | 513000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0287 | -19932 | -3038 | -4311.0 | -3458 | NaN | 1 | 1 | 0 | 1 | 0 | 0 | Core staff | 1.0 | 2 | 2 | THURSDAY | 11 | 0 | 0 | 0 | 0 | 1 | 1 | Religion | NaN | 0.3227 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -1106.0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
('(rows, columns)', (48744, 121)) First 5 rows
SK_ID_CURR | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | NAME_INCOME_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_REGISTRATION | DAYS_ID_PUBLISH | OWN_CAR_AGE | FLAG_MOBIL | FLAG_EMP_PHONE | FLAG_WORK_PHONE | FLAG_CONT_MOBILE | FLAG_PHONE | FLAG_EMAIL | OCCUPATION_TYPE | CNT_FAM_MEMBERS | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | LIVE_REGION_NOT_WORK_REGION | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | LIVE_CITY_NOT_WORK_CITY | ORGANIZATION_TYPE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | APARTMENTS_AVG | BASEMENTAREA_AVG | YEARS_BEGINEXPLUATATION_AVG | YEARS_BUILD_AVG | COMMONAREA_AVG | ELEVATORS_AVG | ENTRANCES_AVG | FLOORSMAX_AVG | FLOORSMIN_AVG | LANDAREA_AVG | LIVINGAPARTMENTS_AVG | LIVINGAREA_AVG | NONLIVINGAPARTMENTS_AVG | NONLIVINGAREA_AVG | APARTMENTS_MODE | BASEMENTAREA_MODE | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_MODE | COMMONAREA_MODE | ELEVATORS_MODE | ENTRANCES_MODE | FLOORSMAX_MODE | FLOORSMIN_MODE | LANDAREA_MODE | LIVINGAPARTMENTS_MODE | LIVINGAREA_MODE | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_MODE | APARTMENTS_MEDI | BASEMENTAREA_MEDI | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BUILD_MEDI | COMMONAREA_MEDI | ELEVATORS_MEDI | ENTRANCES_MEDI | FLOORSMAX_MEDI | FLOORSMIN_MEDI | LANDAREA_MEDI | LIVINGAPARTMENTS_MEDI | LIVINGAREA_MEDI | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAREA_MEDI | FONDKAPREMONT_MODE | HOUSETYPE_MODE | TOTALAREA_MODE | WALLSMATERIAL_MODE | EMERGENCYSTATE_MODE | OBS_30_CNT_SOCIAL_CIRCLE | DEF_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | DAYS_LAST_PHONE_CHANGE | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | FLAG_DOCUMENT_7 
| FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100001 | Cash loans | F | N | Y | 0 | 135000.0 | 568800.0 | 20560.5 | 450000.0 | Unaccompanied | Working | Higher education | Married | House / apartment | 0.0188 | -19241 | -2329 | -5170.0 | -812 | NaN | 1 | 1 | 0 | 1 | 0 | 1 | NaN | 2.0 | 2 | 2 | TUESDAY | 18 | 0 | 0 | 0 | 0 | 0 | 0 | Kindergarten | 0.7526 | 0.7897 | 0.1595 | 0.0660 | 0.0590 | 0.9732 | NaN | NaN | NaN | 0.1379 | 0.125 | NaN | NaN | NaN | 0.0505 | NaN | NaN | 0.0672 | 0.0612 | 0.9732 | NaN | NaN | NaN | 0.1379 | 0.125 | NaN | NaN | NaN | 0.0526 | NaN | NaN | 0.0666 | 0.0590 | 0.9732 | NaN | NaN | NaN | 0.1379 | 0.125 | NaN | NaN | NaN | 0.0514 | NaN | NaN | NaN | block of flats | 0.0392 | Stone, brick | No | 0.0 | 0.0 | 0.0 | 0.0 | -1740.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1 | 100005 | Cash loans | M | N | Y | 0 | 99000.0 | 222768.0 | 17370.0 | 180000.0 | Unaccompanied | Working | Secondary / secondary special | Married | House / apartment | 0.0358 | -18064 | -4469 | -9118.0 | -1623 | NaN | 1 | 1 | 0 | 1 | 0 | 0 | Low-skill Laborers | 2.0 | 2 | 2 | FRIDAY | 9 | 0 | 0 | 0 | 0 | 0 | 0 | Self-employed | 0.5650 | 0.2917 | 0.4330 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
2 | 100013 | Cash loans | M | Y | Y | 0 | 202500.0 | 663264.0 | 69777.0 | 630000.0 | NaN | Working | Higher education | Married | House / apartment | 0.0191 | -20038 | -4458 | -2175.0 | -3503 | 5.0 | 1 | 1 | 0 | 1 | 0 | 0 | Drivers | 2.0 | 2 | 2 | MONDAY | 14 | 0 | 0 | 0 | 0 | 0 | 0 | Transport: type 3 | NaN | 0.6998 | 0.6110 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -856.0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 4.0 |
3 | 100028 | Cash loans | F | N | Y | 2 | 315000.0 | 1575000.0 | 49018.5 | 1575000.0 | Unaccompanied | Working | Secondary / secondary special | Married | House / apartment | 0.0264 | -13976 | -1866 | -2000.0 | -4208 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Sales staff | 4.0 | 2 | 2 | WEDNESDAY | 11 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | 0.5257 | 0.5097 | 0.6127 | 0.3052 | 0.1974 | 0.9970 | 0.9592 | 0.1165 | 0.32 | 0.2759 | 0.375 | 0.0417 | 0.2042 | 0.2404 | 0.3673 | 0.0386 | 0.08 | 0.3109 | 0.2049 | 0.9970 | 0.9608 | 0.1176 | 0.3222 | 0.2759 | 0.375 | 0.0417 | 0.2089 | 0.2626 | 0.3827 | 0.0389 | 0.0847 | 0.3081 | 0.1974 | 0.9970 | 0.9597 | 0.1173 | 0.32 | 0.2759 | 0.375 | 0.0417 | 0.2078 | 0.2446 | 0.3739 | 0.0388 | 0.0817 | reg oper account | block of flats | 0.3700 | Panel | No | 0.0 | 0.0 | 0.0 | 0.0 | -1805.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
4 | 100038 | Cash loans | M | Y | N | 1 | 180000.0 | 625500.0 | 32067.0 | 625500.0 | Unaccompanied | Working | Secondary / secondary special | Married | House / apartment | 0.0100 | -13040 | -2191 | -4000.0 | -4262 | 16.0 | 1 | 1 | 1 | 1 | 0 | 0 | NaN | 3.0 | 2 | 2 | FRIDAY | 5 | 0 | 0 | 0 | 0 | 1 | 1 | Business Entity Type 3 | 0.2021 | 0.4257 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -821.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN |
name | sub_type | n_distinct | n_miss | n_negative | n_zeros | 25% | 50% | 75% | count | max | mean | min | std | sample_0 | sample_1 | sample_2 | sample_3 | sample_4 | sample_5 | sample_6 | sample_7 | sample_8 | sample_9 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | SK_ID_CURR | int64 | 307511 (100.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 189145.5000 | 278202.0000 | 367142.5000 | 307511.0 | 4.5626e+05 | 2.7818e+05 | 1.0000e+05 | 102790.1753 | 326682 | 414578 | 432657 | 346257 | 169928 | 228494 | 305986 | 450918 | 393627 | 121604 |
1 | TARGET | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 282686 (91.93%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 8.0729e-02 | 0.0000e+00 | 0.2724 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
2 | NAME_CONTRACT_TYPE | object | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | Cash loans | Cash loans | Revolving loans | Cash loans | Cash loans | Cash loans | Cash loans | Cash loans | Cash loans | Cash loans |
3 | CODE_GENDER | object | 3 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | M | M | M | M | M | F | M | F | F | F |
4 | FLAG_OWN_CAR | object | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | Y | Y | N | N | N | N | N | Y | N | N |
5 | FLAG_OWN_REALTY | object | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | Y | N | Y | Y | Y | Y | Y | N | Y | Y |
6 | CNT_CHILDREN | int64 | 15 (0.00%) | 0 (0.00%) | 0 (0.00%) | 215371 (70.04%) | 0.0000 | 0.0000 | 1.0000 | 307511.0 | 1.9000e+01 | 4.1705e-01 | 0.0000e+00 | 0.7221 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 |
7 | AMT_INCOME_TOTAL | float64 | 2548 (0.83%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 112500.0000 | 147150.0000 | 202500.0000 | 307511.0 | 1.1700e+08 | 1.6880e+05 | 2.5650e+04 | 237123.1463 | 166500.0 | 450000.0 | 157500.0 | 135000.0 | 202500.0 | 117000.0 | 90000.0 | 180000.0 | 157500.0 | 90000.0 |
8 | AMT_CREDIT | float64 | 5603 (1.82%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 270000.0000 | 513531.0000 | 808650.0000 | 307511.0 | 4.0500e+06 | 5.9903e+05 | 4.5000e+04 | 402490.7770 | 254700.0 | 1381113.0 | 450000.0 | 1764000.0 | 203760.0 | 254700.0 | 538704.0 | 630000.0 | 679500.0 | 755190.0 |
9 | AMT_ANNUITY | float64 | 13672 (4.45%) | 12 (0.00%) | 0 (0.00%) | 0 (0.00%) | 16524.0000 | 24903.0000 | 34596.0000 | 307499.0 | 2.5803e+05 | 2.7109e+04 | 1.6155e+03 | 14493.7373 | 25191.0 | 39712.5 | 22500.0 | 48510.0 | 16227.0 | 25191.0 | 26046.0 | 23274.0 | 36202.5 | 36459.0 |
10 | AMT_GOODS_PRICE | float64 | 1002 (0.33%) | 278 (0.09%) | 0 (0.00%) | 0 (0.00%) | 238500.0000 | 450000.0000 | 679500.0000 | 307233.0 | 4.0500e+06 | 5.3840e+05 | 4.0500e+04 | 369446.4605 | 225000.0 | 1206000.0 | 450000.0 | 1764000.0 | 180000.0 | 225000.0 | 481500.0 | 630000.0 | 679500.0 | 675000.0 |
11 | NAME_TYPE_SUITE | object | 7 (0.00%) | 1292 (0.42%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | Unaccompanied | Unaccompanied | Unaccompanied | Unaccompanied | Unaccompanied | Unaccompanied | Unaccompanied | Unaccompanied | Family | Family |
12 | NAME_INCOME_TYPE | object | 8 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | State servant | Working | Working | Commercial associate | Working | Pensioner | Working | Working | Working | Working |
13 | NAME_EDUCATION_TYPE | object | 5 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | Higher education | Incomplete higher | Secondary / secondary special | Secondary / secondary special | Secondary / secondary special | Secondary / secondary special | Secondary / secondary special | Secondary / secondary special | Secondary / secondary special | Secondary / secondary special |
14 | NAME_FAMILY_STATUS | object | 6 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | Married | Married | Married | Married | Separated | Married | Single / not married | Married | Married | Married |
15 | NAME_HOUSING_TYPE | object | 6 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | House / apartment | House / apartment | House / apartment | House / apartment | House / apartment | House / apartment | House / apartment | House / apartment | House / apartment | With parents |
16 | REGION_POPULATION_RELATIVE | float64 | 81 (0.03%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0100 | 0.0188 | 0.0287 | 307511.0 | 7.2508e-02 | 2.0868e-02 | 2.9000e-04 | 0.0138 | 0.02461 | 0.018634 | 0.015221 | 0.008019 | 0.0105 | 0.008865999999999999 | 0.018634 | 0.0105 | 0.01885 | 0.030755 |
17 | DAYS_BIRTH | int64 | 17460 (5.68%) | 0 (0.00%) | 307511 (100.00%) | 0 (0.00%) | -19682.0000 | -15750.0000 | -12413.0000 | 307511.0 | -7.4890e+03 | -1.6037e+04 | -2.5229e+04 | 4363.9886 | -21882 | -14026 | -9905 | -11946 | -10493 | -24264 | -15251 | -14263 | -11179 | -10477 |
18 | DAYS_EMPLOYED | int64 | 12574 (4.09%) | 0 (0.00%) | 252135 (81.99%) | 2 (0.00%) | -2760.0000 | -1213.0000 | -289.0000 | 307511.0 | 3.6524e+05 | 6.3815e+04 | -1.7912e+04 | 141275.7665 | -2987 | -270 | -2691 | -1526 | -656 | 365243 | -1984 | -481 | -687 | -1400 |
19 | DAYS_REGISTRATION | float64 | 15688 (5.10%) | 0 (0.00%) | 307431 (99.97%) | 80 (0.03%) | -7479.5000 | -4504.0000 | -2010.0000 | 307511.0 | 0.0000e+00 | -4.9861e+03 | -2.4672e+04 | 3522.8863 | -11125.0 | -1625.0 | -4725.0 | -1513.0 | -2389.0 | -87.0 | -6933.0 | -1315.0 | -1491.0 | -5034.0 |
20 | DAYS_ID_PUBLISH | int64 | 6168 (2.01%) | 0 (0.00%) | 307495 (99.99%) | 16 (0.01%) | -4299.0000 | -3254.0000 | -1720.0000 | 307511.0 | 0.0000e+00 | -2.9942e+03 | -7.1970e+03 | 1509.4504 | -3984 | -4768 | -2549 | -4392 | -2526 | -4388 | -4396 | -4830 | -2742 | -1625 |
21 | OWN_CAR_AGE | float64 | 62 (0.02%) | 202929 (65.99%) | 0 (0.00%) | 2134 (0.69%) | 5.0000 | 9.0000 | 15.0000 | 104582.0 | 9.1000e+01 | 1.2061e+01 | 0.0000e+00 | 11.9448 | 4.0 | 1.0 | NaN | NaN | NaN | NaN | NaN | 18.0 | NaN | NaN |
22 | FLAG_MOBIL | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 1 (0.00%) | 1.0000 | 1.0000 | 1.0000 | 307511.0 | 1.0000e+00 | 1.0000e+00 | 0.0000e+00 | 0.0018 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
23 | FLAG_EMP_PHONE | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 55386 (18.01%) | 1.0000 | 1.0000 | 1.0000 | 307511.0 | 1.0000e+00 | 8.1989e-01 | 0.0000e+00 | 0.3843 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 |
24 | FLAG_WORK_PHONE | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 246203 (80.06%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 1.9937e-01 | 0.0000e+00 | 0.3995 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 |
25 | FLAG_CONT_MOBILE | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 574 (0.19%) | 1.0000 | 1.0000 | 1.0000 | 307511.0 | 1.0000e+00 | 9.9813e-01 | 0.0000e+00 | 0.0432 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
26 | FLAG_PHONE | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 221080 (71.89%) | 0.0000 | 0.0000 | 1.0000 | 307511.0 | 1.0000e+00 | 2.8107e-01 | 0.0000e+00 | 0.4495 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
27 | FLAG_EMAIL | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 290069 (94.33%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 5.6720e-02 | 0.0000e+00 | 0.2313 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
28 | OCCUPATION_TYPE | object | 18 (0.01%) | 96391 (31.35%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | NaN | Drivers | Laborers | NaN | Laborers | NaN | Laborers | NaN | Managers | Laborers |
29 | CNT_FAM_MEMBERS | float64 | 17 (0.01%) | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 2.0000 | 2.0000 | 3.0000 | 307509.0 | 2.0000e+01 | 2.1527e+00 | 1.0000e+00 | 0.9107 | 2.0 | 3.0 | 2.0 | 2.0 | 1.0 | 2.0 | 1.0 | 3.0 | 4.0 | 2.0 |
30 | REGION_RATING_CLIENT | int64 | 3 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 2.0000 | 2.0000 | 2.0000 | 307511.0 | 3.0000e+00 | 2.0525e+00 | 1.0000e+00 | 0.5090 | 2 | 2 | 2 | 2 | 3 | 2 | 2 | 3 | 2 | 2 |
31 | REGION_RATING_CLIENT_W_CITY | int64 | 3 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 2.0000 | 2.0000 | 2.0000 | 307511.0 | 3.0000e+00 | 2.0315e+00 | 1.0000e+00 | 0.5027 | 2 | 2 | 2 | 2 | 3 | 2 | 2 | 3 | 2 | 2 |
32 | WEEKDAY_APPR_PROCESS_START | object | 7 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | THURSDAY | MONDAY | WEDNESDAY | FRIDAY | WEDNESDAY | MONDAY | FRIDAY | MONDAY | TUESDAY | FRIDAY |
33 | HOUR_APPR_PROCESS_START | int64 | 24 (0.01%) | 0 (0.00%) | 0 (0.00%) | 40 (0.01%) | 10.0000 | 12.0000 | 14.0000 | 307511.0 | 2.3000e+01 | 1.2063e+01 | 0.0000e+00 | 3.2658 | 17 | 10 | 9 | 10 | 15 | 17 | 9 | 13 | 12 | 13 |
34 | REG_REGION_NOT_LIVE_REGION | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 302854 (98.49%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 1.5144e-02 | 0.0000e+00 | 0.1221 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
35 | REG_REGION_NOT_WORK_REGION | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 291899 (94.92%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 5.0769e-02 | 0.0000e+00 | 0.2195 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
36 | LIVE_REGION_NOT_WORK_REGION | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 295008 (95.93%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 4.0659e-02 | 0.0000e+00 | 0.1975 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
37 | REG_CITY_NOT_LIVE_CITY | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 283472 (92.18%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 7.8173e-02 | 0.0000e+00 | 0.2684 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
38 | REG_CITY_NOT_WORK_CITY | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 236644 (76.95%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 2.3045e-01 | 0.0000e+00 | 0.4211 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
39 | LIVE_CITY_NOT_WORK_CITY | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 252296 (82.04%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 1.7955e-01 | 0.0000e+00 | 0.3838 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
40 | ORGANIZATION_TYPE | object | 58 (0.02%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | Business Entity Type 2 | Business Entity Type 3 | Business Entity Type 2 | Business Entity Type 3 | Self-employed | XNA | Self-employed | Business Entity Type 3 | Trade: type 2 | Medicine |
41 | EXT_SOURCE_1 | float64 | 114584 (37.26%) | 173378 (56.38%) | 0 (0.00%) | 0 (0.00%) | 0.3340 | 0.5060 | 0.6751 | 134133.0 | 9.6269e-01 | 5.0213e-01 | 1.4568e-02 | 0.2111 | NaN | 0.3569777836552319 | NaN | 0.4949180654446097 | NaN | NaN | NaN | 0.5392576763343839 | 0.6575317720639661 | NaN |
42 | EXT_SOURCE_2 | float64 | 119831 (38.97%) | 660 (0.21%) | 0 (0.00%) | 0 (0.00%) | 0.3925 | 0.5660 | 0.6636 | 306851.0 | 8.5500e-01 | 5.1439e-01 | 8.1736e-08 | 0.1911 | 0.5784756531591329 | 0.5152771000292551 | 0.4248161058463641 | 0.6670352420910267 | 0.0962461218101204 | 0.39473825561809134 | 0.7881138083280189 | 0.5579841354415206 | 0.5916561691175296 | 0.6675741251599911 |
43 | EXT_SOURCE_3 | float64 | 814 (0.26%) | 60965 (19.83%) | 0 (0.00%) | 0 (0.00%) | 0.3706 | 0.5353 | 0.6691 | 246546.0 | 8.9601e-01 | 5.1085e-01 | 5.2727e-04 | 0.1948 | 0.5971924268337128 | 0.5226973172821112 | 0.31547215492577346 | NaN | 0.4170996682522097 | 0.6058362647264226 | NaN | 0.7490217048463391 | 0.4956658291397297 | 0.7394117535524816 |
44 | APARTMENTS_AVG | float64 | 2339 (0.76%) | 156061 (50.75%) | 0 (0.00%) | 751 (0.24%) | 0.0577 | 0.0876 | 0.1485 | 151450.0 | 1.0000e+00 | 1.1744e-01 | 0.0000e+00 | 0.1082 | NaN | 0.1485 | NaN | NaN | NaN | 0.2216 | 0.0629 | 0.0825 | 0.1031 | 0.1825 |
45 | BASEMENTAREA_AVG | float64 | 3780 (1.23%) | 179943 (58.52%) | 0 (0.00%) | 14745 (4.79%) | 0.0442 | 0.0763 | 0.1122 | 127568.0 | 1.0000e+00 | 8.8442e-02 | 0.0000e+00 | 0.0824 | NaN | 0.0991 | NaN | NaN | NaN | 0.0776 | 0.0756 | 0.0788 | NaN | 0.1322 |
46 | YEARS_BEGINEXPLUATATION_AVG | float64 | 285 (0.09%) | 150007 (48.78%) | 0 (0.00%) | 514 (0.17%) | 0.9767 | 0.9816 | 0.9866 | 157504.0 | 1.0000e+00 | 9.7773e-01 | 0.0000e+00 | 0.0592 | 0.9856 | 0.9871 | NaN | NaN | NaN | 0.9826 | 0.9831 | 0.9786 | 0.9771 | 0.9861 |
47 | YEARS_BUILD_AVG | float64 | 149 (0.05%) | 204488 (66.50%) | 0 (0.00%) | 102 (0.03%) | 0.6872 | 0.7552 | 0.8232 | 103023.0 | 1.0000e+00 | 7.5247e-01 | 0.0000e+00 | 0.1133 | NaN | 0.8232 | NaN | NaN | NaN | NaN | 0.7688 | 0.7076 | 0.6872 | 0.8096 |
48 | COMMONAREA_AVG | float64 | 3181 (1.03%) | 214865 (69.87%) | 0 (0.00%) | 8442 (2.75%) | 0.0078 | 0.0211 | 0.0515 | 92646.0 | 1.0000e+00 | 4.4621e-02 | 0.0000e+00 | 0.0760 | NaN | 0.0889 | NaN | NaN | NaN | NaN | NaN | 0.0079 | NaN | 0.0378 |
49 | ELEVATORS_AVG | float64 | 257 (0.08%) | 163891 (53.30%) | 0 (0.00%) | 85718 (27.87%) | 0.0000 | 0.0000 | 0.1200 | 143620.0 | 1.0000e+00 | 7.8942e-02 | 0.0000e+00 | 0.1346 | 0.08 | 0.16 | NaN | NaN | NaN | 0.08 | 0.0 | 0.0 | NaN | 0.2 |
50 | ENTRANCES_AVG | float64 | 285 (0.09%) | 154828 (50.35%) | 0 (0.00%) | 323 (0.11%) | 0.0690 | 0.1379 | 0.2069 | 152683.0 | 1.0000e+00 | 1.4972e-01 | 0.0000e+00 | 0.1000 | 0.069 | 0.1379 | NaN | NaN | NaN | 0.0345 | 0.1379 | 0.1379 | 0.2069 | 0.1724 |
51 | FLOORSMAX_AVG | float64 | 403 (0.13%) | 153020 (49.76%) | 0 (0.00%) | 2938 (0.96%) | 0.1667 | 0.1667 | 0.3333 | 154491.0 | 1.0000e+00 | 2.2628e-01 | 0.0000e+00 | 0.1446 | 0.3333 | 0.3333 | NaN | NaN | NaN | 0.3333 | 0.1667 | 0.1667 | 0.1667 | 0.3333 |
52 | FLOORSMIN_AVG | float64 | 305 (0.10%) | 208642 (67.85%) | 0 (0.00%) | 2320 (0.75%) | 0.0833 | 0.2083 | 0.3750 | 98869.0 | 1.0000e+00 | 2.3189e-01 | 0.0000e+00 | 0.1614 | NaN | 0.375 | NaN | NaN | NaN | NaN | NaN | 0.2083 | NaN | 0.375 |
53 | LANDAREA_AVG | float64 | 3527 (1.15%) | 182590 (59.38%) | 0 (0.00%) | 15600 (5.07%) | 0.0187 | 0.0481 | 0.0856 | 124921.0 | 1.0000e+00 | 6.6333e-02 | 0.0000e+00 | 0.0812 | NaN | 0.1127 | NaN | NaN | NaN | 0.0911 | 0.0151 | 0.0203 | NaN | 0.1238 |
54 | LIVINGAPARTMENTS_AVG | float64 | 1868 (0.61%) | 210199 (68.35%) | 0 (0.00%) | 418 (0.14%) | 0.0504 | 0.0756 | 0.1210 | 97312.0 | 1.0000e+00 | 1.0077e-01 | 0.0000e+00 | 0.0926 | NaN | 0.121 | NaN | NaN | NaN | NaN | 0.0504 | 0.0672 | 0.0807 | 0.1488 |
55 | LIVINGAREA_AVG | float64 | 5199 (1.69%) | 154350 (50.19%) | 0 (0.00%) | 284 (0.09%) | 0.0453 | 0.0745 | 0.1299 | 153161.0 | 1.0000e+00 | 1.0740e-01 | 0.0000e+00 | 0.1106 | 0.0739 | 0.0915 | NaN | NaN | NaN | 0.0582 | 0.0556 | 0.0703 | 0.0841 | 0.1824 |
56 | NONLIVINGAPARTMENTS_AVG | float64 | 386 (0.13%) | 213514 (69.43%) | 0 (0.00%) | 54549 (17.74%) | 0.0000 | 0.0000 | 0.0039 | 93997.0 | 1.0000e+00 | 8.8087e-03 | 0.0000e+00 | 0.0477 | NaN | 0.0 | NaN | NaN | NaN | NaN | 0.0039 | 0.0 | 0.0154 | 0.0 |
57 | NONLIVINGAREA_AVG | float64 | 3290 (1.07%) | 169682 (55.18%) | 0 (0.00%) | 58735 (19.10%) | 0.0000 | 0.0036 | 0.0277 | 137829.0 | 1.0000e+00 | 2.8358e-02 | 0.0000e+00 | 0.0695 | NaN | 0.0 | NaN | NaN | NaN | 0.2242 | 0.0188 | 0.0 | 0.0128 | 0.0022 |
58 | APARTMENTS_MODE | float64 | 760 (0.25%) | 156061 (50.75%) | 0 (0.00%) | 976 (0.32%) | 0.0525 | 0.0840 | 0.1439 | 151450.0 | 1.0000e+00 | 1.1423e-01 | 0.0000e+00 | 0.1079 | NaN | 0.1513 | NaN | NaN | NaN | 0.2258 | 0.0641 | 0.084 | 0.105 | 0.1859 |
59 | BASEMENTAREA_MODE | float64 | 3841 (1.25%) | 179943 (58.52%) | 0 (0.00%) | 16598 (5.40%) | 0.0407 | 0.0746 | 0.1124 | 127568.0 | 1.0000e+00 | 8.7543e-02 | 0.0000e+00 | 0.0843 | NaN | 0.1028 | NaN | NaN | NaN | 0.0806 | 0.0785 | 0.0818 | NaN | 0.1372 |
60 | YEARS_BEGINEXPLUATATION_MODE | float64 | 221 (0.07%) | 150007 (48.78%) | 0 (0.00%) | 142 (0.05%) | 0.9767 | 0.9816 | 0.9866 | 157504.0 | 1.0000e+00 | 9.7707e-01 | 0.0000e+00 | 0.0646 | 0.9856 | 0.9871 | NaN | NaN | NaN | 0.9826 | 0.9831 | 0.9786 | 0.9772 | 0.9861 |
61 | YEARS_BUILD_MODE | float64 | 154 (0.05%) | 204488 (66.50%) | 0 (0.00%) | 103 (0.03%) | 0.6994 | 0.7648 | 0.8236 | 103023.0 | 1.0000e+00 | 7.5964e-01 | 0.0000e+00 | 0.1101 | NaN | 0.8301 | NaN | NaN | NaN | NaN | 0.7779 | 0.7190000000000001 | 0.6994 | 0.8171 |
62 | COMMONAREA_MODE | float64 | 3128 (1.02%) | 214865 (69.87%) | 0 (0.00%) | 9690 (3.15%) | 0.0072 | 0.0190 | 0.0490 | 92646.0 | 1.0000e+00 | 4.2553e-02 | 0.0000e+00 | 0.0744 | NaN | 0.0897 | NaN | NaN | NaN | NaN | NaN | 0.008 | NaN | 0.0382 |
63 | ELEVATORS_MODE | float64 | 26 (0.01%) | 163891 (53.30%) | 0 (0.00%) | 89498 (29.10%) | 0.0000 | 0.0000 | 0.1208 | 143620.0 | 1.0000e+00 | 7.4490e-02 | 0.0000e+00 | 0.1323 | 0.0806 | 0.1611 | NaN | NaN | NaN | 0.0806 | 0.0 | 0.0 | NaN | 0.2014 |
64 | ENTRANCES_MODE | float64 | 30 (0.01%) | 154828 (50.35%) | 0 (0.00%) | 387 (0.13%) | 0.0690 | 0.1379 | 0.2069 | 152683.0 | 1.0000e+00 | 1.4519e-01 | 0.0000e+00 | 0.1010 | 0.069 | 0.1379 | NaN | NaN | NaN | 0.0345 | 0.1379 | 0.1379 | 0.2069 | 0.1724 |
65 | FLOORSMAX_MODE | float64 | 25 (0.01%) | 153020 (49.76%) | 0 (0.00%) | 3415 (1.11%) | 0.1667 | 0.1667 | 0.3333 | 154491.0 | 1.0000e+00 | 2.2232e-01 | 0.0000e+00 | 0.1437 | 0.3333 | 0.3333 | NaN | NaN | NaN | 0.3333 | 0.1667 | 0.1667 | 0.1667 | 0.3333 |
66 | FLOORSMIN_MODE | float64 | 25 (0.01%) | 208642 (67.85%) | 0 (0.00%) | 2517 (0.82%) | 0.0833 | 0.2083 | 0.3750 | 98869.0 | 1.0000e+00 | 2.2806e-01 | 0.0000e+00 | 0.1612 | NaN | 0.375 | NaN | NaN | NaN | NaN | NaN | 0.2083 | NaN | 0.375 |
67 | LANDAREA_MODE | float64 | 3563 (1.16%) | 182590 (59.38%) | 0 (0.00%) | 17453 (5.68%) | 0.0166 | 0.0458 | 0.0841 | 124921.0 | 1.0000e+00 | 6.4958e-02 | 0.0000e+00 | 0.0818 | NaN | 0.1153 | NaN | NaN | NaN | 0.0932 | 0.0155 | 0.0207 | NaN | 0.1266 |
68 | LIVINGAPARTMENTS_MODE | float64 | 736 (0.24%) | 210199 (68.35%) | 0 (0.00%) | 519 (0.17%) | 0.0542 | 0.0771 | 0.1313 | 97312.0 | 1.0000e+00 | 1.0564e-01 | 0.0000e+00 | 0.0979 | NaN | 0.1322 | NaN | NaN | NaN | NaN | 0.0551 | 0.0735 | 0.0882 | 0.1625 |
69 | LIVINGAREA_MODE | float64 | 5301 (1.72%) | 154350 (50.19%) | 0 (0.00%) | 444 (0.14%) | 0.0427 | 0.0731 | 0.1252 | 153161.0 | 1.0000e+00 | 1.0598e-01 | 0.0000e+00 | 0.1118 | 0.077 | 0.0953 | NaN | NaN | NaN | 0.0606 | 0.0579 | 0.0733 | 0.0876 | 0.1901 |
70 | NONLIVINGAPARTMENTS_MODE | float64 | 167 (0.05%) | 213514 (69.43%) | 0 (0.00%) | 59255 (19.27%) | 0.0000 | 0.0000 | 0.0039 | 93997.0 | 1.0000e+00 | 8.0764e-03 | 0.0000e+00 | 0.0463 | NaN | 0.0 | NaN | NaN | NaN | NaN | 0.0039 | 0.0 | 0.0156 | 0.0 |
71 | NONLIVINGAREA_MODE | float64 | 3327 (1.08%) | 169682 (55.18%) | 0 (0.00%) | 67126 (21.83%) | 0.0000 | 0.0011 | 0.0231 | 137829.0 | 1.0000e+00 | 2.7022e-02 | 0.0000e+00 | 0.0703 | NaN | 0.0 | NaN | NaN | NaN | 0.2373 | 0.0199 | 0.0 | 0.0136 | 0.0023 |
72 | APARTMENTS_MEDI | float64 | 1148 (0.37%) | 156061 (50.75%) | 0 (0.00%) | 771 (0.25%) | 0.0583 | 0.0864 | 0.1489 | 151450.0 | 1.0000e+00 | 1.1785e-01 | 0.0000e+00 | 0.1091 | NaN | 0.1499 | NaN | NaN | NaN | 0.2238 | 0.0635 | 0.0833 | 0.1041 | 0.1842 |
73 | BASEMENTAREA_MEDI | float64 | 3772 (1.23%) | 179943 (58.52%) | 0 (0.00%) | 14991 (4.87%) | 0.0437 | 0.0758 | 0.1116 | 127568.0 | 1.0000e+00 | 8.7955e-02 | 0.0000e+00 | 0.0822 | NaN | 0.0991 | NaN | NaN | NaN | 0.0776 | 0.0756 | 0.0788 | NaN | 0.1322 |
74 | YEARS_BEGINEXPLUATATION_MEDI | float64 | 245 (0.08%) | 150007 (48.78%) | 0 (0.00%) | 548 (0.18%) | 0.9767 | 0.9816 | 0.9866 | 157504.0 | 1.0000e+00 | 9.7775e-01 | 0.0000e+00 | 0.0599 | 0.9856 | 0.9871 | NaN | NaN | NaN | 0.9826 | 0.9831 | 0.9786 | 0.9771 | 0.9861 |
75 | YEARS_BUILD_MEDI | float64 | 151 (0.05%) | 204488 (66.50%) | 0 (0.00%) | 101 (0.03%) | 0.6914 | 0.7585 | 0.8256 | 103023.0 | 1.0000e+00 | 7.5575e-01 | 0.0000e+00 | 0.1121 | NaN | 0.8256 | NaN | NaN | NaN | NaN | 0.7719 | 0.7115 | 0.6914 | 0.8121 |
76 | COMMONAREA_MEDI | float64 | 3202 (1.04%) | 214865 (69.87%) | 0 (0.00%) | 8691 (2.83%) | 0.0079 | 0.0208 | 0.0513 | 92646.0 | 1.0000e+00 | 4.4595e-02 | 0.0000e+00 | 0.0761 | NaN | 0.0895 | NaN | NaN | NaN | NaN | NaN | 0.008 | NaN | 0.0381 |
77 | ELEVATORS_MEDI | float64 | 46 (0.01%) | 163891 (53.30%) | 0 (0.00%) | 87026 (28.30%) | 0.0000 | 0.0000 | 0.1200 | 143620.0 | 1.0000e+00 | 7.8078e-02 | 0.0000e+00 | 0.1345 | 0.08 | 0.16 | NaN | NaN | NaN | 0.08 | 0.0 | 0.0 | NaN | 0.2 |
78 | ENTRANCES_MEDI | float64 | 46 (0.01%) | 154828 (50.35%) | 0 (0.00%) | 329 (0.11%) | 0.0690 | 0.1379 | 0.2069 | 152683.0 | 1.0000e+00 | 1.4921e-01 | 0.0000e+00 | 0.1004 | 0.069 | 0.1379 | NaN | NaN | NaN | 0.0345 | 0.1379 | 0.1379 | 0.2069 | 0.1724 |
79 | FLOORSMAX_MEDI | float64 | 49 (0.02%) | 153020 (49.76%) | 0 (0.00%) | 2995 (0.97%) | 0.1667 | 0.1667 | 0.3333 | 154491.0 | 1.0000e+00 | 2.2590e-01 | 0.0000e+00 | 0.1451 | 0.3333 | 0.3333 | NaN | NaN | NaN | 0.3333 | 0.1667 | 0.1667 | 0.1667 | 0.3333 |
80 | FLOORSMIN_MEDI | float64 | 47 (0.02%) | 208642 (67.85%) | 0 (0.00%) | 2351 (0.76%) | 0.0833 | 0.2083 | 0.3750 | 98869.0 | 1.0000e+00 | 2.3162e-01 | 0.0000e+00 | 0.1619 | NaN | 0.375 | NaN | NaN | NaN | NaN | NaN | 0.2083 | NaN | 0.375 |
81 | LANDAREA_MEDI | float64 | 3560 (1.16%) | 182590 (59.38%) | 0 (0.00%) | 15919 (5.18%) | 0.0187 | 0.0487 | 0.0868 | 124921.0 | 1.0000e+00 | 6.7169e-02 | 0.0000e+00 | 0.0822 | NaN | 0.1147 | NaN | NaN | NaN | 0.0927 | 0.0154 | 0.0206 | NaN | 0.126 |
82 | LIVINGAPARTMENTS_MEDI | float64 | 1097 (0.36%) | 210199 (68.35%) | 0 (0.00%) | 433 (0.14%) | 0.0513 | 0.0761 | 0.1231 | 97312.0 | 1.0000e+00 | 1.0195e-01 | 0.0000e+00 | 0.0936 | NaN | 0.1231 | NaN | NaN | NaN | NaN | 0.0513 | 0.0684 | 0.0821 | 0.1513 |
83 | LIVINGAREA_MEDI | float64 | 5281 (1.72%) | 154350 (50.19%) | 0 (0.00%) | 299 (0.10%) | 0.0457 | 0.0749 | 0.1303 | 153161.0 | 1.0000e+00 | 1.0861e-01 | 0.0000e+00 | 0.1123 | 0.0752 | 0.0931 | NaN | NaN | NaN | 0.0592 | 0.0566 | 0.0716 | 0.0856 | 0.1857 |
84 | NONLIVINGAPARTMENTS_MEDI | float64 | 214 (0.07%) | 213514 (69.43%) | 0 (0.00%) | 56097 (18.24%) | 0.0000 | 0.0000 | 0.0039 | 93997.0 | 1.0000e+00 | 8.6510e-03 | 0.0000e+00 | 0.0474 | NaN | 0.0 | NaN | NaN | NaN | NaN | 0.0039 | 0.0 | 0.0155 | 0.0 |
85 | NONLIVINGAREA_MEDI | float64 | 3323 (1.08%) | 169682 (55.18%) | 0 (0.00%) | 60954 (19.82%) | 0.0000 | 0.0031 | 0.0266 | 137829.0 | 1.0000e+00 | 2.8236e-02 | 0.0000e+00 | 0.0702 | NaN | 0.0 | NaN | NaN | NaN | 0.2289 | 0.0192 | 0.0 | 0.0131 | 0.0022 |
86 | FONDKAPREMONT_MODE | object | 4 (0.00%) | 210295 (68.39%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | NaN | reg oper account | NaN | NaN | NaN | NaN | reg oper spec account | reg oper account | reg oper account | reg oper account |
87 | HOUSETYPE_MODE | object | 3 (0.00%) | 154297 (50.18%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | NaN | block of flats | NaN | NaN | NaN | block of flats | block of flats | block of flats | block of flats | block of flats |
88 | TOTALAREA_MODE | float64 | 5116 (1.66%) | 148431 (48.27%) | 0 (0.00%) | 582 (0.19%) | 0.0412 | 0.0688 | 0.1276 | 159080.0 | 1.0000e+00 | 1.0255e-01 | 0.0000e+00 | 0.1075 | 0.0581 | 0.1206 | NaN | NaN | NaN | 0.0945 | 0.0475 | 0.0574 | 0.0689 | 0.1644 |
89 | WALLSMATERIAL_MODE | object | 7 (0.00%) | 156341 (50.84%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | NaN | Panel | NaN | NaN | NaN | Stone, brick | Stone, brick | Panel | Stone, brick | Panel |
90 | EMERGENCYSTATE_MODE | object | 2 (0.00%) | 145755 (47.40%) | 0 (0.00%) | 0 (0.00%) | 0.0000 | 0.0000 | 0.0000 | 0.0 | 0.0000e+00 | 0.0000e+00 | 0.0000e+00 | 0.0000 | No | No | NaN | NaN | NaN | No | No | No | No | No |
91 | OBS_30_CNT_SOCIAL_CIRCLE | float64 | 33 (0.01%) | 1021 (0.33%) | 0 (0.00%) | 163910 (53.30%) | 0.0000 | 0.0000 | 2.0000 | 306490.0 | 3.4800e+02 | 1.4222e+00 | 0.0000e+00 | 2.4010 | 0.0 | 2.0 | 1.0 | 0.0 | 0.0 | 1.0 | 2.0 | 3.0 | 9.0 | 0.0 |
92 | DEF_30_CNT_SOCIAL_CIRCLE | float64 | 10 (0.00%) | 1021 (0.33%) | 0 (0.00%) | 271324 (88.23%) | 0.0000 | 0.0000 | 0.0000 | 306490.0 | 3.4000e+01 | 1.4342e-01 | 0.0000e+00 | 0.4467 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
93 | OBS_60_CNT_SOCIAL_CIRCLE | float64 | 33 (0.01%) | 1021 (0.33%) | 0 (0.00%) | 164666 (53.55%) | 0.0000 | 0.0000 | 2.0000 | 306490.0 | 3.4400e+02 | 1.4053e+00 | 0.0000e+00 | 2.3798 | 0.0 | 2.0 | 1.0 | 0.0 | 0.0 | 1.0 | 2.0 | 3.0 | 9.0 | 0.0 |
94 | DEF_60_CNT_SOCIAL_CIRCLE | float64 | 9 (0.00%) | 1021 (0.33%) | 0 (0.00%) | 280721 (91.29%) | 0.0000 | 0.0000 | 0.0000 | 306490.0 | 2.4000e+01 | 1.0005e-01 | 0.0000e+00 | 0.3623 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
95 | DAYS_LAST_PHONE_CHANGE | float64 | 3773 (1.23%) | 1 (0.00%) | 269838 (87.75%) | 37672 (12.25%) | -1570.0000 | -757.0000 | -274.0000 | 307510.0 | 0.0000e+00 | -9.6286e+02 | -4.2920e+03 | 826.8085 | -3143.0 | -2.0 | -1523.0 | -1224.0 | 0.0 | -201.0 | -1128.0 | -2959.0 | -1634.0 | -1258.0 |
96 | FLAG_DOCUMENT_2 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307498 (100.00%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 4.2275e-05 | 0.0000e+00 | 0.0065 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
97 | FLAG_DOCUMENT_3 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 89171 (29.00%) | 0.0000 | 1.0000 | 1.0000 | 307511.0 | 1.0000e+00 | 7.1002e-01 | 0.0000e+00 | 0.4538 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 |
98 | FLAG_DOCUMENT_4 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307486 (99.99%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 8.1298e-05 | 0.0000e+00 | 0.0090 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
99 | FLAG_DOCUMENT_5 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 302863 (98.49%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 1.5115e-02 | 0.0000e+00 | 0.1220 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
100 | FLAG_DOCUMENT_6 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 280433 (91.19%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 8.8055e-02 | 0.0000e+00 | 0.2834 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
101 | FLAG_DOCUMENT_7 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307452 (99.98%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 1.9186e-04 | 0.0000e+00 | 0.0139 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
102 | FLAG_DOCUMENT_8 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 282487 (91.86%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 8.1376e-02 | 0.0000e+00 | 0.2734 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
103 | FLAG_DOCUMENT_9 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 306313 (99.61%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 3.8958e-03 | 0.0000e+00 | 0.0623 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
104 | FLAG_DOCUMENT_10 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307504 (100.00%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 2.2763e-05 | 0.0000e+00 | 0.0048 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
105 | FLAG_DOCUMENT_11 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 306308 (99.61%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 3.9121e-03 | 0.0000e+00 | 0.0624 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
106 | FLAG_DOCUMENT_12 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307509 (100.00%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 6.5038e-06 | 0.0000e+00 | 0.0026 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
107 | FLAG_DOCUMENT_13 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 306427 (99.65%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 3.5251e-03 | 0.0000e+00 | 0.0593 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
108 | FLAG_DOCUMENT_14 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 306608 (99.71%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 2.9365e-03 | 0.0000e+00 | 0.0541 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
109 | FLAG_DOCUMENT_15 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307139 (99.88%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 1.2097e-03 | 0.0000e+00 | 0.0348 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
110 | FLAG_DOCUMENT_16 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 304458 (99.01%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 9.9281e-03 | 0.0000e+00 | 0.0991 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
111 | FLAG_DOCUMENT_17 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307429 (99.97%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 2.6666e-04 | 0.0000e+00 | 0.0163 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
112 | FLAG_DOCUMENT_18 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 305011 (99.19%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 8.1298e-03 | 0.0000e+00 | 0.0898 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
113 | FLAG_DOCUMENT_19 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307328 (99.94%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 5.9510e-04 | 0.0000e+00 | 0.0244 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
114 | FLAG_DOCUMENT_20 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307355 (99.95%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 5.0730e-04 | 0.0000e+00 | 0.0225 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
115 | FLAG_DOCUMENT_21 | int64 | 2 (0.00%) | 0 (0.00%) | 0 (0.00%) | 307408 (99.97%) | 0.0000 | 0.0000 | 0.0000 | 307511.0 | 1.0000e+00 | 3.3495e-04 | 0.0000e+00 | 0.0183 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
116 | AMT_REQ_CREDIT_BUREAU_HOUR | float64 | 5 (0.00%) | 41519 (13.50%) | 0 (0.00%) | 264366 (85.97%) | 0.0000 | 0.0000 | 0.0000 | 265992.0 | 4.0000e+00 | 6.4024e-03 | 0.0000e+00 | 0.0838 | 0.0 | 0.0 | 0.0 | NaN | 0.0 | 0.0 | NaN | 0.0 | 0.0 | 0.0 |
117 | AMT_REQ_CREDIT_BUREAU_DAY | float64 | 9 (0.00%) | 41519 (13.50%) | 0 (0.00%) | 264503 (86.01%) | 0.0000 | 0.0000 | 0.0000 | 265992.0 | 9.0000e+00 | 7.0002e-03 | 0.0000e+00 | 0.1108 | 0.0 | 0.0 | 0.0 | NaN | 0.0 | 0.0 | NaN | 0.0 | 0.0 | 0.0 |
118 | AMT_REQ_CREDIT_BUREAU_WEEK | float64 | 9 (0.00%) | 41519 (13.50%) | 0 (0.00%) | 257456 (83.72%) | 0.0000 | 0.0000 | 0.0000 | 265992.0 | 8.0000e+00 | 3.4362e-02 | 0.0000e+00 | 0.2047 | 0.0 | 0.0 | 0.0 | NaN | 0.0 | 0.0 | NaN | 0.0 | 0.0 | 0.0 |
119 | AMT_REQ_CREDIT_BUREAU_MON | float64 | 24 (0.01%) | 41519 (13.50%) | 0 (0.00%) | 222233 (72.27%) | 0.0000 | 0.0000 | 0.0000 | 265992.0 | 2.7000e+01 | 2.6740e-01 | 0.0000e+00 | 0.9160 | 0.0 | 0.0 | 0.0 | NaN | 0.0 | 0.0 | NaN | 0.0 | 0.0 | 0.0 |
120 | AMT_REQ_CREDIT_BUREAU_QRT | float64 | 11 (0.00%) | 41519 (13.50%) | 0 (0.00%) | 215417 (70.05%) | 0.0000 | 0.0000 | 0.0000 | 265992.0 | 2.6100e+02 | 2.6547e-01 | 0.0000e+00 | 0.7941 | 0.0 | 0.0 | 0.0 | NaN | 1.0 | 0.0 | NaN | 0.0 | 1.0 | 0.0 |
121 | AMT_REQ_CREDIT_BUREAU_YEAR | float64 | 25 (0.01%) | 41519 (13.50%) | 0 (0.00%) | 71801 (23.35%) | 0.0000 | 1.0000 | 3.0000 | 265992.0 | 2.5000e+01 | 1.9000e+00 | 0.0000e+00 | 1.8693 | 1.0 | 0.0 | 5.0 | NaN | 1.0 | 3.0 | NaN | 1.0 | 1.0 | 1.0 |
# Keep only the rows whose tvt_code is 'train', restricted to the
# SK_ID_CURR values present in application_train, then drop the split label.
# NOTE(review): tvt presumably stands for train/validation/test - confirm.
pdf_tvt_extend = pd.read_pickle("pdf_tvt_extend.pkl", compression="bz2")
pdf_train_filtered = (
    pdf_tvt_extend[pdf_tvt_extend["tvt_code"] == "train"]
    .merge(pdf_train[["SK_ID_CURR"]], on="SK_ID_CURR")
    .drop(columns=["tvt_code"])
)
pdf_train_filtered.head()
SK_ID_CURR | TARGET | |
---|---|---|
0 | 100002 | 1 |
1 | 100003 | 0 |
2 | 100004 | 0 |
3 | 100006 | 0 |
4 | 100007 | 0 |
# Number of missing NAME_TYPE_SUITE values in the training set.
pdf_train['NAME_TYPE_SUITE'].isna().sum()
1292
# Frequency of each NAME_TYPE_SUITE category (used to pick the fill value).
pdf_train['NAME_TYPE_SUITE'].value_counts()
Unaccompanied 248526 Family 40149 Spouse, partner 11370 Children 3267 Other_B 1770 Other_A 866 Group of people 271 Name: NAME_TYPE_SUITE, dtype: int64
# There are 1292 null values; since the vast majority of rows are
# 'Unaccompanied', fill the nulls with 'Unaccompanied'.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected column: the chained in-place form is
# deprecated and does not update the parent DataFrame under Copy-on-Write.
pdf_train['NAME_TYPE_SUITE'] = pdf_train['NAME_TYPE_SUITE'].fillna(value='Unaccompanied')
pdf_test['NAME_TYPE_SUITE'] = pdf_test['NAME_TYPE_SUITE'].fillna(value='Unaccompanied')
# Fraction of training rows with a missing OWN_CAR_AGE.
pdf_train['OWN_CAR_AGE'].isna().mean()
0.6599081008484249
# Rows where the car age is missing although the applicant owns a car.
pdf_train[(pdf_train["OWN_CAR_AGE"].isna()) & (pdf_train["FLAG_OWN_CAR"] == "Y")].shape
(5, 122)
# Rows where the car age is missing and the applicant does not own a car.
pdf_train[(pdf_train["OWN_CAR_AGE"].isna()) & (pdf_train["FLAG_OWN_CAR"] == "N")].shape
(202924, 122)
# About 66% of OWN_CAR_AGE is missing; applicants without a car leave it
# empty, so fill the nulls with 0. (The few car owners with a missing age
# also receive 0.)
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected column: the chained in-place form is
# deprecated and does not update the parent DataFrame under Copy-on-Write.
pdf_train['OWN_CAR_AGE'] = pdf_train['OWN_CAR_AGE'].fillna(value=0)
pdf_test['OWN_CAR_AGE'] = pdf_test['OWN_CAR_AGE'].fillna(value=0)
# AMT_CREDIT: loan amount
# AMT_INCOME_TOTAL: client's income
# AMT_ANNUITY: amount to be repaid each year
# Add the same three ratio features to the train frame, then the test frame.
for pdf_app in (pdf_train, pdf_test):
    income_total = pdf_app['AMT_INCOME_TOTAL']
    pdf_app['CREDIT_INCOME_PERCENT'] = pdf_app['AMT_CREDIT'] / income_total
    pdf_app['ANNUITY_INCOME_PERCENT'] = pdf_app['AMT_ANNUITY'] / income_total
    # Ratio of the annuity to the credit amount.
    pdf_app['CREDIT_TERM'] = pdf_app['AMT_ANNUITY'] / pdf_app['AMT_CREDIT']
%%time
def _one_hot_feature_name(cname, val):
    """Return the output column name for category ``val`` of column ``cname``."""
    suffix = val
    # Only ' ', ':', '/' and '-' are mapped to '_'; every other character
    # (for example ',') is kept unchanged.
    for char in (" ", ":", "/", "-"):
        suffix = suffix.replace(char, "_")
    return "{}_{}".format(cname, suffix)


def gen_binary_one_hot_feat(pdf_input):
    """Build binary and one-hot features from an application table.

    Three groups of features are generated:

    * ``binary_default``: ``is_<column>`` is 1 where the column equals the
      first value listed for it and 0 otherwise (missing values give 0).
    * ``binary``: ``is_<column>`` is the source column passed through
      unchanged. REGION_RATING_CLIENT and REGION_RATING_CLIENT_W_CITY are in
      this group even though they hold more than two distinct values.
    * ``onehot``: one ``<column>_<value>`` indicator per listed value, with
      ' ', ':', '/' and '-' in the value replaced by '_'. Missing or
      unlisted values give 0 in every indicator.

    Args:
        pdf_input: pandas DataFrame holding ``SK_ID_CURR`` and every source
            column referenced below. It is not modified.

    Returns:
        A new DataFrame with the index of ``pdf_input`` whose columns are
        ``SK_ID_CURR`` followed by the generated features (declaration order
        on Python 3.7+).

    Raises:
        KeyError: if a required source column is missing from ``pdf_input``.
    """
    dict_feat = {
        "binary_default": {
            "NAME_CONTRACT_TYPE": ['Cash loans', 'Revolving loans'],
            "CODE_GENDER": ['M', 'F', 'XNA'],
            "FLAG_OWN_CAR": ['Y', 'N'],
            "FLAG_OWN_REALTY": ['Y', 'N'],
            "EMERGENCYSTATE_MODE": ['Yes', 'No'],
        },
        "binary": [
            "FLAG_EMP_PHONE",
            "FLAG_WORK_PHONE",
            "FLAG_PHONE",
            "FLAG_EMAIL",
            "REG_REGION_NOT_LIVE_REGION",
            "REG_REGION_NOT_WORK_REGION",
            "LIVE_REGION_NOT_WORK_REGION",
            "REG_CITY_NOT_WORK_CITY",
            "LIVE_CITY_NOT_WORK_CITY",
            "FLAG_DOCUMENT_3",
            "FLAG_DOCUMENT_5",
            "FLAG_DOCUMENT_6",
            "FLAG_DOCUMENT_8",
            "FLAG_DOCUMENT_9",
            "REGION_RATING_CLIENT",
            "REGION_RATING_CLIENT_W_CITY",
        ],
        "onehot": {
            "NAME_TYPE_SUITE": ["Unaccompanied", "Family", "Spouse, partner", "Children", "Other_A", "Other_B", "Group of people"],
            "NAME_INCOME_TYPE": ["Working", "State servant", "Commercial associate", "Pensioner", "Unemployed", "Student", "Businessman", "Maternity leave"],
            "NAME_EDUCATION_TYPE": ["Secondary / secondary special", "Higher education", "Incomplete higher", "Lower secondary", "Academic degree"],
            "NAME_FAMILY_STATUS": ["Single / not married", "Married", "Civil marriage", "Widow", "Separated", "Unknown"],
            "NAME_HOUSING_TYPE": ["House / apartment", "Rented apartment", "With parents", "Municipal apartment", "Office apartment", "Co-op apartment"],
            "OCCUPATION_TYPE": ["Laborers", "Core staff", "Accountants", "Managers", "Drivers", "Sales staff", "Cleaning staff", "Cooking staff", "Private service staff", "Medicine staff", "Security staff", "High skill tech staff", "Waiters/barmen staff", "Low-skill Laborers", "Realty agents", "Secretaries", "IT staff", "HR staff"],
            "ORGANIZATION_TYPE": ["Business Entity Type 3", "School", "Government", "Religion", "Other", "XNA", "Electricity", "Medicine", "Business Entity Type 2", "Self-employed", "Transport: type 2", "Construction", "Housing", "Kindergarten", "Trade: type 7", "Industry: type 11", "Military", "Services", "Security Ministries", "Transport: type 4", "Industry: type 1", "Emergency", "Security", "Trade: type 2", "University", "Transport: type 3", "Police", "Business Entity Type 1", "Postal", "Industry: type 4", "Agriculture", "Restaurant", "Culture", "Hotel", "Industry: type 7", "Trade: type 3", "Industry: type 3", "Bank", "Industry: type 9", "Insurance", "Trade: type 6", "Industry: type 2", "Transport: type 1", "Industry: type 12", "Mobile", "Trade: type 1", "Industry: type 5", "Industry: type 10", "Legal Services", "Advertising", "Trade: type 5", "Cleaning", "Industry: type 13", "Trade: type 4", "Telecom", "Industry: type 8", "Realtor", "Industry: type 6"],
            "FONDKAPREMONT_MODE": ["reg oper account", "org spec account", "reg oper spec account", "not specified"],
            "HOUSETYPE_MODE": ["block of flats", "terraced house", "specific housing"],
            "WALLSMATERIAL_MODE": ["Stone, brick", "Block", "Panel", "Mixed", "Wooden", "Others", "Monolithic"],
        }
    }

    # Collect every output column as a plain array and build the result frame
    # once at the end. This avoids copying the whole input frame and avoids
    # inserting well over a hundred columns one at a time.
    feat_names = ["SK_ID_CURR"]
    feat_values = {"SK_ID_CURR": pdf_input["SK_ID_CURR"].values}

    # Category -> 1 if the value equals the first (default) listed value.
    for cname, ls_vals in dict_feat["binary_default"].items():
        feat_name = "is_" + cname
        feat_names.append(feat_name)
        # Vectorised comparison; missing values compare unequal and give 0.
        feat_values[feat_name] = (pdf_input[cname] == ls_vals[0]).astype("int64").values

    # Columns that are already numeric flags: rename only.
    for cname in dict_feat["binary"]:
        feat_name = "is_" + cname
        feat_names.append(feat_name)
        feat_values[feat_name] = pdf_input[cname].values

    # One indicator column per listed category value.
    for cname, ls_vals in dict_feat["onehot"].items():
        col = pdf_input[cname]
        for val in ls_vals:
            feat_name = _one_hot_feature_name(cname, val)
            feat_names.append(feat_name)
            feat_values[feat_name] = (col == val).astype("int64").values

    # `columns` fixes the column order regardless of dict ordering.
    return pd.DataFrame(feat_values, index=pdf_input.index, columns=feat_names)
# Binary / one-hot baseline features for the training applications.
pdf01_baseline = gen_binary_one_hot_feat(pdf_train)
# Same feature set for the test applications.
pdf02_baseline = gen_binary_one_hot_feat(pdf_test)
# Show both shapes and the first five training rows, transposed so that each
# feature is displayed on its own line.
print(pdf01_baseline.shape, pdf02_baseline.shape)
display(pdf01_baseline.head().T)
((307511, 144), (48744, 144))
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
SK_ID_CURR | 100002 | 100003 | 100004 | 100006 | 100007 |
is_FLAG_EMP_PHONE | 1 | 1 | 1 | 1 | 1 |
is_FLAG_WORK_PHONE | 0 | 0 | 1 | 0 | 0 |
is_FLAG_PHONE | 1 | 1 | 1 | 0 | 0 |
is_FLAG_EMAIL | 0 | 0 | 0 | 0 | 0 |
is_REG_REGION_NOT_LIVE_REGION | 0 | 0 | 0 | 0 | 0 |
is_REG_REGION_NOT_WORK_REGION | 0 | 0 | 0 | 0 | 0 |
is_LIVE_REGION_NOT_WORK_REGION | 0 | 0 | 0 | 0 | 0 |
is_REG_CITY_NOT_WORK_CITY | 0 | 0 | 0 | 0 | 1 |
is_LIVE_CITY_NOT_WORK_CITY | 0 | 0 | 0 | 0 | 1 |
is_FLAG_DOCUMENT_3 | 1 | 1 | 0 | 1 | 0 |
is_FLAG_DOCUMENT_5 | 0 | 0 | 0 | 0 | 0 |
is_FLAG_DOCUMENT_6 | 0 | 0 | 0 | 0 | 0 |
is_FLAG_DOCUMENT_8 | 0 | 0 | 0 | 0 | 1 |
is_FLAG_DOCUMENT_9 | 0 | 0 | 0 | 0 | 0 |
is_REGION_RATING_CLIENT | 2 | 1 | 2 | 2 | 2 |
is_REGION_RATING_CLIENT_W_CITY | 2 | 1 | 2 | 2 | 2 |
NAME_INCOME_TYPE_Working | 1 | 0 | 1 | 1 | 1 |
NAME_INCOME_TYPE_State_servant | 0 | 1 | 0 | 0 | 0 |
NAME_INCOME_TYPE_Commercial_associate | 0 | 0 | 0 | 0 | 0 |
NAME_INCOME_TYPE_Pensioner | 0 | 0 | 0 | 0 | 0 |
NAME_INCOME_TYPE_Unemployed | 0 | 0 | 0 | 0 | 0 |
NAME_INCOME_TYPE_Student | 0 | 0 | 0 | 0 | 0 |
NAME_INCOME_TYPE_Businessman | 0 | 0 | 0 | 0 | 0 |
NAME_INCOME_TYPE_Maternity_leave | 0 | 0 | 0 | 0 | 0 |
FONDKAPREMONT_MODE_reg_oper_account | 1 | 1 | 0 | 0 | 0 |
FONDKAPREMONT_MODE_org_spec_account | 0 | 0 | 0 | 0 | 0 |
FONDKAPREMONT_MODE_reg_oper_spec_account | 0 | 0 | 0 | 0 | 0 |
FONDKAPREMONT_MODE_not_specified | 0 | 0 | 0 | 0 | 0 |
NAME_HOUSING_TYPE_House___apartment | 1 | 1 | 1 | 1 | 1 |
NAME_HOUSING_TYPE_Rented_apartment | 0 | 0 | 0 | 0 | 0 |
NAME_HOUSING_TYPE_With_parents | 0 | 0 | 0 | 0 | 0 |
NAME_HOUSING_TYPE_Municipal_apartment | 0 | 0 | 0 | 0 | 0 |
NAME_HOUSING_TYPE_Office_apartment | 0 | 0 | 0 | 0 | 0 |
NAME_HOUSING_TYPE_Co_op_apartment | 0 | 0 | 0 | 0 | 0 |
NAME_EDUCATION_TYPE_Secondary___secondary_special | 1 | 0 | 1 | 1 | 1 |
NAME_EDUCATION_TYPE_Higher_education | 0 | 1 | 0 | 0 | 0 |
NAME_EDUCATION_TYPE_Incomplete_higher | 0 | 0 | 0 | 0 | 0 |
NAME_EDUCATION_TYPE_Lower_secondary | 0 | 0 | 0 | 0 | 0 |
NAME_EDUCATION_TYPE_Academic_degree | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Laborers | 1 | 0 | 1 | 1 | 0 |
OCCUPATION_TYPE_Core_staff | 0 | 1 | 0 | 0 | 1 |
OCCUPATION_TYPE_Accountants | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Managers | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Drivers | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Sales_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Cleaning_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Cooking_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Private_service_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Medicine_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Security_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_High_skill_tech_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Waiters_barmen_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Low_skill_Laborers | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Realty_agents | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_Secretaries | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_IT_staff | 0 | 0 | 0 | 0 | 0 |
OCCUPATION_TYPE_HR_staff | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Business_Entity_Type_3 | 1 | 0 | 0 | 1 | 0 |
ORGANIZATION_TYPE_School | 0 | 1 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Government | 0 | 0 | 1 | 0 | 0 |
ORGANIZATION_TYPE_Religion | 0 | 0 | 0 | 0 | 1 |
ORGANIZATION_TYPE_Other | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_XNA | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Electricity | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Medicine | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Business_Entity_Type_2 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Self_employed | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Transport__type_2 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Construction | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Housing | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Kindergarten | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_7 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_11 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Military | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Services | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Security_Ministries | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Transport__type_4 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_1 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Emergency | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Security | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_2 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_University | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Transport__type_3 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Police | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Business_Entity_Type_1 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Postal | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_4 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Agriculture | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Restaurant | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Culture | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Hotel | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_7 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_3 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_3 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Bank | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_9 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Insurance | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_6 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_2 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Transport__type_1 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_12 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Mobile | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_1 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_5 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_10 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Legal_Services | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Advertising | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_5 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Cleaning | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_13 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Trade__type_4 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Telecom | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_8 | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Realtor | 0 | 0 | 0 | 0 | 0 |
ORGANIZATION_TYPE_Industry__type_6 | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Stone,_brick | 1 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Block | 0 | 1 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Panel | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Mixed | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Wooden | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Others | 0 | 0 | 0 | 0 | 0 |
WALLSMATERIAL_MODE_Monolithic | 0 | 0 | 0 | 0 | 0 |
NAME_FAMILY_STATUS_Single___not_married | 1 | 0 | 1 | 0 | 1 |
NAME_FAMILY_STATUS_Married | 0 | 1 | 0 | 0 | 0 |
NAME_FAMILY_STATUS_Civil_marriage | 0 | 0 | 0 | 1 | 0 |
NAME_FAMILY_STATUS_Widow | 0 | 0 | 0 | 0 | 0 |
NAME_FAMILY_STATUS_Separated | 0 | 0 | 0 | 0 | 0 |
NAME_FAMILY_STATUS_Unknown | 0 | 0 | 0 | 0 | 0 |
HOUSETYPE_MODE_block_of_flats | 1 | 1 | 0 | 0 | 0 |
HOUSETYPE_MODE_terraced_house | 0 | 0 | 0 | 0 | 0 |
HOUSETYPE_MODE_specific_housing | 0 | 0 | 0 | 0 | 0 |
NAME_TYPE_SUITE_Unaccompanied | 1 | 0 | 1 | 1 | 1 |
NAME_TYPE_SUITE_Family | 0 | 1 | 0 | 0 | 0 |
NAME_TYPE_SUITE_Spouse,_partner | 0 | 0 | 0 | 0 | 0 |
NAME_TYPE_SUITE_Children | 0 | 0 | 0 | 0 | 0 |
NAME_TYPE_SUITE_Other_A | 0 | 0 | 0 | 0 | 0 |
NAME_TYPE_SUITE_Other_B | 0 | 0 | 0 | 0 | 0 |
NAME_TYPE_SUITE_Group_of_people | 0 | 0 | 0 | 0 | 0 |
is_FLAG_OWN_CAR | 0 | 0 | 1 | 0 | 0 |
is_NAME_CONTRACT_TYPE | 1 | 1 | 0 | 1 | 1 |
is_FLAG_OWN_REALTY | 1 | 0 | 1 | 1 | 1 |
is_CODE_GENDER | 1 | 0 | 1 | 0 | 1 |
is_EMERGENCYSTATE_MODE | 0 | 0 | 0 | 0 | 0 |
CPU times: user 2min 44s, sys: 4.28 s, total: 2min 48s Wall time: 19.7 s
# Evaluate every baseline feature against the labelled training rows.
# NOTE(review): feature_evaluate is not defined in this file; presumably it
# comes from lib_feature_engineering and reports per-feature auc / corr /
# coverage, as the displayed table suggests - confirm.
eval_agg01 = feature_evaluate(pdf_train_filtered, pdf01_baseline)
display(eval_agg01)
name | auc | corr | coverage | |
---|---|---|---|---|
16 | NAME_INCOME_TYPE_Working | 0.5541 | 0.0591 | 1.0 |
15 | is_REGION_RATING_CLIENT_W_CITY | 0.5495 | 0.0615 | 1.0 |
14 | is_REGION_RATING_CLIENT | 0.5481 | 0.0592 | 1.0 |
141 | is_CODE_GENDER | 0.5466 | 0.0536 | 1.0 |
35 | NAME_EDUCATION_TYPE_Higher_education | 0.5440 | -0.0560 | 1.0 |
34 | NAME_EDUCATION_TYPE_Secondary___secondary_special | 0.5406 | 0.0488 | 1.0 |
7 | is_REG_CITY_NOT_WORK_CITY | 0.5395 | 0.0512 | 1.0 |
9 | is_FLAG_DOCUMENT_3 | 0.5379 | 0.0456 | 1.0 |
128 | HOUSETYPE_MODE_block_of_flats | 0.5379 | -0.0414 | 1.0 |
19 | NAME_INCOME_TYPE_Pensioner | 0.5324 | -0.0461 | 1.0 |
62 | ORGANIZATION_TYPE_XNA | 0.5322 | -0.0458 | 1.0 |
0 | is_FLAG_EMP_PHONE | 0.5322 | 0.0458 | 1.0 |
39 | OCCUPATION_TYPE_Laborers | 0.5295 | 0.0420 | 1.0 |
117 | WALLSMATERIAL_MODE_Panel | 0.5246 | -0.0327 | 1.0 |
8 | is_LIVE_CITY_NOT_WORK_CITY | 0.5239 | 0.0340 | 1.0 |
123 | NAME_FAMILY_STATUS_Married | 0.5220 | -0.0251 | 1.0 |
1 | is_FLAG_WORK_PHONE | 0.5208 | 0.0284 | 1.0 |
2 | is_FLAG_PHONE | 0.5192 | -0.0234 | 1.0 |
138 | is_FLAG_OWN_CAR | 0.5192 | -0.0221 | 1.0 |
66 | ORGANIZATION_TYPE_Self_employed | 0.5181 | 0.0300 | 1.0 |
57 | ORGANIZATION_TYPE_Business_Entity_Type_3 | 0.5180 | 0.0236 | 1.0 |
122 | NAME_FAMILY_STATUS_Single___not_married | 0.5177 | 0.0273 | 1.0 |
24 | FONDKAPREMONT_MODE_reg_oper_account | 0.5177 | -0.0226 | 1.0 |
139 | is_NAME_CONTRACT_TYPE | 0.5164 | 0.0304 | 1.0 |
28 | NAME_HOUSING_TYPE_House___apartment | 0.5162 | -0.0281 | 1.0 |
11 | is_FLAG_DOCUMENT_6 | 0.5154 | -0.0296 | 1.0 |
43 | OCCUPATION_TYPE_Drivers | 0.5129 | 0.0296 | 1.0 |
124 | NAME_FAMILY_STATUS_Civil_marriage | 0.5128 | 0.0238 | 1.0 |
30 | NAME_HOUSING_TYPE_With_parents | 0.5120 | 0.0306 | 1.0 |
17 | NAME_INCOME_TYPE_State_servant | 0.5115 | -0.0245 | 1.0 |
40 | OCCUPATION_TYPE_Core_staff | 0.5112 | -0.0214 | 1.0 |
44 | OCCUPATION_TYPE_Sales_staff | 0.5110 | 0.0197 | 1.0 |
18 | NAME_INCOME_TYPE_Commercial_associate | 0.5103 | -0.0133 | 1.0 |
115 | WALLSMATERIAL_MODE_Stone,_brick | 0.5089 | -0.0119 | 1.0 |
125 | NAME_FAMILY_STATUS_Widow | 0.5082 | -0.0200 | 1.0 |
42 | OCCUPATION_TYPE_Managers | 0.5081 | -0.0173 | 1.0 |
41 | OCCUPATION_TYPE_Accountants | 0.5068 | -0.0211 | 1.0 |
140 | is_FLAG_OWN_REALTY | 0.5062 | -0.0074 | 1.0 |
68 | ORGANIZATION_TYPE_Construction | 0.5056 | 0.0207 | 1.0 |
131 | NAME_TYPE_SUITE_Unaccompanied | 0.5054 | 0.0076 | 1.0 |
12 | is_FLAG_DOCUMENT_8 | 0.5051 | -0.0101 | 1.0 |
132 | NAME_TYPE_SUITE_Family | 0.5047 | -0.0077 | 1.0 |
50 | OCCUPATION_TYPE_High_skill_tech_staff | 0.5045 | -0.0130 | 1.0 |
58 | ORGANIZATION_TYPE_School | 0.5045 | -0.0146 | 1.0 |
29 | NAME_HOUSING_TYPE_Rented_apartment | 0.5045 | 0.0196 | 1.0 |
52 | OCCUPATION_TYPE_Low_skill_Laborers | 0.5044 | 0.0297 | 1.0 |
49 | OCCUPATION_TYPE_Security_staff | 0.5039 | 0.0146 | 1.0 |
26 | FONDKAPREMONT_MODE_reg_oper_spec_account | 0.5038 | -0.0107 | 1.0 |
64 | ORGANIZATION_TYPE_Medicine | 0.5032 | -0.0092 | 1.0 |
25 | FONDKAPREMONT_MODE_org_spec_account | 0.5031 | -0.0126 | 1.0 |
116 | WALLSMATERIAL_MODE_Block | 0.5029 | -0.0094 | 1.0 |
46 | OCCUPATION_TYPE_Cooking_staff | 0.5029 | 0.0115 | 1.0 |
5 | is_REG_REGION_NOT_WORK_REGION | 0.5025 | 0.0063 | 1.0 |
37 | NAME_EDUCATION_TYPE_Lower_secondary | 0.5025 | 0.0123 | 1.0 |
59 | ORGANIZATION_TYPE_Government | 0.5024 | -0.0073 | 1.0 |
71 | ORGANIZATION_TYPE_Trade__type_7 | 0.5022 | 0.0077 | 1.0 |
48 | OCCUPATION_TYPE_Medicine_staff | 0.5020 | -0.0068 | 1.0 |
73 | ORGANIZATION_TYPE_Military | 0.5019 | -0.0110 | 1.0 |
93 | ORGANIZATION_TYPE_Industry__type_3 | 0.5018 | 0.0096 | 1.0 |
94 | ORGANIZATION_TYPE_Bank | 0.5018 | -0.0108 | 1.0 |
82 | ORGANIZATION_TYPE_Transport__type_3 | 0.5018 | 0.0156 | 1.0 |
83 | ORGANIZATION_TYPE_Police | 0.5017 | -0.0107 | 1.0 |
88 | ORGANIZATION_TYPE_Restaurant | 0.5017 | 0.0121 | 1.0 |
70 | ORGANIZATION_TYPE_Kindergarten | 0.5017 | -0.0063 | 1.0 |
79 | ORGANIZATION_TYPE_Security | 0.5016 | 0.0088 | 1.0 |
87 | ORGANIZATION_TYPE_Agriculture | 0.5016 | 0.0100 | 1.0 |
45 | OCCUPATION_TYPE_Cleaning_staff | 0.5016 | 0.0070 | 1.0 |
119 | WALLSMATERIAL_MODE_Wooden | 0.5016 | 0.0065 | 1.0 |
75 | ORGANIZATION_TYPE_Security_Ministries | 0.5014 | -0.0095 | 1.0 |
92 | ORGANIZATION_TYPE_Trade__type_3 | 0.5014 | 0.0071 | 1.0 |
65 | ORGANIZATION_TYPE_Business_Entity_Type_2 | 0.5013 | 0.0041 | 1.0 |
61 | ORGANIZATION_TYPE_Other | 0.5013 | -0.0032 | 1.0 |
4 | is_REG_REGION_NOT_LIVE_REGION | 0.5013 | 0.0057 | 1.0 |
36 | NAME_EDUCATION_TYPE_Incomplete_higher | 0.5012 | 0.0037 | 1.0 |
121 | WALLSMATERIAL_MODE_Monolithic | 0.5012 | -0.0084 | 1.0 |
76 | ORGANIZATION_TYPE_Transport__type_4 | 0.5012 | 0.0048 | 1.0 |
51 | OCCUPATION_TYPE_Waiters_barmen_staff | 0.5011 | 0.0089 | 1.0 |
6 | is_LIVE_REGION_NOT_WORK_REGION | 0.5010 | 0.0027 | 1.0 |
81 | ORGANIZATION_TYPE_University | 0.5009 | -0.0076 | 1.0 |
142 | is_EMERGENCYSTATE_MODE | 0.5008 | 0.0052 | 1.0 |
95 | ORGANIZATION_TYPE_Industry__type_9 | 0.5008 | -0.0043 | 1.0 |
32 | NAME_HOUSING_TYPE_Office_apartment | 0.5008 | -0.0048 | 1.0 |
27 | FONDKAPREMONT_MODE_not_specified | 0.5008 | -0.0031 | 1.0 |
47 | OCCUPATION_TYPE_Private_service_staff | 0.5008 | -0.0045 | 1.0 |
133 | NAME_TYPE_SUITE_Spouse,_partner | 0.5007 | -0.0019 | 1.0 |
31 | NAME_HOUSING_TYPE_Municipal_apartment | 0.5007 | 0.0019 | 1.0 |
13 | is_FLAG_DOCUMENT_9 | 0.5006 | -0.0054 | 1.0 |
130 | HOUSETYPE_MODE_specific_housing | 0.5006 | 0.0048 | 1.0 |
72 | ORGANIZATION_TYPE_Industry__type_11 | 0.5006 | 0.0036 | 1.0 |
3 | is_FLAG_EMAIL | 0.5006 | -0.0014 | 1.0 |
86 | ORGANIZATION_TYPE_Industry__type_4 | 0.5006 | 0.0060 | 1.0 |
74 | ORGANIZATION_TYPE_Services | 0.5005 | -0.0040 | 1.0 |
80 | ORGANIZATION_TYPE_Trade__type_2 | 0.5005 | -0.0035 | 1.0 |
77 | ORGANIZATION_TYPE_Industry__type_1 | 0.5005 | 0.0046 | 1.0 |
97 | ORGANIZATION_TYPE_Trade__type_6 | 0.5004 | -0.0053 | 1.0 |
63 | ORGANIZATION_TYPE_Electricity | 0.5004 | -0.0041 | 1.0 |
96 | ORGANIZATION_TYPE_Insurance | 0.5004 | -0.0049 | 1.0 |
134 | NAME_TYPE_SUITE_Children | 0.5004 | -0.0019 | 1.0 |
126 | NAME_FAMILY_STATUS_Separated | 0.5004 | -0.0008 | 1.0 |
129 | HOUSETYPE_MODE_terraced_house | 0.5003 | 0.0030 | 1.0 |
100 | ORGANIZATION_TYPE_Industry__type_12 | 0.5003 | -0.0054 | 1.0 |
67 | ORGANIZATION_TYPE_Transport__type_2 | 0.5003 | -0.0020 | 1.0 |
10 | is_FLAG_DOCUMENT_5 | 0.5003 | 0.0014 | 1.0 |
118 | WALLSMATERIAL_MODE_Mixed | 0.5003 | -0.0019 | 1.0 |
113 | ORGANIZATION_TYPE_Realtor | 0.5003 | 0.0044 | 1.0 |
38 | NAME_EDUCATION_TYPE_Academic_degree | 0.5003 | -0.0062 | 1.0 |
56 | OCCUPATION_TYPE_HR_staff | 0.5002 | -0.0032 | 1.0 |
89 | ORGANIZATION_TYPE_Culture | 0.5002 | -0.0040 | 1.0 |
90 | ORGANIZATION_TYPE_Hotel | 0.5002 | -0.0023 | 1.0 |
69 | ORGANIZATION_TYPE_Housing | 0.5002 | -0.0013 | 1.0 |
55 | OCCUPATION_TYPE_IT_staff | 0.5002 | -0.0029 | 1.0 |
135 | NAME_TYPE_SUITE_Other_A | 0.5002 | 0.0021 | 1.0 |
78 | ORGANIZATION_TYPE_Emergency | 0.5002 | -0.0025 | 1.0 |
108 | ORGANIZATION_TYPE_Cleaning | 0.5002 | 0.0036 | 1.0 |
91 | ORGANIZATION_TYPE_Industry__type_7 | 0.5002 | -0.0016 | 1.0 |
20 | NAME_INCOME_TYPE_Unemployed | 0.5002 | 0.0117 | 1.0 |
85 | ORGANIZATION_TYPE_Postal | 0.5002 | -0.0011 | 1.0 |
99 | ORGANIZATION_TYPE_Transport__type_1 | 0.5002 | -0.0036 | 1.0 |
84 | ORGANIZATION_TYPE_Business_Entity_Type_1 | 0.5002 | 0.0006 | 1.0 |
98 | ORGANIZATION_TYPE_Industry__type_2 | 0.5002 | -0.0022 | 1.0 |
60 | ORGANIZATION_TYPE_Religion | 0.5001 | -0.0047 | 1.0 |
102 | ORGANIZATION_TYPE_Trade__type_1 | 0.5001 | 0.0022 | 1.0 |
103 | ORGANIZATION_TYPE_Industry__type_5 | 0.5001 | -0.0017 | 1.0 |
136 | NAME_TYPE_SUITE_Other_B | 0.5001 | 0.0008 | 1.0 |
101 | ORGANIZATION_TYPE_Mobile | 0.5001 | 0.0019 | 1.0 |
106 | ORGANIZATION_TYPE_Advertising | 0.5001 | -0.0016 | 1.0 |
54 | OCCUPATION_TYPE_Secretaries | 0.5001 | -0.0008 | 1.0 |
33 | NAME_HOUSING_TYPE_Co_op_apartment | 0.5001 | -0.0007 | 1.0 |
114 | ORGANIZATION_TYPE_Industry__type_6 | 0.5001 | -0.0022 | 1.0 |
120 | WALLSMATERIAL_MODE_Others | 0.5001 | -0.0006 | 1.0 |
105 | ORGANIZATION_TYPE_Legal_Services | 0.5001 | -0.0009 | 1.0 |
104 | ORGANIZATION_TYPE_Industry__type_10 | 0.5000 | -0.0014 | 1.0 |
110 | ORGANIZATION_TYPE_Trade__type_4 | 0.5000 | -0.0019 | 1.0 |
112 | ORGANIZATION_TYPE_Industry__type_8 | 0.5000 | 0.0026 | 1.0 |
53 | OCCUPATION_TYPE_Realty_agents | 0.5000 | 0.0004 | 1.0 |
109 | ORGANIZATION_TYPE_Industry__type_13 | 0.5000 | 0.0009 | 1.0 |
23 | NAME_INCOME_TYPE_Maternity_leave | 0.5000 | 0.0027 | 1.0 |
137 | NAME_TYPE_SUITE_Group_of_people | 0.5000 | 0.0004 | 1.0 |
111 | ORGANIZATION_TYPE_Telecom | 0.5000 | 0.0003 | 1.0 |
22 | NAME_INCOME_TYPE_Businessman | 0.5000 | -0.0018 | 1.0 |
21 | NAME_INCOME_TYPE_Student | 0.5000 | -0.0018 | 1.0 |
107 | ORGANIZATION_TYPE_Trade__type_5 | 0.5000 | -0.0008 | 1.0 |
127 | NAME_FAMILY_STATUS_Unknown | 0.5000 | -0.0006 | 1.0 |
# Shape (rows, columns) of the features whose AUC is at or below the 0.501
# cut-off, i.e. the ones the selection step below leaves out.
eval_agg01.query("auc <= 0.501").shape
(66, 4)
# Names of the features whose AUC clears the 0.501 cut-off.
sel_feat = eval_agg01.loc[eval_agg01["auc"] > 0.501, "name"].tolist()
# The same column subset (ID + selected features) is applied to both frames.
keep_cols = ["SK_ID_CURR"] + sel_feat
# train
pdf01_baseline = pdf01_baseline[keep_cols]
print(pdf01_baseline.shape)
# test
pdf02_baseline = pdf02_baseline[keep_cols]
print(pdf02_baseline.shape)
(307511, 78) (48744, 78)
# Extra "percent of credit and income" columns were added to pdf_train, so the
# metadata report cannot be used to list the continuous columns any more;
# read the dtypes straight from the frame instead.
# Previous approach:
# ls_continuous_name = pdf_meta[pdf_meta["sub_type"] == "float64"]["name"].tolist()
s_dtype = pdf_train.dtypes
ls_continuous_name = s_dtype.index[(s_dtype == "float64").values].tolist()
print(len(ls_continuous_name))
68
# ID column followed by every float64 column; the same list is used for
# train and test.
continuous_cols = ["SK_ID_CURR"] + ls_continuous_name
# Copies, so columns added later do not end up in pdf_train / pdf_test.
pdf11_baseline = pdf_train[continuous_cols].copy()  # train features
pdf12_baseline = pdf_test[continuous_cols].copy()  # test features
# Report both shapes and preview the first five train rows, transposed so
# that each feature is a row.
print(pdf11_baseline.shape, pdf12_baseline.shape)
display(pdf11_baseline.head(5).transpose())
((307511, 69), (48744, 69))
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
SK_ID_CURR | 100002.0000 | 1.0000e+05 | 100004.0000 | 100006.0000 | 100007.0000 |
AMT_INCOME_TOTAL | 202500.0000 | 2.7000e+05 | 67500.0000 | 135000.0000 | 121500.0000 |
AMT_CREDIT | 406597.5000 | 1.2935e+06 | 135000.0000 | 312682.5000 | 513000.0000 |
AMT_ANNUITY | 24700.5000 | 3.5698e+04 | 6750.0000 | 29686.5000 | 21865.5000 |
AMT_GOODS_PRICE | 351000.0000 | 1.1295e+06 | 135000.0000 | 297000.0000 | 513000.0000 |
REGION_POPULATION_RELATIVE | 0.0188 | 3.5410e-03 | 0.0100 | 0.0080 | 0.0287 |
DAYS_REGISTRATION | -3648.0000 | -1.1860e+03 | -4260.0000 | -9833.0000 | -4311.0000 |
OWN_CAR_AGE | 0.0000 | 0.0000e+00 | 26.0000 | 0.0000 | 0.0000 |
CNT_FAM_MEMBERS | 1.0000 | 2.0000e+00 | 1.0000 | 2.0000 | 1.0000 |
EXT_SOURCE_1 | 0.0830 | 3.1127e-01 | NaN | NaN | NaN |
EXT_SOURCE_2 | 0.2629 | 6.2225e-01 | 0.5559 | 0.6504 | 0.3227 |
EXT_SOURCE_3 | 0.1394 | NaN | 0.7296 | NaN | NaN |
APARTMENTS_AVG | 0.0247 | 9.5900e-02 | NaN | NaN | NaN |
BASEMENTAREA_AVG | 0.0369 | 5.2900e-02 | NaN | NaN | NaN |
YEARS_BEGINEXPLUATATION_AVG | 0.9722 | 9.8510e-01 | NaN | NaN | NaN |
YEARS_BUILD_AVG | 0.6192 | 7.9600e-01 | NaN | NaN | NaN |
COMMONAREA_AVG | 0.0143 | 6.0500e-02 | NaN | NaN | NaN |
ELEVATORS_AVG | 0.0000 | 8.0000e-02 | NaN | NaN | NaN |
ENTRANCES_AVG | 0.0690 | 3.4500e-02 | NaN | NaN | NaN |
FLOORSMAX_AVG | 0.0833 | 2.9170e-01 | NaN | NaN | NaN |
FLOORSMIN_AVG | 0.1250 | 3.3330e-01 | NaN | NaN | NaN |
LANDAREA_AVG | 0.0369 | 1.3000e-02 | NaN | NaN | NaN |
LIVINGAPARTMENTS_AVG | 0.0202 | 7.7300e-02 | NaN | NaN | NaN |
LIVINGAREA_AVG | 0.0190 | 5.4900e-02 | NaN | NaN | NaN |
NONLIVINGAPARTMENTS_AVG | 0.0000 | 3.9000e-03 | NaN | NaN | NaN |
NONLIVINGAREA_AVG | 0.0000 | 9.8000e-03 | NaN | NaN | NaN |
APARTMENTS_MODE | 0.0252 | 9.2400e-02 | NaN | NaN | NaN |
BASEMENTAREA_MODE | 0.0383 | 5.3800e-02 | NaN | NaN | NaN |
YEARS_BEGINEXPLUATATION_MODE | 0.9722 | 9.8510e-01 | NaN | NaN | NaN |
YEARS_BUILD_MODE | 0.6341 | 8.0400e-01 | NaN | NaN | NaN |
COMMONAREA_MODE | 0.0144 | 4.9700e-02 | NaN | NaN | NaN |
ELEVATORS_MODE | 0.0000 | 8.0600e-02 | NaN | NaN | NaN |
ENTRANCES_MODE | 0.0690 | 3.4500e-02 | NaN | NaN | NaN |
FLOORSMAX_MODE | 0.0833 | 2.9170e-01 | NaN | NaN | NaN |
FLOORSMIN_MODE | 0.1250 | 3.3330e-01 | NaN | NaN | NaN |
LANDAREA_MODE | 0.0377 | 1.2800e-02 | NaN | NaN | NaN |
LIVINGAPARTMENTS_MODE | 0.0220 | 7.9000e-02 | NaN | NaN | NaN |
LIVINGAREA_MODE | 0.0198 | 5.5400e-02 | NaN | NaN | NaN |
NONLIVINGAPARTMENTS_MODE | 0.0000 | 0.0000e+00 | NaN | NaN | NaN |
NONLIVINGAREA_MODE | 0.0000 | 0.0000e+00 | NaN | NaN | NaN |
APARTMENTS_MEDI | 0.0250 | 9.6800e-02 | NaN | NaN | NaN |
BASEMENTAREA_MEDI | 0.0369 | 5.2900e-02 | NaN | NaN | NaN |
YEARS_BEGINEXPLUATATION_MEDI | 0.9722 | 9.8510e-01 | NaN | NaN | NaN |
YEARS_BUILD_MEDI | 0.6243 | 7.9870e-01 | NaN | NaN | NaN |
COMMONAREA_MEDI | 0.0144 | 6.0800e-02 | NaN | NaN | NaN |
ELEVATORS_MEDI | 0.0000 | 8.0000e-02 | NaN | NaN | NaN |
ENTRANCES_MEDI | 0.0690 | 3.4500e-02 | NaN | NaN | NaN |
FLOORSMAX_MEDI | 0.0833 | 2.9170e-01 | NaN | NaN | NaN |
FLOORSMIN_MEDI | 0.1250 | 3.3330e-01 | NaN | NaN | NaN |
LANDAREA_MEDI | 0.0375 | 1.3200e-02 | NaN | NaN | NaN |
LIVINGAPARTMENTS_MEDI | 0.0205 | 7.8700e-02 | NaN | NaN | NaN |
LIVINGAREA_MEDI | 0.0193 | 5.5800e-02 | NaN | NaN | NaN |
NONLIVINGAPARTMENTS_MEDI | 0.0000 | 3.9000e-03 | NaN | NaN | NaN |
NONLIVINGAREA_MEDI | 0.0000 | 1.0000e-02 | NaN | NaN | NaN |
TOTALAREA_MODE | 0.0149 | 7.1400e-02 | NaN | NaN | NaN |
OBS_30_CNT_SOCIAL_CIRCLE | 2.0000 | 1.0000e+00 | 0.0000 | 2.0000 | 0.0000 |
DEF_30_CNT_SOCIAL_CIRCLE | 2.0000 | 0.0000e+00 | 0.0000 | 0.0000 | 0.0000 |
OBS_60_CNT_SOCIAL_CIRCLE | 2.0000 | 1.0000e+00 | 0.0000 | 2.0000 | 0.0000 |
DEF_60_CNT_SOCIAL_CIRCLE | 2.0000 | 0.0000e+00 | 0.0000 | 0.0000 | 0.0000 |
DAYS_LAST_PHONE_CHANGE | -1134.0000 | -8.2800e+02 | -815.0000 | -617.0000 | -1106.0000 |
AMT_REQ_CREDIT_BUREAU_HOUR | 0.0000 | 0.0000e+00 | 0.0000 | NaN | 0.0000 |
AMT_REQ_CREDIT_BUREAU_DAY | 0.0000 | 0.0000e+00 | 0.0000 | NaN | 0.0000 |
AMT_REQ_CREDIT_BUREAU_WEEK | 0.0000 | 0.0000e+00 | 0.0000 | NaN | 0.0000 |
AMT_REQ_CREDIT_BUREAU_MON | 0.0000 | 0.0000e+00 | 0.0000 | NaN | 0.0000 |
AMT_REQ_CREDIT_BUREAU_QRT | 0.0000 | 0.0000e+00 | 0.0000 | NaN | 0.0000 |
AMT_REQ_CREDIT_BUREAU_YEAR | 1.0000 | 0.0000e+00 | 0.0000 | NaN | 0.0000 |
CREDIT_INCOME_PERCENT | 2.0079 | 4.7908e+00 | 2.0000 | 2.3162 | 4.2222 |
ANNUITY_INCOME_PERCENT | 0.1220 | 1.3222e-01 | 0.1000 | 0.2199 | 0.1800 |
CREDIT_TERM | 0.0607 | 2.7598e-02 | 0.0500 | 0.0949 | 0.0426 |
# Evaluate every column of the continuous-feature frame and show the result.
# NOTE(review): feature_evaluate is not defined in this section (presumably it
# comes from lib_feature_engineering); the displayed table has the columns
# name / auc / corr / coverage.
eval_agg02 = feature_evaluate(pdf_train_filtered, pdf11_baseline)
display(eval_agg02)
name | auc | corr | coverage | |
---|---|---|---|---|
10 | EXT_SOURCE_3 | 0.6775 | -0.1774 | 0.8016 |
8 | EXT_SOURCE_1 | 0.6687 | -0.1589 | 0.4356 |
9 | EXT_SOURCE_2 | 0.6559 | -0.1602 | 0.9978 |
58 | DAYS_LAST_PHONE_CHANGE | 0.5558 | 0.0546 | 1.0000 |
18 | FLOORSMAX_AVG | 0.5502 | -0.0437 | 0.5013 |
46 | FLOORSMAX_MEDI | 0.5498 | -0.0434 | 0.5013 |
32 | FLOORSMAX_MODE | 0.5486 | -0.0425 | 0.5013 |
22 | LIVINGAREA_AVG | 0.5462 | -0.0315 | 0.4970 |
50 | LIVINGAREA_MEDI | 0.5456 | -0.0311 | 0.4970 |
5 | DAYS_REGISTRATION | 0.5450 | 0.0442 | 1.0000 |
53 | TOTALAREA_MODE | 0.5440 | -0.0312 | 0.5162 |
36 | LIVINGAREA_MODE | 0.5433 | -0.0290 | 0.4970 |
16 | ELEVATORS_AVG | 0.5385 | -0.0325 | 0.4663 |
11 | APARTMENTS_AVG | 0.5380 | -0.0276 | 0.4915 |
44 | ELEVATORS_MEDI | 0.5379 | -0.0322 | 0.4663 |
39 | APARTMENTS_MEDI | 0.5372 | -0.0269 | 0.4915 |
30 | ELEVATORS_MODE | 0.5356 | -0.0302 | 0.4663 |
19 | FLOORSMIN_AVG | 0.5355 | -0.0332 | 0.3214 |
25 | APARTMENTS_MODE | 0.5354 | -0.0249 | 0.4915 |
3 | AMT_GOODS_PRICE | 0.5353 | -0.0409 | 0.9991 |
47 | FLOORSMIN_MEDI | 0.5351 | -0.0329 | 0.3214 |
33 | FLOORSMIN_MODE | 0.5340 | -0.0323 | 0.3214 |
21 | LIVINGAPARTMENTS_AVG | 0.5337 | -0.0224 | 0.3167 |
49 | LIVINGAPARTMENTS_MEDI | 0.5331 | -0.0217 | 0.3167 |
4 | REGION_POPULATION_RELATIVE | 0.5326 | -0.0375 | 1.0000 |
67 | CREDIT_TERM | 0.5322 | 0.0128 | 1.0000 |
35 | LIVINGAPARTMENTS_MODE | 0.5313 | -0.0203 | 0.3167 |
41 | YEARS_BEGINEXPLUATATION_MEDI | 0.5309 | -0.0097 | 0.5112 |
13 | YEARS_BEGINEXPLUATATION_AVG | 0.5309 | -0.0093 | 0.5112 |
27 | YEARS_BEGINEXPLUATATION_MODE | 0.5304 | -0.0087 | 0.5112 |
14 | YEARS_BUILD_AVG | 0.5274 | -0.0233 | 0.3350 |
42 | YEARS_BUILD_MEDI | 0.5274 | -0.0233 | 0.3350 |
28 | YEARS_BUILD_MODE | 0.5266 | -0.0227 | 0.3350 |
12 | BASEMENTAREA_AVG | 0.5251 | -0.0215 | 0.4142 |
17 | ENTRANCES_AVG | 0.5244 | -0.0177 | 0.4957 |
40 | BASEMENTAREA_MEDI | 0.5243 | -0.0207 | 0.4142 |
45 | ENTRANCES_MEDI | 0.5243 | -0.0175 | 0.4957 |
31 | ENTRANCES_MODE | 0.5208 | -0.0154 | 0.4957 |
15 | COMMONAREA_AVG | 0.5206 | -0.0155 | 0.3016 |
1 | AMT_CREDIT | 0.5206 | -0.0318 | 1.0000 |
64 | AMT_REQ_CREDIT_BUREAU_YEAR | 0.5205 | 0.0197 | 0.8647 |
43 | COMMONAREA_MEDI | 0.5203 | -0.0155 | 0.3016 |
26 | BASEMENTAREA_MODE | 0.5202 | -0.0189 | 0.4142 |
55 | DEF_30_CNT_SOCIAL_CIRCLE | 0.5197 | 0.0341 | 0.9967 |
0 | AMT_INCOME_TOTAL | 0.5192 | -0.0015 | 1.0000 |
29 | COMMONAREA_MODE | 0.5181 | -0.0136 | 0.3016 |
20 | LANDAREA_AVG | 0.5173 | -0.0112 | 0.4059 |
48 | LANDAREA_MEDI | 0.5171 | -0.0120 | 0.4059 |
57 | DEF_60_CNT_SOCIAL_CIRCLE | 0.5170 | 0.0335 | 0.9967 |
66 | ANNUITY_INCOME_PERCENT | 0.5168 | 0.0123 | 1.0000 |
34 | LANDAREA_MODE | 0.5151 | -0.0111 | 0.4059 |
24 | NONLIVINGAREA_AVG | 0.5150 | -0.0109 | 0.4478 |
52 | NONLIVINGAREA_MEDI | 0.5144 | -0.0105 | 0.4478 |
38 | NONLIVINGAREA_MODE | 0.5137 | -0.0103 | 0.4478 |
6 | OWN_CAR_AGE | 0.5125 | 0.0029 | 1.0000 |
54 | OBS_30_CNT_SOCIAL_CIRCLE | 0.5086 | 0.0092 | 0.9967 |
56 | OBS_60_CNT_SOCIAL_CIRCLE | 0.5083 | 0.0089 | 0.9967 |
7 | CNT_FAM_MEMBERS | 0.5081 | 0.0109 | 1.0000 |
23 | NONLIVINGAPARTMENTS_AVG | 0.5061 | -0.0057 | 0.3058 |
62 | AMT_REQ_CREDIT_BUREAU_MON | 0.5060 | -0.0138 | 0.8647 |
51 | NONLIVINGAPARTMENTS_MEDI | 0.5050 | -0.0053 | 0.3058 |
63 | AMT_REQ_CREDIT_BUREAU_QRT | 0.5050 | -0.0014 | 0.8647 |
65 | CREDIT_INCOME_PERCENT | 0.5043 | -0.0094 | 1.0000 |
37 | NONLIVINGAPARTMENTS_MODE | 0.5040 | -0.0042 | 0.3058 |
2 | AMT_ANNUITY | 0.5025 | -0.0142 | 1.0000 |
60 | AMT_REQ_CREDIT_BUREAU_DAY | 0.5005 | 0.0016 | 0.8647 |
61 | AMT_REQ_CREDIT_BUREAU_WEEK | 0.5003 | 0.0009 | 0.8647 |
59 | AMT_REQ_CREDIT_BUREAU_HOUR | 0.5002 | 0.0016 | 0.8647 |
# Shape (rows, columns) of the continuous features whose AUC is at or below 0.501.
eval_agg02.query("auc <= 0.501").shape
(3, 4)
# The numbers in the DAYS_* columns are negative because they are recorded
# relative to the current loan application. To express them in years, we
# multiply by -1 and divide by the number of days in a year, i.e. divide
# by -365.

# train
pdf11_baseline["YEARS_BIRTH"] = pdf_train["DAYS_BIRTH"].div(-365)
pdf11_baseline["REGISTRATION_YEAR"] = pdf_train["DAYS_REGISTRATION"].div(-365)
pdf11_baseline["ID_PUBLISH_YEAR"] = pdf_train["DAYS_ID_PUBLISH"].div(-365)
pdf11_baseline["LAST_PHONE_CHANGE_YEAR"] = pdf_train["DAYS_LAST_PHONE_CHANGE"].div(-365)

# test (same four columns, in the same order)
pdf12_baseline["YEARS_BIRTH"] = pdf_test["DAYS_BIRTH"].div(-365)
pdf12_baseline["REGISTRATION_YEAR"] = pdf_test["DAYS_REGISTRATION"].div(-365)
pdf12_baseline["ID_PUBLISH_YEAR"] = pdf_test["DAYS_ID_PUBLISH"].div(-365)
pdf12_baseline["LAST_PHONE_CHANGE_YEAR"] = pdf_test["DAYS_LAST_PHONE_CHANGE"].div(-365)
# Summary statistics of DAYS_EMPLOYED, to check the value range for
# implausible entries.
pdf_train["DAYS_EMPLOYED"].describe()
count 307511.0000 mean 63815.0459 std 141275.7665 min -17912.0000 25% -2760.0000 50% -1213.0000 75% -289.0000 max 365243.0000 Name: DAYS_EMPLOYED, dtype: float64
# Histogram of DAYS_EMPLOYED to inspect its distribution visually.
pdf_train["DAYS_EMPLOYED"].hist()
plt.show()
# Split the training rows on the suspicious DAYS_EMPLOYED value 365243 and
# compare the default rate (mean of TARGET) of the two groups.
anom = pdf_train.loc[pdf_train["DAYS_EMPLOYED"].eq(365243)]
non_anom = pdf_train.loc[pdf_train["DAYS_EMPLOYED"].ne(365243)]
# The messages are in Vietnamese: "TARGET percentage of non-anomalies",
# "TARGET percentage of anomalies" and "The number of anomalies is".
print("Tỉ lệ phần trăm TARGET của non-anomalies: {}".format(non_anom["TARGET"].mean() * 100))
print("Tỉ lệ phần trăm TARGET của anomalies: {}".format(anom["TARGET"].mean() * 100))
print("Số lượng anomalies là {}".format(anom.shape[0]))
Tỉ lệ phần trăm TARGET của non-anomalies: 8.65997453765 Tỉ lệ phần trăm TARGET của anomalies: 5.39964604327 Số lượng anomalies là 55374
def handling_days_employed(pdf_input, pdf_output, anomaly_value=365243):
    """Add cleaned employment-duration features to ``pdf_output``.

    Reads ``DAYS_EMPLOYED`` from ``pdf_input`` and writes four columns into
    ``pdf_output`` (which is modified in place):

    * ``DAYS_EMPLOYED_ANOM``     - True where the value equals ``anomaly_value``.
    * ``DAYS_EMPLOYED``          - the raw value with the sentinel replaced by NaN.
    * ``YEARS_EMPLOYED``         - ``DAYS_EMPLOYED / -365``.
    * ``YEARS_EMPLOYED_PERCENT`` - ``YEARS_EMPLOYED`` divided by the age in years.

    The age in years is taken from ``pdf_output["YEARS_BIRTH"]`` when that
    column exists; otherwise it is derived from ``pdf_input["DAYS_BIRTH"]``,
    so the function no longer depends on another cell having been run first.

    Args:
        pdf_input: DataFrame holding the raw ``DAYS_EMPLOYED`` column (and
            ``DAYS_BIRTH`` if ``pdf_output`` has no ``YEARS_BIRTH``).
        pdf_output: DataFrame that receives the new columns. Column
            assignment aligns on the index, so both frames are expected to
            share the same index.
        anomaly_value: Sentinel marking an anomalous ``DAYS_EMPLOYED`` entry.
            Defaults to 365243, the value checked in the anomaly analysis.

    Returns:
        The same ``pdf_output`` object, for convenient re-assignment.
    """
    days_employed = pdf_input["DAYS_EMPLOYED"]

    # Flag the anomalous rows before the sentinel is wiped out.
    pdf_output["DAYS_EMPLOYED_ANOM"] = days_employed == anomaly_value
    # Replace the sentinel with NaN so it cannot distort the numeric feature.
    pdf_output["DAYS_EMPLOYED"] = days_employed.replace({anomaly_value: np.nan})
    # Days are negative (counted back from the application), hence -365.
    pdf_output["YEARS_EMPLOYED"] = pdf_output["DAYS_EMPLOYED"] / -365

    # Prefer the already computed age; fall back to the raw column so a
    # missing YEARS_BIRTH does not end in a KeyError.
    if "YEARS_BIRTH" in pdf_output.columns:
        years_birth = pdf_output["YEARS_BIRTH"]
    else:
        years_birth = pdf_input["DAYS_BIRTH"] / -365

    # Share of the applicant's life spent in the current employment.
    pdf_output["YEARS_EMPLOYED_PERCENT"] = pdf_output["YEARS_EMPLOYED"] / years_birth
    return pdf_output
# Add the DAYS_EMPLOYED-derived columns (anomaly flag, cleaned days, years,
# share of age) to the train and test feature frames; the share uses the
# YEARS_BIRTH column added to both frames earlier.
pdf11_baseline = handling_days_employed(pdf_train, pdf11_baseline)
pdf12_baseline = handling_days_employed(pdf_test, pdf12_baseline)
%%time
def store_features(pdf_train, pdf_test, fname):
    """Stack train and test features and pickle them under ``features/``.

    The two frames are concatenated row-wise (train first, then test), the
    index is reset to 0..n-1, and the result is written to
    ``features/<fname>.pkl.bz2`` with bz2 compression. The ``features``
    directory is created when it does not exist yet, so a fresh checkout
    does not fail on the write.

    Args:
        pdf_train: DataFrame with the training-set features.
        pdf_test: DataFrame with the test-set features.
        fname: Base name of the output file, without directory or extension.

    Returns:
        None. The shapes of both inputs and a completion message are printed.
    """
    print(pdf_train.shape, pdf_test.shape)

    out_dir = "features"
    # Python-2-compatible "mkdir -p": to_pickle does not create directories.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    fname = os.path.join(out_dir, "{}.pkl.bz2".format(fname))

    pdf_out = pd.concat([pdf_train, pdf_test]).reset_index(drop=True)
    pdf_out.to_pickle(fname, compression="bz2")
    print("Store features completed!")
# Persist both feature sets (train + test stacked) as
# features/baseline.pkl.bz2 and features/baseline_extend.pkl.bz2.
store_features(pdf01_baseline, pdf02_baseline, "baseline")
store_features(pdf11_baseline, pdf12_baseline, "baseline_extend")
((307511, 78), (48744, 78)) Store features completed! ((307511, 77), (48744, 77)) Store features completed! CPU times: user 30.7 s, sys: 1.44 s, total: 32.2 s Wall time: 26.2 s