import os, sys, glob, datetime
# specify spark version, python version
spark_home = "/home/zero/spark-2.4.0-bin-hadoop2.7" # MODIFY THIS
python_path="/apps/anaconda3/bin/python"
# set environment variables
os.environ['SPARK_HOME'] = spark_home
os.environ['PYSPARK_PYTHON'] = python_path
os.environ['SPARK_LOCAL_IP'] = "127.0.0.1"
def setup_spark_env(app_name):
# set environment variables
spark_python = os.path.join(spark_home, 'python')
py4j = glob.glob(os.path.join(spark_python, 'lib', 'py4j-*.zip'))[0]
sys.path[:0] = [spark_python, py4j]
# specify Spark application parameters
PYSPARK_SUBMIT_ARGS="--master local[2]"
os.environ['PYSPARK_SUBMIT_ARGS'] = (PYSPARK_SUBMIT_ARGS
+ " --name '%s_%s'"%(app_name, datetime.datetime.now().strftime("%Y%m%d %H:%M"))
+ " pyspark-shell")
return
#
setup_spark_env("your_spark_process_name") # MODIFY THIS
# launching PySpark application
# execfile(os.path.join(spark_home, 'python/pyspark/shell.py'))
filename=os.path.join(spark_home, 'python/pyspark/shell.py')
exec(compile(open(filename, "rb").read(), filename, 'exec'))
sc.setLogLevel('ERROR')
print("{}".format(sc.applicationId))
Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /__ / .__/\_,_/_/ /_/\_\ version 2.4.0 /_/ Using Python version 3.6.4 (default, Jan 16 2018 18:10:19) SparkSession available as 'spark'. local-1557024762906
from pyspark.sql import functions as sf
from pyspark.sql import Row
from pyspark.sql.types import *
import numpy as np
import os, math, subprocess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# some settings for displaying Pandas results
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.precision', 4)
pd.set_option('display.max_colwidth', -1)
# load data
data_path = "home-credit-default-risk/application_train.csv"
df = sqlContext.read.format("csv").option("header", "true").load(data_path)
print(df.take(1))
[Row(SK_ID_CURR='100002', TARGET='1', NAME_CONTRACT_TYPE='Cash loans', CODE_GENDER='M', FLAG_OWN_CAR='N', FLAG_OWN_REALTY='Y', CNT_CHILDREN='0', AMT_INCOME_TOTAL='202500.0', AMT_CREDIT='406597.5', AMT_ANNUITY='24700.5', AMT_GOODS_PRICE='351000.0', NAME_TYPE_SUITE='Unaccompanied', NAME_INCOME_TYPE='Working', NAME_EDUCATION_TYPE='Secondary / secondary special', NAME_FAMILY_STATUS='Single / not married', NAME_HOUSING_TYPE='House / apartment', REGION_POPULATION_RELATIVE='0.018801', DAYS_BIRTH='-9461', DAYS_EMPLOYED='-637', DAYS_REGISTRATION='-3648.0', DAYS_ID_PUBLISH='-2120', OWN_CAR_AGE=None, FLAG_MOBIL='1', FLAG_EMP_PHONE='1', FLAG_WORK_PHONE='0', FLAG_CONT_MOBILE='1', FLAG_PHONE='1', FLAG_EMAIL='0', OCCUPATION_TYPE='Laborers', CNT_FAM_MEMBERS='1.0', REGION_RATING_CLIENT='2', REGION_RATING_CLIENT_W_CITY='2', WEEKDAY_APPR_PROCESS_START='WEDNESDAY', HOUR_APPR_PROCESS_START='10', REG_REGION_NOT_LIVE_REGION='0', REG_REGION_NOT_WORK_REGION='0', LIVE_REGION_NOT_WORK_REGION='0', REG_CITY_NOT_LIVE_CITY='0', REG_CITY_NOT_WORK_CITY='0', LIVE_CITY_NOT_WORK_CITY='0', ORGANIZATION_TYPE='Business Entity Type 3', EXT_SOURCE_1='0.08303696739132256', EXT_SOURCE_2='0.2629485927471776', EXT_SOURCE_3='0.13937578009978951', APARTMENTS_AVG='0.0247', BASEMENTAREA_AVG='0.0369', YEARS_BEGINEXPLUATATION_AVG='0.9722', YEARS_BUILD_AVG='0.6192', COMMONAREA_AVG='0.0143', ELEVATORS_AVG='0.0', ENTRANCES_AVG='0.069', FLOORSMAX_AVG='0.0833', FLOORSMIN_AVG='0.125', LANDAREA_AVG='0.0369', LIVINGAPARTMENTS_AVG='0.0202', LIVINGAREA_AVG='0.019', NONLIVINGAPARTMENTS_AVG='0.0', NONLIVINGAREA_AVG='0.0', APARTMENTS_MODE='0.0252', BASEMENTAREA_MODE='0.0383', YEARS_BEGINEXPLUATATION_MODE='0.9722', YEARS_BUILD_MODE='0.6341', COMMONAREA_MODE='0.0144', ELEVATORS_MODE='0.0', ENTRANCES_MODE='0.069', FLOORSMAX_MODE='0.0833', FLOORSMIN_MODE='0.125', LANDAREA_MODE='0.0377', LIVINGAPARTMENTS_MODE='0.022', LIVINGAREA_MODE='0.0198', NONLIVINGAPARTMENTS_MODE='0.0', NONLIVINGAREA_MODE='0.0', APARTMENTS_MEDI='0.025', BASEMENTAREA_MEDI='0.0369', YEARS_BEGINEXPLUATATION_MEDI='0.9722', YEARS_BUILD_MEDI='0.6243', COMMONAREA_MEDI='0.0144', ELEVATORS_MEDI='0.0', ENTRANCES_MEDI='0.069', FLOORSMAX_MEDI='0.0833', FLOORSMIN_MEDI='0.125', LANDAREA_MEDI='0.0375', LIVINGAPARTMENTS_MEDI='0.0205', LIVINGAREA_MEDI='0.0193', NONLIVINGAPARTMENTS_MEDI='0.0', NONLIVINGAREA_MEDI='0.0', FONDKAPREMONT_MODE='reg oper account', HOUSETYPE_MODE='block of flats', TOTALAREA_MODE='0.0149', WALLSMATERIAL_MODE='Stone, brick', EMERGENCYSTATE_MODE='No', OBS_30_CNT_SOCIAL_CIRCLE='2.0', DEF_30_CNT_SOCIAL_CIRCLE='2.0', OBS_60_CNT_SOCIAL_CIRCLE='2.0', DEF_60_CNT_SOCIAL_CIRCLE='2.0', DAYS_LAST_PHONE_CHANGE='-1134.0', FLAG_DOCUMENT_2='0', FLAG_DOCUMENT_3='1', FLAG_DOCUMENT_4='0', FLAG_DOCUMENT_5='0', FLAG_DOCUMENT_6='0', FLAG_DOCUMENT_7='0', FLAG_DOCUMENT_8='0', FLAG_DOCUMENT_9='0', FLAG_DOCUMENT_10='0', FLAG_DOCUMENT_11='0', FLAG_DOCUMENT_12='0', FLAG_DOCUMENT_13='0', FLAG_DOCUMENT_14='0', FLAG_DOCUMENT_15='0', FLAG_DOCUMENT_16='0', FLAG_DOCUMENT_17='0', FLAG_DOCUMENT_18='0', FLAG_DOCUMENT_19='0', FLAG_DOCUMENT_20='0', FLAG_DOCUMENT_21='0', AMT_REQ_CREDIT_BUREAU_HOUR='0.0', AMT_REQ_CREDIT_BUREAU_DAY='0.0', AMT_REQ_CREDIT_BUREAU_WEEK='0.0', AMT_REQ_CREDIT_BUREAU_MON='0.0', AMT_REQ_CREDIT_BUREAU_QRT='0.0', AMT_REQ_CREDIT_BUREAU_YEAR='1.0')]
total_records = df.count()
print("total_records:", total_records)
total_records: 307511
# check dtypes
for n, t in df.dtypes:
print("{} ({})".format(n, t))
SK_ID_CURR (string) TARGET (string) NAME_CONTRACT_TYPE (string) CODE_GENDER (string) FLAG_OWN_CAR (string) FLAG_OWN_REALTY (string) CNT_CHILDREN (string) AMT_INCOME_TOTAL (string) AMT_CREDIT (string) AMT_ANNUITY (string) AMT_GOODS_PRICE (string) NAME_TYPE_SUITE (string) NAME_INCOME_TYPE (string) NAME_EDUCATION_TYPE (string) NAME_FAMILY_STATUS (string) NAME_HOUSING_TYPE (string) REGION_POPULATION_RELATIVE (string) DAYS_BIRTH (string) DAYS_EMPLOYED (string) DAYS_REGISTRATION (string) DAYS_ID_PUBLISH (string) OWN_CAR_AGE (string) FLAG_MOBIL (string) FLAG_EMP_PHONE (string) FLAG_WORK_PHONE (string) FLAG_CONT_MOBILE (string) FLAG_PHONE (string) FLAG_EMAIL (string) OCCUPATION_TYPE (string) CNT_FAM_MEMBERS (string) REGION_RATING_CLIENT (string) REGION_RATING_CLIENT_W_CITY (string) WEEKDAY_APPR_PROCESS_START (string) HOUR_APPR_PROCESS_START (string) REG_REGION_NOT_LIVE_REGION (string) REG_REGION_NOT_WORK_REGION (string) LIVE_REGION_NOT_WORK_REGION (string) REG_CITY_NOT_LIVE_CITY (string) REG_CITY_NOT_WORK_CITY (string) LIVE_CITY_NOT_WORK_CITY (string) ORGANIZATION_TYPE (string) EXT_SOURCE_1 (string) EXT_SOURCE_2 (string) EXT_SOURCE_3 (string) APARTMENTS_AVG (string) BASEMENTAREA_AVG (string) YEARS_BEGINEXPLUATATION_AVG (string) YEARS_BUILD_AVG (string) COMMONAREA_AVG (string) ELEVATORS_AVG (string) ENTRANCES_AVG (string) FLOORSMAX_AVG (string) FLOORSMIN_AVG (string) LANDAREA_AVG (string) LIVINGAPARTMENTS_AVG (string) LIVINGAREA_AVG (string) NONLIVINGAPARTMENTS_AVG (string) NONLIVINGAREA_AVG (string) APARTMENTS_MODE (string) BASEMENTAREA_MODE (string) YEARS_BEGINEXPLUATATION_MODE (string) YEARS_BUILD_MODE (string) COMMONAREA_MODE (string) ELEVATORS_MODE (string) ENTRANCES_MODE (string) FLOORSMAX_MODE (string) FLOORSMIN_MODE (string) LANDAREA_MODE (string) LIVINGAPARTMENTS_MODE (string) LIVINGAREA_MODE (string) NONLIVINGAPARTMENTS_MODE (string) NONLIVINGAREA_MODE (string) APARTMENTS_MEDI (string) BASEMENTAREA_MEDI (string) YEARS_BEGINEXPLUATATION_MEDI (string) YEARS_BUILD_MEDI (string) COMMONAREA_MEDI (string) ELEVATORS_MEDI (string) ENTRANCES_MEDI (string) FLOORSMAX_MEDI (string) FLOORSMIN_MEDI (string) LANDAREA_MEDI (string) LIVINGAPARTMENTS_MEDI (string) LIVINGAREA_MEDI (string) NONLIVINGAPARTMENTS_MEDI (string) NONLIVINGAREA_MEDI (string) FONDKAPREMONT_MODE (string) HOUSETYPE_MODE (string) TOTALAREA_MODE (string) WALLSMATERIAL_MODE (string) EMERGENCYSTATE_MODE (string) OBS_30_CNT_SOCIAL_CIRCLE (string) DEF_30_CNT_SOCIAL_CIRCLE (string) OBS_60_CNT_SOCIAL_CIRCLE (string) DEF_60_CNT_SOCIAL_CIRCLE (string) DAYS_LAST_PHONE_CHANGE (string) FLAG_DOCUMENT_2 (string) FLAG_DOCUMENT_3 (string) FLAG_DOCUMENT_4 (string) FLAG_DOCUMENT_5 (string) FLAG_DOCUMENT_6 (string) FLAG_DOCUMENT_7 (string) FLAG_DOCUMENT_8 (string) FLAG_DOCUMENT_9 (string) FLAG_DOCUMENT_10 (string) FLAG_DOCUMENT_11 (string) FLAG_DOCUMENT_12 (string) FLAG_DOCUMENT_13 (string) FLAG_DOCUMENT_14 (string) FLAG_DOCUMENT_15 (string) FLAG_DOCUMENT_16 (string) FLAG_DOCUMENT_17 (string) FLAG_DOCUMENT_18 (string) FLAG_DOCUMENT_19 (string) FLAG_DOCUMENT_20 (string) FLAG_DOCUMENT_21 (string) AMT_REQ_CREDIT_BUREAU_HOUR (string) AMT_REQ_CREDIT_BUREAU_DAY (string) AMT_REQ_CREDIT_BUREAU_WEEK (string) AMT_REQ_CREDIT_BUREAU_MON (string) AMT_REQ_CREDIT_BUREAU_QRT (string) AMT_REQ_CREDIT_BUREAU_YEAR (string)
# count distinct
for cname in df.columns:
cnt_dist = df.select(cname).distinct().count()
pct_dist = cnt_dist * 100.0 / total_records
print("{}: {} ({:0.2f}%)".format(cname, cnt_dist, pct_dist))
SK_ID_CURR: 307511 (100.00%) TARGET: 2 (0.00%) NAME_CONTRACT_TYPE: 2 (0.00%) CODE_GENDER: 3 (0.00%) FLAG_OWN_CAR: 2 (0.00%) FLAG_OWN_REALTY: 2 (0.00%) CNT_CHILDREN: 15 (0.00%) AMT_INCOME_TOTAL: 2548 (0.83%) AMT_CREDIT: 5603 (1.82%) AMT_ANNUITY: 13673 (4.45%) AMT_GOODS_PRICE: 1003 (0.33%) NAME_TYPE_SUITE: 8 (0.00%) NAME_INCOME_TYPE: 8 (0.00%) NAME_EDUCATION_TYPE: 5 (0.00%) NAME_FAMILY_STATUS: 6 (0.00%) NAME_HOUSING_TYPE: 6 (0.00%) REGION_POPULATION_RELATIVE: 81 (0.03%) DAYS_BIRTH: 17460 (5.68%) DAYS_EMPLOYED: 12574 (4.09%) DAYS_REGISTRATION: 15688 (5.10%) DAYS_ID_PUBLISH: 6168 (2.01%) OWN_CAR_AGE: 63 (0.02%) FLAG_MOBIL: 2 (0.00%) FLAG_EMP_PHONE: 2 (0.00%) FLAG_WORK_PHONE: 2 (0.00%) FLAG_CONT_MOBILE: 2 (0.00%) FLAG_PHONE: 2 (0.00%) FLAG_EMAIL: 2 (0.00%) OCCUPATION_TYPE: 19 (0.01%) CNT_FAM_MEMBERS: 18 (0.01%) REGION_RATING_CLIENT: 3 (0.00%) REGION_RATING_CLIENT_W_CITY: 3 (0.00%) WEEKDAY_APPR_PROCESS_START: 7 (0.00%) HOUR_APPR_PROCESS_START: 24 (0.01%) REG_REGION_NOT_LIVE_REGION: 2 (0.00%) REG_REGION_NOT_WORK_REGION: 2 (0.00%) LIVE_REGION_NOT_WORK_REGION: 2 (0.00%) REG_CITY_NOT_LIVE_CITY: 2 (0.00%) REG_CITY_NOT_WORK_CITY: 2 (0.00%) LIVE_CITY_NOT_WORK_CITY: 2 (0.00%) ORGANIZATION_TYPE: 58 (0.02%) EXT_SOURCE_1: 114585 (37.26%) EXT_SOURCE_2: 119832 (38.97%) EXT_SOURCE_3: 815 (0.27%) APARTMENTS_AVG: 2340 (0.76%) BASEMENTAREA_AVG: 3781 (1.23%) YEARS_BEGINEXPLUATATION_AVG: 286 (0.09%) YEARS_BUILD_AVG: 150 (0.05%) COMMONAREA_AVG: 3182 (1.03%) ELEVATORS_AVG: 258 (0.08%) ENTRANCES_AVG: 286 (0.09%) FLOORSMAX_AVG: 404 (0.13%) FLOORSMIN_AVG: 306 (0.10%) LANDAREA_AVG: 3528 (1.15%) LIVINGAPARTMENTS_AVG: 1869 (0.61%) LIVINGAREA_AVG: 5200 (1.69%) NONLIVINGAPARTMENTS_AVG: 387 (0.13%) NONLIVINGAREA_AVG: 3291 (1.07%) APARTMENTS_MODE: 761 (0.25%) BASEMENTAREA_MODE: 3842 (1.25%) YEARS_BEGINEXPLUATATION_MODE: 222 (0.07%) YEARS_BUILD_MODE: 155 (0.05%) COMMONAREA_MODE: 3129 (1.02%) ELEVATORS_MODE: 27 (0.01%) ENTRANCES_MODE: 31 (0.01%) FLOORSMAX_MODE: 26 (0.01%) FLOORSMIN_MODE: 26 (0.01%) LANDAREA_MODE: 3564 (1.16%) LIVINGAPARTMENTS_MODE: 737 (0.24%) LIVINGAREA_MODE: 5302 (1.72%) NONLIVINGAPARTMENTS_MODE: 168 (0.05%) NONLIVINGAREA_MODE: 3328 (1.08%) APARTMENTS_MEDI: 1149 (0.37%) BASEMENTAREA_MEDI: 3773 (1.23%) YEARS_BEGINEXPLUATATION_MEDI: 246 (0.08%) YEARS_BUILD_MEDI: 152 (0.05%) COMMONAREA_MEDI: 3203 (1.04%) ELEVATORS_MEDI: 47 (0.02%) ENTRANCES_MEDI: 47 (0.02%) FLOORSMAX_MEDI: 50 (0.02%) FLOORSMIN_MEDI: 48 (0.02%) LANDAREA_MEDI: 3561 (1.16%) LIVINGAPARTMENTS_MEDI: 1098 (0.36%) LIVINGAREA_MEDI: 5282 (1.72%) NONLIVINGAPARTMENTS_MEDI: 215 (0.07%) NONLIVINGAREA_MEDI: 3324 (1.08%) FONDKAPREMONT_MODE: 5 (0.00%) HOUSETYPE_MODE: 4 (0.00%) TOTALAREA_MODE: 5117 (1.66%) WALLSMATERIAL_MODE: 8 (0.00%) EMERGENCYSTATE_MODE: 3 (0.00%) OBS_30_CNT_SOCIAL_CIRCLE: 34 (0.01%) DEF_30_CNT_SOCIAL_CIRCLE: 11 (0.00%) OBS_60_CNT_SOCIAL_CIRCLE: 34 (0.01%) DEF_60_CNT_SOCIAL_CIRCLE: 10 (0.00%) DAYS_LAST_PHONE_CHANGE: 3774 (1.23%) FLAG_DOCUMENT_2: 2 (0.00%) FLAG_DOCUMENT_3: 2 (0.00%) FLAG_DOCUMENT_4: 2 (0.00%) FLAG_DOCUMENT_5: 2 (0.00%) FLAG_DOCUMENT_6: 2 (0.00%) FLAG_DOCUMENT_7: 2 (0.00%) FLAG_DOCUMENT_8: 2 (0.00%) FLAG_DOCUMENT_9: 2 (0.00%) FLAG_DOCUMENT_10: 2 (0.00%) FLAG_DOCUMENT_11: 2 (0.00%) FLAG_DOCUMENT_12: 2 (0.00%) FLAG_DOCUMENT_13: 2 (0.00%) FLAG_DOCUMENT_14: 2 (0.00%) FLAG_DOCUMENT_15: 2 (0.00%) FLAG_DOCUMENT_16: 2 (0.00%) FLAG_DOCUMENT_17: 2 (0.00%) FLAG_DOCUMENT_18: 2 (0.00%) FLAG_DOCUMENT_19: 2 (0.00%) FLAG_DOCUMENT_20: 2 (0.00%) FLAG_DOCUMENT_21: 2 (0.00%) AMT_REQ_CREDIT_BUREAU_HOUR: 6 (0.00%) AMT_REQ_CREDIT_BUREAU_DAY: 10 (0.00%) AMT_REQ_CREDIT_BUREAU_WEEK: 10 (0.00%) AMT_REQ_CREDIT_BUREAU_MON: 25 (0.01%) AMT_REQ_CREDIT_BUREAU_QRT: 12 (0.00%) AMT_REQ_CREDIT_BUREAU_YEAR: 26 (0.01%)
# count NULL
for cname in df.columns:
cnt_null = df.where("{} is NULL".format(cname)).count()
pct_miss = cnt_null * 100.0 / total_records
print("{}: {} ({:0.2f}%)".format(cname, cnt_null, pct_miss))
SK_ID_CURR: 0 (0.00%) TARGET: 0 (0.00%) NAME_CONTRACT_TYPE: 0 (0.00%) CODE_GENDER: 0 (0.00%) FLAG_OWN_CAR: 0 (0.00%) FLAG_OWN_REALTY: 0 (0.00%) CNT_CHILDREN: 0 (0.00%) AMT_INCOME_TOTAL: 0 (0.00%) AMT_CREDIT: 0 (0.00%) AMT_ANNUITY: 12 (0.00%) AMT_GOODS_PRICE: 278 (0.09%) NAME_TYPE_SUITE: 1292 (0.42%) NAME_INCOME_TYPE: 0 (0.00%) NAME_EDUCATION_TYPE: 0 (0.00%) NAME_FAMILY_STATUS: 0 (0.00%) NAME_HOUSING_TYPE: 0 (0.00%) REGION_POPULATION_RELATIVE: 0 (0.00%) DAYS_BIRTH: 0 (0.00%) DAYS_EMPLOYED: 0 (0.00%) DAYS_REGISTRATION: 0 (0.00%) DAYS_ID_PUBLISH: 0 (0.00%) OWN_CAR_AGE: 202929 (65.99%) FLAG_MOBIL: 0 (0.00%) FLAG_EMP_PHONE: 0 (0.00%) FLAG_WORK_PHONE: 0 (0.00%) FLAG_CONT_MOBILE: 0 (0.00%) FLAG_PHONE: 0 (0.00%) FLAG_EMAIL: 0 (0.00%) OCCUPATION_TYPE: 96391 (31.35%) CNT_FAM_MEMBERS: 2 (0.00%) REGION_RATING_CLIENT: 0 (0.00%) REGION_RATING_CLIENT_W_CITY: 0 (0.00%) WEEKDAY_APPR_PROCESS_START: 0 (0.00%) HOUR_APPR_PROCESS_START: 0 (0.00%) REG_REGION_NOT_LIVE_REGION: 0 (0.00%) REG_REGION_NOT_WORK_REGION: 0 (0.00%) LIVE_REGION_NOT_WORK_REGION: 0 (0.00%) REG_CITY_NOT_LIVE_CITY: 0 (0.00%) REG_CITY_NOT_WORK_CITY: 0 (0.00%) LIVE_CITY_NOT_WORK_CITY: 0 (0.00%) ORGANIZATION_TYPE: 0 (0.00%) EXT_SOURCE_1: 173378 (56.38%) EXT_SOURCE_2: 660 (0.21%) EXT_SOURCE_3: 60965 (19.83%) APARTMENTS_AVG: 156061 (50.75%) BASEMENTAREA_AVG: 179943 (58.52%) YEARS_BEGINEXPLUATATION_AVG: 150007 (48.78%) YEARS_BUILD_AVG: 204488 (66.50%) COMMONAREA_AVG: 214865 (69.87%) ELEVATORS_AVG: 163891 (53.30%) ENTRANCES_AVG: 154828 (50.35%) FLOORSMAX_AVG: 153020 (49.76%) FLOORSMIN_AVG: 208642 (67.85%) LANDAREA_AVG: 182590 (59.38%) LIVINGAPARTMENTS_AVG: 210199 (68.35%) LIVINGAREA_AVG: 154350 (50.19%) NONLIVINGAPARTMENTS_AVG: 213514 (69.43%) NONLIVINGAREA_AVG: 169682 (55.18%) APARTMENTS_MODE: 156061 (50.75%) BASEMENTAREA_MODE: 179943 (58.52%) YEARS_BEGINEXPLUATATION_MODE: 150007 (48.78%) YEARS_BUILD_MODE: 204488 (66.50%) COMMONAREA_MODE: 214865 (69.87%) ELEVATORS_MODE: 163891 (53.30%) ENTRANCES_MODE: 154828 (50.35%) FLOORSMAX_MODE: 153020 (49.76%) FLOORSMIN_MODE: 208642 (67.85%) LANDAREA_MODE: 182590 (59.38%) LIVINGAPARTMENTS_MODE: 210199 (68.35%) LIVINGAREA_MODE: 154350 (50.19%) NONLIVINGAPARTMENTS_MODE: 213514 (69.43%) NONLIVINGAREA_MODE: 169682 (55.18%) APARTMENTS_MEDI: 156061 (50.75%) BASEMENTAREA_MEDI: 179943 (58.52%) YEARS_BEGINEXPLUATATION_MEDI: 150007 (48.78%) YEARS_BUILD_MEDI: 204488 (66.50%) COMMONAREA_MEDI: 214865 (69.87%) ELEVATORS_MEDI: 163891 (53.30%) ENTRANCES_MEDI: 154828 (50.35%) FLOORSMAX_MEDI: 153020 (49.76%) FLOORSMIN_MEDI: 208642 (67.85%) LANDAREA_MEDI: 182590 (59.38%) LIVINGAPARTMENTS_MEDI: 210199 (68.35%) LIVINGAREA_MEDI: 154350 (50.19%) NONLIVINGAPARTMENTS_MEDI: 213514 (69.43%) NONLIVINGAREA_MEDI: 169682 (55.18%) FONDKAPREMONT_MODE: 210295 (68.39%) HOUSETYPE_MODE: 154297 (50.18%) TOTALAREA_MODE: 148431 (48.27%) WALLSMATERIAL_MODE: 156341 (50.84%) EMERGENCYSTATE_MODE: 145755 (47.40%) OBS_30_CNT_SOCIAL_CIRCLE: 1021 (0.33%) DEF_30_CNT_SOCIAL_CIRCLE: 1021 (0.33%) OBS_60_CNT_SOCIAL_CIRCLE: 1021 (0.33%) DEF_60_CNT_SOCIAL_CIRCLE: 1021 (0.33%) DAYS_LAST_PHONE_CHANGE: 1 (0.00%) FLAG_DOCUMENT_2: 0 (0.00%) FLAG_DOCUMENT_3: 0 (0.00%) FLAG_DOCUMENT_4: 0 (0.00%) FLAG_DOCUMENT_5: 0 (0.00%) FLAG_DOCUMENT_6: 0 (0.00%) FLAG_DOCUMENT_7: 0 (0.00%) FLAG_DOCUMENT_8: 0 (0.00%) FLAG_DOCUMENT_9: 0 (0.00%) FLAG_DOCUMENT_10: 0 (0.00%) FLAG_DOCUMENT_11: 0 (0.00%) FLAG_DOCUMENT_12: 0 (0.00%) FLAG_DOCUMENT_13: 0 (0.00%) FLAG_DOCUMENT_14: 0 (0.00%) FLAG_DOCUMENT_15: 0 (0.00%) FLAG_DOCUMENT_16: 0 (0.00%) FLAG_DOCUMENT_17: 0 (0.00%) FLAG_DOCUMENT_18: 0 (0.00%) FLAG_DOCUMENT_19: 0 (0.00%) FLAG_DOCUMENT_20: 0 (0.00%) FLAG_DOCUMENT_21: 0 (0.00%) AMT_REQ_CREDIT_BUREAU_HOUR: 41519 (13.50%) AMT_REQ_CREDIT_BUREAU_DAY: 41519 (13.50%) AMT_REQ_CREDIT_BUREAU_WEEK: 41519 (13.50%) AMT_REQ_CREDIT_BUREAU_MON: 41519 (13.50%) AMT_REQ_CREDIT_BUREAU_QRT: 41519 (13.50%) AMT_REQ_CREDIT_BUREAU_YEAR: 41519 (13.50%)
# count zeros
for cname in df.columns:
cnt_zeros = df.where("{} = 0.0".format(cname)).count()
pct_zeros = cnt_zeros * 100.0 / total_records
print("{}: {} ({:0.2f}%)".format(cname, cnt_zeros, pct_zeros))
SK_ID_CURR: 0 (0.00%) TARGET: 282686 (91.93%) NAME_CONTRACT_TYPE: 0 (0.00%) CODE_GENDER: 0 (0.00%) FLAG_OWN_CAR: 0 (0.00%) FLAG_OWN_REALTY: 0 (0.00%) CNT_CHILDREN: 215371 (70.04%) AMT_INCOME_TOTAL: 0 (0.00%) AMT_CREDIT: 0 (0.00%) AMT_ANNUITY: 0 (0.00%) AMT_GOODS_PRICE: 0 (0.00%) NAME_TYPE_SUITE: 0 (0.00%) NAME_INCOME_TYPE: 0 (0.00%) NAME_EDUCATION_TYPE: 0 (0.00%) NAME_FAMILY_STATUS: 0 (0.00%) NAME_HOUSING_TYPE: 0 (0.00%) REGION_POPULATION_RELATIVE: 0 (0.00%) DAYS_BIRTH: 0 (0.00%) DAYS_EMPLOYED: 2 (0.00%) DAYS_REGISTRATION: 80 (0.03%) DAYS_ID_PUBLISH: 16 (0.01%) OWN_CAR_AGE: 2134 (0.69%) FLAG_MOBIL: 1 (0.00%) FLAG_EMP_PHONE: 55386 (18.01%) FLAG_WORK_PHONE: 246203 (80.06%) FLAG_CONT_MOBILE: 574 (0.19%) FLAG_PHONE: 221080 (71.89%) FLAG_EMAIL: 290069 (94.33%) OCCUPATION_TYPE: 0 (0.00%) CNT_FAM_MEMBERS: 0 (0.00%) REGION_RATING_CLIENT: 0 (0.00%) REGION_RATING_CLIENT_W_CITY: 0 (0.00%) WEEKDAY_APPR_PROCESS_START: 0 (0.00%) HOUR_APPR_PROCESS_START: 40 (0.01%) REG_REGION_NOT_LIVE_REGION: 302854 (98.49%) REG_REGION_NOT_WORK_REGION: 291899 (94.92%) LIVE_REGION_NOT_WORK_REGION: 295008 (95.93%) REG_CITY_NOT_LIVE_CITY: 283472 (92.18%) REG_CITY_NOT_WORK_CITY: 236644 (76.95%) LIVE_CITY_NOT_WORK_CITY: 252296 (82.04%) ORGANIZATION_TYPE: 0 (0.00%) EXT_SOURCE_1: 0 (0.00%) EXT_SOURCE_2: 0 (0.00%) EXT_SOURCE_3: 0 (0.00%) APARTMENTS_AVG: 751 (0.24%) BASEMENTAREA_AVG: 14745 (4.79%) YEARS_BEGINEXPLUATATION_AVG: 514 (0.17%) YEARS_BUILD_AVG: 102 (0.03%) COMMONAREA_AVG: 8442 (2.75%) ELEVATORS_AVG: 85718 (27.87%) ENTRANCES_AVG: 323 (0.11%) FLOORSMAX_AVG: 2938 (0.96%) FLOORSMIN_AVG: 2320 (0.75%) LANDAREA_AVG: 15600 (5.07%) LIVINGAPARTMENTS_AVG: 418 (0.14%) LIVINGAREA_AVG: 284 (0.09%) NONLIVINGAPARTMENTS_AVG: 54549 (17.74%) NONLIVINGAREA_AVG: 58735 (19.10%) APARTMENTS_MODE: 976 (0.32%) BASEMENTAREA_MODE: 16598 (5.40%) YEARS_BEGINEXPLUATATION_MODE: 142 (0.05%) YEARS_BUILD_MODE: 103 (0.03%) COMMONAREA_MODE: 9690 (3.15%) ELEVATORS_MODE: 89498 (29.10%) ENTRANCES_MODE: 387 (0.13%) FLOORSMAX_MODE: 3415 (1.11%) FLOORSMIN_MODE: 2517 (0.82%) LANDAREA_MODE: 17453 (5.68%) LIVINGAPARTMENTS_MODE: 519 (0.17%) LIVINGAREA_MODE: 444 (0.14%) NONLIVINGAPARTMENTS_MODE: 59255 (19.27%) NONLIVINGAREA_MODE: 67126 (21.83%) APARTMENTS_MEDI: 771 (0.25%) BASEMENTAREA_MEDI: 14991 (4.87%) YEARS_BEGINEXPLUATATION_MEDI: 548 (0.18%) YEARS_BUILD_MEDI: 101 (0.03%) COMMONAREA_MEDI: 8691 (2.83%) ELEVATORS_MEDI: 87026 (28.30%) ENTRANCES_MEDI: 329 (0.11%) FLOORSMAX_MEDI: 2995 (0.97%) FLOORSMIN_MEDI: 2351 (0.76%) LANDAREA_MEDI: 15919 (5.18%) LIVINGAPARTMENTS_MEDI: 433 (0.14%) LIVINGAREA_MEDI: 299 (0.10%) NONLIVINGAPARTMENTS_MEDI: 56097 (18.24%) NONLIVINGAREA_MEDI: 60954 (19.82%) FONDKAPREMONT_MODE: 0 (0.00%) HOUSETYPE_MODE: 0 (0.00%) TOTALAREA_MODE: 582 (0.19%) WALLSMATERIAL_MODE: 0 (0.00%) EMERGENCYSTATE_MODE: 0 (0.00%) OBS_30_CNT_SOCIAL_CIRCLE: 163910 (53.30%) DEF_30_CNT_SOCIAL_CIRCLE: 271324 (88.23%) OBS_60_CNT_SOCIAL_CIRCLE: 164666 (53.55%) DEF_60_CNT_SOCIAL_CIRCLE: 280721 (91.29%) DAYS_LAST_PHONE_CHANGE: 37672 (12.25%) FLAG_DOCUMENT_2: 307498 (100.00%) FLAG_DOCUMENT_3: 89171 (29.00%) FLAG_DOCUMENT_4: 307486 (99.99%) FLAG_DOCUMENT_5: 302863 (98.49%) FLAG_DOCUMENT_6: 280433 (91.19%) FLAG_DOCUMENT_7: 307452 (99.98%) FLAG_DOCUMENT_8: 282487 (91.86%) FLAG_DOCUMENT_9: 306313 (99.61%) FLAG_DOCUMENT_10: 307504 (100.00%) FLAG_DOCUMENT_11: 306308 (99.61%) FLAG_DOCUMENT_12: 307509 (100.00%) FLAG_DOCUMENT_13: 306427 (99.65%) FLAG_DOCUMENT_14: 306608 (99.71%) FLAG_DOCUMENT_15: 307139 (99.88%) FLAG_DOCUMENT_16: 304458 (99.01%) FLAG_DOCUMENT_17: 307429 (99.97%) FLAG_DOCUMENT_18: 305011 (99.19%) FLAG_DOCUMENT_19: 307328 (99.94%) FLAG_DOCUMENT_20: 307355 (99.95%) FLAG_DOCUMENT_21: 307408 (99.97%) AMT_REQ_CREDIT_BUREAU_HOUR: 264366 (85.97%) AMT_REQ_CREDIT_BUREAU_DAY: 264503 (86.01%) AMT_REQ_CREDIT_BUREAU_WEEK: 257456 (83.72%) AMT_REQ_CREDIT_BUREAU_MON: 222233 (72.27%) AMT_REQ_CREDIT_BUREAU_QRT: 215417 (70.05%) AMT_REQ_CREDIT_BUREAU_YEAR: 71801 (23.35%)
# count negative
for cname in df.columns:
cnt_neg = df.where("{} < 0".format(cname)).count()
pct_neg = cnt_neg * 100.0 / total_records
print("{}: {} ({:0.2f}%)".format(cname, cnt_neg, pct_neg))
SK_ID_CURR: 0 (0.00%) TARGET: 0 (0.00%) NAME_CONTRACT_TYPE: 0 (0.00%) CODE_GENDER: 0 (0.00%) FLAG_OWN_CAR: 0 (0.00%) FLAG_OWN_REALTY: 0 (0.00%) CNT_CHILDREN: 0 (0.00%) AMT_INCOME_TOTAL: 0 (0.00%) AMT_CREDIT: 0 (0.00%) AMT_ANNUITY: 0 (0.00%) AMT_GOODS_PRICE: 0 (0.00%) NAME_TYPE_SUITE: 0 (0.00%) NAME_INCOME_TYPE: 0 (0.00%) NAME_EDUCATION_TYPE: 0 (0.00%) NAME_FAMILY_STATUS: 0 (0.00%) NAME_HOUSING_TYPE: 0 (0.00%) REGION_POPULATION_RELATIVE: 0 (0.00%) DAYS_BIRTH: 307511 (100.00%) DAYS_EMPLOYED: 252135 (81.99%) DAYS_REGISTRATION: 307431 (99.97%) DAYS_ID_PUBLISH: 307495 (99.99%) OWN_CAR_AGE: 0 (0.00%) FLAG_MOBIL: 0 (0.00%) FLAG_EMP_PHONE: 0 (0.00%) FLAG_WORK_PHONE: 0 (0.00%) FLAG_CONT_MOBILE: 0 (0.00%) FLAG_PHONE: 0 (0.00%) FLAG_EMAIL: 0 (0.00%) OCCUPATION_TYPE: 0 (0.00%) CNT_FAM_MEMBERS: 0 (0.00%) REGION_RATING_CLIENT: 0 (0.00%) REGION_RATING_CLIENT_W_CITY: 0 (0.00%) WEEKDAY_APPR_PROCESS_START: 0 (0.00%) HOUR_APPR_PROCESS_START: 0 (0.00%) REG_REGION_NOT_LIVE_REGION: 0 (0.00%) REG_REGION_NOT_WORK_REGION: 0 (0.00%) LIVE_REGION_NOT_WORK_REGION: 0 (0.00%) REG_CITY_NOT_LIVE_CITY: 0 (0.00%) REG_CITY_NOT_WORK_CITY: 0 (0.00%) LIVE_CITY_NOT_WORK_CITY: 0 (0.00%) ORGANIZATION_TYPE: 0 (0.00%) EXT_SOURCE_1: 0 (0.00%) EXT_SOURCE_2: 0 (0.00%) EXT_SOURCE_3: 0 (0.00%) APARTMENTS_AVG: 0 (0.00%) BASEMENTAREA_AVG: 0 (0.00%) YEARS_BEGINEXPLUATATION_AVG: 0 (0.00%) YEARS_BUILD_AVG: 0 (0.00%) COMMONAREA_AVG: 0 (0.00%) ELEVATORS_AVG: 0 (0.00%) ENTRANCES_AVG: 0 (0.00%) FLOORSMAX_AVG: 0 (0.00%) FLOORSMIN_AVG: 0 (0.00%) LANDAREA_AVG: 0 (0.00%) LIVINGAPARTMENTS_AVG: 0 (0.00%) LIVINGAREA_AVG: 0 (0.00%) NONLIVINGAPARTMENTS_AVG: 0 (0.00%) NONLIVINGAREA_AVG: 0 (0.00%) APARTMENTS_MODE: 0 (0.00%) BASEMENTAREA_MODE: 0 (0.00%) YEARS_BEGINEXPLUATATION_MODE: 0 (0.00%) YEARS_BUILD_MODE: 0 (0.00%) COMMONAREA_MODE: 0 (0.00%) ELEVATORS_MODE: 0 (0.00%) ENTRANCES_MODE: 0 (0.00%) FLOORSMAX_MODE: 0 (0.00%) FLOORSMIN_MODE: 0 (0.00%) LANDAREA_MODE: 0 (0.00%) LIVINGAPARTMENTS_MODE: 0 (0.00%) LIVINGAREA_MODE: 0 (0.00%) NONLIVINGAPARTMENTS_MODE: 0 (0.00%) NONLIVINGAREA_MODE: 0 (0.00%) APARTMENTS_MEDI: 0 (0.00%) BASEMENTAREA_MEDI: 0 (0.00%) YEARS_BEGINEXPLUATATION_MEDI: 0 (0.00%) YEARS_BUILD_MEDI: 0 (0.00%) COMMONAREA_MEDI: 0 (0.00%) ELEVATORS_MEDI: 0 (0.00%) ENTRANCES_MEDI: 0 (0.00%) FLOORSMAX_MEDI: 0 (0.00%) FLOORSMIN_MEDI: 0 (0.00%) LANDAREA_MEDI: 0 (0.00%) LIVINGAPARTMENTS_MEDI: 0 (0.00%) LIVINGAREA_MEDI: 0 (0.00%) NONLIVINGAPARTMENTS_MEDI: 0 (0.00%) NONLIVINGAREA_MEDI: 0 (0.00%) FONDKAPREMONT_MODE: 0 (0.00%) HOUSETYPE_MODE: 0 (0.00%) TOTALAREA_MODE: 0 (0.00%) WALLSMATERIAL_MODE: 0 (0.00%) EMERGENCYSTATE_MODE: 0 (0.00%) OBS_30_CNT_SOCIAL_CIRCLE: 0 (0.00%) DEF_30_CNT_SOCIAL_CIRCLE: 0 (0.00%) OBS_60_CNT_SOCIAL_CIRCLE: 0 (0.00%) DEF_60_CNT_SOCIAL_CIRCLE: 0 (0.00%) DAYS_LAST_PHONE_CHANGE: 269838 (87.75%) FLAG_DOCUMENT_2: 0 (0.00%) FLAG_DOCUMENT_3: 0 (0.00%) FLAG_DOCUMENT_4: 0 (0.00%) FLAG_DOCUMENT_5: 0 (0.00%) FLAG_DOCUMENT_6: 0 (0.00%) FLAG_DOCUMENT_7: 0 (0.00%) FLAG_DOCUMENT_8: 0 (0.00%) FLAG_DOCUMENT_9: 0 (0.00%) FLAG_DOCUMENT_10: 0 (0.00%) FLAG_DOCUMENT_11: 0 (0.00%) FLAG_DOCUMENT_12: 0 (0.00%) FLAG_DOCUMENT_13: 0 (0.00%) FLAG_DOCUMENT_14: 0 (0.00%) FLAG_DOCUMENT_15: 0 (0.00%) FLAG_DOCUMENT_16: 0 (0.00%) FLAG_DOCUMENT_17: 0 (0.00%) FLAG_DOCUMENT_18: 0 (0.00%) FLAG_DOCUMENT_19: 0 (0.00%) FLAG_DOCUMENT_20: 0 (0.00%) FLAG_DOCUMENT_21: 0 (0.00%) AMT_REQ_CREDIT_BUREAU_HOUR: 0 (0.00%) AMT_REQ_CREDIT_BUREAU_DAY: 0 (0.00%) AMT_REQ_CREDIT_BUREAU_WEEK: 0 (0.00%) AMT_REQ_CREDIT_BUREAU_MON: 0 (0.00%) AMT_REQ_CREDIT_BUREAU_QRT: 0 (0.00%) AMT_REQ_CREDIT_BUREAU_YEAR: 0 (0.00%)
# stats for number
ls_features = [cname for cname in df.columns if cname != "SK_ID_CURR"]
pdf_stats = df.select(ls_features).describe().toPandas()
pdf_stats.T
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
summary | count | mean | stddev | min | max |
TARGET | 307511 | 0.08072881945686496 | 0.27241864564839546 | 0 | 1 |
NAME_CONTRACT_TYPE | 307511 | None | None | Cash loans | Revolving loans |
CODE_GENDER | 307511 | None | None | F | XNA |
FLAG_OWN_CAR | 307511 | None | None | N | Y |
FLAG_OWN_REALTY | 307511 | None | None | N | Y |
CNT_CHILDREN | 307511 | 0.4170517477423572 | 0.722121384437626 | 0 | 9 |
AMT_INCOME_TOTAL | 307511 | 168797.91929698453 | 237123.14627885324 | 100071.0 | 99900.0 |
AMT_CREDIT | 307511 | 599025.9997057016 | 402490.77699585445 | 1000417.5 | 999886.5 |
AMT_ANNUITY | 307499 | 27108.573909183444 | 14493.737315118291 | 100017.0 | 9999.0 |
AMT_GOODS_PRICE | 307233 | 538396.2074288895 | 369446.4605400576 | 1003500.0 | 999000.0 |
NAME_TYPE_SUITE | 306219 | None | None | Children | Unaccompanied |
NAME_INCOME_TYPE | 307511 | None | None | Businessman | Working |
NAME_EDUCATION_TYPE | 307511 | None | None | Academic degree | Secondary / secondary special |
NAME_FAMILY_STATUS | 307511 | None | None | Civil marriage | Widow |
NAME_HOUSING_TYPE | 307511 | None | None | Co-op apartment | With parents |
REGION_POPULATION_RELATIVE | 307511 | 0.02086811205779086 | 0.01383128012270465 | 0.00029 | 0.072508 |
DAYS_BIRTH | 307511 | -16036.995066843137 | 4363.98863178559 | -10000 | -9999 |
DAYS_EMPLOYED | 307511 | 63815.04590404896 | 141275.76651872776 | -1 | 365243 |
DAYS_REGISTRATION | 307511 | -4986.120327538418 | 3522.8863209630895 | -1.0 | 0.0 |
DAYS_ID_PUBLISH | 307511 | -2994.2023732484367 | 1509.4504190030277 | -1 | 0 |
OWN_CAR_AGE | 104582 | 12.061090818687727 | 11.944811582242771 | 0.0 | 91.0 |
FLAG_MOBIL | 307511 | 0.9999967480838083 | 0.0018033070153514828 | 0 | 1 |
FLAG_EMP_PHONE | 307511 | 0.8198893698111612 | 0.38428019893876475 | 0 | 1 |
FLAG_WORK_PHONE | 307511 | 0.1993684778755882 | 0.39952622815022937 | 0 | 1 |
FLAG_CONT_MOBILE | 307511 | 0.9981334001060125 | 0.04316389414243231 | 0 | 1 |
FLAG_PHONE | 307511 | 0.28106636835755466 | 0.44952054685675735 | 0 | 1 |
FLAG_EMAIL | 307511 | 0.0567199222141647 | 0.2313070397227082 | 0 | 1 |
OCCUPATION_TYPE | 211120 | None | None | Accountants | Waiters/barmen staff |
CNT_FAM_MEMBERS | 307509 | 2.152665450442101 | 0.9106815691792945 | 1.0 | 9.0 |
REGION_RATING_CLIENT | 307511 | 2.0524631639193394 | 0.5090339028156776 | 1 | 3 |
REGION_RATING_CLIENT_W_CITY | 307511 | 2.031520823645333 | 0.502737032914767 | 1 | 3 |
WEEKDAY_APPR_PROCESS_START | 307511 | None | None | FRIDAY | WEDNESDAY |
HOUR_APPR_PROCESS_START | 307511 | 12.063418869568894 | 3.2658322554378714 | 0 | 9 |
REG_REGION_NOT_LIVE_REGION | 307511 | 0.015144173704355291 | 0.12212647628215288 | 0 | 1 |
REG_REGION_NOT_WORK_REGION | 307511 | 0.05076891558350758 | 0.21952582879696045 | 0 | 1 |
LIVE_REGION_NOT_WORK_REGION | 307511 | 0.04065870814377372 | 0.19749861882842462 | 0 | 1 |
REG_CITY_NOT_LIVE_CITY | 307511 | 0.07817281333025486 | 0.2684437723734044 | 0 | 1 |
REG_CITY_NOT_WORK_CITY | 307511 | 0.23045354475124466 | 0.42112383591389696 | 0 | 1 |
LIVE_CITY_NOT_WORK_CITY | 307511 | 0.17955455252007246 | 0.38381661538559614 | 0 | 1 |
ORGANIZATION_TYPE | 307511 | None | None | Advertising | XNA |
EXT_SOURCE_1 | 134133 | 0.5021298056566661 | 0.21106224927392453 | 0.014568132412445587 | 0.9626927705613059 |
EXT_SOURCE_2 | 306851 | 0.5143926741308394 | 0.19106015498493495 | 0.00010795029167810804 | 9.936476188005656e-06 |
EXT_SOURCE_3 | 246546 | 0.5108529061799263 | 0.19484436446374864 | 0.0005272652387098817 | 0.8960095494948396 |
APARTMENTS_AVG | 151450 | 0.11744049917464643 | 0.10824029130032226 | 0.0 | 1.0 |
BASEMENTAREA_AVG | 127568 | 0.08844221905180051 | 0.08243815873568477 | 0.0 | 1.0 |
YEARS_BEGINEXPLUATATION_AVG | 157504 | 0.9777348581622161 | 0.059223314358362686 | 0.0 | 1.0 |
YEARS_BUILD_AVG | 103023 | 0.7524714325927284 | 0.11327992663224681 | 0.0 | 1.0 |
COMMONAREA_AVG | 92646 | 0.044620715411351 | 0.0760357450504091 | 0.0 | 1.0 |
ELEVATORS_AVG | 143620 | 0.07894151232416623 | 0.13457600110034398 | 0.0 | 1.0 |
ENTRANCES_AVG | 152683 | 0.1497246700679834 | 0.10004912076035907 | 0.0 | 1.0 |
FLOORSMAX_AVG | 154491 | 0.22628190703664716 | 0.1446406995480042 | 0.0 | 1.0 |
FLOORSMIN_AVG | 98869 | 0.23189350049056454 | 0.16138028880013763 | 0.0 | 1.0 |
LANDAREA_AVG | 124921 | 0.06633318417239706 | 0.08118364070179374 | 0.0 | 1.0 |
LIVINGAPARTMENTS_AVG | 97312 | 0.10077477495067376 | 0.09257613396049774 | 0.0 | 1.0 |
LIVINGAREA_AVG | 153161 | 0.1073990193325995 | 0.11056452318371307 | 0.0 | 1.0 |
NONLIVINGAPARTMENTS_AVG | 93997 | 0.008808672617209091 | 0.04773166205034795 | 0.0 | 1.0 |
NONLIVINGAREA_AVG | 137829 | 0.028357757075796745 | 0.06952318332123596 | 0.0 | 1.0 |
APARTMENTS_MODE | 151450 | 0.11423100693299629 | 0.10793603908753274 | 0.0 | 1.0 |
BASEMENTAREA_MODE | 127568 | 0.08754321224758496 | 0.08430717486924556 | 0.0 | 1.0 |
YEARS_BEGINEXPLUATATION_MODE | 157504 | 0.9770653729428661 | 0.06457543708048007 | 0.0 | 1.0 |
YEARS_BUILD_MODE | 103023 | 0.7596373227337418 | 0.11011102734194815 | 0.0 | 1.0 |
COMMONAREA_MODE | 92646 | 0.04255313775014629 | 0.07444452253839141 | 0.0 | 1.0 |
ELEVATORS_MODE | 143620 | 0.07448973610916797 | 0.1322561441505066 | 0.0 | 1.0 |
ENTRANCES_MODE | 152683 | 0.14519265864562494 | 0.10097698816024658 | 0.0 | 1.0 |
FLOORSMAX_MODE | 154491 | 0.22231504747847514 | 0.14370940659531573 | 0.0 | 1.0 |
FLOORSMIN_MODE | 98869 | 0.22805849255076463 | 0.16115977149547575 | 0.0 | 1.0 |
LANDAREA_MODE | 124921 | 0.06495768445657653 | 0.08175027780843509 | 0.0 | 1.0 |
LIVINGAPARTMENTS_MODE | 97312 | 0.10564485674943506 | 0.09788044657879368 | 0.0 | 1.0 |
LIVINGAREA_MODE | 153161 | 0.10597505043712185 | 0.1118452658778343 | 0.0 | 1.0 |
NONLIVINGAPARTMENTS_MODE | 93997 | 0.008076387544281912 | 0.04627626621983564 | 0.0 | 1.0 |
NONLIVINGAREA_MODE | 137829 | 0.02702231968598766 | 0.07025385904394447 | 0.0 | 1.0 |
APARTMENTS_MEDI | 151450 | 0.11784992076593205 | 0.10907590600115309 | 0.0 | 1.0 |
BASEMENTAREA_MEDI | 127568 | 0.08795485466574765 | 0.0821787495146342 | 0.0 | 1.0 |
YEARS_BEGINEXPLUATATION_MEDI | 157504 | 0.9777522640693693 | 0.0598973185051196 | 0.0 | 1.0 |
YEARS_BUILD_MEDI | 103023 | 0.7557462721916715 | 0.11206630964404381 | 0.0 | 1.0 |
COMMONAREA_MEDI | 92646 | 0.0445951017852909 | 0.07614426224091457 | 0.0 | 1.0 |
ELEVATORS_MEDI | 143620 | 0.0780778443113532 | 0.13446714769067444 | 0.0 | 1.0 |
ENTRANCES_MEDI | 152683 | 0.14921278072862998 | 0.1003683944976324 | 0.0 | 1.0 |
FLOORSMAX_MEDI | 154491 | 0.22589659009261476 | 0.14506702591935117 | 0.0 | 1.0 |
FLOORSMIN_MEDI | 98869 | 0.2316249380493541 | 0.16193354145715574 | 0.0 | 1.0 |
LANDAREA_MEDI | 124921 | 0.06716874904939972 | 0.08216701028007198 | 0.0 | 1.0 |
LIVINGAPARTMENTS_MEDI | 97312 | 0.10195447324073269 | 0.09364233271153845 | 0.0 | 1.0 |
LIVINGAREA_MEDI | 153161 | 0.10860673604899568 | 0.11226025867534792 | 0.0 | 1.0 |
NONLIVINGAPARTMENTS_MEDI | 93997 | 0.008651013330210322 | 0.04741472790780275 | 0.0 | 1.0 |
NONLIVINGAREA_MEDI | 137829 | 0.028235920597262158 | 0.07016648150682489 | 0.0 | 1.0 |
FONDKAPREMONT_MODE | 97216 | None | None | not specified | reg oper spec account |
HOUSETYPE_MODE | 153214 | None | None | block of flats | terraced house |
TOTALAREA_MODE | 159080 | 0.1025466626854412 | 0.10746232414961886 | 0.0 | 1.0 |
WALLSMATERIAL_MODE | 151170 | None | None | Block | Wooden |
EMERGENCYSTATE_MODE | 161756 | None | None | No | Yes |
OBS_30_CNT_SOCIAL_CIRCLE | 306490 | 1.4222454239942575 | 2.4009887461090127 | 0.0 | 9.0 |
DEF_30_CNT_SOCIAL_CIRCLE | 306490 | 0.1434206662533851 | 0.44669842938152715 | 0.0 | 8.0 |
OBS_60_CNT_SOCIAL_CIRCLE | 306490 | 1.4052921791901856 | 2.37980335197939 | 0.0 | 9.0 |
DEF_60_CNT_SOCIAL_CIRCLE | 306490 | 0.10004894123788705 | 0.3622908039755731 | 0.0 | 7.0 |
DAYS_LAST_PHONE_CHANGE | 307510 | -962.8587883320868 | 826.8084870406575 | -1.0 | 0.0 |
FLAG_DOCUMENT_2 | 307511 | 4.2274910491006824E-5 | 0.0065017890454897925 | 0 | 1 |
FLAG_DOCUMENT_3 | 307511 | 0.7100233812774177 | 0.45375196843273824 | 0 | 1 |
FLAG_DOCUMENT_4 | 307511 | 8.129790479039775E-5 | 0.009016183216550845 | 0 | 1 |
FLAG_DOCUMENT_5 | 307511 | 0.015114906458630749 | 0.12201022281354133 | 0 | 1 |
FLAG_DOCUMENT_6 | 307511 | 0.0880553866365756 | 0.28337589286299236 | 0 | 1 |
FLAG_DOCUMENT_7 | 307511 | 1.9186305530533867E-4 | 0.013850157677017446 | 0 | 1 |
FLAG_DOCUMENT_8 | 307511 | 0.08137595077899652 | 0.2734120489445129 | 0 | 1 |
FLAG_DOCUMENT_9 | 307511 | 0.00389579559755586 | 0.06229471080039349 | 0 | 1 |
FLAG_DOCUMENT_10 | 307511 | 2.276341334131137E-5 | 0.0047710553540692 | 0 | 1 |
FLAG_DOCUMENT_11 | 307511 | 0.003912055178513939 | 0.06242406326684515 | 0 | 1 |
FLAG_DOCUMENT_12 | 307511 | 6.503832383231819E-6 | 0.0025502570915978736 | 0 | 1 |
FLAG_DOCUMENT_13 | 307511 | 0.003525077151711646 | 0.05926771807375309 | 0 | 1 |
FLAG_DOCUMENT_14 | 307511 | 0.0029364803210291664 | 0.05410976737642881 | 0 | 1 |
FLAG_DOCUMENT_15 | 307511 | 0.0012097128232811183 | 0.03475993882769264 | 0 | 1 |
FLAG_DOCUMENT_16 | 307511 | 0.009928100133003373 | 0.09914416233784934 | 0 | 1 |
FLAG_DOCUMENT_17 | 307511 | 2.666571277125046E-4 | 0.016327488741596622 | 0 | 1 |
FLAG_DOCUMENT_18 | 307511 | 0.008129790479039774 | 0.08979823610939612 | 0 | 1 |
FLAG_DOCUMENT_19 | 307511 | 5.951006630657115E-4 | 0.02438746506586239 | 0 | 1 |
FLAG_DOCUMENT_20 | 307511 | 5.072989258920819E-4 | 0.022517620268446063 | 0 | 1 |
FLAG_DOCUMENT_21 | 307511 | 3.349473677364387E-4 | 0.018298531822437545 | 0 | 1 |
AMT_REQ_CREDIT_BUREAU_HOUR | 265992 | 0.006402448193930645 | 0.08384912844747658 | 0.0 | 4.0 |
AMT_REQ_CREDIT_BUREAU_DAY | 265992 | 0.0070002105326475985 | 0.11075740632435446 | 0.0 | 9.0 |
AMT_REQ_CREDIT_BUREAU_WEEK | 265992 | 0.0343619356973142 | 0.2046848758128244 | 0.0 | 8.0 |
AMT_REQ_CREDIT_BUREAU_MON | 265992 | 0.26739526000781977 | 0.9160023961526179 | 0.0 | 9.0 |
AMT_REQ_CREDIT_BUREAU_QRT | 265992 | 0.26547414959848414 | 0.7940556483207547 | 0.0 | 8.0 |
AMT_REQ_CREDIT_BUREAU_YEAR | 265992 | 1.899974435321363 | 1.869294998181561 | 0.0 | 9.0 |