import os, math, subprocess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_profiling
# Widen pandas' display limits so wide frames print without being truncated.
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.precision', 4)
# None means "no limit" on the text width of a cell. The former -1 sentinel
# was deprecated in pandas 1.0 and raises ValueError on pandas 2.x
# (None requires pandas >= 1.0).
pd.set_option('display.max_colwidth', None)
# Load the application training table from the local data/ directory.
pdf_data = pd.read_csv("data/application_train.csv")
# Print the (rows, columns) shape, then leave head() as the cell's last
# expression so the notebook shows a preview of the first rows.
print(pdf_data.shape)
pdf_data.head()
(307511, 122)
 | SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | NAME_INCOME_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_REGISTRATION | DAYS_ID_PUBLISH | OWN_CAR_AGE | FLAG_MOBIL | FLAG_EMP_PHONE | FLAG_WORK_PHONE | FLAG_CONT_MOBILE | FLAG_PHONE | FLAG_EMAIL | OCCUPATION_TYPE | CNT_FAM_MEMBERS | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | LIVE_REGION_NOT_WORK_REGION | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | LIVE_CITY_NOT_WORK_CITY | ORGANIZATION_TYPE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | APARTMENTS_AVG | BASEMENTAREA_AVG | YEARS_BEGINEXPLUATATION_AVG | YEARS_BUILD_AVG | COMMONAREA_AVG | ELEVATORS_AVG | ENTRANCES_AVG | FLOORSMAX_AVG | FLOORSMIN_AVG | LANDAREA_AVG | LIVINGAPARTMENTS_AVG | LIVINGAREA_AVG | NONLIVINGAPARTMENTS_AVG | NONLIVINGAREA_AVG | APARTMENTS_MODE | BASEMENTAREA_MODE | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_MODE | COMMONAREA_MODE | ELEVATORS_MODE | ENTRANCES_MODE | FLOORSMAX_MODE | FLOORSMIN_MODE | LANDAREA_MODE | LIVINGAPARTMENTS_MODE | LIVINGAREA_MODE | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_MODE | APARTMENTS_MEDI | BASEMENTAREA_MEDI | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BUILD_MEDI | COMMONAREA_MEDI | ELEVATORS_MEDI | ENTRANCES_MEDI | FLOORSMAX_MEDI | FLOORSMIN_MEDI | LANDAREA_MEDI | LIVINGAPARTMENTS_MEDI | LIVINGAREA_MEDI | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAREA_MEDI | FONDKAPREMONT_MODE | HOUSETYPE_MODE | TOTALAREA_MODE | WALLSMATERIAL_MODE | EMERGENCYSTATE_MODE | OBS_30_CNT_SOCIAL_CIRCLE | DEF_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | DAYS_LAST_PHONE_CHANGE | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | FLAG_DOCUMENT_7 | FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100002 | 1 | Cash loans | M | N | Y | 0 | 202500.0 | 406597.5 | 24700.5 | 351000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0188 | -9461 | -637 | -3648.0 | -2120 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Laborers | 1.0 | 2 | 2 | WEDNESDAY | 10 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | 0.0830 | 0.2629 | 0.1394 | 0.0247 | 0.0369 | 0.9722 | 0.6192 | 0.0143 | 0.00 | 0.0690 | 0.0833 | 0.1250 | 0.0369 | 0.0202 | 0.0190 | 0.0000 | 0.0000 | 0.0252 | 0.0383 | 0.9722 | 0.6341 | 0.0144 | 0.0000 | 0.0690 | 0.0833 | 0.1250 | 0.0377 | 0.022 | 0.0198 | 0.0 | 0.0 | 0.0250 | 0.0369 | 0.9722 | 0.6243 | 0.0144 | 0.00 | 0.0690 | 0.0833 | 0.1250 | 0.0375 | 0.0205 | 0.0193 | 0.0000 | 0.00 | reg oper account | block of flats | 0.0149 | Stone, brick | No | 2.0 | 2.0 | 2.0 | 2.0 | -1134.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
1 | 100003 | 0 | Cash loans | F | N | N | 0 | 270000.0 | 1293502.5 | 35698.5 | 1129500.0 | Family | State servant | Higher education | Married | House / apartment | 0.0035 | -16765 | -1188 | -1186.0 | -291 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Core staff | 2.0 | 1 | 1 | MONDAY | 11 | 0 | 0 | 0 | 0 | 0 | 0 | School | 0.3113 | 0.6222 | NaN | 0.0959 | 0.0529 | 0.9851 | 0.7960 | 0.0605 | 0.08 | 0.0345 | 0.2917 | 0.3333 | 0.0130 | 0.0773 | 0.0549 | 0.0039 | 0.0098 | 0.0924 | 0.0538 | 0.9851 | 0.8040 | 0.0497 | 0.0806 | 0.0345 | 0.2917 | 0.3333 | 0.0128 | 0.079 | 0.0554 | 0.0 | 0.0 | 0.0968 | 0.0529 | 0.9851 | 0.7987 | 0.0608 | 0.08 | 0.0345 | 0.2917 | 0.3333 | 0.0132 | 0.0787 | 0.0558 | 0.0039 | 0.01 | reg oper account | block of flats | 0.0714 | Block | No | 1.0 | 0.0 | 1.0 | 0.0 | -828.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 100004 | 0 | Revolving loans | M | Y | Y | 0 | 67500.0 | 135000.0 | 6750.0 | 135000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0100 | -19046 | -225 | -4260.0 | -2531 | 26.0 | 1 | 1 | 1 | 1 | 1 | 0 | Laborers | 1.0 | 2 | 2 | MONDAY | 9 | 0 | 0 | 0 | 0 | 0 | 0 | Government | NaN | 0.5559 | 0.7296 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -815.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | 100006 | 0 | Cash loans | F | N | Y | 0 | 135000.0 | 312682.5 | 29686.5 | 297000.0 | Unaccompanied | Working | Secondary / secondary special | Civil marriage | House / apartment | 0.0080 | -19005 | -3039 | -9833.0 | -2437 | NaN | 1 | 1 | 0 | 1 | 0 | 0 | Laborers | 2.0 | 2 | 2 | WEDNESDAY | 17 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | NaN | 0.6504 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 0.0 | 2.0 | 0.0 | -617.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 100007 | 0 | Cash loans | M | N | Y | 0 | 121500.0 | 513000.0 | 21865.5 | 513000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0287 | -19932 | -3038 | -4311.0 | -3458 | NaN | 1 | 1 | 0 | 1 | 0 | 0 | Core staff | 1.0 | 2 | 2 | THURSDAY | 11 | 0 | 0 | 0 | 0 | 1 | 1 | Religion | NaN | 0.3227 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -1106.0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
%%time
# Build a pandas-profiling report over the whole frame. Leaving `profile` as
# the cell's last expression makes the notebook display the report. The
# recorded run took about three minutes of wall time on the 307511 x 122 frame.
profile = pandas_profiling.ProfileReport(pdf_data)
profile
CPU times: user 4min 2s, sys: 7.74 s, total: 4min 9s Wall time: 2min 59s
Dataset info
Number of variables | 122 |
---|---|
Number of observations | 307511 |
Total Missing (%) | 9.6% |
Total size in memory | 286.2 MiB |
Average record size in memory | 976.0 B |
Variables types
Numeric | 39 |
---|---|
Categorical | 16 |
Boolean | 33 |
Date | 0 |
Text (Unique) | 0 |
Rejected | 34 |
Unsupported | 0 |
Warnings
AMT_GOODS_PRICE is highly correlated with AMT_CREDIT (ρ = 0.98697) [Rejected]
AMT_INCOME_TOTAL is highly skewed (γ1 = 391.56) [Skewed]
AMT_REQ_CREDIT_BUREAU_DAY has 41519 / 13.5% missing values [Missing]
AMT_REQ_CREDIT_BUREAU_DAY is highly skewed (γ1 = 27.044) [Skewed]
AMT_REQ_CREDIT_BUREAU_DAY has 264503 / 86.0% zeros [Zeros]
AMT_REQ_CREDIT_BUREAU_HOUR has 41519 / 13.5% missing values [Missing]
AMT_REQ_CREDIT_BUREAU_HOUR has 264366 / 86.0% zeros [Zeros]
AMT_REQ_CREDIT_BUREAU_MON has 41519 / 13.5% missing values [Missing]
AMT_REQ_CREDIT_BUREAU_MON has 222233 / 72.3% zeros [Zeros]
AMT_REQ_CREDIT_BUREAU_QRT has 41519 / 13.5% missing values [Missing]
AMT_REQ_CREDIT_BUREAU_QRT is highly skewed (γ1 = 134.37) [Skewed]
AMT_REQ_CREDIT_BUREAU_QRT has 215417 / 70.1% zeros [Zeros]
AMT_REQ_CREDIT_BUREAU_WEEK has 41519 / 13.5% missing values [Missing]
AMT_REQ_CREDIT_BUREAU_WEEK has 257456 / 83.7% zeros [Zeros]
AMT_REQ_CREDIT_BUREAU_YEAR has 41519 / 13.5% missing values [Missing]
AMT_REQ_CREDIT_BUREAU_YEAR has 71801 / 23.3% zeros [Zeros]
APARTMENTS_AVG has 156061 / 50.7% missing values [Missing]
APARTMENTS_MEDI is highly correlated with LIVINGAPARTMENTS_MODE (ρ = 0.93217) [Rejected]
APARTMENTS_MODE is highly correlated with LIVINGAPARTMENTS_AVG (ρ = 0.90828) [Rejected]
BASEMENTAREA_AVG has 179943 / 58.5% missing values [Missing]
BASEMENTAREA_AVG has 14745 / 4.8% zeros [Zeros]
BASEMENTAREA_MEDI is highly correlated with BASEMENTAREA_MODE (ρ = 0.97794) [Rejected]
BASEMENTAREA_MODE is highly correlated with BASEMENTAREA_AVG (ρ = 0.9735) [Rejected]
CNT_CHILDREN has 215371 / 70.0% zeros [Zeros]
COMMONAREA_AVG has 214865 / 69.9% missing values [Missing]
COMMONAREA_AVG has 8442 / 2.7% zeros [Zeros]
COMMONAREA_MEDI is highly correlated with COMMONAREA_MODE (ρ = 0.97989) [Rejected]
COMMONAREA_MODE is highly correlated with COMMONAREA_AVG (ρ = 0.97715) [Rejected]
DAYS_LAST_PHONE_CHANGE has 37672 / 12.3% zeros [Zeros]
DEF_30_CNT_SOCIAL_CIRCLE has 271324 / 88.2% zeros [Zeros]
DEF_60_CNT_SOCIAL_CIRCLE has 280721 / 91.3% zeros [Zeros]
ELEVATORS_AVG has 163891 / 53.3% missing values [Missing]
ELEVATORS_AVG has 85718 / 27.9% zeros [Zeros]
ELEVATORS_MEDI is highly correlated with ELEVATORS_MODE (ρ = 0.98283) [Rejected]
ELEVATORS_MODE is highly correlated with ELEVATORS_AVG (ρ = 0.97884) [Rejected]
EMERGENCYSTATE_MODE has 145755 / 47.4% missing values [Missing]
ENTRANCES_AVG has 154828 / 50.3% missing values [Missing]
ENTRANCES_MEDI is highly correlated with ENTRANCES_MODE (ρ = 0.98068) [Rejected]
ENTRANCES_MODE is highly correlated with ENTRANCES_AVG (ρ = 0.97774) [Rejected]
EXT_SOURCE_1 has 173378 / 56.4% missing values [Missing]
EXT_SOURCE_3 has 60965 / 19.8% missing values [Missing]
FLOORSMAX_AVG has 153020 / 49.8% missing values [Missing]
FLOORSMAX_MEDI is highly correlated with FLOORSMAX_MODE (ρ = 0.98824) [Rejected]
FLOORSMAX_MODE is highly correlated with FLOORSMAX_AVG (ρ = 0.98569) [Rejected]
FLOORSMIN_AVG has 208642 / 67.8% missing values [Missing]
FLOORSMIN_MEDI is highly correlated with FLOORSMIN_MODE (ρ = 0.98841) [Rejected]
FLOORSMIN_MODE is highly correlated with FLOORSMIN_AVG (ρ = 0.98588) [Rejected]
FONDKAPREMONT_MODE has 210295 / 68.4% missing values [Missing]
HOUSETYPE_MODE has 154297 / 50.2% missing values [Missing]
LANDAREA_AVG has 182590 / 59.4% missing values [Missing]
LANDAREA_AVG has 15600 / 5.1% zeros [Zeros]
LANDAREA_MEDI is highly correlated with LANDAREA_MODE (ρ = 0.98084) [Rejected]
LANDAREA_MODE is highly correlated with LANDAREA_AVG (ρ = 0.9737) [Rejected]
LIVINGAPARTMENTS_AVG is highly correlated with APARTMENTS_AVG (ρ = 0.94395) [Rejected]
LIVINGAPARTMENTS_MEDI is highly correlated with APARTMENTS_MEDI (ρ = 0.94249) [Rejected]
LIVINGAPARTMENTS_MODE is highly correlated with APARTMENTS_MODE (ρ = 0.93776) [Rejected]
LIVINGAREA_AVG is highly correlated with APARTMENTS_AVG (ρ = 0.91362) [Rejected]
LIVINGAREA_MEDI is highly correlated with APARTMENTS_MEDI (ρ = 0.91595) [Rejected]
LIVINGAREA_MODE is highly correlated with APARTMENTS_MODE (ρ = 0.91038) [Rejected]
NONLIVINGAPARTMENTS_AVG has 213514 / 69.4% missing values [Missing]
NONLIVINGAPARTMENTS_AVG has 54549 / 17.7% zeros [Zeros]
NONLIVINGAPARTMENTS_MEDI is highly correlated with NONLIVINGAPARTMENTS_MODE (ρ = 0.97857) [Rejected]
NONLIVINGAPARTMENTS_MODE is highly correlated with NONLIVINGAPARTMENTS_AVG (ρ = 0.96937) [Rejected]
NONLIVINGAREA_AVG has 169682 / 55.2% missing values [Missing]
NONLIVINGAREA_AVG has 58735 / 19.1% zeros [Zeros]
NONLIVINGAREA_MEDI is highly correlated with NONLIVINGAREA_MODE (ρ = 0.97584) [Rejected]
NONLIVINGAREA_MODE is highly correlated with NONLIVINGAREA_AVG (ρ = 0.96609) [Rejected]
OBS_30_CNT_SOCIAL_CIRCLE has 163910 / 53.3% zeros [Zeros]
OBS_60_CNT_SOCIAL_CIRCLE is highly correlated with OBS_30_CNT_SOCIAL_CIRCLE (ρ = 0.99849) [Rejected]
OCCUPATION_TYPE has 96391 / 31.3% missing values [Missing]
ORGANIZATION_TYPE has a high cardinality: 58 distinct values [Warning]
OWN_CAR_AGE has 202929 / 66.0% missing values [Missing]
REGION_RATING_CLIENT_W_CITY is highly correlated with REGION_RATING_CLIENT (ρ = 0.95084) [Rejected]
TOTALAREA_MODE is highly correlated with LIVINGAREA_MEDI (ρ = 0.91936) [Rejected]
WALLSMATERIAL_MODE has 156341 / 50.8% missing values [Missing]
YEARS_BEGINEXPLUATATION_AVG has 150007 / 48.8% missing values [Missing]
YEARS_BEGINEXPLUATATION_MEDI is highly correlated with YEARS_BEGINEXPLUATATION_MODE (ρ = 0.96354) [Rejected]
YEARS_BEGINEXPLUATATION_MODE is highly correlated with YEARS_BEGINEXPLUATATION_AVG (ρ = 0.97189) [Rejected]
YEARS_BUILD_AVG has 204488 / 66.5% missing values [Missing]
YEARS_BUILD_MEDI is highly correlated with YEARS_BUILD_MODE (ρ = 0.98946) [Rejected]
YEARS_BUILD_MODE is highly correlated with YEARS_BUILD_AVG (ρ = 0.98944) [Rejected]
AMT_ANNUITY
Numeric
Distinct count | 13673 |
---|---|
Unique (%) | 4.4% |
Missing (%) | 0.0% |
Missing (n) | 12 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 27109 |
---|---|
Minimum | 1615.5 |
Maximum | 258030 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1615.5 |
---|---|
5-th percentile | 9000 |
Q1 | 16524 |
Median | 24903 |
Q3 | 34596 |
95-th percentile | 53325 |
Maximum | 258030 |
Range | 256410 |
Interquartile range | 18072 |
Descriptive statistics
Standard deviation | 14494 |
---|---|
Coef of variation | 0.53466 |
Kurtosis | 7.7073 |
Mean | 27109 |
MAD | 10975 |
Skewness | 1.5798 |
Sum | 8335900000 |
Variance | 210070000 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
9000.0 | 6385 | 2.1% | |
13500.0 | 5514 | 1.8% | |
6750.0 | 2279 | 0.7% | |
10125.0 | 2035 | 0.7% | |
37800.0 | 1602 | 0.5% | |
11250.0 | 1459 | 0.5% | |
26217.0 | 1453 | 0.5% | |
20250.0 | 1345 | 0.4% | |
12375.0 | 1339 | 0.4% | |
31653.0 | 1269 | 0.4% | |
Other values (13662) | 282819 | 92.0% |
Minimum 5 values
Value | Count | Frequency (%) | |
1615.5 | 1 | 0.0% | |
1980.0 | 2 | 0.0% | |
1993.5 | 1 | 0.0% | |
2052.0 | 1 | 0.0% | |
2164.5 | 2 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
216589.5 | 1 | 0.0% | |
220297.5 | 1 | 0.0% | |
225000.0 | 23 | 0.0% | |
230161.5 | 1 | 0.0% | |
258025.5 | 1 | 0.0% |
AMT_CREDIT
Numeric
Distinct count | 5603 |
---|---|
Unique (%) | 1.8% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 599030 |
---|---|
Minimum | 45000 |
Maximum | 4050000 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 45000 |
---|---|
5-th percentile | 135000 |
Q1 | 270000 |
Median | 513530 |
Q3 | 808650 |
95-th percentile | 1350000 |
Maximum | 4050000 |
Range | 4005000 |
Interquartile range | 538650 |
Descriptive statistics
Standard deviation | 402490 |
---|---|
Coef of variation | 0.67191 |
Kurtosis | 1.934 |
Mean | 599030 |
MAD | 316580 |
Skewness | 1.2348 |
Sum | 184210000000 |
Variance | 162000000000 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
450000.0 | 9709 | 3.2% | |
675000.0 | 8877 | 2.9% | |
225000.0 | 8162 | 2.7% | |
180000.0 | 7342 | 2.4% | |
270000.0 | 7241 | 2.4% | |
900000.0 | 6246 | 2.0% | |
254700.0 | 4500 | 1.5% | |
545040.0 | 4437 | 1.4% | |
808650.0 | 4152 | 1.4% | |
135000.0 | 3660 | 1.2% | |
Other values (5593) | 243185 | 79.1% |
Minimum 5 values
Value | Count | Frequency (%) | |
45000.0 | 230 | 0.1% | |
47970.0 | 218 | 0.1% | |
48519.0 | 1 | 0.0% | |
49455.0 | 19 | 0.0% | |
49500.0 | 40 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
3860019.0 | 1 | 0.0% | |
3956274.0 | 1 | 0.0% | |
4027680.0 | 1 | 0.0% | |
4031032.5 | 1 | 0.0% | |
4050000.0 | 8 | 0.0% |
AMT_GOODS_PRICE
Highly correlated
This variable is highly correlated with AMT_CREDIT
and should be ignored for analysis
Correlation | 0.98697 |
---|
AMT_INCOME_TOTAL
Numeric
Distinct count | 2548 |
---|---|
Unique (%) | 0.8% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 168800 |
---|---|
Minimum | 25650 |
Maximum | 117000000 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 25650 |
---|---|
5-th percentile | 67500 |
Q1 | 112500 |
Median | 147150 |
Q3 | 202500 |
95-th percentile | 337500 |
Maximum | 117000000 |
Range | 116970000 |
Interquartile range | 90000 |
Descriptive statistics
Standard deviation | 237120 |
---|---|
Coef of variation | 1.4048 |
Kurtosis | 191790 |
Mean | 168800 |
MAD | 66226 |
Skewness | 391.56 |
Sum | 51907000000 |
Variance | 56227000000 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
135000.0 | 35750 | 11.6% | |
112500.0 | 31019 | 10.1% | |
157500.0 | 26556 | 8.6% | |
180000.0 | 24719 | 8.0% | |
90000.0 | 22483 | 7.3% | |
225000.0 | 20698 | 6.7% | |
202500.0 | 16341 | 5.3% | |
67500.0 | 11147 | 3.6% | |
270000.0 | 10827 | 3.5% | |
81000.0 | 6001 | 2.0% | |
Other values (2538) | 101970 | 33.2% |
Minimum 5 values
Value | Count | Frequency (%) | |
25650.0 | 2 | 0.0% | |
26100.0 | 3 | 0.0% | |
26460.0 | 1 | 0.0% | |
26550.0 | 2 | 0.0% | |
27000.0 | 66 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
6750000.0 | 1 | 0.0% | |
9000000.0 | 1 | 0.0% | |
13500000.0 | 1 | 0.0% | |
18000090.0 | 1 | 0.0% | |
117000000.0 | 1 | 0.0% |
AMT_REQ_CREDIT_BUREAU_DAY
Numeric
Distinct count | 10 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 13.5% |
Missing (n) | 41519 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.0070002 |
---|---|
Minimum | 0 |
Maximum | 9 |
Zeros (%) | 86.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 0 |
Maximum | 9 |
Range | 9 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.11076 |
---|---|
Coef of variation | 15.822 |
Kurtosis | 1151.9 |
Mean | 0.0070002 |
MAD | 0.013922 |
Skewness | 27.044 |
Sum | 1862 |
Variance | 0.012267 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 264503 | 86.0% | |
1.0 | 1292 | 0.4% | |
2.0 | 106 | 0.0% | |
3.0 | 45 | 0.0% | |
4.0 | 26 | 0.0% | |
5.0 | 9 | 0.0% | |
6.0 | 8 | 0.0% | |
9.0 | 2 | 0.0% | |
8.0 | 1 | 0.0% | |
(Missing) | 41519 | 13.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 264503 | 86.0% | |
1.0 | 1292 | 0.4% | |
2.0 | 106 | 0.0% | |
3.0 | 45 | 0.0% | |
4.0 | 26 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
4.0 | 26 | 0.0% | |
5.0 | 9 | 0.0% | |
6.0 | 8 | 0.0% | |
8.0 | 1 | 0.0% | |
9.0 | 2 | 0.0% |
AMT_REQ_CREDIT_BUREAU_HOUR
Numeric
Distinct count | 6 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 13.5% |
Missing (n) | 41519 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.0064024 |
---|---|
Minimum | 0 |
Maximum | 4 |
Zeros (%) | 86.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 0 |
Maximum | 4 |
Range | 4 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.083849 |
---|---|
Coef of variation | 13.096 |
Kurtosis | 254.24 |
Mean | 0.0064024 |
MAD | 0.012727 |
Skewness | 14.534 |
Sum | 1703 |
Variance | 0.0070307 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 264366 | 86.0% | |
1.0 | 1560 | 0.5% | |
2.0 | 56 | 0.0% | |
3.0 | 9 | 0.0% | |
4.0 | 1 | 0.0% | |
(Missing) | 41519 | 13.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 264366 | 86.0% | |
1.0 | 1560 | 0.5% | |
2.0 | 56 | 0.0% | |
3.0 | 9 | 0.0% | |
4.0 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.0 | 264366 | 86.0% | |
1.0 | 1560 | 0.5% | |
2.0 | 56 | 0.0% | |
3.0 | 9 | 0.0% | |
4.0 | 1 | 0.0% |
AMT_REQ_CREDIT_BUREAU_MON
Numeric
Distinct count | 25 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 13.5% |
Missing (n) | 41519 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.2674 |
---|---|
Minimum | 0 |
Maximum | 27 |
Zeros (%) | 72.3% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 1 |
Maximum | 27 |
Range | 27 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.916 |
---|---|
Coef of variation | 3.4256 |
Kurtosis | 90.435 |
Mean | 0.2674 |
MAD | 0.44681 |
Skewness | 7.8048 |
Sum | 71125 |
Variance | 0.83906 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 222233 | 72.3% | |
1.0 | 33147 | 10.8% | |
2.0 | 5386 | 1.8% | |
3.0 | 1991 | 0.6% | |
4.0 | 1076 | 0.3% | |
5.0 | 602 | 0.2% | |
6.0 | 343 | 0.1% | |
7.0 | 298 | 0.1% | |
9.0 | 206 | 0.1% | |
8.0 | 185 | 0.1% | |
Other values (14) | 525 | 0.2% | |
(Missing) | 41519 | 13.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 222233 | 72.3% | |
1.0 | 33147 | 10.8% | |
2.0 | 5386 | 1.8% | |
3.0 | 1991 | 0.6% | |
4.0 | 1076 | 0.3% |
Maximum 5 values
Value | Count | Frequency (%) | |
19.0 | 3 | 0.0% | |
22.0 | 1 | 0.0% | |
23.0 | 1 | 0.0% | |
24.0 | 1 | 0.0% | |
27.0 | 1 | 0.0% |
AMT_REQ_CREDIT_BUREAU_QRT
Numeric
Distinct count | 12 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 13.5% |
Missing (n) | 41519 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.26547 |
---|---|
Minimum | 0 |
Maximum | 261 |
Zeros (%) | 70.1% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 2 |
Maximum | 261 |
Range | 261 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.79406 |
---|---|
Coef of variation | 2.9911 |
Kurtosis | 43707 |
Mean | 0.26547 |
MAD | 0.43 |
Skewness | 134.37 |
Sum | 70614 |
Variance | 0.63052 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 215417 | 70.1% | |
1.0 | 33862 | 11.0% | |
2.0 | 14412 | 4.7% | |
3.0 | 1717 | 0.6% | |
4.0 | 476 | 0.2% | |
5.0 | 64 | 0.0% | |
6.0 | 28 | 0.0% | |
7.0 | 7 | 0.0% | |
8.0 | 7 | 0.0% | |
19.0 | 1 | 0.0% | |
(Missing) | 41519 | 13.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 215417 | 70.1% | |
1.0 | 33862 | 11.0% | |
2.0 | 14412 | 4.7% | |
3.0 | 1717 | 0.6% | |
4.0 | 476 | 0.2% |
Maximum 5 values
Value | Count | Frequency (%) | |
6.0 | 28 | 0.0% | |
7.0 | 7 | 0.0% | |
8.0 | 7 | 0.0% | |
19.0 | 1 | 0.0% | |
261.0 | 1 | 0.0% |
AMT_REQ_CREDIT_BUREAU_WEEK
Numeric
Distinct count | 10 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 13.5% |
Missing (n) | 41519 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.034362 |
---|---|
Minimum | 0 |
Maximum | 8 |
Zeros (%) | 83.7% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 0 |
Maximum | 8 |
Range | 8 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.20468 |
---|---|
Coef of variation | 5.9567 |
Kurtosis | 166.75 |
Mean | 0.034362 |
MAD | 0.066518 |
Skewness | 9.2936 |
Sum | 9140 |
Variance | 0.041896 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 257456 | 83.7% | |
1.0 | 8208 | 2.7% | |
2.0 | 199 | 0.1% | |
3.0 | 58 | 0.0% | |
4.0 | 34 | 0.0% | |
6.0 | 20 | 0.0% | |
5.0 | 10 | 0.0% | |
8.0 | 5 | 0.0% | |
7.0 | 2 | 0.0% | |
(Missing) | 41519 | 13.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 257456 | 83.7% | |
1.0 | 8208 | 2.7% | |
2.0 | 199 | 0.1% | |
3.0 | 58 | 0.0% | |
4.0 | 34 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
4.0 | 34 | 0.0% | |
5.0 | 10 | 0.0% | |
6.0 | 20 | 0.0% | |
7.0 | 2 | 0.0% | |
8.0 | 5 | 0.0% |
AMT_REQ_CREDIT_BUREAU_YEAR
Numeric
Distinct count | 26 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 13.5% |
Missing (n) | 41519 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 1.9 |
---|---|
Minimum | 0 |
Maximum | 25 |
Zeros (%) | 23.3% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 1 |
Q3 | 3 |
95-th percentile | 6 |
Maximum | 25 |
Range | 25 |
Interquartile range | 3 |
Descriptive statistics
Standard deviation | 1.8693 |
---|---|
Coef of variation | 0.98385 |
Kurtosis | 1.969 |
Mean | 1.9 |
MAD | 1.4548 |
Skewness | 1.2436 |
Sum | 505380 |
Variance | 3.4943 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 71801 | 23.3% | |
1.0 | 63405 | 20.6% | |
2.0 | 50192 | 16.3% | |
3.0 | 33628 | 10.9% | |
4.0 | 20714 | 6.7% | |
5.0 | 12052 | 3.9% | |
6.0 | 6967 | 2.3% | |
7.0 | 3869 | 1.3% | |
8.0 | 2127 | 0.7% | |
9.0 | 1096 | 0.4% | |
Other values (15) | 141 | 0.0% | |
(Missing) | 41519 | 13.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 71801 | 23.3% | |
1.0 | 63405 | 20.6% | |
2.0 | 50192 | 16.3% | |
3.0 | 33628 | 10.9% | |
4.0 | 20714 | 6.7% |
Maximum 5 values
Value | Count | Frequency (%) | |
20.0 | 1 | 0.0% | |
21.0 | 1 | 0.0% | |
22.0 | 1 | 0.0% | |
23.0 | 1 | 0.0% | |
25.0 | 1 | 0.0% |
APARTMENTS_AVG
Numeric
Distinct count | 2340 |
---|---|
Unique (%) | 0.8% |
Missing (%) | 50.7% |
Missing (n) | 156061 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.11744 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 0.2% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0.0082 |
Q1 | 0.0577 |
Median | 0.0876 |
Q3 | 0.1485 |
95-th percentile | 0.3268 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.0908 |
Descriptive statistics
Standard deviation | 0.10824 |
---|---|
Coef of variation | 0.92166 |
Kurtosis | 11.394 |
Mean | 0.11744 |
MAD | 0.073286 |
Skewness | 2.6418 |
Sum | 17786 |
Variance | 0.011716 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0825 | 6663 | 2.2% | |
0.0619 | 6332 | 2.1% | |
0.0928 | 4404 | 1.4% | |
0.0722 | 3986 | 1.3% | |
0.0082 | 3507 | 1.1% | |
0.0165 | 3027 | 1.0% | |
0.1031 | 2892 | 0.9% | |
0.1485 | 2769 | 0.9% | |
0.0124 | 2721 | 0.9% | |
0.0742 | 2231 | 0.7% | |
Other values (2329) | 112918 | 36.7% | |
(Missing) | 156061 | 50.7% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 751 | 0.2% | |
0.001 | 197 | 0.1% | |
0.0014 | 1 | 0.0% | |
0.0015 | 6 | 0.0% | |
0.0017 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9814 | 9 | 0.0% | |
0.9876 | 7 | 0.0% | |
0.9897 | 1 | 0.0% | |
0.9907 | 2 | 0.0% | |
1.0 | 147 | 0.0% |
APARTMENTS_MEDI
Highly correlated
This variable is highly correlated with LIVINGAPARTMENTS_MODE
and should be ignored for analysis
Correlation | 0.93217 |
---|
APARTMENTS_MODE
Highly correlated
This variable is highly correlated with LIVINGAPARTMENTS_AVG
and should be ignored for analysis
Correlation | 0.90828 |
---|
BASEMENTAREA_AVG
Numeric
Distinct count | 3781 |
---|---|
Unique (%) | 1.2% |
Missing (%) | 58.5% |
Missing (n) | 179943 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.088442 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 4.8% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0.0442 |
Median | 0.0763 |
Q3 | 0.1122 |
95-th percentile | 0.2237 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.068 |
Descriptive statistics
Standard deviation | 0.082438 |
---|---|
Coef of variation | 0.93211 |
Kurtosis | 25.93 |
Mean | 0.088442 |
MAD | 0.052361 |
Skewness | 3.5663 |
Sum | 11282 |
Variance | 0.0067961 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 14745 | 4.8% | |
0.0545 | 251 | 0.1% | |
0.0818 | 251 | 0.1% | |
0.0727 | 248 | 0.1% | |
0.1091 | 246 | 0.1% | |
0.0796 | 245 | 0.1% | |
0.08 | 239 | 0.1% | |
0.0805 | 230 | 0.1% | |
0.0764 | 220 | 0.1% | |
0.0793 | 211 | 0.1% | |
Other values (3770) | 110682 | 36.0% | |
(Missing) | 179943 | 58.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 14745 | 4.8% | |
0.0001 | 99 | 0.0% | |
0.0002 | 38 | 0.0% | |
0.0003 | 8 | 0.0% | |
0.0004 | 33 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9677 | 1 | 0.0% | |
0.9682 | 1 | 0.0% | |
0.9694 | 2 | 0.0% | |
0.9945 | 1 | 0.0% | |
1.0 | 130 | 0.0% |
BASEMENTAREA_MEDI
Highly correlated
This variable is highly correlated with BASEMENTAREA_MODE
and should be ignored for analysis
Correlation | 0.97794 |
---|
BASEMENTAREA_MODE
Highly correlated
This variable is highly correlated with BASEMENTAREA_AVG
and should be ignored for analysis
Correlation | 0.9735 |
---|
CNT_CHILDREN
Numeric
Distinct count | 15 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.41705 |
---|---|
Minimum | 0 |
Maximum | 19 |
Zeros (%) | 70.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 1 |
95-th percentile | 2 |
Maximum | 19 |
Range | 19 |
Interquartile range | 1 |
Descriptive statistics
Standard deviation | 0.72212 |
---|---|
Coef of variation | 1.7315 |
Kurtosis | 7.9041 |
Mean | 0.41705 |
MAD | 0.58418 |
Skewness | 1.9746 |
Sum | 128248 |
Variance | 0.52146 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0 | 215371 | 70.0% | |
1 | 61119 | 19.9% | |
2 | 26749 | 8.7% | |
3 | 3717 | 1.2% | |
4 | 429 | 0.1% | |
5 | 84 | 0.0% | |
6 | 21 | 0.0% | |
7 | 7 | 0.0% | |
14 | 3 | 0.0% | |
19 | 2 | 0.0% | |
Other values (5) | 9 | 0.0% |
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 215371 | 70.0% | |
1 | 61119 | 19.9% | |
2 | 26749 | 8.7% | |
3 | 3717 | 1.2% | |
4 | 429 | 0.1% |
Maximum 5 values
Value | Count | Frequency (%) | |
10 | 2 | 0.0% | |
11 | 1 | 0.0% | |
12 | 2 | 0.0% | |
14 | 3 | 0.0% | |
19 | 2 | 0.0% |
CNT_FAM_MEMBERS
Numeric
Distinct count | 18 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 2 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.1527 |
---|---|
Minimum | 1 |
Maximum | 20 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 2 |
Q3 | 3 |
95-th percentile | 4 |
Maximum | 20 |
Range | 19 |
Interquartile range | 1 |
Descriptive statistics
Standard deviation | 0.91068 |
---|---|
Coef of variation | 0.42305 |
Kurtosis | 2.802 |
Mean | 2.1527 |
MAD | 0.66587 |
Skewness | 0.98754 |
Sum | 661960 |
Variance | 0.82934 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
2.0 | 158357 | 51.5% | |
1.0 | 67847 | 22.1% | |
3.0 | 52601 | 17.1% | |
4.0 | 24697 | 8.0% | |
5.0 | 3478 | 1.1% | |
6.0 | 408 | 0.1% | |
7.0 | 81 | 0.0% | |
8.0 | 20 | 0.0% | |
9.0 | 6 | 0.0% | |
10.0 | 3 | 0.0% | |
Other values (7) | 11 | 0.0% |
Minimum 5 values
Value | Count | Frequency (%) | |
1.0 | 67847 | 22.1% | |
2.0 | 158357 | 51.5% | |
3.0 | 52601 | 17.1% | |
4.0 | 24697 | 8.0% | |
5.0 | 3478 | 1.1% |
Maximum 5 values
Value | Count | Frequency (%) | |
13.0 | 1 | 0.0% | |
14.0 | 2 | 0.0% | |
15.0 | 1 | 0.0% | |
16.0 | 2 | 0.0% | |
20.0 | 2 | 0.0% |
CODE_GENDER
Categorical
Distinct count | 3 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
F | |
---|---|
M | |
XNA | 4 |
Value | Count | Frequency (%) | |
F | 202448 | 65.8% | |
M | 105059 | 34.2% | |
XNA | 4 | 0.0% |
COMMONAREA_AVG
Numeric
Distinct count | 3182 |
---|---|
Unique (%) | 1.0% |
Missing (%) | 69.9% |
Missing (n) | 214865 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.044621 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 2.7% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0.0078 |
Median | 0.0211 |
Q3 | 0.0515 |
95-th percentile | 0.1601 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.0437 |
Descriptive statistics
Standard deviation | 0.076036 |
---|---|
Coef of variation | 1.704 |
Kurtosis | 45.988 |
Mean | 0.044621 |
MAD | 0.042024 |
Skewness | 5.4573 |
Sum | 4133.9 |
Variance | 0.0057814 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 8442 | 2.7% | |
0.0079 | 544 | 0.2% | |
0.0078 | 475 | 0.2% | |
0.008 | 446 | 0.1% | |
0.0077 | 414 | 0.1% | |
0.0086 | 365 | 0.1% | |
0.0014 | 345 | 0.1% | |
0.006999999999999999 | 343 | 0.1% | |
0.0013 | 317 | 0.1% | |
0.0069 | 314 | 0.1% | |
Other values (3171) | 80641 | 26.2% | |
(Missing) | 214865 | 69.9% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 8442 | 2.7% | |
0.0001 | 45 | 0.0% | |
0.0002 | 67 | 0.0% | |
0.0003 | 84 | 0.0% | |
0.0004 | 62 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9601 | 1 | 0.0% | |
0.9833 | 1 | 0.0% | |
0.9906 | 2 | 0.0% | |
0.9937 | 2 | 0.0% | |
1.0 | 92 | 0.0% |
COMMONAREA_MEDI
Highly correlated
This variable is highly correlated with COMMONAREA_MODE
and should be ignored for analysis
Correlation | 0.97989 |
---|
COMMONAREA_MODE
Highly correlated
This variable is highly correlated with COMMONAREA_AVG
and should be ignored for analysis
Correlation | 0.97715 |
---|
DAYS_BIRTH
Numeric
Distinct count | 17460 |
---|---|
Unique (%) | 5.7% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | -16037 |
---|---|
Minimum | -25229 |
Maximum | -7489 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -25229 |
---|---|
5-th percentile | -23204 |
Q1 | -19682 |
Median | -15750 |
Q3 | -12413 |
95-th percentile | -9407 |
Maximum | -7489 |
Range | 17740 |
Interquartile range | 7269 |
Descriptive statistics
Standard deviation | 4364 |
---|---|
Coef of variation | -0.27212 |
Kurtosis | -1.0491 |
Mean | -16037 |
MAD | 3728.4 |
Skewness | -0.11567 |
Sum | -4931552390 |
Variance | 19044000 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
-13749 | 43 | 0.0% | |
-13481 | 42 | 0.0% | |
-18248 | 41 | 0.0% | |
-10020 | 41 | 0.0% | |
-15771 | 40 | 0.0% | |
-10292 | 40 | 0.0% | |
-14395 | 39 | 0.0% | |
-14267 | 39 | 0.0% | |
-13263 | 39 | 0.0% | |
-11664 | 39 | 0.0% | |
Other values (17450) | 307108 | 99.9% |
Minimum 5 values
Value | Count | Frequency (%) | |
-25229 | 1 | 0.0% | |
-25201 | 2 | 0.0% | |
-25200 | 1 | 0.0% | |
-25197 | 2 | 0.0% | |
-25196 | 4 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
-7679 | 1 | 0.0% | |
-7678 | 3 | 0.0% | |
-7676 | 2 | 0.0% | |
-7673 | 1 | 0.0% | |
-7489 | 1 | 0.0% |
DAYS_EMPLOYED
Numeric
Distinct count | 12574 |
---|---|
Unique (%) | 4.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 63815 |
---|---|
Minimum | -17912 |
Maximum | 365243 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -17912 |
---|---|
5-th percentile | -6742.5 |
Q1 | -2760 |
Median | -1213 |
Q3 | -289 |
95-th percentile | 365240 |
Maximum | 365243 |
Range | 383155 |
Interquartile range | 2471 |
Descriptive statistics
Standard deviation | 141280 |
---|---|
Coef of variation | 2.2138 |
Kurtosis | 0.77161 |
Mean | 63815 |
MAD | 108560 |
Skewness | 1.6643 |
Sum | 19623828581 |
Variance | 19959000000 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
365243 | 55374 | 18.0% | |
-200 | 156 | 0.1% | |
-224 | 152 | 0.0% | |
-199 | 151 | 0.0% | |
-230 | 151 | 0.0% | |
-212 | 150 | 0.0% | |
-229 | 143 | 0.0% | |
-384 | 143 | 0.0% | |
-231 | 140 | 0.0% | |
-215 | 138 | 0.0% | |
Other values (12564) | 250813 | 81.6% |
Minimum 5 values
Value | Count | Frequency (%) | |
-17912 | 1 | 0.0% | |
-17583 | 1 | 0.0% | |
-17546 | 1 | 0.0% | |
-17531 | 1 | 0.0% | |
-17522 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
-3 | 3 | 0.0% | |
-2 | 2 | 0.0% | |
-1 | 1 | 0.0% | |
0 | 2 | 0.0% | |
365243 | 55374 | 18.0% |
DAYS_ID_PUBLISH
Numeric
Distinct count | 6168 |
---|---|
Unique (%) | 2.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | -2994.2 |
---|---|
Minimum | -7197 |
Maximum | 0 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -7197 |
---|---|
5-th percentile | -4944 |
Q1 | -4299 |
Median | -3254 |
Q3 | -1720 |
95-th percentile | -375 |
Maximum | 0 |
Range | 7197 |
Interquartile range | 2579 |
Descriptive statistics
Standard deviation | 1509.5 |
---|---|
Coef of variation | -0.50412 |
Kurtosis | -1.1068 |
Mean | -2994.2 |
MAD | 1316.2 |
Skewness | 0.34933 |
Sum | -920750166 |
Variance | 2278400 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
-4053 | 169 | 0.1% | |
-4095 | 162 | 0.1% | |
-4046 | 161 | 0.1% | |
-4417 | 159 | 0.1% | |
-4256 | 158 | 0.1% | |
-4151 | 157 | 0.1% | |
-4032 | 157 | 0.1% | |
-4200 | 156 | 0.1% | |
-4214 | 155 | 0.1% | |
-4171 | 155 | 0.1% | |
Other values (6158) | 305922 | 99.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
-7197 | 1 | 0.0% | |
-6551 | 1 | 0.0% | |
-6383 | 1 | 0.0% | |
-6337 | 1 | 0.0% | |
-6274 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
-4 | 57 | 0.0% | |
-3 | 51 | 0.0% | |
-2 | 50 | 0.0% | |
-1 | 64 | 0.0% | |
0 | 16 | 0.0% |
DAYS_LAST_PHONE_CHANGE
Numeric
Distinct count | 3774 |
---|---|
Unique (%) | 1.2% |
Missing (%) | 0.0% |
Missing (n) | 1 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | -962.86 |
---|---|
Minimum | -4292 |
Maximum | 0 |
Zeros (%) | 12.3% |
Quantile statistics
Minimum | -4292 |
---|---|
5-th percentile | -2522 |
Q1 | -1570 |
Median | -757 |
Q3 | -274 |
95-th percentile | 0 |
Maximum | 0 |
Range | 4292 |
Interquartile range | 1296 |
Descriptive statistics
Standard deviation | 826.81 |
---|---|
Coef of variation | -0.8587 |
Kurtosis | -0.30858 |
Mean | -962.86 |
MAD | 696.28 |
Skewness | -0.71361 |
Sum | -296090000 |
Variance | 683610 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 37672 | 12.3% | |
-1.0 | 2812 | 0.9% | |
-2.0 | 2318 | 0.8% | |
-3.0 | 1763 | 0.6% | |
-4.0 | 1285 | 0.4% | |
-5.0 | 824 | 0.3% | |
-6.0 | 537 | 0.2% | |
-7.0 | 442 | 0.1% | |
-8.0 | 278 | 0.1% | |
-476.0 | 222 | 0.1% | |
Other values (3763) | 259357 | 84.3% |
Minimum 5 values
Value | Count | Frequency (%) | |
-4292.0 | 1 | 0.0% | |
-4185.0 | 1 | 0.0% | |
-4173.0 | 1 | 0.0% | |
-4153.0 | 1 | 0.0% | |
-4131.0 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
-4.0 | 1285 | 0.4% | |
-3.0 | 1763 | 0.6% | |
-2.0 | 2318 | 0.8% | |
-1.0 | 2812 | 0.9% | |
0.0 | 37672 | 12.3% |
DAYS_REGISTRATION
Numeric
Distinct count | 15688 |
---|---|
Unique (%) | 5.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | -4986.1 |
---|---|
Minimum | -24672 |
Maximum | 0 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -24672 |
---|---|
5-th percentile | -11416 |
Q1 | -7479.5 |
Median | -4504 |
Q3 | -2010 |
95-th percentile | -330 |
Maximum | 0 |
Range | 24672 |
Interquartile range | 5469.5 |
Descriptive statistics
Standard deviation | 3522.9 |
---|---|
Coef of variation | -0.70654 |
Kurtosis | -0.32135 |
Mean | -4986.1 |
MAD | 2915.4 |
Skewness | -0.59087 |
Sum | -1533300000 |
Variance | 12411000 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
-1.0 | 113 | 0.0% | |
-7.0 | 98 | 0.0% | |
-6.0 | 96 | 0.0% | |
-2.0 | 92 | 0.0% | |
-4.0 | 92 | 0.0% | |
-5.0 | 86 | 0.0% | |
-9.0 | 84 | 0.0% | |
-3.0 | 84 | 0.0% | |
0.0 | 80 | 0.0% | |
-21.0 | 80 | 0.0% | |
Other values (15678) | 306606 | 99.7% |
Minimum 5 values
Value | Count | Frequency (%) | |
-24672.0 | 1 | 0.0% | |
-23738.0 | 1 | 0.0% | |
-23416.0 | 1 | 0.0% | |
-22928.0 | 1 | 0.0% | |
-22858.0 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
-4.0 | 92 | 0.0% | |
-3.0 | 84 | 0.0% | |
-2.0 | 92 | 0.0% | |
-1.0 | 113 | 0.0% | |
0.0 | 80 | 0.0% |
DEF_30_CNT_SOCIAL_CIRCLE
Numeric
Distinct count | 11 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.3% |
Missing (n) | 1021 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.14342 |
---|---|
Minimum | 0 |
Maximum | 34 |
Zeros (%) | 88.2% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 1 |
Maximum | 34 |
Range | 34 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.4467 |
---|---|
Coef of variation | 3.1146 |
Kurtosis | 126.31 |
Mean | 0.14342 |
MAD | 0.25393 |
Skewness | 5.1835 |
Sum | 43957 |
Variance | 0.19954 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 271324 | 88.2% | |
1.0 | 28328 | 9.2% | |
2.0 | 5323 | 1.7% | |
3.0 | 1192 | 0.4% | |
4.0 | 253 | 0.1% | |
5.0 | 56 | 0.0% | |
6.0 | 11 | 0.0% | |
7.0 | 1 | 0.0% | |
8.0 | 1 | 0.0% | |
34.0 | 1 | 0.0% | |
(Missing) | 1021 | 0.3% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 271324 | 88.2% | |
1.0 | 28328 | 9.2% | |
2.0 | 5323 | 1.7% | |
3.0 | 1192 | 0.4% | |
4.0 | 253 | 0.1% |
Maximum 5 values
Value | Count | Frequency (%) | |
5.0 | 56 | 0.0% | |
6.0 | 11 | 0.0% | |
7.0 | 1 | 0.0% | |
8.0 | 1 | 0.0% | |
34.0 | 1 | 0.0% |
DEF_60_CNT_SOCIAL_CIRCLE
Numeric
Distinct count | 10 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.3% |
Missing (n) | 1021 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.10005 |
---|---|
Minimum | 0 |
Maximum | 24 |
Zeros (%) | 91.3% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 1 |
Maximum | 24 |
Range | 24 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.36229 |
---|---|
Coef of variation | 3.6211 |
Kurtosis | 86.563 |
Mean | 0.10005 |
MAD | 0.18327 |
Skewness | 5.2779 |
Sum | 30664 |
Variance | 0.13125 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 280721 | 91.3% | |
1.0 | 21841 | 7.1% | |
2.0 | 3170 | 1.0% | |
3.0 | 598 | 0.2% | |
4.0 | 135 | 0.0% | |
5.0 | 20 | 0.0% | |
6.0 | 3 | 0.0% | |
24.0 | 1 | 0.0% | |
7.0 | 1 | 0.0% | |
(Missing) | 1021 | 0.3% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 280721 | 91.3% | |
1.0 | 21841 | 7.1% | |
2.0 | 3170 | 1.0% | |
3.0 | 598 | 0.2% | |
4.0 | 135 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
4.0 | 135 | 0.0% | |
5.0 | 20 | 0.0% | |
6.0 | 3 | 0.0% | |
7.0 | 1 | 0.0% | |
24.0 | 1 | 0.0% |
ELEVATORS_AVG
Numeric
Distinct count | 258 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 53.3% |
Missing (n) | 163891 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.078942 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 27.9% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0.12 |
95-th percentile | 0.36 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.12 |
Descriptive statistics
Standard deviation | 0.13458 |
---|---|
Coef of variation | 1.7048 |
Kurtosis | 7.8694 |
Mean | 0.078942 |
MAD | 0.09788 |
Skewness | 2.4394 |
Sum | 11338 |
Variance | 0.018111 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 85718 | 27.9% | |
0.08 | 9886 | 3.2% | |
0.16 | 8806 | 2.9% | |
0.24 | 6071 | 2.0% | |
0.12 | 5593 | 1.8% | |
0.04 | 4585 | 1.5% | |
0.2 | 4072 | 1.3% | |
0.32 | 2788 | 0.9% | |
0.28 | 2272 | 0.7% | |
0.4 | 1532 | 0.5% | |
Other values (247) | 12297 | 4.0% | |
(Missing) | 163891 | 53.3% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 85718 | 27.9% | |
0.002 | 1 | 0.0% | |
0.0024 | 1 | 0.0% | |
0.0048 | 3 | 0.0% | |
0.0064 | 5 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9 | 6 | 0.0% | |
0.92 | 20 | 0.0% | |
0.9332 | 2 | 0.0% | |
0.96 | 81 | 0.0% | |
1.0 | 158 | 0.1% |
ELEVATORS_MEDI
Highly correlated
This variable is highly correlated with ELEVATORS_MODE
and should be ignored for analysis
Correlation | 0.98283 |
---|
ELEVATORS_MODE
Highly correlated
This variable is highly correlated with ELEVATORS_AVG
and should be ignored for analysis
Correlation | 0.97884 |
---|
EMERGENCYSTATE_MODE
Categorical
Distinct count | 3 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 47.4% |
Missing (n) | 145755 |
No | |
---|---|
Yes | 2328 |
(Missing) |
Value | Count | Frequency (%) | |
No | 159428 | 51.8% | |
Yes | 2328 | 0.8% | |
(Missing) | 145755 | 47.4% |
ENTRANCES_AVG
Numeric
Distinct count | 286 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 50.3% |
Missing (n) | 154828 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.14972 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 0.1% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0.0345 |
Q1 | 0.069 |
Median | 0.1379 |
Q3 | 0.2069 |
95-th percentile | 0.3103 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.1379 |
Descriptive statistics
Standard deviation | 0.10005 |
---|---|
Coef of variation | 0.66822 |
Kurtosis | 11.593 |
Mean | 0.14972 |
MAD | 0.069965 |
Skewness | 2.3997 |
Sum | 22860 |
Variance | 0.01001 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.1379 | 34007 | 11.1% | |
0.069 | 22956 | 7.5% | |
0.1034 | 19533 | 6.4% | |
0.2069 | 19062 | 6.2% | |
0.0345 | 15380 | 5.0% | |
0.1724 | 9185 | 3.0% | |
0.2759 | 7895 | 2.6% | |
0.2414 | 4165 | 1.4% | |
0.3448 | 2066 | 0.7% | |
0.3103 | 2049 | 0.7% | |
Other values (275) | 16385 | 5.3% | |
(Missing) | 154828 | 50.3% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 323 | 0.1% | |
0.0055 | 1 | 0.0% | |
0.0086 | 2 | 0.0% | |
0.0114 | 1 | 0.0% | |
0.0172 | 7 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.8621 | 14 | 0.0% | |
0.8966 | 52 | 0.0% | |
0.9309999999999999 | 21 | 0.0% | |
0.9655 | 25 | 0.0% | |
1.0 | 153 | 0.0% |
ENTRANCES_MEDI
Highly correlated
This variable is highly correlated with ENTRANCES_MODE
and should be ignored for analysis
Correlation | 0.98068 |
---|
ENTRANCES_MODE
Highly correlated
This variable is highly correlated with ENTRANCES_AVG
and should be ignored for analysis
Correlation | 0.97774 |
---|
EXT_SOURCE_1
Numeric
Distinct count | 114585 |
---|---|
Unique (%) | 37.3% |
Missing (%) | 56.4% |
Missing (n) | 173378 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.50213 |
---|---|
Minimum | 0.014568 |
Maximum | 0.96269 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0.014568 |
---|---|
5-th percentile | 0.15802 |
Q1 | 0.33401 |
Median | 0.506 |
Q3 | 0.67505 |
95-th percentile | 0.83226 |
Maximum | 0.96269 |
Range | 0.94812 |
Interquartile range | 0.34105 |
Descriptive statistics
Standard deviation | 0.21106 |
---|---|
Coef of variation | 0.42033 |
Kurtosis | -0.96516 |
Mean | 0.50213 |
MAD | 0.17916 |
Skewness | -0.068755 |
Sum | 67352 |
Variance | 0.044547 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.5464264086050881 | 5 | 0.0% | |
0.5984686928074158 | 5 | 0.0% | |
0.4990017461254777 | 5 | 0.0% | |
0.605151661169131 | 5 | 0.0% | |
0.4439821179601821 | 5 | 0.0% | |
0.528197430013715 | 5 | 0.0% | |
0.6227066347478732 | 5 | 0.0% | |
0.7657236984386736 | 5 | 0.0% | |
0.5810147955776347 | 5 | 0.0% | |
0.6677395635616753 | 5 | 0.0% | |
Other values (114574) | 134083 | 43.6% | |
(Missing) | 173378 | 56.4% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.014568132412445587 | 1 | 0.0% | |
0.01469148240341735 | 1 | 0.0% | |
0.015052921304163646 | 1 | 0.0% | |
0.015600080580903924 | 1 | 0.0% | |
0.017094657791038883 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9460755215136519 | 1 | 0.0% | |
0.946097614386872 | 1 | 0.0% | |
0.9476493853501726 | 1 | 0.0% | |
0.9516239622079844 | 1 | 0.0% | |
0.962692770561306 | 1 | 0.0% |
EXT_SOURCE_2
Numeric
Distinct count | 119832 |
---|---|
Unique (%) | 39.0% |
Missing (%) | 0.2% |
Missing (n) | 660 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.51439 |
---|---|
Minimum | 8.1736e-08 |
Maximum | 0.855 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 8.1736e-08 |
---|---|
5-th percentile | 0.1333 |
Q1 | 0.39246 |
Median | 0.56596 |
Q3 | 0.66362 |
95-th percentile | 0.74773 |
Maximum | 0.855 |
Range | 0.855 |
Interquartile range | 0.27116 |
Descriptive statistics
Standard deviation | 0.19106 |
---|---|
Coef of variation | 0.37143 |
Kurtosis | -0.26913 |
Mean | 0.51439 |
MAD | 0.15717 |
Skewness | -0.79358 |
Sum | 157840 |
Variance | 0.036504 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.2858978721410488 | 721 | 0.2% | |
0.2622583692422573 | 417 | 0.1% | |
0.26525634018619443 | 343 | 0.1% | |
0.15967923350263774 | 322 | 0.1% | |
0.2653117484731741 | 306 | 0.1% | |
0.26651977539251576 | 244 | 0.1% | |
0.2631435910213423 | 243 | 0.1% | |
0.16214456766623808 | 238 | 0.1% | |
0.16219210595922867 | 234 | 0.1% | |
0.16318703546427088 | 184 | 0.1% | |
Other values (119821) | 303599 | 98.7% | |
(Missing) | 660 | 0.2% |
Minimum 5 values
Value | Count | Frequency (%) | |
8.173616518884397e-08 | 1 | 0.0% | |
1.3159555812626235e-06 | 1 | 0.0% | |
5.002108762101576e-06 | 1 | 0.0% | |
5.600337749107766e-06 | 1 | 0.0% | |
5.9396509293128426e-06 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.8206095060949257 | 1 | 0.0% | |
0.8206159442383357 | 1 | 0.0% | |
0.8213936273692694 | 1 | 0.0% | |
0.8217142127828599 | 1 | 0.0% | |
0.8549996664047012 | 26 | 0.0% |
EXT_SOURCE_3
Numeric
Distinct count | 815 |
---|---|
Unique (%) | 0.3% |
Missing (%) | 19.8% |
Missing (n) | 60965 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.51085 |
---|---|
Minimum | 0.00052727 |
Maximum | 0.89601 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0.00052727 |
---|---|
5-th percentile | 0.15474 |
Q1 | 0.37065 |
Median | 0.53528 |
Q3 | 0.66906 |
95-th percentile | 0.78627 |
Maximum | 0.89601 |
Range | 0.89548 |
Interquartile range | 0.29841 |
Descriptive statistics
Standard deviation | 0.19484 |
---|---|
Coef of variation | 0.38141 |
Kurtosis | -0.66346 |
Mean | 0.51085 |
MAD | 0.16264 |
Skewness | -0.40939 |
Sum | 125950 |
Variance | 0.037964 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.746300213050371 | 1460 | 0.5% | |
0.7136313997323308 | 1315 | 0.4% | |
0.6940926425266661 | 1276 | 0.4% | |
0.6706517530862718 | 1191 | 0.4% | |
0.6528965519806539 | 1154 | 0.4% | |
0.5814837058057234 | 1141 | 0.4% | |
0.6894791426446275 | 1138 | 0.4% | |
0.5954562029091491 | 1136 | 0.4% | |
0.5549467685334323 | 1132 | 0.4% | |
0.6212263380626669 | 1109 | 0.4% | |
Other values (804) | 234494 | 76.3% | |
(Missing) | 60965 | 19.8% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0005272652387098817 | 886 | 0.3% | |
0.011345719434837441 | 1 | 0.0% | |
0.012715923858768621 | 1 | 0.0% | |
0.013948465584840095 | 1 | 0.0% | |
0.014148265518207309 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.8825303127941461 | 26 | 0.0% | |
0.8854883941521002 | 3 | 0.0% | |
0.8876642018413868 | 1 | 0.0% | |
0.8939760746042866 | 2 | 0.0% | |
0.8960095494948396 | 1 | 0.0% |
FLAG_CONT_MOBILE
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.99813 |
---|
1 | |
---|---|
0 | 574 |
Value | Count | Frequency (%) | |
1 | 306937 | 99.8% | |
0 | 574 | 0.2% |
FLAG_DOCUMENT_10
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 2.2763e-05 |
---|
0 | |
---|---|
1 | 7 |
Value | Count | Frequency (%) | |
0 | 307504 | 100.0% | |
1 | 7 | 0.0% |
FLAG_DOCUMENT_11
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0039121 |
---|
0 | |
---|---|
1 | 1203 |
Value | Count | Frequency (%) | |
0 | 306308 | 99.6% | |
1 | 1203 | 0.4% |
FLAG_DOCUMENT_12
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 6.5038e-06 |
---|
0 | |
---|---|
1 | 2 |
Value | Count | Frequency (%) | |
0 | 307509 | 100.0% | |
1 | 2 | 0.0% |
FLAG_DOCUMENT_13
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0035251 |
---|
0 | |
---|---|
1 | 1084 |
Value | Count | Frequency (%) | |
0 | 306427 | 99.6% | |
1 | 1084 | 0.4% |
FLAG_DOCUMENT_14
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0029365 |
---|
0 | |
---|---|
1 | 903 |
Value | Count | Frequency (%) | |
0 | 306608 | 99.7% | |
1 | 903 | 0.3% |
FLAG_DOCUMENT_15
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0012097 |
---|
0 | |
---|---|
1 | 372 |
Value | Count | Frequency (%) | |
0 | 307139 | 99.9% | |
1 | 372 | 0.1% |
FLAG_DOCUMENT_16
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0099281 |
---|
0 | |
---|---|
1 | 3053 |
Value | Count | Frequency (%) | |
0 | 304458 | 99.0% | |
1 | 3053 | 1.0% |
FLAG_DOCUMENT_17
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.00026666 |
---|
0 | |
---|---|
1 | 82 |
Value | Count | Frequency (%) | |
0 | 307429 | 100.0% | |
1 | 82 | 0.0% |
FLAG_DOCUMENT_18
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0081298 |
---|
0 | |
---|---|
1 | 2500 |
Value | Count | Frequency (%) | |
0 | 305011 | 99.2% | |
1 | 2500 | 0.8% |
FLAG_DOCUMENT_19
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0005951 |
---|
0 | |
---|---|
1 | 183 |
Value | Count | Frequency (%) | |
0 | 307328 | 99.9% | |
1 | 183 | 0.1% |
FLAG_DOCUMENT_2
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 4.2275e-05 |
---|
0 | |
---|---|
1 | 13 |
Value | Count | Frequency (%) | |
0 | 307498 | 100.0% | |
1 | 13 | 0.0% |
FLAG_DOCUMENT_20
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0005073 |
---|
0 | |
---|---|
1 | 156 |
Value | Count | Frequency (%) | |
0 | 307355 | 99.9% | |
1 | 156 | 0.1% |
FLAG_DOCUMENT_21
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.00033495 |
---|
0 | |
---|---|
1 | 103 |
Value | Count | Frequency (%) | |
0 | 307408 | 100.0% | |
1 | 103 | 0.0% |
FLAG_DOCUMENT_3
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.71002 |
---|
1 | |
---|---|
0 |
Value | Count | Frequency (%) | |
1 | 218340 | 71.0% | |
0 | 89171 | 29.0% |
FLAG_DOCUMENT_4
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 8.1298e-05 |
---|
0 | |
---|---|
1 | 25 |
Value | Count | Frequency (%) | |
0 | 307486 | 100.0% | |
1 | 25 | 0.0% |
FLAG_DOCUMENT_5
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.015115 |
---|
0 | |
---|---|
1 | 4648 |
Value | Count | Frequency (%) | |
0 | 302863 | 98.5% | |
1 | 4648 | 1.5% |
FLAG_DOCUMENT_6
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.088055 |
---|
0 | |
---|---|
1 | 27078 |
Value | Count | Frequency (%) | |
0 | 280433 | 91.2% | |
1 | 27078 | 8.8% |
FLAG_DOCUMENT_7
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.00019186 |
---|
0 | |
---|---|
1 | 59 |
Value | Count | Frequency (%) | |
0 | 307452 | 100.0% | |
1 | 59 | 0.0% |
FLAG_DOCUMENT_8
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.081376 |
---|
0 | |
---|---|
1 | 25024 |
Value | Count | Frequency (%) | |
0 | 282487 | 91.9% | |
1 | 25024 | 8.1% |
FLAG_DOCUMENT_9
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0038958 |
---|
0 | |
---|---|
1 | 1198 |
Value | Count | Frequency (%) | |
0 | 306313 | 99.6% | |
1 | 1198 | 0.4% |
FLAG_EMAIL
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.05672 |
---|
0 | |
---|---|
1 | 17442 |
Value | Count | Frequency (%) | |
0 | 290069 | 94.3% | |
1 | 17442 | 5.7% |
FLAG_EMP_PHONE
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.81989 |
---|
1 | |
---|---|
0 |
Value | Count | Frequency (%) | |
1 | 252125 | 82.0% | |
0 | 55386 | 18.0% |
FLAG_MOBIL
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 1 |
---|
1 | |
---|---|
0 | 1 |
Value | Count | Frequency (%) | |
1 | 307510 | 100.0% | |
0 | 1 | 0.0% |
FLAG_OWN_CAR
Categorical
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
N | |
---|---|
Y |
Value | Count | Frequency (%) | |
N | 202924 | 66.0% | |
Y | 104587 | 34.0% |
FLAG_OWN_REALTY
Categorical
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Y | |
---|---|
N |
Value | Count | Frequency (%) | |
Y | 213312 | 69.4% | |
N | 94199 | 30.6% |
FLAG_PHONE
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.28107 |
---|
0 | |
---|---|
1 |
Value | Count | Frequency (%) | |
0 | 221080 | 71.9% | |
1 | 86431 | 28.1% |
FLAG_WORK_PHONE
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.19937 |
---|
0 | |
---|---|
1 |
Value | Count | Frequency (%) | |
0 | 246203 | 80.1% | |
1 | 61308 | 19.9% |
FLOORSMAX_AVG
Numeric
Distinct count | 404 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 49.8% |
Missing (n) | 153020 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.22628 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 1.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0.0417 |
Q1 | 0.1667 |
Median | 0.1667 |
Q3 | 0.3333 |
95-th percentile | 0.4792 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.1666 |
Descriptive statistics
Standard deviation | 0.14464 |
---|---|
Coef of variation | 0.63921 |
Kurtosis | 2.4325 |
Mean | 0.22628 |
MAD | 0.11612 |
Skewness | 1.2265 |
Sum | 34959 |
Variance | 0.020921 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.1667 | 61875 | 20.1% | |
0.3333 | 31909 | 10.4% | |
0.0417 | 14600 | 4.7% | |
0.375 | 7926 | 2.6% | |
0.125 | 6974 | 2.3% | |
0.0833 | 6586 | 2.1% | |
0.0 | 2938 | 1.0% | |
0.4583 | 2828 | 0.9% | |
0.625 | 1915 | 0.6% | |
0.5417 | 1685 | 0.5% | |
Other values (393) | 15255 | 5.0% | |
(Missing) | 153020 | 49.8% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 2938 | 1.0% | |
0.0067 | 1 | 0.0% | |
0.0083 | 3 | 0.0% | |
0.01 | 4 | 0.0% | |
0.0104 | 5 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9375 | 4 | 0.0% | |
0.9479 | 2 | 0.0% | |
0.9583 | 83 | 0.0% | |
0.9792 | 1 | 0.0% | |
1.0 | 167 | 0.1% |
FLOORSMAX_MEDI
Highly correlated
This variable is highly correlated with FLOORSMAX_MODE
and should be ignored for analysis
Correlation | 0.98824 |
---|
FLOORSMAX_MODE
Highly correlated
This variable is highly correlated with FLOORSMAX_AVG
and should be ignored for analysis
Correlation | 0.98569 |
---|
FLOORSMIN_AVG
Numeric
Distinct count | 306 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 67.8% |
Missing (n) | 208642 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.23189 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 0.8% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0.0417 |
Q1 | 0.0833 |
Median | 0.2083 |
Q3 | 0.375 |
95-th percentile | 0.5 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.2917 |
Descriptive statistics
Standard deviation | 0.16138 |
---|---|
Coef of variation | 0.69592 |
Kurtosis | 1.3383 |
Mean | 0.23189 |
MAD | 0.1246 |
Skewness | 0.9542 |
Sum | 22927 |
Variance | 0.026044 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.2083 | 32875 | 10.7% | |
0.375 | 17845 | 5.8% | |
0.0417 | 17776 | 5.8% | |
0.0833 | 5086 | 1.7% | |
0.4167 | 3961 | 1.3% | |
0.1667 | 3537 | 1.2% | |
0.125 | 3336 | 1.1% | |
0.0 | 2320 | 0.8% | |
0.5 | 1688 | 0.5% | |
0.6667 | 1194 | 0.4% | |
Other values (295) | 9251 | 3.0% | |
(Missing) | 208642 | 67.8% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 2320 | 0.8% | |
0.0067 | 3 | 0.0% | |
0.0104 | 3 | 0.0% | |
0.0138 | 1 | 0.0% | |
0.0158 | 4 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9304 | 1 | 0.0% | |
0.9408 | 2 | 0.0% | |
0.9583 | 10 | 0.0% | |
0.9792 | 5 | 0.0% | |
1.0 | 141 | 0.0% |
FLOORSMIN_MEDI
Highly correlated
This variable is highly correlated with FLOORSMIN_MODE
and should be ignored for analysis
Correlation | 0.98841 |
---|
FLOORSMIN_MODE
Highly correlated
This variable is highly correlated with FLOORSMIN_AVG
and should be ignored for analysis
Correlation | 0.98588 |
---|
FONDKAPREMONT_MODE
Categorical
Distinct count | 5 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 68.4% |
Missing (n) | 210295 |
reg oper account | |
---|---|
reg oper spec account | 12080 |
not specified | 5687 |
(Missing) |
Value | Count | Frequency (%) | |
reg oper account | 73830 | 24.0% | |
reg oper spec account | 12080 | 3.9% | |
not specified | 5687 | 1.8% | |
org spec account | 5619 | 1.8% | |
(Missing) | 210295 | 68.4% |
HOUR_APPR_PROCESS_START
Numeric
Distinct count | 24 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 12.063 |
---|---|
Minimum | 0 |
Maximum | 23 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 7 |
Q1 | 10 |
Median | 12 |
Q3 | 14 |
95-th percentile | 17 |
Maximum | 23 |
Range | 23 |
Interquartile range | 4 |
Descriptive statistics
Standard deviation | 3.2658 |
---|---|
Coef of variation | 0.27072 |
Kurtosis | -0.19417 |
Mean | 12.063 |
MAD | 2.6328 |
Skewness | -0.028024 |
Sum | 3709634 |
Variance | 10.666 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
10 | 37722 | 12.3% | |
11 | 37229 | 12.1% | |
12 | 34233 | 11.1% | |
13 | 30959 | 10.1% | |
14 | 27682 | 9.0% | |
9 | 27384 | 8.9% | |
15 | 24839 | 8.1% | |
16 | 20385 | 6.6% | |
8 | 15127 | 4.9% | |
17 | 14900 | 4.8% | |
Other values (14) | 37051 | 12.0% |
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 40 | 0.0% | |
1 | 86 | 0.0% | |
2 | 305 | 0.1% | |
3 | 1230 | 0.4% | |
4 | 2090 | 0.7% |
Maximum 5 values
Value | Count | Frequency (%) | |
19 | 3848 | 1.3% | |
20 | 1196 | 0.4% | |
21 | 405 | 0.1% | |
22 | 150 | 0.0% | |
23 | 41 | 0.0% |
HOUSETYPE_MODE
Categorical
Distinct count | 4 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 50.2% |
Missing (n) | 154297 |
block of flats | |
---|---|
specific housing | 1499 |
terraced house | 1212 |
(Missing) |
Value | Count | Frequency (%) | |
block of flats | 150503 | 48.9% | |
specific housing | 1499 | 0.5% | |
terraced house | 1212 | 0.4% | |
(Missing) | 154297 | 50.2% |
LANDAREA_AVG
Numeric
Distinct count | 3528 |
---|---|
Unique (%) | 1.1% |
Missing (%) | 59.4% |
Missing (n) | 182590 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.066333 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 5.1% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0.0187 |
Median | 0.0481 |
Q3 | 0.0856 |
95-th percentile | 0.1947 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.0669 |
Descriptive statistics
Standard deviation | 0.081184 |
---|---|
Coef of variation | 1.2239 |
Kurtosis | 34.745 |
Mean | 0.066333 |
MAD | 0.049532 |
Skewness | 4.4587 |
Sum | 8286.4 |
Variance | 0.0065908 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 15600 | 5.1% | |
0.0631 | 189 | 0.1% | |
0.0316 | 187 | 0.1% | |
0.0473 | 186 | 0.1% | |
0.0174 | 180 | 0.1% | |
0.0237 | 175 | 0.1% | |
0.0552 | 173 | 0.1% | |
0.0158 | 170 | 0.1% | |
0.0331 | 170 | 0.1% | |
0.015 | 165 | 0.1% | |
Other values (3517) | 107726 | 35.0% | |
(Missing) | 182590 | 59.4% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 15600 | 5.1% | |
0.0001 | 13 | 0.0% | |
0.0002 | 13 | 0.0% | |
0.0003 | 9 | 0.0% | |
0.0004 | 11 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9497 | 1 | 0.0% | |
0.9690000000000001 | 1 | 0.0% | |
0.9777 | 3 | 0.0% | |
0.9829 | 10 | 0.0% | |
1.0 | 135 | 0.0% |
LANDAREA_MEDI
Highly correlated
This variable is highly correlated with LANDAREA_MODE
and should be ignored for analysis
Correlation | 0.98084 |
---|
LANDAREA_MODE
Highly correlated
This variable is highly correlated with LANDAREA_AVG
and should be ignored for analysis
Correlation | 0.9737 |
---|
LIVE_CITY_NOT_WORK_CITY
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.17955 |
---|
0 | |
---|---|
1 |
Value | Count | Frequency (%) | |
0 | 252296 | 82.0% | |
1 | 55215 | 18.0% |
LIVE_REGION_NOT_WORK_REGION
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.040659 |
---|
0 | |
---|---|
1 | 12503 |
Value | Count | Frequency (%) | |
0 | 295008 | 95.9% | |
1 | 12503 | 4.1% |
LIVINGAPARTMENTS_AVG
Highly correlated
This variable is highly correlated with APARTMENTS_AVG
and should be ignored for analysis
Correlation | 0.94395 |
---|
LIVINGAPARTMENTS_MEDI
Highly correlated
This variable is highly correlated with APARTMENTS_MEDI
and should be ignored for analysis
Correlation | 0.94249 |
---|
LIVINGAPARTMENTS_MODE
Highly correlated
This variable is highly correlated with APARTMENTS_MODE
and should be ignored for analysis
Correlation | 0.93776 |
---|
LIVINGAREA_AVG
Highly correlated
This variable is highly correlated with APARTMENTS_AVG
and should be ignored for analysis
Correlation | 0.91362 |
---|
LIVINGAREA_MEDI
Highly correlated
This variable is highly correlated with APARTMENTS_MEDI
and should be ignored for analysis
Correlation | 0.91595 |
---|
LIVINGAREA_MODE
Highly correlated
This variable is highly correlated with APARTMENTS_MODE
and should be ignored for analysis
Correlation | 0.91038 |
---|
NAME_CONTRACT_TYPE
Categorical
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Cash loans | |
---|---|
Revolving loans | 29279 |
Value | Count | Frequency (%) | |
Cash loans | 278232 | 90.5% | |
Revolving loans | 29279 | 9.5% |
NAME_EDUCATION_TYPE
Categorical
Distinct count | 5 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Secondary / secondary special | |
---|---|
Higher education | |
Incomplete higher | 10277 |
Other values (2) | 3980 |
Value | Count | Frequency (%) | |
Secondary / secondary special | 218391 | 71.0% | |
Higher education | 74863 | 24.3% | |
Incomplete higher | 10277 | 3.3% | |
Lower secondary | 3816 | 1.2% | |
Academic degree | 164 | 0.1% |
NAME_FAMILY_STATUS
Categorical
Distinct count | 6 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Married | |
---|---|
Single / not married | |
Civil marriage | 29775 |
Other values (3) | 35860 |
Value | Count | Frequency (%) | |
Married | 196432 | 63.9% | |
Single / not married | 45444 | 14.8% | |
Civil marriage | 29775 | 9.7% | |
Separated | 19770 | 6.4% | |
Widow | 16088 | 5.2% | |
Unknown | 2 | 0.0% |
NAME_HOUSING_TYPE
Categorical
Distinct count | 6 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
House / apartment | |
---|---|
With parents | 14840 |
Municipal apartment | 11183 |
Other values (3) | 8620 |
Value | Count | Frequency (%) | |
House / apartment | 272868 | 88.7% | |
With parents | 14840 | 4.8% | |
Municipal apartment | 11183 | 3.6% | |
Rented apartment | 4881 | 1.6% | |
Office apartment | 2617 | 0.9% | |
Co-op apartment | 1122 | 0.4% |
NAME_INCOME_TYPE
Categorical
Distinct count | 8 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Working | |
---|---|
Commercial associate | |
Pensioner | |
Other values (5) | 21758 |
Value | Count | Frequency (%) | |
Working | 158774 | 51.6% | |
Commercial associate | 71617 | 23.3% | |
Pensioner | 55362 | 18.0% | |
State servant | 21703 | 7.1% | |
Unemployed | 22 | 0.0% | |
Student | 18 | 0.0% | |
Businessman | 10 | 0.0% | |
Maternity leave | 5 | 0.0% |
NAME_TYPE_SUITE
Categorical
Distinct count | 8 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.4% |
Missing (n) | 1292 |
Unaccompanied | |
---|---|
Family | 40149 |
Spouse, partner | 11370 |
Other values (4) | 6174 |
Value | Count | Frequency (%) | |
Unaccompanied | 248526 | 80.8% | |
Family | 40149 | 13.1% | |
Spouse, partner | 11370 | 3.7% | |
Children | 3267 | 1.1% | |
Other_B | 1770 | 0.6% | |
Other_A | 866 | 0.3% | |
Group of people | 271 | 0.1% | |
(Missing) | 1292 | 0.4% |
NONLIVINGAPARTMENTS_AVG
Numeric
Distinct count | 387 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 69.4% |
Missing (n) | 213514 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.0088087 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 17.7% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0.0039 |
95-th percentile | 0.0309 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.0039 |
Descriptive statistics
Standard deviation | 0.047732 |
---|---|
Coef of variation | 5.4187 |
Kurtosis | 284.73 |
Mean | 0.0088087 |
MAD | 0.012235 |
Skewness | 15.541 |
Sum | 827.99 |
Variance | 0.0022783 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 54549 | 17.7% | |
0.0039 | 13606 | 4.4% | |
0.0077 | 6351 | 2.1% | |
0.0116 | 3714 | 1.2% | |
0.0154 | 2533 | 0.8% | |
0.0193 | 1673 | 0.5% | |
0.0019 | 1250 | 0.4% | |
0.0232 | 1195 | 0.4% | |
0.027000000000000003 | 865 | 0.3% | |
0.0309 | 717 | 0.2% | |
Other values (376) | 7544 | 2.5% | |
(Missing) | 213514 | 69.4% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 54549 | 17.7% | |
0.0002 | 1 | 0.0% | |
0.0003 | 5 | 0.0% | |
0.0004 | 25 | 0.0% | |
0.0005 | 6 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.973 | 3 | 0.0% | |
0.9884 | 1 | 0.0% | |
0.9923 | 4 | 0.0% | |
0.9961 | 2 | 0.0% | |
1.0 | 97 | 0.0% |
NONLIVINGAPARTMENTS_MEDI
Highly correlated
This variable is highly correlated with NONLIVINGAPARTMENTS_MODE
and should be ignored for analysis
Correlation | 0.97857 |
---|
NONLIVINGAPARTMENTS_MODE
Highly correlated
This variable is highly correlated with NONLIVINGAPARTMENTS_AVG
and should be ignored for analysis
Correlation | 0.96937 |
---|
NONLIVINGAREA_AVG
Numeric
Distinct count | 3291 |
---|---|
Unique (%) | 1.1% |
Missing (%) | 55.2% |
Missing (n) | 169682 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.028358 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 19.1% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0.0036 |
Q3 | 0.0277 |
95-th percentile | 0.1279 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.0277 |
Descriptive statistics
Standard deviation | 0.069523 |
---|---|
Coef of variation | 2.4516 |
Kurtosis | 64.912 |
Mean | 0.028358 |
MAD | 0.036058 |
Skewness | 6.559 |
Sum | 3908.5 |
Variance | 0.0048335 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 58735 | 19.1% | |
0.0012 | 546 | 0.2% | |
0.0044 | 454 | 0.1% | |
0.0022 | 440 | 0.1% | |
0.0031 | 415 | 0.1% | |
0.0011 | 405 | 0.1% | |
0.001 | 405 | 0.1% | |
0.0036 | 399 | 0.1% | |
0.003 | 397 | 0.1% | |
0.0024 | 395 | 0.1% | |
Other values (3280) | 75238 | 24.5% | |
(Missing) | 169682 | 55.2% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 58735 | 19.1% | |
0.0001 | 163 | 0.1% | |
0.0002 | 107 | 0.0% | |
0.0003 | 95 | 0.0% | |
0.0004 | 162 | 0.1% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9591 | 2 | 0.0% | |
0.9764 | 1 | 0.0% | |
0.9823 | 1 | 0.0% | |
0.9956 | 1 | 0.0% | |
1.0 | 136 | 0.0% |
NONLIVINGAREA_MEDI
Highly correlated
This variable is highly correlated with NONLIVINGAREA_MODE
and should be ignored for analysis
Correlation | 0.97584 |
---|
NONLIVINGAREA_MODE
Highly correlated
This variable is highly correlated with NONLIVINGAREA_AVG
and should be ignored for analysis
Correlation | 0.96609 |
---|
OBS_30_CNT_SOCIAL_CIRCLE
Numeric
Distinct count | 34 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.3% |
Missing (n) | 1021 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 1.4222 |
---|---|
Minimum | 0 |
Maximum | 348 |
Zeros (%) | 53.3% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 2 |
95-th percentile | 6 |
Maximum | 348 |
Range | 348 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 2.401 |
---|---|
Coef of variation | 1.6882 |
Kurtosis | 1424.8 |
Mean | 1.4222 |
MAD | 1.6556 |
Skewness | 12.14 |
Sum | 435900 |
Variance | 5.7647 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.0 | 163910 | 53.3% | |
1.0 | 48783 | 15.9% | |
2.0 | 29808 | 9.7% | |
3.0 | 20322 | 6.6% | |
4.0 | 14143 | 4.6% | |
5.0 | 9553 | 3.1% | |
6.0 | 6453 | 2.1% | |
7.0 | 4390 | 1.4% | |
8.0 | 2967 | 1.0% | |
9.0 | 2003 | 0.7% | |
Other values (23) | 4158 | 1.4% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 163910 | 53.3% | |
1.0 | 48783 | 15.9% | |
2.0 | 29808 | 9.7% | |
3.0 | 20322 | 6.6% | |
4.0 | 14143 | 4.6% |
Maximum 5 values
Value | Count | Frequency (%) | |
28.0 | 1 | 0.0% | |
29.0 | 1 | 0.0% | |
30.0 | 2 | 0.0% | |
47.0 | 1 | 0.0% | |
348.0 | 1 | 0.0% |
OBS_60_CNT_SOCIAL_CIRCLE
Highly correlated
This variable is highly correlated with OBS_30_CNT_SOCIAL_CIRCLE
and should be ignored for analysis
Correlation | 0.99849 |
---|
OCCUPATION_TYPE
Categorical
Distinct count | 19 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 31.3% |
Missing (n) | 96391 |
Laborers | |
---|---|
Sales staff | |
Core staff | |
Other values (15) | |
(Missing) |
Value | Count | Frequency (%) | |
Laborers | 55186 | 17.9% | |
Sales staff | 32102 | 10.4% | |
Core staff | 27570 | 9.0% | |
Managers | 21371 | 6.9% | |
Drivers | 18603 | 6.0% | |
High skill tech staff | 11380 | 3.7% | |
Accountants | 9813 | 3.2% | |
Medicine staff | 8537 | 2.8% | |
Security staff | 6721 | 2.2% | |
Cooking staff | 5946 | 1.9% | |
Other values (8) | 13891 | 4.5% | |
(Missing) | 96391 | 31.3% |
ORGANIZATION_TYPE
Categorical
Distinct count | 58 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Business Entity Type 3 | |
---|---|
XNA | |
Self-employed | |
Other values (55) |
Value | Count | Frequency (%) | |
Business Entity Type 3 | 67992 | 22.1% | |
XNA | 55374 | 18.0% | |
Self-employed | 38412 | 12.5% | |
Other | 16683 | 5.4% | |
Medicine | 11193 | 3.6% | |
Business Entity Type 2 | 10553 | 3.4% | |
Government | 10404 | 3.4% | |
School | 8893 | 2.9% | |
Trade: type 7 | 7831 | 2.5% | |
Kindergarten | 6880 | 2.2% | |
Other values (48) | 73296 | 23.8% |
OWN_CAR_AGE
Numeric
Distinct count | 63 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 66.0% |
Missing (n) | 202929 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 12.061 |
---|---|
Minimum | 0 |
Maximum | 91 |
Zeros (%) | 0.7% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 1 |
Q1 | 5 |
Median | 9 |
Q3 | 15 |
95-th percentile | 30 |
Maximum | 91 |
Range | 91 |
Interquartile range | 10 |
Descriptive statistics
Standard deviation | 11.945 |
---|---|
Coef of variation | 0.99036 |
Kurtosis | 9.2149 |
Mean | 12.061 |
MAD | 7.6692 |
Skewness | 2.7454 |
Sum | 1261400 |
Variance | 142.68 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
7.0 | 7424 | 2.4% | |
6.0 | 6382 | 2.1% | |
3.0 | 6370 | 2.1% | |
8.0 | 5887 | 1.9% | |
2.0 | 5852 | 1.9% | |
4.0 | 5557 | 1.8% | |
1.0 | 5280 | 1.7% | |
9.0 | 5020 | 1.6% | |
10.0 | 4806 | 1.6% | |
14.0 | 4594 | 1.5% | |
Other values (52) | 47410 | 15.4% | |
(Missing) | 202929 | 66.0% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 2134 | 0.7% | |
1.0 | 5280 | 1.7% | |
2.0 | 5852 | 1.9% | |
3.0 | 6370 | 2.1% | |
4.0 | 5557 | 1.8% |
Maximum 5 values
Value | Count | Frequency (%) | |
63.0 | 2 | 0.0% | |
64.0 | 2443 | 0.8% | |
65.0 | 891 | 0.3% | |
69.0 | 1 | 0.0% | |
91.0 | 2 | 0.0% |
REGION_POPULATION_RELATIVE
Numeric
Distinct count | 81 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.020868 |
---|---|
Minimum | 0.00029 |
Maximum | 0.072508 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0.00029 |
---|---|
5-th percentile | 0.00496 |
Q1 | 0.010006 |
Median | 0.01885 |
Q3 | 0.028663 |
95-th percentile | 0.04622 |
Maximum | 0.072508 |
Range | 0.072218 |
Interquartile range | 0.018657 |
Descriptive statistics
Standard deviation | 0.013831 |
---|---|
Coef of variation | 0.66279 |
Kurtosis | 3.2601 |
Mean | 0.020868 |
MAD | 0.010291 |
Skewness | 1.488 |
Sum | 6417.2 |
Variance | 0.0001913 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.035792000000000004 | 16408 | 5.3% | |
0.04622 | 13442 | 4.4% | |
0.030755 | 12163 | 4.0% | |
0.025164 | 11950 | 3.9% | |
0.026392 | 11601 | 3.8% | |
0.031329 | 11321 | 3.7% | |
0.028663 | 11157 | 3.6% | |
0.019101 | 8694 | 2.8% | |
0.072508 | 8412 | 2.7% | |
0.020713 | 8066 | 2.6% | |
Other values (71) | 194297 | 63.2% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.00029 | 2 | 0.0% | |
0.000533 | 39 | 0.0% | |
0.000938 | 28 | 0.0% | |
0.001276 | 558 | 0.2% | |
0.001333 | 235 | 0.1% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.031329 | 11321 | 3.7% | |
0.032561 | 6636 | 2.2% | |
0.035792000000000004 | 16408 | 5.3% | |
0.04622 | 13442 | 4.4% | |
0.072508 | 8412 | 2.7% |
REGION_RATING_CLIENT
Numeric
Distinct count | 3 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.0525 |
---|---|
Minimum | 1 |
Maximum | 3 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 2 |
Q3 | 2 |
95-th percentile | 3 |
Maximum | 3 |
Range | 2 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.50903 |
---|---|
Coef of variation | 0.24801 |
Kurtosis | 0.80042 |
Mean | 2.0525 |
MAD | 0.29784 |
Skewness | 0.087468 |
Sum | 631155 |
Variance | 0.25912 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
2 | 226984 | 73.8% | |
3 | 48330 | 15.7% | |
1 | 32197 | 10.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 32197 | 10.5% | |
2 | 226984 | 73.8% | |
3 | 48330 | 15.7% |
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 32197 | 10.5% | |
2 | 226984 | 73.8% | |
3 | 48330 | 15.7% |
REGION_RATING_CLIENT_W_CITY
Highly correlated
This variable is highly correlated with REGION_RATING_CLIENT
and should be ignored for analysis
Correlation | 0.95084 |
---|
REG_CITY_NOT_LIVE_CITY
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.078173 |
---|
0 | |
---|---|
1 | 24039 |
Value | Count | Frequency (%) | |
0 | 283472 | 92.2% | |
1 | 24039 | 7.8% |
REG_CITY_NOT_WORK_CITY
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.23045 |
---|
0 | |
---|---|
1 |
Value | Count | Frequency (%) | |
0 | 236644 | 77.0% | |
1 | 70867 | 23.0% |
REG_REGION_NOT_LIVE_REGION
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.015144 |
---|
0 | |
---|---|
1 | 4657 |
Value | Count | Frequency (%) | |
0 | 302854 | 98.5% | |
1 | 4657 | 1.5% |
REG_REGION_NOT_WORK_REGION
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.050769 |
---|
0 | |
---|---|
1 | 15612 |
Value | Count | Frequency (%) | |
0 | 291899 | 94.9% | |
1 | 15612 | 5.1% |
SK_ID_CURR
Numeric
Distinct count | 307511 |
---|---|
Unique (%) | 100.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 278180 |
---|---|
Minimum | 100002 |
Maximum | 456255 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 100002 |
---|---|
5-th percentile | 117950 |
Q1 | 189150 |
Median | 278200 |
Q3 | 367140 |
95-th percentile | 438430 |
Maximum | 456255 |
Range | 356253 |
Interquartile range | 178000 |
Descriptive statistics
Standard deviation | 102790 |
---|---|
Coef of variation | 0.36951 |
Kurtosis | -1.199 |
Mean | 278180 |
MAD | 89010 |
Skewness | -0.0012002 |
Sum | 85543569448 |
Variance | 10566000000 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
100303 | 1 | 0.0% | |
131861 | 1 | 0.0% | |
158488 | 1 | 0.0% | |
156441 | 1 | 0.0% | |
160539 | 1 | 0.0% | |
150300 | 1 | 0.0% | |
148253 | 1 | 0.0% | |
154398 | 1 | 0.0% | |
152351 | 1 | 0.0% | |
238369 | 1 | 0.0% | |
Other values (307501) | 307501 | 100.0% |
Minimum 5 values
Value | Count | Frequency (%) | |
100002 | 1 | 0.0% | |
100003 | 1 | 0.0% | |
100004 | 1 | 0.0% | |
100006 | 1 | 0.0% | |
100007 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
456251 | 1 | 0.0% | |
456252 | 1 | 0.0% | |
456253 | 1 | 0.0% | |
456254 | 1 | 0.0% | |
456255 | 1 | 0.0% |
TARGET
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.080729 |
---|
0 | |
---|---|
1 | 24825 |
Value | Count | Frequency (%) | |
0 | 282686 | 91.9% | |
1 | 24825 | 8.1% |
TOTALAREA_MODE
Highly correlated
This variable is highly correlated with LIVINGAREA_MEDI
and should be ignored for analysis
Correlation | 0.91936 |
---|
WALLSMATERIAL_MODE
Categorical
Distinct count | 8 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 50.8% |
Missing (n) | 156341 |
Panel | |
---|---|
Stone, brick | |
Block | 9253 |
Other values (4) | 11062 |
(Missing) |
Value | Count | Frequency (%) | |
Panel | 66040 | 21.5% | |
Stone, brick | 64815 | 21.1% | |
Block | 9253 | 3.0% | |
Wooden | 5362 | 1.7% | |
Mixed | 2296 | 0.7% | |
Monolithic | 1779 | 0.6% | |
Others | 1625 | 0.5% | |
(Missing) | 156341 | 50.8% |
WEEKDAY_APPR_PROCESS_START
Categorical
Distinct count | 7 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
TUESDAY | |
---|---|
WEDNESDAY | |
MONDAY | |
Other values (4) |
Value | Count | Frequency (%) | |
TUESDAY | 53901 | 17.5% | |
WEDNESDAY | 51934 | 16.9% | |
MONDAY | 50714 | 16.5% | |
THURSDAY | 50591 | 16.5% | |
FRIDAY | 50338 | 16.4% | |
SATURDAY | 33852 | 11.0% | |
SUNDAY | 16181 | 5.3% |
YEARS_BEGINEXPLUATATION_AVG
Numeric
Distinct count | 286 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 48.8% |
Missing (n) | 150007 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.97773 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 0.2% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0.9687 |
Q1 | 0.9767 |
Median | 0.9816 |
Q3 | 0.9866 |
95-th percentile | 0.996 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.0099 |
Descriptive statistics
Standard deviation | 0.059223 |
---|---|
Coef of variation | 0.060572 |
Kurtosis | 248.18 |
Mean | 0.97773 |
MAD | 0.010933 |
Skewness | -15.515 |
Sum | 154000 |
Variance | 0.0035074 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.9871 | 4311 | 1.4% | |
0.9856 | 4189 | 1.4% | |
0.9861 | 4171 | 1.4% | |
0.9801 | 4123 | 1.3% | |
0.9866 | 4114 | 1.3% | |
0.9851 | 4096 | 1.3% | |
0.9806 | 4096 | 1.3% | |
0.9811 | 3986 | 1.3% | |
0.9816 | 3982 | 1.3% | |
0.9831 | 3970 | 1.3% | |
Other values (275) | 116466 | 37.9% | |
(Missing) | 150007 | 48.8% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 514 | 0.2% | |
0.0179 | 1 | 0.0% | |
0.0447 | 1 | 0.0% | |
0.0969 | 1 | 0.0% | |
0.0974 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.998 | 1096 | 0.4% | |
0.9985 | 1062 | 0.3% | |
0.9990000000000001 | 906 | 0.3% | |
0.9995 | 691 | 0.2% | |
1.0 | 186 | 0.1% |
YEARS_BEGINEXPLUATATION_MEDI
Highly correlated
This variable is highly correlated with YEARS_BEGINEXPLUATATION_MODE
and should be ignored for analysis
Correlation | 0.96354 |
---|
YEARS_BEGINEXPLUATATION_MODE
Highly correlated
This variable is highly correlated with YEARS_BEGINEXPLUATATION_AVG
and should be ignored for analysis
Correlation | 0.97189 |
---|
YEARS_BUILD_AVG
Numeric
Distinct count | 150 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 66.5% |
Missing (n) | 204488 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.75247 |
---|---|
Minimum | 0 |
Maximum | 1 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0.592 |
Q1 | 0.6872 |
Median | 0.7552 |
Q3 | 0.8232 |
95-th percentile | 0.9524 |
Maximum | 1 |
Range | 1 |
Interquartile range | 0.136 |
Descriptive statistics
Standard deviation | 0.11328 |
---|---|
Coef of variation | 0.15054 |
Kurtosis | 4.3998 |
Mean | 0.75247 |
MAD | 0.08391 |
Skewness | -0.96249 |
Sum | 77522 |
Variance | 0.012832 |
Memory size | 2.3 MiB |
Value | Count | Frequency (%) | |
0.8232 | 2999 | 1.0% | |
0.8164 | 2864 | 0.9% | |
0.8028 | 2848 | 0.9% | |
0.728 | 2802 | 0.9% | |
0.7348 | 2761 | 0.9% | |
0.8096 | 2755 | 0.9% | |
0.83 | 2738 | 0.9% | |
0.7959999999999999 | 2734 | 0.9% | |
0.7484 | 2731 | 0.9% | |
0.7688 | 2712 | 0.9% | |
Other values (139) | 75079 | 24.4% | |
(Missing) | 204488 | 66.5% |
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 102 | 0.0% | |
0.0004 | 2 | 0.0% | |
0.0072 | 4 | 0.0% | |
0.013999999999999999 | 3 | 0.0% | |
0.0208 | 1 | 0.0% |
Maximum 5 values
Value | Count | Frequency (%) | |
0.9728 | 813 | 0.3% | |
0.9796 | 786 | 0.3% | |
0.9864 | 661 | 0.2% | |
0.9932 | 478 | 0.2% | |
1.0 | 173 | 0.1% |
YEARS_BUILD_MEDI
Highly correlated
This variable is highly correlated with YEARS_BUILD_MODE
and should be ignored for analysis
Correlation | 0.98946 |
---|
YEARS_BUILD_MODE
Highly correlated
This variable is highly correlated with YEARS_BUILD_AVG
and should be ignored for analysis
Correlation | 0.98944 |
---|
%%time
# Export the report object `profile` (created in an earlier cell; presumably the
# pandas_profiling report for the application_train data -- confirm) to an HTML
# file. The name is built from the dataset label and resolves to
# "application_train_profiling.html", a relative path.
# The %%time magic above reports the CPU and wall time taken by this cell.
profile.to_file(outputfile="{}_profiling.html".format("application_train"))
CPU times: user 6.29 ms, sys: 5.01 ms, total: 11.3 ms Wall time: 10.5 ms