import os, math, subprocess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# some settings for displaying Pandas results
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.precision', 4)
pd.set_option('display.max_colwidth', -1)
# load data
data_path = "home-credit-default-risk/application_train.csv"
pdf_data = pd.read_csv(data_path)
print("""
SELECT *
FROM pdf_data
LIMIT 3
""")
pdf_data.head(3)
SELECT * FROM pdf_data LIMIT 3
SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | NAME_INCOME_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_REGISTRATION | DAYS_ID_PUBLISH | OWN_CAR_AGE | FLAG_MOBIL | FLAG_EMP_PHONE | FLAG_WORK_PHONE | FLAG_CONT_MOBILE | FLAG_PHONE | FLAG_EMAIL | OCCUPATION_TYPE | CNT_FAM_MEMBERS | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | LIVE_REGION_NOT_WORK_REGION | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | LIVE_CITY_NOT_WORK_CITY | ORGANIZATION_TYPE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | APARTMENTS_AVG | BASEMENTAREA_AVG | YEARS_BEGINEXPLUATATION_AVG | YEARS_BUILD_AVG | COMMONAREA_AVG | ELEVATORS_AVG | ENTRANCES_AVG | FLOORSMAX_AVG | FLOORSMIN_AVG | LANDAREA_AVG | LIVINGAPARTMENTS_AVG | LIVINGAREA_AVG | NONLIVINGAPARTMENTS_AVG | NONLIVINGAREA_AVG | APARTMENTS_MODE | BASEMENTAREA_MODE | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_MODE | COMMONAREA_MODE | ELEVATORS_MODE | ENTRANCES_MODE | FLOORSMAX_MODE | FLOORSMIN_MODE | LANDAREA_MODE | LIVINGAPARTMENTS_MODE | LIVINGAREA_MODE | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_MODE | APARTMENTS_MEDI | BASEMENTAREA_MEDI | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BUILD_MEDI | COMMONAREA_MEDI | ELEVATORS_MEDI | ENTRANCES_MEDI | FLOORSMAX_MEDI | FLOORSMIN_MEDI | LANDAREA_MEDI | LIVINGAPARTMENTS_MEDI | LIVINGAREA_MEDI | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAREA_MEDI | FONDKAPREMONT_MODE | HOUSETYPE_MODE | TOTALAREA_MODE | WALLSMATERIAL_MODE | EMERGENCYSTATE_MODE | OBS_30_CNT_SOCIAL_CIRCLE | DEF_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | DAYS_LAST_PHONE_CHANGE | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | FLAG_DOCUMENT_7 | FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100002 | 1 | Cash loans | M | N | Y | 0 | 202500.0 | 406597.5 | 24700.5 | 351000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0188 | -9461 | -637 | -3648.0 | -2120 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Laborers | 1.0 | 2 | 2 | WEDNESDAY | 10 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | 0.0830 | 0.2629 | 0.1394 | 0.0247 | 0.0369 | 0.9722 | 0.6192 | 0.0143 | 0.00 | 0.0690 | 0.0833 | 0.1250 | 0.0369 | 0.0202 | 0.0190 | 0.0000 | 0.0000 | 0.0252 | 0.0383 | 0.9722 | 0.6341 | 0.0144 | 0.0000 | 0.0690 | 0.0833 | 0.1250 | 0.0377 | 0.022 | 0.0198 | 0.0 | 0.0 | 0.0250 | 0.0369 | 0.9722 | 0.6243 | 0.0144 | 0.00 | 0.0690 | 0.0833 | 0.1250 | 0.0375 | 0.0205 | 0.0193 | 0.0000 | 0.00 | reg oper account | block of flats | 0.0149 | Stone, brick | No | 2.0 | 2.0 | 2.0 | 2.0 | -1134.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
1 | 100003 | 0 | Cash loans | F | N | N | 0 | 270000.0 | 1293502.5 | 35698.5 | 1129500.0 | Family | State servant | Higher education | Married | House / apartment | 0.0035 | -16765 | -1188 | -1186.0 | -291 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Core staff | 2.0 | 1 | 1 | MONDAY | 11 | 0 | 0 | 0 | 0 | 0 | 0 | School | 0.3113 | 0.6222 | NaN | 0.0959 | 0.0529 | 0.9851 | 0.7960 | 0.0605 | 0.08 | 0.0345 | 0.2917 | 0.3333 | 0.0130 | 0.0773 | 0.0549 | 0.0039 | 0.0098 | 0.0924 | 0.0538 | 0.9851 | 0.8040 | 0.0497 | 0.0806 | 0.0345 | 0.2917 | 0.3333 | 0.0128 | 0.079 | 0.0554 | 0.0 | 0.0 | 0.0968 | 0.0529 | 0.9851 | 0.7987 | 0.0608 | 0.08 | 0.0345 | 0.2917 | 0.3333 | 0.0132 | 0.0787 | 0.0558 | 0.0039 | 0.01 | reg oper account | block of flats | 0.0714 | Block | No | 1.0 | 0.0 | 1.0 | 0.0 | -828.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 100004 | 0 | Revolving loans | M | Y | Y | 0 | 67500.0 | 135000.0 | 6750.0 | 135000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0100 | -19046 | -225 | -4260.0 | -2531 | 26.0 | 1 | 1 | 1 | 1 | 1 | 0 | Laborers | 1.0 | 2 | 2 | MONDAY | 9 | 0 | 0 | 0 | 0 | 0 | 0 | Government | NaN | 0.5559 | 0.7296 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -815.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
print("""
SELECT NAME_CONTRACT_TYPE
FROM pdf_data
WHERE CODE_GENDER = 'M'
""")
pdf_data[pdf_data["CODE_GENDER"] == 'M'][["NAME_CONTRACT_TYPE"]].head()
SELECT NAME_CONTRACT_TYPE FROM pdf_data WHERE CODE_GENDER = 'M'
NAME_CONTRACT_TYPE | |
---|---|
0 | Cash loans |
2 | Revolving loans |
4 | Cash loans |
5 | Cash loans |
7 | Cash loans |
print("""
SELECT DISTINCT NAME_CONTRACT_TYPE
FROM pdf_data
""")
pdf_data["NAME_CONTRACT_TYPE"].unique()
SELECT DISTINCT NAME_CONTRACT_TYPE FROM pdf_data
array(['Cash loans', 'Revolving loans'], dtype=object)
print("""
SELECT NAME_INCOME_TYPE, CODE_GENDER, AMT_INCOME_TOTAL
FROM pdf_data
WHERE CODE_GENDER = 'M' AND AMT_INCOME_TOTAL > 200000.0
""")
condition = (pdf_data["CODE_GENDER"] == 'M') & (pdf_data["AMT_INCOME_TOTAL"] > 200000.0)
pdf_data[condition][["NAME_INCOME_TYPE", "CODE_GENDER", "AMT_INCOME_TOTAL"]].head()
SELECT NAME_INCOME_TYPE, CODE_GENDER, AMT_INCOME_TOTAL FROM pdf_data WHERE CODE_GENDER = 'M' AND AMT_INCOME_TOTAL > 200000.0
NAME_INCOME_TYPE | CODE_GENDER | AMT_INCOME_TOTAL | |
---|---|---|---|
0 | Working | M | 202500.0 |
7 | State servant | M | 360000.0 |
13 | Working | M | 225000.0 |
28 | State servant | M | 270000.0 |
33 | Commercial associate | M | 360000.0 |
print("""
SELECT NAME_INCOME_TYPE, AMT_INCOME_TOTAL
FROM pdf_data
ORDER BY AMT_INCOME_TOTAL
""")
pdf_data[["NAME_INCOME_TYPE", "AMT_INCOME_TOTAL"]].sort_values("AMT_INCOME_TOTAL").head()
SELECT NAME_INCOME_TYPE, AMT_INCOME_TOTAL FROM pdf_data ORDER BY AMT_INCOME_TOTAL
NAME_INCOME_TYPE | AMT_INCOME_TOTAL | |
---|---|---|
1678 | Working | 25650.0 |
20727 | Pensioner | 25650.0 |
240137 | Pensioner | 26100.0 |
186643 | Pensioner | 26100.0 |
246104 | Pensioner | 26100.0 |
print("""
SELECT NAME_INCOME_TYPE, AMT_INCOME_TOTAL
FROM pdf_data
ORDER BY AMT_INCOME_TOTAL DESC
""")
pdf_data[["NAME_INCOME_TYPE", "AMT_INCOME_TOTAL"]].sort_values("AMT_INCOME_TOTAL", ascending=False).head()
SELECT NAME_INCOME_TYPE, AMT_INCOME_TOTAL FROM pdf_data ORDER BY AMT_INCOME_TOTAL DESC
NAME_INCOME_TYPE | AMT_INCOME_TOTAL | |
---|---|---|
12840 | Working | 1.1700e+08 |
203693 | Commercial associate | 1.8000e+07 |
246858 | Commercial associate | 1.3500e+07 |
77768 | Working | 9.0000e+06 |
131127 | Working | 6.7500e+06 |
print("""
SELECT *
FROM pdf_data
WHERE SK_ID_CURR IN (100002, 100010, 100011)
""")
condition = pdf_data["SK_ID_CURR"].isin([100002, 100010, 100011])
pdf_data[condition].head()
SELECT * FROM pdf_data WHERE SK_ID_CURR IN (100002, 100010, 100011)
SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | NAME_INCOME_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_REGISTRATION | DAYS_ID_PUBLISH | OWN_CAR_AGE | FLAG_MOBIL | FLAG_EMP_PHONE | FLAG_WORK_PHONE | FLAG_CONT_MOBILE | FLAG_PHONE | FLAG_EMAIL | OCCUPATION_TYPE | CNT_FAM_MEMBERS | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | LIVE_REGION_NOT_WORK_REGION | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | LIVE_CITY_NOT_WORK_CITY | ORGANIZATION_TYPE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | APARTMENTS_AVG | BASEMENTAREA_AVG | YEARS_BEGINEXPLUATATION_AVG | YEARS_BUILD_AVG | COMMONAREA_AVG | ELEVATORS_AVG | ENTRANCES_AVG | FLOORSMAX_AVG | FLOORSMIN_AVG | LANDAREA_AVG | LIVINGAPARTMENTS_AVG | LIVINGAREA_AVG | NONLIVINGAPARTMENTS_AVG | NONLIVINGAREA_AVG | APARTMENTS_MODE | BASEMENTAREA_MODE | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_MODE | COMMONAREA_MODE | ELEVATORS_MODE | ENTRANCES_MODE | FLOORSMAX_MODE | FLOORSMIN_MODE | LANDAREA_MODE | LIVINGAPARTMENTS_MODE | LIVINGAREA_MODE | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_MODE | APARTMENTS_MEDI | BASEMENTAREA_MEDI | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BUILD_MEDI | COMMONAREA_MEDI | ELEVATORS_MEDI | ENTRANCES_MEDI | FLOORSMAX_MEDI | FLOORSMIN_MEDI | LANDAREA_MEDI | LIVINGAPARTMENTS_MEDI | LIVINGAREA_MEDI | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAREA_MEDI | FONDKAPREMONT_MODE | HOUSETYPE_MODE | TOTALAREA_MODE | WALLSMATERIAL_MODE | EMERGENCYSTATE_MODE | OBS_30_CNT_SOCIAL_CIRCLE | DEF_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | DAYS_LAST_PHONE_CHANGE | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | FLAG_DOCUMENT_7 | FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100002 | 1 | Cash loans | M | N | Y | 0 | 202500.0 | 406597.5 | 24700.5 | 351000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0188 | -9461 | -637 | -3648.0 | -2120 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Laborers | 1.0 | 2 | 2 | WEDNESDAY | 10 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | 0.0830 | 0.2629 | 0.1394 | 0.0247 | 0.0369 | 0.9722 | 0.6192 | 0.0143 | 0.0 | 0.069 | 0.0833 | 0.125 | 0.0369 | 0.0202 | 0.019 | 0.0 | 0.0 | 0.0252 | 0.0383 | 0.9722 | 0.6341 | 0.0144 | 0.0 | 0.069 | 0.0833 | 0.125 | 0.0377 | 0.022 | 0.0198 | 0.0 | 0.0 | 0.025 | 0.0369 | 0.9722 | 0.6243 | 0.0144 | 0.0 | 0.069 | 0.0833 | 0.125 | 0.0375 | 0.0205 | 0.0193 | 0.0 | 0.0 | reg oper account | block of flats | 0.0149 | Stone, brick | No | 2.0 | 2.0 | 2.0 | 2.0 | -1134.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
7 | 100010 | 0 | Cash loans | M | Y | Y | 0 | 360000.0 | 1530000.0 | 42075.0 | 1530000.0 | Unaccompanied | State servant | Higher education | Married | House / apartment | 0.0031 | -18850 | -449 | -4597.0 | -2379 | 8.0 | 1 | 1 | 1 | 1 | 0 | 0 | Managers | 2.0 | 3 | 3 | MONDAY | 16 | 0 | 0 | 0 | 0 | 1 | 1 | Other | NaN | 0.7143 | 0.5407 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 0.0 | 2.0 | 0.0 | -1070.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
8 | 100011 | 0 | Cash loans | F | N | Y | 0 | 112500.0 | 1019610.0 | 33826.5 | 913500.0 | Children | Pensioner | Secondary / secondary special | Married | House / apartment | 0.0186 | -20099 | 365243 | -7427.0 | -3514 | NaN | 1 | 0 | 0 | 1 | 0 | 0 | NaN | 2.0 | 2 | 2 | WEDNESDAY | 14 | 0 | 0 | 0 | 0 | 0 | 0 | XNA | 0.5873 | 0.2057 | 0.7517 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
print("""
SELECT *
FROM pdf_data
WHERE SK_ID_CURR NOT IN (100002, 100010, 100011)
""")
condition = ~pdf_data["SK_ID_CURR"].isin([100002, 100010, 100011])
pdf_data[condition].head()
SELECT * FROM pdf_data WHERE SK_ID_CURR NOT IN (100002, 100010, 100011)
SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | NAME_INCOME_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_REGISTRATION | DAYS_ID_PUBLISH | OWN_CAR_AGE | FLAG_MOBIL | FLAG_EMP_PHONE | FLAG_WORK_PHONE | FLAG_CONT_MOBILE | FLAG_PHONE | FLAG_EMAIL | OCCUPATION_TYPE | CNT_FAM_MEMBERS | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | LIVE_REGION_NOT_WORK_REGION | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | LIVE_CITY_NOT_WORK_CITY | ORGANIZATION_TYPE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | APARTMENTS_AVG | BASEMENTAREA_AVG | YEARS_BEGINEXPLUATATION_AVG | YEARS_BUILD_AVG | COMMONAREA_AVG | ELEVATORS_AVG | ENTRANCES_AVG | FLOORSMAX_AVG | FLOORSMIN_AVG | LANDAREA_AVG | LIVINGAPARTMENTS_AVG | LIVINGAREA_AVG | NONLIVINGAPARTMENTS_AVG | NONLIVINGAREA_AVG | APARTMENTS_MODE | BASEMENTAREA_MODE | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_MODE | COMMONAREA_MODE | ELEVATORS_MODE | ENTRANCES_MODE | FLOORSMAX_MODE | FLOORSMIN_MODE | LANDAREA_MODE | LIVINGAPARTMENTS_MODE | LIVINGAREA_MODE | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_MODE | APARTMENTS_MEDI | BASEMENTAREA_MEDI | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BUILD_MEDI | COMMONAREA_MEDI | ELEVATORS_MEDI | ENTRANCES_MEDI | FLOORSMAX_MEDI | FLOORSMIN_MEDI | LANDAREA_MEDI | LIVINGAPARTMENTS_MEDI | LIVINGAREA_MEDI | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAREA_MEDI | FONDKAPREMONT_MODE | HOUSETYPE_MODE | TOTALAREA_MODE | WALLSMATERIAL_MODE | EMERGENCYSTATE_MODE | OBS_30_CNT_SOCIAL_CIRCLE | DEF_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | DAYS_LAST_PHONE_CHANGE | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | FLAG_DOCUMENT_7 | FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 100003 | 0 | Cash loans | F | N | N | 0 | 270000.0 | 1293502.5 | 35698.5 | 1129500.0 | Family | State servant | Higher education | Married | House / apartment | 0.0035 | -16765 | -1188 | -1186.0 | -291 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Core staff | 2.0 | 1 | 1 | MONDAY | 11 | 0 | 0 | 0 | 0 | 0 | 0 | School | 0.3113 | 0.6222 | NaN | 0.0959 | 0.0529 | 0.9851 | 0.796 | 0.0605 | 0.08 | 0.0345 | 0.2917 | 0.3333 | 0.013 | 0.0773 | 0.0549 | 0.0039 | 0.0098 | 0.0924 | 0.0538 | 0.9851 | 0.804 | 0.0497 | 0.0806 | 0.0345 | 0.2917 | 0.3333 | 0.0128 | 0.079 | 0.0554 | 0.0 | 0.0 | 0.0968 | 0.0529 | 0.9851 | 0.7987 | 0.0608 | 0.08 | 0.0345 | 0.2917 | 0.3333 | 0.0132 | 0.0787 | 0.0558 | 0.0039 | 0.01 | reg oper account | block of flats | 0.0714 | Block | No | 1.0 | 0.0 | 1.0 | 0.0 | -828.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
2 | 100004 | 0 | Revolving loans | M | Y | Y | 0 | 67500.0 | 135000.0 | 6750.0 | 135000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0100 | -19046 | -225 | -4260.0 | -2531 | 26.0 | 1 | 1 | 1 | 1 | 1 | 0 | Laborers | 1.0 | 2 | 2 | MONDAY | 9 | 0 | 0 | 0 | 0 | 0 | 0 | Government | NaN | 0.5559 | 0.7296 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -815.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3 | 100006 | 0 | Cash loans | F | N | Y | 0 | 135000.0 | 312682.5 | 29686.5 | 297000.0 | Unaccompanied | Working | Secondary / secondary special | Civil marriage | House / apartment | 0.0080 | -19005 | -3039 | -9833.0 | -2437 | NaN | 1 | 1 | 0 | 1 | 0 | 0 | Laborers | 2.0 | 2 | 2 | WEDNESDAY | 17 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | NaN | 0.6504 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2.0 | 0.0 | 2.0 | 0.0 | -617.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 100007 | 0 | Cash loans | M | N | Y | 0 | 121500.0 | 513000.0 | 21865.5 | 513000.0 | Unaccompanied | Working | Secondary / secondary special | Single / not married | House / apartment | 0.0287 | -19932 | -3038 | -4311.0 | -3458 | NaN | 1 | 1 | 0 | 1 | 0 | 0 | Core staff | 1.0 | 2 | 2 | THURSDAY | 11 | 0 | 0 | 0 | 0 | 1 | 1 | Religion | NaN | 0.3227 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -1106.0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
5 | 100008 | 0 | Cash loans | M | N | Y | 0 | 99000.0 | 490495.5 | 27517.5 | 454500.0 | Spouse, partner | State servant | Secondary / secondary special | Married | House / apartment | 0.0358 | -16941 | -1588 | -4970.0 | -477 | NaN | 1 | 1 | 1 | 1 | 1 | 0 | Laborers | 2.0 | 2 | 2 | WEDNESDAY | 16 | 0 | 0 | 0 | 0 | 0 | 0 | Other | NaN | 0.3542 | 0.6212 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -2536.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 |
print("""
SELECT CODE_GENDER, COUNT(TARGET)
FROM pdf_data
GROUP BY CODE_GENDER
ORDER BY NUM_TARGET
""")
pdf_data.groupby("CODE_GENDER").agg({"TARGET": "count"}).sort_values("TARGET")
SELECT CODE_GENDER, COUNT(TARGET) FROM pdf_data GROUP BY CODE_GENDER ORDER BY NUM_TARGET
TARGET | |
---|---|
CODE_GENDER | |
XNA | 4 |
M | 105059 |
F | 202448 |
print("""
SELECT AMT_INCOME_TOTAL
FROM pdf_data
ORDER BY NUM_TARGET DESC
LIMIT 5
""")
pdf_data.nlargest(5, columns="AMT_INCOME_TOTAL")
SELECT AMT_INCOME_TOTAL FROM pdf_data ORDER BY NUM_TARGET DESC LIMIT 5
SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | NAME_INCOME_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_REGISTRATION | DAYS_ID_PUBLISH | OWN_CAR_AGE | FLAG_MOBIL | FLAG_EMP_PHONE | FLAG_WORK_PHONE | FLAG_CONT_MOBILE | FLAG_PHONE | FLAG_EMAIL | OCCUPATION_TYPE | CNT_FAM_MEMBERS | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | LIVE_REGION_NOT_WORK_REGION | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | LIVE_CITY_NOT_WORK_CITY | ORGANIZATION_TYPE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | APARTMENTS_AVG | BASEMENTAREA_AVG | YEARS_BEGINEXPLUATATION_AVG | YEARS_BUILD_AVG | COMMONAREA_AVG | ELEVATORS_AVG | ENTRANCES_AVG | FLOORSMAX_AVG | FLOORSMIN_AVG | LANDAREA_AVG | LIVINGAPARTMENTS_AVG | LIVINGAREA_AVG | NONLIVINGAPARTMENTS_AVG | NONLIVINGAREA_AVG | APARTMENTS_MODE | BASEMENTAREA_MODE | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_MODE | COMMONAREA_MODE | ELEVATORS_MODE | ENTRANCES_MODE | FLOORSMAX_MODE | FLOORSMIN_MODE | LANDAREA_MODE | LIVINGAPARTMENTS_MODE | LIVINGAREA_MODE | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_MODE | APARTMENTS_MEDI | BASEMENTAREA_MEDI | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BUILD_MEDI | COMMONAREA_MEDI | ELEVATORS_MEDI | ENTRANCES_MEDI | FLOORSMAX_MEDI | FLOORSMIN_MEDI | LANDAREA_MEDI | LIVINGAPARTMENTS_MEDI | LIVINGAREA_MEDI | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAREA_MEDI | FONDKAPREMONT_MODE | HOUSETYPE_MODE | TOTALAREA_MODE | WALLSMATERIAL_MODE | EMERGENCYSTATE_MODE | OBS_30_CNT_SOCIAL_CIRCLE | DEF_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | DAYS_LAST_PHONE_CHANGE | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | FLAG_DOCUMENT_7 | FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
12840 | 114967 | 1 | Cash loans | F | N | Y | 1 | 1.1700e+08 | 562491.0 | 26194.5 | 454500.0 | Unaccompanied | Working | Secondary / secondary special | Married | House / apartment | 0.0106 | -12615 | -922 | -6762.0 | -3643 | NaN | 1 | 1 | 0 | 1 | 0 | 0 | Laborers | 3.0 | 2 | 2 | TUESDAY | 14 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | 0.4608 | 0.1132 | 0.1455 | 0.1031 | 0.0947 | 0.9791 | NaN | NaN | 0.0 | 0.2069 | 0.1667 | NaN | 0.0688 | NaN | 0.0615 | NaN | NaN | 0.105 | 0.0983 | 0.9791 | NaN | NaN | 0.0 | 0.2069 | 0.1667 | NaN | 0.0704 | NaN | 0.0641 | NaN | NaN | 0.1041 | 0.0947 | 0.9791 | NaN | NaN | 0.0 | 0.2069 | 0.1667 | NaN | 0.07 | NaN | 0.0626 | NaN | NaN | NaN | block of flats | 0.0715 | Stone, brick | No | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
203693 | 336147 | 0 | Cash loans | M | Y | Y | 2 | 1.8000e+07 | 675000.0 | 69295.5 | 675000.0 | Unaccompanied | Commercial associate | Secondary / secondary special | Married | House / apartment | 0.0308 | -15704 | -4961 | -3338.0 | -4728 | 7.0 | 1 | 1 | 0 | 1 | 0 | 0 | NaN | 4.0 | 2 | 2 | THURSDAY | 22 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | 0.5434 | 0.7886 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 0.0 | 1.0 | 0.0 | -1133.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN |
246858 | 385674 | 0 | Cash loans | M | Y | Y | 0 | 1.3500e+07 | 1400503.5 | 130945.5 | 1368000.0 | Unaccompanied | Commercial associate | Higher education | Married | House / apartment | 0.0308 | -13551 | -280 | -3953.0 | -4972 | 10.0 | 1 | 1 | 1 | 1 | 0 | 0 | NaN | 2.0 | 2 | 2 | SUNDAY | 12 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 3 | NaN | 0.7135 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -13.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN |
77768 | 190160 | 0 | Cash loans | F | Y | N | 0 | 9.0000e+06 | 1431531.0 | 132601.5 | 1377000.0 | Unaccompanied | Working | Higher education | Civil marriage | House / apartment | 0.0101 | -16425 | -8476 | -7276.0 | -1656 | 8.0 | 1 | 1 | 0 | 1 | 0 | 0 | Managers | 2.0 | 2 | 2 | FRIDAY | 13 | 0 | 0 | 0 | 0 | 0 | 0 | Business Entity Type 1 | 0.6524 | 0.5041 | 0.4330 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | 0.0 | 1.0 | 0.0 | -3.0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
131127 | 252084 | 0 | Cash loans | M | Y | N | 0 | 6.7500e+06 | 790830.0 | 52978.5 | 675000.0 | Unaccompanied | Working | Higher education | Married | House / apartment | 0.0095 | -19341 | -443 | -7414.0 | -2886 | 8.0 | 1 | 1 | 1 | 1 | 1 | 0 | Laborers | 2.0 | 2 | 2 | TUESDAY | 14 | 0 | 1 | 1 | 0 | 1 | 1 | Transport: type 4 | NaN | 0.5528 | 0.4957 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -695.0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 4.0 |
print("""
SELECT AMT_INCOME_TOTAL
FROM pdf_data
ORDER BY NUM_TARGET DESC
LIMIT 10
OFFSET 5
""")
pdf_data.nlargest(10, columns="AMT_INCOME_TOTAL").tail(5)
SELECT AMT_INCOME_TOTAL FROM pdf_data ORDER BY NUM_TARGET DESC LIMIT 10 OFFSET 5
SK_ID_CURR | TARGET | NAME_CONTRACT_TYPE | CODE_GENDER | FLAG_OWN_CAR | FLAG_OWN_REALTY | CNT_CHILDREN | AMT_INCOME_TOTAL | AMT_CREDIT | AMT_ANNUITY | AMT_GOODS_PRICE | NAME_TYPE_SUITE | NAME_INCOME_TYPE | NAME_EDUCATION_TYPE | NAME_FAMILY_STATUS | NAME_HOUSING_TYPE | REGION_POPULATION_RELATIVE | DAYS_BIRTH | DAYS_EMPLOYED | DAYS_REGISTRATION | DAYS_ID_PUBLISH | OWN_CAR_AGE | FLAG_MOBIL | FLAG_EMP_PHONE | FLAG_WORK_PHONE | FLAG_CONT_MOBILE | FLAG_PHONE | FLAG_EMAIL | OCCUPATION_TYPE | CNT_FAM_MEMBERS | REGION_RATING_CLIENT | REGION_RATING_CLIENT_W_CITY | WEEKDAY_APPR_PROCESS_START | HOUR_APPR_PROCESS_START | REG_REGION_NOT_LIVE_REGION | REG_REGION_NOT_WORK_REGION | LIVE_REGION_NOT_WORK_REGION | REG_CITY_NOT_LIVE_CITY | REG_CITY_NOT_WORK_CITY | LIVE_CITY_NOT_WORK_CITY | ORGANIZATION_TYPE | EXT_SOURCE_1 | EXT_SOURCE_2 | EXT_SOURCE_3 | APARTMENTS_AVG | BASEMENTAREA_AVG | YEARS_BEGINEXPLUATATION_AVG | YEARS_BUILD_AVG | COMMONAREA_AVG | ELEVATORS_AVG | ENTRANCES_AVG | FLOORSMAX_AVG | FLOORSMIN_AVG | LANDAREA_AVG | LIVINGAPARTMENTS_AVG | LIVINGAREA_AVG | NONLIVINGAPARTMENTS_AVG | NONLIVINGAREA_AVG | APARTMENTS_MODE | BASEMENTAREA_MODE | YEARS_BEGINEXPLUATATION_MODE | YEARS_BUILD_MODE | COMMONAREA_MODE | ELEVATORS_MODE | ENTRANCES_MODE | FLOORSMAX_MODE | FLOORSMIN_MODE | LANDAREA_MODE | LIVINGAPARTMENTS_MODE | LIVINGAREA_MODE | NONLIVINGAPARTMENTS_MODE | NONLIVINGAREA_MODE | APARTMENTS_MEDI | BASEMENTAREA_MEDI | YEARS_BEGINEXPLUATATION_MEDI | YEARS_BUILD_MEDI | COMMONAREA_MEDI | ELEVATORS_MEDI | ENTRANCES_MEDI | FLOORSMAX_MEDI | FLOORSMIN_MEDI | LANDAREA_MEDI | LIVINGAPARTMENTS_MEDI | LIVINGAREA_MEDI | NONLIVINGAPARTMENTS_MEDI | NONLIVINGAREA_MEDI | FONDKAPREMONT_MODE | HOUSETYPE_MODE | TOTALAREA_MODE | WALLSMATERIAL_MODE | EMERGENCYSTATE_MODE | OBS_30_CNT_SOCIAL_CIRCLE | DEF_30_CNT_SOCIAL_CIRCLE | OBS_60_CNT_SOCIAL_CIRCLE | DEF_60_CNT_SOCIAL_CIRCLE | DAYS_LAST_PHONE_CHANGE | FLAG_DOCUMENT_2 | FLAG_DOCUMENT_3 | FLAG_DOCUMENT_4 | FLAG_DOCUMENT_5 | FLAG_DOCUMENT_6 | FLAG_DOCUMENT_7 | FLAG_DOCUMENT_8 | FLAG_DOCUMENT_9 | FLAG_DOCUMENT_10 | FLAG_DOCUMENT_11 | FLAG_DOCUMENT_12 | FLAG_DOCUMENT_13 | FLAG_DOCUMENT_14 | FLAG_DOCUMENT_15 | FLAG_DOCUMENT_16 | FLAG_DOCUMENT_17 | FLAG_DOCUMENT_18 | FLAG_DOCUMENT_19 | FLAG_DOCUMENT_20 | FLAG_DOCUMENT_21 | AMT_REQ_CREDIT_BUREAU_HOUR | AMT_REQ_CREDIT_BUREAU_DAY | AMT_REQ_CREDIT_BUREAU_WEEK | AMT_REQ_CREDIT_BUREAU_MON | AMT_REQ_CREDIT_BUREAU_QRT | AMT_REQ_CREDIT_BUREAU_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
103006 | 219563 | 0 | Revolving loans | M | Y | Y | 0 | 4500000.0 | 2250000.0 | 225000.0 | 2250000.0 | Unaccompanied | Working | Higher education | Single / not married | House / apartment | 0.0073 | -10778 | -378 | -4919.0 | -3441 | 3.0 | 1 | 1 | 0 | 1 | 1 | 0 | Managers | 1.0 | 2 | 2 | SATURDAY | 15 | 0 | 1 | 1 | 0 | 0 | 0 | Construction | 0.2264 | 0.6480 | 0.6195 | 0.0670 | 0.0728 | 0.9776 | 0.6940 | NaN | 0.00 | 0.1379 | 0.1667 | 0.1250 | 0.0000 | NaN | 0.0606 | NaN | 0.0023 | 0.0630 | 0.0649 | 0.9777 | 0.7060 | NaN | 0.0000 | 0.1379 | 0.1667 | 0.0417 | 0.0000 | NaN | 0.0564 | NaN | 0.0024 | 0.0677 | 0.0728 | 0.9776 | 0.6981 | NaN | 0.00 | 0.1379 | 0.1667 | 0.1250 | 0.0000 | NaN | 0.0617 | NaN | 0.0023 | reg oper account | block of flats | 0.0426 | Stone, brick | No | 1.0 | 0.0 | 1.0 | 0.0 | -529.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 |
187833 | 317748 | 0 | Cash loans | M | N | N | 0 | 4500000.0 | 835380.0 | 42651.0 | 675000.0 | Unaccompanied | Working | Secondary / secondary special | Married | House / apartment | 0.0060 | -18715 | -3331 | -8877.0 | -2104 | NaN | 1 | 1 | 1 | 1 | 0 | 0 | Laborers | 2.0 | 2 | 2 | FRIDAY | 13 | 1 | 1 | 0 | 1 | 1 | 0 | Construction | NaN | 0.7488 | 0.2822 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | -613.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 |
204564 | 337151 | 0 | Cash loans | M | N | N | 0 | 4500000.0 | 450000.0 | 47749.5 | 450000.0 | Unaccompanied | Working | Higher education | Single / not married | House / apartment | 0.0093 | -18461 | -8472 | -1179.0 | -1997 | NaN | 1 | 1 | 0 | 1 | 1 | 0 | Managers | 1.0 | 2 | 2 | TUESDAY | 12 | 0 | 0 | 0 | 0 | 0 | 0 | Military | 0.4746 | 0.7980 | NaN | 0.1608 | 0.1111 | 0.9806 | NaN | NaN | 0.00 | 0.3448 | 0.1667 | NaN | 0.0741 | NaN | 0.1360 | NaN | 0.0000 | 0.1639 | 0.1153 | 0.9806 | NaN | NaN | 0.0000 | 0.3448 | 0.1667 | NaN | 0.0758 | NaN | 0.1417 | NaN | 0.0000 | 0.1624 | 0.1111 | 0.9806 | NaN | NaN | 0.00 | 0.3448 | 0.1667 | NaN | 0.0754 | NaN | 0.1385 | NaN | 0.0000 | NaN | block of flats | 0.1234 | Panel | No | 0.0 | 0.0 | 0.0 | 0.0 | -3206.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN |
287463 | 432980 | 0 | Cash loans | M | Y | Y | 0 | 4500000.0 | 1755000.0 | 61132.5 | 1755000.0 | Unaccompanied | Working | Higher education | Married | House / apartment | 0.0100 | -18784 | -3618 | -9447.0 | -2315 | 11.0 | 1 | 1 | 1 | 1 | 0 | 0 | Managers | 2.0 | 2 | 2 | SATURDAY | 2 | 0 | 0 | 0 | 0 | 0 | 0 | Self-employed | 0.4295 | 0.4074 | 0.3458 | 0.1093 | 0.1009 | 0.9871 | 0.8232 | 0.0459 | 0.12 | 0.1034 | 0.3333 | 0.0417 | 0.0000 | 0.0883 | 0.0703 | 0.0039 | 0.0459 | 0.1113 | 0.1047 | 0.9871 | 0.8301 | 0.0463 | 0.1208 | 0.1034 | 0.3333 | 0.0417 | 0.0000 | 0.0964 | 0.0732 | 0.0039 | 0.0485 | 0.1103 | 0.1009 | 0.9871 | 0.8256 | 0.0462 | 0.12 | 0.1034 | 0.3333 | 0.0417 | 0.0000 | 0.0898 | 0.0715 | 0.0039 | 0.0468 | reg oper account | block of flats | 0.0903 | Panel | No | 3.0 | 0.0 | 3.0 | 0.0 | -284.0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
181698 | 310601 | 0 | Cash loans | M | Y | Y | 1 | 3950059.5 | 675000.0 | 66217.5 | 675000.0 | Unaccompanied | Commercial associate | Higher education | Married | With parents | 0.0326 | -10572 | -3163 | -10078.0 | -683 | 1.0 | 1 | 1 | 1 | 1 | 1 | 0 | Managers | 3.0 | 1 | 1 | MONDAY | 14 | 0 | 0 | 0 | 0 | 0 | 0 | Trade: type 2 | NaN | 0.5978 | 0.7310 | 0.0464 | 0.0221 | 0.9747 | NaN | NaN | 0.04 | 0.0345 | 0.3333 | NaN | 0.0192 | NaN | 0.0394 | NaN | 0.0089 | 0.0473 | 0.0229 | 0.9747 | NaN | NaN | 0.0403 | 0.0345 | 0.3333 | NaN | 0.0197 | NaN | 0.0411 | NaN | 0.0094 | 0.0468 | 0.0221 | 0.9747 | NaN | NaN | 0.04 | 0.0345 | 0.3333 | NaN | 0.0196 | NaN | 0.0401 | NaN | 0.0090 | NaN | block of flats | 0.0329 | Stone, brick | No | 4.0 | 0.0 | 4.0 | 0.0 | -902.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
print("""
SELECT MAX(AMT_INCOME_TOTAL), MIN(AMT_INCOME_TOTAL), MEAN(AMT_INCOME_TOTAL)
FROM pdf_data
""")
pdf_data.agg({"AMT_INCOME_TOTAL": ["max", "min", "mean"]}).transpose()
SELECT MAX(AMT_INCOME_TOTAL), MIN(AMT_INCOME_TOTAL), MEAN(AMT_INCOME_TOTAL) FROM pdf_data
max | min | mean | |
---|---|---|---|
AMT_INCOME_TOTAL | 1.1700e+08 | 25650.0 | 168797.9193 |
print("""
SELECT *
FROM pdf1
INNER JOIN pdf2
ON pdf1.SK_ID_CURR = pdf2.SK_ID_CURR
""")
pdf1 = pdf_data[["SK_ID_CURR", "AMT_INCOME_TOTAL"]]
pdf2 = pdf_data[["SK_ID_CURR", "CODE_GENDER", "FLAG_OWN_CAR"]]
pdf1.merge(pdf2, on="SK_ID_CURR", how="inner").head()
SELECT * FROM pdf1 INNER JOIN pdf2 ON pdf1.SK_ID_CURR = pdf2.SK_ID_CURR
SK_ID_CURR | AMT_INCOME_TOTAL | CODE_GENDER | FLAG_OWN_CAR | |
---|---|---|---|---|
0 | 100002 | 202500.0 | M | N |
1 | 100003 | 270000.0 | F | N |
2 | 100004 | 67500.0 | M | Y |
3 | 100006 | 135000.0 | F | N |
4 | 100007 | 121500.0 | M | N |
print("""
SELECT * FROM pdf1
UNION ALL
SELECT * FROM pdf2
""")
pdf1 = pdf_data[["CODE_GENDER", "FLAG_OWN_CAR"]]
pdf2 = pdf_data[["CODE_GENDER", "FLAG_OWN_CAR"]]
print("Union all:", pd.concat([pdf1, pdf2]).shape)
print("Union:", pd.concat([pdf1, pdf2]).drop_duplicates().shape)
SELECT * FROM pdf1 UNION ALL SELECT * FROM pdf2 ('Union all:', (615022, 2)) ('Union:', (6, 2))
INSERT INTO table_name (column1, column2, column3, ...)
VALUES (value1, value2, value3, ...);
df1 = pd.DataFrame({'id': [1, 2], 'name': ['Harry Potter', 'Ron Weasley']})
df2 = pd.DataFrame({'id': [3], 'name': ['Hermione Granger']})
df3 = pd.concat([df1, df2]).reset_index(drop=True)
df3
id | name | |
---|---|---|
0 | 1 | Harry Potter |
1 | 2 | Ron Weasley |
2 | 3 | Hermione Granger |
UPDATE table_name
SET column1 = value1, column2 = value2, ...
WHERE condition;
df3.loc[df3["id"] == 2, "name"] = "Ron"
df3
id | name | |
---|---|---|
0 | 1 | Harry Potter |
1 | 2 | Ron |
2 | 3 | Hermione Granger |
DELETE FROM table_name WHERE condition;
df3.drop(df3[df3["name"] == "Ron"].index, inplace=True)
df3
id | name | |
---|---|---|
0 | 1 | Harry Potter |
2 | 3 | Hermione Granger |