#!/usr/bin/env python # coding: utf-8 # # Credit Risk Classifications # # by: Keith Qu # # Using the data set from Kaggle's Give Me Some Credit competition, which contains 150,000 observations with 10 features; the objective is to predict whether a borrower will experience a serious delinquency (90+ days past due) within 2 years. # # Using the methods described, we were able to attain an AUC score of 0.866752 on the private leaderboard. # # Contents: # #

# In[141]: import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns from sklearn.manifold import TSNE from scipy.stats import randint, uniform from scipy import linalg from sklearn.decomposition import PCA from sklearn.preprocessing import MinMaxScaler, StandardScaler from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestClassifier from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split, cross_val_predict, RandomizedSearchCV from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score from sklearn.metrics import accuracy_score, r2_score, mean_squared_error import xgboost as xgb import random get_ipython().run_line_magic('matplotlib', 'inline') # # ## Exploration, Cleaning, Creating Features # # We have the following variables: # #

SeriousDlqin2yrs (target): individual experiences a 90+ day overdue payment within 2 years
age
RevolvingUtilizationOfUnsecuredLines: balance on credit cards and lines of credit / sum of limits (excluding real estate, installment debt)
DebtRatio: monthly debt, alimony, living cost payments divided by monthly income
NumberRealEstateLoansOrLines: only real estate loans
NumberOfOpenCreditLinesAndLoans: number of installment loans and lines of credit (LOCs)
NumberOfTime30-59DaysPastDueNotWorse: number of times person experienced 30-59 day lateness, but not more than that
NumberOfTime60-89DaysPastDueNotWorse: same as above with 60-89 days
NumberOfTimes90DaysLate: same as above with 90+ days
NumberOfDependents: number of family members excluding self
MonthlyIncome

NumberOfTime30-59DaysPastDueNotWorse
NumberOfTime60-89DaysPastDueNotWorse
NumberOfTimes90DaysLate

\$200 income, \$200 payment
\$2000 income, \$2000 payment
\$20000 income, \$20000 payment