#!/usr/bin/env python
# coding: utf-8

# # Credit Risk Classifications
#
# by: Keith Qu
#
# Using the data set from Kaggle's Give Me Some Credit competition, which
# contains 150,000 observations with 10 features and the objective of
# predicting whether a lendee will have a serious delinquency (90+ days past
# due) within 2 years.
#
# Using the methods described, we were able to attain an AUC score of 0.866752
# on the private leaderboard.
#
# Contents:
#
#

# In[141]:


# Standard library
import random

# Third-party: numerics, data handling and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import linalg
from scipy.stats import randint, uniform

# Third-party: modelling
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import (
    GradientBoostingClassifier,
    GradientBoostingRegressor,
    RandomForestClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (
    train_test_split,
    cross_val_predict,
    RandomizedSearchCV,
)
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error
import xgboost as xgb

# `get_ipython` is only defined inside an IPython/Jupyter session. Guard the
# inline-plotting magic so this exported script can also be run with a plain
# Python interpreter, where the name does not exist.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


#
# ## Exploration, Cleaning, Creating Features
#
# We have the following variables:
#
#