# import
import pandas as pd
# reading the file
data = pd.read_csv("data/train.csv")
# Looking the data
data.head(6)
Loan_ID | Gender | Married | Dependents | Education | Self_Employed | ApplicantIncome | CoapplicantIncome | LoanAmount | Loan_Amount_Term | Credit_History | Property_Area | Loan_Status | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | LP001002 | Male | No | 0 | Graduate | No | 5849 | 0.0 | NaN | 360.0 | 1.0 | Urban | Y |
1 | LP001003 | Male | Yes | 1 | Graduate | No | 4583 | 1508.0 | 128.0 | 360.0 | 1.0 | Rural | N |
2 | LP001005 | Male | Yes | 0 | Graduate | Yes | 3000 | 0.0 | 66.0 | 360.0 | 1.0 | Urban | Y |
3 | LP001006 | Male | Yes | 0 | Not Graduate | No | 2583 | 2358.0 | 120.0 | 360.0 | 1.0 | Urban | Y |
4 | LP001008 | Male | No | 0 | Graduate | No | 6000 | 0.0 | 141.0 | 360.0 | 1.0 | Urban | Y |
5 | LP001011 | Male | Yes | 2 | Graduate | Yes | 5417 | 4196.0 | 267.0 | 360.0 | 1.0 | Urban | Y |
# isnull or notnull
data.isnull().head(6)
Loan_ID | Gender | Married | Dependents | Education | Self_Employed | ApplicantIncome | CoapplicantIncome | LoanAmount | Loan_Amount_Term | Credit_History | Property_Area | Loan_Status | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | False | False | False | False | False | False | False | False | True | False | False | False | False |
1 | False | False | False | False | False | False | False | False | False | False | False | False | False |
2 | False | False | False | False | False | False | False | False | False | False | False | False | False |
3 | False | False | False | False | False | False | False | False | False | False | False | False | False |
4 | False | False | False | False | False | False | False | False | False | False | False | False | False |
5 | False | False | False | False | False | False | False | False | False | False | False | False | False |
data.notnull().head(6)
Loan_ID | Gender | Married | Dependents | Education | Self_Employed | ApplicantIncome | CoapplicantIncome | LoanAmount | Loan_Amount_Term | Credit_History | Property_Area | Loan_Status | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | True | True | True | True | True | True | True | True | False | True | True | True | True |
1 | True | True | True | True | True | True | True | True | True | True | True | True | True |
2 | True | True | True | True | True | True | True | True | True | True | True | True | True |
3 | True | True | True | True | True | True | True | True | True | True | True | True | True |
4 | True | True | True | True | True | True | True | True | True | True | True | True | True |
5 | True | True | True | True | True | True | True | True | True | True | True | True | True |
# use of any
data.isnull().values.any()
True
# use of all
data.isnull().values.all()
False
# taking the count of Null/NaN in each column of dataframe
data.isnull().sum()
Loan_ID 0 Gender 13 Married 3 Dependents 15 Education 0 Self_Employed 32 ApplicantIncome 0 CoapplicantIncome 0 LoanAmount 22 Loan_Amount_Term 14 Credit_History 50 Property_Area 0 Loan_Status 0 dtype: int64
# if want to know the total
data.isnull().sum().sum()
149
# if want to check in any particular column
data['Dependents'].isnull().sum()
15
Atul Singh
Follow me -
http://www.datagenx.net
https://twitter.com/datagenx
https://www.facebook.com/datastage4you