import pandas as pd
overdraft = pd.read_csv('sample.csv')
overdraft.head()
Unnamed: 0 | S/N | ACCOUNT | ACCOUNT NAME | PAST DUE DAYS | ACCOUNT SEGMENT | Year | Quarter | |
---|---|---|---|---|---|---|---|---|
0 | 1 | 1.0 | 51474.0 | Cardinal Logistics | 10 | Term Loan | 2014 | Q1 |
1 | 2 | 2.0 | 21658.0 | Cardinal Petroleum Limited | 10 | Term Loan | 2014 | Q1 |
2 | 3 | 3.0 | 50831.0 | Cash Oil Company Limited | 10 | Term Loan | 2014 | Q1 |
3 | 4 | 4.0 | 22274.0 | D.G Capital Microfinance | 10 | Term Loan | 2014 | Q1 |
4 | 5 | 5.0 | 50651.0 | Dream Finance Limited | 10 | Term Loan | 2014 | Q1 |
overdraft.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2399 entries, 0 to 2398 Data columns (total 8 columns): Unnamed: 0 2399 non-null int64 S/N 2399 non-null float64 ACCOUNT 2399 non-null float64 ACCOUNT NAME 2399 non-null object PAST DUE DAYS 2399 non-null int64 ACCOUNT SEGMENT 2399 non-null object Year 2399 non-null int64 Quarter 2399 non-null object dtypes: float64(2), int64(3), object(3) memory usage: 150.1+ KB
overdraft['S/N'] = overdraft['S/N'].astype(int)
overdraft.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2399 entries, 0 to 2398 Data columns (total 8 columns): Unnamed: 0 2399 non-null int64 S/N 2399 non-null int32 ACCOUNT 2399 non-null float64 ACCOUNT NAME 2399 non-null object PAST DUE DAYS 2399 non-null int64 ACCOUNT SEGMENT 2399 non-null object Year 2399 non-null int64 Quarter 2399 non-null object dtypes: float64(1), int32(1), int64(3), object(3) memory usage: 140.7+ KB
overdraft['S/N'].value_counts()
1 24 57 24 53 24 47 24 39 24 .. 130 5 129 5 132 5 131 5 133 3 Name: S/N, Length: 133, dtype: int64
overdraft['Quarter'].value_counts()
Q4 627 Q3 610 Q2 591 Q1 571 Name: Quarter, dtype: int64
overdraft['ACCOUNT NAME'].value_counts()
Dzikson Limited 24 Dream Finance Limited 24 Maxx Oil Limited 24 Sir Ernest Farms & Logistics Limited 24 Metalex Limited 24 .. Sawiz Petroleum Company Ltd 6 Oakpalm Limited-Eni Gh Contrac 5 Safebond Co.Ltd-Special Proj. 5 Papset Enterprises Limited 3 Maxx Energy Limited 1 Name: ACCOUNT NAME, Length: 131, dtype: int64
overdraft.head()
Unnamed: 0 | S/N | ACCOUNT | ACCOUNT NAME | PAST DUE DAYS | ACCOUNT SEGMENT | Year | Quarter | |
---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 51474.0 | Cardinal Logistics | 10 | Term Loan | 2014 | Q1 |
1 | 2 | 2 | 21658.0 | Cardinal Petroleum Limited | 10 | Term Loan | 2014 | Q1 |
2 | 3 | 3 | 50831.0 | Cash Oil Company Limited | 10 | Term Loan | 2014 | Q1 |
3 | 4 | 4 | 22274.0 | D.G Capital Microfinance | 10 | Term Loan | 2014 | Q1 |
4 | 5 | 5 | 50651.0 | Dream Finance Limited | 10 | Term Loan | 2014 | Q1 |
overdraft['Year'].value_counts()
2019 503 2018 482 2017 453 2016 385 2015 325 2014 251 Name: Year, dtype: int64
def filter_row(row):
if (row['Year'] == 2014) & (row['Quarter'] == 'Q1') & (row['PAST DUE DAYS'] == 10):
return True
else:
return False
boolean = overdraft.apply(filter_row, axis=1)
new_df = overdraft[boolean]
new_df.head()
Unnamed: 0 | S/N | ACCOUNT | ACCOUNT NAME | PAST DUE DAYS | ACCOUNT SEGMENT | Year | Quarter | |
---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 51474.0 | Cardinal Logistics | 10 | Term Loan | 2014 | Q1 |
1 | 2 | 2 | 21658.0 | Cardinal Petroleum Limited | 10 | Term Loan | 2014 | Q1 |
2 | 3 | 3 | 50831.0 | Cash Oil Company Limited | 10 | Term Loan | 2014 | Q1 |
3 | 4 | 4 | 22274.0 | D.G Capital Microfinance | 10 | Term Loan | 2014 | Q1 |
4 | 5 | 5 | 50651.0 | Dream Finance Limited | 10 | Term Loan | 2014 | Q1 |
So, new_df
contains account name which is in year 2014
and Quarter Q1
having past due days is 10
. Finally, we are going to count the number of accounts whose past due days changes from 10 to other value for quarters of each year.
# Total number of accounts
accounts_name = [row['ACCOUNT NAME'] for index, row in new_df.iterrows()]
new_df.shape[0]
54
print(len(accounts_name))
accounts_name
54
['Cardinal Logistics', 'Cardinal Petroleum Limited', 'Cash Oil Company Limited', 'D.G Capital Microfinance', 'Dream Finance Limited', 'Dzikson Limited', 'East Cantonments Pharmacy Ltd', 'Industrial Requirements Servicing (Inres) Limited', 'Lillygold Energy Resources Ltd', 'Magna International Transport Company Limited', 'Market Direct Limited', 'Maxx Oil Limited', 'Mchillen International Business (Ghana) Limited', 'Rice And Sugar Company Limited', 'United Edibles Limited', 'Vidof Company Limited', 'Yibema Industrial Services Limited', 'Osei Kwabena Trading Limited', 'Tcp Money Lending Limited', 'Arkomenz Limited', 'Isaac Owusu Ansah Ent.', 'Kofi Gyimah Enterprise Ltd', 'Thonket Comapny Limited', 'Beautiful Light Company Limited', 'Beige Capital Limited', 'D.A Microfinance/ Bond', 'Dabs Medical Supplies Limited', 'Mountcrest University College', 'Parade Development Enterprise', 'Royal Eufran Ghana Limited', 'Sir Ernest Farms & Logistics Limited', 'Steel Point Ventures', 'Vestel Publications', 'A.K Sonia', 'Able Academy School Complex Limited', 'Abra Metals Limited', 'Agapet Limited', 'Asuogyaman Company Limited', 'Cob- A Industries Limited', 'Elite Minerals Ghana', 'Instyle Industries Limited', 'Jorbies Transportation & Logistics Services', 'Kwamok Limited', 'Kwatsons Ghana Limited', 'Mansell Ghana Limited', 'Maxmart Limited', 'Metalex Limited', 'Ofori Poku Company Limited', 'Queensland International School', 'Vanguard Properties Development Company Limited', 'Visigo Optical Limited', 'Vista Hospitality Limited', 'Y & K Investments Limited', 'Yovna Company Limited']
change_from_10 = {'2014_Q1': 0, '2014_Q2': 0, '2014_Q3': 0, '2014_Q4': 0, '2015_Q1': 0, '2015_Q2': 0, '2015_Q3': 0, '2015_Q4': 0,
'2016_Q1': 0, '2016_Q2': 0, '2016_Q3': 0, '2016_Q4': 0, '2017_Q1': 0, '2017_Q2': 0, '2017_Q3': 0, '2017_Q4': 0,
'2018_Q1': 0, '2018_Q2': 0, '2018_Q3': 0, '2018_Q4': 0, '2019_Q1': 0, '2019_Q2': 0, '2019_Q3': 0, '2019_Q4': 0}
count = 0
for each_account in accounts_name:
df = overdraft[(overdraft['ACCOUNT NAME'] == each_account) & (overdraft['Quarter'] != 'Q1')]
for index, row in df.iterrows():
for year in [2014, 2015, 2016, 2017, 2018, 2019]:
if row['Year'] == year:
for quarter in ['Q1', 'Q2', 'Q3', 'Q4']:
if row['Quarter'] == quarter:
if row['PAST DUE DAYS'] != 10:
count = 1
key = str(year) + '_' + quarter
change_from_10[key] = change_from_10[key] + count
count = 0
change_from_10
{'2014_Q1': 0, '2014_Q2': 0, '2014_Q3': 0, '2014_Q4': 0, '2015_Q1': 0, '2015_Q2': 0, '2015_Q3': 1, '2015_Q4': 1, '2016_Q1': 0, '2016_Q2': 4, '2016_Q3': 3, '2016_Q4': 3, '2017_Q1': 0, '2017_Q2': 6, '2017_Q3': 7, '2017_Q4': 7, '2018_Q1': 0, '2018_Q2': 13, '2018_Q3': 13, '2018_Q4': 13, '2019_Q1': 0, '2019_Q2': 13, '2019_Q3': 13, '2019_Q4': 13}
number_of_10 = {'2014_Q1': 0, '2014_Q2': 0, '2014_Q3': 0, '2014_Q4': 0, '2015_Q1': 0, '2015_Q2': 0, '2015_Q3': 0, '2015_Q4': 0,
'2016_Q1': 0, '2016_Q2': 0, '2016_Q3': 0, '2016_Q4': 0, '2017_Q1': 0, '2017_Q2': 0, '2017_Q3': 0, '2017_Q4': 0,
'2018_Q1': 0, '2018_Q2': 0, '2018_Q3': 0, '2018_Q4': 0, '2019_Q1': 0, '2019_Q2': 0, '2019_Q3': 0, '2019_Q4': 0}
count = 0
for each_account in accounts_name:
df = overdraft[(overdraft['ACCOUNT NAME'] == each_account) & (overdraft['Quarter'] != 'Q1')]
for index, row in df.iterrows():
for year in [2014, 2015, 2016, 2017, 2018, 2019]:
if row['Year'] == year:
for quarter in ['Q1', 'Q2', 'Q3', 'Q4']:
if row['Quarter'] == quarter:
if row['PAST DUE DAYS'] == 10:
count = 1
key = str(year) + '_' + quarter
number_of_10[key] = number_of_10[key] + count
count = 0
number_of_10
{'2014_Q1': 0, '2014_Q2': 54, '2014_Q3': 54, '2014_Q4': 53, '2015_Q1': 0, '2015_Q2': 53, '2015_Q3': 53, '2015_Q4': 53, '2016_Q1': 0, '2016_Q2': 50, '2016_Q3': 51, '2016_Q4': 51, '2017_Q1': 0, '2017_Q2': 48, '2017_Q3': 47, '2017_Q4': 47, '2018_Q1': 0, '2018_Q2': 39, '2018_Q3': 39, '2018_Q4': 39, '2019_Q1': 0, '2019_Q2': 39, '2019_Q3': 39, '2019_Q4': 39}
# number_of_10 for Quarter Q1 rows in selected accounts_name.
count = 0
for year in range(2014, 2020):
for each_account in accounts_name:
df = overdraft[(overdraft['ACCOUNT NAME'] == each_account) & (overdraft['Year'] == year)]
filter_df = df[df['Quarter'] == 'Q1']
for index, row in filter_df.iterrows():
if row['PAST DUE DAYS'] == 10:
count = 1
break
key = str(year) + '_' + 'Q1'
number_of_10[key] = number_of_10[key] + count
number_of_10
{'2014_Q1': 54, '2014_Q2': 54, '2014_Q3': 54, '2014_Q4': 53, '2015_Q1': 54, '2015_Q2': 53, '2015_Q3': 53, '2015_Q4': 53, '2016_Q1': 54, '2016_Q2': 50, '2016_Q3': 51, '2016_Q4': 51, '2017_Q1': 54, '2017_Q2': 48, '2017_Q3': 47, '2017_Q4': 47, '2018_Q1': 54, '2018_Q2': 39, '2018_Q3': 39, '2018_Q4': 39, '2019_Q1': 54, '2019_Q2': 39, '2019_Q3': 39, '2019_Q4': 39}