#!/usr/bin/env python # coding: utf-8 # In[1]: import pandas as pd # In[2]: overdraft = pd.read_csv('sample.csv') # In[3]: overdraft.head() # In[4]: overdraft.info() # In[5]: overdraft['S/N'] = overdraft['S/N'].astype(int) overdraft.info() # In[6]: overdraft['S/N'].value_counts() # In[7]: overdraft['Quarter'].value_counts() # In[8]: overdraft['ACCOUNT NAME'].value_counts() # In[9]: overdraft.head() # In[10]: overdraft['Year'].value_counts() # In[11]: def filter_row(row): if (row['Year'] == 2014) & (row['Quarter'] == 'Q1') & (row['PAST DUE DAYS'] == 10): return True else: return False boolean = overdraft.apply(filter_row, axis=1) new_df = overdraft[boolean] new_df.head() # So, `new_df` contains account name which is in year `2014` and Quarter `Q1` having past due days is `10`. Finally, we are going to count the number of accounts whose past due days changes from 10 to other value for quarters of each year. # In[12]: # Total number of accounts accounts_name = [row['ACCOUNT NAME'] for index, row in new_df.iterrows()] new_df.shape[0] # In[13]: print(len(accounts_name)) accounts_name # In[14]: change_from_10 = {'2014_Q1': 0, '2014_Q2': 0, '2014_Q3': 0, '2014_Q4': 0, '2015_Q1': 0, '2015_Q2': 0, '2015_Q3': 0, '2015_Q4': 0, '2016_Q1': 0, '2016_Q2': 0, '2016_Q3': 0, '2016_Q4': 0, '2017_Q1': 0, '2017_Q2': 0, '2017_Q3': 0, '2017_Q4': 0, '2018_Q1': 0, '2018_Q2': 0, '2018_Q3': 0, '2018_Q4': 0, '2019_Q1': 0, '2019_Q2': 0, '2019_Q3': 0, '2019_Q4': 0} count = 0 for each_account in accounts_name: df = overdraft[(overdraft['ACCOUNT NAME'] == each_account) & (overdraft['Quarter'] != 'Q1')] for index, row in df.iterrows(): for year in [2014, 2015, 2016, 2017, 2018, 2019]: if row['Year'] == year: for quarter in ['Q1', 'Q2', 'Q3', 'Q4']: if row['Quarter'] == quarter: if row['PAST DUE DAYS'] != 10: count = 1 key = str(year) + '_' + quarter change_from_10[key] = change_from_10[key] + count count = 0 change_from_10 # In[15]: number_of_10 = {'2014_Q1': 0, '2014_Q2': 0, '2014_Q3': 0, '2014_Q4': 0, '2015_Q1': 0, '2015_Q2': 0, '2015_Q3': 0, '2015_Q4': 0, '2016_Q1': 0, '2016_Q2': 0, '2016_Q3': 0, '2016_Q4': 0, '2017_Q1': 0, '2017_Q2': 0, '2017_Q3': 0, '2017_Q4': 0, '2018_Q1': 0, '2018_Q2': 0, '2018_Q3': 0, '2018_Q4': 0, '2019_Q1': 0, '2019_Q2': 0, '2019_Q3': 0, '2019_Q4': 0} count = 0 for each_account in accounts_name: df = overdraft[(overdraft['ACCOUNT NAME'] == each_account) & (overdraft['Quarter'] != 'Q1')] for index, row in df.iterrows(): for year in [2014, 2015, 2016, 2017, 2018, 2019]: if row['Year'] == year: for quarter in ['Q1', 'Q2', 'Q3', 'Q4']: if row['Quarter'] == quarter: if row['PAST DUE DAYS'] == 10: count = 1 key = str(year) + '_' + quarter number_of_10[key] = number_of_10[key] + count count = 0 number_of_10 # In[18]: # number_of_10 for Quarter Q1 rows in selected accounts_name. count = 0 for year in range(2014, 2020): for each_account in accounts_name: df = overdraft[(overdraft['ACCOUNT NAME'] == each_account) & (overdraft['Year'] == year)] filter_df = df[df['Quarter'] == 'Q1'] for index, row in filter_df.iterrows(): if row['PAST DUE DAYS'] == 10: count = 1 break key = str(year) + '_' + 'Q1' number_of_10[key] = number_of_10[key] + count number_of_10 # In[ ]: # In[ ]: