In [1]:
import pandas as pd 
pd.set_option('display.max_columns', 500)
from tqdm import tqdm
import numpy as np
from matplotlib import pyplot as plt 
#import lightgbm as lgb
from scipy.stats import mode
from sklearn.preprocessing import LabelEncoder
from datetime import timedelta
from pandas import pivot_table

import seaborn as sns
sns.set()
%config InlineBackend.figure_format = 'svg'

Author Sergei Bulaev , Slack name: @ser-serege , Fall 2018

Part 1. Dataset and features description

This dataset contains the history of customer transactions for 3 months of preferential use of the banking product.

In the test file.the csv contains lines of c 518375 transactions made by the clients of the Bank. The cl_id column contains the internal client id. For each unique cl_id, you should predict whether the client will continue to use the product (target_flag). A value of 0 indicates failure and a value of 1 indicates continued use.

Column Transcription
PERIOD transaction month
cl_id client id
MCC seller category code
channel_type customer engagement channel
currency currency
TRDATETIME transaction date/time
amount transaction amount
trx_category type of transaction POS payment through
the POS terminal, C2C_OUT – transfer
(outgoing payment), C2C_IN – card
transaction (incoming payment), DEPOSIT
card in the ATM, WD_ATM_PARTNER – cash
withdrawals at ATMs partners
target_flag will the customer continue to use the product after the grace period (1/0) (target)
target_sum the amount of the transaction kind of POS for the three future months (target)
In [2]:
#raw_df = pd.read_csv('Rosbankk.csv',error_bad_lines=False)
#
#raw_df.to_csv('rosbank_train.csv')
#test = pd.read_csv('rosbank_test.csv',error_bad_lines=False)
raw_df = pd.read_csv('Rosbankk.csv',error_bad_lines=False)
In [3]:
#raw_df = pd.read_csv('rosbank_train.csv',error_bad_lines=False)
#del raw_df['Unnamed: 0']
In [4]:
raw_df.head()
Out[4]:
PERIOD cl_id MCC channel_type currency TRDATETIME amount trx_category target_flag target_sum
0 01/10/2017 0 5200 NaN 810 21OCT17:00:00:00 5023.0 POS 0 0.0
1 01/10/2017 0 6011 NaN 810 12OCT17:12:24:07 20000.0 DEPOSIT 0 0.0
2 01/12/2017 0 5921 NaN 810 05DEC17:00:00:00 767.0 POS 0 0.0
3 01/10/2017 0 5411 NaN 810 21OCT17:00:00:00 2031.0 POS 0 0.0
4 01/10/2017 0 6012 NaN 810 24OCT17:13:14:24 36562.0 C2C_OUT 0 0.0
In [13]:
raw_df['cl_id'].nunique()
Out[13]:
5000
In [18]:
plt.hist(raw_df[raw_df['target_flag'] == 1]['target_flag'].dropna(), color='red', alpha=0.3, bins=30);
plt.hist(raw_df[raw_df['target_flag'] == 0]['target_flag'].dropna(), color='green', alpha=0.5, bins=30);
print(round((raw_df[raw_df['target_flag'] == 1]['cl_id'].nunique() / raw_df['cl_id'].nunique())*100,1), '% of taget = 1')
55.3 % of taget = 1
In [19]:
raw_df.info()
raw_df.describe()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 490513 entries, 0 to 490512
Data columns (total 10 columns):
PERIOD          490513 non-null object
cl_id           490513 non-null int64
MCC             490513 non-null int64
channel_type    487603 non-null object
currency        490513 non-null int64
TRDATETIME      490513 non-null object
amount          490513 non-null float64
trx_category    490513 non-null object
target_flag     490513 non-null int64
target_sum      490513 non-null float64
dtypes: float64(2), int64(4), object(4)
memory usage: 37.4+ MB
Out[19]:
cl_id MCC currency amount target_flag target_sum
count 490513.000000 490513.000000 490513.000000 4.905130e+05 490513.000000 4.905130e+05
mean 4802.127361 5699.783822 810.601854 5.723594e+03 0.652588 1.627345e+05
std 2986.492835 662.933380 38.223123 7.703763e+04 0.476148 1.474097e+06
min 0.000000 742.000000 32.000000 4.000000e-02 0.000000 0.000000e+00
25% 2193.000000 5411.000000 810.000000 2.240000e+02 0.000000 0.000000e+00
50% 4576.000000 5661.000000 810.000000 6.240000e+02 1.000000 2.491009e+04
75% 7390.000000 5941.000000 810.000000 2.176000e+03 1.000000 1.516193e+05
max 10215.000000 9405.000000 986.000000 3.583500e+07 1.000000 4.606578e+07
In [20]:
print( 'Number of unique clients =',raw_df['cl_id'].nunique())
print ('At channel_type column there are ', round(100*(len(raw_df[raw_df['channel_type'].isna()]) / len(raw_df)),1), '% of empty cells')
Number of unique clients = 5000
At channel_type column there are  0.6 % of empty cells
Let's plot where in dimention distribution of target
In [21]:
X = raw_df[['cl_id','target_flag']].groupby('cl_id').agg('max').reset_index()
ind = X['target_flag']==0
plt.plot(X['cl_id'][ind], np.random.rand(np.sum(ind)), 'g.', label='negative case')
ind = X['target_flag']==1
plt.plot(X['cl_id'][ind], np.random.rand(np.sum(ind)), 'b.', label='positive case')
plt.legend()
Out[21]:
<matplotlib.legend.Legend at 0x111e19780>

From dataset we see, that:

MCC is not int number, it have to be categorial data(we will find descriptions in the Internet); Currency should be also categorial data

In total there are 490513 transactions for 3 month, by made 5000 clients. In general it's about 98 transactions by 1 client for 3 month of using card.

Time period is from 01/01/2017' to '01/12/2017'

From description we see that there are empty cells in channel_type feature. Let's fill them into 'type6'

In [27]:
raw_df.channel_type.unique()
Out[27]:
array([nan, 'type5', 'type4', 'type2', 'type3', 'type1'], dtype=object)
In [29]:
#From description we see that there are empty cells in channel_type feature. Let's fill them into 'type6'

raw_df.channel_type.fillna('type6', inplace = True)

From description we see that PERIOD and TRDATETIME have Object type. and strange format. let's parse it and convert to datetime format

In [77]:
from datetime import datetime, date, time

raw_df['PERIOD'] = raw_df['PERIOD'].apply(pd.to_datetime)

# Creating separate cols for yr, month,...
raw_df['Year'] = raw_df.TRDATETIME.str[5:7]
raw_df['Month'] = raw_df.TRDATETIME.str[2:5]
raw_df['Date'] = raw_df.TRDATETIME.str[0:2]
raw_df['Hour'] = raw_df.TRDATETIME.str[8:10]

# Replace month with ints
raw_df.Month = raw_df.Month.replace(to_replace=['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN','JUL','AUG',\
                                                'SEP','OCT','NOV','DEC' ], value=[1,2,3,4,5,6,7,8,9,10,11,12])

raw_df.Year = raw_df.Year.apply(pd.to_numeric)
raw_df.Date = raw_df.Date.apply(pd.to_numeric)
raw_df.Month= raw_df.Month.apply(pd.to_numeric)
raw_df.Hour = raw_df.Hour.apply(pd.to_numeric)
raw_df.Year = raw_df.Year + 2000

# making date format
def to_date(row):    
    return date(row[10], row[11], row[12])
raw_df['DateFormat'] = raw_df.apply(to_date, axis=1)

# making Quater of the Year feature 
def Quater(row):
    if row['Month']in [1, 2, 3]:
        return 1
    if row['Month']in [4, 5, 6]:
        return 2   
    if row['Month']in [7, 8, 9]:
        return 3 
    if row['Month']in [10, 11, 12]:
        return 4

# Applying features is the day is weekend and quater
raw_df['quater_of_year'] = raw_df.apply(Quater, axis = 1)
raw_df['weekend'] = raw_df['DateFormat'].astype('datetime64[ns]')
raw_df['weekend'] = ((raw_df.weekend.dt.dayofweek) // 5 ==1).astype(float)
In [26]:
raw_df.currency.unique()
Out[26]:
array([810, 978, 504, 704, 981, 985, 840, 949,  51, 826, 214, 764, 203,
       702, 360, 756, 933, 975,  36, 191, 784, 980, 124, 398, 376, 944,
       352, 417, 156, 752, 392, 484, 634, 188, 643, 348, 356, 458, 986,
       498, 578, 208, 344,  32, 410, 788, 480, 604, 941, 144, 946, 710,
       690,  44, 170, 901, 608, 554, 462])
In [31]:
raw_df.trx_category.unique()
Out[31]:
array(['POS', 'DEPOSIT', 'C2C_OUT', 'WD_ATM_ROS', 'BACK_TRX',
       'WD_ATM_PARTNER', 'WD_ATM_OTHER', 'C2C_IN', 'CAT', 'CASH_ADV'],
      dtype=object)
In python there is a library which can convert currancy. Let's convert all amounts to rubles
In [422]:
from currency_converter import CurrencyConverter

converter = CurrencyConverter(fallback_on_missing_rate=True, fallback_on_wrong_date=True)
converter_currencies = converter.currencies

def convert_to_rub(amount, currency, day):
    if currency == 'RUB':
        return amount
    else:
        if currency in converter_currencies:
            return converter.convert(amount, currency, 'RUB', date = day)
        else: amount     
        return amount
    
# also from task descripttion we know that there are cash in and cash out . 
# It that logic make the functions which convert in (+) or (-)
    
def cash_in_out(raw):
    if raw['trx_category'] == 'POS':
        return raw['amount']*(-1)
    if raw['trx_category'] == 'C2C_OUT':
        return raw['amount']*(-1)
    if raw['trx_category'] == 'WD_ATM_PARTNER':
        return raw['amount']*(-1)
    if raw['trx_category'] == 'WD_ATM_ROS':
        return raw['amount']*(-1)    
    else:
        return raw['amount']
    
raw_df['amount'] = raw_df.apply(lambda x: convert_to_rub(x['amount'], x['currency'], x['DateFormat']), axis = 1)
raw_df['cash_in_out'] = raw_df.apply(cash_in_out, axis=1)

Let's create a feature that discribe currnecy Rub, Dollar ,Euro other

In [ ]:
def Is_rub(raw):
    if raw['currency'] == 810:
        return 'Rub'
    if raw['currency'] == 643:
        return 'Rub'
    if raw['currency'] ==840:
        return '$'
    if raw['currency'] == 978:
        return 'Euro'
    else: 
        return 'other'
raw_df['cur']= raw_df.apply(Is_rub, axis=1)
Let create a feature of start and end of perion of using the card
In [ ]:
max_date = raw_df[['cl_id', 'DateFormat']].groupby('cl_id').max().reset_index()
max_date.columns = ['cl_id', 'last_action']

raw_df = pd.merge(raw_df, max_date, how='left', on='cl_id')
I have a hypothesis that if client continue to usethe card actively in the end of period of preferential use period he will continue to use card after . So let's create features for last 14 days and last 30 days before the end of period
In [ ]:
raw_df['last_action'] = pd.to_datetime(raw_df['last_action'])

raw_df['last_14_days'] = raw_df['last_action'] - timedelta(days=14)
raw_df['last_30_days'] = raw_df['last_action'] - timedelta(days=30)

raw_df['DateFormat'] = pd.to_datetime(raw_df['DateFormat'])
raw_df['last_14_days']= pd.to_datetime(raw_df['last_14_days'])
raw_df['last_30_days'] = pd.to_datetime(raw_df['last_30_days'])

def last_14_days1(row):
    if row['DateFormat']>=row['last_14_days']:
        return 1

def last_30_days1(row):
    if row['DateFormat']>=row['last_30_days']:
        return 1
    
raw_df['last_14_days'] = raw_df.apply(last_14_days1, axis=1)
raw_df['last_30_days'] = raw_df.apply(last_30_days1, axis=1)
The next one quation to solve is MCC (Merchnt Category Code) codes. This are codes identifies the kind of operation of client. For
In [754]:
mcc_codes = pd.read_excel('mcc_codes1.xlsx')
mcc_codes.columns = ['MCC', 'Name' , 'Group']

raw_df = pd.merge(raw_df, mcc_codes, 'left', on=['MCC'])
In [755]:
mcc_codes.head()
Out[755]:
MCC Name Group
0 742 Ветеринарные услуги Контрактные услуги
1 763 Сельскохозяйственные кооперативы Контрактные услуги
2 780 Услуги садоводства и ландшафтного дизайна Контрактные услуги
3 1520 Генеральные подрядчики – жилое и коммерческое ... Контрактные услуги
4 1711 Генеральные подрядчики по вентиляции, теплосна... Контрактные услуги
MCC codes have current name and grouped name. We will use it for groupby functions
One of MCC codes means cashback from POS operations
In [759]:
def cashback(raw):
    if raw['trx_category'] == 'POS':
        return raw['amount']*0.02
raw_df['cashback'] = raw_df.apply(cashback, axis=1)

Finaly we have

In [70]:
raw_df.head()
Out[70]:
PERIOD cl_id MCC channel_type currency TRDATETIME amount trx_category target_flag target_sum Year Month Date Hour DateFormat weekend cash_in_out quater_of_year last_action last_14_days last_30_days Name Group cashback cur
0 2017-01-10 0 5200 type0 810 21OCT17:00:00:00 5023.0 POS 0 0.0 2017 10 21 0 2017-10-21 1.0 -5023.0 4 2017-12-05 0.0 0.0 Товары для дома Розничные магазины 100.46 Rub
1 2017-01-10 0 6011 type0 810 12OCT17:12:24:07 20000.0 DEPOSIT 0 0.0 2017 10 12 12 2017-10-12 0.0 20000.0 4 2017-12-05 0.0 0.0 Финансовые учреждения – снятие наличности авто... Поставщик услуг 0.00 Rub
2 2017-01-12 0 5921 type0 810 05DEC17:00:00:00 767.0 POS 0 0.0 2017 12 5 0 2017-12-05 0.0 -767.0 4 2017-12-05 1.0 1.0 Магазины с продажей спиртных напитков навынос Различные магазины 15.34 Rub
3 2017-01-10 0 5411 type0 810 21OCT17:00:00:00 2031.0 POS 0 0.0 2017 10 21 0 2017-10-21 1.0 -2031.0 4 2017-12-05 0.0 0.0 Бакалейные магазины, супермаркеты Розничные магазины 40.62 Rub
4 2017-01-10 0 6012 type0 810 24OCT17:13:14:24 36562.0 C2C_OUT 0 0.0 2017 10 24 13 2017-10-24 0.0 -36562.0 4 2017-12-05 0.0 0.0 Финансовые учреждения – торговля и услуги Поставщик услуг 0.00 Rub
All NaN's mean that they are equal to 0
In [69]:
raw_df = raw_df.fillna(0)
In [73]:
# Save it fo file 
#raw_df.to_csv('rosbank_train1.csv')
#raw_df = pd.read_csv('rosbank_train1.csv')

Okay it seems that we prepare dataset for further groupby functtions.

In [76]:
raw_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 490513 entries, 0 to 490512
Data columns (total 25 columns):
PERIOD            490513 non-null object
cl_id             490513 non-null int64
MCC               490513 non-null int64
channel_type      490513 non-null object
currency          490513 non-null int64
TRDATETIME        490513 non-null object
amount            490513 non-null float64
trx_category      490513 non-null object
target_flag       490513 non-null int64
target_sum        490513 non-null float64
Year              490513 non-null int64
Month             490513 non-null int64
Date              490513 non-null int64
Hour              490513 non-null int64
DateFormat        490513 non-null object
weekend           490513 non-null float64
cash_in_out       490513 non-null float64
quater_of_year    490513 non-null int64
last_action       490513 non-null object
last_14_days      490513 non-null float64
last_30_days      490513 non-null float64
Name              490513 non-null object
Group             490513 non-null object
cashback          490513 non-null float64
cur               490513 non-null object
dtypes: float64(7), int64(9), object(9)
memory usage: 93.6+ MB
In [174]:
raw_df.head(2)
Out[174]:
PERIOD cl_id MCC channel_type currency TRDATETIME amount trx_category target_flag target_sum Year Month Date Hour DateFormat weekend cash_in_out quater_of_year last_action last_14_days last_30_days Name Group cashback cur
0 2017-01-10 0 5200 type0 810 21OCT17:00:00:00 5023.0 POS 0 0.0 2017 10 21 0 2017-10-21 1.0 -5023.0 4 2017-12-05 0.0 0.0 Товары для дома Розничные магазины 100.46 Rub
1 2017-01-10 0 6011 type0 810 12OCT17:12:24:07 20000.0 DEPOSIT 0 0.0 2017 10 12 12 2017-10-12 0.0 20000.0 4 2017-12-05 0.0 0.0 Финансовые учреждения – снятие наличности авто... Поставщик услуг 0.00 Rub
Doing some aggregations to generate sample of unique clients. General approach is to count for categorial features and aggregate by for numerical ['max', 'min', 'mean', 'count' , 'sum']
In [173]:
def days_in_use(x):
    return (np.max(x) - np.min(x)).days

days_usage = raw_df[['cl_id','DateFormat']].groupby('cl_id').agg(days_in_use)

days_usage['target_flag'] = raw_df['target_flag']

max_date = raw_df[['cl_id', 'DateFormat']].groupby('cl_id').max()
days_usage['days_from_end_period']= (max(raw_df['DateFormat']) - max_date['DateFormat']).dt.days


num_trans_total = raw_df[['cl_id','DateFormat']].groupby('cl_id').agg('count').reset_index()
num_trans_total.columns = ['cl_id', 'num_trans_total']
num_trans_total.index=num_trans_total.cl_id
days_usage['Num_trans_total'] = num_trans_total.num_trans_total

num_trans_month = raw_df[['cl_id','Month']].groupby('cl_id').agg(['max', 'min', 'mean', 'count', 'sum']).reset_index()
#num_trans_month.columns = ['cl_id', 'num_trans_month']
num_trans_month.index=num_trans_month.cl_id
days_usage[num_trans_month.columns] = num_trans_month


balance_on_end_of_period = raw_df[['cl_id', 'cash_in_out']].groupby('cl_id').agg(['max', 'min', 'mean', 'count','sum']).reset_index()
#balance_on_end_of_period.columns=['cl_id', 'balance_on_end_of_period']
balance_on_end_of_period.index=balance_on_end_of_period.cl_id
days_usage[balance_on_end_of_period.columns] = balance_on_end_of_period

cashback = raw_df[['cl_id', 'cashback']].groupby('cl_id').sum().reset_index()
cashback.columns=['cl_id', 'cashback']
cashback.index=cashback.cl_id
days_usage['cashback'] = cashback.cashback


spent_trx_category = raw_df[['cl_id', 'trx_category' ,'amount']].groupby(['cl_id', 'trx_category']).sum().\
                                                                                        unstack().reset_index()
spent_trx_category=spent_trx_category.fillna(0)
spent_trx_category.columns = ['cl_id', 'BACK_TRX', 'C2C_IN', 'C2C_OUT', 'CASH_ADV', 'CAT', 'DEPOSIT',
       'POS', 'WD_ATM_OTHER', 'WD_ATM_PARTNER', 'WD_ATM_ROS'] 
spent_trx_category.index= spent_trx_category.cl_id

days_usage[['BACK_TRX', 'C2C_IN', 'C2C_OUT', 'CASH_ADV', 'CAT', 'DEPOSIT',
       'POS', 'WD_ATM_OTHER', 'WD_ATM_PARTNER', 'WD_ATM_ROS']] = spent_trx_category[['BACK_TRX', 'C2C_IN', 'C2C_OUT',\
                                 'CASH_ADV', 'CAT', 'DEPOSIT','POS', 'WD_ATM_OTHER', 'WD_ATM_PARTNER', 'WD_ATM_ROS']]

quntity_of_mcc = raw_df[['cl_id','MCC']].groupby(['cl_id','MCC']).apply(lambda x: x.count()).unstack().\
                                                                                        max(axis=1).reset_index()
quntity_of_mcc.columns=['cl_id', 'quntity_of_mcc']
quntity_of_mcc.index= quntity_of_mcc.cl_id
days_usage['quntity_of_mcc']=quntity_of_mcc.quntity_of_mcc




multy_currency = raw_df[['cl_id', 'currency']].groupby(['cl_id', 'currency']).first().reset_index()
multy_currency = multy_currency.groupby(['cl_id']).count()
#multy_currency.index= multy_currency.cl_id

days_usage['multy_currency']=multy_currency.currency



last_14_days=raw_df[['cl_id', 'last_14_days']].groupby(['cl_id']).agg('sum')
last_30_days=raw_df[['cl_id', 'last_30_days']].groupby(['cl_id']).agg('sum')

days_usage[last_14_days.columns]=last_14_days
days_usage[last_30_days.columns]=last_30_days




group_mcc = pivot_table(raw_df, values='cash_in_out', 
                    index=['cl_id'], columns=['Group'], aggfunc=lambda cash_in_out: len(cash_in_out.unique())).fillna(0)
group_mcc2 = pivot_table(raw_df, values='cash_in_out', 
                    index=['cl_id'], columns=['Group'], aggfunc=np.sum).fillna(0)

mcc = pd.merge(group_mcc, group_mcc2, 'left', on=days_usage.index)

mcc.index=mcc.key_0
days_usage[mcc.columns]= mcc
del days_usage['key_0']

trx_category = raw_df[['cl_id','trx_category', 'cash_in_out']].groupby(['cl_id','trx_category']).agg(['max', \
                                                                'min', 'mean', 'count', 'sum']).unstack()
days_usage[trx_category.columns]=trx_category


quater_of_year = raw_df[['cl_id', 'quater_of_year']].groupby('cl_id').agg('sum').reset_index()
#quater_of_year.columns=['cl_id', 'quater_of_year']
quater_of_year.index=quater_of_year.cl_id
days_usage[quater_of_year.columns] = quater_of_year



last_action = raw_df[['cl_id', 'last_action']].groupby('cl_id').count().reset_index()
last_action.columns=['cl_id', 'last_action']
last_action.index=last_action.cl_id
days_usage['last_action'] = last_action.last_action

cur = raw_df[['cl_id','cur', 'cash_in_out']].groupby(['cl_id','cur']).agg(['max', 'min', 'mean', 'count', 'sum'\
                                                                          ]).unstack()
#cur.index=cur.cl_id
days_usage[cur.columns]=cur

last_14_days = raw_df[['cl_id', 'last_14_days']].groupby('cl_id').agg(['max', 'min', 'mean', 'count', 'sum'\ ]).reset_index()

last_14_days.columns=['cl_id', 'last_14_days']

last_14_days.index=last_14_days.cl_id days_usage[last_14_days.columns] = last_14_days

last_30_days = raw_df[['cl_id', 'last_30_days']].groupby('cl_id').agg(['max', 'min', 'mean', 'count', 'sum'\ ]).reset_index()

last_30_days.columns=['cl_id', 'last_30_days']

last_30_days.index=last_14_days.cl_id days_usage[last_30_days.columns] = last_30_days

Another one feature could be relation between (all count of transactions) / to (count of transactions by 14 last days and 30 last days)
In [177]:
days_usage['all_to_last14'] =  days_usage['last_14_days'] / days_usage['Num_trans_total']
days_usage['all_to_last30'] =  days_usage['last_30_days'] / days_usage['Num_trans_total']

We've got a dataset with 139 aggregated features

In [178]:
days_usage.head()
Out[178]:
DateFormat target_flag days_from_end_period Num_trans_total (cl_id, ) (Month, max) (Month, min) (Month, mean) (Month, count) (Month, sum) (cash_in_out, max) (cash_in_out, min) (cash_in_out, mean) (cash_in_out, count) (cash_in_out, sum) cashback BACK_TRX C2C_IN C2C_OUT CASH_ADV CAT DEPOSIT POS WD_ATM_OTHER WD_ATM_PARTNER WD_ATM_ROS quntity_of_mcc multy_currency last_14_days last_30_days 0_x Авиалинии, авиакомпании_x Автомобили и транспортные средства_x Аренда автомобилей_x Бизнес услуги_x Государственные услуги_x Коммунальные и кабельные услуги_x Контрактные услуги_x Личные услуги_x Магазины одежды_x Оптовые поставщики и производители_x Отели и мотели_x Поставщик услуг_x Продажи по почте/телефону_x Профессиональные услуги_x Развлечения_x Различные магазины_x Ремонтные услуги_x Розничные магазины_x Транспорт_x Членские организации_x 0_y Авиалинии, авиакомпании_y Автомобили и транспортные средства_y Аренда автомобилей_y Бизнес услуги_y Государственные услуги_y Коммунальные и кабельные услуги_y Контрактные услуги_y Личные услуги_y Магазины одежды_y Оптовые поставщики и производители_y Отели и мотели_y Поставщик услуг_y Продажи по почте/телефону_y Профессиональные услуги_y Развлечения_y Различные магазины_y Ремонтные услуги_y Розничные магазины_y Транспорт_y Членские организации_y (cash_in_out, max, BACK_TRX) (cash_in_out, max, C2C_IN) (cash_in_out, max, C2C_OUT) (cash_in_out, max, CASH_ADV) (cash_in_out, max, CAT) (cash_in_out, max, DEPOSIT) (cash_in_out, max, POS) (cash_in_out, max, WD_ATM_OTHER) (cash_in_out, max, WD_ATM_PARTNER) (cash_in_out, max, WD_ATM_ROS) (cash_in_out, min, BACK_TRX) (cash_in_out, min, C2C_IN) (cash_in_out, min, C2C_OUT) (cash_in_out, min, CASH_ADV) (cash_in_out, min, CAT) (cash_in_out, min, DEPOSIT) (cash_in_out, min, POS) (cash_in_out, min, WD_ATM_OTHER) (cash_in_out, min, WD_ATM_PARTNER) (cash_in_out, min, WD_ATM_ROS) (cash_in_out, mean, BACK_TRX) (cash_in_out, mean, C2C_IN) (cash_in_out, mean, C2C_OUT) (cash_in_out, mean, CASH_ADV) (cash_in_out, mean, CAT) (cash_in_out, mean, DEPOSIT) (cash_in_out, mean, POS) (cash_in_out, mean, WD_ATM_OTHER) (cash_in_out, mean, WD_ATM_PARTNER) (cash_in_out, mean, WD_ATM_ROS) (cash_in_out, count, BACK_TRX) (cash_in_out, count, C2C_IN) (cash_in_out, count, C2C_OUT) (cash_in_out, count, CASH_ADV) (cash_in_out, count, CAT) (cash_in_out, count, DEPOSIT) (cash_in_out, count, POS) (cash_in_out, count, WD_ATM_OTHER) (cash_in_out, count, WD_ATM_PARTNER) (cash_in_out, count, WD_ATM_ROS) (cash_in_out, sum, BACK_TRX) (cash_in_out, sum, C2C_IN) (cash_in_out, sum, C2C_OUT) (cash_in_out, sum, CASH_ADV) (cash_in_out, sum, CAT) (cash_in_out, sum, DEPOSIT) (cash_in_out, sum, POS) (cash_in_out, sum, WD_ATM_OTHER) (cash_in_out, sum, WD_ATM_PARTNER) (cash_in_out, sum, WD_ATM_ROS) cl_id quater_of_year last_action (cash_in_out, max, $) (cash_in_out, max, Euro) (cash_in_out, max, Rub) (cash_in_out, max, other) (cash_in_out, min, $) (cash_in_out, min, Euro) (cash_in_out, min, Rub) (cash_in_out, min, other) (cash_in_out, mean, $) (cash_in_out, mean, Euro) (cash_in_out, mean, Rub) (cash_in_out, mean, other) (cash_in_out, count, $) (cash_in_out, count, Euro) (cash_in_out, count, Rub) (cash_in_out, count, other) (cash_in_out, sum, $) (cash_in_out, sum, Euro) (cash_in_out, sum, Rub) (cash_in_out, sum, other) all_to_last14 all_to_last30
cl_id
0 54 0 118 5 0 12 10 10.400000 5 52 20000.0 -36562.00 -4876.600000 5 -24383.00 156.4200 0.0 0.0 36562.00 0.0 0.00 20000.0 7821.00 0.0 0.0 0.0 1.0 1 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 1.0 0.0 2.0 0.0 0.0 0.0 0.0 0.00 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.0 0.0 -16562.00 0.0 0.0 0.00 -767.00 0.0 -7054.00 0.00 0.0 NaN NaN -36562.0 NaN NaN 20000.0 -767.0 NaN NaN NaN NaN NaN -36562.0 NaN NaN 20000.0 -5023.00 NaN NaN NaN NaN NaN -36562.000000 NaN NaN 20000.0 -2607.000000 NaN NaN NaN NaN NaN 1.0 NaN NaN 1.0 3.0 NaN NaN NaN NaN NaN -36562.00 NaN NaN 20000.0 -7821.00 NaN NaN NaN 0 20 5 NaN NaN 20000.0 NaN NaN NaN -36562.00 NaN NaN NaN -4876.600000 NaN NaN NaN 5.0 NaN NaN NaN -24383.00 NaN 0.200000 0.200000
1 92 0 165 104 1 10 7 8.951923 104 931 50000.0 -60000.00 -745.942692 104 -77578.04 2451.5608 0.0 0.0 0.00 0.0 0.00 95000.0 122578.04 0.0 0.0 50000.0 41.0 2 15.0 61.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 5.0 0.0 0.0 4.0 0.0 0.0 6.0 57.0 0.0 14.0 2.0 0.0 0.0 0.0 0.00 0.0 -30.0 -210.0 0.0 0.0 0.0 -7631.98 0.0 0.0 44988.00 0.0 0.0 -8628.02 -90868.78 0.0 -15146.16 -51.10 0.0 NaN NaN NaN NaN NaN 50000.0 -3.0 NaN NaN -50000.0 NaN NaN NaN NaN NaN 45000.0 -60000.00 NaN NaN -50000.0 NaN NaN NaN NaN NaN 47500.0 -1213.643960 NaN NaN -50000.000000 NaN NaN NaN NaN NaN 2.0 101.0 NaN NaN 1.0 NaN NaN NaN NaN NaN 95000.0 -122578.04 NaN NaN -50000.0 1 336 104 NaN -3.0 50000.0 NaN NaN -83.90 -60000.00 NaN NaN -26.026765 -1095.616143 NaN NaN 34.0 70.0 NaN NaN -884.91 -76693.13 NaN 0.144231 0.586538
5 92 0 288 142 5 6 3 4.661972 142 662 40000.0 -56164.05 -1742.176972 142 -247389.13 5810.0150 13990.0 0.0 33878.38 0.0 0.00 130000.0 290500.75 5500.0 6000.0 66500.0 31.0 3 30.0 44.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 2.0 18.0 0.0 2.0 14.0 0.0 1.0 5.0 23.0 0.0 42.0 10.0 0.0 0.0 0.0 0.00 0.0 0.0 -3825.0 -4800.0 0.0 -1600.0 -83276.00 0.0 -2940.0 29121.62 0.0 -2204.0 -15919.01 -37229.20 0.0 -53534.10 -71183.44 0.0 13990.0 NaN -1000.0 NaN NaN 40000.0 -8.2 5000.0 -1000.0 -1000.0 13990.0 NaN -10000.0 NaN NaN 30000.0 -56164.05 500.0 -5000.0 -15000.0 13990.0 NaN -4234.797500 NaN NaN 32500.0 -2617.123874 2750.0 -3000.0 -4750.000000 1.0 NaN 8.0 NaN NaN 4.0 111.0 2.0 2.0 14.0 13990.0 NaN -33878.38 NaN NaN 130000.0 -290500.75 5500.0 -6000.0 -66500.0 5 267 142 NaN -8.2 40000.0 -107.0 NaN -43.63 -56164.05 -605.17 NaN -25.915000 -1808.048750 -360.6675 NaN 2.0 136.0 4.0 NaN -51.83 -245894.63 -1442.67 0.211268 0.309859
9 89 0 245 39 9 7 5 5.974359 39 233 117450.0 -110000.00 -6292.694615 39 -245415.09 247.3018 0.0 296950.0 0.00 0.0 0.00 5000.0 12365.09 0.0 100000.0 435000.0 27.0 1 7.0 16.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9.0 0.0 0.0 0.0 6.0 0.0 1.0 0.0 0.0 0.0 0.0 0.00 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.0 0.0 -233050.00 0.0 0.0 0.00 -12326.09 0.0 -39.00 0.00 0.0 NaN 117450.0 NaN NaN NaN 5000.0 -39.0 NaN -100000.0 -75000.0 NaN 87000.0 NaN NaN NaN 5000.0 -5000.00 NaN -100000.0 -110000.0 NaN 98983.333333 NaN NaN NaN 5000.0 -426.382414 NaN -100000.0 -87000.000000 NaN 3.0 NaN NaN NaN 1.0 29.0 NaN 1.0 5.0 NaN 296950.0 NaN NaN NaN 5000.0 -12365.09 NaN -100000.0 -435000.0 9 94 39 NaN NaN 117450.0 NaN NaN NaN -110000.00 NaN NaN NaN -6292.694615 NaN NaN NaN 39.0 NaN NaN NaN -245415.09 NaN 0.179487 0.410256
10 89 0 186 463 10 9 7 7.976242 463 3693 35000.0 -150000.00 -380.894233 463 -176354.03 4019.1222 0.0 10000.0 21692.90 0.0 37694.98 426300.0 200956.11 0.0 5400.0 422300.0 103.0 1 59.0 135.0 0.0 0.0 23.0 0.0 1.0 0.0 1.0 0.0 0.0 12.0 0.0 0.0 44.0 0.0 1.0 0.0 61.0 1.0 188.0 0.0 0.0 0.0 0.0 -24549.83 0.0 -2400.0 0.0 -400.0 0.0 0.0 -14807.00 0.0 0.0 24602.08 0.0 -611.0 0.00 -37733.07 -650.0 -119805.21 0.00 0.0 NaN 10000.0 -100.0 NaN 13404.98 35000.0 -3.0 NaN -500.0 -100.0 NaN 10000.0 -14342.9 NaN 11000.0 1000.0 -11700.00 NaN -4000.0 -150000.0 NaN 10000.000000 -1141.731579 NaN 12564.993333 17762.5 -537.315802 NaN -1800.0 -10828.205128 NaN 1.0 19.0 NaN 3.0 24.0 374.0 NaN 3.0 39.0 NaN 10000.0 -21692.90 NaN 37694.98 426300.0 -200956.11 NaN -5400.0 -422300.0 10 1389 463 NaN NaN 35000.0 NaN NaN NaN -150000.00 NaN NaN NaN -380.894233 NaN NaN NaN 463.0 NaN NaN NaN -176354.03 NaN 0.127430 0.291577
In [179]:
days_usage.describe()
Out[179]:
DateFormat target_flag days_from_end_period Num_trans_total (cl_id, ) (Month, max) (Month, min) (Month, mean) (Month, count) (Month, sum) (cash_in_out, max) (cash_in_out, min) (cash_in_out, mean) (cash_in_out, count) (cash_in_out, sum) cashback BACK_TRX C2C_IN C2C_OUT CASH_ADV CAT DEPOSIT POS WD_ATM_OTHER WD_ATM_PARTNER WD_ATM_ROS quntity_of_mcc multy_currency last_14_days last_30_days 0_x Авиалинии, авиакомпании_x Автомобили и транспортные средства_x Аренда автомобилей_x Бизнес услуги_x Государственные услуги_x Коммунальные и кабельные услуги_x Контрактные услуги_x Личные услуги_x Магазины одежды_x Оптовые поставщики и производители_x Отели и мотели_x Поставщик услуг_x Продажи по почте/телефону_x Профессиональные услуги_x Развлечения_x Различные магазины_x Ремонтные услуги_x Розничные магазины_x Транспорт_x Членские организации_x 0_y Авиалинии, авиакомпании_y Автомобили и транспортные средства_y Аренда автомобилей_y Бизнес услуги_y Государственные услуги_y Коммунальные и кабельные услуги_y Контрактные услуги_y Личные услуги_y Магазины одежды_y Оптовые поставщики и производители_y Отели и мотели_y Поставщик услуг_y Продажи по почте/телефону_y Профессиональные услуги_y Развлечения_y Различные магазины_y Ремонтные услуги_y Розничные магазины_y Транспорт_y Членские организации_y (cash_in_out, max, BACK_TRX) (cash_in_out, max, C2C_IN) (cash_in_out, max, C2C_OUT) (cash_in_out, max, CASH_ADV) (cash_in_out, max, CAT) (cash_in_out, max, DEPOSIT) (cash_in_out, max, POS) (cash_in_out, max, WD_ATM_OTHER) (cash_in_out, max, WD_ATM_PARTNER) (cash_in_out, max, WD_ATM_ROS) (cash_in_out, min, BACK_TRX) (cash_in_out, min, C2C_IN) (cash_in_out, min, C2C_OUT) (cash_in_out, min, CASH_ADV) (cash_in_out, min, CAT) (cash_in_out, min, DEPOSIT) (cash_in_out, min, POS) (cash_in_out, min, WD_ATM_OTHER) (cash_in_out, min, WD_ATM_PARTNER) (cash_in_out, min, WD_ATM_ROS) (cash_in_out, mean, BACK_TRX) (cash_in_out, mean, C2C_IN) (cash_in_out, mean, C2C_OUT) (cash_in_out, mean, CASH_ADV) (cash_in_out, mean, CAT) (cash_in_out, mean, DEPOSIT) (cash_in_out, mean, POS) (cash_in_out, mean, WD_ATM_OTHER) (cash_in_out, mean, WD_ATM_PARTNER) (cash_in_out, mean, WD_ATM_ROS) (cash_in_out, count, BACK_TRX) (cash_in_out, count, C2C_IN) (cash_in_out, count, C2C_OUT) (cash_in_out, count, CASH_ADV) (cash_in_out, count, CAT) (cash_in_out, count, DEPOSIT) (cash_in_out, count, POS) (cash_in_out, count, WD_ATM_OTHER) (cash_in_out, count, WD_ATM_PARTNER) (cash_in_out, count, WD_ATM_ROS) (cash_in_out, sum, BACK_TRX) (cash_in_out, sum, C2C_IN) (cash_in_out, sum, C2C_OUT) (cash_in_out, sum, CASH_ADV) (cash_in_out, sum, CAT) (cash_in_out, sum, DEPOSIT) (cash_in_out, sum, POS) (cash_in_out, sum, WD_ATM_OTHER) (cash_in_out, sum, WD_ATM_PARTNER) (cash_in_out, sum, WD_ATM_ROS) cl_id quater_of_year last_action (cash_in_out, max, $) (cash_in_out, max, Euro) (cash_in_out, max, Rub) (cash_in_out, max, other) (cash_in_out, min, $) (cash_in_out, min, Euro) (cash_in_out, min, Rub) (cash_in_out, min, other) (cash_in_out, mean, $) (cash_in_out, mean, Euro) (cash_in_out, mean, Rub) (cash_in_out, mean, other) (cash_in_out, count, $) (cash_in_out, count, Euro) (cash_in_out, count, Rub) (cash_in_out, count, other) (cash_in_out, sum, $) (cash_in_out, sum, Euro) (cash_in_out, sum, Rub) (cash_in_out, sum, other) all_to_last14 all_to_last30
count 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5.000000e+03 5.000000e+03 5000.000000 5000.000000 5.000000e+03 5000.000000 5.000000e+03 5.000000e+03 5.000000e+03 5000.000000 5.000000e+03 5.000000e+03 5.000000e+03 5.000000e+03 5.000000e+03 5.000000e+03 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.00000 5000.000000 5000.00000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5000.000000 5.000000e+03 5.000000e+03 5000.000000 5000.000000 5000.000000 5.000000e+03 5000.000000 5000.000000 5.000000e+03 5000.000000 5.000000e+03 5.000000e+03 5000.000000 5.000000e+03 5.000000e+03 5.000000e+03 5000.000000 5.000000e+03 5.000000e+03 5000.000000 1.439000e+03 1621.000000 1237.000000 22.000000 399.000000 2864.000000 5000.000000 1.239000e+03 1769.000000 2771.000000 1.439000e+03 1621.000000 1237.000000 22.000000 399.000000 2864.000000 5.000000e+03 1.239000e+03 1769.000000 2771.000000 1.439000e+03 1621.000000 1237.000000 22.000000 399.000000 2864.000000 5.000000e+03 1.239000e+03 1769.000000 2771.000000 1439.000000 1621.000000 1237.000000 22.000000 399.000000 2864.000000 5000.000000 1239.000000 1769.000000 2771.000000 1.439000e+03 1.621000e+03 1.237000e+03 22.000000 3.990000e+02 2.864000e+03 5.000000e+03 1.239000e+03 1.769000e+03 2.771000e+03 5000.000000 5000.000000 5000.000000 334.000000 612.000000 4961.00000 5.690000e+02 334.000000 612.00000 4.961000e+03 5.690000e+02 334.000000 612.000000 4961.000000 5.690000e+02 334.000000 612.000000 4961.000000 569.000000 334.000000 612.000000 4.961000e+03 5.690000e+02 5000.000000 5000.000000
mean 80.037800 0.660400 295.871200 98.102600 5112.334400 8.816800 3.717800 6.211409 98.102600 605.729600 4.021630e+04 -8.753987e+04 -6215.328816 98.102600 -2.792761e+05 4052.247349 4.923835e+03 1.997943e+04 1.761498e+04 182.564988 4.520572e+03 9.654918e+04 2.026124e+05 1.495612e+04 6.654355e+04 1.336169e+05 31.878400 1.334200 17.131000 34.506000 0.000400 0.148800 4.162000 0.042600 0.802400 0.199800 1.315400 0.028800 0.612800 3.490000 0.179000 0.50820 10.186600 0.36940 0.862200 1.048000 25.021800 0.260200 34.066000 2.701200 0.022200 -0.164826 -3.786203e+03 -1.033511e+04 -180.652472 -2803.052196 -380.285280 -5.630722e+03 -261.365434 -2523.785862 -1.503417e+04 -806.801796 -1.965257e+04 -8.235479e+04 -461.339974 -7.045854e+03 -3.778230e+03 -5.704139e+04 -917.229866 -5.067815e+04 -1.554706e+04 -57.162534 1.061934e+04 24662.784170 -7556.655538 36293.383636 28593.694461 51380.781634 -1444.196930 1.792671e+04 -12012.888638 -12113.803681 7.307594e+03 7935.411271 -27020.835788 21196.679091 7635.384486 11307.069001 -5.640535e+04 4.137257e+03 -54473.968344 -56860.050523 8.780447e+03 14539.984896 -15430.521293 27690.751818 16458.589651 26885.516872 -5.370167e+03 9.223257e+03 -28898.022260 -29753.203625 1.867269 4.507094 4.410671 1.545455 3.000000 7.407821 83.285000 5.762712 5.623516 6.894262 1.710853e+04 6.162687e+04 -7.120039e+04 41492.042727 5.664877e+04 1.685565e+05 -2.026124e+05 6.035563e+04 -1.880824e+05 -2.410987e+05 5112.334400 234.044800 98.102600 -88.337904 -72.229314 39004.85666 -5.620406e+04 -320.234401 -377.35232 -6.566259e+04 -2.003456e+05 -167.975166 -166.315752 -6020.031017 -8.354668e+04 5.751497 9.045752 96.286233 9.455185 -743.035359 -754.708954 -2.481780e+05 -2.890325e+05 0.235787 0.407306
std 31.448176 0.473621 106.766772 78.107609 2977.079513 2.910487 2.692956 2.419363 78.107609 548.263005 7.541434e+04 7.006506e+05 21973.842289 78.107609 1.066506e+06 20838.426667 4.415733e+04 7.400456e+04 7.190746e+04 5847.074785 5.988268e+04 1.888095e+05 1.041921e+06 3.406932e+05 2.480496e+05 4.255087e+05 27.716357 0.755132 15.006881 28.777762 0.028284 0.638702 6.382061 1.280279 1.911671 0.819032 2.522458 0.269045 1.517016 5.410818 0.819203 1.64185 9.294614 1.83358 1.878381 2.073585 25.231176 0.825247 33.012016 7.663084 0.178083 11.654958 7.284167e+04 3.511627e+04 4249.653174 14798.782530 2886.851863 5.572677e+04 4573.610192 16659.010043 4.097060e+04 7058.531105 6.776273e+05 6.694330e+05 4750.689800 1.527050e+05 5.632749e+04 2.575192e+05 6550.986030 1.025710e+05 3.231858e+05 1544.977108 6.071413e+04 35316.189148 16527.111210 68895.176992 115245.297902 42588.076027 19509.590618 1.070726e+05 26571.071094 26078.738622 5.910445e+04 20868.342858 37126.057356 35285.824793 33897.465179 20638.972122 6.994826e+05 3.085396e+04 78073.397397 64831.694648 5.953510e+04 24045.050715 22022.681876 47577.921730 62759.470578 25033.110462 3.333152e+04 6.466467e+04 42335.035180 37893.874744 1.653514 8.670910 5.629426 1.682608 3.771088 7.458434 72.013636 6.949520 6.351384 9.383410 8.105451e+04 1.197153e+05 1.307457e+05 79641.729878 2.051341e+05 2.238397e+05 1.041921e+06 6.826066e+05 3.887149e+05 5.484802e+05 2977.079513 205.537546 78.107609 440.822216 352.888414 55357.05135 1.512514e+06 777.970969 633.95928 7.157630e+04 2.066845e+06 454.719705 357.055039 20334.718730 1.512281e+06 15.522180 13.016464 77.192242 13.053594 2546.761734 2114.429121 5.735781e+05 2.648037e+06 0.222301 0.235383
min 0.000000 0.000000 0.000000 1.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 -7.181700e+05 -3.583500e+07 -718170.000000 1.000000 -4.488611e+07 0.039600 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 0.000000e+00 0.000000e+00 1.980000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 0.000000 0.00000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -824.130000 -5.022440e+06 -1.468027e+06 -200176.000000 -296132.000000 -106734.940000 -3.440000e+06 -224000.000000 -887000.000000 -1.201293e+06 -260538.340000 -3.585077e+07 -8.796000e+06 -246003.000000 -1.071447e+07 -3.875200e+06 -1.188779e+07 -200000.000000 -3.633105e+06 -2.272900e+07 -100000.000000 9.300000e-01 0.500000 -300000.000000 25.950000 0.040000 10.000000 -1000000.000000 1.000000e+01 -400000.000000 -200000.000000 9.000000e-02 0.500000 -406668.940000 25.950000 0.040000 10.000000 -3.583500e+07 5.000000e+00 -450000.000000 -200000.000000 9.300000e-01 0.500000 -300000.000000 25.950000 0.040000 10.000000 -1.734655e+06 1.000000e+01 -400000.000000 -200000.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 9.300000e-01 5.000000e-01 -1.836251e+06 25.950000 4.000000e-02 1.000000e+01 -4.476561e+07 1.000000e+01 -3.921000e+06 -8.735000e+06 0.000000 1.000000 1.000000 -4000.000000 -3680.000000 -718170.00000 -3.583500e+07 -9000.000000 -5000.00000 -1.300000e+06 -3.583500e+07 -4000.000000 -3680.000000 -718170.000000 -3.583500e+07 1.000000 1.000000 1.000000 1.000000 -26093.610000 -24810.410000 -8.799597e+06 -4.468349e+07 0.004566 0.006849
25% 74.000000 0.000000 215.000000 34.000000 2537.500000 6.000000 1.000000 4.286898 34.000000 180.000000 4.038850e+03 -8.000000e+04 -3944.701826 34.000000 -3.053821e+05 1214.059550 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 0.000000e+00 0.000000e+00 6.070298e+04 0.000000e+00 0.000000e+00 0.000000e+00 11.000000 1.000000 5.000000 11.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.00000 4.000000 0.00000 0.000000 0.000000 5.000000 0.000000 7.000000 0.000000 0.000000 0.000000 0.000000e+00 -9.927432e+03 0.000000 -400.000000 0.000000 -1.471675e+03 0.000000 -312.157500 -1.600400e+04 0.000000 0.000000e+00 -1.200750e+05 0.000000 -1.800000e+03 -1.130000e+03 -6.556540e+04 0.000000 -6.566566e+04 -5.906000e+03 0.000000 4.100000e+02 3386.700000 -6000.000000 877.160000 113.930000 21000.000000 -85.000000 3.310000e+03 -10000.000000 -10000.000000 1.283000e+02 365.000000 -40000.000000 707.720000 100.000000 1000.000000 -4.789700e+04 5.000000e+02 -70000.000000 -86000.000000 3.417600e+02 2340.000000 -19583.364444 877.160000 104.850000 10583.928571 -3.362742e+03 2.043833e+03 -33333.333333 -38000.000000 1.000000 1.000000 1.000000 1.000000 1.000000 2.000000 22.000000 1.000000 1.000000 2.000000 4.672800e+02 4.500000e+03 -7.692250e+04 877.160000 1.541200e+02 4.900000e+04 -2.220921e+05 4.850000e+03 -1.741000e+05 -2.393000e+05 2537.500000 74.000000 34.000000 -80.787500 -52.980000 4500.00000 -1.715100e+02 -293.000000 -408.00000 -7.990300e+04 -4.917000e+03 -149.902500 -154.359500 -4031.792569 -1.542008e+03 1.000000 1.000000 33.000000 2.000000 -485.247500 -793.660000 -3.013175e+05 -1.030000e+04 0.111111 0.267199
50% 88.000000 1.000000 293.000000 83.000000 5084.500000 9.000000 3.000000 6.093673 83.000000 457.000000 2.500000e+04 -4.350000e+04 -1117.961403 83.000000 -8.711050e+04 3025.829300 0.000000e+00 0.000000e+00 0.000000e+00 0.000000 0.000000e+00 1.700000e+04 1.512915e+05 0.000000e+00 0.000000e+00 3.100000e+03 25.000000 1.000000 13.000000 28.000000 0.000000 0.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 0.000000 0.00000 8.000000 0.00000 0.000000 0.000000 18.000000 0.000000 26.000000 0.000000 0.000000 0.000000 0.000000e+00 -1.999515e+03 0.000000 0.000000 0.000000 0.000000e+00 0.000000 0.000000 -3.600000e+03 0.000000 0.000000e+00 8.224495e+03 0.000000 0.000000e+00 0.000000e+00 -2.901261e+04 0.000000 -3.123081e+04 0.000000e+00 0.000000 1.665000e+03 10000.000000 -2500.000000 7125.000000 500.000000 40000.000000 -30.000000 5.000000e+03 -3000.000000 -3400.000000 6.720000e+02 2000.000000 -14000.000000 2913.700000 300.000000 5000.000000 -2.305733e+04 1.000000e+03 -25000.000000 -32000.000000 1.332286e+03 7194.945000 -7962.000000 4593.550000 500.000000 19471.666667 -1.701781e+03 3.800000e+03 -11750.000000 -15006.060606 1.000000 2.000000 2.000000 1.000000 2.000000 5.000000 68.000000 3.000000 3.000000 4.000000 1.950000e+03 1.650000e+04 -2.304017e+04 7125.000000 8.000000e+02 1.200000e+05 -1.512915e+05 1.370000e+04 -4.460000e+04 -6.250000e+04 5084.500000 184.000000 83.000000 -15.000000 -8.450000 25000.00000 -1.250000e+01 -109.720000 -156.02000 -4.348300e+04 -6.000000e+02 -59.102000 -56.094091 -1153.516436 -1.841500e+02 2.000000 4.000000 81.000000 5.000000 -138.500000 -345.245000 -8.758224e+04 -9.980000e+02 0.175624 0.357990
75% 91.000000 1.000000 394.000000 145.000000 7728.000000 12.000000 6.000000 7.920422 145.000000 880.250000 5.700000e+04 -2.032175e+04 -98.172956 145.000000 -5.658650e+03 4441.842550 1.100000e+02 3.582548e+03 0.000000e+00 0.000000 0.000000e+00 1.390000e+05 2.220921e+05 0.000000e+00 1.235000e+04 8.000000e+04 44.000000 1.000000 26.000000 51.000000 0.000000 0.000000 6.000000 0.000000 1.000000 0.000000 2.000000 0.000000 1.000000 5.000000 0.000000 0.00000 14.000000 0.00000 1.000000 1.000000 36.000000 0.000000 52.000000 3.000000 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000 0.000000 0.000000 0.000000e+00 0.000000 0.000000 0.000000e+00 0.000000 0.000000e+00 1.378275e+05 0.000000 0.000000e+00 0.000000e+00 -6.103983e+03 0.000000 -7.058542e+03 0.000000e+00 0.000000 5.180850e+03 34000.000000 -505.000000 40150.000000 3363.980000 70000.000000 -10.137500 1.000000e+04 -1000.000000 -1000.000000 2.499500e+03 7000.000000 -4952.000000 35720.000000 1000.000000 10000.000000 -9.196000e+03 3.450000e+03 -6000.000000 -7000.000000 4.157500e+03 17566.666667 -3158.993333 35720.000000 1500.000000 35000.000000 -9.701285e+02 5.583214e+03 -4325.000000 -5000.000000 2.000000 5.000000 5.000000 1.000000 3.000000 10.000000 126.000000 8.000000 7.000000 9.000000 6.511000e+03 7.209200e+04 -5.034320e+03 40150.000000 6.102525e+03 2.071000e+05 -6.070298e+04 3.640000e+04 -9.800000e+03 -1.050000e+04 7728.000000 341.000000 145.000000 -2.297500 -1.790000 57500.00000 5.000000e+01 -27.057500 -61.91500 -2.067100e+04 -1.147800e+02 -16.275833 -24.136981 -98.117624 -3.303200e+01 4.000000 12.000000 142.000000 12.000000 -30.170000 -96.137500 -5.900000e+03 -1.329200e+02 0.258065 0.470211
max 480.000000 1.000000 529.000000 784.000000 10215.000000 12.000000 12.000000 12.000000 784.000000 5139.000000 3.045000e+06 -1.010000e+00 97813.851333 784.000000 3.596701e+06 895312.220000 2.066000e+06 2.389583e+06 1.836251e+06 350000.000000 2.046500e+06 3.299500e+06 4.476561e+07 2.356900e+07 3.921000e+06 8.735000e+06 264.000000 8.000000 129.000000 282.000000 2.000000 14.000000 81.000000 89.000000 32.000000 18.000000 31.000000 8.000000 24.000000 144.000000 25.000000 34.00000 211.000000 39.00000 22.000000 34.000000 233.000000 12.000000 258.000000 182.000000 4.000000 0.000000 0.000000e+00 0.000000e+00 0.000000 805.260000 0.000000 9.380000e+05 0.000000 7741.370000 5.390000e+03 5174.690000 0.000000e+00 2.486500e+07 11945.450000 1.000000e+03 3.500000e+02 1.884720e+03 0.000000 6.914733e+04 7.420000e+03 0.000000 2.066000e+06 490000.000000 -0.130000 300000.000000 919000.000000 335000.000000 -0.070000 3.045000e+06 -50.000000 -50.000000 2.066000e+06 437009.380000 -1.940000 150000.000000 488339.000000 295000.000000 -1.010000e+00 1.035000e+06 -100.000000 -100.000000 2.066000e+06 437009.380000 -1.940000 175000.000000 694169.500000 295000.000000 -9.900000e-01 1.964083e+06 -100.000000 -100.000000 17.000000 220.000000 52.000000 8.000000 32.000000 75.000000 712.000000 101.000000 67.000000 194.000000 2.066000e+06 2.389583e+06 -1.940000e+00 350000.000000 2.046500e+06 3.299500e+06 -1.980000e+00 2.356900e+07 -1.000000e+02 -1.000000e+02 10215.000000 1866.000000 784.000000 2000.000000 3000.000000 919000.00000 3.045000e+06 2000.000000 200.00000 6.450600e+02 1.500000e+06 2000.000000 1152.417000 97813.851333 1.500000e+06 202.000000 162.000000 784.000000 114.000000 2097.770000 26265.480000 2.130147e+06 2.417164e+06 1.000000 1.000000
For sure there are a lot of NaN because of aggregations. So, if Nan, fill it by 0
In [180]:
days_usage=days_usage.fillna(0)
In [ ]:
days_usage.
In [194]:
days_usage.head()
Out[194]:
DateFormat target_flag days_from_end_period Num_trans_total (cl_id, ) (Month, max) (Month, min) (Month, mean) (Month, count) (Month, sum) (cash_in_out, max) (cash_in_out, min) (cash_in_out, mean) (cash_in_out, count) (cash_in_out, sum) cashback BACK_TRX C2C_IN C2C_OUT CASH_ADV CAT DEPOSIT POS WD_ATM_OTHER WD_ATM_PARTNER WD_ATM_ROS quntity_of_mcc multy_currency last_14_days last_30_days 0_x Авиалинии, авиакомпании_x Автомобили и транспортные средства_x Аренда автомобилей_x Бизнес услуги_x Государственные услуги_x Коммунальные и кабельные услуги_x Контрактные услуги_x Личные услуги_x Магазины одежды_x Оптовые поставщики и производители_x Отели и мотели_x Поставщик услуг_x Продажи по почте/телефону_x Профессиональные услуги_x Развлечения_x Различные магазины_x Ремонтные услуги_x Розничные магазины_x Транспорт_x Членские организации_x 0_y Авиалинии, авиакомпании_y Автомобили и транспортные средства_y Аренда автомобилей_y Бизнес услуги_y Государственные услуги_y Коммунальные и кабельные услуги_y Контрактные услуги_y Личные услуги_y Магазины одежды_y Оптовые поставщики и производители_y Отели и мотели_y Поставщик услуг_y Продажи по почте/телефону_y Профессиональные услуги_y Развлечения_y Различные магазины_y Ремонтные услуги_y Розничные магазины_y Транспорт_y Членские организации_y (cash_in_out, max, BACK_TRX) (cash_in_out, max, C2C_IN) (cash_in_out, max, C2C_OUT) (cash_in_out, max, CASH_ADV) (cash_in_out, max, CAT) (cash_in_out, max, DEPOSIT) (cash_in_out, max, POS) (cash_in_out, max, WD_ATM_OTHER) (cash_in_out, max, WD_ATM_PARTNER) (cash_in_out, max, WD_ATM_ROS) (cash_in_out, min, BACK_TRX) (cash_in_out, min, C2C_IN) (cash_in_out, min, C2C_OUT) (cash_in_out, min, CASH_ADV) (cash_in_out, min, CAT) (cash_in_out, min, DEPOSIT) (cash_in_out, min, POS) (cash_in_out, min, WD_ATM_OTHER) (cash_in_out, min, WD_ATM_PARTNER) (cash_in_out, min, WD_ATM_ROS) (cash_in_out, mean, BACK_TRX) (cash_in_out, mean, C2C_IN) (cash_in_out, mean, C2C_OUT) (cash_in_out, mean, CASH_ADV) (cash_in_out, mean, CAT) (cash_in_out, mean, DEPOSIT) (cash_in_out, mean, POS) (cash_in_out, mean, WD_ATM_OTHER) (cash_in_out, mean, WD_ATM_PARTNER) (cash_in_out, mean, WD_ATM_ROS) (cash_in_out, count, BACK_TRX) (cash_in_out, count, C2C_IN) (cash_in_out, count, C2C_OUT) (cash_in_out, count, CASH_ADV) (cash_in_out, count, CAT) (cash_in_out, count, DEPOSIT) (cash_in_out, count, POS) (cash_in_out, count, WD_ATM_OTHER) (cash_in_out, count, WD_ATM_PARTNER) (cash_in_out, count, WD_ATM_ROS) (cash_in_out, sum, BACK_TRX) (cash_in_out, sum, C2C_IN) (cash_in_out, sum, C2C_OUT) (cash_in_out, sum, CASH_ADV) (cash_in_out, sum, CAT) (cash_in_out, sum, DEPOSIT) (cash_in_out, sum, POS) (cash_in_out, sum, WD_ATM_OTHER) (cash_in_out, sum, WD_ATM_PARTNER) (cash_in_out, sum, WD_ATM_ROS) cl_id quater_of_year last_action (cash_in_out, max, $) (cash_in_out, max, Euro) (cash_in_out, max, Rub) (cash_in_out, max, other) (cash_in_out, min, $) (cash_in_out, min, Euro) (cash_in_out, min, Rub) (cash_in_out, min, other) (cash_in_out, mean, $) (cash_in_out, mean, Euro) (cash_in_out, mean, Rub) (cash_in_out, mean, other) (cash_in_out, count, $) (cash_in_out, count, Euro) (cash_in_out, count, Rub) (cash_in_out, count, other) (cash_in_out, sum, $) (cash_in_out, sum, Euro) (cash_in_out, sum, Rub) (cash_in_out, sum, other) all_to_last14 all_to_last30
cl_id
0 54 0 118 5 0 12 10 10.400000 5 52 20000.0 -36562.00 -4876.600000 5 -24383.00 156.4200 0.0 0.0 36562.00 0.0 0.00 20000.0 7821.00 0.0 0.0 0.0 1.0 1 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 1.0 0.0 2.0 0.0 0.0 0.0 0.0 0.00 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.0 0.0 -16562.00 0.0 0.0 0.00 -767.00 0.0 -7054.00 0.00 0.0 0.0 0.0 -36562.0 0.0 0.00 20000.0 -767.0 0.0 0.0 0.0 0.0 0.0 -36562.0 0.0 0.0 20000.0 -5023.00 0.0 0.0 0.0 0.0 0.000000 -36562.000000 0.0 0.000000 20000.0 -2607.000000 0.0 0.0 0.000000 0.0 0.0 1.0 0.0 0.0 1.0 3.0 0.0 0.0 0.0 0.0 0.0 -36562.00 0.0 0.00 20000.0 -7821.00 0.0 0.0 0.0 0 20 5 0.0 0.0 20000.0 0.0 0.0 0.00 -36562.00 0.00 0.0 0.000000 -4876.600000 0.0000 0.0 0.0 5.0 0.0 0.0 0.00 -24383.00 0.00 0.200000 0.200000
1 92 0 165 104 1 10 7 8.951923 104 931 50000.0 -60000.00 -745.942692 104 -77578.04 2451.5608 0.0 0.0 0.00 0.0 0.00 95000.0 122578.04 0.0 0.0 50000.0 41.0 2 15.0 61.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 5.0 0.0 0.0 4.0 0.0 0.0 6.0 57.0 0.0 14.0 2.0 0.0 0.0 0.0 0.00 0.0 -30.0 -210.0 0.0 0.0 0.0 -7631.98 0.0 0.0 44988.00 0.0 0.0 -8628.02 -90868.78 0.0 -15146.16 -51.10 0.0 0.0 0.0 0.0 0.0 0.00 50000.0 -3.0 0.0 0.0 -50000.0 0.0 0.0 0.0 0.0 0.0 45000.0 -60000.00 0.0 0.0 -50000.0 0.0 0.000000 0.000000 0.0 0.000000 47500.0 -1213.643960 0.0 0.0 -50000.000000 0.0 0.0 0.0 0.0 0.0 2.0 101.0 0.0 0.0 1.0 0.0 0.0 0.00 0.0 0.00 95000.0 -122578.04 0.0 0.0 -50000.0 1 336 104 0.0 -3.0 50000.0 0.0 0.0 -83.90 -60000.00 0.00 0.0 -26.026765 -1095.616143 0.0000 0.0 34.0 70.0 0.0 0.0 -884.91 -76693.13 0.00 0.144231 0.586538
5 92 0 288 142 5 6 3 4.661972 142 662 40000.0 -56164.05 -1742.176972 142 -247389.13 5810.0150 13990.0 0.0 33878.38 0.0 0.00 130000.0 290500.75 5500.0 6000.0 66500.0 31.0 3 30.0 44.0 0.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 2.0 18.0 0.0 2.0 14.0 0.0 1.0 5.0 23.0 0.0 42.0 10.0 0.0 0.0 0.0 0.00 0.0 0.0 -3825.0 -4800.0 0.0 -1600.0 -83276.00 0.0 -2940.0 29121.62 0.0 -2204.0 -15919.01 -37229.20 0.0 -53534.10 -71183.44 0.0 13990.0 0.0 -1000.0 0.0 0.00 40000.0 -8.2 5000.0 -1000.0 -1000.0 13990.0 0.0 -10000.0 0.0 0.0 30000.0 -56164.05 500.0 -5000.0 -15000.0 13990.0 0.000000 -4234.797500 0.0 0.000000 32500.0 -2617.123874 2750.0 -3000.0 -4750.000000 1.0 0.0 8.0 0.0 0.0 4.0 111.0 2.0 2.0 14.0 13990.0 0.0 -33878.38 0.0 0.00 130000.0 -290500.75 5500.0 -6000.0 -66500.0 5 267 142 0.0 -8.2 40000.0 -107.0 0.0 -43.63 -56164.05 -605.17 0.0 -25.915000 -1808.048750 -360.6675 0.0 2.0 136.0 4.0 0.0 -51.83 -245894.63 -1442.67 0.211268 0.309859
9 89 0 245 39 9 7 5 5.974359 39 233 117450.0 -110000.00 -6292.694615 39 -245415.09 247.3018 0.0 296950.0 0.00 0.0 0.00 5000.0 12365.09 0.0 100000.0 435000.0 27.0 1 7.0 16.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 9.0 0.0 0.0 0.0 6.0 0.0 1.0 0.0 0.0 0.0 0.0 0.00 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.0 0.0 -233050.00 0.0 0.0 0.00 -12326.09 0.0 -39.00 0.00 0.0 0.0 117450.0 0.0 0.0 0.00 5000.0 -39.0 0.0 -100000.0 -75000.0 0.0 87000.0 0.0 0.0 0.0 5000.0 -5000.00 0.0 -100000.0 -110000.0 0.0 98983.333333 0.000000 0.0 0.000000 5000.0 -426.382414 0.0 -100000.0 -87000.000000 0.0 3.0 0.0 0.0 0.0 1.0 29.0 0.0 1.0 5.0 0.0 296950.0 0.00 0.0 0.00 5000.0 -12365.09 0.0 -100000.0 -435000.0 9 94 39 0.0 0.0 117450.0 0.0 0.0 0.00 -110000.00 0.00 0.0 0.000000 -6292.694615 0.0000 0.0 0.0 39.0 0.0 0.0 0.00 -245415.09 0.00 0.179487 0.410256
10 89 0 186 463 10 9 7 7.976242 463 3693 35000.0 -150000.00 -380.894233 463 -176354.03 4019.1222 0.0 10000.0 21692.90 0.0 37694.98 426300.0 200956.11 0.0 5400.0 422300.0 103.0 1 59.0 135.0 0.0 0.0 23.0 0.0 1.0 0.0 1.0 0.0 0.0 12.0 0.0 0.0 44.0 0.0 1.0 0.0 61.0 1.0 188.0 0.0 0.0 0.0 0.0 -24549.83 0.0 -2400.0 0.0 -400.0 0.0 0.0 -14807.00 0.0 0.0 24602.08 0.0 -611.0 0.00 -37733.07 -650.0 -119805.21 0.00 0.0 0.0 10000.0 -100.0 0.0 13404.98 35000.0 -3.0 0.0 -500.0 -100.0 0.0 10000.0 -14342.9 0.0 11000.0 1000.0 -11700.00 0.0 -4000.0 -150000.0 0.0 10000.000000 -1141.731579 0.0 12564.993333 17762.5 -537.315802 0.0 -1800.0 -10828.205128 0.0 1.0 19.0 0.0 3.0 24.0 374.0 0.0 3.0 39.0 0.0 10000.0 -21692.90 0.0 37694.98 426300.0 -200956.11 0.0 -5400.0 -422300.0 10 1389 463 0.0 0.0 35000.0 0.0 0.0 0.00 -150000.00 0.00 0.0 0.000000 -380.894233 0.0000 0.0 0.0 463.0 0.0 0.0 0.00 -176354.03 0.00 0.127430 0.291577
In [196]:
days_usage.to_csv('days_us.csv')
In [ ]:
Plotting visual info. 
In [183]:
sns.heatmap(days_usage.corr())
Out[183]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d20d908>