Notebook

In [1]:

import matplotlib.pyplot as plt
%matplotlib  inline
import pandas as pd
import numpy as np
import datetime

Import the data

In [2]:

# Time window of the analysis. The query below is half-open,
# [start_day, end_day), so rows stamped on end_day itself are not loaded.
# NOTE(review): with end_day = 2013-11-30 the last day of November is
# excluded -- confirm this is intended.
start_day = datetime.datetime(2013, 11, 1)
end_day = datetime.datetime(2013, 11, 30)

store = pd.HDFStore('../stores/sms-call-internet-mi-table-blosc.h5')
try:
    # Select only the rows inside the window and replace missing counters by 0.
    intensity_data = store.select(
        'telco_data',
        "index >= Timestamp('%s') & index < Timestamp('%s')" % (start_day, end_day),
    ).fillna(0)
finally:
    # Always release the HDF5 handle, even when the query fails; otherwise the
    # file stays open in the kernel and re-running the cell can error out.
    store.close()

Aggregate and symmetrize the in and out activity of cells

In [3]:

# Total each traffic counter over the loaded period for every
# (Square_id, Country_code) pair.
df_aggregated = (
    intensity_data
    .groupby(['Square_id', 'Country_code'])[['SMS_in', 'SMS_out', 'Call_in', 'Call_out']]
    .sum()
)
# Symmetrize: merge the incoming and outgoing directions into one volume each.
df_aggregated['SMS'] = df_aggregated['SMS_in'] + df_aggregated['SMS_out']
df_aggregated['Call'] = df_aggregated['Call_in'] + df_aggregated['Call_out']

Entropies

In [4]:

def entropy(L):
    """Return the Shannon entropy (natural log) of the weights in ``L``.

    The weights are normalised by their sum to form a distribution; only
    strictly positive entries contribute to the result.

    Parameters
    ----------
    L : sequence of numbers (list, tuple, numpy array or pandas Series)
        Non-normalised weights, e.g. activity volumes.

    Returns
    -------
    float
        ``-sum(p * log(p))`` over the positive entries, with ``p = l / sum(L)``.
        ``0.0`` when ``L`` is empty or its sum is zero.
    """
    weights = np.asarray(L, dtype=float)
    total = weights.sum()
    # Nothing to normalise: empty input or all-zero weights.
    if weights.size == 0 or total == 0:
        return 0.0
    # Zero (and negative) entries are skipped, matching the 0 * log(0) = 0 convention.
    probabilities = weights[weights > 0] / total
    return float(-np.sum(probabilities * np.log(probabilities)))

# Flat table of SMS volume per (Square_id, Country_code).
df2 = df_aggregated.SMS.reset_index()
# Drop country codes 39 and 0 before computing the per-cell entropy.
# NOTE(review): presumably 39 is the domestic code and 0 a placeholder -- confirm.
df2bis = df2[(df2.Country_code != 39) & (df2.Country_code != 0)]

# Every cell seen in the unfiltered table gets an entry; cells left with no
# rows after filtering keep an entropy of 0.
cells = sorted(set(df2['Square_id']))
entropy_dict = {cell: 0 for cell in cells}
# One groupby pass instead of re-filtering the whole frame once per cell.
for cell, sms_volumes in df2bis.groupby('Square_id')['SMS']:
    entropy_dict[cell] = entropy(sms_volumes.values)

In [5]:

# Flat table of call volume per (Square_id, Country_code).
df_call = df_aggregated['Call'].reset_index()
# Keep every row whose country code is neither 39 nor 0.
keep_rows = ~df_call['Country_code'].isin([39, 0])
df_call_filter = df_call[keep_rows]

In [6]:

# Entropy of each cell's call volume across the retained country codes,
# indexed by Square_id.
call_renorm_entropy_ds = df_call_filter.groupby('Square_id')['Call'].apply(entropy)

Low entropy states $S < \mu - \sigma$

In [7]:

import pickle as pk

# Binarisation cut-off, expressed as a fraction of a country's largest score.
threshold_factor = 0.1
# Cells whose call entropy is above this value get a zero weight.
call_thr = 3
country_call_cells = {}
M = call_renorm_entropy_ds.max()

# Per-cell entropy weight: 0 above the threshold, otherwise (max entropy - entropy),
# so lower entropy means a larger weight. It does not depend on the country,
# so it is computed once instead of once per loop iteration.
# NOTE(review): the weights are combined positionally (.values) with a grid
# indexed 0..9999, which requires call_renorm_entropy_ds to hold exactly
# 10000 entries in grid order. If Square_id values are 1-based, cell 10000 is
# dropped by the reindex and the product is shifted by one cell -- confirm.
low_entropy_weight = call_renorm_entropy_ds.apply(
    lambda x: 0 if x > call_thr else M - x
).values

for country in set(df_call_filter.Country_code):
    country_rows = df_call_filter[df_call_filter.Country_code == country]
    # Call volume towards this country laid out on the full 10000-cell grid;
    # cells without traffic are NaN at this point so they do not bias the
    # mean and standard deviation below.
    toy_series = pd.Series(
        data=country_rows['Call'].values,
        index=country_rows['Square_id'].values,
    ).reindex(range(10000))
    act_mu = toy_series.mean(skipna=True)
    std_mu = toy_series.std(skipna=True)

    # Keep only cells whose activity reaches mean + one standard deviation.
    high_activity = toy_series.fillna(0).apply(
        lambda x: 0 if x < act_mu + std_mu else x
    )
    second_toy = high_activity * low_entropy_weight

    # Hoisted out of the lambda: the maximum is constant for the whole series.
    cutoff = threshold_factor * second_toy.max(skipna=True)
    # Series.reshape no longer exists in pandas; reshape the underlying array.
    deh = second_toy.apply(lambda x: 0 if x <= cutoff else 1).values.reshape(100, 100)

    # Pickle needs a binary file handle; the context manager closes it every time.
    with open('../stores/phom/country_high_act_low_entropy_matrices/' + str(country) + '.pck', 'wb') as pickle_file:
        pk.dump(deh, pickle_file)