import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import datetime
# Load the telco records for the analysis window and total the SMS / call
# volume per (Square_id, Country_code) pair.
#
# The query keeps rows with start_day <= index < end_day.
# NOTE(review): the upper bound is exclusive, so records stamped on
# 2013-11-30 are not loaded -- confirm whether the full month was intended.
start_day = datetime.datetime(2013, 11, 1)
end_day = datetime.datetime(2013, 11, 30)

store = pd.HDFStore('../stores/sms-call-internet-mi-table-blosc.h5')
try:
    intensity_data = store.select(
        'telco_data',
        "index >= Timestamp('%s') & index < Timestamp('%s')" % (start_day, end_day)
    ).fillna(0)  # missing measurements count as zero activity
finally:
    # Release the HDF5 file handle even when the query above fails.
    store.close()

# Sum the four directional counters over the whole window for every
# (cell, country) pair.
df_aggregated = intensity_data.groupby(['Square_id', 'Country_code'])[
    ['SMS_in', 'SMS_out', 'Call_in', 'Call_out']
].sum()

# Direction-agnostic totals: incoming + outgoing.
df_aggregated['SMS'] = df_aggregated.SMS_in.values + df_aggregated.SMS_out.values
df_aggregated['Call'] = df_aggregated.Call_in.values + df_aggregated.Call_out.values
def entropy(L):
    """Return the Shannon entropy (natural log) of the weights in *L*.

    Each strictly positive entry contributes ``-p * log(p)`` with
    ``p = entry / sum(L)``; zero and negative entries are skipped.
    Returns 0 when *L* is empty or its entries sum to zero.
    """
    total = sum(L)
    if total == 0 or len(L) <= 0:
        return 0

    acc = 0
    for weight in L:
        if weight > 0:
            share = weight / float(total)
            acc += -share * np.log(share)
    return acc
# Per-cell entropy of how SMS and call volume is spread across countries.
# Rows with Country_code 39 or 0 are dropped before computing the entropy.
# NOTE(review): presumably 39 is the domestic (Italian) code and 0 marks an
# unknown country -- confirm against the dataset description.

# --- SMS: plain dict {Square_id: entropy} ---
df2 = df_aggregated.SMS.reset_index()
df2bis = df2[(df2.Country_code != 39) & (df2.Country_code != 0)]

# Collect each cell's SMS volumes in a single groupby pass instead of
# re-scanning the whole frame with a boolean mask for every cell.
sms_by_cell = {}
for cell, cell_sms in df2bis.groupby('Square_id')['SMS']:
    sms_by_cell[cell] = cell_sms.values
no_sms = df2bis['SMS'].values[:0]  # empty array -> entropy() returns 0

# Cells come from the unfiltered frame, so a cell whose only traffic was
# filtered out above still gets an entry (with entropy 0).
entropy_dict = {}
cells = sorted(set(df2['Square_id']))
for cell in cells:
    entropy_dict[cell] = entropy(sms_by_cell.get(cell, no_sms))

# --- Calls: Series indexed by Square_id ---
# Unlike entropy_dict, only cells that still have rows after filtering
# appear in this result.
df_call = df_aggregated.Call.reset_index()
df_call_filter = df_call[(df_call.Country_code != 39) & (df_call.Country_code != 0)]
call_renorm_entropy_ds = df_call_filter.groupby(df_call_filter.Square_id)['Call'].apply(entropy)
import pickle as pk
# For every country left in df_call_filter, build a 100x100 0/1 matrix that
# marks the cells combining high call volume with that country and low call
# entropy, and pickle it to
# ../stores/phom/country_high_act_low_entropy_matrices/<country>.pck
threshold_factor = 0.1  # a cell is marked when its score exceeds this fraction of the country's peak score
call_thr = 3            # cells whose call entropy is above this get weight 0
country_call_cells = {}
M = call_renorm_entropy_ds.max()

# Entropy weight per cell: (M - entropy) when entropy <= call_thr, else 0.
# It does not depend on the country, so compute it once outside the loop.
# NOTE(review): `.values` drops the Square_id index, so the product below
# pairs weights and activity by position, not by cell id. That is only
# meaningful if call_renorm_entropy_ds holds exactly 10000 entries ordered
# like the 0..9999 index used for toy_series -- confirm the Square_id range.
entropy_weight = call_renorm_entropy_ds.apply(lambda x: 0 if x > call_thr else M - x).values

for country in set(df_call_filter.Country_code):
    country_rows = df_call_filter[df_call_filter.Country_code == country]

    # Call volume with this country on a fixed 10000-slot grid; cells with
    # no calls to this country are NaN.
    toy_series = pd.Series(
        data=country_rows['Call'].values,
        index=country_rows['Square_id'].values
    ).reindex(range(0, 10000))

    max_calls = toy_series.max(skipna=True)  # not used in this block
    act_mu = toy_series.mean(skipna=True)
    std_mu = toy_series.std(skipna=True)

    # Keep only cells at least one standard deviation above the mean
    # activity (statistics ignore the NaN cells), then weight by entropy.
    activity_floor = act_mu + std_mu
    second_toy = toy_series.fillna(0).apply(lambda x: 0 if x < activity_floor else x) * entropy_weight

    # Binarise against a fraction of the peak score; the peak is computed
    # once rather than once per element.
    score_cutoff = threshold_factor * second_toy.max(skipna=True)
    # Series.reshape is no longer available in pandas; reshape the
    # underlying array instead (same ndarray result).
    deh = second_toy.apply(lambda x: 0 if x <= score_cutoff else 1).values.reshape(100, 100)

    # Pickle needs a binary-mode file; the with-block closes the handle.
    out_path = '../stores/phom/country_high_act_low_entropy_matrices/' + str(country) + '.pck'
    with open(out_path, 'wb') as out_file:
        pk.dump(deh, out_file)