#!/usr/bin/env python
# coding: utf-8

# In[1]:

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', ' inline')
import pandas as pd
import numpy as np
import datetime


# # Import the data

# In[2]:

start_day = datetime.datetime(2013, 11, 1)
end_day = datetime.datetime(2013, 11, 30)

# NOTE(review): the upper bound is exclusive, so records timestamped on
# 2013-11-30 itself are not selected -- confirm this is intended.
store = pd.HDFStore('../stores/sms-call-internet-mi-table-blosc.h5')
try:
    intensity_data = store.select(
        'telco_data',
        "index >= Timestamp('%s') & index < Timestamp('%s')" % (start_day, end_day)
    ).fillna(0)
finally:
    # Release the HDF5 file handle even when the query fails.
    store.close()


# ### Aggregate and symmetrize the in and out activity of cells

# In[3]:

# Total activity per (cell, country) pair over the selected period.
df_aggregated = intensity_data.groupby(['Square_id', 'Country_code'])[
    ['SMS_in', 'SMS_out', 'Call_in', 'Call_out']
].sum()
# Symmetrize: incoming plus outgoing traffic.
df_aggregated['SMS'] = df_aggregated.SMS_in.values + df_aggregated.SMS_out.values
df_aggregated['Call'] = df_aggregated.Call_in.values + df_aggregated.Call_out.values


# ## Entropies

# In[4]:

def entropy(L):
    """Return the Shannon entropy (natural log) of the weights in *L*.

    The weights are normalized by their sum; non-positive entries are
    skipped.

    Parameters
    ----------
    L : sized iterable of numbers (list, tuple, ndarray, Series, ...)
        Non-normalized weights, e.g. activity volume per country.

    Returns
    -------
    number
        ``-sum(p * log(p))`` with ``p = l / sum(L)``, or ``0`` when *L* is
        empty or its entries sum to zero.
    """
    total = sum(L)
    if total == 0 or len(L) == 0:
        return 0
    total = float(total)
    e = 0
    for value in L:
        if value > 0:
            p = value / total
            e += -p * np.log(p)
    return e


df2 = df_aggregated.SMS.reset_index()
# Drop the rows with country codes 39 and 0 before computing entropies.
df2bis = df2[(df2.Country_code != 39) & (df2.Country_code != 0)]

cells = sorted(set(df2['Square_id']))
# Group once instead of re-filtering the whole frame for every cell.
sms_by_cell = {
    cell: group.values
    for cell, group in df2bis.groupby('Square_id')['SMS']
}
# Cells with no remaining rows after the filter get entropy 0.
entropy_dict = {cell: entropy(sms_by_cell.get(cell, ())) for cell in cells}


# In[5]:

df_call = df_aggregated.Call.reset_index()
df_call_filter = df_call[(df_call.Country_code != 39) & (df_call.Country_code != 0)]


# In[6]:

# Per-cell entropy of the call volume distribution over countries.
call_renorm_entropy_ds = df_call_filter.groupby(df_call_filter.Square_id)['Call'].apply(entropy)


# ## Low entropy states $S < \mu - \sigma$

# In[7]:

import pickle as pk

threshold_factor = 0.1
call_thr = 3
country_call_cells = {}
M = call_renorm_entropy_ds.max()

for i, country in enumerate(set(df_call_filter.Country_code)):
    country_rows = df_call_filter[df_call_filter.Country_code == country]
    # Call volume of this country laid out on a fixed 10000-slot index so it
    # can be reshaped to 100 x 100 below.
    # NOTE(review): the index runs 0..9999; if Square_id is 1-based, the last
    # cell is dropped and slot 0 is always empty -- confirm.
    toy_series = pd.Series(
        data=country_rows['Call'].values,
        index=country_rows['Square_id'].values
    ).reindex(range(0, 10000))

    max_calls = toy_series.max(skipna=True)
    act_mu = toy_series.mean(skipna=True)
    std_mu = toy_series.std(skipna=True)

    # NOTE(review): the comparison operator was missing in the source
    # ("xcall_thr", an undefined name); "x < call_thr" is a reconstruction.
    # Also confirm that subtracting the call count x from the entropy
    # maximum M is really what is intended here.
    second_toy = toy_series.fillna(0).apply(
        lambda x: 0 if x < call_thr else M - x
    ).values

    # Binarize: 1 where the score exceeds a fraction of its maximum, else 0.
    # second_toy is a plain ndarray, so use NumPy (nanmax mirrors skipna=True)
    # and compute the loop-invariant cutoff once.
    cutoff = threshold_factor * np.nanmax(second_toy)
    deh = np.where(second_toy <= cutoff, 0, 1).reshape(100, 100)

    # Pickle needs a binary file object; the with-block closes it promptly.
    with open('../stores/phom/country_high_act_low_entropy_matrices/' + str(country) + '.pck', 'wb') as pck_file:
        pk.dump(deh, pck_file)