#!/usr/bin/env python # coding: utf-8 # In[1]: import os, sys import networkx as nx import numpy as np import pickle import pandas as pd sys.path.append('../'); import Holes as ho import matplotlib.pyplot as plt # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') # ### Maximal homology dimension $k$ to calculate # In[3]: max_dimensions=1; # ### Load generators dictionaries # In[4]: import scipy.io data_dir='../stores/phom/country_high_act_low_entropy_matrices/' ld = os.listdir(data_dir); threshold_factor=0.1; thr=2.7 call_thresholds=[thr] nsigma_round = [-3.4, -3.0, -2.7, -2.3, -1.9, -1.6, -1.2, -0.9, -0.5, -0.1] countries_to_cells={} weighted_gen_dict={} for country in ld: if country == '.gitignore': continue country = country.split('.')[0] weighted_gen_dict[country]={} for call_thr in call_thresholds: try: filename=open('../stores/phom/output/gen/weighted_generators_'+country+'_.pck'); weighted_gen_dict[country][call_thr]=pickle.load(filename); filename.close() except: print 'didnt work with', country, call_thr # ### Construct spatial coherency and cyclity from generators # In[5]: wmax_H_1_persistent = {} for c in weighted_gen_dict: wmax_H_1_persistent[c] = {} for call_thr in weighted_gen_dict[c]: if len(weighted_gen_dict[c][call_thr][1])>0: wmax_H_1_persistent[c][call_thr] = np.max(map(lambda x: x.persistence_interval(), weighted_gen_dict[c][call_thr][1])) else: wmax_H_1_persistent[c][call_thr]=0; # In[6]: wH_0_persistent = {} wH_0_persistent_mod = {} for c in weighted_gen_dict: wH_0_persistent[c] = {} wH_0_persistent_mod[c] = {} for call_thr in weighted_gen_dict[c]: if len(weighted_gen_dict[c][call_thr][0])>1: wH_0_persistent[c][call_thr] = (sorted(map(lambda x: x.persistence_interval(), weighted_gen_dict[c][call_thr][0]))) wH_0_persistent_mod[c][call_thr] = (wH_0_persistent[c][call_thr][-1]-wH_0_persistent[c][call_thr][-2])/float(wH_0_persistent[c][call_thr][-1]) else: wH_0_persistent[c][call_thr]=[1]; wH_0_persistent_mod[c][call_thr]=[1]; # In[7]: coords_matrix={} country_order = {} for i,call_thr in enumerate(sorted(call_thresholds)): coords_matrix[call_thr]=[] country_order [call_thr] = [] for c in wH_0_persistent_mod.keys(): if call_thr in wH_0_persistent_mod[c]: coords_matrix[call_thr].append([ wH_0_persistent_mod[c][call_thr], wmax_H_1_persistent[c][call_thr]/float(max(wH_0_persistent[c][call_thr]))]); country_order[call_thr].append(c); # In[8]: X={} for thr in sorted(coords_matrix.keys()): X[thr]=np.zeros((int(len(coords_matrix[thr])),2)); print X[thr].shape for i, pair in enumerate(coords_matrix[thr]): if plt.is_numlike(pair[0]): pair = [pair[0], pair[1]] X[thr][i]=pair[0]; X[thr][i][1]=pair[1]; else: pair = [pair[0][0], pair[1]] X[thr][i][0]=pair[0]; X[thr][i][1]=pair[1]; # ### K-means # In[9]: import time as time import numpy as np import pylab as pl import mpl_toolkits.mplot3d.axes3d as p3 from sklearn import cluster, datasets k_means = cluster.KMeans(n_clusters=2, n_init=1) k_means.fit(X[2.7]) values = k_means.cluster_centers_.squeeze() labels = k_means.labels_ fig = plt.figure() ax = fig.add_subplot(111) for l in np.unique(labels): ax.plot(X[2.7][labels == l, 0], X[2.7][labels == l, 1], 'o', color=pl.cm.jet(np.float(l) / np.max(labels + 1))) plt.title('Without connectivity constraints') # ### Import population data # In[10]: import pandas as pd remitt=pd.read_csv('../data/phom/population_data.csv'); # In[11]: short_country_order = {} for thr in country_order: short_country_order[thr]=[] for c in country_order[thr]: try: short_country_order[thr].append(remitt[remitt['prefix']==int(c)]['country_cca'].values[0]) except: pass # In[12]: X_filter = pd.Series(short_country_order[thr]).str.match('[A-Z]+').values # In[13]: country_cluster_assignment={} k_means = cluster.KMeans(n_clusters=2, n_init=1) k_means.fit(X[thr][X_filter]); labels = k_means.labels_; country_cluster_assignment[thr] = zip(short_country_order[thr], labels) # In[14]: fig = plt.figure(figsize=(21,7)) ax1 = plt.subplot2grid((1,9), (0,0), colspan=4) thr=2.7 from sklearn import cluster, datasets k_means = cluster.KMeans(n_clusters=2, n_init=1) k_means.fit(X[thr][X_filter]) values = k_means.cluster_centers_.squeeze() labels = k_means.labels_ col = ['r','k'] for l in np.unique(labels): # ax1.plot(X[thr][labels == l, 0], X[thr][labels == l, 1], # 'o', color=pl.cm.jet(np.float(l) / np.max(labels + 1)),markersize=10) ax1.plot(X[thr][X_filter][labels == l, 0], X[thr][X_filter][labels == l, 1], '.', color=col[l],markersize=13) for i, c in enumerate(X[thr][X_filter]): plt.text(c[0], c[1], short_country_order[thr][i], fontsize=14) xtickNames = plt.setp(ax1, xticklabels=np.linspace(0,1,6)) plt.setp(xtickNames, fontsize=23) ytickNames = plt.setp(ax1, yticklabels=np.linspace(-0.05,.2,6)) plt.setp(ytickNames, fontsize=23) ax1.set_ylabel(r'$r_1$',fontsize=28) ax1.set_xlabel(r'$r_0$',fontsize=28) plt.text(0.02,0.186,'(a)',fontsize=23) plt.ylim(-0.02,0.2) plt.xlim(0,1.05) #plt.tight_layout() ax2 = plt.subplot2grid((1,9), (0,4), colspan=2) group1=[] group2=[] for c_tuple in country_cluster_assignment[thr]: if c_tuple[1]==0: group1.append(c_tuple[0]); else: group2.append(c_tuple[0]); GDP_group1=[] GDP_group2=[] for c in group1: try: GDP_group1.append(remitt['GDPpercapita'][remitt['country_cca']==c].values[0]) except: print 'Country not in dataset :', c for c in group2: try: GDP_group2.append(remitt['GDPpercapita'][remitt['country_cca']==c].values[0]) except: print 'Country not in dataset :', c data = [(GDP_group1), (GDP_group2)] numDists=2 #plt.subplots_adjust(left=0.095, right=0.95, top=0.9, bottom=0.25) bp = plt.boxplot(data, notch=0, sym='+', vert=1, whis=1.5) plt.setp(bp['boxes'], color='black') plt.setp(bp['whiskers'], color='black') plt.setp(bp['fliers'], color='red', marker='+') # Add a horizontal grid to the plot, but make it very light in color # so we can use it for reading data values but not be distracting ax2.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5) # Hide these grid behind plot objects ax2.set_axisbelow(True) #ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions') ax2.set_ylabel(r'Avg GDP per capita ($10^3$USD)',fontsize=23) # Now fill the boxes with desired colors boxColors = ['red','black'] numBoxes = numDists medians = range(numBoxes) for i in range(numBoxes): box = bp['boxes'][i] boxX = [] boxY = [] for j in range(5): boxX.append(box.get_xdata()[j]) boxY.append(box.get_ydata()[j]) boxCoords = zip(boxX,boxY) # Alternate between Dark Khaki and Royal Blue k = i % 2 boxPolygon = plt.Polygon(boxCoords, facecolor=boxColors[k], alpha=0.7) ax2.add_patch(boxPolygon) # Now draw the median lines back over what we just filled in med = bp['medians'][i] medianX = [] medianY = [] for j in range(2): medianX.append(med.get_xdata()[j]) medianY.append(med.get_ydata()[j]) plt.plot(medianX, medianY, 'k') medians[i] = medianY[0] # Finally, overplot the sample averages, with horizontal alignment # in the center of each box plt.plot([np.average(med.get_xdata())], [np.average(data[i])], color='w', marker='*', markeredgecolor='k') # Set the axes ranges and axes labels ax2.set_xlim(0.5, numBoxes+0.5) xtickNames = plt.setp(ax2, xticklabels=['Cluster 1','Cluster 2']) plt.setp(xtickNames, fontsize=23) ytickNames = plt.setp(ax2, yticklabels=range(0,100,10)) plt.setp(ytickNames, fontsize=23) plt.text(0.6,75000,'(b)',fontsize=23) ax3 = plt.subplot2grid((1,9), (0,6), colspan=2) GDP_group1=[] GDP_group2=[] for c in group1: try: GDP_group1.append(remitt['AvgMIRemittance'][remitt['country_cca']==c].values[0]) except: print 'Country not in dataset :', c for c in group2: try: GDP_group2.append(remitt['AvgMIRemittance'][remitt['country_cca']==c].values[0]) except: print 'Country not in dataset :', c data = [GDP_group1, (GDP_group2)] numDists=2 #plt.subplots_adjust(left=0.095, right=0.95, top=0.9, bottom=0.25) bp = plt.boxplot(data, notch=0, sym='+', vert=1, whis=1.5) plt.setp(bp['boxes'], color='black') plt.setp(bp['whiskers'], color='black') plt.setp(bp['fliers'], color='red', marker='+') # Add a horizontal grid to the plot, but make it very light in color # so we can use it for reading data values but not be distracting ax3.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5) # Hide these grid behind plot objects ax3.set_axisbelow(True) #ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions') ax3.set_ylabel('Avg remittance fraction per capita (%)',fontsize=23) # Now fill the boxes with desired colors boxColors = ['red','black'] numBoxes = numDists medians = range(numBoxes) for i in range(numBoxes): box = bp['boxes'][i] boxX = [] boxY = [] for j in range(5): boxX.append(box.get_xdata()[j]) boxY.append(box.get_ydata()[j]) boxCoords = zip(boxX,boxY) # Alternate between Dark Khaki and Royal Blue k = i % 2 boxPolygon = plt.Polygon(boxCoords, facecolor=boxColors[k], alpha=0.7) ax3.add_patch(boxPolygon) # Now draw the median lines back over what we just filled in med = bp['medians'][i] medianX = [] medianY = [] for j in range(2): medianX.append(med.get_xdata()[j]) medianY.append(med.get_ydata()[j]) ax3.plot(medianX, medianY, 'k') medians[i] = medianY[0] # Finally, overplot the sample averages, with horizontal alignment # in the center of each box plt.plot([np.average(med.get_xdata())], [np.average(data[i])], color='w', marker='*', markeredgecolor='k') # Set the axes ranges and axes labels ax3.set_xlim(0.5, numBoxes+0.5) top = 100 bottom = -5 ax3.set_ylim(0,50) xtickNames = plt.setp(ax3, xticklabels=['Cluster 1','Cluster 2']) plt.setp(xtickNames, fontsize=23) ytickNames = plt.setp(ax3, yticklabels=range(0,60,10)) plt.setp(ytickNames, fontsize=23) plt.text(0.6,47,'(c)',fontsize=23) plt.tight_layout() plt.show()