#!/usr/bin/env python
# coding: utf-8

# In[1]:


import os, sys
import networkx as nx
import numpy as np
import pickle
import pandas as pd
sys.path.append('../');
import Holes as ho
import matplotlib.pyplot as plt


# In[2]:


get_ipython().run_line_magic('matplotlib', 'inline')


# ### Maximal homology dimension $k$ to calculate

# In[3]:


max_dimensions=1;


# ### Load generators dictionaries

# In[4]:


import scipy.io
data_dir='../stores/phom/country_high_act_low_entropy_matrices/'
ld = os.listdir(data_dir);
threshold_factor=0.1;
thr=2.7
call_thresholds=[thr] 
nsigma_round = [-3.4, -3.0, -2.7, -2.3, -1.9, -1.6, -1.2, -0.9, -0.5, -0.1]

countries_to_cells={}
    
weighted_gen_dict={}
for country in ld:
    if country == '.gitignore':
            continue
    country = country.split('.')[0]
    weighted_gen_dict[country]={}
    for call_thr in call_thresholds:
        try:
            filename=open('../stores/phom/output/gen/weighted_generators_'+country+'_.pck');
            weighted_gen_dict[country][call_thr]=pickle.load(filename);
            filename.close()
        except:
            print 'didnt work with', country, call_thr


# ### Construct spatial coherency and cyclity from generators

# In[5]:


wmax_H_1_persistent = {}

for c in weighted_gen_dict:
    wmax_H_1_persistent[c] = {}
    for call_thr in weighted_gen_dict[c]:
        if len(weighted_gen_dict[c][call_thr][1])>0:
            wmax_H_1_persistent[c][call_thr] = np.max(map(lambda x:  x.persistence_interval(), weighted_gen_dict[c][call_thr][1]))
        else:
            wmax_H_1_persistent[c][call_thr]=0;


# In[6]:


wH_0_persistent = {}
wH_0_persistent_mod = {}
for c in weighted_gen_dict:
    wH_0_persistent[c] = {}
    wH_0_persistent_mod[c] = {}
    for call_thr in weighted_gen_dict[c]:
        if len(weighted_gen_dict[c][call_thr][0])>1:
            wH_0_persistent[c][call_thr] = (sorted(map(lambda x:  x.persistence_interval(), weighted_gen_dict[c][call_thr][0])))
            wH_0_persistent_mod[c][call_thr] = (wH_0_persistent[c][call_thr][-1]-wH_0_persistent[c][call_thr][-2])/float(wH_0_persistent[c][call_thr][-1])
        else:
            wH_0_persistent[c][call_thr]=[1];
            wH_0_persistent_mod[c][call_thr]=[1];


# In[7]:


coords_matrix={}
country_order = {}
for i,call_thr in enumerate(sorted(call_thresholds)):
    coords_matrix[call_thr]=[]
    country_order [call_thr] = []
    for c in wH_0_persistent_mod.keys():
        if call_thr in wH_0_persistent_mod[c]:
            coords_matrix[call_thr].append([ wH_0_persistent_mod[c][call_thr], wmax_H_1_persistent[c][call_thr]/float(max(wH_0_persistent[c][call_thr]))]); 
            country_order[call_thr].append(c);


# In[8]:


X={}
for thr in sorted(coords_matrix.keys()):
    X[thr]=np.zeros((int(len(coords_matrix[thr])),2));
    print X[thr].shape
    for i, pair in enumerate(coords_matrix[thr]):
        if plt.is_numlike(pair[0]):
            pair = [pair[0], pair[1]]
            X[thr][i]=pair[0];
            X[thr][i][1]=pair[1];
        else:
            pair = [pair[0][0], pair[1]]
            X[thr][i][0]=pair[0];
            X[thr][i][1]=pair[1];


# ### K-means

# In[9]:


import time as time
import numpy as np
import pylab as pl
import mpl_toolkits.mplot3d.axes3d as p3
from sklearn import cluster, datasets

k_means = cluster.KMeans(n_clusters=2, n_init=1)
k_means.fit(X[2.7]) 

values = k_means.cluster_centers_.squeeze()
labels = k_means.labels_
fig = plt.figure()
ax = fig.add_subplot(111)
for l in np.unique(labels):
    ax.plot(X[2.7][labels == l, 0], X[2.7][labels == l, 1],
              'o', color=pl.cm.jet(np.float(l) / np.max(labels + 1)))
plt.title('Without connectivity constraints')


# ### Import population data

# In[10]:


import pandas as pd
remitt=pd.read_csv('../data/phom/population_data.csv');


# In[11]:


short_country_order = {}
for thr in country_order:
    short_country_order[thr]=[]
    for c in country_order[thr]:
        try:
            short_country_order[thr].append(remitt[remitt['prefix']==int(c)]['country_cca'].values[0])
        except:
            pass


# In[12]:


X_filter = pd.Series(short_country_order[thr]).str.match('[A-Z]+').values


# In[13]:


country_cluster_assignment={}
k_means = cluster.KMeans(n_clusters=2, n_init=1)
k_means.fit(X[thr][X_filter]);
labels = k_means.labels_;
country_cluster_assignment[thr] = zip(short_country_order[thr], labels)


# In[14]:


fig = plt.figure(figsize=(21,7))
ax1 = plt.subplot2grid((1,9), (0,0), colspan=4)

thr=2.7

from sklearn import cluster, datasets
k_means = cluster.KMeans(n_clusters=2, n_init=1)
k_means.fit(X[thr][X_filter]) 
values = k_means.cluster_centers_.squeeze()
labels = k_means.labels_

col = ['r','k']

for l in np.unique(labels):
#    ax1.plot(X[thr][labels == l, 0], X[thr][labels == l, 1],
 #             'o', color=pl.cm.jet(np.float(l) / np.max(labels + 1)),markersize=10)
    ax1.plot(X[thr][X_filter][labels == l, 0], X[thr][X_filter][labels == l, 1],
              '.', color=col[l],markersize=13)
for i, c in enumerate(X[thr][X_filter]):
    plt.text(c[0], c[1], short_country_order[thr][i], fontsize=14)

xtickNames = plt.setp(ax1, xticklabels=np.linspace(0,1,6))
plt.setp(xtickNames, fontsize=23)
ytickNames = plt.setp(ax1, yticklabels=np.linspace(-0.05,.2,6))
plt.setp(ytickNames, fontsize=23)
ax1.set_ylabel(r'$r_1$',fontsize=28)
ax1.set_xlabel(r'$r_0$',fontsize=28)
plt.text(0.02,0.186,'(a)',fontsize=23)
plt.ylim(-0.02,0.2)
plt.xlim(0,1.05)

#plt.tight_layout()
    
ax2 = plt.subplot2grid((1,9), (0,4), colspan=2)

group1=[]
group2=[]
for c_tuple in country_cluster_assignment[thr]:
    if c_tuple[1]==0:
        group1.append(c_tuple[0]);
    else:
        group2.append(c_tuple[0]);

GDP_group1=[]
GDP_group2=[]
for c in group1:
    try:
        GDP_group1.append(remitt['GDPpercapita'][remitt['country_cca']==c].values[0])
    except:
        print 'Country not in dataset :', c
for c in group2:
    try:
        GDP_group2.append(remitt['GDPpercapita'][remitt['country_cca']==c].values[0])
    except:
        print 'Country not in dataset :', c

data = [(GDP_group1), (GDP_group2)]
numDists=2
#plt.subplots_adjust(left=0.095, right=0.95, top=0.9, bottom=0.25)
bp = plt.boxplot(data, notch=0, sym='+', vert=1, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')

# Add a horizontal grid to the plot, but make it very light in color
# so we can use it for reading data values but not be distracting
ax2.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
              alpha=0.5)

# Hide these grid behind plot objects
ax2.set_axisbelow(True)
#ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions')
ax2.set_ylabel(r'Avg GDP per capita ($10^3$USD)',fontsize=23)

# Now fill the boxes with desired colors
boxColors = ['red','black']
numBoxes = numDists
medians = range(numBoxes)
for i in range(numBoxes):
  box = bp['boxes'][i]
  boxX = []
  boxY = []
  for j in range(5):
      boxX.append(box.get_xdata()[j])
      boxY.append(box.get_ydata()[j])
  boxCoords = zip(boxX,boxY)
  # Alternate between Dark Khaki and Royal Blue
  k = i % 2
  boxPolygon = plt.Polygon(boxCoords, facecolor=boxColors[k], alpha=0.7)
  ax2.add_patch(boxPolygon)
  # Now draw the median lines back over what we just filled in
  med = bp['medians'][i]
  medianX = []
  medianY = []
  for j in range(2):
      medianX.append(med.get_xdata()[j])
      medianY.append(med.get_ydata()[j])
      plt.plot(medianX, medianY, 'k')
      medians[i] = medianY[0]
  # Finally, overplot the sample averages, with horizontal alignment
  # in the center of each box
  plt.plot([np.average(med.get_xdata())], [np.average(data[i])],
           color='w', marker='*', markeredgecolor='k')

# Set the axes ranges and axes labels
ax2.set_xlim(0.5, numBoxes+0.5)
xtickNames = plt.setp(ax2, xticklabels=['Cluster 1','Cluster 2'])
plt.setp(xtickNames, fontsize=23)
ytickNames = plt.setp(ax2, yticklabels=range(0,100,10))
plt.setp(ytickNames, fontsize=23)
plt.text(0.6,75000,'(b)',fontsize=23)

ax3 = plt.subplot2grid((1,9), (0,6), colspan=2)
GDP_group1=[]
GDP_group2=[]
for c in group1:
    try:
        GDP_group1.append(remitt['AvgMIRemittance'][remitt['country_cca']==c].values[0])
    except:
        print 'Country not in dataset :', c
for c in group2:
    try:
        GDP_group2.append(remitt['AvgMIRemittance'][remitt['country_cca']==c].values[0])
    except:
        print 'Country not in dataset :', c

data = [GDP_group1, (GDP_group2)]
numDists=2
#plt.subplots_adjust(left=0.095, right=0.95, top=0.9, bottom=0.25)
bp = plt.boxplot(data, notch=0, sym='+', vert=1, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')

# Add a horizontal grid to the plot, but make it very light in color
# so we can use it for reading data values but not be distracting
ax3.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
              alpha=0.5)
# Hide these grid behind plot objects
ax3.set_axisbelow(True)
#ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions')
ax3.set_ylabel('Avg remittance fraction per capita (%)',fontsize=23)

# Now fill the boxes with desired colors
boxColors = ['red','black']
numBoxes = numDists
medians = range(numBoxes)
for i in range(numBoxes):
  box = bp['boxes'][i]
  boxX = []
  boxY = []
  for j in range(5):
      boxX.append(box.get_xdata()[j])
      boxY.append(box.get_ydata()[j])
  boxCoords = zip(boxX,boxY)
  # Alternate between Dark Khaki and Royal Blue
  k = i % 2
  boxPolygon = plt.Polygon(boxCoords, facecolor=boxColors[k], alpha=0.7)
  ax3.add_patch(boxPolygon)
  # Now draw the median lines back over what we just filled in
  med = bp['medians'][i]
  medianX = []
  medianY = []
  for j in range(2):
      medianX.append(med.get_xdata()[j])
      medianY.append(med.get_ydata()[j])
      ax3.plot(medianX, medianY, 'k')
      medians[i] = medianY[0]
  # Finally, overplot the sample averages, with horizontal alignment
  # in the center of each box
  plt.plot([np.average(med.get_xdata())], [np.average(data[i])],
           color='w', marker='*', markeredgecolor='k')

# Set the axes ranges and axes labels
ax3.set_xlim(0.5, numBoxes+0.5)
top = 100
bottom = -5
ax3.set_ylim(0,50)
xtickNames = plt.setp(ax3, xticklabels=['Cluster 1','Cluster 2'])
plt.setp(xtickNames, fontsize=23)
ytickNames = plt.setp(ax3, yticklabels=range(0,60,10))
plt.setp(ytickNames, fontsize=23)

plt.text(0.6,47,'(c)',fontsize=23)
plt.tight_layout()
plt.show()