import numpy as np, requests, pandas as pd, zipfile, StringIO
countries=pd.read_html('http://www.geonames.org/countries/',header=0,infer_types=False)[1]
countries.columns=['ISO2','ISO3','ISONUM','FIPS','Country','Capital','Area','Population','Continent']
countries.set_index('Country',drop=True,inplace=True)
countries.head(5)
C:\Anaconda\lib\site-packages\pandas\io\html.py:841: FutureWarning: infer_types will have no effect in 0.14 warnings.warn("infer_types will have no effect in 0.14", FutureWarning)
ISO2 | ISO3 | ISONUM | FIPS | Capital | Area | Population | Continent | |
---|---|---|---|---|---|---|---|---|
Country | ||||||||
Andorra | AD | AND | 20 | AN | Andorra la Vella | 468.0 | 84000 | EU |
United Arab Emirates | AE | ARE | 784 | AE | Abu Dhabi | 82880.0 | 4975593 | AS |
Afghanistan | AF | AFG | 4 | AF | Kabul | 647500.0 | 29121286 | AS |
Antigua and Barbuda | AG | ATG | 28 | AC | St. John's | 443.0 | 86754 | nan |
Anguilla | AI | AIA | 660 | AV | The Valley | 102.0 | 13254 | nan |
hdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'LEx, EDUx, INCx',header=1).dropna(axis=1, how='all').set_index('country')
hdi.head()
iso3 | lex2010 | lex2011 | lex2012 | lex2013 | edux2010 | edux2011 | edux2012 | edux2013 | gnix2010 | gnix2011 | gnix2012 | gnix2013 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
country | |||||||||||||
Afghanistan | AFG | 0.609 | 0.617 | 0.623 | 0.630 | 0.357000 | 0.365333 | 0.365333 | 0.365333 | 0.425934 | 0.427753 | 0.443368 | 0.445065 |
Albania | ALB | 0.874 | 0.877 | 0.880 | 0.883 | 0.601675 | 0.608519 | 0.608519 | 0.608519 | 0.674701 | 0.680974 | 0.680882 | 0.683453 |
Algeria | DZA | 0.779 | 0.781 | 0.783 | 0.785 | 0.631478 | 0.642589 | 0.642589 | 0.642589 | 0.725495 | 0.729289 | 0.727595 | 0.730002 |
Andorra | AND | 0.936 | 0.937 | 0.939 | 0.941 | 0.670287 | 0.670287 | 0.670287 | 0.670287 | 0.918319 | 0.912079 | 0.908631 | 0.907283 |
Angola | AGO | 0.472 | 0.478 | 0.485 | 0.491 | 0.440879 | 0.474212 | 0.474212 | 0.474212 | 0.614749 | 0.625145 | 0.624848 | 0.626393 |
ihdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'iLEx, iEDUx,iINCx',header=1).dropna(axis=1, how='all').set_index('country')
ihdi.head()
iso3 | ilex2010 | ilex2011 | ilex2012 | ilex2013 | iedux2010 | iedux2011 | iedux2012 | iedux2013 | ignix2010 | ignix2011 | ignix2012 | ignix2013 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
country | |||||||||||||
Afghanistan | AFG | 0.250908 | 0.302947 | 0.305893 | 0.413910 | 0.216699 | 0.221757 | 0.221757 | 0.200933 | NaN | NaN | NaN | 0.396998 |
Albania | ALB | 0.778734 | 0.778776 | 0.781440 | 0.795583 | 0.525262 | 0.536106 | 0.536106 | 0.536106 | 0.577544 | 0.556356 | 0.556281 | 0.558381 |
Algeria | DZA | 0.639559 | 0.667755 | 0.669465 | 0.653905 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Andorra | AND | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Angola | AGO | 0.218536 | 0.257642 | 0.261415 | 0.264158 | 0.325369 | NaN | 0.310135 | 0.310135 | 0.390980 | 0.312572 | 0.312424 | 0.313197 |
gii=pd.read_excel('gii_series_cartagena.xlsx',sheetname=u'GII',header=2).dropna(axis=1, how='all').set_index('Country')
gii.head()
iso3 | GII1995 | GII2000 | GII2005 | GII2010 | GII2011 | GII2012 | GII2013 | |
---|---|---|---|---|---|---|---|---|
Country | ||||||||
Afghanistan | AFG | NaN | NaN | 0.743191 | 0.720173 | 0.706641 | 0.712844 | 0.705302 |
Albania | ALB | 0.661140 | NaN | 0.366596 | 0.268824 | 0.271414 | 0.251003 | 0.245376 |
Algeria | DZA | 0.594158 | 0.621349 | 0.551894 | 0.505261 | 0.412363 | 0.471756 | 0.425258 |
Andorra | AND | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Angola | AGO | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
cc={
'Bolivia (Plurinational State of)':'Bolivia',
'Brunei Darussalam':'Brunei',
'Congo':'Republic of the Congo',
'Congo (Democratic Republic of the)':'Democratic Republic of the Congo',
"Cote d'Ivoire":'Ivory Coast',
'Hong Kong, China (SAR)':'Hong Kong',
'Iran (Islamic Republic of)':'Iran',
"Korea (Democratic People's Republic of)":'North Korea',
'Korea (Republic of)':'South Korea',
"Lao People's Democratic Republic":'Laos',
'Libyan Arab Jamahiriya':'Libya',
'Micronesia (Federated States of)':'Micronesia',
'Moldova (Republic of)':'Moldova',
'Myanmar':'Myanmar [Burma]',
'Palestine (State of)':'Palestine',
'Russian Federation':'Russia',
'Sao Tome and Principe':u'São Tomé and PrÃncipe',
'Syrian Arab Republic':'Syria',
'Tanzania (United Republic of)':'Tanzania',
'The former Yugoslav Republic of Macedonia':'Macedonia',
'Timor-Leste':'East Timor',
'Venezuela (Bolivarian Republic of)':'Venezuela'
}
cc2={
"Cote d'Ivoire":u"Côte d'Ivoire",
"Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)",
'Palestine (State of)':'Palestine, State of',
'Vietnam':'Viet Nam',
'Libyan Arab Jamahiriya':'Libya'
}
cc3={
'Libyan Arab Jamahiriya':'Libya'
}
cc4={
"Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)",
'Libyan Arab Jamahiriya':'Libya'
}
continent_converter={
'EU':'Europe',
'AS':'Asia',
'SA':'South America',
'nan':'North America',
'AN':'Antarctica',
'AF':'Africa',
'OC':'Oceania'
}
def country_name_converter(country):
if country in cc: return cc[country]
else: return country
def country_name_converter2(country):
if country in cc2: return cc2[country]
else: return country
def country_name_converter3(country):
if country in cc3: return cc3[country]
else: return country
def country_name_converter4(country):
if country in cc4: return cc4[country]
else: return country
data={}
for country in ihdi.index:
if country_name_converter(country) in countries.index:
icountry=country_name_converter(country)
data[countries.loc[icountry]['ISONUM']]={}
data[countries.loc[icountry]['ISONUM']]['name']=icountry
data[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3']
data[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population']
data[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area']
data[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']]
data[countries.loc[icountry]['ISONUM']]['hdi']={}
data[countries.loc[icountry]['ISONUM']]['ihdi']={}
data[countries.loc[icountry]['ISONUM']]['gii']={}
for year in range(2010,2014):
try:
data[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
if np.isnan(hdi.loc[country]['edux'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=round(hdi.loc[country]['edux'+repr(year)],3)
if np.isnan(hdi.loc[country]['lex'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=round(hdi.loc[country]['lex'+repr(year)],3)
if np.isnan(hdi.loc[country]['gnix'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=round(hdi.loc[country]['gnix'+repr(year)],3)
except: print country,'hdi'
try:
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={}
if np.isnan(ihdi.loc[country]['iedux'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']=round(ihdi.loc[country]['iedux'+repr(year)],3)
if np.isnan(ihdi.loc[country]['ilex'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']=round(ihdi.loc[country]['ilex'+repr(year)],3)
if np.isnan(ihdi.loc[country]['ignix'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']=round(ihdi.loc[country]['ignix'+repr(year)],3)
except:
print country,'ihdi'
try:
gcountry=country_name_converter2(country)
if np.isnan(gii.loc[gcountry]['GII'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3)
except:
print country,'gii'
import json
file('data.json','w').write(json.dumps(data))
GNI=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Gross National Income',header=2).set_index('Country')
GNI.head()
iso3 | GNI1980 | GNI1981 | GNI1982 | GNI1983 | GNI1984 | GNI1985 | GNI1986 | GNI1987 | GNI1988 | ... | GNI2004 | GNI2005 | GNI2006 | GNI2007 | GNI2008 | GNI2009 | GNI2010 | GNI2011 | GNI2012 | GNI2013 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | |||||||||||||||||||||
Afghanistan | AFG | 2548.254832 | 2466.626434 | 2562.737937 | 2626.621422 | 2844.072554 | 2990.928165 | 3083.504539 | 3250.936824 | 2969.797438 | ... | 1125.869194 | 1125.857560 | 1202.399608 | 1263.661214 | 1432.486514 | 1454.914569 | 1677.201601 | 1697.523824 | 1882.395509 | 1903.656673 |
Albania | ALB | 3206.926814 | 3232.740139 | 3339.181470 | 3359.854185 | 3319.652177 | 3205.344390 | 3178.175640 | 3264.309099 | 3150.099437 | ... | 6553.656875 | 6920.969736 | 7373.477016 | 7867.466696 | 8350.808162 | 8400.746220 | 8705.765308 | 9074.926805 | 9069.389993 | 9225.050846 |
Algeria | DZA | 10215.030188 | 10039.512129 | 10014.561086 | 10313.216799 | 10560.014313 | 10928.949084 | 11025.723938 | 10849.097215 | 10724.071343 | ... | 10879.228521 | 11133.509897 | 11577.415445 | 11650.490699 | 12153.510881 | 12227.579531 | 12185.446764 | 12495.444586 | 12356.046736 | 12554.571986 |
Andorra | AND | 31812.140284 | 31393.180239 | 30148.593193 | 29267.883014 | 28510.312997 | 27767.417852 | 27206.021185 | 26932.811055 | 27286.989362 | ... | 33749.938605 | 34385.304454 | 35450.339111 | 37555.137778 | 38365.363439 | 40870.160552 | 43673.948833 | 41906.643301 | 40960.901628 | 40597.124810 |
Angola | AGO | 3259.908578 | 3253.536712 | 3010.757634 | 2912.914398 | 2897.972093 | 2946.294587 | 2902.630135 | 2901.611617 | 3104.058261 | ... | 3057.616822 | 3878.839822 | 5041.274640 | 5662.865490 | 6271.567560 | 5246.697802 | 5853.837614 | 6270.903706 | 6258.566318 | 6322.938484 |
5 rows × 35 columns
LE=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Life Expectency',header=2).set_index('Country')
LE.head()
iso3 | Life_Exp1980 | Life_Exp1981 | Life_Exp1982 | Life_Exp1983 | Life_Exp1984 | Life_Exp1985 | Life_Exp1986 | Life_Exp1987 | Life_Exp1988 | ... | Life_Exp2004 | Life_Exp2005 | Life_Exp2006 | Life_Exp2007 | Life_Exp2008 | Life_Exp2009 | Life_Exp2010 | Life_Exp2011 | Life_Exp2012 | Life_Exp2013 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | |||||||||||||||||||||
Afghanistan | AFG | 41.242 | 41.770 | 42.347 | 42.977 | 43.661 | 44.400 | 45.192 | 46.024 | 46.880 | ... | 56.583 | 57.071 | 57.582 | 58.102 | 58.618 | 59.124 | 59.612 | 60.079 | 60.524 | 60.947 |
Albania | ALB | 70.218 | 70.426 | 70.646 | 70.886 | 71.144 | 71.398 | 71.615 | 71.770 | 71.853 | ... | 75.725 | 75.949 | 76.124 | 76.278 | 76.433 | 76.598 | 76.780 | 76.979 | 77.185 | 77.392 |
Algeria | DZA | 58.198 | 59.524 | 60.826 | 62.051 | 63.160 | 64.120 | 64.911 | 65.554 | 66.072 | ... | 69.682 | 69.854 | 70.020 | 70.180 | 70.332 | 70.477 | 70.615 | 70.747 | 70.874 | 71.000 |
Andorra | AND | 74.856 | 75.115 | 75.344 | 75.541 | 75.707 | 75.848 | 75.973 | 76.094 | 76.222 | ... | 80.005 | 80.192 | 80.343 | 80.471 | 80.589 | 80.703 | 80.818 | 80.935 | 81.054 | 81.173 |
Angola | AGO | 40.182 | 40.311 | 40.429 | 40.547 | 40.671 | 40.794 | 40.902 | 40.988 | 41.050 | ... | 48.036 | 48.572 | 49.041 | 49.471 | 49.882 | 50.286 | 50.689 | 51.094 | 51.498 | 51.899 |
5 rows × 35 columns
EYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Expected Years of Schooling',header=2).set_index('Country')
EYS.head()
iso3 | EYS1980 | EYS1981 | EYS1982 | EYS1983 | EYS1984 | EYS1985 | EYS1986 | EYS1987 | EYS1988 | ... | EYS2004 | EYS2005 | EYS2006 | EYS2007 | EYS2008 | EYS2009 | EYS2010 | EYS2011 | EYS2012 | EYS2013 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | |||||||||||||||||||||
Afghanistan | AFG | 1.80000 | 1.80000 | 1.80000 | 1.97500 | 2.15000 | 2.32500 | 2.50000 | 2.52500 | 2.55000 | ... | 7.50000 | 7.74000 | 7.98000 | 8.22000 | 8.46000 | 8.70000 | 9.00000 | 9.30000 | 9.30000 | 9.30000 |
Albania | ALB | 11.30000 | 11.30000 | 10.80000 | 10.30000 | 10.30000 | 10.50000 | 10.60000 | 10.40000 | 10.40000 | ... | 10.80000 | 10.80000 | 10.80000 | 10.80000 | 10.80000 | 10.80000 | 10.80000 | 10.80000 | 10.80000 | 10.80000 |
Algeria | DZA | 9.40000 | 9.40000 | 9.40000 | 9.40000 | 9.40000 | 9.40000 | 9.40000 | 9.40000 | 9.60000 | ... | 11.80000 | 12.00000 | 12.10000 | 12.30000 | 12.75000 | 13.20000 | 13.60000 | 14.00000 | 14.00000 | 14.00000 |
Andorra | AND | 10.79878 | 10.79878 | 10.79878 | 10.79878 | 10.79878 | 10.79878 | 10.79878 | 10.79878 | 10.79878 | ... | 10.79199 | 10.82593 | 11.18277 | 11.18277 | 11.67192 | 11.67192 | 11.67192 | 11.67192 | 11.67192 | 11.67192 |
Angola | AGO | 4.20000 | 4.20000 | 4.20000 | 4.20000 | 4.20000 | 4.20000 | 4.20000 | 4.20000 | 4.20000 | ... | 7.35000 | 7.82500 | 8.30000 | 8.77500 | 9.25000 | 9.72500 | 10.20000 | 11.40000 | 11.40000 | 11.40000 |
5 rows × 35 columns
MYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Mean Years of Schooling',header=2).set_index('Country')
MYS.head()
#fux db
MYS.loc["Vanuatu","MYS1980"]=np.NaN
data2={}
for country in GNI.index:
if country_name_converter(country) in countries.index:
icountry=country_name_converter(country)
data2[countries.loc[icountry]['ISONUM']]={}
data2[countries.loc[icountry]['ISONUM']]['name']=icountry
data2[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3']
data2[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population']
data2[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area']
data2[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']]
data2[countries.loc[icountry]['ISONUM']]['hdi']={}
for year in range(1980,2014):
try:
data2[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\
round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\
EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3)
data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\
round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3)
data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\
round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3)
except: print country,'hdi',year
Vanuatu hdi 1980
import json
file('data2.json','w').write(json.dumps(data2))
for country in GNI.index:
if country_name_converter(country) in countries.index:
icountry=country_name_converter(country)
for year in range(1980,2010):
try:
data[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\
round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\
EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3)
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\
round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3)
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\
round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3)
if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN"
if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN"
if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN"
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={'edu':"NaN",'le':"NaN",'gni':"NaN"}
data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
if year in [1995,2000,2005]:
try:
gcountry=country_name_converter2(country)
if np.isnan(gii.loc[gcountry]['GII'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3)
except:
print country,'gii'
except: print country,'error',year
import json
file('data3.json','w').write(json.dumps(data))
pop=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheet="ESTIMATES",header=16)
pop=pop.set_index(['Country code'])
for i in data:
data[i]['population']={}
for year in range(1980,2011):
data[i]['population'][year]=\
pop.loc[(int)(i)].values[::-1][2010-year]*1000
pop2=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheetname="NO CHANGE",header=16)
pop2=pop2.set_index(['Country code'])
for i in data:
for year in range(2011,2014):
data[i]['population'][year]=\
pop2.loc[(int)(i)].values[::-1][2100-year]*1000
import json
file('data3.json','w').write(json.dumps(data))