import numpy as np, requests, pandas as pd
# UNDP page holding "Table 1: Human Development Index and its components".
url = 'http://hdr.undp.org/en/content/table-1-human-development-index-and-its-components'
r = requests.get(url)
# Fail loudly on an HTTP error status instead of handing an error page to the
# HTML table parser, which would otherwise fail later with a misleading message.
r.raise_for_status()
# Take the first table on the page (first row as header), drop the rank-change
# column and index the rows by country name.
# NOTE(review): `infer_types` is deprecated (pandas warns it has no effect from
# 0.14). It is kept because the later processing applies `re.sub` to the cell
# values, which presumably relies on them staying strings -- confirm before
# removing it on a newer pandas.
hdi = (
    pd.read_html(r.content, header=0, infer_types=False)[0]
    .drop('Change in rank, 2012-2013', axis=1)
    .set_index(['Country'], drop=True)
)
hdi.head()
C:\Program Files\Anaconda\lib\site-packages\pandas\io\html.py:841: FutureWarning: infer_types will have no effect in 0.14 warnings.warn("infer_types will have no effect in 0.14", FutureWarning)
HDI rank | Human Development Index (HDI) Value, 2013 | Life expectancy at birth (years), 2013 | Mean years of schooling (years), 2012 a | Expected years of schooling (years), 2012 a | Gross national income (GNI) per capita (2011 PPP $), 2013 | Human Development Index (HDI) Value, 2012 | |
---|---|---|---|---|---|---|---|
Country | |||||||
Very high human development | nan | nan | nan | nan | nan | nan | nan |
Norway | 1.0 | 0.944 | 81.5 | 12.6 | 17.6 | 63909 | 0.943 |
Australia | 2.0 | 0.933 | 82.5 | 12.8 | 19.9 | 41524 | 0.931 |
Switzerland | 3.0 | 0.917 | 82.6 | 12.2 | 15.7 | 53762 | 0.916 |
Netherlands | 4.0 | 0.915 | 81.0 | 11.9 | 17.9 | 42397 | 0.915 |
# Load the tab-separated country-name table, then index it by the `name`
# column so rows can be looked up by country name.
ids = pd.read_csv(
    'http://bl.ocks.org/d/4090846/world-country-names.tsv',
    sep='\t',
)
ids = ids.set_index(['name'], drop=True)
ids.head()
id | |
---|---|
name | |
Northern Cyprus | -1 |
Kosovo | -2 |
Somaliland | -3 |
Afghanistan | 4 |
Albania | 8 |
# Country names that are spelled differently in the two tables: the key is the
# spelling passed in, the value is the spelling returned.
# NOTE(review): the Cote d'Ivoire value is written with the escaped UTF-8 bytes
# of "o-circumflex" (\xc3\xb4); that equals the key only when both are byte
# strings (Python 2) -- confirm the intended behaviour before porting.
_COUNTRY_NAME_OVERRIDES = {
    "Venezuela, Bolivarian Republic of": "Venezuela (Bolivarian Republic of)",
    "Tanzania, United Republic of": "Tanzania (United Republic of)",
    "Moldova, Republic of": "Moldova (Republic of)",
    "Micronesia, Federated States of": "Micronesia (Federated States of)",
    "Macedonia, the former Yugoslav Republic of": "The former Yugoslav Republic of Macedonia",
    "Korea, Republic of": "Korea (Republic of)",
    "Korea, Democratic People's Republic of": "Korea (Democratic People's Rep. of)",
    "Côte d'Ivoire": "C\xc3\xb4te d'Ivoire",
    "Iran, Islamic Republic of": "Iran (Islamic Republic of)",
    "Hong Kong": "Hong Kong, China (SAR)",
    "Palestinian Territory, Occupied": "Palestine, State of",
    "Congo, the Democratic Republic of the": "Congo (Democratic Republic of the)",
    "Bolivia, Plurinational State of": "Bolivia (Plurinational State of)",
}


def country_name_converter(country):
    """Translate a country name into its alternative spelling.

    Returns the mapped spelling when `country` is one of the known special
    cases; any other value is returned unchanged.
    """
    return _COUNTRY_NAME_OVERRIDES.get(country, country)
import re
# Drops every character that is not a digit or a dot before a cell is parsed
# as a float (note that this also removes any minus sign).
_NON_NUMERIC = re.compile(r'[^\d.]+')
# Loop-invariant terms of the income scaling
# (ln(GNI) - ln(100)) / (ln(75000) - ln(100)).
_LOG_GNI_FLOOR = np.log(100)
_LOG_GNI_SPAN = np.log(75000) - np.log(100)


def _hdi_number(row, column):
    """Return `row[column]` parsed as a float after stripping non-numeric characters.

    Raises TypeError if the cell is not a string and ValueError if what is
    left cannot be parsed as a number.
    """
    return float(_NON_NUMERIC.sub('', row[column]))


# codes maps repr(<first column of the ids row>) to a list of
#   [0] country name as spelled in `ids`
#   [1] published HDI value for 2013
#   [2] (life expectancy - 20) / (85 - 20)
#   [3] mean of (mean years of schooling / 15) and (expected years / 18)
#   [4] (ln(GNI per capita) - ln(100)) / (ln(75000) - ln(100))
#   [5] cube root of [2] * [3] * [4]
# with every number rounded to 3 decimals.
codes = {}
# Countries left out of `codes`, mapped to repr() of the error that caused it,
# so that dropped rows can be inspected instead of vanishing silently.
skipped_countries = {}
for i in ids.index:
    try:
        # Resolve the row once instead of repeating the lookup for every column.
        row = hdi.loc[country_name_converter(i)]
        a = [i]
        a.append(round(_hdi_number(row, "Human Development Index (HDI) Value, 2013"), 3))
        a.append(round(
            (_hdi_number(row, "Life expectancy at birth (years), 2013") - 20) / (85 - 20), 3))
        a.append(round(
            (_hdi_number(row, "Mean years of schooling (years), 2012 a") / 15 +
             _hdi_number(row, "Expected years of schooling (years), 2012 a") / 18) / 2, 3))
        a.append(round(
            (np.log(_hdi_number(row, "Gross national income (GNI) per capita (2011 PPP $), 2013"))
             - _LOG_GNI_FLOOR) / _LOG_GNI_SPAN, 3))
        a.append(round((a[2] * a[3] * a[4]) ** (1.0 / 3.0), 3))
        # Only store the entry once every value has been computed successfully.
        codes[repr(ids.loc[i][0])] = a
    except Exception as exc:
        # Best effort: a country that is missing from `hdi` or has unparsable
        # cells is skipped. Unlike a bare `except`, this lets KeyboardInterrupt
        # and SystemExit propagate.
        skipped_countries[i] = repr(exc)
import json
# Serialise `codes` to ../hdi.json. `open` replaces the Python 2-only `file()`
# builtin, and the `with` block guarantees the handle is flushed and closed
# (the original never closed it).
with open('../hdi.json', 'w') as out:
    out.write(json.dumps(codes))