In [1]:

import numpy as np, requests, pandas as pd

In [2]:

# Download the UNDP "Table 1" page and turn its first HTML table into a
# DataFrame indexed by country name.
url='http://hdr.undp.org/en/content/table-1-human-development-index-and-its-components'
r=requests.get(url)
# Fail loudly on an HTTP error (4xx/5xx) instead of handing an error page to
# the HTML parser and getting a confusing "no tables found" / wrong-table result.
r.raise_for_status()
# NOTE(review): pandas warns (see recorded output) that infer_types will have no
# effect from 0.14. It is kept because the cell that builds `codes` applies
# re.sub to the cell values and therefore needs them to stay strings; revisit
# both cells together when upgrading pandas.
hdi=(
    pd.read_html(r.content,header=0,infer_types=False)[0]
    .drop('Change in rank, 2012-2013',axis=1)
    .set_index(['Country'],drop=True)
)
hdi.head()

C:\Program Files\Anaconda\lib\site-packages\pandas\io\html.py:841: FutureWarning: infer_types will have no effect in 0.14
  warnings.warn("infer_types will have no effect in 0.14", FutureWarning)

Out[2]:

	HDI rank	Human Development Index (HDI) Value, 2013	Life expectancy at birth (years), 2013	Mean years of schooling (years), 2012 a	Expected years of schooling (years), 2012 a	Gross national income (GNI) per capita (2011 PPP $), 2013	Human Development Index (HDI) Value, 2012
Country
Very high human development	nan	nan	nan	nan	nan	nan	nan
Norway	1.0	0.944	81.5	12.6	17.6	63909	0.943
Australia	2.0	0.933	82.5	12.8	19.9	41524	0.931
Switzerland	3.0	0.917	82.6	12.2	15.7	53762	0.916
Netherlands	4.0	0.915	81.0	11.9	17.9	42397	0.915

In [3]:

# Tab-separated table of country names with an `id` column, re-indexed by
# name so that an id can be looked up with ids.loc[<country name>].
ids = (
    pd.read_csv(
        'http://bl.ocks.org/d/4090846/world-country-names.tsv',
        sep='\t',
    )
    .set_index(['name'], drop=True)
)
ids.head()

Out[3]:

	id
name
Northern Cyprus	-1
Kosovo	-2
Somaliland	-3
Afghanistan	4
Albania	8

In [4]:

def country_name_converter(country):
    """Map a country name to the alternative spelling listed for it.

    The notebook calls this with names from the index of `ids` and uses the
    result as a row label in `hdi`.

    Parameters
    ----------
    country : str
        Country name to translate.

    Returns
    -------
    The replacement spelling when `country` has an entry in the table below,
    otherwise `country` itself, unchanged.
    """
    renames = {
        "Venezuela, Bolivarian Republic of": "Venezuela (Bolivarian Republic of)",
        "Tanzania, United Republic of": "Tanzania (United Republic of)",
        "Moldova, Republic of": "Moldova (Republic of)",
        "Micronesia, Federated States of": "Micronesia (Federated States of)",
        "Macedonia, the former Yugoslav Republic of": "The former Yugoslav Republic of Macedonia",
        "Korea, Republic of": "Korea (Republic of)",
        "Korea, Democratic People's Republic of": "Korea (Democratic People's Rep. of)",
        # The replacement is written with explicit \xc3\xb4 escapes, i.e. the
        # UTF-8 byte sequence for "ô".
        "Côte d'Ivoire": "C\xc3\xb4te d'Ivoire",
        "Iran, Islamic Republic of": "Iran (Islamic Republic of)",
        "Hong Kong": "Hong Kong, China (SAR)",
        "Palestinian Territory, Occupied": "Palestine, State of",
        "Congo, the Democratic Republic of the": "Congo (Democratic Republic of the)",
        "Bolivia, Plurinational State of": "Bolivia (Plurinational State of)",
    }
    # Names without an entry pass through untouched.
    return renames.get(country, country)

In [94]:

import re
# Build `codes`: {str(country id): [name, HDI, life index, education index,
# income index, geometric mean of the three indices]}, all rounded to 3 places.

# Column labels of the `hdi` table that feed the calculation.
COL_HDI = "Human Development Index (HDI) Value, 2013"
COL_LIFE = "Life expectancy at birth (years), 2013"
COL_MEAN_SCHOOLING = "Mean years of schooling (years), 2012 a"
COL_EXPECTED_SCHOOLING = "Expected years of schooling (years), 2012 a"
COL_GNI = "Gross national income (GNI) per capita (2011 PPP $), 2013"

# Bounds used to rescale each raw indicator.
# NOTE(review): these look like the UNDP HDI "goalposts" -- confirm against the
# UNDP technical notes for the report year before changing them.
LIFE_MIN, LIFE_MAX = 20, 85
MEAN_SCHOOLING_MAX = 15
EXPECTED_SCHOOLING_MAX = 18
GNI_MIN, GNI_MAX = 100, 75000


def _to_number(cell):
    """Drop every character except digits and dots from `cell`, then parse it as a float.

    Raises ValueError when nothing parseable is left and TypeError when `cell`
    is not a string.
    """
    return float(re.sub(r'[^\d.]+', '', cell))


def _hdi_record(name, row):
    """Return [name, HDI, life, education, income, composite] for one `hdi` row."""
    hdi_value = round(_to_number(row[COL_HDI]), 3)

    life = _to_number(row[COL_LIFE])
    life_index = round((life - LIFE_MIN) / (LIFE_MAX - LIFE_MIN), 3)

    mean_schooling = _to_number(row[COL_MEAN_SCHOOLING])
    expected_schooling = _to_number(row[COL_EXPECTED_SCHOOLING])
    education_index = round(
        (mean_schooling / MEAN_SCHOOLING_MAX
         + expected_schooling / EXPECTED_SCHOOLING_MAX) / 2,
        3,
    )

    gni = _to_number(row[COL_GNI])
    income_index = round(
        (np.log(gni) - np.log(GNI_MIN)) / (np.log(GNI_MAX) - np.log(GNI_MIN)),
        3,
    )

    # The composite is recomputed from the already-rounded component indices.
    composite = round((life_index * education_index * income_index) ** (1.0 / 3.0), 3)
    return [name, hdi_value, life_index, education_index, income_index, composite]


codes = {}
skipped = []  # names from `ids` that produced no entry, kept for inspection
for name in ids.index:
    try:
        # Look the row up once instead of once per indicator.
        row = hdi.loc[country_name_converter(name)]
        record = _hdi_record(name, row)
        # str(int(...)) gives a plain decimal key such as "4" or "-1" regardless
        # of how the installed NumPy version prints its scalar types.
        key = str(int(ids.loc[name, 'id']))
    except (KeyError, ValueError, TypeError):
        # KeyError: no row for this country in `hdi`;
        # ValueError / TypeError: a cell that cannot be parsed as a number.
        # Still best-effort, but no longer hides interrupts or unrelated bugs.
        skipped.append(name)
        continue
    codes[key] = record

In [96]:

import json
# Serialise `codes` to ../hdi.json. `open` works on both Python 2 and 3
# (the `file` builtin exists only in Python 2), and the `with` block closes
# the handle even if serialisation raises.
with open('../hdi.json','w') as out:
    json.dump(codes, out)