import numpy as np, requests, pandas as pd, zipfile, StringIO
# Download World Bank indicator ny.gdp.pcap.pp.kd as a zip archive and load
# its data CSV into `gdp`.
url = 'http://api.worldbank.org/v2/en/indicator/ny.gdp.pcap.pp.kd?downloadformat=csv'
# Preferred archive member name. It is not guaranteed to exist (opening this
# exact name has been seen to raise KeyError), so a fallback search is done
# below when it is missing.
filename = 'ny.gdp.pcap.pp.kd_Indicator_en_csv_v2.csv'

r = requests.get(url)
# Fail on an HTTP error rather than trying to unzip an error page.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))

members = z.namelist()
if filename not in members:
    # Fall back to the first CSV in the archive that is not a metadata file.
    # NOTE(review): assumes metadata members start with "Metadata" - confirm
    # against the actual archive listing.
    data_members = [m for m in members
                    if m.lower().endswith('.csv')
                    and not m.lower().startswith('metadata')]
    if not data_members:
        raise KeyError('No data CSV found in the archive; members: %r'
                       % (members,))
    filename = data_members[0]

# The first two lines of the CSV are skipped before the header row.
gdp = pd.read_csv(z.open(filename), skiprows=[0, 1])
# Drop the indicator-code column and every header-less column (pandas labels
# those "Unnamed: N"); matching by prefix avoids depending on the exact
# column count of the file.
unwanted = [c for c in gdp.columns
            if c == 'Indicator Code' or str(c).startswith('Unnamed')]
gdp = gdp.drop(unwanted, axis=1)
gdp.head(2)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-2-39949019b48c> in <module>() 3 r = requests.get(url) 4 z = zipfile.ZipFile(StringIO.StringIO(r.content)) ----> 5 gdp=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1) 6 gdp.head(2) C:\Anaconda2\lib\zipfile.pyc in open(self, name, mode, pwd) 959 else: 960 # Get info object for name --> 961 zinfo = self.getinfo(name) 962 963 zef_file.seek(zinfo.header_offset, 0) C:\Anaconda2\lib\zipfile.pyc in getinfo(self, name) 907 if info is None: 908 raise KeyError( --> 909 'There is no item named %r in the archive' % name) 910 911 return info KeyError: "There is no item named 'ny.gdp.pcap.pp.kd_Indicator_en_csv_v2.csv' in the archive"
# Download World Bank indicator ny.gnp.pcap.pp.kd as a zip archive and load
# its data CSV into `gnp`.
url = 'http://api.worldbank.org/v2/en/indicator/ny.gnp.pcap.pp.kd?downloadformat=csv'
# Preferred archive member name. It is not guaranteed to exist (opening this
# exact name has been seen to raise KeyError), so a fallback search is done
# below when it is missing.
filename = 'ny.gnp.pcap.pp.kd_Indicator_en_csv_v2.csv'

r = requests.get(url)
# Fail on an HTTP error rather than trying to unzip an error page.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))

members = z.namelist()
if filename not in members:
    # Fall back to the first CSV in the archive that is not a metadata file.
    # NOTE(review): assumes metadata members start with "Metadata" - confirm
    # against the actual archive listing.
    data_members = [m for m in members
                    if m.lower().endswith('.csv')
                    and not m.lower().startswith('metadata')]
    if not data_members:
        raise KeyError('No data CSV found in the archive; members: %r'
                       % (members,))
    filename = data_members[0]

# The first two lines of the CSV are skipped before the header row.
gnp = pd.read_csv(z.open(filename), skiprows=[0, 1])
# Drop the indicator-code column and every header-less column (pandas labels
# those "Unnamed: N"); matching by prefix avoids depending on the exact
# column count of the file.
unwanted = [c for c in gnp.columns
            if c == 'Indicator Code' or str(c).startswith('Unnamed')]
gnp = gnp.drop(unwanted, axis=1)
gnp.head(2)
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-3-82b5b2448674> in <module>() 3 r = requests.get(url) 4 z = zipfile.ZipFile(StringIO.StringIO(r.content)) ----> 5 gnp=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1) 6 gnp.head(2) C:\Anaconda2\lib\zipfile.pyc in open(self, name, mode, pwd) 959 else: 960 # Get info object for name --> 961 zinfo = self.getinfo(name) 962 963 zef_file.seek(zinfo.header_offset, 0) C:\Anaconda2\lib\zipfile.pyc in getinfo(self, name) 907 if info is None: 908 raise KeyError( --> 909 'There is no item named %r in the archive' % name) 910 911 return info KeyError: "There is no item named 'ny.gnp.pcap.pp.kd_Indicator_en_csv_v2.csv' in the archive"
# Life expectancy at birth (World Bank indicator sp.dyn.le00.in).
# The endpoint serves a zip archive; the CSV named below is read out of it.
url = 'http://api.worldbank.org/v2/en/indicator/sp.dyn.le00.in?downloadformat=csv'
filename = 'sp.dyn.le00.in_Indicator_en_csv_v2.csv'

r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))

# The first two lines of the CSV are skipped before the header row.
le_raw = pd.read_csv(z.open(filename), skiprows=[0, 1])
# Remove the two columns that are not wanted in the final table.
le = le_raw.drop(['Unnamed: 58', 'Indicator Code'], axis=1)
le.head(2)
Country Name | Country Code | Indicator Name | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | ... | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Aruba | ABW | Life expectancy at birth, total (years) | 65.988024 | 66.365537 | 66.713976 | 67.044293 | 67.369756 | 67.699 | 68.034683 | ... | 74.228073 | 74.375707 | 74.526244 | 74.67422 | 74.816146 | 74.952024 | 75.08039 | 75.206756 | NaN | NaN |
1 | Andorra | AND | Life expectancy at birth, total (years) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 rows × 57 columns
# Adult literacy rate (World Bank indicator se.adt.litr.zs).
# The endpoint serves a zip archive; the CSV named below is read out of it.
url = 'http://api.worldbank.org/v2/en/indicator/se.adt.litr.zs?downloadformat=csv'
filename = 'se.adt.litr.zs_Indicator_en_csv_v2.csv'

r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))

# The first two lines of the CSV are skipped before the header row.
alr_raw = pd.read_csv(z.open(filename), skiprows=[0, 1])
# Remove the two columns that are not wanted in the final table.
alr = alr_raw.drop(['Unnamed: 58', 'Indicator Code'], axis=1)
alr.head(2)
Country Name | Country Code | Indicator Name | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | ... | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Aruba | ABW | Literacy rate, adult total (% of people ages 1... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | 96.822639 | NaN | NaN | NaN | NaN |
1 | Andorra | AND | Literacy rate, adult total (% of people ages 1... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 rows × 57 columns
# Gross primary school enrollment (World Bank indicator se.prm.enrr).
# The endpoint serves a zip archive; the CSV named below is read out of it.
url = 'http://api.worldbank.org/v2/en/indicator/se.prm.enrr?downloadformat=csv'
filename = 'se.prm.enrr_Indicator_en_csv_v2.csv'

r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))

# The first two lines of the CSV are skipped before the header row.
ger1_raw = pd.read_csv(z.open(filename), skiprows=[0, 1])
# Remove the two columns that are not wanted in the final table.
ger1 = ger1_raw.drop(['Unnamed: 58', 'Indicator Code'], axis=1)
ger1.head(2)
Country Name | Country Code | Indicator Name | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | ... | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Aruba | ABW | School enrollment, primary (% gross) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 111.37672 | 114.23859 | 115.22441 | 113.07883 | 113.77574 | 113.72866 | 105.2194 | 104.06276 | NaN | NaN |
1 | Andorra | AND | School enrollment, primary (% gross) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 rows × 57 columns
# Gross secondary school enrollment (World Bank indicator se.sec.enrr).
# The endpoint serves a zip archive; the CSV named below is read out of it.
url = 'http://api.worldbank.org/v2/en/indicator/se.sec.enrr?downloadformat=csv'
filename = 'se.sec.enrr_Indicator_en_csv_v2.csv'

r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))

# The first two lines of the CSV are skipped before the header row.
ger2_raw = pd.read_csv(z.open(filename), skiprows=[0, 1])
# Remove the two columns that are not wanted in the final table.
ger2 = ger2_raw.drop(['Unnamed: 58', 'Indicator Code'], axis=1)
ger2.head(2)
Country Name | Country Code | Indicator Name | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | ... | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Aruba | ABW | School enrollment, secondary (% gross) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 95.92882 | 97.35637 | 102.54636 | 94.64913 | 96.72344 | 95.83605 | 97.01512 | 99.98673 | NaN | NaN |
1 | Andorra | AND | School enrollment, secondary (% gross) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 rows × 57 columns
# Gross tertiary school enrollment (World Bank indicator se.ter.enrr).
# The endpoint serves a zip archive; the CSV named below is read out of it.
url = 'http://api.worldbank.org/v2/en/indicator/se.ter.enrr?downloadformat=csv'
filename = 'se.ter.enrr_Indicator_en_csv_v2.csv'

r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))

# The first two lines of the CSV are skipped before the header row.
ger3_raw = pd.read_csv(z.open(filename), skiprows=[0, 1])
# Remove the two columns that are not wanted in the final table.
ger3 = ger3_raw.drop(['Unnamed: 58', 'Indicator Code'], axis=1)
ger3.head(2)
Country Name | Country Code | Indicator Name | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | ... | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Aruba | ABW | School enrollment, tertiary (% gross) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | 31.68347 | 30.90319 | 33.92613 | 35.24049 | 35.36232 | 37.35172 | 38.73762 | 37.76113 | NaN | NaN |
1 | Andorra | AND | School enrollment, tertiary (% gross) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 rows × 57 columns
# Load the tab-separated country-name table and index it by the "name"
# column, so that `ids.loc[<country name>]` yields that country's row.
name_table = pd.read_csv(
    'http://bl.ocks.org/d/4090846/world-country-names.tsv',
    sep='\t',
)
ids = name_table.set_index(['name'], drop=True)
ids.head()
id | |
---|---|
name | |
Northern Cyprus | -1 |
Kosovo | -2 |
Somaliland | -3 |
Afghanistan | 4 |
Albania | 8 |
# Country names that are spelled differently in the two tables being joined:
# key = spelling passed in by the caller, value = spelling to look up instead.
_COUNTRY_NAME_OVERRIDES = {
    "Venezuela, Bolivarian Republic of": "Venezuela (Bolivarian Republic of)",
    "Tanzania, United Republic of": "Tanzania (United Republic of)",
    "Moldova, Republic of": "Moldova (Republic of)",
    "Micronesia, Federated States of": "Micronesia (Federated States of)",
    "Macedonia, the former Yugoslav Republic of": "The former Yugoslav Republic of Macedonia",
    "Korea, Republic of": "Korea (Republic of)",
    "Korea, Democratic People's Republic of": "Korea (Democratic People's Rep. of)",
    # The replacement is written as escaped UTF-8 bytes and is kept verbatim.
    # NOTE(review): this presumably matches a byte-string index under
    # Python 2 - confirm before changing the literal.
    "Côte d'Ivoire": "C\xc3\xb4te d'Ivoire",
    "Iran, Islamic Republic of": "Iran (Islamic Republic of)",
    "Hong Kong": "Hong Kong, China (SAR)",
    "Palestinian Territory, Occupied": "Palestine, State of",
    "Congo, the Democratic Republic of the": "Congo (Democratic Republic of the)",
    "Bolivia, Plurinational State of": "Bolivia (Plurinational State of)",
}


def country_name_converter(country):
    """Translate a country name into the spelling used for the HDI lookup.

    Args:
        country: Country name as it appears in the name/id table.

    Returns:
        The replacement spelling when `country` is one of the known
        mismatches, otherwise `country` unchanged.
    """
    return _COUNTRY_NAME_OVERRIDES.get(country, country)
import re
# Build `codes`: a dict keyed by repr(country id) whose value is the list
#   [name, HDI value, life-expectancy index, education index, income index,
#    cube root of the product of the three indices]
# with every number rounded to 3 decimals.
# NOTE(review): `hdi` is not defined in the visible part of this file; it is
# presumably a DataFrame indexed by country name whose cells are strings -
# confirm against the cell that loads it.

# Keeps only digits and dots, so footnote markers and thousands separators
# (and also any sign character) are stripped before float conversion.
_NON_NUMERIC = re.compile(r'[^\d.]+')


def _hdi_number(row, column):
    """Return the value of `column` in `row` parsed as a float."""
    return float(_NON_NUMERIC.sub('', row[column]))


codes = {}
for i in ids.index:
    try:
        # Look the country up once instead of once per indicator.
        row = hdi.loc[country_name_converter(i)]
        a = [i]
        a.append(round(
            _hdi_number(row, u"Human Development Index (HDI) Value, 2013"), 3))
        # Life expectancy rescaled from the range 20..85 years.
        a.append(round(
            (_hdi_number(row, u"Life expectancy at birth (years), 2013") - 20)
            / (85 - 20), 3))
        # Average of mean schooling (out of 15) and expected schooling (out of 18).
        a.append(round(
            (_hdi_number(row, u"Mean years of schooling (years), 2012 a") / 15
             + _hdi_number(row, u"Expected years of schooling (years), 2012 a") / 18)
            / 2, 3))
        # Log-scaled income between 100 and 75000.
        a.append(round(
            (np.log(_hdi_number(
                row, u"Gross national income (GNI) per capita (2011 PPP $), 2013"))
             - np.log(100))
            / (np.log(75000) - np.log(100)), 3))
        # Geometric mean of the three component indices computed above.
        a.append(round((a[2] * a[3] * a[4]) ** (1.0 / 3.0), 3))
        codes[repr(ids.loc[i][0])] = a
    except (KeyError, IndexError, TypeError, ValueError):
        # Best effort: skip countries that are missing from `hdi` or whose
        # cells cannot be parsed as numbers. Other errors (for example an
        # undefined `hdi`) are no longer silently swallowed.
        continue
import json
# Serialize `codes` to hdi2.json. `open` inside a `with` block replaces the
# Python-2-only `file()` builtin and guarantees the handle is flushed and
# closed once the write completes.
with open('hdi2.json', 'w') as out:
    out.write(json.dumps(codes))