In [1]:
import numpy as np, requests, pandas as pd, zipfile, StringIO
In [2]:
url='http://api.worldbank.org/v2/en/indicator/ny.gdp.pcap.pp.kd?downloadformat=csv'
filename='ny.gdp.pcap.pp.kd_Indicator_en_csv_v2.csv'
r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))
gdp=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
gdp.head(2)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-2-39949019b48c> in <module>()
      3 r = requests.get(url)
      4 z = zipfile.ZipFile(StringIO.StringIO(r.content))
----> 5 gdp=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
      6 gdp.head(2)

C:\Anaconda2\lib\zipfile.pyc in open(self, name, mode, pwd)
    959             else:
    960                 # Get info object for name
--> 961                 zinfo = self.getinfo(name)
    962 
    963             zef_file.seek(zinfo.header_offset, 0)

C:\Anaconda2\lib\zipfile.pyc in getinfo(self, name)
    907         if info is None:
    908             raise KeyError(
--> 909                 'There is no item named %r in the archive' % name)
    910 
    911         return info

KeyError: "There is no item named 'ny.gdp.pcap.pp.kd_Indicator_en_csv_v2.csv' in the archive"
In [3]:
url='http://api.worldbank.org/v2/en/indicator/ny.gnp.pcap.pp.kd?downloadformat=csv'
filename='ny.gnp.pcap.pp.kd_Indicator_en_csv_v2.csv'
r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))
gnp=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
gnp.head(2)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-3-82b5b2448674> in <module>()
      3 r = requests.get(url)
      4 z = zipfile.ZipFile(StringIO.StringIO(r.content))
----> 5 gnp=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
      6 gnp.head(2)

C:\Anaconda2\lib\zipfile.pyc in open(self, name, mode, pwd)
    959             else:
    960                 # Get info object for name
--> 961                 zinfo = self.getinfo(name)
    962 
    963             zef_file.seek(zinfo.header_offset, 0)

C:\Anaconda2\lib\zipfile.pyc in getinfo(self, name)
    907         if info is None:
    908             raise KeyError(
--> 909                 'There is no item named %r in the archive' % name)
    910 
    911         return info

KeyError: "There is no item named 'ny.gnp.pcap.pp.kd_Indicator_en_csv_v2.csv' in the archive"
In [51]:
url='http://api.worldbank.org/v2/en/indicator/sp.dyn.le00.in?downloadformat=csv'
filename='sp.dyn.le00.in_Indicator_en_csv_v2.csv'
r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))
le=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
le.head(2)
Out[51]:
Country Name Country Code Indicator Name 1961 1962 1963 1964 1965 1966 1967 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
0 Aruba ABW Life expectancy at birth, total (years) 65.988024 66.365537 66.713976 67.044293 67.369756 67.699 68.034683 ... 74.228073 74.375707 74.526244 74.67422 74.816146 74.952024 75.08039 75.206756 NaN NaN
1 Andorra AND Life expectancy at birth, total (years) NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2 rows × 57 columns

In [53]:
url='http://api.worldbank.org/v2/en/indicator/se.adt.litr.zs?downloadformat=csv'
filename='se.adt.litr.zs_Indicator_en_csv_v2.csv'
r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))
alr=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
alr.head(2)
Out[53]:
Country Name Country Code Indicator Name 1961 1962 1963 1964 1965 1966 1967 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
0 Aruba ABW Literacy rate, adult total (% of people ages 1... NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 96.822639 NaN NaN NaN NaN
1 Andorra AND Literacy rate, adult total (% of people ages 1... NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2 rows × 57 columns

In [54]:
url='http://api.worldbank.org/v2/en/indicator/se.prm.enrr?downloadformat=csv'
filename='se.prm.enrr_Indicator_en_csv_v2.csv'
r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))
ger1=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
ger1.head(2)
Out[54]:
Country Name Country Code Indicator Name 1961 1962 1963 1964 1965 1966 1967 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
0 Aruba ABW School enrollment, primary (% gross) NaN NaN NaN NaN NaN NaN NaN ... 111.37672 114.23859 115.22441 113.07883 113.77574 113.72866 105.2194 104.06276 NaN NaN
1 Andorra AND School enrollment, primary (% gross) NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2 rows × 57 columns

In [55]:
url='http://api.worldbank.org/v2/en/indicator/se.sec.enrr?downloadformat=csv'
filename='se.sec.enrr_Indicator_en_csv_v2.csv'
r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))
ger2=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
ger2.head(2)
Out[55]:
Country Name Country Code Indicator Name 1961 1962 1963 1964 1965 1966 1967 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
0 Aruba ABW School enrollment, secondary (% gross) NaN NaN NaN NaN NaN NaN NaN ... 95.92882 97.35637 102.54636 94.64913 96.72344 95.83605 97.01512 99.98673 NaN NaN
1 Andorra AND School enrollment, secondary (% gross) NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2 rows × 57 columns

In [56]:
url='http://api.worldbank.org/v2/en/indicator/se.ter.enrr?downloadformat=csv'
filename='se.ter.enrr_Indicator_en_csv_v2.csv'
r = requests.get(url)
z = zipfile.ZipFile(StringIO.StringIO(r.content))
ger3=pd.read_csv(z.open(filename),skiprows=[0,1]).drop('Unnamed: 58',axis=1).drop('Indicator Code',axis=1)
ger3.head(2)
Out[56]:
Country Name Country Code Indicator Name 1961 1962 1963 1964 1965 1966 1967 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
0 Aruba ABW School enrollment, tertiary (% gross) NaN NaN NaN NaN NaN NaN NaN ... 31.68347 30.90319 33.92613 35.24049 35.36232 37.35172 38.73762 37.76113 NaN NaN
1 Andorra AND School enrollment, tertiary (% gross) NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2 rows × 57 columns

In [3]:
ids=pd.read_csv('http://bl.ocks.org/d/4090846/world-country-names.tsv',sep='\t').set_index(['name'],drop=True)
ids.head()
Out[3]:
id
name
Northern Cyprus -1
Kosovo -2
Somaliland -3
Afghanistan 4
Albania 8
In [4]:
def country_name_converter(country):
    if country=="Venezuela, Bolivarian Republic of": return "Venezuela (Bolivarian Republic of)"
    elif country=="Tanzania, United Republic of": return "Tanzania (United Republic of)"
    elif country=="Moldova, Republic of": return "Moldova (Republic of)"
    elif country=="Micronesia, Federated States of": return "Micronesia (Federated States of)"
    elif country=="Macedonia, the former Yugoslav Republic of": return "The former Yugoslav Republic of Macedonia"
    elif country=="Korea, Republic of": return "Korea (Republic of)"
    elif country=="Korea, Democratic People's Republic of": return "Korea (Democratic People's Rep. of)"
    elif country=="Côte d'Ivoire": return "C\xc3\xb4te d'Ivoire"
    elif country=="Iran, Islamic Republic of": return "Iran (Islamic Republic of)"
    elif country=="Hong Kong": return "Hong Kong, China (SAR)"
    elif country=="Palestinian Territory, Occupied": return "Palestine, State of"
    elif country=="Congo, the Democratic Republic of the": return "Congo (Democratic Republic of the)"
    elif country=="Bolivia, Plurinational State of": return "Bolivia (Plurinational State of)"
    else: return country
In [6]:
import re
codes={}
for i in ids.index:
    try: 
        a=[i]
        a.append(round(float(re.sub(r'[^\d.]+', '',hdi.loc[country_name_converter(i)]\
                                    [u"Human Development Index (HDI) Value, 2013"])),3))
        a.append(round((float(re.sub(r'[^\d.]+', '',hdi.loc[country_name_converter(i)]\
                                     [u"Life expectancy at birth (years), 2013"]))-20)/(85-20),3))
        a.append(round((float(re.sub(r'[^\d.]+', '',hdi.loc[country_name_converter(i)]\
                                     [u"Mean years of schooling (years), 2012 a"]))/15+\
                  float(re.sub(r'[^\d.]+', '',hdi.loc[country_name_converter(i)]\
                               [u"Expected years of schooling (years), 2012 a"]))/18)/2,3))
        a.append(round((np.log(float(re.sub(r'[^\d.]+', '',hdi.loc[country_name_converter(i)]\
                        [u"Gross national income (GNI) per capita (2011 PPP $), 2013"])))-np.log(100))\
                        /(np.log(75000)-np.log(100)),3))
        a.append(round((a[2]*a[3]*a[4])**(1.0/3.0),3))
        codes[repr(ids.loc[i][0])]=a
    except: pass
In [8]:
import json
file('hdi2.json','w').write(json.dumps(codes))