In [1]:
import numpy as np, requests, pandas as pd
In [2]:
# Download the UNDP "Table 1" page (Human Development Index and its components).
url='http://hdr.undp.org/en/content/table-1-human-development-index-and-its-components'
r=requests.get(url)
# Stop here on an HTTP error instead of handing an error page to the HTML parser.
r.raise_for_status()

# Take the first HTML table on the page, drop the rank-change column and index
# the rows by country name.
# NOTE: infer_types=False is deprecated (pandas emits a FutureWarning saying it
# has no effect from 0.14); it is kept so parsing on the pandas version in use
# stays unchanged.
hdi = (
    pd.read_html(r.content, header=0, infer_types=False)[0]
    .drop('Change in rank, 2012-2013', axis=1)
    .set_index(['Country'], drop=True)
)
hdi.head()
C:\Program Files\Anaconda\lib\site-packages\pandas\io\html.py:841: FutureWarning: infer_types will have no effect in 0.14
  warnings.warn("infer_types will have no effect in 0.14", FutureWarning)
Out[2]:
HDI rank Human Development Index (HDI) Value, 2013 Life expectancy at birth (years), 2013 Mean years of schooling (years), 2012 a Expected years of schooling (years), 2012 a Gross national income (GNI) per capita (2011 PPP $), 2013 Human Development Index (HDI) Value, 2012
Country
Very high human development nan nan nan nan nan nan nan
Norway 1.0 0.944 81.5 12.6 17.6 63909 0.943
Australia 2.0 0.933 82.5 12.8 19.9 41524 0.931
Switzerland 3.0 0.917 82.6 12.2 15.7 53762 0.916
Netherlands 4.0 0.915 81.0 11.9 17.9 42397 0.915
In [3]:
# Tab-separated table of country names and ids; rows are indexed by the
# 'name' column so an id can be looked up by country name.
ids = pd.read_csv(
    'http://bl.ocks.org/d/4090846/world-country-names.tsv',
    sep='\t',
).set_index(['name'], drop=True)
ids.head()
Out[3]:
id
name
Northern Cyprus -1
Kosovo -2
Somaliland -3
Afghanistan 4
Albania 8
In [4]:
# Country names as spelled in the id table (keys) mapped to the spelling used
# as the index of the HDI table (values).
# "Côte d'Ivoire" needs no entry: the original code mapped it to the UTF-8 byte
# escape of the very same name, which is the identity on Python 2 and produces
# mojibake on Python 3, so the name is simply passed through unchanged.
_HDI_NAME_OVERRIDES = {
    "Venezuela, Bolivarian Republic of": "Venezuela (Bolivarian Republic of)",
    "Tanzania, United Republic of": "Tanzania (United Republic of)",
    "Moldova, Republic of": "Moldova (Republic of)",
    "Micronesia, Federated States of": "Micronesia (Federated States of)",
    "Macedonia, the former Yugoslav Republic of": "The former Yugoslav Republic of Macedonia",
    "Korea, Republic of": "Korea (Republic of)",
    "Korea, Democratic People's Republic of": "Korea (Democratic People's Rep. of)",
    "Iran, Islamic Republic of": "Iran (Islamic Republic of)",
    "Hong Kong": "Hong Kong, China (SAR)",
    "Palestinian Territory, Occupied": "Palestine, State of",
    "Congo, the Democratic Republic of the": "Congo (Democratic Republic of the)",
    "Bolivia, Plurinational State of": "Bolivia (Plurinational State of)",
}


def country_name_converter(country):
    """Translate a country name from the id table into its HDI-table spelling.

    Parameters
    ----------
    country : str
        Country name to translate.

    Returns
    -------
    str
        The overriding spelling when one is defined for ``country``;
        otherwise ``country`` itself, unchanged.
    """
    return _HDI_NAME_OVERRIDES.get(country, country)
In [94]:
import re

# Normalisation bounds, unchanged from the constants the original cell used inline.
# NOTE(review): presumably the UNDP HDI "goalposts" -- confirm against the HDR
# technical notes before changing any of them.
LIFE_EXPECTANCY_MIN = 20
LIFE_EXPECTANCY_MAX = 85
MEAN_SCHOOLING_MAX = 15
EXPECTED_SCHOOLING_MAX = 18
GNI_MIN = 100
GNI_MAX = 75000


def _numeric_cell(row, column):
    """Return ``row[column]`` as a float.

    Every run of characters other than digits and '.' is removed from the cell
    text before conversion. Raises TypeError when the cell is not a string and
    ValueError when nothing parseable is left.
    """
    return float(re.sub(r'[^\d.]+', '', row[column]))


def _hdi_record(name, row):
    """Build ``[name, hdi, health, education, income, composite]`` for one HDI row.

    ``hdi`` is the published 2013 value; the three component indices are
    normalised with the bounds above and rounded to three decimals;
    ``composite`` is the cube root of the product of the three *rounded*
    component indices, itself rounded to three decimals.
    """
    hdi_value = round(_numeric_cell(row, "Human Development Index (HDI) Value, 2013"), 3)
    health = round(
        (_numeric_cell(row, "Life expectancy at birth (years), 2013") - LIFE_EXPECTANCY_MIN)
        / (LIFE_EXPECTANCY_MAX - LIFE_EXPECTANCY_MIN), 3)
    education = round(
        (_numeric_cell(row, "Mean years of schooling (years), 2012 a") / MEAN_SCHOOLING_MAX
         + _numeric_cell(row, "Expected years of schooling (years), 2012 a") / EXPECTED_SCHOOLING_MAX)
        / 2, 3)
    income = round(
        (np.log(_numeric_cell(row, "Gross national income (GNI) per capita (2011 PPP $), 2013"))
         - np.log(GNI_MIN))
        / (np.log(GNI_MAX) - np.log(GNI_MIN)), 3)
    composite = round((health * education * income) ** (1.0 / 3.0), 3)
    return [name, hdi_value, health, education, income, composite]


# Map each country id (as a decimal string) to its record.
codes = {}
for country in ids.index:
    try:
        # Look the HDI row up once instead of once per column.
        row = hdi.loc[country_name_converter(country)]
        record = _hdi_record(country, row)
        # str(int(...)) yields '4' / '-1' on every NumPy version, whereas
        # repr() of a NumPy scalar is 'np.int64(4)' on NumPy >= 2.
        key = str(int(ids.loc[country, 'id']))
    except (KeyError, TypeError, ValueError):
        # Best effort, as before: skip countries that are missing from the HDI
        # table or whose cells are empty / not parseable as numbers. Unlike a
        # bare ``except``, this lets KeyboardInterrupt and real bugs surface.
        continue
    codes[key] = record
In [96]:
import json

# Serialise the per-country records to ../hdi.json.
# open() works on both Python 2 and 3 (the file() builtin exists only on
# Python 2), and the with-block guarantees the handle is flushed and closed.
with open('../hdi.json', 'w') as out_file:
    out_file.write(json.dumps(codes))