#!/usr/bin/env python
# coding: utf-8
"""Recompute Human Development Index components and dump them as JSON.

The script downloads the UNDP "Table 1" HDI page and a TSV that maps country
names to numeric ids, derives three normalised sub-indices (life expectancy,
schooling, income) plus their geometric mean for every country present in both
tables, and writes the result to ``../hdi.json``.
"""

import io
import json
import logging
import re

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)

HDI_URL = 'http://hdr.undp.org/en/content/table-1-human-development-index-and-its-components'
IDS_URL = 'http://bl.ocks.org/d/4090846/world-country-names.tsv'
OUTPUT_PATH = '../hdi.json'

# Column headers of the HDI table that feed the computation.
HDI_COLUMN = "Human Development Index (HDI) Value, 2013"
LIFE_COLUMN = "Life expectancy at birth (years), 2013"
MEAN_SCHOOLING_COLUMN = "Mean years of schooling (years), 2012 a"
EXPECTED_SCHOOLING_COLUMN = "Expected years of schooling (years), 2012 a"
GNI_COLUMN = "Gross national income (GNI) per capita (2011 PPP $), 2013"

# Bounds used to normalise each raw indicator.
LIFE_MIN, LIFE_MAX = 20, 85
MEAN_SCHOOLING_MAX = 15
EXPECTED_SCHOOLING_MAX = 18
GNI_MIN, GNI_MAX = 100, 75000

# Names as spelled in the id table -> names as spelled in the HDI table.
# "Côte d'Ivoire" is deliberately absent: the previous mapping returned its
# UTF-8 bytes written as escapes, which is the same string on Python 2 but
# mojibake on Python 3, so the name is now passed through unchanged.
_NAME_OVERRIDES = {
    "Venezuela, Bolivarian Republic of": "Venezuela (Bolivarian Republic of)",
    "Tanzania, United Republic of": "Tanzania (United Republic of)",
    "Moldova, Republic of": "Moldova (Republic of)",
    "Micronesia, Federated States of": "Micronesia (Federated States of)",
    "Macedonia, the former Yugoslav Republic of": "The former Yugoslav Republic of Macedonia",
    "Korea, Republic of": "Korea (Republic of)",
    "Korea, Democratic People's Republic of": "Korea (Democratic People's Rep. of)",
    "Iran, Islamic Republic of": "Iran (Islamic Republic of)",
    "Hong Kong": "Hong Kong, China (SAR)",
    "Palestinian Territory, Occupied": "Palestine, State of",
    "Congo, the Democratic Republic of the": "Congo (Democratic Republic of the)",
    "Bolivia, Plurinational State of": "Bolivia (Plurinational State of)",
}

# Everything except digits and dots (thousands separators, footnote letters).
_NON_NUMERIC = re.compile(r'[^\d.]+')


def country_name_converter(country):
    """Translate an id-table country name into the HDI table's spelling.

    Names without a known override are returned unchanged.
    """
    return _NAME_OVERRIDES.get(country, country)


def _parse_number(value):
    """Return a table cell as a float.

    Strings are stripped of every character that is not a digit or a dot
    before conversion. Cells that pandas already parsed as numbers are used
    directly.

    Raises:
        ValueError: the cell holds no parsable number (e.g. ``".."`` or NaN).
        TypeError: the cell is neither a string nor a number.
    """
    if isinstance(value, (int, float, np.number)):
        number = float(value)
        if number != number:  # NaN marks a missing cell
            raise ValueError("missing value")
        return number
    return float(_NON_NUMERIC.sub('', value))


def _country_record(name, row):
    """Build ``[name, hdi, life, education, income, geometric mean]``.

    ``row`` is the HDI table row for the country; every figure is rounded to
    three decimals, and the geometric mean is taken over the already rounded
    sub-indices.
    """
    hdi_value = round(_parse_number(row[HDI_COLUMN]), 3)
    life = round(
        (_parse_number(row[LIFE_COLUMN]) - LIFE_MIN) / (LIFE_MAX - LIFE_MIN), 3)
    education = round(
        (_parse_number(row[MEAN_SCHOOLING_COLUMN]) / MEAN_SCHOOLING_MAX
         + _parse_number(row[EXPECTED_SCHOOLING_COLUMN]) / EXPECTED_SCHOOLING_MAX) / 2, 3)
    income = round(float(
        (np.log(_parse_number(row[GNI_COLUMN])) - np.log(GNI_MIN))
        / (np.log(GNI_MAX) - np.log(GNI_MIN))), 3)
    combined = round((life * education * income) ** (1.0 / 3.0), 3)
    return [name, hdi_value, life, education, income, combined]


def build_codes(ids, hdi):
    """Map each country id (as a string) to its computed record.

    Args:
        ids: DataFrame indexed by country name whose first column is the id.
        hdi: DataFrame indexed by country name holding the HDI columns.

    Countries that are missing from ``hdi`` or whose figures cannot be parsed
    are skipped (and logged) rather than aborting the whole run.
    """
    codes = {}
    for name, country_id in zip(ids.index, ids.iloc[:, 0]):
        try:
            record = _country_record(name, hdi.loc[country_name_converter(name)])
            # str(int(...)) keeps keys like "4"; repr() of a NumPy scalar is
            # "np.int64(4)" on NumPy 2.
            codes[str(int(country_id))] = record
        except (KeyError, ValueError, TypeError) as exc:
            logger.info("Skipping %r: %s", name, exc)
    return codes


def fetch_hdi_table(url=HDI_URL):
    """Download the HDI page and return its first table indexed by country."""
    # Imported here so the pure helpers above work without the HTTP stack.
    import requests

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    table = pd.read_html(io.BytesIO(response.content), header=0)[0]
    return table.drop('Change in rank, 2012-2013', axis=1).set_index(['Country'], drop=True)


def fetch_country_ids(url=IDS_URL):
    """Download the name/id TSV and return it indexed by country name."""
    return pd.read_csv(url, sep='\t').set_index(['name'], drop=True)


def main():
    """Fetch both tables, compute the indices and write them to OUTPUT_PATH."""
    hdi = fetch_hdi_table()
    ids = fetch_country_ids()
    codes = build_codes(ids, hdi)
    with open(OUTPUT_PATH, 'w') as handle:
        handle.write(json.dumps(codes))


if __name__ == '__main__':
    main()