#!/usr/bin/env python
# coding: utf-8
"""Download World Bank indicator tables and export HDI components as JSON.

The script (originally a notebook export) does three things:

1. Downloads seven World Bank indicator archives and loads each one into a
   module-level DataFrame (``gdp``, ``gnp``, ``le``, ``alr``, ``ger1``,
   ``ger2``, ``ger3``).  Nothing further down reads these tables.
2. Loads a country-name/id table into ``ids``.
3. For every country in ``ids`` that can be found in ``hdi``, computes the
   life-expectancy, education and income indices plus their geometric mean
   and writes the result to ``hdi2.json``.

NOTE(review): ``hdi`` is not defined anywhere in this script.  From the way
it is used it must be a DataFrame indexed by country name whose cells for
the columns named below are strings -- TODO confirm and load it before the
export section.
"""

import io
import json
import re
import zipfile

import numpy as np
import pandas as pd
import requests

try:
    # Python 2 only.  Nothing below needs it any more (io.BytesIO is used
    # instead); the import is kept in case other code relies on the name.
    import StringIO  # noqa: F401
except ImportError:
    pass


# --- World Bank indicator downloads ----------------------------------------

_WB_URL = 'http://api.worldbank.org/v2/en/indicator/{code}?downloadformat=csv'
_WB_CSV = '{code}_Indicator_en_csv_v2.csv'


def _load_indicator(code):
    """Download one World Bank indicator archive and return its table.

    Args:
        code: Lower-case indicator code as used in the download URL and in
            the CSV file name inside the archive, e.g. ``'sp.dyn.le00.in'``.

    Returns:
        The CSV as a DataFrame, with the first two lines of the file skipped
        and the ``'Unnamed: 58'`` and ``'Indicator Code'`` columns removed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the archive has no CSV with the expected name (a missing
            column in ``drop`` also raises).
    """
    response = requests.get(_WB_URL.format(code=code))
    # Fail here with the HTTP status rather than later with a bad-zip error.
    response.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        with archive.open(_WB_CSV.format(code=code)) as handle:
            table = pd.read_csv(handle, skiprows=[0, 1])
    return table.drop(['Unnamed: 58', 'Indicator Code'], axis=1)


gdp = _load_indicator('ny.gdp.pcap.pp.kd')
gnp = _load_indicator('ny.gnp.pcap.pp.kd')
le = _load_indicator('sp.dyn.le00.in')
alr = _load_indicator('se.adt.litr.zs')
ger1 = _load_indicator('se.prm.enrr')
ger2 = _load_indicator('se.sec.enrr')
ger3 = _load_indicator('se.ter.enrr')


# --- Country names and ids --------------------------------------------------

ids = pd.read_csv(
    'http://bl.ocks.org/d/4090846/world-country-names.tsv', sep='\t'
).set_index(['name'], drop=True)


# Maps a country name as it appears in ``ids`` to the name used to index
# ``hdi``.  The original chain also mapped "Côte d'Ivoire" to the escaped
# UTF-8 bytes of the very same name; that entry is a no-op on Python 2 and
# would garble the name on Python 3, so it is covered by the fall-through.
_HDI_COUNTRY_NAMES = {
    "Venezuela, Bolivarian Republic of": "Venezuela (Bolivarian Republic of)",
    "Tanzania, United Republic of": "Tanzania (United Republic of)",
    "Moldova, Republic of": "Moldova (Republic of)",
    "Micronesia, Federated States of": "Micronesia (Federated States of)",
    "Macedonia, the former Yugoslav Republic of":
        "The former Yugoslav Republic of Macedonia",
    "Korea, Republic of": "Korea (Republic of)",
    "Korea, Democratic People's Republic of":
        "Korea (Democratic People's Rep. of)",
    "Iran, Islamic Republic of": "Iran (Islamic Republic of)",
    "Hong Kong": "Hong Kong, China (SAR)",
    "Palestinian Territory, Occupied": "Palestine, State of",
    "Congo, the Democratic Republic of the":
        "Congo (Democratic Republic of the)",
    "Bolivia, Plurinational State of": "Bolivia (Plurinational State of)",
}


def country_name_converter(country):
    """Return the ``hdi`` spelling of a country name taken from ``ids``.

    Args:
        country: Country name as found in the index of ``ids``.

    Returns:
        The replacement name if one is listed in the mapping above,
        otherwise ``country`` unchanged.
    """
    return _HDI_COUNTRY_NAMES.get(country, country)


# --- HDI component export ---------------------------------------------------

# Everything except digits and dots is stripped before parsing a cell.
_NON_NUMERIC = re.compile(r'[^\d.]+')


def _hdi_number(row, column):
    """Parse the numeric part of ``row[column]`` as a float."""
    return float(_NON_NUMERIC.sub('', row[column]))


# ``codes`` maps repr(<first column of ids>) to
# [name, HDI value, life index, education index, income index, composite],
# every number rounded to three decimals.
# NOTE(review): repr() of a NumPy scalar differs between NumPy versions
# (e.g. '4' vs 'np.int64(4)'); confirm the key format the consumer expects.
codes = {}
for name, id_row in ids.iterrows():
    try:
        row = hdi.loc[country_name_converter(name)]
        entry = [name]
        entry.append(round(_hdi_number(
            row, u"Human Development Index (HDI) Value, 2013"), 3))
        # Life expectancy index: goalposts 20 and 85 years.
        entry.append(round((_hdi_number(
            row, u"Life expectancy at birth (years), 2013") - 20)
            / (85 - 20), 3))
        # Education index: mean of the two schooling indices (caps 15 and 18).
        entry.append(round((
            _hdi_number(row, u"Mean years of schooling (years), 2012 a") / 15
            + _hdi_number(
                row, u"Expected years of schooling (years), 2012 a") / 18
        ) / 2, 3))
        # Income index: log scale between 100 and 75000.
        entry.append(round((np.log(_hdi_number(
            row, u"Gross national income (GNI) per capita (2011 PPP $), 2013"))
            - np.log(100)) / (np.log(75000) - np.log(100)), 3))
        # Composite: geometric mean of the three rounded indices.
        entry.append(round(
            (entry[2] * entry[3] * entry[4]) ** (1.0 / 3.0), 3))
        codes[repr(id_row.iloc[0])] = entry
    except (KeyError, IndexError, TypeError, ValueError):
        # Country missing from ``hdi`` or a cell that cannot be parsed:
        # skip it.  Anything else (e.g. ``hdi`` not being defined at all)
        # is a real error and must not be silenced.
        continue


with open('hdi2.json', 'w') as out:
    out.write(json.dumps(codes))