In [18]:
import numpy as np, requests, pandas as pd
In [24]:
# Download every HTML table on the GeoNames country listing; the country data is
# the second table on the page (index 1).
geonames_tables = pd.read_html(
    'http://www.geonames.org/countries/',
    header=0,
    infer_types=False,
)
countries = geonames_tables[1]

# Swap the scraped header for short column names, then key the frame by country name.
countries.columns = [
    'ISO2', 'ISO3', 'ISONUM', 'FIPS', 'Country',
    'Capital', 'Area', 'Population', 'Continent',
]
countries = countries.set_index('Country', drop=True)

# NOTE(review): the recorded output shows `nan` in Continent for Antigua and
# Barbuda / Anguilla -- presumably the literal code 'NA' is being read as a
# missing value; verify before relying on the Continent (or ISO2) column.
countries.head(5)
Out[24]:
ISO2 ISO3 ISONUM FIPS Capital Area Population Continent
Country
Andorra AD AND 20 AN Andorra la Vella 468.0 84000 EU
United Arab Emirates AE ARE 784 AE Abu Dhabi 82880.0 4975593 AS
Afghanistan AF AFG 4 AF Kabul 647500.0 29121286 AS
Antigua and Barbuda AG ATG 28 AC St. John's 443.0 86754 nan
Anguilla AI AIA 660 AV The Valley 102.0 13254 nan
In [35]:
# Build a lookup: numeric ISO code -> [population, ISO3 code].
# Rows whose fields cannot be converted are skipped on purpose (best effort),
# but only the expected conversion/lookup errors are swallowed, so interrupts
# and genuine programming errors still surface.
codes = {}
for country_name in countries.index:
    try:
        iso_numeric = int(countries.loc[country_name, 'ISONUM'])
        population = float(countries.loc[country_name, 'Population'])
        iso3 = str(countries.loc[country_name, 'ISO3'])
    except (KeyError, TypeError, ValueError):
        # Missing / non-numeric ISONUM or Population, or an index label that
        # matches several rows (the lookup then yields a Series, not a scalar).
        continue
    codes[iso_numeric] = [population, iso3]
    
In [94]:
import re
# Bounds used to rescale each indicator to a 0-1 index.
# NOTE(review): these values match the goalposts in the UNDP Human Development
# Report technical notes (life expectancy 20-85 years, schooling maxima 15 and
# 18 years, GNI per capita 100-75000 PPP$) -- confirm against the report edition
# the `hdi` table was taken from.
HDI_LIFE_MIN, HDI_LIFE_MAX = 20, 85
HDI_MEAN_SCHOOLING_MAX = 15
HDI_EXPECTED_SCHOOLING_MAX = 18
# Loop-invariant logarithms for the income index, computed once.
HDI_LOG_GNI_MIN = np.log(100)
HDI_LOG_GNI_RANGE = np.log(75000) - np.log(100)


def _hdi_number(row, column):
    """Return row[column] as a float after removing every character that is not a digit or a dot."""
    return float(re.sub(r'[^\d.]+', '', row[column]))


# For every entry of `ids`, collect
#   [id label, reported HDI, life index, education index, income index,
#    composite index, composite index]
# keyed by repr() of the first field of the matching `ids` row.
codes = {}
for i in ids.index:
    try:
        # Look the country up once instead of once per indicator.
        row = hdi.loc[country_name_converter(i)]

        a = [i]
        a.append(round(_hdi_number(row, "Human Development Index (HDI) Value, 2013"), 3))
        a.append(round((_hdi_number(row, "Life expectancy at birth (years), 2013") - HDI_LIFE_MIN)
                       / (HDI_LIFE_MAX - HDI_LIFE_MIN), 3))
        a.append(round((_hdi_number(row, "Mean years of schooling (years), 2012 a") / HDI_MEAN_SCHOOLING_MAX
                        + _hdi_number(row, "Expected years of schooling (years), 2012 a") / HDI_EXPECTED_SCHOOLING_MAX)
                       / 2, 3))
        a.append(round((np.log(_hdi_number(row, "Gross national income (GNI) per capita (2011 PPP $), 2013"))
                        - HDI_LOG_GNI_MIN) / HDI_LOG_GNI_RANGE, 3))

        # Geometric mean of the three component indices.
        composite = round((a[2] * a[3] * a[4]) ** (1.0 / 3.0), 3)
        a.append(composite)
        # NOTE(review): the original cell appends the same composite value twice.
        # It is kept so the 7-element layout is unchanged -- confirm whether the
        # last slot was meant to hold a different quantity.
        a.append(composite)

        codes[repr(ids.loc[i][0])] = a
    except Exception:
        # Best effort: countries with no HDI row or with unparsable values are
        # skipped. Catching Exception (rather than a bare except) keeps
        # KeyboardInterrupt / SystemExit working.
        continue
In [37]:
import json
# Serialise `codes` to ../pop.json.
# `open` inside a `with` block works on both Python 2 and 3 (the `file` builtin
# exists only on Python 2) and guarantees the handle is flushed and closed.
# NOTE(review): the execution counts show this cell was last run before the HDI
# cell that also assigns `codes`; on a top-to-bottom run it would write that
# later table instead -- confirm which `codes` is meant to go into pop.json.
with open('../pop.json', 'w') as pop_file:
    json.dump(codes, pop_file)