In [2]:
import numpy as np, requests, pandas as pd, zipfile, StringIO
In [3]:
countries=pd.read_html('http://www.geonames.org/countries/',header=0,infer_types=False)[1]
countries.columns=['ISO2','ISO3','ISONUM','FIPS','Country','Capital','Area','Population','Continent']
countries.set_index('Country',drop=True,inplace=True)
countries.head(5)
C:\Anaconda\lib\site-packages\pandas\io\html.py:841: FutureWarning: infer_types will have no effect in 0.14
  warnings.warn("infer_types will have no effect in 0.14", FutureWarning)
Out[3]:
ISO2 ISO3 ISONUM FIPS Capital Area Population Continent
Country
Andorra AD AND 20 AN Andorra la Vella 468.0 84000 EU
United Arab Emirates AE ARE 784 AE Abu Dhabi 82880.0 4975593 AS
Afghanistan AF AFG 4 AF Kabul 647500.0 29121286 AS
Antigua and Barbuda AG ATG 28 AC St. John's 443.0 86754 nan
Anguilla AI AIA 660 AV The Valley 102.0 13254 nan
In [4]:
hdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'LEx, EDUx, INCx',header=1).dropna(axis=1, how='all').set_index('country')
hdi.head()
Out[4]:
iso3 lex2010 lex2011 lex2012 lex2013 edux2010 edux2011 edux2012 edux2013 gnix2010 gnix2011 gnix2012 gnix2013
country
Afghanistan AFG 0.609 0.617 0.623 0.630 0.357000 0.365333 0.365333 0.365333 0.425934 0.427753 0.443368 0.445065
Albania ALB 0.874 0.877 0.880 0.883 0.601675 0.608519 0.608519 0.608519 0.674701 0.680974 0.680882 0.683453
Algeria DZA 0.779 0.781 0.783 0.785 0.631478 0.642589 0.642589 0.642589 0.725495 0.729289 0.727595 0.730002
Andorra AND 0.936 0.937 0.939 0.941 0.670287 0.670287 0.670287 0.670287 0.918319 0.912079 0.908631 0.907283
Angola AGO 0.472 0.478 0.485 0.491 0.440879 0.474212 0.474212 0.474212 0.614749 0.625145 0.624848 0.626393
In [5]:
ihdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'iLEx, iEDUx,iINCx',header=1).dropna(axis=1, how='all').set_index('country')
ihdi.head()
Out[5]:
iso3 ilex2010 ilex2011 ilex2012 ilex2013 iedux2010 iedux2011 iedux2012 iedux2013 ignix2010 ignix2011 ignix2012 ignix2013
country
Afghanistan AFG 0.250908 0.302947 0.305893 0.413910 0.216699 0.221757 0.221757 0.200933 NaN NaN NaN 0.396998
Albania ALB 0.778734 0.778776 0.781440 0.795583 0.525262 0.536106 0.536106 0.536106 0.577544 0.556356 0.556281 0.558381
Algeria DZA 0.639559 0.667755 0.669465 0.653905 NaN NaN NaN NaN NaN NaN NaN NaN
Andorra AND NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Angola AGO 0.218536 0.257642 0.261415 0.264158 0.325369 NaN 0.310135 0.310135 0.390980 0.312572 0.312424 0.313197
In [6]:
gii=pd.read_excel('gii_series_cartagena.xlsx',sheetname=u'GII',header=2).dropna(axis=1, how='all').set_index('Country')
gii.head()
Out[6]:
iso3 GII1995 GII2000 GII2005 GII2010 GII2011 GII2012 GII2013
Country
Afghanistan AFG NaN NaN 0.743191 0.720173 0.706641 0.712844 0.705302
Albania ALB 0.661140 NaN 0.366596 0.268824 0.271414 0.251003 0.245376
Algeria DZA 0.594158 0.621349 0.551894 0.505261 0.412363 0.471756 0.425258
Andorra AND NaN NaN NaN NaN NaN NaN NaN
Angola AGO NaN NaN NaN NaN NaN NaN NaN
In [7]:
cc={
'Bolivia (Plurinational State of)':'Bolivia',
'Brunei Darussalam':'Brunei',
'Congo':'Republic of the Congo',
'Congo (Democratic Republic of the)':'Democratic Republic of the Congo',
"Cote d'Ivoire":'Ivory Coast',
'Hong Kong, China (SAR)':'Hong Kong',
'Iran (Islamic Republic of)':'Iran',
"Korea (Democratic People's Republic of)":'North Korea',
'Korea (Republic of)':'South Korea',
"Lao People's Democratic Republic":'Laos',
'Libyan Arab Jamahiriya':'Libya',
'Micronesia (Federated States of)':'Micronesia',
'Moldova (Republic of)':'Moldova',
'Myanmar':'Myanmar [Burma]',
'Palestine (State of)':'Palestine',
'Russian Federation':'Russia',
'Sao Tome and Principe':u'São Tomé and Príncipe',
'Syrian Arab Republic':'Syria',
'Tanzania (United Republic of)':'Tanzania',
'The former Yugoslav Republic of Macedonia':'Macedonia',
'Timor-Leste':'East Timor',
'Venezuela (Bolivarian Republic of)':'Venezuela'
}
cc2={
"Cote d'Ivoire":u"Côte d'Ivoire",
"Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)",
'Palestine (State of)':'Palestine, State of',
'Vietnam':'Viet Nam',
'Libyan Arab Jamahiriya':'Libya'
}
cc3={
'Libyan Arab Jamahiriya':'Libya'
}
cc4={
"Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)",
'Libyan Arab Jamahiriya':'Libya'
}

continent_converter={
'EU':'Europe',
'AS':'Asia',
'SA':'South America',
'nan':'North America',
'AN':'Antarctica',
'AF':'Africa',
'OC':'Oceania'
}

def country_name_converter(country):
    if country in cc: return cc[country]
    else: return country
def country_name_converter2(country):
    if country in cc2: return cc2[country]
    else: return country
def country_name_converter3(country):
    if country in cc3: return cc3[country]
    else: return country
def country_name_converter4(country):
    if country in cc4: return cc4[country]
    else: return country
In [8]:
data={}
for country in ihdi.index: 
    if country_name_converter(country) in countries.index:
        icountry=country_name_converter(country)
        
        data[countries.loc[icountry]['ISONUM']]={}
        data[countries.loc[icountry]['ISONUM']]['name']=icountry
        data[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3']
        data[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population']
        data[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area']
        data[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']]
        data[countries.loc[icountry]['ISONUM']]['hdi']={}
        data[countries.loc[icountry]['ISONUM']]['ihdi']={}
        data[countries.loc[icountry]['ISONUM']]['gii']={}
        for year in range(2010,2014):
            try:
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
                if np.isnan(hdi.loc[country]['edux'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=round(hdi.loc[country]['edux'+repr(year)],3)
                if np.isnan(hdi.loc[country]['lex'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=round(hdi.loc[country]['lex'+repr(year)],3)
                if np.isnan(hdi.loc[country]['gnix'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=round(hdi.loc[country]['gnix'+repr(year)],3)
            except: print country,'hdi'
            try:
                data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={}
                if np.isnan(ihdi.loc[country]['iedux'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']=round(ihdi.loc[country]['iedux'+repr(year)],3)
                if np.isnan(ihdi.loc[country]['ilex'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']=round(ihdi.loc[country]['ilex'+repr(year)],3)
                if np.isnan(ihdi.loc[country]['ignix'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']=round(ihdi.loc[country]['ignix'+repr(year)],3)
            except:
                print country,'ihdi'
            try:
                gcountry=country_name_converter2(country)
                if np.isnan(gii.loc[gcountry]['GII'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3)
            except:
                print country,'gii'
In [33]:
import json
file('data.json','w').write(json.dumps(data))
In [9]:
GNI=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Gross National Income',header=2).set_index('Country')
GNI.head()
Out[9]:
iso3 GNI1980 GNI1981 GNI1982 GNI1983 GNI1984 GNI1985 GNI1986 GNI1987 GNI1988 ... GNI2004 GNI2005 GNI2006 GNI2007 GNI2008 GNI2009 GNI2010 GNI2011 GNI2012 GNI2013
Country
Afghanistan AFG 2548.254832 2466.626434 2562.737937 2626.621422 2844.072554 2990.928165 3083.504539 3250.936824 2969.797438 ... 1125.869194 1125.857560 1202.399608 1263.661214 1432.486514 1454.914569 1677.201601 1697.523824 1882.395509 1903.656673
Albania ALB 3206.926814 3232.740139 3339.181470 3359.854185 3319.652177 3205.344390 3178.175640 3264.309099 3150.099437 ... 6553.656875 6920.969736 7373.477016 7867.466696 8350.808162 8400.746220 8705.765308 9074.926805 9069.389993 9225.050846
Algeria DZA 10215.030188 10039.512129 10014.561086 10313.216799 10560.014313 10928.949084 11025.723938 10849.097215 10724.071343 ... 10879.228521 11133.509897 11577.415445 11650.490699 12153.510881 12227.579531 12185.446764 12495.444586 12356.046736 12554.571986
Andorra AND 31812.140284 31393.180239 30148.593193 29267.883014 28510.312997 27767.417852 27206.021185 26932.811055 27286.989362 ... 33749.938605 34385.304454 35450.339111 37555.137778 38365.363439 40870.160552 43673.948833 41906.643301 40960.901628 40597.124810
Angola AGO 3259.908578 3253.536712 3010.757634 2912.914398 2897.972093 2946.294587 2902.630135 2901.611617 3104.058261 ... 3057.616822 3878.839822 5041.274640 5662.865490 6271.567560 5246.697802 5853.837614 6270.903706 6258.566318 6322.938484

5 rows × 35 columns

In [10]:
LE=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Life Expectency',header=2).set_index('Country')
LE.head()
Out[10]:
iso3 Life_Exp1980 Life_Exp1981 Life_Exp1982 Life_Exp1983 Life_Exp1984 Life_Exp1985 Life_Exp1986 Life_Exp1987 Life_Exp1988 ... Life_Exp2004 Life_Exp2005 Life_Exp2006 Life_Exp2007 Life_Exp2008 Life_Exp2009 Life_Exp2010 Life_Exp2011 Life_Exp2012 Life_Exp2013
Country
Afghanistan AFG 41.242 41.770 42.347 42.977 43.661 44.400 45.192 46.024 46.880 ... 56.583 57.071 57.582 58.102 58.618 59.124 59.612 60.079 60.524 60.947
Albania ALB 70.218 70.426 70.646 70.886 71.144 71.398 71.615 71.770 71.853 ... 75.725 75.949 76.124 76.278 76.433 76.598 76.780 76.979 77.185 77.392
Algeria DZA 58.198 59.524 60.826 62.051 63.160 64.120 64.911 65.554 66.072 ... 69.682 69.854 70.020 70.180 70.332 70.477 70.615 70.747 70.874 71.000
Andorra AND 74.856 75.115 75.344 75.541 75.707 75.848 75.973 76.094 76.222 ... 80.005 80.192 80.343 80.471 80.589 80.703 80.818 80.935 81.054 81.173
Angola AGO 40.182 40.311 40.429 40.547 40.671 40.794 40.902 40.988 41.050 ... 48.036 48.572 49.041 49.471 49.882 50.286 50.689 51.094 51.498 51.899

5 rows × 35 columns

In [11]:
EYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Expected Years of Schooling',header=2).set_index('Country')
EYS.head()
Out[11]:
iso3 EYS1980 EYS1981 EYS1982 EYS1983 EYS1984 EYS1985 EYS1986 EYS1987 EYS1988 ... EYS2004 EYS2005 EYS2006 EYS2007 EYS2008 EYS2009 EYS2010 EYS2011 EYS2012 EYS2013
Country
Afghanistan AFG 1.80000 1.80000 1.80000 1.97500 2.15000 2.32500 2.50000 2.52500 2.55000 ... 7.50000 7.74000 7.98000 8.22000 8.46000 8.70000 9.00000 9.30000 9.30000 9.30000
Albania ALB 11.30000 11.30000 10.80000 10.30000 10.30000 10.50000 10.60000 10.40000 10.40000 ... 10.80000 10.80000 10.80000 10.80000 10.80000 10.80000 10.80000 10.80000 10.80000 10.80000
Algeria DZA 9.40000 9.40000 9.40000 9.40000 9.40000 9.40000 9.40000 9.40000 9.60000 ... 11.80000 12.00000 12.10000 12.30000 12.75000 13.20000 13.60000 14.00000 14.00000 14.00000
Andorra AND 10.79878 10.79878 10.79878 10.79878 10.79878 10.79878 10.79878 10.79878 10.79878 ... 10.79199 10.82593 11.18277 11.18277 11.67192 11.67192 11.67192 11.67192 11.67192 11.67192
Angola AGO 4.20000 4.20000 4.20000 4.20000 4.20000 4.20000 4.20000 4.20000 4.20000 ... 7.35000 7.82500 8.30000 8.77500 9.25000 9.72500 10.20000 11.40000 11.40000 11.40000

5 rows × 35 columns

In [63]:
MYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Mean Years of Schooling',header=2).set_index('Country')
MYS.head()
#fux db
MYS.loc["Vanuatu","MYS1980"]=np.NaN
In [16]:
data2={}
for country in GNI.index: 
    if country_name_converter(country) in countries.index:
        icountry=country_name_converter(country)
        
        data2[countries.loc[icountry]['ISONUM']]={}
        data2[countries.loc[icountry]['ISONUM']]['name']=icountry
        data2[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3']
        data2[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population']
        data2[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area']
        data2[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']]
        data2[countries.loc[icountry]['ISONUM']]['hdi']={}
        for year in range(1980,2014):
            try:
                data2[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
                data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\
                    round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\
                    EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3)
                data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\
                    round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3)
                data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\
                    round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3)
            except: print country,'hdi',year
Vanuatu hdi 1980
In [231]:
import json
file('data2.json','w').write(json.dumps(data2))
In [64]:
for country in GNI.index: 
    if country_name_converter(country) in countries.index:
        icountry=country_name_converter(country)
        for year in range(1980,2010):
            try:
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\
                    round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\
                    EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3)
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\
                    round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3)
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\
                    round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3)
                if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN"
                if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN"
                if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN"
                
                data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={'edu':"NaN",'le':"NaN",'gni':"NaN"}
                
                data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
                if year in [1995,2000,2005]:
                    try:
                        gcountry=country_name_converter2(country)
                        if np.isnan(gii.loc[gcountry]['GII'+repr(year)]):
                            data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
                        else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3)
                    except:
                        print country,'gii'
            except: print country,'error',year
In [18]:
import json
file('data3.json','w').write(json.dumps(data))
In [65]:
pop=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheet="ESTIMATES",header=16)
In [66]:
pop=pop.set_index(['Country code'])
In [67]:
for i in data:
    data[i]['population']={}
    for year in range(1980,2011):
        data[i]['population'][year]=\
        pop.loc[(int)(i)].values[::-1][2010-year]*1000        
In [68]:
pop2=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheetname="NO CHANGE",header=16)
In [69]:
pop2=pop2.set_index(['Country code'])
In [70]:
for i in data:
    for year in range(2011,2014):
        data[i]['population'][year]=\
        pop2.loc[(int)(i)].values[::-1][2100-year]*1000
In [71]:
import json
file('data3.json','w').write(json.dumps(data))