#!/usr/bin/env python
# coding: utf-8

# In[2]:


import numpy as np, requests, pandas as pd, zipfile, StringIO


# In[3]:


countries=pd.read_html('http://www.geonames.org/countries/',header=0,infer_types=False)[1]
countries.columns=['ISO2','ISO3','ISONUM','FIPS','Country','Capital','Area','Population','Continent']
countries.set_index('Country',drop=True,inplace=True)
countries.head(5)


# In[4]:


hdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'LEx, EDUx, INCx',header=1).dropna(axis=1, how='all').set_index('country')
hdi.head()


# In[5]:


ihdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'iLEx, iEDUx,iINCx',header=1).dropna(axis=1, how='all').set_index('country')
ihdi.head()


# In[6]:


gii=pd.read_excel('gii_series_cartagena.xlsx',sheetname=u'GII',header=2).dropna(axis=1, how='all').set_index('Country')
gii.head()


# In[7]:


cc={
'Bolivia (Plurinational State of)':'Bolivia',
'Brunei Darussalam':'Brunei',
'Congo':'Republic of the Congo',
'Congo (Democratic Republic of the)':'Democratic Republic of the Congo',
"Cote d'Ivoire":'Ivory Coast',
'Hong Kong, China (SAR)':'Hong Kong',
'Iran (Islamic Republic of)':'Iran',
"Korea (Democratic People's Republic of)":'North Korea',
'Korea (Republic of)':'South Korea',
"Lao People's Democratic Republic":'Laos',
'Libyan Arab Jamahiriya':'Libya',
'Micronesia (Federated States of)':'Micronesia',
'Moldova (Republic of)':'Moldova',
'Myanmar':'Myanmar [Burma]',
'Palestine (State of)':'Palestine',
'Russian Federation':'Russia',
'Sao Tome and Principe':u'SÃ£o TomÃ© and PrÃ­ncipe',
'Syrian Arab Republic':'Syria',
'Tanzania (United Republic of)':'Tanzania',
'The former Yugoslav Republic of Macedonia':'Macedonia',
'Timor-Leste':'East Timor',
'Venezuela (Bolivarian Republic of)':'Venezuela'
}
cc2={
"Cote d'Ivoire":u"Côte d'Ivoire",
"Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)",
'Palestine (State of)':'Palestine, State of',
'Vietnam':'Viet Nam',
'Libyan Arab Jamahiriya':'Libya'
}
cc3={
'Libyan Arab Jamahiriya':'Libya'
}
cc4={
"Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)",
'Libyan Arab Jamahiriya':'Libya'
}

continent_converter={
'EU':'Europe',
'AS':'Asia',
'SA':'South America',
'nan':'North America',
'AN':'Antarctica',
'AF':'Africa',
'OC':'Oceania'
}

def country_name_converter(country):
    if country in cc: return cc[country]
    else: return country
def country_name_converter2(country):
    if country in cc2: return cc2[country]
    else: return country
def country_name_converter3(country):
    if country in cc3: return cc3[country]
    else: return country
def country_name_converter4(country):
    if country in cc4: return cc4[country]
    else: return country


# In[8]:


data={}
for country in ihdi.index: 
    if country_name_converter(country) in countries.index:
        icountry=country_name_converter(country)
        
        data[countries.loc[icountry]['ISONUM']]={}
        data[countries.loc[icountry]['ISONUM']]['name']=icountry
        data[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3']
        data[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population']
        data[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area']
        data[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']]
        data[countries.loc[icountry]['ISONUM']]['hdi']={}
        data[countries.loc[icountry]['ISONUM']]['ihdi']={}
        data[countries.loc[icountry]['ISONUM']]['gii']={}
        for year in range(2010,2014):
            try:
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
                if np.isnan(hdi.loc[country]['edux'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=round(hdi.loc[country]['edux'+repr(year)],3)
                if np.isnan(hdi.loc[country]['lex'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=round(hdi.loc[country]['lex'+repr(year)],3)
                if np.isnan(hdi.loc[country]['gnix'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=round(hdi.loc[country]['gnix'+repr(year)],3)
            except: print country,'hdi'
            try:
                data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={}
                if np.isnan(ihdi.loc[country]['iedux'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']=round(ihdi.loc[country]['iedux'+repr(year)],3)
                if np.isnan(ihdi.loc[country]['ilex'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']=round(ihdi.loc[country]['ilex'+repr(year)],3)
                if np.isnan(ihdi.loc[country]['ignix'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']=round(ihdi.loc[country]['ignix'+repr(year)],3)
            except:
                print country,'ihdi'
            try:
                gcountry=country_name_converter2(country)
                if np.isnan(gii.loc[gcountry]['GII'+repr(year)]):
                    data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
                else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3)
            except:
                print country,'gii'


# In[33]:


import json
file('data.json','w').write(json.dumps(data))


# In[9]:


GNI=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Gross National Income',header=2).set_index('Country')
GNI.head()


# In[10]:


LE=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Life Expectency',header=2).set_index('Country')
LE.head()


# In[11]:


EYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Expected Years of Schooling',header=2).set_index('Country')
EYS.head()


# In[63]:


MYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Mean Years of Schooling',header=2).set_index('Country')
MYS.head()
#fux db
MYS.loc["Vanuatu","MYS1980"]=np.NaN


# In[16]:


data2={}
for country in GNI.index: 
    if country_name_converter(country) in countries.index:
        icountry=country_name_converter(country)
        
        data2[countries.loc[icountry]['ISONUM']]={}
        data2[countries.loc[icountry]['ISONUM']]['name']=icountry
        data2[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3']
        data2[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population']
        data2[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area']
        data2[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']]
        data2[countries.loc[icountry]['ISONUM']]['hdi']={}
        for year in range(1980,2014):
            try:
                data2[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
                data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\
                    round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\
                    EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3)
                data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\
                    round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3)
                data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\
                    round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3)
            except: print country,'hdi',year


# In[231]:


import json
file('data2.json','w').write(json.dumps(data2))


# In[64]:


for country in GNI.index: 
    if country_name_converter(country) in countries.index:
        icountry=country_name_converter(country)
        for year in range(1980,2010):
            try:
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\
                    round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\
                    EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3)
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\
                    round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3)
                data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\
                    round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3)
                if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN"
                if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN"
                if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']):
                    data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN"
                
                data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={'edu':"NaN",'le':"NaN",'gni':"NaN"}
                
                data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
                if year in [1995,2000,2005]:
                    try:
                        gcountry=country_name_converter2(country)
                        if np.isnan(gii.loc[gcountry]['GII'+repr(year)]):
                            data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
                        else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3)
                    except:
                        print country,'gii'
            except: print country,'error',year


# In[18]:


import json
file('data3.json','w').write(json.dumps(data))


# In[65]:


pop=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheet="ESTIMATES",header=16)


# In[66]:


pop=pop.set_index(['Country code'])


# In[67]:


for i in data:
    data[i]['population']={}
    for year in range(1980,2011):
        data[i]['population'][year]=\
        pop.loc[(int)(i)].values[::-1][2010-year]*1000        


# In[68]:


pop2=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheetname="NO CHANGE",header=16)


# In[69]:


pop2=pop2.set_index(['Country code'])


# In[70]:


for i in data:
    for year in range(2011,2014):
        data[i]['population'][year]=\
        pop2.loc[(int)(i)].values[::-1][2100-year]*1000


# In[71]:


import json
file('data3.json','w').write(json.dumps(data))