#!/usr/bin/env python # coding: utf-8 # In[2]: import numpy as np, requests, pandas as pd, zipfile, StringIO # In[3]: countries=pd.read_html('http://www.geonames.org/countries/',header=0,infer_types=False)[1] countries.columns=['ISO2','ISO3','ISONUM','FIPS','Country','Capital','Area','Population','Continent'] countries.set_index('Country',drop=True,inplace=True) countries.head(5) # In[4]: hdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'LEx, EDUx, INCx',header=1).dropna(axis=1, how='all').set_index('country') hdi.head() # In[5]: ihdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'iLEx, iEDUx,iINCx',header=1).dropna(axis=1, how='all').set_index('country') ihdi.head() # In[6]: gii=pd.read_excel('gii_series_cartagena.xlsx',sheetname=u'GII',header=2).dropna(axis=1, how='all').set_index('Country') gii.head() # In[7]: cc={ 'Bolivia (Plurinational State of)':'Bolivia', 'Brunei Darussalam':'Brunei', 'Congo':'Republic of the Congo', 'Congo (Democratic Republic of the)':'Democratic Republic of the Congo', "Cote d'Ivoire":'Ivory Coast', 'Hong Kong, China (SAR)':'Hong Kong', 'Iran (Islamic Republic of)':'Iran', "Korea (Democratic People's Republic of)":'North Korea', 'Korea (Republic of)':'South Korea', "Lao People's Democratic Republic":'Laos', 'Libyan Arab Jamahiriya':'Libya', 'Micronesia (Federated States of)':'Micronesia', 'Moldova (Republic of)':'Moldova', 'Myanmar':'Myanmar [Burma]', 'Palestine (State of)':'Palestine', 'Russian Federation':'Russia', 'Sao Tome and Principe':u'São Tomé and Príncipe', 'Syrian Arab Republic':'Syria', 'Tanzania (United Republic of)':'Tanzania', 'The former Yugoslav Republic of Macedonia':'Macedonia', 'Timor-Leste':'East Timor', 'Venezuela (Bolivarian Republic of)':'Venezuela' } cc2={ "Cote d'Ivoire":u"Côte d'Ivoire", "Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)", 'Palestine (State of)':'Palestine, State of', 'Vietnam':'Viet Nam', 'Libyan Arab Jamahiriya':'Libya' } cc3={ 'Libyan Arab Jamahiriya':'Libya' } cc4={ "Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)", 'Libyan Arab Jamahiriya':'Libya' } continent_converter={ 'EU':'Europe', 'AS':'Asia', 'SA':'South America', 'nan':'North America', 'AN':'Antarctica', 'AF':'Africa', 'OC':'Oceania' } def country_name_converter(country): if country in cc: return cc[country] else: return country def country_name_converter2(country): if country in cc2: return cc2[country] else: return country def country_name_converter3(country): if country in cc3: return cc3[country] else: return country def country_name_converter4(country): if country in cc4: return cc4[country] else: return country # In[8]: data={} for country in ihdi.index: if country_name_converter(country) in countries.index: icountry=country_name_converter(country) data[countries.loc[icountry]['ISONUM']]={} data[countries.loc[icountry]['ISONUM']]['name']=icountry data[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3'] data[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population'] data[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area'] data[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']] data[countries.loc[icountry]['ISONUM']]['hdi']={} data[countries.loc[icountry]['ISONUM']]['ihdi']={} data[countries.loc[icountry]['ISONUM']]['gii']={} for year in range(2010,2014): try: data[countries.loc[icountry]['ISONUM']]['hdi'][year]={} if np.isnan(hdi.loc[country]['edux'+repr(year)]): data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN" else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=round(hdi.loc[country]['edux'+repr(year)],3) if np.isnan(hdi.loc[country]['lex'+repr(year)]): data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN" else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=round(hdi.loc[country]['lex'+repr(year)],3) if np.isnan(hdi.loc[country]['gnix'+repr(year)]): data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN" else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=round(hdi.loc[country]['gnix'+repr(year)],3) except: print country,'hdi' try: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={} if np.isnan(ihdi.loc[country]['iedux'+repr(year)]): data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']="NaN" else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']=round(ihdi.loc[country]['iedux'+repr(year)],3) if np.isnan(ihdi.loc[country]['ilex'+repr(year)]): data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']="NaN" else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']=round(ihdi.loc[country]['ilex'+repr(year)],3) if np.isnan(ihdi.loc[country]['ignix'+repr(year)]): data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']="NaN" else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']=round(ihdi.loc[country]['ignix'+repr(year)],3) except: print country,'ihdi' try: gcountry=country_name_converter2(country) if np.isnan(gii.loc[gcountry]['GII'+repr(year)]): data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN" else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3) except: print country,'gii' # In[33]: import json file('data.json','w').write(json.dumps(data)) # In[9]: GNI=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Gross National Income',header=2).set_index('Country') GNI.head() # In[10]: LE=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Life Expectency',header=2).set_index('Country') LE.head() # In[11]: EYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Expected Years of Schooling',header=2).set_index('Country') EYS.head() # In[63]: MYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Mean Years of Schooling',header=2).set_index('Country') MYS.head() #fux db MYS.loc["Vanuatu","MYS1980"]=np.NaN # In[16]: data2={} for country in GNI.index: if country_name_converter(country) in countries.index: icountry=country_name_converter(country) data2[countries.loc[icountry]['ISONUM']]={} data2[countries.loc[icountry]['ISONUM']]['name']=icountry data2[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3'] data2[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population'] data2[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area'] data2[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']] data2[countries.loc[icountry]['ISONUM']]['hdi']={} for year in range(1980,2014): try: data2[countries.loc[icountry]['ISONUM']]['hdi'][year]={} data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\ round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\ EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3) data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\ round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3) data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\ round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3) except: print country,'hdi',year # In[231]: import json file('data2.json','w').write(json.dumps(data2)) # In[64]: for country in GNI.index: if country_name_converter(country) in countries.index: icountry=country_name_converter(country) for year in range(1980,2010): try: data[countries.loc[icountry]['ISONUM']]['hdi'][year]={} data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\ round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\ EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3) data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\ round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3) data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\ round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3) if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']): data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN" if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']): data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN" if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']): data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN" data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={'edu':"NaN",'le':"NaN",'gni':"NaN"} data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN" if year in [1995,2000,2005]: try: gcountry=country_name_converter2(country) if np.isnan(gii.loc[gcountry]['GII'+repr(year)]): data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN" else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3) except: print country,'gii' except: print country,'error',year # In[18]: import json file('data3.json','w').write(json.dumps(data)) # In[65]: pop=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheet="ESTIMATES",header=16) # In[66]: pop=pop.set_index(['Country code']) # In[67]: for i in data: data[i]['population']={} for year in range(1980,2011): data[i]['population'][year]=\ pop.loc[(int)(i)].values[::-1][2010-year]*1000 # In[68]: pop2=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheetname="NO CHANGE",header=16) # In[69]: pop2=pop2.set_index(['Country code']) # In[70]: for i in data: for year in range(2011,2014): data[i]['population'][year]=\ pop2.loc[(int)(i)].values[::-1][2100-year]*1000 # In[71]: import json file('data3.json','w').write(json.dumps(data))