In [2]:
import numpy as np, pandas as pd
from pygeocoder import Geocoder
import time
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
#test geocoder
Geocoder.geocode('MIERCUREA CIUC, ROMANIA').coordinates
In [2]:
#define database path
path='http://blog.csaladen.es/szekelyfold%20lakossag/db/'

#use if files downloaded to local path
path='E:/Skydrive/GitHub/Blog/szekelyfold lakossag/db/'
In [3]:
#load population for all years 1990-2014
df={}

#hargita
df[u'Hargita']=pd.read_html(path+'harghita9095.xls',infer_types=False)[0].ix[5:]
df[u'Hargita'].columns=['County','Loc','Age','Sex']+[i for i in range(1990,1996)]
df[u'Hargita']=df[u'Hargita'].drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])

dfhelper=pd.read_html(path+'harghita9602.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(1996,2003)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Hargita']=df[u'Hargita'].join(dfhelper)

dfhelper=pd.read_html(path+'harghita0308.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(2003,2009)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Hargita']=df[u'Hargita'].join(dfhelper)

dfhelper=pd.read_html(path+'harghita0914.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(2009,2015)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Hargita']=df[u'Hargita'].join(dfhelper)

df[u'Hargita'].head()
C:\Program Files\Anaconda\lib\site-packages\pandas\io\html.py:841: FutureWarning: infer_types will have no effect in 0.14
  warnings.warn("infer_types will have no effect in 0.14", FutureWarning)
Out[3]:
1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
Loc Age Sex
TOTAL Total Total 364166 360965 348352 349005.0 347911 346829.0 345322 344199 343467 342859.0 ... 327297 326650 325997 325682.0 325345 325127 324419 323820.0 323317.0 322896.0
Masculin 183099 181771 174240 174671.0 173923 173098.0 172245 171414 170777 170461.0 ... 162352 161885 161497 161253.0 160921 160684 160337 159892.0 159646.0 159394.0
Feminin 181067 179194 174112 174334.0 173988 173731.0 173077 172785 172690 172398.0 ... 164945 164765 164500 164429.0 164424 164443 164082 163928.0 163671.0 163502.0
0- 4 ani Total : : : 24211.0 : 20575.0 19607 18847 18642 18774.0 ... 17426 17538 17832 18026.0 18365 18396 18172 17603.0 17366.0 16894.0
Masculin : : : 12405.0 : 10487.0 10053 9583 9432 9520.0 ... 8973 8962 9126 9190.0 9297 9323 9237 8953.0 8849.0 8681.0

5 rows × 25 columns

In [4]:
#kovaszna

df[u'Kovászna']=pd.read_html(path+'covasna9095.xls',infer_types=False)[0].ix[5:]
df[u'Kovászna'].columns=['County','Loc','Age','Sex']+[i for i in range(1990,1996)]
df[u'Kovászna']=df[u'Kovászna'].drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])

dfhelper=pd.read_html(path+'covasna9602.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(1996,2003)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Kovászna']=df[u'Kovászna'].join(dfhelper)

dfhelper=pd.read_html(path+'covasna0308.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(2003,2009)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Kovászna']=df[u'Kovászna'].join(dfhelper)

dfhelper=pd.read_html(path+'covasna0914.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(2009,2015)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Kovászna']=df[u'Kovászna'].join(dfhelper)

df[u'Kovászna'].head()
Out[4]:
1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
Loc Age Sex
TOTAL Total Total 238919 238247 233256 234358.0 233708 232580.0 232111 231904 231172 230840 ... 223996 224100 223634 223208 222846 222481 222065 221419 220962 220750
Masculin 118928 118421 115338 116151.0 115673 114839.0 114533 114520 113905 113610 ... 110357 110361 110166 109860 109572 109332 109103 108734 108535 108478
Feminin 119991 119826 117918 118207.0 118035 117741.0 117578 117384 117267 117230 ... 113639 113739 113468 113348 113274 113149 112962 112685 112427 112272
0- 4 ani Total : : : 16602.0 : 13249.0 12501 12233 12282 12325 ... 12532 12669 12794 12767 12878 12728 12433 12036 11986 11859
Masculin : : : 8433.0 : 6751.0 6376 6271 6301 6296 ... 6430 6456 6496 6450 6473 6436 6337 6129 6155 6146

5 rows × 25 columns

In [5]:
#maros

df[u'Maros']=pd.read_html(path+'mures9094.xls',infer_types=False)[0].ix[5:]
df[u'Maros'].columns=['County','Loc','Age','Sex']+[i for i in range(1990,1995)]
df[u'Maros']=df[u'Maros'].drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])

dfhelper=pd.read_html(path+'mures9599.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(1995,2000)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Maros']=df[u'Maros'].join(dfhelper)

dfhelper=pd.read_html(path+'mures0004.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(2000,2005)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Maros']=df[u'Maros'].join(dfhelper)

dfhelper=pd.read_html(path+'mures0509.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(2005,2010)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Maros']=df[u'Maros'].join(dfhelper)

dfhelper=pd.read_html(path+'mures1014.xls',infer_types=False)[0].ix[5:]
dfhelper.columns=['County','Loc','Age','Sex']+[i for i in range(2010,2015)]
dfhelper=dfhelper.drop('County',axis=1).replace('-', np.nan).fillna(method='ffill').set_index(['Loc','Age','Sex'])
df[u'Maros']=df[u'Maros'].join(dfhelper)

df[u'Maros'].head()
Out[5]:
1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 ... 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
Loc Age Sex
TOTAL Total Total 624141 622048 610059 609289.0 607578 606777 604967.0 603517 602426 602155 ... 583410 584205 582274 581400 581628 580672 579747.0 578578.0 577064.0 575937.0
Masculin 309028 308057 301023 300740.0 299472 298763 297715.0 296653 295806 295486 ... 285799 286010 284992 284462 284509 283947 283424.0 282675.0 282104.0 281580.0
Feminin 315113 313991 309036 308549.0 308106 308014 307252.0 306864 306620 306669 ... 297611 298195 297282 296938 297119 296725 296323.0 295903.0 294960.0 294357.0
0- 4 ani Total : : : 39955.0 : 34720 32986.0 32134 31768 32003 ... 31209 31126 31312 31452 31816 31874 31504.0 30925.0 30375.0 29490.0
Masculin : : : 20450.0 : 17811 17088.0 16613 16490 16507 ... 16053 15965 16110 16177 16388 16449 16251.0 15916.0 15662.0 15209.0

5 rows × 25 columns

In [127]:
#fix known db errors
df[u"Maros"].ix[('116340 CRAIESTI',"85 ani si peste","Masculin"),:]=\
df[u"Maros"].loc['116340 CRAIESTI'].loc["85 ani si peste"].loc["Total"].replace(':',0).astype('float')\
.add(-df[u"Maros"].loc['116340 CRAIESTI'].loc["85 ani si peste"].loc["Feminin"].replace(':',0).astype('float'))

df[u"Hargita"].ix[('83464 ORAS BALAN',"85 ani si peste","Masculin"),:]=\
df[u"Hargita"].loc['83464 ORAS BALAN'].loc["85 ani si peste"].loc["Total"].replace(':',0).astype('float')\
.add(-df[u"Hargita"].loc['83464 ORAS BALAN'].loc["85 ani si peste"].loc["Feminin"].replace(':',0).astype('float'))

#sort indexes
for county in df:
    df[county].sort_index(axis=0)
In [14]:
#geocode: assign coordinate to localities, if already saved, then just load from file

#run code below OR next cell - load directly
#next line is just a cell-blocker
=
geo={u'Hargita':{},u'Kovászna':{},u'Maros':{}}
for county in geo:
    for i in set(df[county].index.levels[0]):
        try:
            geo[county][i]=Geocoder.geocode(i[6:]+', '+county+', ROMANIA').coordinates
            #google maps doesnt like too many queries too fast
            time.sleep(0.2)
            print i[6:],geo[county][i]
        except:
            print i[6:],'ERROR'
            
#save geo data
import json
file('geo.json','w').write(json.dumps(geo))
  File "<ipython-input-14-3178ea1f7f41>", line 5
    =
    ^
SyntaxError: invalid syntax
In [6]:
import json
geo=json.loads(open('geo.json').read())
In [7]:
#pretty hungarian names for settlements
hun={"115600 BALA":u"Bala",
"120343 ZAGAR":u"Zágor",
"119894 ORAS UNGHENI":u"Nyárádtõ",
"119974 VALEA LARGA":u"Mezõceked",
"116796 FINTINELE":u"Gyulakuta",
"117505 HODOSA":u"Székelyhodos",
"116590 EREMITU":u"Nyárádremete",
"117113 GLODENI":u"Marossárpatak",
"114809 MUNICIPIUL REGHIN":u"Szászrégen",
"114970 ACATARI":u"Ákosfalva",
"119803 SINCAI":u"Mezõsámsod",
"115771 BICHIS":u"Magyarbükkös",
"119590 SOLOVASTRU":u"Görgényoroszfalu",
"115959 CEUASU DE CIMPIE":u"Mezõcsávás",
"116046 CHETANI":u"Maroskece",
"114854 ORAS SOVATA":u"Szováta",
"118931 RASTOLITA":u"Ratosnya",
"114453 SINTANA DE MURES":u"Marosszentanna",
"116439 CUCI":u"Kutyfalva",
"116126 CHIHERU DE JOS":u"Alsóköhér",
"119858 TAURENI":u"Mezõtóhát",
"115389 BAND":u"Mezõbánd",
"116723 FARAGAU":u"Faragó",
"114417 SINGEORGIU DE MURES":u"Marosszentgyörgy",
"117426 HODAC":u"Görgényhodák",
"120496 SARATENI":u"Sóvárad",
"119242 ORAS SARMASU":u"Nagysármás",
"120138 VATAVA":u"Felsõrépás",
"117177 GORNESTI":u"Gernyeszeg",
"120478 CHIBED":u"Kibéd",
"114382 SINCRAIU DE MURES":u"Marosszentkirály",
"115824 BOGATA":u"Marosbogát",
"115183 APOLD":u"Apold",
"119331 ORAS SANGEORGIU DE PADURE":u"Erdõszentgyörgy",
"118209 MICA":u"Mikefalva",
"120511 BERENI":u"Székelybere",
"117783 IDECIU DE JOS":u"Alsóidecs",
"119750 SAULIA":u"Mezõsályi",
"117550 IBANESTI":u"Libánfalva",
"120502 MADARAS":u"Mezõmadaras",
"117319 GURGHIU":u"Görgényszentimre",
"117925 LIVEZENI":u"Jedd",
"115147 ALUNIS":u"Magyaró",
"119466 SINPAUL":u"Kerelõszentpál",
"119386 SANGER":u"Mezõszengyel",
"117042 GHINDARI":u"Makfalva",
"119723 SUSENI":u"Marosfelfalu",
"118058 LUNCA BRADULUI":u"Palotailva",
"115851 BREAZA":u"Beresztelke",
"120316 VOIVODENI":u"Vajdaszentivány",
"120487 CORUNCA":u"Koronka",
"120254 VINATORI":u"Héjjasfalva",
"114319 MUNICIPIUL TIRGU MURES":u"Marosvásárhely",
"116867 GALESTI":u"Nyárádgálfalva",
"118370 MIHESU DE CIMPIE":u"Mezõméhes",
"118575 OGRA":u"Marosugra",
"118469 NADES":u"Szásznádas",
"118753 PASARENI":u"Backamadaras",
"116983 GHEORGHE DOJA":u"Lukafalva",
"119625 STINCENI":u"Gödemesterháza",
"116493 DANES":u"Dános",
"115637 BALAUSERI":u"Balavásár",
"116545 DEDA":u"Déda",
"115708 BEICA DE JOS":u"Alsóbölkény",
"117998 LUNCA":u"Tekeújfalu",
"114355 CRISTESTI":u"Maroskeresztúr",
"118995 RICIU":u"Mezõrücs",
"120076 VARGATA":u"Csíkfalva",
"119527 SINPETRU DE CIMPIE":u"Uzdiszentpéter",
"115236 ATINTIS":u"Cintos",
"117667 ICLANZEL":u"Kisikland",
"116938 GANESTI":u"Vámosgálfalva",
"117275 GREBENISU DE CIMPIE":u"Mezõgerebenes",
"116224 COZMA":u"Kozmatelke",
"114925 MUNICIPIUL TARNAVENI":u"Dicsõszentmárton",
"114514 MUNICIPIUL SIGHISOARA":u"Segesvár",
"118799 PETELEA":u"Petele",
"120174 VETCA":u"Székelyvécke",
"119153 RUSII-MUNTI":u"Marosoroszfalu",
"115307 BAHNEA":u"Bonyha",
"116171 COROISINMARTIN":u"Kóródszentmárton",
"114603 ALBESTI":u"Fehéregyháza",
"120370 ZAU DE CIMPIE":u"Mezõzáh",
"116395 CUCERDEA":u"Oláhkocsárd",
"118691 PANET":u"Mezõpanit",
"115076 ADAMUS":u"Ádámos",
"115520 BATOS":u"Bátos",
"117827 ORAS IERNUT":u"Radnót",
"115897 BRINCOVENESTI":u"Marosvécs",
"114710 ORAS LUDUS":u"Marosludas",
"116288 CRACIUNESTI":u"Nyárádkarácson",
"119661 SUPLAC":u"Küküllõszéplak",
"118511 NEAUA":u"Havad",
"119206 SASCHIZ":u"Szászkézd",
"118094 MAGHERANI":u"Nyárádmagyarós",
"118281 ORAS MIERCUREA NIRAJULUI":u"Nyárádszereda",
"118824 POGACEAUA":u"Mezõpagocsa",
"116652 ERNEI":u"Nagyernye",
"115575 BAGACIU":u"Szászbogács",
"120218 VIISOARA":u"Csatófalva",
"116340 CRAIESTI":u"Mezõkirályfalva",
"118637 PAPIU ILARIAN":u"Mezõbodon",
"63866 BELIN":u"Bölön",
"64318 GHELINTA":u"Gelence",
"65105 MERENI":u"Kézdialmás",
"64719 POIAN":u"Kézdiszentkereszt",
"65099 VALEA MARE":u"Nagypatak",
"63394 MUNICIPIUL SFINTU GHEORGHE":u"Sepsiszentgyörgy",
"63633 BARCANI":u"Zágonbárkány",
"65011 ZAGON":u"Zágon",
"64130 CATALINA":u"Szentkatolna",
"63688 SITA BUZAULUI":u"Bodzaszita",
"63777 AITA MARE":u"Nagyajta",
"64773 RECI":u"Réty",
"64005 BRATES":u"Barátos",
"64194 CERNAT":u"Csernáton",
"65121 BIXAD":u"Sepsibükszád",
"64238 CHICHIS":u"Kökös",
"63937 BOROSNEU MARE":u"Nagyborosnyó",
"63447 ORAS BARAOLT":u"Barót",
"64568 MOACSA":u"Maksa",
"65147 DALNIC":u"Dálnok",
"65139 MICFALAU":u"Mikóujfalu",
"64997 VIRGHIS":u"Vargyas",
"65113 ARCUS":u"Árkos",
"65154 ESTELNIC":u"Esztelnek",
"63553 COMANDAU":u"Kommandó",
"63526 ORAS COVASNA":u"Kovászna",
"65048 ZABALA":u"Zabola",
"64390 HAGHIG":u"Hídvég",
"63740 MUNICIPIUL TARGU SECUIESC":u"Kézdivásárhely",
"64826 SINZIENI":u"Kézdiszentlélek",
"64639 OZUN":u"Uzon",
"63580 ORAS INTORSURA BUZAULUI":u"Bodzaforduló",
"64041 BRADUT":u"Bardóc",
"64906 VALEA CRISULUI":u"Sepsikõröspatak",
"64602 OJDULA":u"Ozsdola",
"64461 LEMNIA":u"Lemhény",
"64096 BRETCU":u"Bereck",
"64942 VILCELE":u"Elõpatak",
"63802 BATANI":u"Nagybacon",
"64265 DOBIRLAU":u"Dobolló",
"64504 MALNAS":u"Málnás",
"64345 GHIDFALAU":u"Gidófalva",
"63893 BODOC":u"Sepsibodok",
"64871 TURIA":u"Torja",
"64425 ILIENI":u"Illyefalva",
"83936 BILBOR":u"Bélbor",
"86519 SANTIMBRU":u"Büdösfürdõ",
"83320 MUNICIPIUL MIERCUREA CIUC":u"Csíkszereda",
"84380 DIRJIU":u"Székelyderzs",
"86495 RACU":u"Csíkrákos",
"86222 ULIES":u"Kányád",
"84754 LUNCA DE SUS":u"Gyimesfelsõlok",
"83133 MUNICIPIUL ODORHEIU SECUIESC":u"Székelyudvarhely",
"83525 ORAS CRISTURU SECUIESC":u"Székelykeresztúr",
"85412 REMETEA":u"Gyergyóremete",
"83428 ORAS BAILE TUSNAD":u"Tusnádfürdõ",
"86366 ZETEA":u"Zetelaka",
"83151 BRADESTI":u"Fenyéd",
"84086 CIUMANI":u"Gyergyócsomafalva",
"85127 MUGENI":u"Bögöz",
"84594 LAZAREA":u"Gyergyószárhegy",
"86453 TOMESTI":u"Csíkszenttamás",
"83749 ORAS VLAHITA":u"Szentegyháza",
"85341 PRAID":u"Parajd",
"85582 SECUIENI":u"Újszékely",
"84148 CORBU":u"Gyergyóholló",
"85074 MIHAILENI":u"Csíkszentmihály",
"85984 SIMONESTI":u"Siménfalva",
"86339 VOSLABENI":u"Vasláb",
"83963 CAPILNITA":u"Kápolnásfalu",
"85289 PLAIESII DE JOS":u"Kászonaltíz",
"86461 CICEU":u"Csíkcsicsó",
"86479 LELICENI":u"Csíkszentlélek",
"85877 SUBCETATE":u"Gyergyóvárhegy",
"84415 FRUMOASA":u"Csíkszépvíz",
"85680 SANCRAIENI":u"Csíkszentkirály",
"83375 PAULENI-CIUC":u"Csíkpálfalva",
"84264 DEALU":u"Oroszhegy",
"84558 JOSENI":u"Gyergyóalfalu",
"84344 DITRAU":u"Gyergyóditró",
"85243 OCLAND":u"Oklánd",
"85920 SUSENI":u"Gyergyóújfalu",
"86446 COZMENI":u"Csíkkozmás",
"84629 LUETA":u"Lövéte",
"84656 LUNCA DE JOS":u"Gyimesközéplok",
"84460 GALAUTAS":u"Galócás",
"83491 ORAS BORSEC":u"Borszék",
"83981 CIUCSINGEORGIU":u"Csíkszentgyörgy",
"83785 ATID":u"Etéd",
"86133 TULGHES":u"Gyergyótölgyes",
"84825 LUPENI":u"Farkaslaka",
"85626 SICULENI":u"Madéfalva",
"85056 MERESTI":u"Homoródalmás",
"86311 VARSAG":u"Székelyvarság",
"85788 SANMARTIN":u"Csíkszentmárton",
"85467 SACEL":u"Székelyandrásfalva",
"85760 SINDOMINIC":u"Csíkszentdomokos",
"83847 AVRAMESTI":u"Szentábrahám",
"84175 CORUND":u"Korond",
"84102 CARTA":u"Csíkkarcfalva",
"83464 ORAS BALAN":u"Balánbánya",
"86501 SATU MARE":u"Máréfalva",
"84923 MARTINIS":u"Homoródszentmárton",
"86438 MADARAS":u"Csíkmadaras",
"86188 TUSNAD":u"Tusnád",
"86487 PORUMBENI":u"Nagygalambfalva",
"83632 MUNICIPIUL TOPLITA":u"Maroshévíz",
"83561 MUNICIPIUL GHEORGHENI":u"Gyergyószentmiklós",
"85528 SARMAS":u"Salamás",
"83197 FELICENI":u"Felsõboldogfalva",
"84237 DANESTI":u"Csíkdánfalva"}

import json
file('hun.json','w').write(json.dumps(hun))
In [134]:
#static plot iterator
def valueplot(county,loc,year):
    
    global perc
    perc=False
    
    dk=df[county]
    m=len(dk.xs([loc,'Masculin'],level=[0,2]).index)-1
    
    #crazy exception need to be handled sepparately, "Total" immovable at first row
    if loc==u'64504 MALNAS':
        x=np.arange(dk.xs([loc,'Masculin'],level=[0,2])[1:][year].count())
        yt=np.array(dk.xs([loc,'Total'],level=[0,2])[year].values[0],dtype=float)
        ym=np.array(dk.xs([loc,'Masculin'],level=[0,2])[1:][year].values,dtype=float)
        yf=np.array(dk.xs([loc,'Feminin'],level=[0,2])[1:][year].values,dtype=float)
    else:
        x=np.arange(dk.xs([loc,'Masculin'],level=[0,2])[:m][year].count())
        yt=np.array(dk.xs([loc,'Total'],level=[0,2])[year].values[m],dtype=float)
        ym=np.array(dk.xs([loc,'Masculin'],level=[0,2])[:m][year].values,dtype=float)
        yf=np.array(dk.xs([loc,'Feminin'],level=[0,2])[:m][year].values,dtype=float)
        
    age=round(np.sum([(2.5+i*5)*(ym[i]+yf[i]) for i in range(len(x))])/yt,1)
    plotter(county,loc,year,x,ym,yf,yt,age,'value')

def percplot(county,loc,year):
    
    global perc
    perc=True
    
    dk=df[county]
    m=len(dk.xs([loc,'Masculin'],level=[0,2]).index)-1
    
    #crazy exception need to be handled sepparately, "Total" immovable at first row
    if loc==u'64504 MALNAS':
        x=np.arange(dk.xs([loc,'Masculin'],level=[0,2])[1:][year].count())
        yt=np.array(dk.xs([loc,'Total'],level=[0,2])[year].values[0],dtype=float)
        ym=np.array(dk.xs([loc,'Masculin'],level=[0,2])[1:][year].values,dtype=float)
        yf=np.array(dk.xs([loc,'Feminin'],level=[0,2])[1:][year].values,dtype=float)
    else:
        x=np.arange(dk.xs([loc,'Masculin'],level=[0,2])[:m][year].count())
        yt=np.array(dk.xs([loc,'Total'],level=[0,2])[year].values[m],dtype=float)
        ym=np.array(dk.xs([loc,'Masculin'],level=[0,2])[:m][year].values,dtype=float)
        yf=np.array(dk.xs([loc,'Feminin'],level=[0,2])[:m][year].values,dtype=float)
    
    age=round(np.sum([(2.5+i*5)*(ym[i]+yf[i]) for i in range(len(x))])/yt,1)
    ym=ym/yt*1000
    yf=yf/yt*1000
    plotter(county,loc,year,x,ym,yf,yt,age,'perc')

def plotter(county,loc,year,x,ym,yf,yt,age,savepath):
    
    #if not in animation, then create nex axis
    global ax,static
        
    if static:
        fig = plt.figure(figsize=(11,7))
        ax = plt.axes(axisbg='#ffffff',axisbelow=True)
        with plt.xkcd(): ax.grid(color='#bbbbbb', linestyle='solid')
    else:
        ax.cla()

    with plt.xkcd():
        ax.barh(x,yf,color='r',zorder=2)
        ax.barh(x,-ym,color='k',zorder=2)

    if savepath=='value':
        yx=max([plt.xlim()[1],-plt.xlim()[0]])
    else: yx=50
        
    ax.set_xlim(-yx,yx)
    ax.set_ylim(0,len(x))
    ax.set_yticks(x)
    ax.set_yticklabels(x*5,size=14)
    
    ax.set_xticks(np.round(-yx*0.8+np.arange(0,yx*1.8,yx*0.2)).astype(int))
    if savepath=='value':
        labels=np.round(abs(-yx*0.8+np.arange(0,yx*1.8,yx*0.2))).astype(int)
        
    else:
        labels=(np.round(abs(-yx*0.8+np.arange(0,yx*1.8,yx*0.2))).astype(int)/10).astype(str)
        labels=[l+'%' for l in labels]
    ax.set_xticklabels(labels,size=14)
        
    ax.imshow(plt.imread('szekelypar.png'),extent=[int(-0.35*yx),int(0.35*yx),3.5,11.5],zorder=3,alpha=0.9)
    ax.imshow(plt.imread('szekelydata.png'),extent=[int(0.75*yx),int(0.98*yx),15,17.7],zorder=1,alpha=1)
    ax.imshow(plt.imread('tulipanos2.png'),extent=[int(-0.73*yx),int(-0.98*yx),15,17.5],zorder=1,alpha=1)
    ax.imshow(plt.imread('tulipanos2.png'),extent=[int(0.73*yx),int(0.98*yx),3,0.3],zorder=1,alpha=1)
    ax.imshow(plt.imread('tulipanos2.png'),extent=[int(-0.73*yx),int(-0.98*yx),3,0.3],zorder=1,alpha=1)
        
    ax.set_aspect(yx/12)
    
    with plt.xkcd():
        hpt=u"adat: insse.ro"
        ax.text(0.95*yx,3.7,hpt,size=10,horizontalalignment ="center",verticalalignment ="bottom",rotation=90,color="k")
        hpt=u"kép: chicagohungarians.com"
        ax.text(0.95*yx,7.5,hpt,size=10,horizontalalignment ="center",verticalalignment ="bottom",rotation=90,color="k")
        hpt=u"korfája"
        ax.text(0.02*yx,1.73,hpt,size=17,horizontalalignment ="left",verticalalignment ="top",rotation=0,color="k")
        hpt=yt.astype(int)
        ax.text(-0.02*yx,13.7,hpt,size=17,horizontalalignment ="right",verticalalignment ="top",rotation=0,color="r")
        hpt=u"lakos"
        ax.text(0.02*yx,13.7,hpt,size=17,horizontalalignment ="left",verticalalignment ="top",rotation=0,color="k")
        hpt=u"év"
        ax.text(0.02*yx,12.6,hpt,size=14,horizontalalignment ="left",verticalalignment ="top",rotation=0,color="k")
        hpt=age
        ax.text(-0.02*yx,12.6,hpt,size=14,horizontalalignment ="right",verticalalignment ="top",rotation=0,color="r")
        hpt=repr(year)[0:2]
        ax.text(-0.02*yx,14.75,hpt,size=48,horizontalalignment ="right",verticalalignment ="center",rotation=0,color="r")
        hpt=repr(year)[2:4]
        ax.text(0.02*yx,14.75,hpt,size=48,horizontalalignment ="left",verticalalignment ="center",rotation=0,color="k")
        hpt=u"@csaladenes"
        ax.text(-0.68*yx,17.7,hpt,size=12,horizontalalignment ="left",verticalalignment ="top",rotation=0,color="r")
        if loc=='TOTAL': 
            if county!=u"Székelyföld":
                hpt=county+' megye'
            else: hpt=county
        else: hpt=hun[loc]
        ax.text(-0.02*yx,1.73,hpt,size=17,horizontalalignment ="right",verticalalignment ="top",rotation=0,color="r")

    plt.subplots_adjust(0.05,0.05,0.95,0.95)
    if static:
        plt.show()
        if loc=='TOTAL': 
            if county==u"Kovászna": hpt="CV"
            elif county==u"Hargita": hpt="HR"
            elif county==u"Maros": hpt="MS"
            elif county==u"Mindhárom": hpt="3m"
            else: hpt="szf"
        else: hpt=loc
        fig.savefig('../korfak/'+savepath+'/'+hpt+'_'+repr(year)+'_korfa_population_pyramid.png')
In [135]:
static=True
valueplot(u'Maros',u'TOTAL',1999)
percplot(u'Maros',u'TOTAL',1999)
In [11]:
#szekelyfold aggregation

#define non-szekelyfold regions
non_szf=[
"119625 STINCENI",
"118931 RASTOLITA",
"116545 DEDA",
"119153 RUSII-MUNTI",
"115147 ALUNIS",
"114809 MUNICIPIUL REGHIN",
"115520 BATOS",
"115183 APOLD",
"114514 MUNICIPIUL SIGHISOARA",
"119206 SASCHIZ",
"114603 ALBESTI",
"116493 DANES",
"120218 VIISOARA",
"120343 ZAGAR",
"118469 NADES",
"115575 BAGACIU",
"118209 MICA",
"116395 CUCERDEA",
"115076 ADAMUS",
"114925 MUNICIPIUL TARNAVENI",
"115771 BICHIS",
"116046 CHETANI",
"117827 ORAS IERNUT",
"114710 ORAS LUDUS",
"118637 PAPIU ILARIAN",
"119386 SANGER",
"118370 MIHESU DE CIMPIE",
"119242 ORAS SARMASU",
"118824 POGACEAUA",
"119527 SINPETRU DE CIMPIE",
"115851 BREAZA",
"118799 PETELEA",
"115708 BEICA DE JOS",
"116224 COZMA",
"115600 BALA",
"118058 LUNCA BRADULUI",
"120138 VATAVA",
"116723 FARAGAU",
"117998 LUNCA",
"119590 SOLOVASTRU",
"115897 BRINCOVENESTI",
"119723 SUSENI",
"117783 IDECIU DE JOS",
"116340 CRAIESTI",
"119858 TAURENI",
"119974 VALEA LARGA",
"119750 SAULIA",
"120370 ZAU DE CIMPIE",
"117319 GURGHIU",
"117550 IBANESTI",
"117426 HODAC",
"120316 VOIVODENI",
"116938 GANESTI",
"116439 CUCI",
"115236 ATINTIS",
"115824 BOGATA"
]
In [98]:
#3 county aggregation
df[u"Mindhárom"]=df[u"Hargita"].replace(':',0).astype('float').loc["TOTAL"]\
            .add(df[u"Kovászna"].replace(':',0).astype('float').loc["TOTAL"])\
            .add(df[u"Maros"].replace(':',0).astype('float').loc["TOTAL"])
df[u"Mindhárom"]["Loc"] = u"TOTAL"
df[u"Mindhárom"].set_index("Loc", append=True, inplace=True)
df[u"Mindhárom"]=df[u"Mindhárom"].reorder_levels(['Loc', 'Age', 'Sex'])
In [99]:
static=True
valueplot(u"Mindhárom",u"TOTAL",1999)
percplot(u"Mindhárom",u"TOTAL",1999)
In [100]:
#szekelyland
df[u"Székelyföld"]=df[u"Mindhárom"].loc[u"TOTAL"]
for i in range(len(non_szf)):
    df[u"Székelyföld"]=df[u"Székelyföld"].add(-df[u"Maros"].loc[non_szf[i]].fillna(0).replace(':',0).astype('float').fillna(0))   
df[u"Székelyföld"]["Loc"] = u"TOTAL"
df[u"Székelyföld"].set_index("Loc", append=True, inplace=True)
df[u"Székelyföld"]=df[u"Székelyföld"].reorder_levels(['Loc', 'Age', 'Sex'])
In [107]:
static=True
valueplot(u"Székelyföld",u"TOTAL",2000)
percplot(u"Székelyföld",u"TOTAL",2000)