#!/usr/bin/env python # coding: utf-8 # In[1]: #szekelyland workbook - extrapolating from romanian values using mortality statistics from INSSE import pandas as pd, numpy as np # In[2]: df=pd.read_csv('exportPivot_POP206C.csv') de=pd.read_csv('exportPivot_POP206E.csv') # In[3]: de.columns # In[4]: df=df.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Clasificarea internationala a maladiilor - Revizia a X a 1994',u' Macroregiuni regiuni de dezvoltare si judete']) de=de.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Sexe',u' Grupe de varsta ',u' Macroregiuni regiuni de dezvoltare si judete']) # In[5]: de.head() # In[6]: for i in df.index.levels[0].unique(): print i # In[7]: for i in df.index.levels[1].unique(): print i # In[8]: d={ "Alte cauze":1094, "Boli ale aparatului circulator":1064, "Boli ale aparatului digestiv":1078, "Boli ale aparatului genito-urinar":1084, "Boli ale aparatului respirator":1072, "Boli ale sistemului nervos boli ale ochiului si anexele sale boli ale urechii si apofizei mastoide":9, "Boli endocrine de nutritie si metabolism":1051, "Boli infectioase si parazitare":1001, "Leziuni traumatice otraviri si alte consecinte ale cauzelor externe":1095, "Malformatii congenitale deformatii si anomalii cromozomiale":1093, "Sarcina nastere si lauzie":1087, "Total":1000, "Tulburari mentale si de comportament":1055, "Tumori":1026, "Unele afectiuni a caror origine se situeaza in perioada perinatala":1092, "din care: Boala ischemica a inimii":0, "din care: Boli cerebro-vasculare":0, "din care: Diabet zaharat":0, "din care: Tuberculoza":0} n0=[1048,1082,1083] n9=[1058,1062,1063] # In[9]: #calculate county share in romania total pop={} for j in df.index.levels[0].unique(): if j[6:] not in pop:pop[j[6:]]={} for k in df.index.levels[1].unique(): if d[k]!=0: if d[k] not in pop[j[6:]]:pop[j[6:]][str(d[k])]={} pop[j[6:]][str(d[k])]["ro"]=df.loc[j].loc[k].loc[' TOTAL'][0] try: a=df.loc[j].loc[k].loc[' Covasna'][0] except: a=0 try: b=df.loc[j].loc[k].loc[' Harghita'][0] except: b=0 try: c=df.loc[j].loc[k].loc[' Mures'][0] except: c=0 pop[j[6:]][str(d[k])]["szf"]=(a+b+c)*1.0/df.loc[j].loc[k].loc[' TOTAL'][0] # In[10]: for y in pop: for c in n9: if str(c) not in pop[y]:pop[y][str(c)]={} for m in ["szf","ro"]: pop[y][str(c)][m]=pop[y]['9'][m]/3.0 pop[y].pop('9'); for c in n0: if str(c) not in pop[y]:pop[y][str(c)]={} for m in ["szf","ro"]: pop[y][str(c)][m]=pop[y]['1000'][m] # In[11]: #calculate age deviation from national average e={"Feminin":"f","Masculin":"m","Total":"s"} pop2={} pop3={} for j in de.index.levels[0].unique(): if j[6:] not in pop2:pop2[j[6:]]={} if j[6:] not in pop3:pop3[j[6:]]={} for k in de.index.levels[1].unique(): if e[k] not in pop2[j[6:]]:pop2[j[6:]][e[k]]={} if e[k] not in pop3[j[6:]]:pop3[j[6:]][e[k]]={} for l in de.index.levels[2].unique(): age=l[:3].strip().strip('-') try: a=de.loc[j].loc[k].loc[l].loc[' Covasna'][0] except: a=0 try: b=de.loc[j].loc[k].loc[l].loc[' Harghita'][0] except: b=0 try: c=de.loc[j].loc[k].loc[l].loc[' Mures'][0] except: c=0 try: d=de.loc[j].loc[k].loc[l].loc[' TOTAL'][0] except: d=0 if age!='To': if age!='0': if age!='85': pop2[j[6:]][e[k]][age]=(a+b+c)*1.0 pop3[j[6:]][e[k]][age]=(d)*1.0 else: pop2[j[6:]][e[k]]['90']=(a+b+c)*3.0/10 pop2[j[6:]][e[k]]['95']=(a+b+c)*1.0/10 pop2[j[6:]][e[k]]['85']=(a+b+c)*6.0/10 pop3[j[6:]][e[k]]['90']=(d)*3.0/10 pop3[j[6:]][e[k]]['95']=(d)*1.0/10 pop3[j[6:]][e[k]]['85']=(d)*6.0/10 else: pop2[j[6:]][e[k]]['0']=(a+b+c)*1.0/5 pop2[j[6:]][e[k]]['1']=(a+b+c)*1.0/5 pop2[j[6:]][e[k]]['2']=(a+b+c)*1.0/5 pop2[j[6:]][e[k]]['3']=(a+b+c)*1.0/5 pop2[j[6:]][e[k]]['4']=(a+b+c)*1.0/5 pop3[j[6:]][e[k]]['0']=(d)*1.0/5 pop3[j[6:]][e[k]]['1']=(d)*1.0/5 pop3[j[6:]][e[k]]['2']=(d)*1.0/5 pop3[j[6:]][e[k]]['3']=(d)*1.0/5 pop3[j[6:]][e[k]]['4']=(d)*1.0/5 # In[12]: for i in pop2: for j in ["f","m"]: for a in pop2[i][j]: pop2[i][j][a]=pop2[i][j][a]/sum(pop2[i]["s"].values()) pop3[i][j][a]=pop3[i][j][a]/sum(pop3[i]["s"].values()) # In[13]: import zipfile,json #read RO data z = zipfile.ZipFile('db2/642.zip') ro = json.loads(z.open('data.json').read()) # In[14]: h=json.loads(file('hierarchy2.json').read()) # In[15]: szf=[] for i in ro: szf.append({"a":i["a"], "c":i["c"], "g":i["g"], "t":i["t"], "s":i["s"]*float(pop[str(i["t"])][h[i['c']]["group"]]["szf"])}) # In[16]: szf2=[] for i in ro: szf2.append({"a":i["a"], "c":i["c"], "g":i["g"], "t":i["t"], "s":i["s"]*float(pop[str(i["t"])][h[i['c']]["group"]]["szf"]) *float(pop2[str(i["t"])][i["g"]][str(i["a"])]) /float(pop3[str(i["t"])][i["g"]][str(i["a"])]) }) # In[17]: #save files try: import zlib compression = zipfile.ZIP_DEFLATED except: compression = zipfile.ZIP_STORED file('db2/data.json','w').write(json.dumps(szf)) zf = zipfile.ZipFile('db2/9999.zip', mode='w') zf.write('db2/data.json','data.json',compress_type=compression) zf.close() file('db2/data.json','w').write(json.dumps(szf2)) zf = zipfile.ZipFile('db2/9998.zip', mode='w') zf.write('db2/data.json','data.json',compress_type=compression) zf.close() # In[18]: #update dictionaries # In[19]: #run only once c=json.loads(file('countries.json').read()) c=[u'9998']+c file('countries.json','w').write(json.dumps(c)) # In[36]: c=json.loads(file('cnames.json').read()) c[u'9998']=u'Székelyland' file('cnames.json','w').write(json.dumps(c)) # In[37]: c=json.loads(file('hnames.json').read()) c[u'Székelyland']=u'Székelyföld' file('hnames.json','w').write(json.dumps(c)) # In[22]: #update population # In[23]: p=json.loads(file('pop.json').read()) # In[26]: x=pd.read_csv('exportPivot_POP107A.csv') # In[27]: x=x.drop([u' Medii de rezidenta',u' UM: Numar persoane'],axis=1).set_index([u'Varste si grupe de varsta',u' Sexe',u' Ani',u' Macroregiuni regiuni de dezvoltare si judete']) # In[28]: x=x.unstack(u' Macroregiuni regiuni de dezvoltare si judete') # In[29]: x=pd.DataFrame(x.T.sum()) # In[30]: sorted(x.index.levels[0].unique()) # In[31]: indice=[str(i)+' ani' for i in range(5)]+[str(i*5)+'-'+str((i+1)*5-1)+' ani' for i in range(1,17)] indice[5]='5- 9 ani' # In[32]: p['9999']={} p['9998']={} gg={"f":u" Feminin","m":u" Masculin"} for y in range(1999,2013): if str(y) not in p['9999']:p['9999'][str(y)]={} for g in ["f","m"]: if g not in p['9999'][str(y)]:p['9999'][str(y)][g]={} for i in indice: p['9999'][str(y)][g][str(i[:2].strip().strip('-'))]=str(float(x.loc[i].loc[gg[g]].loc[' Anul '+str(y)][0])) p['9999'][str(y)][g][str(85)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*6.0/10) p['9999'][str(y)][g][str(90)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*3.0/10) p['9999'][str(y)][g][str(95)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*1.0/10) # In[33]: p['9998']=p['9999'] # In[34]: #save updated population file file('pop.json','w').write(json.dumps(p)) # In[35]: p['9998']['2005']['m'] # In[60]: p['642']['2005']['f']