In [1]:
#szekelyland workbook - extrapolating from romanian values using mortality statistics from INSSE
import pandas as pd, numpy as np
In [2]:
df=pd.read_csv('exportPivot_POP206C.csv')
de=pd.read_csv('exportPivot_POP206E.csv')
In [3]:
de.columns
Out[3]:
Index([u'Sexe', u' Grupe de varsta ',
       u' Macroregiuni  regiuni de dezvoltare si judete', u' Ani',
       u' UM: Numar persoane', u' Valoare'],
      dtype='object')
In [4]:
df=df.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Clasificarea internationala a maladiilor - Revizia a X a 1994',u' Macroregiuni  regiuni de dezvoltare si judete'])
de=de.drop(u' UM: Numar persoane',axis=1).set_index([u' Ani',u'Sexe',u' Grupe de varsta ',u' Macroregiuni  regiuni de dezvoltare si judete'])
In [5]:
de.head()
Out[5]:
Valoare
Ani Sexe Grupe de varsta Macroregiuni regiuni de dezvoltare si judete
Anul 1999 Total Total TOTAL 265194
Anul 2000 Total Total TOTAL 255820
Anul 2001 Total Total TOTAL 259603
Anul 2002 Total Total TOTAL 269666
Anul 2003 Total Total TOTAL 266575
In [6]:
for i in df.index.levels[0].unique():
    print i
 Anul 1999
 Anul 2000
 Anul 2001
 Anul 2002
 Anul 2003
 Anul 2004
 Anul 2005
 Anul 2006
 Anul 2007
 Anul 2008
 Anul 2009
 Anul 2010
 Anul 2011
 Anul 2012
In [7]:
for i in df.index.levels[1].unique():
    print i
Alte cauze
Boli ale aparatului circulator
Boli ale aparatului digestiv
Boli ale aparatului genito-urinar
Boli ale aparatului respirator
Boli ale sistemului nervos  boli ale ochiului si anexele sale  boli ale urechii si apofizei mastoide
Boli endocrine  de nutritie si metabolism
Boli infectioase si parazitare
Leziuni traumatice  otraviri si alte consecinte ale cauzelor externe
Malformatii congenitale  deformatii si anomalii  cromozomiale
Sarcina  nastere si lauzie
Total
Tulburari mentale si de comportament
Tumori
Unele afectiuni a caror origine se situeaza in perioada perinatala
din care:   Boala ischemica a inimii
din care:  Boli cerebro-vasculare
din care:  Diabet zaharat
din care: Tuberculoza
In [8]:
d={
"Alte cauze":1094,
"Boli ale aparatului circulator":1064,
"Boli ale aparatului digestiv":1078,
"Boli ale aparatului genito-urinar":1084,
"Boli ale aparatului respirator":1072,
"Boli ale sistemului nervos  boli ale ochiului si anexele sale  boli ale urechii si apofizei mastoide":9,
"Boli endocrine  de nutritie si metabolism":1051,
"Boli infectioase si parazitare":1001,
"Leziuni traumatice  otraviri si alte consecinte ale cauzelor externe":1095,
"Malformatii congenitale  deformatii si anomalii  cromozomiale":1093,
"Sarcina  nastere si lauzie":1087,
"Total":1000,
"Tulburari mentale si de comportament":1055,
"Tumori":1026,
"Unele afectiuni a caror origine se situeaza in perioada perinatala":1092,
"din care:   Boala ischemica a inimii":0,
"din care:  Boli cerebro-vasculare":0,
"din care:  Diabet zaharat":0,
"din care: Tuberculoza":0}
n0=[1048,1082,1083]
n9=[1058,1062,1063]
In [9]:
#calculate county share in romania total
pop={}
for j in df.index.levels[0].unique():
    if j[6:] not in pop:pop[j[6:]]={}
    for k in df.index.levels[1].unique():
        if d[k]!=0:
            if d[k] not in pop[j[6:]]:pop[j[6:]][str(d[k])]={}
            pop[j[6:]][str(d[k])]["ro"]=df.loc[j].loc[k].loc[' TOTAL'][0]
            try: a=df.loc[j].loc[k].loc[' Covasna'][0]
            except: a=0
            try: b=df.loc[j].loc[k].loc[' Harghita'][0]
            except: b=0
            try: c=df.loc[j].loc[k].loc[' Mures'][0]
            except: c=0
            pop[j[6:]][str(d[k])]["szf"]=(a+b+c)*1.0/df.loc[j].loc[k].loc[' TOTAL'][0]
In [10]:
for y in pop:
    for c in n9:
        if str(c) not in pop[y]:pop[y][str(c)]={}
        for m in ["szf","ro"]:
            pop[y][str(c)][m]=pop[y]['9'][m]/3.0
    pop[y].pop('9');
    for c in n0:
        if str(c) not in pop[y]:pop[y][str(c)]={}
        for m in ["szf","ro"]:
            pop[y][str(c)][m]=pop[y]['1000'][m]
        
In [11]:
#calculate age deviation from national average
e={"Feminin":"f","Masculin":"m","Total":"s"}
pop2={}
pop3={}
for j in de.index.levels[0].unique():
    if j[6:] not in pop2:pop2[j[6:]]={}
    if j[6:] not in pop3:pop3[j[6:]]={}
    for k in de.index.levels[1].unique():
        if e[k] not in pop2[j[6:]]:pop2[j[6:]][e[k]]={}
        if e[k] not in pop3[j[6:]]:pop3[j[6:]][e[k]]={}
        for l in de.index.levels[2].unique():
            age=l[:3].strip().strip('-')
            try: a=de.loc[j].loc[k].loc[l].loc[' Covasna'][0]
            except: a=0
            try: b=de.loc[j].loc[k].loc[l].loc[' Harghita'][0]
            except: b=0
            try: c=de.loc[j].loc[k].loc[l].loc[' Mures'][0]
            except: c=0
            try: d=de.loc[j].loc[k].loc[l].loc[' TOTAL'][0]
            except: d=0
            if age!='To':
                if age!='0':
                    if age!='85':
                        pop2[j[6:]][e[k]][age]=(a+b+c)*1.0
                        pop3[j[6:]][e[k]][age]=(d)*1.0
                    else:
                        pop2[j[6:]][e[k]]['90']=(a+b+c)*3.0/10
                        pop2[j[6:]][e[k]]['95']=(a+b+c)*1.0/10
                        pop2[j[6:]][e[k]]['85']=(a+b+c)*6.0/10
                        pop3[j[6:]][e[k]]['90']=(d)*3.0/10
                        pop3[j[6:]][e[k]]['95']=(d)*1.0/10
                        pop3[j[6:]][e[k]]['85']=(d)*6.0/10
        
                else:
                    pop2[j[6:]][e[k]]['0']=(a+b+c)*1.0/5
                    pop2[j[6:]][e[k]]['1']=(a+b+c)*1.0/5
                    pop2[j[6:]][e[k]]['2']=(a+b+c)*1.0/5
                    pop2[j[6:]][e[k]]['3']=(a+b+c)*1.0/5
                    pop2[j[6:]][e[k]]['4']=(a+b+c)*1.0/5
                    pop3[j[6:]][e[k]]['0']=(d)*1.0/5
                    pop3[j[6:]][e[k]]['1']=(d)*1.0/5
                    pop3[j[6:]][e[k]]['2']=(d)*1.0/5
                    pop3[j[6:]][e[k]]['3']=(d)*1.0/5
                    pop3[j[6:]][e[k]]['4']=(d)*1.0/5
In [12]:
for i in pop2:
    for j in ["f","m"]:
        for a in pop2[i][j]:
            pop2[i][j][a]=pop2[i][j][a]/sum(pop2[i]["s"].values())
            pop3[i][j][a]=pop3[i][j][a]/sum(pop3[i]["s"].values())
In [13]:
import zipfile,json
#read RO data
z = zipfile.ZipFile('db2/642.zip') 
ro = json.loads(z.open('data.json').read())
In [14]:
h=json.loads(file('hierarchy2.json').read())
In [15]:
szf=[]
for i in ro:
    szf.append({"a":i["a"],
                "c":i["c"],
                "g":i["g"],
                "t":i["t"],
                "s":i["s"]*float(pop[str(i["t"])][h[i['c']]["group"]]["szf"])})  
In [16]:
szf2=[]
for i in ro:
    szf2.append({"a":i["a"],
                "c":i["c"],
                "g":i["g"],
                "t":i["t"],
                "s":i["s"]*float(pop[str(i["t"])][h[i['c']]["group"]]["szf"])
                          *float(pop2[str(i["t"])][i["g"]][str(i["a"])])
                          /float(pop3[str(i["t"])][i["g"]][str(i["a"])])
                })
In [17]:
#save files
try:
    import zlib
    compression = zipfile.ZIP_DEFLATED
except:
    compression = zipfile.ZIP_STORED

file('db2/data.json','w').write(json.dumps(szf))  
zf = zipfile.ZipFile('db2/9999.zip', mode='w')
zf.write('db2/data.json','data.json',compress_type=compression)
zf.close()
file('db2/data.json','w').write(json.dumps(szf2))  
zf = zipfile.ZipFile('db2/9998.zip', mode='w')
zf.write('db2/data.json','data.json',compress_type=compression)
zf.close()
In [18]:
#update dictionaries
In [19]:
#run only once
c=json.loads(file('countries.json').read())
c=[u'9998']+c
file('countries.json','w').write(json.dumps(c))  
In [36]:
c=json.loads(file('cnames.json').read())
c[u'9998']=u'Székelyland'
file('cnames.json','w').write(json.dumps(c))  
In [37]:
c=json.loads(file('hnames.json').read())
c[u'Székelyland']=u'Székelyföld'
file('hnames.json','w').write(json.dumps(c))  
In [22]:
#update population
In [23]:
p=json.loads(file('pop.json').read())
In [26]:
x=pd.read_csv('exportPivot_POP107A.csv')
In [27]:
x=x.drop([u' Medii de rezidenta',u' UM: Numar persoane'],axis=1).set_index([u'Varste si grupe de varsta',u' Sexe',u' Ani',u' Macroregiuni  regiuni de dezvoltare si judete'])
In [28]:
x=x.unstack(u' Macroregiuni  regiuni de dezvoltare si judete')
In [29]:
x=pd.DataFrame(x.T.sum())
In [30]:
sorted(x.index.levels[0].unique())
Out[30]:
['0 ani',
 '0- 4 ani',
 '1 ani',
 '10 ani',
 '10-14 ani',
 '11 ani',
 '12 ani',
 '13 ani',
 '14 ani',
 '15 ani',
 '15-19 ani',
 '16 ani',
 '17 ani',
 '18 ani',
 '19 ani',
 '2 ani',
 '20 ani',
 '20-24 ani',
 '21 ani',
 '22 ani',
 '23 ani',
 '24 ani',
 '25 ani',
 '25-29 ani',
 '26 ani',
 '27 ani',
 '28 ani',
 '29 ani',
 '3 ani',
 '30 ani',
 '30-34 ani',
 '31 ani',
 '32 ani',
 '33 ani',
 '34 ani',
 '35 ani',
 '35-39 ani',
 '36 ani',
 '37 ani',
 '38 ani',
 '39 ani',
 '4 ani',
 '40 ani',
 '40-44 ani',
 '41 ani',
 '42 ani',
 '43 ani',
 '44 ani',
 '45 ani',
 '45-49 ani',
 '46 ani',
 '47 ani',
 '48 ani',
 '49 ani',
 '5 ani',
 '5- 9 ani',
 '50 ani',
 '50-54 ani',
 '51 ani',
 '52 ani',
 '53 ani',
 '54 ani',
 '55 ani',
 '55-59 ani',
 '56 ani',
 '57 ani',
 '58 ani',
 '59 ani',
 '6 ani',
 '60 ani',
 '60-64 ani',
 '61 ani',
 '62 ani',
 '63 ani',
 '64 ani',
 '65 ani',
 '65-69 ani',
 '66 ani',
 '67 ani',
 '68 ani',
 '69 ani',
 '7 ani',
 '70 ani',
 '70-74 ani',
 '71 ani',
 '72 ani',
 '73 ani',
 '74 ani',
 '75 ani',
 '75-79 ani',
 '76 ani',
 '77 ani',
 '78 ani',
 '79 ani',
 '8 ani',
 '80 ani',
 '80-84 ani',
 '81 ani',
 '82 ani',
 '83 ani',
 '84 ani',
 '85 ani si peste',
 '9 ani',
 'Total']
In [31]:
indice=[str(i)+' ani' for i in range(5)]+[str(i*5)+'-'+str((i+1)*5-1)+' ani' for i in range(1,17)]
indice[5]='5- 9 ani'
In [32]:
p['9999']={}
p['9998']={}
gg={"f":u" Feminin","m":u" Masculin"}
for y in range(1999,2013):
    if str(y) not in p['9999']:p['9999'][str(y)]={}
    for g in ["f","m"]:
        if g not in p['9999'][str(y)]:p['9999'][str(y)][g]={}
        for i in indice:
            p['9999'][str(y)][g][str(i[:2].strip().strip('-'))]=str(float(x.loc[i].loc[gg[g]].loc[' Anul '+str(y)][0]))
        p['9999'][str(y)][g][str(85)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*6.0/10)
        p['9999'][str(y)][g][str(90)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*3.0/10)
        p['9999'][str(y)][g][str(95)]=str(float(x.loc['85 ani si peste'].loc[gg[g]].loc[' Anul '+str(y)][0])*1.0/10)
In [33]:
p['9998']=p['9999']
In [34]:
#save updated population file
file('pop.json','w').write(json.dumps(p))  
In [35]:
p['9998']['2005']['m']
Out[35]:
{'0': '6632.0',
 '1': '6498.0',
 '10': '35468.0',
 '15': '46996.0',
 '2': '6426.0',
 '20': '46190.0',
 '25': '51603.0',
 '3': '6473.0',
 '30': '50243.0',
 '35': '46590.0',
 '4': '6921.0',
 '40': '34284.0',
 '45': '41425.0',
 '5': '32264.0',
 '50': '39348.0',
 '55': '30272.0',
 '60': '27693.0',
 '65': '23561.0',
 '70': '19324.0',
 '75': '13498.0',
 '80': '7376.0',
 '85': '1777.2',
 '90': '888.6',
 '95': '296.2'}
In [60]:
p['642']['2005']['f']
Out[60]:
{u'0': u'104877.0',
 u'1': u'101698.0',
 u'10': u'590487.0',
 u'15': u'846418.0',
 u'2': u'101017.0',
 u'20': u'790489.0',
 u'25': u'870167.0',
 u'3': u'100385.0',
 u'30': u'823108.0',
 u'35': u'871298.0',
 u'4': u'104074.0',
 u'40': u'619479.0',
 u'45': u'775247.0',
 u'5': u'540506.0',
 u'50': u'779300.0',
 u'55': u'652751.0',
 u'60': u'528157.0',
 u'65': u'614468.0',
 u'70': u'525710.0',
 u'75': u'398790.0',
 u'80': u'238385.0',
 u'85': u'69058.0',
 u'90': u'28777.0',
 u'95': u'5685.0'}