In [52]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
#load CPI
cpi=pd.read_html('IPC_8_5_2015.xls',header=0)[0]
cpi.columns=['Year']+range(5)
cpi=cpi.drop(range(1,5),axis=1)[2:]
cpi=cpi.set_index('Year')
cpi.head()
Out[2]:
0
Year
Anul 1991 14701010
Anul 1992 4735590
Anul 1993 1329834
Anul 1994 561690
Anul 1995 424681
In [3]:
#load first part of labor data
df=pd.read_csv('exportPivot_FOM103A.csv').reset_index(level=5).reset_index(level=4).drop('level_4',axis=1)
df.index=df.index.swaplevel(0,1)
df=df.unstack()
df.columns = df.columns.droplevel()
df=df.drop(u' Ani',axis=1)
df.columns=range(2001,2009)
df.head()
Out[3]:
2001 2002 2003 2004 2005 2006 2007 2008
Feminin A Agricultura vanatoare si silvicultura Alba 35.6 31.3 30 27.5 28 26.8 26.2 25.8
Arad 30.1 27.5 25.8 24.8 25.4 23.7 23.1 23
Arges 51.5 45.3 43.4 40.1 40.5 38.6 37.4 36.7
Bacau 48.6 43.2 40.9 37.6 37.9 36.2 35.2 34.7
Bihor 63.6 56.3 53.7 49.7 49.6 47.2 46 45
In [4]:
#load 2nd part of labor data
dg=pd.read_csv('exportPivot_FOM103A(1).csv').reset_index(level=5).reset_index(level=4).drop('level_4',axis=1)
dg.index=dg.index.swaplevel(0,1)
dg=dg.unstack()
dg.columns = dg.columns.droplevel()
dg=dg.drop(u' Ani',axis=1)
dg.columns=range(1992,2001)
dg.head()
Out[4]:
1992 1993 1994 1995 1996 1997 1998 1999 2000
Feminin A Agricultura vanatoare si silvicultura Alba 42.4 43.6 38.4 31.8 34.2 34.1 34.1 35.4 36.4
Arad 39.6 33.9 36.2 32.7 31.9 30.1 29.7 30.2 31.5
Arges 60.4 62.4 52.5 44.1 48.5 48.4 49 50.7 52.6
Bacau 59.7 61.3 48.4 42.5 46.1 46.2 46.3 48.2 50
Bihor 62.3 61.3 64.2 54.5 59.8 60.3 60.4 62.7 65.5
In [5]:
#merge dataframes
dg[range(2001,2009)]=df[range(2001,2009)]
dg.head()
Out[5]:
1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008
Feminin A Agricultura vanatoare si silvicultura Alba 42.4 43.6 38.4 31.8 34.2 34.1 34.1 35.4 36.4 35.6 31.3 30 27.5 28 26.8 26.2 25.8
Arad 39.6 33.9 36.2 32.7 31.9 30.1 29.7 30.2 31.5 30.1 27.5 25.8 24.8 25.4 23.7 23.1 23
Arges 60.4 62.4 52.5 44.1 48.5 48.4 49 50.7 52.6 51.5 45.3 43.4 40.1 40.5 38.6 37.4 36.7
Bacau 59.7 61.3 48.4 42.5 46.1 46.2 46.3 48.2 50 48.6 43.2 40.9 37.6 37.9 36.2 35.2 34.7
Bihor 62.3 61.3 64.2 54.5 59.8 60.3 60.4 62.7 65.5 63.6 56.3 53.7 49.7 49.6 47.2 46 45
In [6]:
#load first part of salary data
dh=pd.read_csv('exportPivot_FOM106A.csv').reset_index(level=5).reset_index(level=4).drop('level_4',axis=1)
dh.index=dh.index.swaplevel(0,1)
dh=dh.unstack()
dh.columns = dh.columns.droplevel()
dh=dh.drop(u' Ani',axis=1)[1:]
dh.columns=range(1990,2009)
dh.head()
Out[6]:
1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008
Total Agricultura vanatoare Alba NaN 6657 16198 51156 104888 172409 265364 433591 678615 1244730 1616274 1979834 2553320 3502950 4200240 479 649 836 936
Arad NaN 6772 16198 46126 115894 145146 236365 470299 728322 920535 1481668 2188808 2922893 3691041 4894790 465 638 805 870
Arges NaN 6388 16139 45937 94921 165966 250108 485230 737100 1158867 1385852 2071189 2954218 3407402 4675222 512 604 746 962
Bacau NaN 6395 16837 45838 116178 156111 236731 444497 663362 1226652 1755922 2865649 3200276 3391211 4701315 585 608 752 888
Bihor NaN 6952 17101 49267 108049 168891 241080 494484 695764 1171666 1340431 2098684 2669157 3968259 5331557 584 563 621 661
In [7]:
#load second part of labor data CAEN2
di=pd.read_csv('exportPivot_FOM103D.csv').reset_index(level=5).reset_index(level=4).drop('level_4',axis=1)
di.index=di.index.swaplevel(0,1)
di=di.unstack()
di.columns = di.columns.droplevel()
di=di.drop(u' Ani',axis=1)
di.columns=range(2009,2015)
di.head()
Out[7]:
2009 2010 2011 2012 2013 2014
Feminin A AGRICULTURA SILVICULTURA SI PESCUIT Alba 25.8 26.6 26 26.2 26.5 25
Arad 22.9 23.1 23.1 23.7 24 22.6
Arges 36.3 37.2 37.5 38.9 39.6 37.2
Bacau 34.4 35.3 35.3 36.7 37.5 35.2
Bihor 45 46 45.9 47.9 48.6 45.8
In [8]:
#load second part of salary data CAEN2
dj=pd.read_csv('exportPivot_FOM106E.csv').reset_index(level=5).reset_index(level=4).drop('level_4',axis=1)
dj.index=dj.index.swaplevel(0,1)
dj=dj.unstack()
dj.columns = dj.columns.droplevel()
dj=dj.drop(u' Ani',axis=1)
dj.columns=range(2009,2015)
dj.head()
Out[8]:
2009 2010 2011 2012 2013 2014
Feminin A AGRICULTURA SILVICULTURA SI PESCUIT Alba NaN NaN NaN 1318 926 1354
Arad NaN NaN NaN 958 1284 1053
Arges NaN NaN NaN 1018 1043 1030
Bacau NaN NaN NaN 979 1029 1031
Bihor NaN NaN NaN 807 882 1038
In [9]:
#load third part of salary data CAEN2, for global normalization
dk=pd.read_csv('exportPivot_FOM106F.csv').reset_index(level=5).reset_index(level=4).drop('level_4',axis=1)
dk.index=dk.index.swaplevel(0,1)
dk.index=dk.index.swaplevel(1,2)
dk=dk.unstack()
dk.columns = dk.columns.droplevel()
dk=dk.drop(u' Ani',axis=1)
dk.columns=range(2000,2014)
dk.head()
Out[9]:
2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
Feminin A AGRICULTURA SILVICULTURA SI PESCUIT Total NaN NaN NaN 356 475 503 602 733 897 986 1010 1012 1055 1141
B INDUSTRIA EXTRACTIVA Total NaN NaN NaN 721 873 1172 1408 1712 2315 2294 2480 2705 2946 3113
C INDUSTRIA PRELUCRATOARE Total NaN NaN NaN 377 479 567 627 749 915 1000 1085 1153 1212 1283
D PRODUCTIA SI FURNIZAREA DE ENERGIE ELECTRICA SI TERMICA GAZE APA CALDA SI AER CONDITIONAT Total NaN NaN NaN 753 916 1264 1466 1748 2273 2425 2576 2690 2836 2816
E DISTRIBUTIA APEI; SALUBRITATE GESTIONAREA DESEURILOR ACTIVITATI DE DECONTAMINARE Total NaN NaN NaN 434 532 648 757 928 1122 1209 1260 1337 1386 1548
In [10]:
#renormalize CAENs
In [11]:
cc={#CAEN 2
    'B  INDUSTRIA EXTRACTIVA':'C Industria extractiva',
    'N  ACTIVITATI DE SERVICII ADMINISTRATIVE SI ACTIVITATI DE SERVICII SUPORT':'L Administratie publica si aparare',
    'M  ACTIVITATI PROFESIONALE  STIINTIFICE SI TEHNICE':'M Invatamant',
    'J  INFORMATII SI COMUNICATII':'Posta si telecomunicatii',
    'S  ALTE ACTIVITATI DE SERVICII':'Celelalte activitati ale economiei nationale',
    'R  ACTIVITATI DE SPECTACOLE  CULTURALE SI RECREATIVE':'Celelalte activitati ale economiei nationale',
    'INDUSTRIE':'Industrie',
    'A  AGRICULTURA  SILVICULTURA SI PESCUIT': 'A Agricultura  vanatoare si silvicultura',
    'O  ADMINISTRATIE PUBLICA SI APARARE; ASIGURARI SOCIALE DIN SISTEMUL PUBLIC':'L Administratie publica si aparare',
    'P  INVATAMANT':'M Invatamant',
    'F  CONSTRUCTII':'F Constructii',
    'K  INTERMEDIERI FINANCIARE SI ASIGURARI':'J Intermedieri financiare',
    'C  INDUSTRIA PRELUCRATOARE':'D Industria prelucratoare',
    'TOTAL':'Total',
    'D  PRODUCTIA SI FURNIZAREA DE ENERGIE ELECTRICA SI TERMICA  GAZE  APA CALDA SI AER CONDITIONAT':'E Energie electrica si termica  gaze si apa',
    'Q  SANATATE SI ASISTENTA SOCIALA':'N Sanatate si asistenta sociala',
    'L  TRANZACTII IMOBILIARE':'K Tranzactii imobiliare si alte servicii',
    'G  COMERT CU RIDICATA SI CU AMANUNTUL; REPARAREA AUTOVEHICULELOR SI MOTOCICLETELOR':'G Comert',
    'CAEN Rev.2  (activitati ale economiei nationale)':'CAEN Rev.1 (activitati ale economiei nationale - sectiuni)',
    'E  DISTRIBUTIA APEI; SALUBRITATE  GESTIONAREA DESEURILOR  ACTIVITATI DE DECONTAMINARE':'E Energie electrica si termica  gaze si apa',
    'I  HOTELURI SI RESTAURANTE':'H Hoteluri si restaurante',
    'H  TRANSPORT SI DEPOZITARE':'Transport si depozitare',

    ' B  INDUSTRIA EXTRACTIVA':'C Industria extractiva',
    ' N  ACTIVITATI DE SERVICII ADMINISTRATIVE SI ACTIVITATI DE SERVICII SUPORT':'L Administratie publica si aparare',
    ' M  ACTIVITATI PROFESIONALE  STIINTIFICE SI TEHNICE':'M Invatamant',
    ' J  INFORMATII SI COMUNICATII':'Posta si telecomunicatii',
    ' S  ALTE ACTIVITATI DE SERVICII':'Celelalte activitati ale economiei nationale',
    ' R  ACTIVITATI DE SPECTACOLE  CULTURALE SI RECREATIVE':'Celelalte activitati ale economiei nationale',
    ' INDUSTRIE':'Industrie',
    ' A  AGRICULTURA  SILVICULTURA SI PESCUIT': 'A Agricultura  vanatoare si silvicultura',
    ' O  ADMINISTRATIE PUBLICA SI APARARE; ASIGURARI SOCIALE DIN SISTEMUL PUBLIC':'L Administratie publica si aparare',
    ' P  INVATAMANT':'M Invatamant',
    ' F  CONSTRUCTII':'F Constructii',
    ' K  INTERMEDIERI FINANCIARE SI ASIGURARI':'J Intermedieri financiare',
    ' C  INDUSTRIA PRELUCRATOARE':'D Industria prelucratoare',
    ' TOTAL':'Total',
    ' D  PRODUCTIA SI FURNIZAREA DE ENERGIE ELECTRICA SI TERMICA  GAZE  APA CALDA SI AER CONDITIONAT':'E Energie electrica si termica  gaze si apa',
    ' Q  SANATATE SI ASISTENTA SOCIALA':'N Sanatate si asistenta sociala',
    ' L  TRANZACTII IMOBILIARE':'K Tranzactii imobiliare si alte servicii',
    ' G  COMERT CU RIDICATA SI CU AMANUNTUL; REPARAREA AUTOVEHICULELOR SI MOTOCICLETELOR':'G Comert',
    ' CAEN Rev.2  (activitati ale economiei nationale)':'CAEN Rev.1 (activitati ale economiei nationale - sectiuni)',
    ' E  DISTRIBUTIA APEI; SALUBRITATE  GESTIONAREA DESEURILOR  ACTIVITATI DE DECONTAMINARE':'E Energie electrica si termica  gaze si apa',
    ' I  HOTELURI SI RESTAURANTE':'H Hoteluri si restaurante',
    ' H  TRANSPORT SI DEPOZITARE':'Transport si depozitare',

    'Posta si telecomunicatii':'Posta si telecomunicatii',
    'L Administratie publica si aparare':'L Administratie publica si aparare',
    'Silvicultura  exploatarea forestiera':'A Agricultura  vanatoare si silvicultura',
    'Industrie':'Industrie',
    'I Transport  depozitare si comunicatii':'Transport si depozitare',
    'Transport si depozitare':'Transport si depozitare',
    'B Pescuit si piscicultura':'A Agricultura  vanatoare si silvicultura',
    'C Industria extractiva':'C Industria extractiva',
    'Celelalte activitati ale economiei nationale':'Celelalte activitati ale economiei nationale',
    'A Agricultura  vanatoare si silvicultura':'A Agricultura  vanatoare si silvicultura',
    'F Constructii':'F Constructii',
    'H Hoteluri si restaurante':'H Hoteluri si restaurante',
    'K Tranzactii imobiliare si alte servicii':'K Tranzactii imobiliare si alte servicii',
    'N Sanatate si asistenta sociala':'N Sanatate si asistenta sociala',
    'G Comert':'G Comert',
    'Agricultura  vanatoare':'A Agricultura  vanatoare si silvicultura',
    'CAEN Rev.1 (activitati ale economiei nationale - sectiuni)':'CAEN Rev.1 (activitati ale economiei nationale - sectiuni)',
    'D Industria prelucratoare':'D Industria prelucratoare',
    'M Invatamant':'M Invatamant',
    'Total':'Total',
    'J Intermedieri financiare':'J Intermedieri financiare',
    'E Energie electrica si termica  gaze si apa':'E Energie electrica si termica  gaze si apa'}
In [12]:
rr={u' Harghita':'HR',
    u' Covasna':'CV',
    u' Mures':'MS',
    u' TOTAL':'RO',
    u'Total':'RO',
    u' Regiunea CENTRU':'TR',
    u' Regiunea VEST':'TR',
    u' Regiunea NORD-VEST':'TR'}
In [13]:
def interpolate(d,years,gfit=1,depth=2,polyorder=1,override=True):
    #depth * length of interpolation substrings will be taken to the left and right
    #for example for {1971:5,1972:6,1973:7,1974:5} interpolating it over 1969-1990
    #for the section 1960-1970 (2 elements) the values from 1972,1973,1974 (3 elements) will be taken with depth 1.5
    #for the section 1974-1990 (15 elements) all values  (4 elements) will be taken to extrapolate
    if (gfit>2): 
        print 'interpolate takes only 1 (polynomial) or 2 (exponential) as 3rd argument [default=2]'
        return
    mydict={}
    missing_points=[[]]
    for year in years:
        if year not in d.keys():
            missing_points[-1].append(year)
        else:
            missing_points.append([])
    for m in missing_points:
        if m:
            fit=gfit
            if ((m[-1]<np.sort(d.keys())[0])|(m[0]>np.sort(d.keys())[-1])): #check if it is ends of the interval, then extrapolate mean only
                if not override: fit=0
            
            if fit==0: #take average
                y = {k: d[k] for k in set(d.keys()).intersection(range(max(min(years),min(m)-int(3)),min(max(years),max(m)+int(3))+1))}
                for i in range(len(m)):
                    mydict[m[i]]=np.mean(y.values())
            elif fit==1:
                #intersector
                y = {k: d[k] for k in set(d.keys()).intersection(range(max(min(years),min(m)-int(depth*len(m))),min(max(years),max(m)+int(depth*len(m)))+1))}
                #print y
                w = np.polyfit(y.keys(),y.values(),polyorder) # obtaining regression parameters
                if (polyorder==1):
                    intersector=w[0]*np.array(m)+w[1]
                else:
                    intersector=w[0]*np.array(m)*np.array(m)+w[1]*np.array(m)+w[2]
                for i in range(len(m)):
                    mydict[m[i]]=max(0,intersector[i])
            else:
                #intersector
                y = {k: d[k] for k in set(d.keys()).intersection(range(max(min(years),min(m)-int(depth*len(m))),min(max(years),max(m)+int(depth*len(m)))+1))}
                #print y
                w = np.polyfit(y.keys(),np.log(y.values()),1) # obtaining log regression parameters (exp fitting)
                intersector=np.exp(w[1])*np.exp(w[0]*np.array(m))
                for i in range(len(m)):
                    mydict[m[i]]=max(0,intersector[i])
    
    #return interpolated points
    return mydict
In [16]:
years=range(1990,2015)
In [17]:
#Labor data
#CAEN 1
data={}
for i in dg.T.iteritems():
    if i[0][2] in {u' Harghita',u' Covasna',u' Mures',u' TOTAL',u' Regiunea CENTRU',u' Regiunea VEST',u' Regiunea NORD-VEST'}:
        if i[0][0] not in data: data[i[0][0]]={}
        if rr[i[0][2]] not in data[i[0][0]]: data[i[0][0]][rr[i[0][2]]]={}
        if i[0][1] not in {'Agricultura  vanatoare', 'Industrie', 'INDUSTRIE', ' INDUSTRIE'}: #eliminate doublecounting
            if cc[i[0][1]] not in data[i[0][0]][rr[i[0][2]]]: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]=i[1].astype(float)
            else: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]=data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]].add(i[1].astype(float),fill_value=0)

mydata={}
for i in data:
    if i not in mydata: mydata[i]={}
    for j in data[i]:
        if j not in mydata[i]: mydata[i][j]={}
        for k in data[i][j]:
            if k not in mydata[i][j]: mydata[i][j][k]={}
            for y in data[i][j][k].index:
                if not np.isnan(data[i][j][k].loc[y] ):
                    mydata[i][j][k][y]=data[i][j][k].loc[y] 
            mydata[i][j][k].update(interpolate(mydata[i][j][k],years))
In [18]:
#CAEN 2
data={}
for i in di.T.iteritems():
    if i[0][2] in {u' Harghita',u' Covasna',u' Mures',u' TOTAL',u' Regiunea CENTRU',u' Regiunea VEST',u' Regiunea NORD-VEST'}:
        if i[0][0] not in data: data[i[0][0]]={}
        if rr[i[0][2]] not in data[i[0][0]]: data[i[0][0]][rr[i[0][2]]]={}
        if i[0][1] not in {'Agricultura  vanatoare','Industrie', 'INDUSTRIE', ' INDUSTRIE'}: #eliminate doublecounting
            if cc[i[0][1]] not in data[i[0][0]][rr[i[0][2]]]: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]=i[1].astype(float)
            else: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]=data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]].add(i[1].astype(float),fill_value=0)
for i in data:
    if i not in mydata: mydata[i]={}
    for j in data[i]:
        if j not in mydata[i]: mydata[i][j]={}
        for k in data[i][j]:
            if k not in mydata[i][j]: mydata[i][j][k]={}
            for y in data[i][j][k].index:
                if not np.isnan(data[i][j][k].loc[y] ):
                    mydata[i][j][k][y]=data[i][j][k].loc[y] 
                if ((k=='K Tranzactii imobiliare si alte servicii') and (y>2008)): mydata[i][j][k].pop(y)
            mydata[i][j][k].update(interpolate(mydata[i][j][k],years))   
            
#calculate male
for i in {' Feminin'}:
    mydata[' Masculin']={}
    for j in mydata[i]:
        mydata[' Masculin'][j]={}
        for k in mydata[i][j]:
            mydata[' Masculin'][j][k]={}
            for y in mydata[i][j][k]:
                mydata[' Masculin'][j][k][y]=mydata[' Total'][j][k][y]-mydata[' Feminin'][j][k][y]
In [19]:
#Salary data
#CAEN 1
data={}
for i in dh.T.iteritems():
    if i[0][2] in {u' Harghita',u' Covasna',u' Mures',u' TOTAL',u' Regiunea CENTRU',u' Regiunea VEST',u' Regiunea NORD-VEST'}:
        if i[0][0] not in data: data[i[0][0]]={}
        if rr[i[0][2]] not in data[i[0][0]]: data[i[0][0]][rr[i[0][2]]]={}
        if i[0][1] not in {'Agricultura  vanatoare','Industrie', 'INDUSTRIE', ' INDUSTRIE'}: #eliminate doublecounting
            if cc[i[0][1]] not in data[i[0][0]][rr[i[0][2]]]: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]={}
            for y in i[1].index:
                if y not in data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]][y]=[]
                data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]][y].append((float)(i[1].loc[y]))

mydata2={}
for i in data:
    if i not in mydata2: mydata2[i]={}
    for j in data[i]:
        if j not in mydata2[i]: mydata2[i][j]={}
        for k in data[i][j]:
            if k not in mydata2[i][j]: mydata2[i][j][k]={}
            for y in data[i][j][k]:
                if not np.isnan(np.nanmean(data[i][j][k][y])):
                    mydata2[i][j][k][y]=np.nanmean(data[i][j][k][y])
            #mydata2[i][j][k].update(interpolate(mydata2[i][j][k],years))
#CAEN 2
data={}
for i in dj.T.iteritems():
    if i[0][2] in {u' Harghita',u' Covasna',u' Mures',u' TOTAL',u' Regiunea CENTRU',u' Regiunea VEST',u' Regiunea NORD-VEST'}:
        if i[0][0] not in data: data[i[0][0]]={}
        if rr[i[0][2]] not in data[i[0][0]]: data[i[0][0]][rr[i[0][2]]]={}
        if i[0][1] not in {'Agricultura  vanatoare'}: #eliminate doublecounting
            if cc[i[0][1]] not in data[i[0][0]][rr[i[0][2]]]: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]={}
            for y in i[1].index:
                if y not in data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]][y]=[]
                data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]][y].append((float)(i[1].loc[y]))

for i in data:
    if i not in mydata2: mydata2[i]={}
    for j in data[i]:
        if j not in mydata2[i]: mydata2[i][j]={}
        for k in data[i][j]:
            if k not in mydata2[i][j]: mydata2[i][j][k]={}
            for y in data[i][j][k]:
                if not np.isnan(np.nanmean(data[i][j][k][y])):
                    mydata2[i][j][k][y]=np.nanmean(data[i][j][k][y])
            #mydata2[i][j][k].update(interpolate(mydata2[i][j][k],years))
C:\Anaconda\lib\site-packages\numpy\lib\nanfunctions.py:598: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
In [20]:
#National salary data
#CAEN 2
data={}
for i in dk.T.iteritems():
    if i[0][2] in {u'Total'}:
        if i[0][0] not in data: data[i[0][0]]={}
        if rr[i[0][2]] not in data[i[0][0]]: data[i[0][0]][rr[i[0][2]]]={}
        if i[0][1] not in {'Agricultura  vanatoare','Industrie', 'INDUSTRIE', ' INDUSTRIE'}: #eliminate doublecounting
            if cc[i[0][1]] not in data[i[0][0]][rr[i[0][2]]]: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]={}
            for y in i[1].index:
                if y not in data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]]: data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]][y]=[]
                data[i[0][0]][rr[i[0][2]]][cc[i[0][1]]][y].append((float)(i[1].loc[y]))

mydata3={}
for i in data:
    if i not in mydata3: mydata3[i]={}
    for j in data[i]:
        if j not in mydata3[i]: mydata3[i][j]={}
        for k in data[i][j]:
            if k not in mydata3[i][j]: mydata3[i][j][k]={}
            for y in data[i][j][k]:
                if not np.isnan(np.nanmean(data[i][j][k][y])):
                    mydata3[i][j][k][y]=np.nanmean(data[i][j][k][y])
            #mydata2[i][j][k].update(interpolate(mydata2[i][j][k],years))
In [21]:
mydata3[' Total']['RO']['A Agricultura  vanatoare si silvicultura']
Out[21]:
{2000: 163.0,
 2001: 227.0,
 2002: 284.0,
 2003: 377.0,
 2004: 480.0,
 2005: 527.0,
 2006: 617.0,
 2007: 743.0,
 2008: 914.0,
 2009: 1007.0,
 2010: 1024.0,
 2011: 1044.0,
 2012: 1093.0,
 2013: 1179.0}
In [22]:
#calculate gender inequality
gii={" Masculin":{}," Feminin":{}}
for i in mydata3[' Masculin']['RO']:
    gii[" Masculin"][i]={}
    gii[" Feminin"][i]={}
    for j in mydata3[' Masculin']['RO'][i]:
        gii[" Masculin"][i][j]=mydata3[' Masculin']['RO'][i][j]/mydata3[' Total']['RO'][i][j]
        gii[" Feminin"][i][j]=mydata3[' Feminin']['RO'][i][j]/mydata3[' Total']['RO'][i][j]
    gii[" Masculin"][i].update(interpolate(gii[" Masculin"][i],years))
    gii[" Feminin"][i].update(interpolate(gii[" Feminin"][i],years))
    
#readjust to country-level earnings
GII={}
for i in gii: #sex
    if i not in GII: GII[i]={}
    for j in mydata2[i]: #judet
        if j not in GII[i]: GII[i][j]={}
        for k in gii[i]: #indicator
            if k not in GII[i][j]: GII[i][j][k]={}
            for y in gii[i][k]: #year
                try: 
                    GII[i][j][k][y]=gii[i][k][y]*mydata2[' Total'][j][k][y]
                    if np.isnan(GII[i][j][k][y]): GII[i][j][k].pop(y)
                except: pass
            GII[i][j][k].update(interpolate(GII[i][j][k],years,2,1.5))
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
C:\Anaconda\lib\site-packages\numpy\lib\polynomial.py:588: RankWarning: Polyfit may be poorly conditioned
  warnings.warn(msg, RankWarning)
In [23]:
#create cpi dict
cp={2013:100}
for i in cpi.T.iteritems():
    cp[(int)(i[0][5:])]=(int)(i[1])/100.0
cp.update(interpolate(cp,years))
In [24]:
cp
Out[24]:
{1990: 246664.29999992251,
 1991: 147010.1,
 1992: 47355.9,
 1993: 13298.34,
 1994: 5616.9,
 1995: 4246.81,
 1996: 3059.46,
 1997: 1200.9,
 1998: 754.83,
 1999: 517.7,
 2000: 355.4,
 2001: 264.3,
 2002: 215.69,
 2003: 187.11,
 2004: 167.25,
 2005: 153.42,
 2006: 143.97,
 2007: 137.33,
 2008: 127.34,
 2009: 120.6,
 2010: 113.67,
 2011: 107.45,
 2012: 103.98,
 2013: 100,
 2014: 96.019999999999527}
In [25]:
#create final data
DATA={}
for i in GII:
    if i not in DATA: DATA[i]={}
    for j in GII[i]:
        if j not in DATA[i]: DATA[i][j]={}
        for k in GII[i][j]:
            if k not in DATA[i][j]: DATA[i][j][k]={}
            for y in GII[i][j][k]:
                yr=repr(y)
                DATA[i][j][k][yr]={}
                DATA[i][j][k][yr]['munka']=mydata[i][j][k][y]
                salary=GII[i][j][k][y]
                DATA[i][j][k][yr]['fizu']=salary
                if y<2005: salary/=10000.0
                DATA[i][j][k][yr]['realfizu']=salary*cp[y]/100.0
                if k in {'Celelalte activitati ale economiei nationale','N Sanatate si asistenta sociala'}:
                    DATA[i][j][k][yr]['realfizu']=DATA[i][j][k][yr]['fizu']
In [26]:
ff={'Total':u'Összesen',
'H Hoteluri si restaurante':u'Vendéglátóipar',
'C Industria extractiva':u'Asványkincsek, bányászat',
'Posta si telecomunicatii':u'Távközlés',
'L Administratie publica si aparare':u'Közügy, hatóság',
'Celelalte activitati ale economiei nationale':u'Más',
'K Tranzactii imobiliare si alte servicii':u'Ingatlan',
'N Sanatate si asistenta sociala':u'Egészségügy',
'F Constructii':u'Építőipar',
'D Industria prelucratoare':u'Feldolgozóipar',
'M Invatamant':u'Oktatás',
'Industrie':u'Összes ipar',
'G Comert':u'Kereskedelem',
'J Intermedieri financiare':u'Pénzügy',
'E Energie electrica si termica  gaze si apa':u'Energiaipar',
'A Agricultura  vanatoare si silvicultura':u'Mezőgasdaság',
'Transport si depozitare':u'Szállítás'}
In [27]:
#save data
import json

for county in DATA[' Feminin']:
    DATA2=[]
    for i in DATA[' Feminin'][county]:
        helper={}
        helper["name"]=ff[i]
        helper["region"]=ff[i]
        helper["income"]=[]
        helper["lifeExpectancy"]=[]
        helper["population"]=[]
        helper["avgwage"]=[]
        for syear in range(1990,2015):
            year=repr(syear)
            helper["income"].append([syear,DATA[' Feminin'][county][i][year]['munka']/\
                                     (DATA[' Feminin'][county][i][year]['munka']+DATA[' Masculin'][county][i][year]['munka'])])
            helper["lifeExpectancy"].append([syear,DATA[' Feminin'][county][i][year]['realfizu']/\
                                     (DATA[' Masculin'][county][i][year]['realfizu'])])
            helper["avgwage"].append([syear,DATA[' Masculin'][county][i][year]['realfizu']])
            helper["population"].append([syear,(DATA[' Feminin'][county][i][year]['munka']+\
                                             DATA[' Masculin'][county][i][year]['munka'])*1000]) 
        DATA2.append(helper)
    file(county+'nations.json','w').write(json.dumps(DATA2))
In [28]:
#szekelyfold
DATA2=[]
for i in DATA[' Feminin']['HR']:
    helper={}
    helper["name"]=ff[i]
    helper["region"]=ff[i]
    helper["income"]=[]
    helper["lifeExpectancy"]=[]
    helper["population"]=[]
    helper["avgwage"]=[]
    for syear in range(1990,2015):
        year=repr(syear)
        helper["income"].append([syear,(DATA[' Feminin']['HR'][i][year]['munka']+\
                                        DATA[' Feminin']['CV'][i][year]['munka']+\
                                        DATA[' Feminin']['MS'][i][year]['munka'])/\
                                 (DATA[' Feminin']['HR'][i][year]['munka']+DATA[' Masculin']['HR'][i][year]['munka']+\
                                  DATA[' Feminin']['CV'][i][year]['munka']+DATA[' Masculin']['CV'][i][year]['munka']+\
                                  DATA[' Feminin']['MS'][i][year]['munka']+DATA[' Masculin']['MS'][i][year]['munka'])])
        helper["lifeExpectancy"].append([syear,(DATA[' Feminin']['HR'][i][year]['realfizu']+\
                                         DATA[' Feminin']['CV'][i][year]['realfizu']+\
                                         DATA[' Feminin']['MS'][i][year]['realfizu'])/\
                                 (DATA[' Masculin']['HR'][i][year]['realfizu']+\
                                  DATA[' Masculin']['CV'][i][year]['realfizu']+\
                                  DATA[' Masculin']['MS'][i][year]['realfizu'])])
        helper["avgwage"].append([syear,(DATA[' Masculin']['HR'][i][year]['realfizu']+\
                                  DATA[' Masculin']['CV'][i][year]['realfizu']+\
                                  DATA[' Masculin']['MS'][i][year]['realfizu'])/3.0])
        helper["population"].append([syear,((DATA[' Feminin']['HR'][i][year]['munka']+\
                                             DATA[' Feminin']['CV'][i][year]['munka']+\
                                             DATA[' Feminin']['MS'][i][year]['munka'])+\
                                         (DATA[' Masculin']['HR'][i][year]['munka']+\
                                          DATA[' Masculin']['CV'][i][year]['munka']+\
                                          DATA[' Masculin']['MS'][i][year]['munka']))*1000]) 
    DATA2.append(helper)
file('SZFnations.json','w').write(json.dumps(DATA2))
In [31]:
#load first part of labor data
df=pd.read_csv('exportPivot_FOM106F(1).csv').reset_index(level=5).reset_index(level=4).drop('level_4',axis=1)
df.index=df.index.swaplevel(0,1)
df=df.unstack()
df.columns = df.columns.droplevel()
df=df.drop(u' Ani',axis=1)
df.columns=range(2000,2014)
df=df.drop(range(2000,2003),axis=1)
df=df.astype(float)
df.head()
Out[31]:
2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
Feminin Total 01 Agricultura vanatoare si servicii anexe 339 461 486 595 721 866 982 994 981 1014 1097
05 Extractia carbunelui superior si inferior 755 824 1109 1295 1577 1849 1838 1866 1842 1955 2104
06 Extractia petrolului brut si a gazelor naturale 817 1018 1394 1678 2040 3048 3057 3233 3726 4213 4517
07 Extractia minereurilor metalifere 553 717 819 945 1207 1512 1669 1879 2091 2193 2141
08 Alte activitati extractive 610 717 825 938 1066 1387 1363 1500 1685 1629 1623
In [74]:
dm=df.loc[' Feminin'].loc[u'Total']/df.loc[' Masculin'].loc[u'Total']*100
dm=dm.astype(int)
dm.index.name=u'Agazat'
dm.to_csv('data.csv')
In [72]:
dm.head()
Out[72]:
2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
name
01 Agricultura vanatoare si servicii anexe 98 106 99 101 102 101 100 102 99 97 97
05 Extractia carbunelui superior si inferior 73 70 79 80 83 78 75 75 74 73 79
06 Extractia petrolului brut si a gazelor naturale 97 97 99 93 96 108 105 116 121 119 118
07 Extractia minereurilor metalifere 77 89 83 84 81 90 97 93 98 101 100
08 Alte activitati extractive 110 106 110 111 108 119 112 115 121 123 119