In [1]:
import numpy as np, pandas as pd
from pygeocoder import Geocoder
import matplotlib.pyplot as plt
%matplotlib inline
In [82]:
# Load the religion table (sheet title: "13. POPULATIA STABILA DUPA RELIGIE ...") from the census site.
df=pd.read_excel('http://www.recensamantromania.ro/wp-content/uploads/2013/07/sR_TAB_13.xls')

# The sheet title ends up as the header of the first column, which holds the row labels.
title_col=u'13. POPULATIA  STABILA DUPA RELIGIE  - JUDETE, MUNICIPII, ORASE, COMUNE'
row_labels=df[title_col]

# megye collects [row label, cell value] for every row in the scanned range whose
# neighbours directly above and below are both empty in the label column
# (presumably the county header rows -- confirm against the spreadsheet).
megye=[]
for i in df.index[6:3434]:
    above,below=int(i)-1,int(i)+1
    # A row at the edge of the sheet has no neighbour on one side; skip it explicitly
    # instead of relying on a swallowed KeyError.
    if above not in row_labels.index or below not in row_labels.index:
        continue
    # pd.isnull is False for text cells, where np.isnan would raise TypeError.
    if pd.isnull(row_labels.loc[above]) and pd.isnull(row_labels.loc[below]):
        megye.append([i,row_labels.loc[i]])
In [83]:
#run once only! (guarded: once the columns have been renamed, re-running is a no-op)
if u'Falu' not in df.columns:
    # Drop the two unnamed spreadsheet columns so that exactly 23 columns remain for the rename below.
    df=df.drop([u'Unnamed: 1',u'Unnamed: 24'],axis=1)
    # Positional rename: the first column becomes u'Falu', the rest one label per religion column.
    df.columns=[u'Falu',u'Ortodox',u'Katolikus',u'Református',u'Pünkösdista',u'Görög katolikus',u'Baptista',u'Adventista',u'Muzulmán',u'Unitárius',u'Jehova tanúja',u'Lutheránus evangélikus',u'Ókatolikus',u'Lutheránus',u'Szerb ortodox',u'Evangélikus',u'Kálvinista',u'Zsidó',u'Örmény',u'Más',u'Nem vallásos',u'Ateista',u'N/A']
    # The last column is discarded right after being named.
    df=df.drop(u'N/A',axis=1)
In [84]:
#run once only! (label-based slice, so an accidental re-run no longer drops 7 more rows)
# Keep the rows from label 7 onwards. On the first run this equals df.index[7:];
# assumes df still carries the default 0..n-1 integer index from read_excel -- confirm.
df=df.loc[7:]
In [6]:
# Walk the cleaned frame row by row and fill three dictionaries:
#   data  : megye label (first 2 chars stripped) -> u'Falu' label (first 3 chars stripped) -> {column: value}
#   data2 : u'Falu' label (first 2 chars stripped) of rows that equal the current megye entry -> {column: value}
#   data3 : u'Falu' label (first 2 chars stripped) -> {column: value}, flat, without the megye level
# NOTE(review): data strips three leading characters from the settlement label while data3
# strips two -- confirm the difference is intended.
# NOTE(review): `.ix` was removed in pandas 1.0; this cell needs an older pandas as written.
data={}
data2={}
data3={}
ez=0  # position in `megye` of the marker entry currently being compared against
for i in df.index:
    try:
        # Advance to the next marker once the current marker's row label lies before row i.
        if megye[ez][0]<i: ez+=1
        # Row label differs from the current marker's text -> treated as a settlement row.
        if not (megye[ez][1]==df.ix[int(i),u'Falu']):
            # Skip the two section headings.
            # NOTE(review): the np.NaN / 'NaN' entries only match by identity or equality,
            # and NaN != NaN -- confirm empty cells are really filtered here.
            if df.ix[int(i),u'Falu'] not in [u'  A. MUNICIPII SI ORASE',u'  B. COMUNE',np.NaN,'NaN']:
                # Settlement rows are filed under the previous marker (ez-1).
                # NOTE(review): while ez is 0, megye[ez-1] wraps around to the last entry of megye.
                if megye[ez-1][1][2:] not in data: data[megye[ez-1][1][2:]]={}
                data[megye[ez-1][1][2:]][df.ix[int(i),u'Falu'][3:]]={}
                data3[df.ix[int(i),u'Falu'][2:]]={}
                # Copy every column after u'Falu' unless the cell is u'*' or u'-'.
                for j in df.columns[1:]:
                    if df.ix[int(i),j] not in [u'*',u'-',np.NaN,'NaN']:
                        data[megye[ez-1][1][2:]][df.ix[int(i),u'Falu'][3:]][j]=df.ix[int(i),j]   
                        data3[df.ix[int(i),u'Falu'][2:]][j]=df.ix[int(i),j] 
        # Row label equals the current marker's text -> stored in data2 under its own name.
        else: 
            if df.ix[int(i),u'Falu'] not in [u'  A. MUNICIPII SI ORASE',u'  B. COMUNE',np.NaN,'NaN']:
                data2[df.ix[int(i),u'Falu'][2:]]={}
                for j in df.columns[1:]:
                    if df.ix[int(i),j] not in [u'*',u'-',np.NaN,'NaN']:
                        data2[df.ix[int(i),u'Falu'][2:]][j]=df.ix[int(i),j]  
    # NOTE(review): the bare except silently skips any row that raises (e.g. slicing a
    # non-string label). Once ez reaches len(megye), megye[ez] raises IndexError, so every
    # row after the last marker is skipped too -- presumably why Bucharest is added by hand
    # in the next cell; confirm.
    except: pass
In [10]:
# Register Bucharest in `data` as a county holding a single settlement of the
# same name, reusing the record already stored for it in data2.
bucharest_key='MUNICIPIUL BUCURESTI'
data[bucharest_key]={}
data[bucharest_key][bucharest_key]=data2[bucharest_key]
In [11]:
#save religion data
import json

def _write_json(path,obj):
    """Serialise `obj` with json.dumps and write it to `path`, closing the file afterwards.

    `open` inside `with` replaces the Python-2-only `file()` builtin, whose handle
    was never closed explicitly.
    """
    with open(path,'w') as handle:
        handle.write(json.dumps(obj))

_write_json('data.json',data)
_write_json('data2.json',data2)
_write_json('data3.json',data3)
In [163]:
#county name converter
# cc maps ' ' + upper-cased column 11 of ro.csv to upper-cased column 9; the first row
# seen for a key wins. (Column 9 is used as the county key when coords is built; column 11
# is presumably the county abbreviation -- confirm against ro.csv.)
# iterrows()/.iloc replace .T.iteritems() and integer Series[...] lookups, which newer
# pandas releases no longer support.
cc={}
for row_label,row in pd.read_csv('ro.csv').iterrows():
    cc_key=' '+row.iloc[11].upper()
    if cc_key not in cc: cc[cc_key]=row.iloc[9].upper()

#fix db
# Hand-written overrides, keyed by the exact text looked up in cc when the Hungarian
# names are parsed (presumably suffixes in magyar.csv that ro.csv cannot resolve -- confirm).
cc['Bicazu ']='BACAU'
cc['Municipiul Brasov']='BRASOV'
cc['Oras intorsura ']='COVASNA'
cc['Sanmihaiu de ']='MURES'
cc['Municipiul Resita CS']='CARAS-SEVERIN'


#hungarian settlement names, where applicable
import json  # repeated here so the cell does not depend on another cell having imported it

# hun3: county (via cc) -> upper-cased commune -> value of column 2 of magyar.csv.
# Column 1 is read as "<commune>,<county text>", column 0 as the settlement name with one
# trailing character that gets stripped, and column 2 as the Hungarian name
# (assumed from the heading comment -- confirm against magyar.csv).
hun3={}
dh=pd.read_csv('magyar.csv',sep='|').dropna(axis=0)
for row_label,row in dh.iterrows():
    try:
        loc_field=row.iloc[1]
        comma_at=str.find(loc_field,',')  # -1 when there is no comma, same as before
        m=cc[loc_field[comma_at+1:]] #county
        if m not in hun3: hun3[m]={}
        f=loc_field[:comma_at].upper() #commune
        hu_name=row.iloc[2]
        if hu_name:
            ro_name=row.iloc[0].upper()[:-1]
            if ro_name==f: # village
                hun3[m][f]=hu_name
            if 'MUNICIPIUL '+ro_name==f: # city
                hun3[m][f]=hu_name
            if 'ORAS '+ro_name==f: # town
                # NOTE(review): towns store repr(...) while villages and cities store the raw
                # value. Kept unchanged because consumers of hun2.json may rely on it --
                # confirm whether this is intentional.
                hun3[m][f]=repr(hu_name)
    except (KeyError,TypeError,AttributeError,IndexError,ValueError):
        # Best effort: skip rows with an unknown county text or malformed fields, but no
        # longer swallow KeyboardInterrupt or unrelated failures.
        continue

# `open` inside `with` replaces the Python-2-only `file()` whose handle was never closed.
with open('hun2.json','w') as hun_out:
    hun_out.write(json.dumps(hun3))
In [88]:
#parse country for settlement coordinates
# coords: upper-cased column 9 (county) -> upper-cased place name -> [column 0, column 1].
# Every row is stored under column 2; rows whose column 5 is not 'V' are also stored
# under column 8 (presumably 'V' marks villages -- confirm against ro.csv).
# Also extends cc, which must already exist from the county name converter cell.
# iterrows()/.iloc replace .T.iteritems() and integer Series[...] lookups, which newer
# pandas releases no longer support.
coords={}
de=pd.read_csv('ro.csv')
for row_label,row in de.iterrows():
    county_key=row.iloc[9].upper()
    if county_key not in coords: coords[county_key]={}
    if row.iloc[5]!='V':
        coords[county_key][row.iloc[8].upper()]=[row.iloc[0],row.iloc[1]]
    # A separate list per entry, so the two keys never share one mutable object.
    coords[county_key][row.iloc[2].upper()]=[row.iloc[0],row.iloc[1]]
    # Unlike the converter cell, this key has no leading space.
    abbrev_key=row.iloc[11].upper()
    if abbrev_key not in cc: cc[abbrev_key]=county_key

#fix db
def _geocode_swapped(query):
    """Geocode `query` once and return [coordinates[1], coordinates[0]] as a new list.

    Each address was previously sent to the geocoding service twice, once per element.
    """
    found=Geocoder.geocode(query).coordinates
    return [found[1],found[0]]

coords['MURES']['ORAS SANGEORGIU DE PADURE']=_geocode_swapped('SANGEORGIU DE PADURE')
riciu_point=_geocode_swapped('RICIU, MURES, ROMANIA')
coords['MURES']['RICIU']=riciu_point
# Expose Bucharest under a county key of the same name (shares the list stored under 'BUCURESTI').
coords['MUNICIPIUL BUCURESTI']={'MUNICIPIUL BUCURESTI':coords['BUCURESTI']['MUNICIPIUL BUCURESTI']}
# NOTE(review): the Harghita entry uses the result of the 'RICIU, MURES, ROMANIA' query,
# exactly as before -- confirm this is intended and not a copy-paste slip.
coords['HARGHITA']['RICIU']=list(riciu_point)
import json
# `open` inside `with` replaces the Python-2-only `file()` whose handle was never closed.
with open('coords2.json','w') as coords_out:
    coords_out.write(json.dumps(coords))