import numpy as np, requests, zipfile, StringIO, pandas as pd, json, copy
#suppress warnings
import warnings
warnings.simplefilter(action = "ignore")
import matplotlib.pyplot as plt
%matplotlib inline
Model zip betöltése D3plus formátumra
# Load the Romania D3plus model zip and the shared item price tables.
# open() replaces the Python-2-only builtin file(); context managers
# make sure the handles are closed instead of leaking.
with zipfile.ZipFile('../zips2/183.zip') as z:  # 183 = Romania
    ro = json.loads(z.open('data.json').read())
with open('../json/items.json') as f:
    items = json.load(f)   # price samples, export side
with open('../json/items2.json') as f:
    items2 = json.load(f)  # price samples, import side -- TODO confirm split
INSSE adatok betöltése - a kódok alapján megkereshetjük a fileokat az insse.ro adatbázisban
# Load the INSSE source tables; the code embedded in each file name
# (e.g. AGR306ARO) identifies the table in the insse.ro database.
df=pd.read_csv('../db/exportPivot_AGR306ARO.csv') #production
dfe=pd.read_csv('../db/exportPivot_EXP101JRO.csv') #exports
dfi=pd.read_csv('../db/exportPivot_EXP102JRO.csv') #imports
Formázás
# Index the trade tables by (county, NC product section, month) and the
# production table by (forest category, region/county, year).
# NOTE: the leading spaces are part of the INSSE column headers as
# exported -- do not strip them.
dfe=dfe.set_index(['Judete',' Sectiuni si capitole conform Nomenclatorului Combinat (NC)',' Luni'])
dfi=dfi.set_index(['Judete',' Sectiuni si capitole conform Nomenclatorului Combinat (NC)',' Luni'])
df=df.set_index(['Categorii de paduri',u' Macroregiuni regiuni de dezvoltare si judete',u' Ani'])
Romániai megyék részesedése a romániai fa-importban
# -- County shares of Romanian wood/paper IMPORTS --
# Drop the first data column, pivot counties (index level 0) into
# columns keeping the ' Valoare' (value) measure, then transpose.
dk=dfi.drop(dfi.columns[0],axis=1)
de=dk.unstack(level=0)[u' Valoare']
dz=de.T
# NC section IX = wood products excl. furniture, section X = paper.
F=(dz[u' IX. Produse din lemn exclusiv mobilier'])
#F/=F.loc['T']  # disabled: would normalise by a 'T' total row -- TODO confirm it exists here
L=(dz[u' X. Hirtie si articole din acestea'])
#L/=L.loc['T']  # disabled: same normalisation, left off
D3plus dátumformázó - havi frekvenciájú adatok
# Accumulator for the D3plus monthly trade records built below.
g = []
# Romanian -> English month abbreviations, used when assembling the
# D3plus 'YYYY-Mon-01' date strings (monthly-frequency data).
mc = dict(zip(
    ('ian', 'feb', 'mar', 'apr', 'mai', 'iun',
     'iul', 'aug', 'sep', 'oct', 'noi', 'dec'),
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')))
# Monthly wood-import records per county (values converted EUR -> USD).
for month_col in F:
    series = F[month_col]
    for county in series.index:
        usd = series[county] * 1.1  # EUR/USD conversion
        # skip empty cells and the 'T' (total) row
        if str(usd) == 'nan' or county == 'T':
            continue
        # the column label encodes the period: chars 6:9 hold the
        # Romanian month abbreviation, the last 4 chars the year
        g.append({"flow": u"imp", "type": u"Fa",
                  "year": month_col[-4:] + '-' + mc[month_col[6:9]] + '-01',
                  "county": county, "v": usd})
# Monthly paper-import records per county (values converted EUR -> USD).
for month_col in L:
    series = L[month_col]
    for county in series.index:
        usd = series[county] * 1.1  # EUR/USD conversion
        # skip empty cells and the 'T' (total) row
        if str(usd) == 'nan' or county == 'T':
            continue
        # chars 6:9 of the label = Romanian month, last 4 chars = year
        g.append({"flow": u"imp", "type": u"Papír",
                  "year": month_col[-4:] + '-' + mc[month_col[6:9]] + '-01',
                  "county": county, "v": usd})
Romániai megyék részesedése a romániai fa-exportban
# -- County shares of Romanian wood/paper EXPORTS --
# Same pivot as the import section, except an explicit ' Total' index
# entry is dropped before the transpose (presumably the country
# aggregate -- verify against the import table, which has no such drop).
dk=dfe.drop(dfe.columns[0],axis=1)
de=dk.unstack(level=0)[u' Valoare']
dz=de.drop(' Total').T
F=(dz[u' IX. Produse din lemn exclusiv mobilier'])
#F/=F.loc['T']  # disabled normalisation by the total row
L=(dz[u' X. Hirtie si articole din acestea'])
#L/=L.loc['T']  # disabled normalisation by the total row
# Monthly wood-export records per county (values converted EUR -> USD).
for month_col in F:
    series = F[month_col]
    for county in series.index:
        usd = series[county] * 1.1  # EUR/USD conversion
        # skip empty cells and the 'T' (total) row
        if str(usd) == 'nan' or county == 'T':
            continue
        # chars 6:9 of the label = Romanian month, last 4 chars = year
        g.append({"flow": u"exp", "type": u"Fa",
                  "year": month_col[-4:] + '-' + mc[month_col[6:9]] + '-01',
                  "county": county, "v": usd})
# Monthly paper-export records per county (values converted EUR -> USD).
for month_col in L:
    series = L[month_col]
    for county in series.index:
        usd = series[county] * 1.1  # EUR/USD conversion
        # skip empty cells and the 'T' (total) row
        if str(usd) == 'nan' or county == 'T':
            continue
        # chars 6:9 of the label = Romanian month, last 4 chars = year
        g.append({"flow": u"exp", "type": u"Papír",
                  "year": month_col[-4:] + '-' + mc[month_col[6:9]] + '-01',
                  "county": county, "v": usd})
# Persist the import/export records; open() replaces the Python-2-only
# file() builtin and the context manager guarantees the handle is closed.
with open('../json/ro_impex.json', 'w') as f:
    json.dump(g, f)
Romániai megyék részesedése a romániai fa-kitermelésben
# -- County shares of Romanian timber PRODUCTION --
# Drop the first data column, pivot index level 1 (regions/counties)
# into columns keeping the ' Valoare' measure.
dk=df.drop(df.columns[0],axis=1)
de=dk.unstack(level=1)[' Valoare']
# Add a 'T' column holding each row's sum over the pivoted level --
# presumably the national total; confirm the level order against the CSV.
de['T']=de.T.sum()
dz=de.T
# Softwood = soft species + conifers; hardwood = hard species + beech + oak.
F=(dz['Diverse specii moi']+dz['Rasinoase'])
#F/=F.loc['T']  # disabled normalisation by the total row
L=(dz['Diverse specii tari']+dz['Fag']+dz['Stejar'])
#L/=L.loc['T']  # disabled normalisation by the total row
# Accumulator for the yearly production records.
h = []
# Softwood production per county and year -> D3plus records.
for year_col in F:
    series = F[year_col]
    for county in series.index:
        vol = series[county]  # the *1000.0 scaling is left disabled here
        # skip empty cells and the 'T' (total) column added above
        if str(vol) == 'nan' or county == 'T':
            continue
        # the 4-digit year is parsed from position 6 of the column label
        h.append({"flow": u"prod", "type": u"Fenyő",
                  "year": int(year_col[6:]),
                  "county": county, "v": vol})
# Hardwood (deciduous) production per county and year -> D3plus records.
for year_col in L:
    series = L[year_col]
    for county in series.index:
        vol = series[county]  # the *1000.0 scaling is left disabled here
        # skip empty cells and the 'T' (total) column added above
        if str(vol) == 'nan' or county == 'T':
            continue
        # the 4-digit year is parsed from position 6 of the column label
        h.append({"flow": u"prod", "type": u"Lombhullató",
                  "year": int(year_col[6:]),
                  "county": county, "v": vol})
# Persist the production records; open() replaces the Python-2-only
# file() builtin and the context manager guarantees the handle is closed.
with open('../json/ro_prod.json', 'w') as f:
    json.dump(h, f)
Adatok D3plus formátumban való elmentése
def reject_outliers(data, m=2.):
    """Return the elements of *data* that are positive and within *m*
    median-absolute-deviations of the median.

    Parameters
    ----------
    data : np.ndarray
        1-D numeric array of price samples.
    m : float
        Cut-off in (median-scaled) deviation units.

    Returns
    -------
    np.ndarray
        Filtered 1-D array (may be empty).
    """
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    # When the MAD is zero every point counts as "close".  Use an
    # all-zero score array rather than the scalar 0. of the original,
    # which relied on numpy's obscure boolean-scalar indexing
    # (data[True]); the returned values are identical.
    s = d / mdev if mdev else np.zeros_like(d, dtype=float)
    kept = data[s < m]      # drop outliers...
    return kept[kept > 0]   # ...then drop non-positive values
# Convert the trade records to the D3plus schema: 'p' keeps the raw
# value, 's' scales it by a robust (outlier-rejected) median unit price
# drawn from the year's price pool.
s = []
for rec in g:
    # the price tables only run to 2014, so clamp the lookup year
    yr = str(min(2014, int(rec['year'][:4])))
    # imports price against items2, exports against items
    prices = items2 if rec['flow'] == 'imp' else items
    if rec['type'] == 'Fa':
        # wood: pool coniferous + non-coniferous samples
        pool = prices['c'][yr] + prices['nc'][yr]
    else:
        # paper products
        pool = prices['pp'][yr]
    unit = np.median(reject_outliers(np.array(pool)))
    s.append({'item': rec['type'],
              'partner': rec['county'],
              'year': rec['year'],
              'p': rec['v'],
              'g': 'f' if rec['flow'] == 'imp' else 'm',
              's': rec['v'] * unit})
# Convert the production records to the D3plus schema.  Volumes are
# multiplied by 1000 here; price-table years are clamped to 1997-2014.
w = []
for rec in h:
    yr = str(min(2014, max(1997, int(rec['year']))))
    # same table split as for trade (all records here have flow 'prod',
    # so the 'imp' branch is kept only for symmetry with the original)
    prices = items2 if rec['flow'] == 'imp' else items
    # deciduous species price as non-coniferous, the rest as coniferous
    key = 'nc' if rec['type'] == u'Lombhullató' else 'c'
    unit = np.median(reject_outliers(np.array(prices[key][yr])))
    vol = rec['v'] * 1000
    w.append({'item': rec['type'],
              'partner': rec['county'],
              'year': rec['year'],
              'p': vol,
              'g': 'f' if rec['flow'] == 'imp' else 'm',
              's': vol * unit})
Globális JSON könyvtárak frissítése
# Prefer DEFLATE compression when zlib is available, fall back to
# uncompressed storage otherwise.
try:
    import zlib  # only probing availability; the module itself is unused
    compression = zipfile.ZIP_DEFLATED
except ImportError:  # narrowed from a bare except that hid real errors
    compression = zipfile.ZIP_STORED
def _write_zip(payload, zip_path):
    """Dump *payload* as JSON into dummy.json and store it as data.json
    inside the zip at *zip_path* (same staging-file scheme as before)."""
    with open('dummy.json', 'w') as f:
        json.dump(payload, f)
    with zipfile.ZipFile(zip_path, mode='w') as zf:
        zf.write('dummy.json', 'data.json', compress_type=compression)

# 998 = county trade, 997 = county production (see cdict update below).
_write_zip(s, '../zips2/998.zip')
_write_zip(w, '../zips2/997.zip')
# Register the two synthetic "countries" in the global country list
# (append-if-missing keeps the update idempotent).  open() replaces the
# Python-2-only file() builtin and closes the handles.
with open('../json/countries3.json') as f:
    c = json.load(f)
for label in (u'Románia (megyék)', u'Románia (kitermelés)'):
    if label not in c:
        c.append(label)
with open('../json/countries4.json', 'w') as f:
    json.dump(c, f)
# Map the synthetic country labels to their numeric codes and back.
with open('../json/cdict3.json') as f:
    d = json.load(f)
d[u'Románia (megyék)'] = 998
d[u'998'] = u'Románia (megyék)'
d[u'Románia (kitermelés)'] = 997
d[u'997'] = u'Románia (kitermelés)'
# Disabled: would also register every raw county label as its own key.
#for i in df.index.levels[1]:
#    d[i.strip()]=i.strip()
with open('../json/cdict4.json', 'w') as f:
    json.dump(d, f)
Megye-címkék magyarítása
# Romanian county names exactly as they appear in the INSSE data
# (including the leading space) -> Hungarian display labels.
r={" Alba":u"Fehér",
" Arad":"Arad",
" Arges":u"Argeș",
" Bacau":u"Bákó",
" Bihor":u"Bihar",
" Bistrita-Nasaud":"Beszterce",
" Botosani":u"Botoșani",
" Braila":u"Brăila",
" Brasov":u"Brassó",
" Buzau":u"Buzău",
" Calarasi":u"Călărași",
" Caras-Severin":u"Krassó-Szörény",
" Cluj":u"Kolozs",
" Constanta":"Konstanca",
" Covasna":u"Kovászna",
" Dambovita":u"Dâmbovița",
" Dolj":"Dolj",
" Galati":"Galac",
" Giurgiu":"Giurgiu",
" Gorj":"Gorj",
" Harghita":"Hargita",
" Hunedoara":"Hunyad",
" Ialomita":u"Ialomița",
" Iasi":u"Iași",
" Ilfov":"Ilfov",
" Maramures":u"Máramaros",
" Mehedinti":u"Mehedinți",
" Municipiul Bucuresti":"Bukarest",
" Mures":"Maros",
" Neamt":u"Neamț",
" Olt":"Olt",
" Prahova":"Prahova",
" Salaj":u"Szilágy",
" Satu Mare":u"Szatmár",
" Sibiu":"Szeben",
" Suceava":"Suceava",
" Teleorman":"Teleorman",
" Timis":"Temes",
" Tulcea":"Tulcea",
" Valcea":u"Vâlcea",
" Vaslui":"Vaslui",
" Vrancea":"Vrancea"}
# Extend the Hungarian display-name dictionary: the two synthetic
# country labels map to themselves, and every county label is added in
# both stripped and raw (leading-space) INSSE form.
with open('../json/hnames3.json') as f:
    e = json.load(f)
e[u'Románia (megyék)'] = u'Románia (megyék)'
e[u'Románia (kitermelés)'] = u'Románia (kitermelés)'
for county in df.index.levels[1]:
    e[county.strip()] = r[county]
    e[county] = r[county]
with open('../json/hnames4.json', 'w') as f:
    json.dump(e, f)