#!/usr/bin/env python
# coding: utf-8

# ## SZÉKELYDATA Forests
# ### Data-loading and formatting notebook for the Romanian counties

# In[79]:

import numpy as np, requests, zipfile, pandas as pd, json, copy
# suppress warnings
import warnings
warnings.simplefilter(action="ignore")


# In[80]:

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# Loading the model zip for the D3plus format

# In[81]:

z = zipfile.ZipFile('../zips2/183.zip') # Romania
ro=json.loads(z.open('data.json').read())
items=json.loads(open('../json/items.json').read())
items2=json.loads(open('../json/items2.json').read())


# Loading the INSSE data - the files can be looked up in the insse.ro database by these codes

# In[82]:

df=pd.read_csv('../db/exportPivot_AGR306ARO.csv')  # production
dfe=pd.read_csv('../db/exportPivot_EXP101JRO.csv') # exports
dfi=pd.read_csv('../db/exportPivot_EXP102JRO.csv') # imports


# Formatting

# In[83]:

dfe=dfe.set_index(['Judete',' Sectiuni si capitole conform Nomenclatorului Combinat (NC)',' Luni'])
dfi=dfi.set_index(['Judete',' Sectiuni si capitole conform Nomenclatorului Combinat (NC)',' Luni'])


# In[84]:

df=df.set_index(['Categorii de paduri',u' Macroregiuni regiuni de dezvoltare si judete',u' Ani'])


# Romanian counties' share of Romania's timber imports

# In[85]:

dk=dfi.drop(dfi.columns[0],axis=1)
de=dk.unstack(level=0)[u' Valoare']
dz=de.T
F=(dz[u' IX. Produse din lemn exclusiv mobilier'])
#F/=F.loc['T']
L=(dz[u' X. Hirtie si articole din acestea'])
#L/=L.loc['T']


# D3plus date formatter - monthly-frequency data

# In[86]:

g=[]
mc={'ian':'Jan', 'feb':'Feb', 'mar':'Mar', 'apr':'Apr', 'mai':'May', 'iun':'Jun',
    'iul':'Jul', 'aug':'Aug', 'sep':'Sep', 'oct':'Oct', 'noi':'Nov', 'dec':'Dec'}
for i in F:
    for j in F[i].index:
        v=F[i][j]*1.1 # EUR/USD
        if str(v)!='nan':
            if j!='T':
                #print i[6:9],i[-4:],j,v
                g.append({"flow":u"imp","type":u"Fa","year":i[-4:]+'-'+mc[i[6:9]]+'-01',"county":j,"v":v})


# In[87]:

for i in L:
    for j in L[i].index:
        v=L[i][j]*1.1 # EUR/USD
        if str(v)!='nan':
            if j!='T':
                #print i[6:9],i[-4:],j,v
                g.append({"flow":u"imp","type":u"Papír","year":i[-4:]+'-'+mc[i[6:9]]+'-01',"county":j,"v":v})
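
# A minimal sketch of the date conversion used in the two loops above, as a
# hypothetical helper (not part of the original pipeline). It assumes the INSSE
# monthly column labels look like ' luna ianuarie 2014'; the exact label text is
# an assumption, only the slicing mirrors the code above: i[-4:] is the year and
# i[6:9] is the three-letter Romanian month abbreviation looked up in mc.

def to_d3plus_date(label):
    # e.g. ' luna ianuarie 2014' -> '2014-Jan-01' (assumed label format)
    return label[-4:]+'-'+mc[label[6:9]]+'-01'
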

# Romanian counties' share of Romania's timber exports

# In[88]:

dk=dfe.drop(dfe.columns[0],axis=1)
de=dk.unstack(level=0)[u' Valoare']
dz=de.drop(' Total').T
F=(dz[u' IX. Produse din lemn exclusiv mobilier'])
#F/=F.loc['T']
L=(dz[u' X. Hirtie si articole din acestea'])
#L/=L.loc['T']


# In[89]:

for i in F:
    for j in F[i].index:
        v=F[i][j]*1.1 # EUR/USD
        if str(v)!='nan':
            if j!='T':
                #print i[6:9],i[-4:],j,v
                g.append({"flow":u"exp","type":u"Fa","year":i[-4:]+'-'+mc[i[6:9]]+'-01',"county":j,"v":v})


# In[90]:

for i in L:
    for j in L[i].index:
        v=L[i][j]*1.1 # EUR/USD
        if str(v)!='nan':
            if j!='T':
                #print i[6:9],i[-4:],j,v
                g.append({"flow":u"exp","type":u"Papír","year":i[-4:]+'-'+mc[i[6:9]]+'-01',"county":j,"v":v})


# In[91]:

open('../json/ro_impex.json','w').write(json.dumps(g))


# Romanian counties' share of Romania's timber harvest

# In[92]:

dk=df.drop(df.columns[0],axis=1)
de=dk.unstack(level=1)[' Valoare']
de['T']=de.T.sum()
dz=de.T
F=(dz['Diverse specii moi']+dz['Rasinoase'])
#F/=F.loc['T']
L=(dz['Diverse specii tari']+dz['Fag']+dz['Stejar'])
#L/=L.loc['T']


# In[93]:

h=[]
for i in F:
    for j in F[i].index:
        v=F[i][j] #*1000.0
        if str(v)!='nan':
            if j!='T':
                #print int(i[6:]),j,v
                h.append({"flow":u"prod","type":u"Fenyő","year":int(i[6:]),"county":j,"v":v})


# In[94]:

for i in L:
    for j in L[i].index:
        v=L[i][j] #*1000.0
        if str(v)!='nan':
            if j!='T':
                #print int(i[6:]),j,v
                h.append({"flow":u"prod","type":u"Lombhullató","year":int(i[6:]),"county":j,"v":v})


# In[95]:

open('../json/ro_prod.json','w').write(json.dumps(h))


# Saving the data in D3plus format

# In[121]:

def reject_outliers(data, m=2.):
    # median-absolute-deviation filter: keep points within m MADs of the median
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    s = d/mdev if mdev else 0.
    return data[s<m]
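
# A quick, made-up illustration of the MAD-based outlier filter defined above
# (the numbers below are invented, not from the dataset): values far from the
# median, measured in median absolute deviations, are dropped before the
# unit-value medians are taken in the next cells.

print(reject_outliers(np.array([10., 11., 9., 10., 95.])))  # keeps [10. 11. 9. 10.], drops 95.
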

# In[122]:

s=[]
for i in g:
    k={}
    k['item']=i['type']
    k['partner']=i['county']
    k['year']=i['year']
    k['p']=i['v']
    if i['flow']=='imp': # import
        k['g']='f'
        if k['item']=='Fa':
            k['s']=i['v']*np.median(reject_outliers(np.array(items2['c'][str(min(2014,int(i['year'][:4])))]+\
                items2['nc'][str(min(2014,int(i['year'][:4])))])))
        else:
            k['s']=i['v']*np.median(reject_outliers(np.array(items2['pp'][str(min(2014,int(i['year'][:4])))])))
    else: # export
        k['g']='m'
        if k['item']=='Fa':
            k['s']=i['v']*np.median(reject_outliers(np.array(items['c'][str(min(2014,int(i['year'][:4])))]+\
                items['nc'][str(min(2014,int(i['year'][:4])))])))
        else:
            k['s']=i['v']*np.median(reject_outliers(np.array(items['pp'][str(min(2014,int(i['year'][:4])))])))
    s.append(k)


# In[123]:

w=[]
for i in h:
    k={}
    k['item']=i['type']
    k['partner']=i['county']
    k['year']=i['year']
    k['p']=i['v']*1000
    if i['flow']=='imp': # import
        k['g']='f'
        if k['item']==u'Lombhullató':
            k['s']=i['v']*1000*np.median(reject_outliers(np.array(items2['nc'][str(min(2014,max(1997,int(i['year']))))])))
        else:
            k['s']=i['v']*1000*np.median(reject_outliers(np.array(items2['c'][str(min(2014,max(1997,int(i['year']))))])))
    else: # export
        k['g']='m'
        if k['item']==u'Lombhullató':
            k['s']=i['v']*1000*np.median(reject_outliers(np.array(items['nc'][str(min(2014,max(1997,int(i['year']))))])))
        else:
            k['s']=i['v']*1000*np.median(reject_outliers(np.array(items['c'][str(min(2014,max(1997,int(i['year']))))])))
    w.append(k)


# Updating the global JSON dictionaries

# In[124]:

try:
    import zlib
    compression = zipfile.ZIP_DEFLATED
except:
    compression = zipfile.ZIP_STORED


# In[125]:

open('dummy.json','w').write(json.dumps(s))
zf = zipfile.ZipFile('../zips2/998.zip', mode='w')
zf.write('dummy.json','data.json',compress_type=compression)
zf.close()


# In[126]:

open('dummy.json','w').write(json.dumps(w))
zf = zipfile.ZipFile('../zips2/997.zip', mode='w')
zf.write('dummy.json','data.json',compress_type=compression)
zf.close()


# In[127]:

c=json.loads(open('../json/countries3.json').read())
if u'Románia (megyék)' not in c: c.append(u'Románia (megyék)')
if u'Románia (kitermelés)' not in c: c.append(u'Románia (kitermelés)')
open('../json/countries4.json','w').write(json.dumps(c))


# In[128]:

d=json.loads(open('../json/cdict3.json').read())
d[u'Románia (megyék)']=998
d[u'998']=u'Románia (megyék)'
d[u'Románia (kitermelés)']=997
d[u'997']=u'Románia (kitermelés)'
#for i in df.index.levels[1]:
#    d[i.strip()]=i.strip()
open('../json/cdict4.json','w').write(json.dumps(d))


# Translating the county labels into Hungarian

# In[129]:

r={" Alba":u"Fehér", " Arad":"Arad", " Arges":u"Argeș", " Bacau":u"Bákó", " Bihor":u"Bihar",
   " Bistrita-Nasaud":"Beszterce", " Botosani":u"Botoșani", " Braila":u"Brăila", " Brasov":u"Brassó",
   " Buzau":u"Buzău", " Calarasi":u"Călărași", " Caras-Severin":u"Krassó-Szörény", " Cluj":u"Kolozs",
   " Constanta":"Konstanca", " Covasna":u"Kovászna", " Dambovita":u"Dâmbovița", " Dolj":"Dolj",
   " Galati":"Galac", " Giurgiu":"Giurgiu", " Gorj":"Gorj", " Harghita":"Hargita", " Hunedoara":"Hunyad",
   " Ialomita":u"Ialomița", " Iasi":u"Iași", " Ilfov":"Ilfov", " Maramures":u"Máramaros",
   " Mehedinti":u"Mehedinți", " Municipiul Bucuresti":"Bukarest", " Mures":"Maros", " Neamt":u"Neamț",
   " Olt":"Olt", " Prahova":"Prahova", " Salaj":u"Szilágy", " Satu Mare":u"Szatmár", " Sibiu":"Szeben",
   " Suceava":"Suceava", " Teleorman":"Teleorman", " Timis":"Temes", " Tulcea":"Tulcea",
   " Valcea":u"Vâlcea", " Vaslui":"Vaslui", " Vrancea":"Vrancea"}


# In[130]:

e=json.loads(open('../json/hnames3.json').read())
e[u'Románia (megyék)']=u'Románia (megyék)'
e[u'Románia (kitermelés)']=u'Románia (kitermelés)'
for i in df.index.levels[1]:
    e[i.strip()]=r[i]
    e[i]=r[i]
open('../json/hnames4.json','w').write(json.dumps(e))
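
# Optional sanity check, a sketch that is not part of the original pipeline: list
# any label in the production index that has no Hungarian entry in r (the loop
# above raises a KeyError for such labels). Ideally this prints an empty list.

print([i for i in df.index.levels[1] if i not in r])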