#!/usr/bin/env python
# coding: utf-8

# ## SZÉKELYDATA Forests
# ### Global data-loading and formatting notebook

# In[50]:

import copy
import json
import zipfile

import numpy as np
import pandas as pd
import requests

try:
    import StringIO  # Python 2 module
except ImportError:
    # Python 3: ``io.StringIO`` is reachable the same way as the old
    # ``StringIO.StringIO``, so the module-level name keeps working.
    import io as StringIO

# suppress warnings
import warnings
warnings.simplefilter(action="ignore")


# In[101]:

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# Load the FAO data - import/export dollar values and volumes

# In[8]:

# z = zipfile.ZipFile('Forestry_Trade_Flows_E_All_Data.zip')
# Read the CSV straight out of the local zip archive.
z = zipfile.ZipFile('../db/Forestry_Trade_Flows_E_All_Data2.zip')
df = pd.read_csv(z.open('Forestry_Trade_Flows_E_All_Data.csv'))


# In[9]:

# Inspection only: the distinct 'Element' values reported for Romania.
set(df[df['Reporter Countries'] == 'Romania']['Element'])


# Formatting into D3plus format

# In[11]:

# Nested lookup: data[reporter][g][partner][year][item] -> {"s": ..., "p": ...}
#   reporter / partner : cells 0 and 2 of each row (presumably the numeric
#                        country codes - confirm against the CSV header)
#   g                  : "f" when the row's element mentions 'Import', else "m"
#   "s" / "p"          : the cell of a 'Value' element / of any other element
#                        (presumably the quantity - confirm)
data = {}
cs = set()
for _, row in df.iterrows():
    cells = row.values  # positional access, independent of column labels
    country = cells[0]
    partner = cells[2]
    cs.add(country)
    cs.add(partner)
    item = cells[5]
    element = cells[7]

    # Every reporter gets an entry, even if all of its yearly cells are NaN.
    flows = data.setdefault(country, {})
    g = "f" if 'Import' in element else "m"
    measure = "s" if 'Value' in element else "p"

    # Every second column from 9 up to 43; the column label minus its first
    # character is parsed as the year.
    for k in range(9, 44, 2):
        amount = cells[k]
        if np.isnan(amount):
            continue
        year = int(df.columns[k][1:])
        cell = (flows.setdefault(g, {})
                     .setdefault(partner, {})
                     .setdefault(year, {})
                     .setdefault(item, {}))
        cell[measure] = amount


# In[12]:

# Flatten the nested lookup into one list of records per reporter, keeping
# only the entries for which both "s" and "p" are present.
data2 = {}
for country, flows in data.items():
    records = data2.setdefault(country, [])
    for g, partners in flows.items():
        for partner, years in partners.items():
            for year, items in years.items():
                for item, measures in items.items():
                    if "s" in measures and "p" in measures:
                        records.append({"partner": partner,
                                        "item": item,
                                        "g": g,
                                        "year": year,
                                        "s": measures["s"],
                                        "p": measures["p"]})


# Price chart

# In[234]:

from jupyterthemes import jtplot
# you can select an alternative theme's plot style by name
# oceans16 | grade3 | chesterish | onedork | monokai | solarizedl
jtplot.style('solarized-light')


# In[235]:

def reject_outliers(data, m=2.):
    """Return the elements of *data* that are not outliers.

    An element is kept when its absolute deviation from the NaN-ignoring
    median, divided by the median of those absolute deviations, is below *m*.

    Args:
        data: numeric NumPy array; NaN entries are ignored by the medians.
        m: threshold on the scaled deviation.

    Returns:
        The selected elements of *data* (boolean-mask indexing).
    """
    d = np.abs(data - np.nanmedian(data))
    mdev = np.nanmedian(d)
    # With a zero median deviation the scaling is undefined; use zeros so the
    # mask keeps the shape of ``data`` and every element passes for m > 0.
    s = d / mdev if mdev else np.zeros_like(d, dtype=float)
    return data[s < m]


# In[243]:

fig, ax = plt.subplots(2, 5, figsize=(15, 6))
eip = ['ip', 'ep']          # price column per subplot row (import, export)
alph = [1, 0.7]             # histogram alpha per year
yrs = ['Y1997', 'Y2014']
colors = ['royalBlue', 'seaGreen']
woods = [[u'Newsprint', u'Paper+-Board Ex Newsprnt'],
         [u'Sawnwood (NC)'], [u'Sawnwood (C)'],
         [u'Ind Rwd Wir (NC) Other', u'Ind Rwd Wir (NC) Tropica'],
         [u'Ind Rwd Wir (C)']]
ti = [u'Papír', u'Lombhullató\nFûrészáru', u'Fenyô\nFûrészáru',
      u'Lombhullató\nRönkfa', u'Fenyô\nRönkfa']
# Raw strings: the backslash before '$' must reach matplotlib unchanged.
uni = [r'$\$/tonna$', r'$\$/m^3$', r'$\$/m^3$', r'$\$/m^3$', r'$\$/m^3$']
impex = ['Import', 'Export']

# The unit-price table depends only on the year, so build it once per year
# instead of once per (product group, year) pair.
indexed = df.set_index(['Element', 'Item', 'Reporter Countries',
                        'Partner Countries'])\
    .drop(['Reporter Country Code', 'Partner Country Code', 'Item Code',
           'Element Code', 'Unit'], axis=1)
price_tables = {}
for year in yrs:
    dx = indexed[year].unstack(0)
    dx['ep'] = dx['Export Value'] / dx['Export Quantity']
    dx['ip'] = dx['Import Value'] / dx['Import Quantity']
    price_tables[year] = dx

for wod, wood in enumerate(woods):
    for yr, year in enumerate(yrs):
        dx = price_tables[year]
        for ii, price_col in enumerate(eip):
            panel = ax[ii][wod]
            dw = dx.loc[wood]
            # Put the reporter level first so that .loc[name] selects one
            # reporter's prices.
            dw = dw.set_index(dw.index.swaplevel(0, 1))[price_col]

            # One outlier-filtered median price per reporter.
            y = [np.nanmedian(reject_outliers(np.array(dw.loc[i].values)))
                 for i in dw.index.levels[0]]
            z = np.array(y)[~pd.isnull(y)]
            panel.hist(z, np.arange(0, 1.4, 0.05), alpha=alph[yr],
                       label=year[1:])
            panel.set_xlim(0, 1.4)
            panel.set_ylim(0, 50)

            # Dashed line: Romania; solid line: median over all reporters.
            w = np.nanmedian(
                reject_outliers(np.array(dw.loc['Romania'].values)))
            panel.axvline(w, c=colors[yr], ls='--')
            w = np.nanmedian(y)
            panel.axvline(w, c=colors[yr], ls='-')

            if ii != 1:
                panel.set_xticklabels([])
                panel.set_title(ti[wod], y=1.06)
            else:
                panel.set_xlabel(u'Ár (' + '$1000$ ' + uni[wod] + ')')
            if wod != 0:
                panel.set_yticklabels([])
            else:
                panel.set_ylabel(u'Országok száma\n' + impex[ii] + u'-árak')

            # Line-style captions, written once into the fourth top panel.
            if wod == 2 and ii == 1 and yr == 1:
                ax[0][3].text(0.65, 42, u'$←$' + u' Románia',
                              color=colors[yr])
                ax[0][3].text(0.44, 32, u'$←$' + u' Medián',
                              color=colors[yr])

ax[0][0].legend(loc=0, framealpha=0, fontsize=10)
plt.suptitle(u'A nemzetközi fakereskedelem árainak eloszlása országok szerint',
             y=1.06, fontsize=14)
plt.show()


# Per-country export into zip files, each wrapping a JSON document

# In[237]:

try:
    import zlib
    compression = zipfile.ZIP_DEFLATED
except ImportError:
    # Without zlib the archives can only be stored uncompressed.
    compression = zipfile.ZIP_STORED


def write_json(path, payload):
    """Serialise *payload* with ``json.dumps`` and write it to *path*.

    The file is closed before returning, so its full content is on disk for
    whatever reads it next.
    """
    with open(path, 'w') as handle:
        handle.write(json.dumps(payload))


# In[ ]:

for c in data2:
    # 'dummy.json' is a scratch file that is overwritten for every reporter
    # and stored in the archive under the name 'data.json'.
    write_json('dummy.json', data2[c])
    with zipfile.ZipFile('zips2/' + str(c) + '.zip', mode='w') as zf:
        zf.write('dummy.json', 'data.json', compress_type=compression)


# Build the global name lists and dictionaries

# In[ ]:

# list(...) keeps this serialisable where dict.keys() returns a view.
write_json('json/countries2.json', list(data))


# In[ ]:

write_json('json/data2.json', data)


# In[ ]:

# Two-way lookup between cells 0 <-> 1 and 2 <-> 3 of every row (presumably
# country code <-> country name - confirm). The first mapping seen for a key
# wins.
cdict = {}
for _, row in df.iterrows():
    cells = row.values
    cdict.setdefault(cells[0], cells[1])
    cdict.setdefault(cells[2], cells[3])
    cdict.setdefault(cells[1], cells[0])
    cdict.setdefault(cells[3], cells[2])


# In[ ]:

# Store the accented name as unicode, both as the value for key 107 and as
# the key of the reverse entry. pop() moves the reverse entry and cannot
# delete it when both spellings turn out to be the same key.
cdict[107] = u"C\xf4te d'Ivoire"
cdict[u"C\xf4te d'Ivoire"] = cdict.pop("C\xf4te d'Ivoire")


# In[ ]:

cdict[182] = u'R\xe9union'
cdict[u'R\xe9union'] = cdict.pop('R\xe9union')


# In[ ]:

write_json('json/cdict2.json', cdict)


# In[ ]:

# Keep only the keys that int() rejects, i.e. drop the numeric ones.
countries = []
for k in cdict:
    try:
        int(k)
    except (TypeError, ValueError):
        countries.append(k)
write_json('json/countries2.json', countries)