#!/usr/bin/env python
# coding: utf-8

# Notebook export. The source arrived with its line breaks collapsed, so the
# statement layout and indentation below are reconstructed from syntax.
# NOTE(review): loop nesting was inferred - confirm against the original notebook.

# In[76]:

import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Only defined when run under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[77]:

plt.style.use('seaborn-whitegrid')
plt.style.available


# In[78]:

#!pip install d3IpyPlus
from d3IpyPlus import ScatterPlot, LinePlot, BarPlot, StackedArea


# In[79]:

#!pip install pyecharts


# In[80]:

with open('ignore/member_timelines.json', 'r', encoding='utf-8') as fh:
    m = json.loads(fh.read())
with open('ignore/member_simple.json', 'r', encoding='utf-8') as fh:
    members = json.loads(fh.read())


# In[81]:

d = pd.DataFrame(members['deaths'], index=['v']).T


# In[82]:

# Every row present in `d` gets the dagger marker; it is appended to names below.
d['v'] = ' †'


# In[83]:

s = pd.DataFrame(members['s2'], index=['s']).T
s = s.join(d)
s['v'] = s['v'].fillna('')
s['s'] = s['s'] + s['v']
# .copy() so that adding column 'w' does not write into a slice of the old frame.
s = s[['s']].copy()
# 'w': first character of the first and of the last space-separated token.
s['w'] = s['s'].str.split(' ').str[0].str[0] + s['s'].str.split(' ').str[-1].str[0]


# In[84]:

p = pd.DataFrame(members['part'], index=['Part ']).T
p.head()


# In[85]:

szd_color = '#E91C62'


# In[86]:

with open('json/pcolors.json', 'r', encoding='utf-8') as fh:
    pcolors = json.loads(fh.read())
# 'Color' must be derived before 'Part ' is truncated at the first '-'.
p['Color'] = p['Part '].str.split(' ').str[1].str.strip()
p['Part '] = p['Part '].str.split('-').str[0].str.strip()
p = p.join(pd.DataFrame(pcolors, index=['cvalue']).T, on='Color')


# In[87]:

g = pd.DataFrame(members['megye'], index=['Megye ']).T
g.head()


# In[88]:

def replace_all(text, dic):
    """Return *text* with every key of *dic* replaced by its mapped value.

    Replacements are applied one after another in the iteration order of
    *dic*, so a later replacement sees the result of the earlier ones.
    """
    for old in dic:
        text = text.replace(old, dic[old])
    return text


# In[89]:

# Multipliers applied when the marker / substring occurs in a record's 'Csoportok'.
medals = {'🥈': 2, '🥇': 3, '🥉': 1.5, '🏅': 1.25, '🚩': 1.5}
medals_none = {i: '' for i in medals}
bizotts = {
    'Parlamenti Iroda': 2,
    'Állambiztonság': 2,
    'Állami ügyek': 1.5,
    'Választások': 1.5,
    'Parlament': 1.5,
    'Pénzügy': 1.5,
    'Biztonság': 1.5,
    '🇺🇳': 2,
    '🇺🇸': 1.5,
    '🇫🇷': 1.5,
    '🇮🇱': 1.5,
    '🇪🇸': 1.5,
    '🇮🇹': 1.5,
    '🇬🇧': 1.5,
    '🏅🔼 Szenátus': 2,
    '🏅🔽 Képviselőház': 2,
    '🥇🔼 Szenátus': 5,
    '🥇🔽 Képviselőház': 5,
    '🥈🔼 Szenátus': 4,
    '🥈🔽 Képviselőház': 4,
    '🥉🔼 Szenátus': 3,
    '🥉🔽 Képviselőház': 3,
}


# In[90]:

# data[type][group][day] -> set of member names; attention[name][day] -> summed weight.
data = {'Ország': {}, 'Bizottság': {}, 'Párt': {}, 'Megye': {}}
attention = {}
for k, i in enumerate(m):
    if k % 1000 == 0:
        print(k / len(m) * 100, '%')
    name = i['Simple']
    if i['Típus'] in data:
        csoport = replace_all(i['Csoportok'], medals_none)
        by_day = data[i['Típus']].setdefault(csoport, {})
        # The weight depends only on the record, so compute it once per record
        # instead of once per day.
        v = 1
        for j in medals:
            if j in i['Csoportok']:
                v *= medals[j]
        for j in bizotts:
            if j in i['Csoportok']:
                v *= bizotts[j]
        for t in pd.date_range(i['start'], i['end']):
            st = str(t)[:10]
            by_day.setdefault(st, set()).add(name)
            per_member = attention.setdefault(name, {})
            per_member[st] = per_member.get(st, 0) + v


# In[91]:

df = pd.DataFrame(attention)


# In[92]:

pd.to_datetime('2019-07-01') - pd.to_datetime('2018-01-24')


# In[93]:

# Number of days with an attention entry, per member.
counts = df.count().sort_values(ascending=False)


# In[94]:

counts.head(7)


# In[95]:

counts2 = df.max().sort_values(ascending=False)


# In[96]:

# Overrides the value from the previous cell: last day with an entry, per member.
counts2 = {}
for i in df.columns:
    counts2[i] = df[i].last_valid_index()
counts2 = pd.DataFrame(counts2, index=[0]).T[0]


# In[97]:

counts2.head(7)


# In[98]:

min_time = 365 * 6
min_time1 = 365 * 4
min_dep = 2


# In[99]:

de = pd.DataFrame(counts).join(p)
de = de[de[0] > min_time1]
dd = de.groupby('Part ').nunique()
# Select the numeric column explicitly: pandas >= 2 raises on the string
# columns instead of silently dropping them from the mean.
de = de.groupby('Part ')[[0]].mean()
de = np.round(de.loc[dd[dd[0] > min_dep].index] / 365, 1)
de.columns = ['Atlagos regiseg']
de = de.join(p.set_index('Part ').drop_duplicates())
dj = (
    de.groupby(['Part ', 'cvalue'])[['Atlagos regiseg']]
    .mean()
    .reset_index()
    .sort_values(by='Atlagos regiseg', ascending=False)
)


# In[100]:

de


# In[101]:

sample_data = list(de.reset_index().T.to_dict().values())
scplot = BarPlot(
    x='Part ',
    y={'value': 'Atlagos regiseg', 'label': 'Atlagos regiseg (ev)'},
    id='Part ',
    aggs={'Atlagos regiseg': 'mean'},
    text=' ',
    color='cvalue',
    order={'value': 'Atlagos regiseg', 'sort': 'desc'},
    tooltip=['Part '],
    legend=False,
    width='100%',
)
scplot.draw(sample_data)
scplot.height = '100%'
# Encoding is left at the platform default, as before.
with open('time1.html', 'w') as fh:
    fh.write(scplot.dump_html(sample_data))


# In[102]:

from IPython.display import IFrame


# In[103]:

from pyecharts.charts import Bar, Grid
from pyecharts import options as opts
#docs https://pyecharts.org/#/en-us/


# In[104]:

bar = (
    Bar(
        init_opts=opts.InitOpts(
            width='100%',
            height='410px'
        )
    )
    .add_xaxis(list(dj['Part '].values))
    .add_yaxis(
        'Átlagos régiség',
        [
            opts.BarItem(
                name=row['Part '],
                value=np.round(row['Atlagos regiseg'], 1),
                itemstyle_opts=opts.ItemStyleOpts(color=row['cvalue']),
            )
            # iterrows() replaces dj.T.iteritems(), which pandas 2.0 removed.
            for _, row in dj.iterrows()
        ],
    )
    .set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),
        title_opts=opts.TitleOpts(
            title="Parlamenterek átlagos régisége",
            pos_left='center',
            pos_top='20'
        ),
        xaxis_opts=opts.AxisOpts(
            type_='category',
            axislabel_opts=opts.LabelOpts(
                rotate=-50,
                font_weight='normal'
            )
        ),
#         graphic_opts=[
#             opts.GraphicImage(
#                 graphic_item=opts.GraphicItem(
#                     id_="logo",
#                     right=50,
#                     top=50,
#                     z=-10,
#                     bounding="raw",
#                     origin=[75, 75],
#                 ),
#                 graphic_imagestyle_opts=opts.GraphicImageStyleOpts(
#                     image="https://szekelydata.csaladen.es/favicon.ico",
#                     width=25,
#                     height=25,
#                     opacity=0.8,
#                 ),
#             )
#         ],
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(
        )
    )
    .render('time1e.html')
)
IFrame(src='time1e.html', width='98%', height=440)


# In[105]:

from plotly import offline as po
import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=list(dj['Part '].values),
    y=np.round(np.array(dj['Atlagos regiseg'].values), 1),
    marker=dict(
        color=list(dj['cvalue'].values)
    ),
)
# NOTE: this rebinds `data`, replacing the dict built in cell 90.
data = [trace0]
fig = go.Figure(data=data)
file_name = 'time1c.html'
po.plot(fig, filename=file_name, auto_open=False)
IFrame(src=file_name, width='98%', height=420)


# In[106]:

de = pd.DataFrame(counts).join(g)
de = de[de[0] > min_time1]
dd = de.groupby('Megye ').nunique()
de = de.groupby('Megye ')[[0]].mean()
de = np.round(de.loc[dd[dd[0] > min_dep].index] / 365, 1)
de.columns = ['Atlagos regiseg']
dj = de.groupby(['Megye ']).mean().reset_index().sort_values(by='Atlagos regiseg', ascending=False)


# In[107]:

with open('json/romania-counties.json', 'r', encoding='utf-8') as fh:
    ro_shape = json.loads(fh.read())
# NOTE(review): absolute, machine-specific path.
with open('C:/users/csala/Onedrive/Github/universal/ro/hun_megyek.json', 'r', encoding='utf-8') as fh:
    megyek = json.loads(fh.read())
# Map the name looked up in `megyek` to the geometry's position in the shape
# file; VARNAME_1 is preferred and NAME_1 is the fallback when it is empty.
megye_map = {
    megyek[di['properties']['VARNAME_1'] or di['properties']['NAME_1']]: i
    for i, di in enumerate(ro_shape['objects']['ROU_adm1']['geometries'])
}
# -1 marks rows whose second space-separated token has no entry in the map.
dj['id'] = [megye_map.get(i, -1) for i in dj['Megye '].str.split(' ').str[1].values]
with open('json/megye_map.json', 'w', encoding='utf-8') as fh:
    fh.write(json.dumps(list(dj.T.to_dict().values())))


# In[108]:

sample_data = list(de.reset_index().T.to_dict().values())
scplot = BarPlot(
    x='Megye ',
    y={'value': 'Atlagos regiseg', 'label': 'Atlagos regiseg (ev)'},
    id='Megye ',
    aggs={'Atlagos regiseg': 'mean'},
    text=' ',
#     color='cvalue',
    order={'value': 'Atlagos regiseg', 'sort': 'desc'},
#     tooltip=['Part '],
    legend=False,
    width='100%',
)
scplot.draw(sample_data)
scplot.height = '100%'
with open('time2.html', 'w') as fh:
    fh.write(scplot.dump_html(sample_data))


# In[109]:

bar = (
    Bar(
        init_opts=opts.InitOpts(
            width='100%',
            height='410px'
        )
    )
    .add_xaxis(list(dj['Megye '].values))
    .add_yaxis(
        'Átlagos régiség',
        [
            opts.BarItem(
                name=row['Megye '],
                value=np.round(row['Atlagos regiseg'], 1),
                itemstyle_opts=opts.ItemStyleOpts(color=szd_color),
            )
            # iterrows() replaces dj.T.iteritems(), which pandas 2.0 removed.
            for _, row in dj.iterrows()
        ],
    )
    .set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(
            type_='category',
            axislabel_opts=opts.LabelOpts(
                rotate=-90,
                font_weight='normal'
            ),
#             offset=0
        ),
#         graphic_opts=[
#             opts.GraphicImage(
#                 graphic_item=opts.GraphicItem(
#                     id_="logo",
#                     right=50,
#                     top=50,
#                     z=-10,
#                     bounding="raw",
#                     origin=[75, 75],
#                 ),
#                 graphic_imagestyle_opts=opts.GraphicImageStyleOpts(
#                     image="https://szekelydata.csaladen.es/favicon.ico",
#                     width=25,
#                     height=25,
#                     opacity=0.8,
#                 ),
#             )
#         ],
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(
        )
    )
    .render('time2e.html')
)
IFrame(src='time2e.html', width='98%', height=440)


# In[110]:

from plotly import offline as po
import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=list(dj['Megye '].values),
    y=np.round(np.array(dj['Atlagos regiseg'].values), 1)
)
data = [trace0]
fig = go.Figure(data=data)
# (continuation of cell 110: write and embed the figure built just above)
file_name = 'time2c.html'
po.plot(fig, filename=file_name, auto_open=False)
IFrame(src=file_name, width='98%', height=420)


# In[111]:

de = pd.DataFrame(counts2).join(p).join(d)
de = de.reset_index()
de = de[[str(i) == 'nan' for i in list(de['v'].values)]]  # keep only alive ones
# The birth date is the second '|'-separated field of the member key.
de['szul'] = de['index'].str.split('|').str[1].str.strip()
de = de[[str(i) != 'Ismeretlen' for i in list(de['szul'].values)]]  # keep only known birth dates
de['Eletkor'] = (pd.to_datetime(de[0]) - pd.to_datetime(de['szul'])).dt.days / 365
dd = de.groupby('Part ').nunique()
# Select the numeric column before aggregating: pandas >= 2 raises on the
# string columns instead of silently dropping them from the mean.
de = de.groupby('Part ')[['Eletkor']].mean()
de = np.round(de.loc[dd[dd[0] > min_dep].index], 0)
de.columns = ['Atlagos eletkor']
de = de.join(p.set_index('Part ').drop_duplicates())
dj = (
    de.groupby(['Part ', 'cvalue'])[['Atlagos eletkor']]
    .mean()
    .reset_index()
    .sort_values(by='Atlagos eletkor', ascending=False)
)


# In[112]:

sample_data = list(de.reset_index().T.to_dict().values())
scplot = BarPlot(
    x='Part ',
    y='Atlagos eletkor',
    id='Part ',
    aggs={'Atlagos eletkor': 'mean'},
    text=' ',
    color='cvalue',
    order={'value': 'Atlagos eletkor', 'sort': 'desc'},
    tooltip=['Part '],
    legend=False,
    width='100%',
)
scplot.draw(sample_data)
scplot.height = '100%'
# Encoding is left at the platform default, as before.
with open('time3.html', 'w') as fh:
    fh.write(scplot.dump_html(sample_data))


# In[113]:

from plotly import offline as po
import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=list(dj['Part '].values),
    y=np.round(np.array(dj['Atlagos eletkor'].values), 0),
    marker=dict(
        color=list(dj['cvalue'].values)
    ),
)
data = [trace0]
fig = go.Figure(data=data)
file_name = 'time3c.html'
po.plot(fig, filename=file_name, auto_open=False)
IFrame(src=file_name, width='98%', height=420)


# In[114]:

de = pd.DataFrame(counts2).join(g).join(d)
de = de.reset_index()
de = de[[str(i) == 'nan' for i in list(de['v'].values)]]  # keep only alive ones
de['szul'] = de['index'].str.split('|').str[1].str.strip()
de = de[[str(i) != 'Ismeretlen' for i in list(de['szul'].values)]]  # keep only known birth dates
de['Eletkor'] = (pd.to_datetime(de[0]) - pd.to_datetime(de['szul'])).dt.days / 365
# (continuation of cell 114: average age per county)
dd = de.groupby('Megye ').nunique()
# Select the numeric column before aggregating: pandas >= 2 raises on the
# string columns instead of silently dropping them from the mean.
de = de.groupby('Megye ')[['Eletkor']].mean()
de = np.round(de.loc[dd[dd[0] > min_dep].index], 0)
de.columns = ['Atlagos eletkor']
dj = de.groupby(['Megye ']).mean().reset_index().sort_values(by='Atlagos eletkor', ascending=False)


# In[115]:

with open('json/romania-counties.json', 'r', encoding='utf-8') as fh:
    ro_shape = json.loads(fh.read())
# NOTE(review): absolute, machine-specific path.
with open('C:/users/csala/Onedrive/Github/universal/ro/hun_megyek.json', 'r', encoding='utf-8') as fh:
    megyek = json.loads(fh.read())
# Map the name looked up in `megyek` to the geometry's position in the shape
# file; VARNAME_1 is preferred and NAME_1 is the fallback when it is empty.
megye_map = {
    megyek[di['properties']['VARNAME_1'] or di['properties']['NAME_1']]: i
    for i, di in enumerate(ro_shape['objects']['ROU_adm1']['geometries'])
}
# -1 marks rows whose second space-separated token has no entry in the map.
dj['id'] = [megye_map.get(i, -1) for i in dj['Megye '].str.split(' ').str[1].values]
with open('json/megye_map2.json', 'w', encoding='utf-8') as fh:
    fh.write(json.dumps(list(dj.T.to_dict().values())))


# In[116]:

sample_data = list(de.reset_index().T.to_dict().values())
scplot = BarPlot(
    x='Megye ',
    y='Atlagos eletkor',
    id='Megye ',
    aggs={'Atlagos eletkor': 'mean'},
    text=' ',
#     color='cvalue',
    order={'value': 'Atlagos eletkor', 'sort': 'desc'},
#     tooltip=['Part '],
    legend=False,
    width='100%',
)
scplot.draw(sample_data)
scplot.height = '100%'
# Encoding is left at the platform default, as before.
with open('time4.html', 'w') as fh:
    fh.write(scplot.dump_html(sample_data))


# In[117]:

from plotly import offline as po
import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=list(dj['Megye '].values),
    y=np.round(np.array(dj['Atlagos eletkor'].values), 0)
)
data = [trace0]
fig = go.Figure(data=data)
file_name = 'time4c.html'
po.plot(fig, filename=file_name, auto_open=False)
IFrame(src=file_name, width='98%', height=420)


# Fusions

# In[118]:

def _tally_switches(records, kinds, skip_group=None):
    """Count group changes per member and the transitions between groups.

    Walks *records* in order. For each record whose 'Típus' is in *kinds*
    (and whose 'Csoportok' differs from *skip_group*), the member's current
    group for that type is compared with the record's group; a difference
    increments the member's switch count and the (new group, old group)
    transition counter.

    Returns a ``(switches, fusions)`` pair:
    ``switches[name][type] == {'count': int, 'current': str}`` and
    ``fusions[type][new_group][old_group] == int``.
    """
    switches = {}
    fusions = {}
    for k, rec in enumerate(records):
        if k % 1000 == 0:
            print(k / len(records) * 100, '%')
        name = rec['Simple']
        kind = rec['Típus']
        if kind not in kinds:
            continue
        group = rec['Csoportok']
        if group == skip_group:
            continue
        # NOTE(review): the per-day loop is kept as found; after its first pass
        # the body is a no-op, so it only matters when the date range is empty.
        # Whether the body sat inside this loop could not be recovered from the
        # collapsed source - confirm.
        for _ in pd.date_range(rec['start'], rec['end']):
            state = switches.setdefault(name, {}).setdefault(
                kind, {'count': 0, 'current': group}
            )
            if state['current'] != group:
                previous = state['current']
                targets = fusions.setdefault(kind, {}).setdefault(group, {})
                targets[previous] = targets.get(previous, 0) + 1
                state['current'] = str(group)
                state['count'] += 1
    return switches, fusions


switches, fusions = _tally_switches(m, ('Megye', 'Párt'))


# In[119]:

def get_sub(dz, x, k=1):
    """Rank members by row *x* of *dz*, normalised by their counted days.

    Prints the mean and the five largest raw values of row *x*, then joins
    the module-level ``counts`` (as column 0) and ``s`` frames and computes
    ``a = value / (counts / k)``. Infinite ratios become NaN and members
    whose count does not exceed ``min_time`` are dropped.

    Returns a DataFrame with columns 'a', 's' and 'w', sorted by 'a'
    in descending order.
    """
    part = dz.T[x].sort_values(ascending=False)
    print(part.mean())
    print(part.head(5))
    frame = pd.DataFrame(part).join(pd.DataFrame(counts)).join(s)
    frame['a'] = frame[x] / (frame[0] / k)
    frame = frame.replace([np.inf, -np.inf], np.nan)
    frame = frame[frame[0] > min_time]  # remove outliers
    return frame.sort_values(by='a', ascending=False)[['a', 's', 'w']]


# In[120]:

dw = pd.DataFrame({i: {j: switches[i][j]['count'] for j in switches[i]} for i in switches})
part = get_sub(dw, 'Párt', 365)
megye = get_sub(dw, 'Megye', 365)


# In[121]:

top = 15
fig, axes = plt.subplots(1, 2, figsize=(11, 3))

to_plot = part.set_index('s')['a']
ax = axes[0]
print(to_plot.mean())
ax = to_plot.head(top).plot(kind='bar', ax=ax, color=szd_color)
ax.set_xlabel("")
ax.set_ylabel("Pártváltás / év")

to_plot = megye.set_index('s')['a']
ax = axes[1]
print(to_plot.mean())
to_plot.head(top).plot(kind='bar', ax=ax, color=szd_color)
ax.set_xlabel("")
ax.set_ylabel("Megyeváltás / év")

plt.savefig('fig/valtas.png', bbox_inches='tight', transparent=True)
plt.show()


# Fusions

# In[122]:

# Rename the key 'Párt' to 'part' before exporting.
fusions['part'] = fusions.pop('Párt')
with open('json/fusions.json', 'w', encoding='utf-8') as fh:
    fh.write(json.dumps(fusions))


# In[123]:

# Same tally restricted to party records, skipping the Independent group.
switches2, fusions2 = _tally_switches(
    m, ('Párt',), skip_group='👤 Independent - Independent'
)


# In[124]:

fusions2['part'] = fusions2.pop('Párt')
with open('json/fusions2.json', 'w', encoding='utf-8') as fh:
    fh.write(json.dumps(fusions2))


# Orszag

# In[125]:

# countries[name][type][group] -> number of days in the record's date range.
# NOTE(review): the increment is assumed to sit inside the per-day loop - confirm.
countries = {}
for k, i in enumerate(m):
    if k % 1000 == 0:
        print(k / len(m) * 100, '%')
    name = i['Simple']
    if i['Típus'] in ['Ország', 'Bizottság']:
        for _ in pd.date_range(i['start'], i['end']):
            by_group = countries.setdefault(name, {}).setdefault(i['Típus'], {})
            by_group[i['Csoportok']] = by_group.get(i['Csoportok'], 0) + 1


# In[126]:

# dz: total counted days per type; dz2: number of distinct groups per type.
dz = pd.DataFrame(
    {i: {j: sum(countries[i][j].values()) for j in countries[i]} for i in countries}
).fillna(0)
dz2 = pd.DataFrame(
    {i: {j: len(countries[i][j].values()) for j in countries[i]} for i in countries}
).fillna(0)


# In[127]:

orsz1 = get_sub(dz2, 'Ország')
biz1 = get_sub(dz2, 'Bizottság')


# In[128]:

orsz2 = get_sub(dz, 'Ország')
biz2 = get_sub(dz, 'Bizottság')


# In[129]:

top = 15
fig, axes = plt.subplots(1, 2, figsize=(11, 3))

to_plot = orsz2.set_index('s')['a']
ax = axes[0]
print(to_plot.mean())
to_plot.head(top).plot(kind='bar', ax=ax, color=szd_color)
ax.set_xlabel("")
ax.set_ylabel("Ország-barátság-csoport / év")

to_plot = biz2.set_index('s')['a']
ax = axes[1]
print(to_plot.mean())
to_plot.head(top).plot(kind='bar', ax=ax, color=szd_color)
ax.set_xlabel("")
ax.set_ylabel("Bizottság / év")

plt.savefig('fig/valtas2.png', bbox_inches='tight', transparent=True)
plt.show()


# Scatter

# In[130]:
# (cell 130) Per-member party-switch and county-switch rates side by side,
# joined with party, county and counted years.
part2 = pd.DataFrame(part[['a', 'w', 's']])
part2.columns = ['Part', 'Nev', 'Nev2']
megye2 = pd.DataFrame(megye['a'])
megye2.columns = ['Megye']
sc1 = part2.join(megye2).join(p).join(g).join(pd.DataFrame(counts / 365, columns=['Evek']))


# In[131]:

orsz3 = pd.DataFrame(orsz2[['a', 'w', 's']])
orsz3.columns = ['Orszag', 'Nev', 'Nev2']
biz3 = pd.DataFrame(biz2[['a']])
biz3.columns = ['Bizottsag']
sc2 = orsz3.join(biz3).join(p).join(g).join(pd.DataFrame(counts / 365, columns=['Evek']))


# In[132]:

fig, axes = plt.subplots(1, 2, figsize=(9, 4))

ax = axes[0]
sc1.plot(x='Part', y='Megye', kind='scatter', ax=ax, color=szd_color)
ax.set_xlabel('Pártváltás / év')
ax.set_ylabel('Megyeváltás / év')

ax = axes[1]
sc2.plot(x='Orszag', y='Bizottsag', kind='scatter', ax=ax, color=szd_color)
ax.set_xlabel('Ország-barátság-csoport / év')
ax.set_ylabel('Bizottság / év')

plt.savefig('fig/valtas3.png', bbox_inches='tight', transparent=True)
plt.show()


# In[133]:

sample_data = list(sc1.reset_index().T.to_dict().values())
scplot = ScatterPlot(
    x={'value': 'Part', 'label': 'Partvaltas / ev'},
    y={'value': 'Megye', 'label': 'Megyevaltas / ev'},
    id='index',
    text='Nev',
    color='cvalue',
#     order={'value':'Evek','sort':'asc'},
    tooltip=['Nev2', 'Part ', 'Megye '],
    legend=False,
    width='100%',
    size='Evek',
)
scplot.draw(sample_data)
# Encoding is left at the platform default, as before.
with open('scatter1b.html', 'w') as fh:
    fh.write(scplot.dump_html(sample_data))


# In[134]:

sample_data = list(sc2.reset_index().T.to_dict().values())
scplot = ScatterPlot(
    x={'value': 'Orszag', 'label': 'Orszag-baratsag-csoport'},
    y={'value': 'Bizottsag', 'label': 'Bizottsag'},
    id='index',
    text='Nev',
    color='cvalue',
#     order={'value':'Evek','sort':'asc'},
    tooltip=['Nev2', 'Part ', 'Megye '],
    legend=False,
    width='100%',
    size='Evek',
)
scplot.draw(sample_data)
with open('scatter2b.html', 'w') as fh:
    fh.write(scplot.dump_html(sample_data))


# In[ ]:


# In[ ]: