In [147]:
import pandas as pd, numpy as np
import bs4, requests, json, os
In [2]:
#os.chdir('E:/Onedrive - Lancaster University/datarepo/influence/ro')
os.chdir('C:/users/csala/Onedrive - Lancaster University/datarepo/influence/ro')
In [3]:
base_url='http://www.cdep.ro'
url=base_url+'/pls/parlam/structura2015.ab?idl=1'
In [4]:
url
Out[4]:
'http://www.cdep.ro/pls/parlam/structura2015.ab?idl=1'
In [5]:
r=requests.get(url)
soup = bs4.BeautifulSoup(r.content)
In [6]:
tables=soup.findAll('table')
table=tables[1]
In [7]:
links=list(set([l['href'] for l in table.findAll('a')]))
In [8]:
def state_format(j):
    j
    j=j.replace('Grupul parlamentar de prietenie cu ','')
    j=j.replace('Grupul Parlamentar de prietenie cu ','')
    j=j.replace('\n ','')
    j=j.replace('Secretar','')
    j=j.replace('Vicepreşedinte','')
    j=j.replace('Preşedinte','')
    j=j.strip()
    return j
In [71]:
members=[]
party_imgs={}
state_imgs={}
parsed_links=set()
pages={}
In [74]:
for link in links:
    if link not in parsed_links:
        if len(parsed_links)%100==0: print(len(parsed_links)/len(links)*100,'%')
        idm=link[link.find('idm=')+4:link.find('idm=')+4+link[link.find('idm=')+4:].find('&')]
        leg=link[link.find('leg=')+4:link.find('leg=')+4+link[link.find('leg=')+4:].find('&')]
        url=base_url+link
        r=requests.get(url)
        soup = bs4.BeautifulSoup(r.content)
        pages[link]=str(soup)
        name=soup.find('title').text
        olddiv=soup.find('div',{'id':'olddiv'})
        pretty_name=olddiv.find('h1').text
        img=olddiv.find('img')['src']
        divs=soup.find('div',{'id':'olddiv'}).find('div').findAll('div',{'class':'boxDep'})
        camera=divs[0].find('h3').text
        judet=divs[0].find('a').text
        birth_ro=soup.find('div',{'class':'profile-pic-dep'}).text.replace('\n','').replace('n.','').strip()
        start=''
        end=''
        if 'data valid' in repr(divs[0]):
            start=repr(divs[0])[repr(divs[0]).find('data valid')+14:]
            start=start[:start.find('<')]
            start=start.replace(':','').replace('-','').strip()
        if 'mandatului' in repr(divs[0]):
            end=repr(divs[0])[repr(divs[0]).find('mandatului')+10:]
            end=end[:end.find('<')]
            end=end.replace(':','').replace('-','').strip()
        comisii=[]
        comisii_abbr=[]
        parties=[]
        states=[]
        activitate=[]
        for div in divs:
            header=div.find('h3').text
            if 'omisii permanente' in header:
                comisii=['Comisia '+j.replace('\n','').strip() for j in div.text.split('Comisia ')[1:]]
                comisii_abbr=[j.text for j in div.findAll('a')]
            if 'iunea politic' in header:
                parties=[j for j in div.findAll('tr',{'valign':'center'}) if j.findAll('table')]
            if 'altor state' in header:
                states+=div.findAll('tr')
            if 'lte grupuri' in header:
                states+=div.findAll('tr')
            if 'cifre' in header:
                activitate=[j.text.split(':') for j in div.findAll('tr') if j.text.split(':')!=['']]        
        
        state=[state_format(states[j].text) for j in range(len(states))]    
        state_abbr=['' if states[j].find('a')==None else states[j].find('a').text for j in range(len(states))]
        state_img=['' if states[j].find('img')==None else states[j].find('img')['src'] for j in range(len(states))]
        party=[parties[j].find('table').text for j in range(len(parties))]
        party_abbr=['' if parties[j].find('table').find('a')==None else parties[j].find('table').find('a').text for j in range(len(parties))]
        party_img=['' if parties[j].find('img')==None else parties[j].find('img')['src'] for j in range(len(parties))]
            
        for i in range(len(state_img)):
            s=state_img[i]
            t=state_abbr[i]
            if t not in state_imgs:state_imgs[t]=s
        for i in range(len(party_img)):
            s=party_img[i]
            a=party_abbr[i]
            t=party[i]
            if a not in party_imgs:party_imgs[a]={'name':t,'img':s}
        members.append({'name':name,'birth_ro':birth_ro,'idm':idm,'link':url,'leg':leg,'start':start,
                        'img':img,'pretty_name':pretty_name,'camera':camera,'judet':judet,'end':end,
                       'party_abbr':party_abbr,'party':party,'state':state,'state_abbr':state_abbr,
                        'activitate':activitate,'comisii':comisii,'comisii_abbr':comisii_abbr})
        parsed_links.add(link)
69.78367062107466 %
72.10979297511048 %
74.43591532914631 %
76.76203768318214 %
79.08816003721796 %
81.41428239125378 %
83.7404047452896 %
86.06652709932543 %
88.39264945336124 %
90.71877180739708 %
93.0448941614329 %
95.37101651546871 %
97.69713886950454 %
In [75]:
len(members), len(links)
Out[75]:
(4299, 4299)
In [76]:
open('data/members.json','w').write(json.dumps({'members':members,
                                                'party_imgs':party_imgs,
                                                'state_imgs':state_imgs}))
Out[76]:
4788854
In [77]:
#members=json.loads(open('data/members.json','r').read())['members']
In [78]:
import pickle
In [79]:
def save_obj(obj, name ):
    with open('data/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    with open('data/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)
In [83]:
save_obj(pages,'pages')
In [81]:
#pages=load_obj('pages')

Clean

In [86]:
legs={'2016':['2016-12-21','2019-07-01'],
'2012':['2012-12-20','2016-12-21'],
'2008':['2008-12-19','2012-12-20'],
'2004':['2004-12-19','2008-12-13'],
'2000':['2000-12-15','2004-11-30'],
'1996':['1996-11-27','2000-11-30'],
'1992':['1992-10-28','1996-11-22'],
'1990':['1990-06-19','1992-10-16']}

Manual fix for tihs Monitorul official:

In [151]:
ro_months={'ian.':'Jan',
          ' ia ':' Jan ',
          'feb.':'Feb',
          'mar.':'Mar',
          'apr.':'Apr',
          'mai':'May',
          'iun.':'Jun',
          ' iu ':' Jun ',
          'iul.':'Jul',
          'aug.':'Aug',
          'sep.':'Sep',
          'oct.':'Oct',
          'noi.':'Nov',
          'dec.':'Dec',
          'ianuarie':'Jan',
          'februarie':'Feb',
          'martie':'Mar',
          'aprilie':'Apr',
          'mai':'May',
          'iunie':'Jun',
          'iulie':'Jul',
          'august':'Aug',
          'septembrie':'Sep',
          'octombrie':'Oct',
          'noiembrie':'Nov',
          'decembrie':'Dec',}
def date_ro(d,s=True):
    ds=d.split(' ')
    x=ds[1].replace('0201','2001').replace('0092','1992')
    t=pd.to_datetime(ro_months[ds[0].strip()]+' '+x)
    if s: return str(t)[:10]
    else: return t
def date_ro2(d,s=False):
    for i in range(20):
        d=d.replace('  ',' ')
    ds=d.split(' ')
    t=pd.to_datetime(ds[0]+' '+ro_months[ds[1]]+' '+ds[2])
    s=str(t)[:10]
    if s=='2004-02-17': s='2004-12-17'
    return s
In [152]:
decess={'Ratiu Ion1917-2000':'6 Jun 1917',
      'Popovici Dan Ion Cristian1946-1996':'26 Dec 1946',
      'Palfi Mozes Zoltan1943-2011':'1 Jan 1943',
      'Blaga Ionel1929-1994':'17 Mar 1929',
      'Daraban Aurel1939-2004':'27 Sep 1939',
      'Croitoru Mircea-Adrian1941-1999':'1 Jan 1941',
      'Budeanu Radu1943-1997':'1 Jan 1943',
      'Coposu Corneliu1914-1995':'20 May 1914',
      'Ignat Miron1941-2018':'24 Aug 1941',
      'Nastase Toma1932-1997':'1 Jan 1932',
      'Musat Mircea1930-1994':'1 Jan 1930',
      'Stoica Stefan1976-2014':'1 Jan 1976',
      'Grama Mihail1924-1999':'1 Jan 1924',
      'Bot Octavian1951-2015':'1 Jan 1951', #real 1 Jan
      'Iorgovan Antonie1948-2007':'9 Aug 1948',
      'Dinescu Valentin1955-2008':'25 Dec 1955',
      'Babias Iohan-Peter1952-2002':'28 Jun 1952',
      'Munteanu Mircea Mihai1933-1998':'26 May 1933',
      'Timis Ioan1951-2010':'17 Sep 1951',
      'Barbu Eugen1924-1993':'1 Jan 1924',
      'Tcaciuc Stefan1936-2005':'13 Jan 1936',
      'Mircovici Niculae1950-2016':'1 Oct 1950',
      'Rusu Horia Mircea1952-2001':'18 Sep 1952',
      'Racoceanu Viorel1962-2006':'8 Jun 1962',
      'Andrei Zeno1935-2001':'1 Jan 1935',
      'Surdu-Soreanu Raul-Victor1947-2011':'11 Jul 1947',
      'Dan Iosif1950-2007':'14 Oct 1950',
      'Dutu Ion1942-2000':'7 Oct 1942',
      'Bindea Liviu-Doru1957-2006':'26 Jul 1957',
      'Verestoy Attila1954-2018':'1 Mar 1954',
      'Dragomir Nelu Aristide1957-1995':'13 Oct 1957',
      'Micle Ulpiu-Radu-Sabin1935-2000':'1 Jan 1935',
       'Cojocariu Emil1938-1994':'2 Dec 1938',
       'Policrat Rene-Radu1910-1993':'12 Aug 1910',
       'Serban Gheorghe1954-1998':'25 Jun 1954',
       'Vladoiu Aurel1948-2015':'27 Jan 1948',
       'Preda Ion1947-2007':'1 Jan 1947',
       'Coste Marina-Adelina1965-2017':'30 Nov 1965',
       'Sincai Ovidiu1949-1999':'14 Dec 1949',
       'Grosaru Mircea1952-2014':'30 Jun 1952',
       'Florescu Nicolae-Doru1960-2001':'1 jan 1960',
       'Sinko Stefan1939-1995':'1 Jan 1939',
       'Alecsandrescu Nicolae1923-1993':'1 Jan 1923',
       'Ratoi Neculai1939-2016':'15 Mar 1939',
       'Ichim Mircea-Adrian1944-1993':'1 Jan 1944',
       'Fotopolos Sotiris1937-2008':'6 Dec 1937',
       'Ciobanu Gheorghe1964-2015':'22 Sep 1964',
       'Dumitrescu Liana1973-2011':'20 Jan 1973',
       'Dida Corneliu Ioan1942-2008':'26 May 1942',
       'Draghici Sonia-Maria1956-2016':'25 Jul 1956'}
In [153]:
def replace_all(text, dic):
    for i in dic:
        text = text.replace(i, dic[i])
    return text
In [154]:
nmembers={}
nparty_imgs={}
nstate_imgs={}
party_set=set()
country_set=set()
deaths={}
for i in members:
    if '-' not in i['birth_ro']:
        birth=str(pd.to_datetime(replace_all(i['birth_ro'],ro_months)))
        death=''
    else:
        birth=str(pd.to_datetime(replace_all(i['name']+i['birth_ro'],decess)))
        death=i['birth_ro'].split('-')[1].strip()
    if len(birth)>3:
        name=i['name']+' | '+birth[:10]
    else:
        name=i['name']+' | Ismeretlen'
    if death:
        if name not in deaths:
            deaths[name]=death
    since=str(pd.to_datetime(legs[i['leg']][0]))[:10]
    until=str(pd.to_datetime(legs[i['leg']][1]))[:10] 
    if i['start']: since=date_ro2(i['start'],True)
    if i['end']: until=date_ro2(i['end'])
    if name not in nmembers:
        nmembers[name]={}
    if 'Name' not in nmembers[name]:
        nmembers[name]['Name']={'full':i['pretty_name'],'simple':name,'short':i['name']}
    if 'Photo' not in nmembers[name]:
        nmembers[name]['Photo']=[]
    nmembers[name]['Photo'].append(base_url+i['img'])
    if 'UserID' not in nmembers[name]:
        nmembers[name]['UserID']=[]
    nmembers[name]['UserID'].append(i['idm'])
    if 'Camera' not in nmembers[name]:
        nmembers[name]['Camera']=[]
    nmembers[name]['Camera'].append(i['camera'])
    if 'Starts' not in nmembers[name]:
        nmembers[name]['Starts']=[]
    nmembers[name]['Starts'].append(since)
    if 'Ends' not in nmembers[name]:
        nmembers[name]['Ends']=[]
    nmembers[name]['Ends'].append(until)
    if 'Link' not in nmembers[name]:
        nmembers[name]['Link']=[]
    nmembers[name]['Link'].append(i['link'])
    
    if 'Parties' not in nmembers[name]:
        nmembers[name]['Parties']=[]
    parties=i['party']
    parties_abbr=i['party_abbr']
    for pi in range(len(parties)):
        p=parties[pi].strip()
        #reset since
        since=str(pd.to_datetime(legs[i['leg']][0]))[:10]
        until=str(pd.to_datetime(legs[i['leg']][1]))[:10] 
        if i['start']: since=date_ro2(i['start'],True)
        if i['end']: until=date_ro2(i['end'])
        #process
        if '-' not in p:
            if p!='independent':
                abbr='Minorități'
                party=p
            else:
                abbr='Independent'
                party='Independent'
        else:
            abbr=parties_abbr[pi]
            party=p.split('-')[1].strip()
            if ('din ') in party:
                if ('din R') not in party:
                    if ('din B') not in party:
                        since=max(since,date_ro(party[party.find('din ')+4:].strip(),True))
                        party='Independent'
            #else:
            if True: #keep, for cases with both 'din' and 'pana'
                if len(p.split('-'))>2:
                    s=''.join(p.split('-')[2:]).strip()
                    if ('din ') in s:
                        if ('din R') not in s:
                            since=max(since,date_ro(s[s.find('din ')+4:].strip(),True))
                    if ('până în ') in s:
                        until=min(until,date_ro(s[s.find('până în ')+8:].strip(),True))
        if abbr in ['independent','','Neafiliaţi']:
            abbr='Independent'
        nmembers[name]['Parties'].append({'party':abbr,
                'start':since,'end':until,'judet':i['judet']})
        party_set.add(abbr)
        
    if 'Countries' not in nmembers[name]:
        nmembers[name]['Countries']=[]
    states=i['state']
    states_abbr=i['state_abbr']
    for pi in range(len(states)):
        p=states[pi].strip()
        #reset since
        since=str(pd.to_datetime(legs[i['leg']][0]))[:10]
        until=str(pd.to_datetime(legs[i['leg']][1]))[:10] 
        if i['start']: since=date_ro2(i['start'],True)
        if i['end']: until=date_ro2(i['end'])
        #process
        if '-' in p:
            state=p.split('-')[0]
            s=''.join(p.split('-')[1:]).strip()
            if ('din ') in s:
                if ('din R') not in s:
                    since=max(since,date_ro(s[s.find('din ')+4:].strip(),True))
            if ('până în ') in s:
                until=min(until,date_ro(s[s.find('până în ')+8:].strip(),True))
        nmembers[name]['Countries'].append({'country':state_format(states_abbr[pi]),'start':since,'end':until})
        country_set.add(states_abbr[pi])
    
    if 'Groups' not in nmembers[name]:
        nmembers[name]['Groups']=[]
    comisii=i['comisii']
    comisii_abbr=i['comisii_abbr']
    for pi in range(len(comisii)):
        p=comisii[pi].strip()
        #reset since
        since=str(pd.to_datetime(legs[i['leg']][0]))[:10]
        until=str(pd.to_datetime(legs[i['leg']][1]))[:10] 
        if i['start']: since=date_ro2(i['start'],True)
        if i['end']: until=date_ro2(i['end'])
        #process
        if '(' in p:
            s=''.join(p.split('(')[1:]).strip()
            s=s[:s.find(')')]
            if ('din ') in s:
                since=max(since,date_ro(s[s.find('din ')+4:].strip(),True))
            if ('până în ') in s:
                until=min(until,date_ro(s[s.find('până în ')+8:].strip(),True))
        nmembers[name]['Groups'].append({'group':comisii_abbr[pi],
                'start':since,'end':until})
    
    #reset since
    since=str(pd.to_datetime(legs[i['leg']][0]))[:10]
    until=str(pd.to_datetime(legs[i['leg']][1]))[:10] 
    if i['start']: since=date_ro2(i['start'],True)
    if i['end']: until=date_ro2(i['end'])
    #process
    if 'Activity' not in nmembers[name]:
        nmembers[name]['Activity']={}
    activitate=i['activitate']
    for a in activitate:
        if len(a)>1:
            an=a[0]
            if an not in ['Membru în']:
                av=int(a[1].split('(')[0].strip().split(',')[0].strip().split('-')[0].strip())
                if an not in nmembers[name]['Activity']:nmembers[name]['Activity'][an]=[]
                nmembers[name]['Activity'][an].append({'value':av,'start':since,'end':until})        
            if an=='Luari de cuvânt':
                if 'în ' in a[1]:
                    av=int(a[1].split('(în ')[-1].strip().split('sedinte)')[0].strip())
                    nmembers[name]['Activity']['Sedințe']={'value':av,'start':since,'end':until}
In [155]:
nmembers['Peres Alexandru | 1952-12-20']['Parties']
Out[155]:
[{'party': 'PD', 'start': '2004-12-17', 'end': '2008-12-13', 'judet': 'ALBA'},
 {'party': 'PD', 'start': '1996-11-27', 'end': '2000-11-30', 'judet': 'ALBA'},
 {'party': 'PD', 'start': '2000-12-15', 'end': '2004-11-30', 'judet': 'ALBA'},
 {'party': 'PDL', 'start': '2012-12-20', 'end': '2015-02-01', 'judet': 'ALBA'},
 {'party': 'PNL', 'start': '2015-02-01', 'end': '2016-12-21', 'judet': 'ALBA'},
 {'party': 'PNL', 'start': '2016-12-21', 'end': '2019-07-01', 'judet': 'ALBA'},
 {'party': 'PDL', 'start': '2008-12-19', 'end': '2012-12-20', 'judet': 'ALBA'}]
In [156]:
# nmembers={}
# nparty_imgs={}
# nstate_imgs={}
# party_set=set()
# country_set=set()
# deaths={}
# for i in members:
#     if '-' not in i['birth_ro']:
#         birth=str(pd.to_datetime(replace_all(i['birth_ro'],ro_months)))
#         death=''
#     else:
#         birth=str(pd.to_datetime(replace_all(i['name']+i['birth_ro'],decess)))
#         death=i['birth_ro'].split('-')[1].strip()
#     if len(birth)>3:
#         name=i['name']+' | '+birth[:10]
#     else:
#         name=i['name']+' | Ismeretlen'
#     if death:
#         if name not in deaths:
#             deaths[name]=death
#     since=str(pd.to_datetime(legs[i['leg']][0]))[:10]
#     until=str(pd.to_datetime(legs[i['leg']][1]))[:10] 
#     if i['start']: since=date_ro2(i['start'])
#     if i['end']: until=date_ro2(i['end'])
#     if name not in nmembers:
#         nmembers[name]={}
#     if 'Name' not in nmembers[name]:
#         nmembers[name]['Name']={'full':i['pretty_name'],'simple':name,'short':i['name']}
#     if 'Photo' not in nmembers[name]:
#         nmembers[name]['Photo']=[]
#     nmembers[name]['Photo'].append(base_url+i['img'])
#     if 'UserID' not in nmembers[name]:
#         nmembers[name]['UserID']=[]
#     nmembers[name]['UserID'].append(i['idm'])
#     if 'Camera' not in nmembers[name]:
#         nmembers[name]['Camera']=[]
#     nmembers[name]['Camera'].append(i['camera'])
#     if 'Starts' not in nmembers[name]:
#         nmembers[name]['Starts']=[]
#     nmembers[name]['Starts'].append(since)
#     if 'Ends' not in nmembers[name]:
#         nmembers[name]['Ends']=[]
#     nmembers[name]['Ends'].append(until)
#     if 'Link' not in nmembers[name]:
#         nmembers[name]['Link']=[]
#     nmembers[name]['Link'].append(i['link'])
    
#     if 'Parties' not in nmembers[name]:
#         nmembers[name]['Parties']=[]
#     parties=i['party']
#     parties_abbr=i['party_abbr']
#     for pi in range(len(parties)):
#         p=parties[pi].strip()
#         if '-' not in p:
#             if p!='independent':
#                 abbr='Minorități'
#                 party=p
#             else:
#                 abbr='Independent'
#                 party='Independent'
#         else:
#             abbr=parties_abbr[pi]
#             party=p.split('-')[1].strip()
#             if ('din ') in party:
#                 if ('din R') not in party:
#                     if ('din B') not in party:
#                         since=date_ro(party[party.find('din ')+4:].strip(),True)
#                         party='Independent'
#             #else:
#             if True:
#                 if len(p.split('-'))>2:
#                     s=''.join(p.split('-')[2:]).strip()
#                     if ('din ') in s:
#                         if ('din R') not in s:
#                             since=date_ro(s[s.find('din ')+4:].strip(),True)
#                     if ('până în ') in s:
#                         until=date_ro(s[s.find('până în ')+8:].strip(),True)
#         if abbr in ['independent','','Neafiliaţi']:
#             abbr='Independent'
#         nmembers[name]['Parties'].append({'party':abbr,
#                 'start':since,'end':until,'judet':i['judet']})
#         party_set.add(abbr)
    
#     if 'Countries' not in nmembers[name]:
#         nmembers[name]['Countries']=[]
#     states=i['state']
#     states_abbr=i['state_abbr']
#     for pi in range(len(states)):
#         p=states[pi].strip()
#         if '-' in p:
#             state=p.split('-')[0]
#             s=''.join(p.split('-')[1:]).strip()
#             if ('din ') in s:
#                 if ('din R') not in s:
#                     since=date_ro(s[s.find('din ')+4:].strip(),True)
#             if ('până în ') in s:
#                 until=date_ro(s[s.find('până în ')+8:].strip(),True)
#         nmembers[name]['Countries'].append({'country':state_format(states_abbr[pi]),'start':since,'end':until})
#         country_set.add(states_abbr[pi])
    
#     if 'Groups' not in nmembers[name]:
#         nmembers[name]['Groups']=[]
#     comisii=i['comisii']
#     comisii_abbr=i['comisii_abbr']
#     for pi in range(len(comisii)):
#         p=comisii[pi].strip()
#         if '(' in p:
#             s=''.join(p.split('(')[1:]).strip()
#             s=s[:s.find(')')]
#             if ('din ') in s:
#                 since=date_ro(s[s.find('din ')+4:].strip(),True)
#             if ('până în ') in s:
#                 until=date_ro(s[s.find('până în ')+8:].strip(),True)
#         nmembers[name]['Groups'].append({'group':comisii_abbr[pi],
#                 'start':since,'end':until})
    
#     if 'Activity' not in nmembers[name]:
#         nmembers[name]['Activity']={}
#     activitate=i['activitate']
#     for a in activitate:
#         if len(a)>1:
#             an=a[0]
#             if an not in ['Membru în']:
#                 av=int(a[1].split('(')[0].strip().split(',')[0].strip().split('-')[0].strip())
#                 if an not in nmembers[name]['Activity']:nmembers[name]['Activity'][an]=[]
#                 nmembers[name]['Activity'][an].append({'value':av,'start':since,'end':until})        
#             if an=='Luari de cuvânt':
#                 if 'în ' in a[1]:
#                     av=int(a[1].split('(în ')[-1].strip().split('sedinte)')[0].strip())
#                     nmembers[name]['Activity']['Sedințe']={'value':av,'start':since,'end':until}
In [157]:
len(nmembers),len(members),len(deaths)
Out[157]:
(2818, 4299, 50)
In [158]:
open('data/nmembers.json','w').write(json.dumps(nmembers))
Out[158]:
4728665
In [159]:
open('data/deaths.json','w').write(json.dumps(deaths))
Out[159]:
2021

No need to re-run

In [102]:
def party_cleaner(i,p):
    if i==p: return p
    p=p[len(i)+1:].split('-')[0].strip()
    return p
In [103]:
#! run once
for i in party_set.difference(set(party_imgs.keys())):
    party_imgs[i]={'name': i, 'img': ''}
for i in set(party_imgs.keys()).difference(party_set):
    party_imgs.pop(i)
for i in party_imgs:
    party_imgs[i]['name']=party_cleaner(i,party_imgs[i]['name'])
In [104]:
nstate_imgs={}
In [105]:
for i in state_imgs:
    nstate_imgs[state_format(i)]=state_imgs[i]
nstate_imgs['']=''
In [106]:
#img=json.loads(open('data/img.json','r').read())
img={'party':party_imgs,'state':nstate_imgs}
In [107]:
#manual
img['party']['PP-DD']['name']='Partidul Poporului Dan Diaconescu'
img['party']['PUR-SL']['name']='Partidul Umanist din România'
img['party']['RMDSZ']=img['party']['UDMR']
img['party']['RMDSZ']['name']='Romániai Magyar Demokrata Szövetség'
In [108]:
open('data/img.json','w').write(json.dumps(img))
Out[108]:
11628
In [ ]: