In [3]:
import pandas as pd, numpy as np
import bs4
import requests, os
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# Work from the project folder so the relative 'data/...' paths used by the
# later cells resolve. NOTE: this is an absolute, machine-specific path and
# has to be adjusted before running the notebook anywhere else.
project_dir='C:/users/csala/Onedrive - Lancaster University/datarepo/influence/ro'
os.chdir(project_dir)
In [189]:
# Host prefix that is prepended to each bill's `llink` href when the
# absolute `link` field of a vote is built in the matching cell.
base_url='http://www.cdep.ro'
In [153]:
def get_url(par1,par2):
    """Return the electronic-vote XML endpoint URL for the given query values.

    Both arguments must already be strings; they are inserted verbatim as the
    `par1` and `par2` query parameters (no URL-encoding is applied).
    """
    endpoint='http://www.parlament.ro/pls/steno/evot2015.xml'
    query='par1='+par1+'&par2='+par2
    return endpoint+'?'+query
In [154]:
# One timestamp per calendar day of the scraped window, both endpoints included.
first_day,last_day='2006-02-06','2019-04-25'
dates=pd.date_range(start=first_day,end=last_day,freq='D')
In [155]:
# Accumulators for the vote-list scrape: `votes` receives one dict per vote,
# `parsed_votes` holds the vote ids already stored so none is added twice.
votes,parsed_votes=[],set()
In [156]:
# Seconds to wait on the server before giving up on a request. Without a
# timeout `requests.get` can block indefinitely on a stalled connection and
# freeze the whole crawl.
REQUEST_TIMEOUT=60

# Fetch the vote list of every day in `dates` and store the metadata of each
# vote id that has not been recorded yet.
for date in dates:
    day=str(date)[:10].replace('-','')  # 'YYYY-MM-DD ...' -> 'YYYYMMDD'
    r=requests.get(get_url('1',day),timeout=REQUEST_TIMEOUT)
    soup=bs4.BeautifulSoup(r.content)
    for row in soup.findAll("row"):
        v=row.find("votid").text
        if v in parsed_votes:
            continue
        # The description element is optional; fall back to an empty string.
        desc_tag=row.find("descriere")
        desc=desc_tag.text if desc_tag else ''
        votes.append({'votid':v,
                      'descriere':desc,
                      'time':row.find("time_vot").text,
                      'camera':row.find("camera").text})
        parsed_votes.add(v)
In [6]:
import json
In [161]:
# Persist the scraped vote list. The `with` block guarantees the file is
# flushed and closed; the last line keeps showing the number of characters
# written as the cell output.
with open('data/votes.json','w') as fh:
    n_written=fh.write(json.dumps(votes))
n_written
Out[161]:
1655007
In [160]:
# Number of vote entries collected in `votes`.
len(votes)
Out[160]:
16017
In [141]:
# Accumulators for the per-vote scrape: `records` receives one dict per
# individual ballot, `parsed_records` holds the vote ids already fetched.
records,parsed_records=[],set()
In [142]:
# Seconds to wait on the server before giving up on a request. Without a
# timeout `requests.get` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT=60

# Fetch the individual ballots of every vote in `votes` that has not been
# downloaded yet and append one record per listed member.
for vote in votes:
    # Each entry of `votes` is a dict, so the id is read by key
    # (indexing it with `[0]` raises KeyError).
    d=vote['votid']
    if d in parsed_records:
        continue
    r=requests.get(get_url('2',d),timeout=REQUEST_TIMEOUT)
    soup=bs4.BeautifulSoup(r.content)
    for row in soup.findAll("row"):
        nume=row.find("nume").text
        prenume=row.find("prenume").text
        records.append({'votid':d,
                        'vot':row.find("vot").text,
                        'grup':row.find("grup").text,
                        'name':nume+' '+prenume,    # "nume prenume" order
                        'name2':prenume+' '+nume,   # "prenume nume" order
                        'camera':row.find("camera").text})
    # Mark the vote as done only after all of its rows were stored.
    parsed_records.add(d)
In [152]:
# Persist the scraped ballots. The `with` block guarantees the file is
# flushed and closed; the last line keeps showing the number of characters
# written as the cell output.
with open('data/records.json','w') as fh:
    n_written=fh.write(json.dumps(records))
n_written
Out[152]:
321378064
In [149]:
# Total number of individual ballot records collected in `records`.
len(records)
Out[149]:
3382156
In [163]:
# Number of vote ids whose ballots have been fetched so far.
len(parsed_records)
Out[163]:
16017
In [162]:
# Number of known votes; expected to match len(parsed_records) once every
# vote has had its ballots fetched.
len(votes)
Out[162]:
16017
In [37]:
# Accumulators for the bill-list scrape: `votes_desc` receives one dict per
# listed bill, `parsed_years` holds the years whose list was already read.
votes_desc,parsed_years=[],set()
In [39]:
# Seconds to wait on the server before giving up on a request. Without a
# timeout `requests.get` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT=60

# Read the yearly bill lists and store name, link and description of every
# bill. Years already in `parsed_years` are skipped, so the cell can be rerun.
for year in range(1990,2020):
    if year in parsed_years:
        continue
    url='http://parlament.ro/pls/proiecte/upl_pck2015.lista?anp='+str(year)
    r=requests.get(url,timeout=REQUEST_TIMEOUT)
    soup=bs4.BeautifulSoup(r.content)
    tables=soup.findAll('table')
    if len(tables)<=1:
        # No second table on the page: leave the year unmarked so that a
        # later rerun tries it again.
        continue
    for tr in tables[1].findAll("tr"):
        tds=tr.findAll('td')
        # Cells 1 and 2 are read below, so rows without data cells or with
        # fewer than three of them cannot be bill rows and are skipped.
        if len(tds)<3:
            continue
        anchor=tds[1].find('a')
        href=anchor.get('href') if anchor is not None else None
        if href is None:
            # No usable link in the name cell; nothing to record.
            continue
        votes_desc.append({'lname':anchor.text,
                           'llink':href,
                           'ldesc':tds[2].text.replace('\n','')})
    parsed_years.add(year)
    print(year)
2019
In [40]:
# Persist the scraped bill list. The `with` block guarantees the file is
# flushed and closed; the last line keeps showing the number of characters
# written as the cell output.
with open('data/votes_desc.json','w') as fh:
    n_written=fh.write(json.dumps(votes_desc))
n_written
Out[40]:
4951920
In [8]:
#votes=json.loads(open('data/votes.json','r').read())
In [187]:
def _law_key(lname):
    """Build the lookup key for a bill name.

    The key is the text between the first space and the first '/' of
    `lname`, followed by '/' and the last four characters of `lname`.
    """
    start=lname.find(' ')+1
    stop=lname.find('/')
    return lname[start:stop]+'/'+lname[-4:]

# Two lookups sharing the same keys: bill description and bill link.
# When several entries produce the same key, the last one wins.
ldesc={_law_key(entry['lname']):entry['ldesc'] for entry in votes_desc}
llink={_law_key(entry['lname']):entry['llink'] for entry in votes_desc}
In [202]:
# Hand-written corrections for bill references that are too malformed for the
# generic clean-up below. Keys are matched exactly against the description
# text from its first 'PL' onwards.
_BASE_OVERRIDES={
    'PLx 5962010 A':'PLx 596/2010',
    'PL 301 302/2010 C':'PLx 301/2010',
    'PLx/2013 240 si PLx 241/2013 pe poz.14 si 15':'PLx 241/2013',
    'PLx/515/09 C':'PLx 515/2009',
    'PL 337 338/2009 C':'PLx 337/2009',
    'PL.643/2011':'PLx 643/2011',
    'PLx.492/2011':'PLx 492/2011',
    'PL 566 568 571/2013':'PLx 566/2013',
}

# Ordered (old, new) replacements applied to a bill reference. The order is
# significant: later steps rely on what earlier ones removed.
_CLEANUP_STEPS=(
    ('PL',''),(' ',''),('x',''),('-',''),('//','/'),('//','/'),
    ('/A',''),('/T',''),('nr.',''),
    (' A',''),(' C',''),(' R',''),
    ('A',''),('T',''),('pct.1',''),('R',''),('C',''),('t',''),
    ('/207','/2007'),('/07','/2007'),('/08','/2008'),('/09','/2009'),
    ('/10','/2010'),('/11','/2011'),('/12','/2012'),('/13','/2013'),
    ('/14','/2014'),('/15','/2015'),('/16','/2016'),('/17','/2017'),
    ('/18','/2018'),('/19','/2019'),
)


def _clean_reference(text):
    """Apply every replacement of `_CLEANUP_STEPS`, in order, to `text`."""
    for old,new in _CLEANUP_STEPS:
        text=text.replace(old,new)
    return text


def _attach_law(vote,key):
    """Copy the description and absolute link stored under `key` onto `vote`."""
    vote['lung']=ldesc[key]
    vote['lege']=key
    vote['link']=base_url+llink[key]


# Enrich every vote with the bill it refers to. Votes whose description holds
# a 'PL' reference that resolves to a key of `ldesc` get type 'PL' plus the
# bill's description/key/link; every other vote gets type 'Other'.
# The dicts in `votes` are updated in place and also collected in `nvotes`.
nvotes=[]
for v in votes:
    found=False
    d=v['descriere'].replace('Pl','PL')
    if 'PL' in d:
        base=d[d.find('PL'):]
        base=_BASE_OVERRIDES.get(base,base)
        lname=_clean_reference(base[base.find('PL'):])
        year=lname[-4:]
        law=lname[:lname.find('/')]
        lname=law+'/'+year

        if lname in ldesc:
            found=True
            _attach_law(v,lname)
        else:
            # First fallback: the same number registered one year earlier.
            # `year` is an arbitrary 4-character tail, so it may not be a number.
            try:
                lname2=law+'/'+str(int(year)-1)
            except ValueError:
                pass
            else:
                if lname2 in ldesc:
                    found=True
                    _attach_law(v,lname2)

            if not found:
                # Second fallback: scan the years in ascending order.
                # NOTE(review): the `year2-1` candidate is tried for every
                # year, whether or not it occurs in the description, so the
                # number can match a bill from an unrelated year -- confirm
                # that this is intended.
                for year2 in range(1990,2020):
                    if str(year2) in base or str(year2) in lname:
                        lname2=law+'/'+str(year2)
                        if lname2 in ldesc:
                            found=True
                            break
                    lname2=law+'/'+str(year2-1)
                    if lname2 in ldesc:
                        found=True
                        break
                if found:
                    _attach_law(v,lname2)

        if not found:
            # Diagnostic line for references that could not be resolved.
            print(law,'|',lname,'|',lname2,'|',year,'|',year2,'|',base)
        else:
            v['type']='PL'
    if not found:
        v['type']='Other'
        v['lung']=v['descriere']
        v['lege']=''
        v['link']=''
    nvotes.append(v)
        
enuluivarianaSenaulu | enuluivarianaSenaulu/ului | enuluivarianaSenaulu/2018 | ului | 2019 | PLenului varianta Senatului
anuldeaciuneeuropeanindomeniulaparariiOM(2016)950(PHD36 | anuldeaciuneeuropeanindomeniulaparariiOM(2016)950(PHD36/17). | anuldeaciuneeuropeanindomeniulaparariiOM(2016)950(PHD36/2018 | 17). | 2019 | PLanul de actiune european in domeniul apararii COM(2016)950 (PH CD 36/2017).
anuldeaciunealUE20172019ombaereadifereneideremunerareinrefemeisibarbaiOM(2017)678(PHD12 | anuldeaciunealUE20172019ombaereadifereneideremunerareinrefemeisibarbaiOM(2017)678(PHD12/18). | anuldeaciunealUE20172019ombaereadifereneideremunerareinrefemeisibarbaiOM(2017)678(PHD12/2018 | 18). | 2019 | PLanul de actiune al UE 2017 - 2019 Combaterea diferentei de remunerare intre femei si barbati COM(2017) 678 (PH CD 12/2018).
anuldeaciunepenrueducaiadigialaOM(2018)22(PHD20 | anuldeaciunepenrueducaiadigialaOM(2018)22(PHD20/18). | anuldeaciunepenrueducaiadigialaOM(2018)22(PHD20/2018 | 18). | 2019 | PLanul de actiune pentru educatia digitala COM(2018) 22 (PH CD 20/2018).
anuldeaciuneprivindFinech:penruunsecorfinanciareuropeanmaicompeiivsimaiinovaorOM(2018)109(PHD33 | anuldeaciuneprivindFinech:penruunsecorfinanciareuropeanmaicompeiivsimaiinovaorOM(2018)109(PHD33/18). | anuldeaciuneprivindFinech:penruunsecorfinanciareuropeanmaicompeiivsimaiinovaorOM(2018)109(PHD33/2018 | 18). | 2019 | PLanul de actiune privind FinTech: pentru un sector financiar european mai competitiv si mai inovator COM(2018)109 (PH CD 33 /2018).
anuldeaciunevizandmobiliaeamiliaraJOIN(2018)5(PHD34 | anuldeaciunevizandmobiliaeamiliaraJOIN(2018)5(PHD34/18). | anuldeaciunevizandmobiliaeamiliaraJOIN(2018)5(PHD34/2018 | 18). | 2019 | PLanul de actiune vizand mobilitatea militara JOIN(2018) 5 (PH CD 34 /2018).
In [203]:
# Persist the enriched votes. The `with` block guarantees the file is
# flushed and closed; the last line keeps showing the number of characters
# written as the cell output.
with open('data/nvotes.json','w') as fh:
    n_written=fh.write(json.dumps(nvotes))
n_written
Out[203]:
5464124