import pandas as pd, numpy as np
import bs4
import requests, os
import matplotlib.pyplot as plt
%matplotlib inline
# Work from the local data repository so that the relative 'data/...' paths
# used for the JSON dumps in this file resolve there.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
os.chdir('C:/users/csala/Onedrive - Lancaster University/datarepo/influence/ro')
# Site root that is prefixed to the relative bill links (llink values) when the
# per-vote 'link' field is built.
base_url='http://www.cdep.ro'
def get_url(par1,par2):
    """Build the request URL for the parlament.ro ``evot2015.xml`` endpoint.

    In this file the function is called with ``par1='1'`` and a ``YYYYMMDD``
    date string to list the votes of a day, and with ``par1='2'`` and a vote
    id to list the individual ballots of that vote.

    Args:
        par1: Value of the ``par1`` query parameter (str or int).
        par2: Value of the ``par2`` query parameter (str or int).

    Returns:
        The complete URL as a string.
    """
    # str() keeps string callers unchanged while also accepting numeric ids,
    # which previously failed with "can only concatenate str" TypeError.
    return 'http://www.parlament.ro/pls/steno/evot2015.xml?par1='+str(par1)+'&par2='+str(par2)
# Walk every calendar day in the range and collect the votes listed for it.
# `parsed_votes` remembers the vote ids already stored so that a vote reported
# more than once is kept only the first time it is seen.
dates = pd.date_range(start='2006-02-06', end='2019-04-25')
votes = []
parsed_votes = set()
for day in dates:
    # Timestamp -> 'YYYY-MM-DD' -> 'YYYYMMDD', the form passed as par2.
    day_key = str(day)[:10].replace('-', '')
    response = requests.get(get_url('1', day_key))
    # No parser is named here, so bs4 chooses among the ones installed.
    soup = bs4.BeautifulSoup(response.content)
    for row in soup.findAll("row"):
        vote_id = row.find("votid").text
        if vote_id in parsed_votes:
            continue
        # The description element is optional; fall back to an empty string.
        desc_tag = row.find("descriere")
        votes.append({
            'votid': vote_id,
            'descriere': desc_tag.text if desc_tag else '',
            'time': row.find("time_vot").text,
            'camera': row.find("camera").text,
        })
        parsed_votes.add(vote_id)
import json

# Persist the scraped vote list. The context manager flushes and closes the
# file deterministically instead of leaving the handle to garbage collection.
with open('data/votes.json','w') as fh:
    fh.write(json.dumps(votes))
1655007
len(votes)
16017
# Download the individual ballots for every vote in `votes`.
# `parsed_records` holds the vote ids already fetched so that each vote is
# requested at most once.
records = []
parsed_records = set()
for vote in votes:
    # Entries of `votes` are dicts with a 'votid' key, so the id is read by
    # key; `vote[0]` would look up the key 0 and raise KeyError.
    d = vote['votid']
    if d not in parsed_records:
        r = requests.get(get_url('2', d))
        soup = bs4.BeautifulSoup(r.content)
        for i in soup.findAll("row"):
            v = i.find("vot").text
            # Read each name part once and store both orderings.
            nume = i.find("nume").text
            prenume = i.find("prenume").text
            g = i.find("grup").text
            c = i.find("camera").text
            records.append({
                'votid': d,
                'vot': v,
                'grup': g,
                'name': nume + ' ' + prenume,
                'name2': prenume + ' ' + nume,
                'camera': c,
            })
        parsed_records.add(d)
# Persist the ballots. `with` closes the file as soon as the write finishes,
# which matters for an output of this size (hundreds of MB).
with open('data/records.json','w') as fh:
    fh.write(json.dumps(records))
321378064
len(records)
3382156
len(parsed_records)
16017
len(votes)
16017
# Scrape the yearly bill listings (1990-2019). For every table row that has
# cells, keep the label and href of the link in the second cell and the text
# of the third cell with newlines removed.
votes_desc = []
parsed_years = set()
for year in range(1990, 2020):
    if year in parsed_years:
        continue
    listing_url = 'http://parlament.ro/pls/proiecte/upl_pck2015.lista?anp=' + str(year)
    soup = bs4.BeautifulSoup(requests.get(listing_url).content)
    tables = soup.findAll('table')
    # The listing is read from the second table on the page; pages with
    # fewer tables contribute nothing.
    if len(tables) > 1:
        for tr in tables[1].findAll("tr"):
            cells = tr.findAll('td')
            if not cells:
                # Rows without <td> cells are skipped.
                continue
            anchor = cells[1].find('a')
            votes_desc.append({
                'lname': anchor.text,
                'llink': anchor['href'],
                'ldesc': cells[2].text.replace('\n', ''),
            })
    parsed_years.add(year)
# Shows the last year processed once the loop has finished.
print(year)
2019
# Persist the bill listing; the context manager guarantees the handle is
# flushed and closed.
with open('data/votes_desc.json','w') as fh:
    fh.write(json.dumps(votes_desc))
4951920
#votes=json.loads(open('data/votes.json','r').read())
def _law_key(lname):
    """Return the 'number/year' lookup key derived from a listing label.

    The number is the text between the first space and the first '/', and
    the year is the last four characters of the label.
    """
    number = lname[lname.find(' ')+1:lname.find('/')]
    return number + '/' + lname[-4:]


# Lookup tables from bill key to description and to relative link. When two
# listing entries share a key, the later entry overwrites the earlier one.
ldesc = {}
llink = {}
for entry in votes_desc:
    key = _law_key(entry['lname'])
    ldesc[key] = entry['ldesc']
    llink[key] = entry['llink']
# Attach bill metadata to every vote. A 'PL...' reference is parsed out of the
# vote description, reduced to a 'number/year' key and looked up in ldesc /
# llink. Matched votes get 'lung' (long description), 'lege' (bill key),
# 'link' and type 'PL'; all other votes get type 'Other' with their own
# description as 'lung'. The dicts in `votes` are modified in place, so
# `nvotes` ends up holding the same dict objects.
# NOTE(review): leading indentation is missing in this listing; the nesting
# described in the comments is inferred from the else/except pairing and from
# the recorded diagnostics. Confirm against the original notebook, in
# particular whether the last "year2-1" fallback runs on every pass of the
# year loop or only once after it.
nvotes=[]
for v in votes:
found=False
# Treat 'Pl' and 'PL' spellings alike.
d=v['descriere'].replace('Pl','PL')
if 'PL' in d:
# Keep the description from the first 'PL' onwards.
base=d[d.find('PL'):]
# Hand-written corrections for specific malformed references.
if base=='PLx 5962010 A':base='PLx 596/2010'
elif base=='PL 301 302/2010 C':base='PLx 301/2010'
elif base=='PLx/2013 240 si PLx 241/2013 pe poz.14 si 15':base='PLx 241/2013'
elif base=='PLx/515/09 C':base='PLx 515/2009'
elif base=='PL 337 338/2009 C':base='PLx 337/2009'
elif base=='PL.643/2011':base='PLx 643/2011'
elif base=='PLx.492/2011':base='PLx 492/2011'
elif base=='PL 566 568 571/2013':base='PLx 566/2013'
lname=base[base.find('PL'):]
# Strip the 'PL'/'x' prefix, spaces, dashes and marker text, then expand
# abbreviated years ('/09' -> '/2009'). Every 'A', 'T', 'R', 'C' and 't'
# character is removed as well, which is why free-text descriptions appear
# mangled in the diagnostics printed further down.
lname=lname.replace('PL','').replace(' ','').replace('x','').replace('-','').replace('//','/').replace('//','/')\
.replace('/A','').replace('/T','').replace('nr.','')\
.replace(' A','').replace(' C','').replace(' R','')\
.replace('A','').replace('T','').replace('pct.1','').replace('R','').replace('C','').replace('t','')\
.replace('/207','/2007').replace('/07','/2007').replace('/08','/2008').replace('/09','/2009')\
.replace('/10','/2010').replace('/11','/2011').replace('/12','/2012').replace('/13','/2013')\
.replace('/14','/2014').replace('/15','/2015').replace('/16','/2016').replace('/17','/2017')\
.replace('/18','/2018').replace('/19','/2019')
# Year = last four characters, bill number = text before the first '/'.
year=lname[-4:]
law=lname[:lname.find('/')]
lname=law+'/'+year
if lname not in ldesc:
# No exact hit: first retry with the previous year.
# (presumably a bill can be voted on the year after it was listed; verify)
try:
lname2=law+'/'+str(int(year)-1)
if lname2 in ldesc:
found=True
v['lung']=ldesc[lname2]
v['lege']=lname2
v['link']=base_url+llink[lname2]
# Bare except: int(year) fails when the last four characters are not a
# number, but any other error raised here is silenced too.
except:
pass
if not found:
# Scan 1990-2019 and try each year that literally appears in the raw
# reference (base) or in the cleaned key (lname).
for year2 in range(1990,2020):
if not found:
if str(year2) in base:
lname2=law+'/'+str(year2)
if lname2 in ldesc:
found=True
v['lung']=ldesc[lname2]
v['lege']=lname2
v['link']=base_url+llink[lname2]
elif str(year2) in lname:
lname2=law+'/'+str(year2)
if lname2 in ldesc:
found=True
v['lung']=ldesc[lname2]
v['lege']=lname2
v['link']=base_url+llink[lname2]
# Last resort: the year before year2 (see the NOTE at the top about where
# this sits relative to the year loop).
if not found:
try:
lname2=law+'/'+str(int(year2)-1)
if lname2 in ldesc:
found=True
v['lung']=ldesc[lname2]
v['lege']=lname2
v['link']=base_url+llink[lname2]
except:
pass
# Pairs with "if lname not in ldesc": the cleaned key matched directly.
else:
found=True
v['lung']=ldesc[lname]
v['lege']=lname
v['link']=base_url+llink[lname]
# Diagnostics for 'PL' descriptions that could not be matched to a bill.
if not found:
print(law,'|',lname,'|',lname2,'|',year,'|',year2,'|',base)
else:
v['type']='PL'
# Fallback for votes without a 'PL' reference or without a match.
if not found:
v['type']='Other'
v['lung']=v['descriere']
v['lege']=''
v['link']=''
nvotes.append(v)
# Output: 'PL' descriptions left unmatched (law | lname | lname2 | year | year2 | base)
# enuluivarianaSenaulu | enuluivarianaSenaulu/ului | enuluivarianaSenaulu/2018 | ului | 2019 | PLenului varianta Senatului
# anuldeaciuneeuropeanindomeniulaparariiOM(2016)950(PHD36 | anuldeaciuneeuropeanindomeniulaparariiOM(2016)950(PHD36/17). | anuldeaciuneeuropeanindomeniulaparariiOM(2016)950(PHD36/2018 | 17). | 2019 | PLanul de actiune european in domeniul apararii COM(2016)950 (PH CD 36/2017).
# anuldeaciunealUE20172019ombaereadifereneideremunerareinrefemeisibarbaiOM(2017)678(PHD12 | anuldeaciunealUE20172019ombaereadifereneideremunerareinrefemeisibarbaiOM(2017)678(PHD12/18). | anuldeaciunealUE20172019ombaereadifereneideremunerareinrefemeisibarbaiOM(2017)678(PHD12/2018 | 18). | 2019 | PLanul de actiune al UE 2017 - 2019 Combaterea diferentei de remunerare intre femei si barbati COM(2017) 678 (PH CD 12/2018).
# anuldeaciunepenrueducaiadigialaOM(2018)22(PHD20 | anuldeaciunepenrueducaiadigialaOM(2018)22(PHD20/18). | anuldeaciunepenrueducaiadigialaOM(2018)22(PHD20/2018 | 18). | 2019 | PLanul de actiune pentru educatia digitala COM(2018) 22 (PH CD 20/2018).
# anuldeaciuneprivindFinech:penruunsecorfinanciareuropeanmaicompeiivsimaiinovaorOM(2018)109(PHD33 | anuldeaciuneprivindFinech:penruunsecorfinanciareuropeanmaicompeiivsimaiinovaorOM(2018)109(PHD33/18). | anuldeaciuneprivindFinech:penruunsecorfinanciareuropeanmaicompeiivsimaiinovaorOM(2018)109(PHD33/2018 | 18). | 2019 | PLanul de actiune privind FinTech: pentru un sector financiar european mai competitiv si mai inovator COM(2018)109 (PH CD 33 /2018).
# anuldeaciunevizandmobiliaeamiliaraJOIN(2018)5(PHD34 | anuldeaciunevizandmobiliaeamiliaraJOIN(2018)5(PHD34/18). | anuldeaciunevizandmobiliaeamiliaraJOIN(2018)5(PHD34/2018 | 18). | 2019 | PLanul de actiune vizand mobilitatea militara JOIN(2018) 5 (PH CD 34 /2018).
# Persist the enriched votes; the context manager guarantees the handle is
# flushed and closed.
with open('data/nvotes.json','w') as fh:
    fh.write(json.dumps(nvotes))
5464124