Source of the judges.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
plt.style.use('ggplot')
import dateutil.parser
import re
import time
# Overview page from which the `judges_IV` entries are scraped.
url = "http://www.bvger.ch/gericht/richter/00563/00580/index.html?lang=de"
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors instead of parsing an error page as data.
response.raise_for_status()
response.encoding = 'utf-8' # Fix Encoding
judges_IV_soup = BeautifulSoup(response.text, 'html.parser')
# One <div class="contentFlex flexUnterseite"> per entry on the page; each is
# later handed to extract_judges().
judges_IV = judges_IV_soup.find_all('div', {'class': 'contentFlex flexUnterseite'})
# Matches a parenthesised remark inside a judge's link text (greedy: from the
# first "(" to the last ")").
_PAREN_RE = re.compile(r'\(.+\)')

# Party label keyed by the last three characters of a judge's profile text.
# The keys are presumably truncated endings of German/French party names as
# they appear at the end of the profile page -- TODO confirm against the site.
# NOTE(review): 'DC.' is ambiguous on its face; the original replace chain
# always resolved it to 'SVP' (a later 'DC.' -> 'FDP' replace could never
# match), so that behaviour is kept here.
_PARTY_BY_SUFFIX = {
    'los': 'parteilos',
    'ux.': 'FDP',
    'PLR': 'FDP',
    'que': 'parteilos',
    'üne': 'Grüne',
    'UDC': 'SVP',
    'DC.': 'SVP',
    'rts': 'Grüne',
    '07.': 'parteilos',
    'ale': 'GLP',
    'VP.': 'SVP',
}


def _normalize_party(suffix):
    """Map the trailing characters of a profile text to a party label."""
    if suffix in _PARTY_BY_SUFFIX:
        return _PARTY_BY_SUFFIX[suffix]
    # Anything else is kept as found, except that 'PS' is rewritten to 'SP'.
    return suffix.replace('PS', 'SP')


def extract_judges(html_elements, timeout=30):
    """Process a list of html elements containing data about the judges.

    For every element the first ``<a>`` tag supplies the judge's name (any
    parenthesised remark is removed) and a link to the judge's profile page.
    The profile page is downloaded and the last three characters of its
    ``<div class="webText flexTinymceDiv">`` text are mapped to a party label.

    Args:
        html_elements: iterable of BeautifulSoup elements, one per judge.
        timeout: seconds to wait for each profile page request.

    Returns:
        A list of dicts with the keys ``'Name'`` and ``'Partei'``, in input
        order. An empty input yields an empty list without any network access.

    Raises:
        requests.RequestException: if a profile page cannot be fetched or
            answers with an HTTP error status.
        AttributeError: if an element has no ``<a>`` tag or a profile page
            lacks the expected ``<div>``.
    """
    lst = []
    for soup in html_elements:
        # Name and profile link come from the same anchor; look it up once.
        link = soup.find('a')
        name = _PAREN_RE.sub('', link.text).strip()

        # The href is appended to the site root to form the profile URL.
        url = 'http://www.bvger.ch' + link.get('href')
        response = requests.get(url, timeout=timeout)
        # Do not scrape a party label out of an HTTP error page.
        response.raise_for_status()
        response.encoding = 'utf-8'
        judges_text = BeautifulSoup(response.text, 'html.parser')

        # Only the last three characters of the profile text are inspected.
        partei = judges_text.find('div', {'class': 'webText flexTinymceDiv'}).text[-3:]

        lst.append({
            'Name': name,
            'Partei': _normalize_party(partei).strip(),
        })
    return lst
# Scrape name and party for every entry collected in `judges_IV`.
judges_IV_list = extract_judges(judges_IV)

# Same procedure for the second overview page (`judges_V`).
url = "http://www.bvger.ch/gericht/richter/00563/00581/index.html?lang=de"
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors instead of parsing an error page as data.
response.raise_for_status()
response.encoding = 'utf-8'
judges_V_soup = BeautifulSoup(response.text, 'html.parser')
judges_V = judges_V_soup.find_all('div', {'class': 'contentFlex flexUnterseite'})
judges_V_list = extract_judges(judges_V)

# Combine both lists into one DataFrame with the columns 'Name' and 'Partei'.
judges_list = judges_IV_list + judges_V_list
judges_list = pd.DataFrame(judges_list)
# Split every "Lastname(s) Firstname" string from the 'Name' column into a
# first-name list and a last-name list (same order as the DataFrame rows).
first_names, last_names = [], []
for name in judges_list['Name']:
    parts = name.split(' ')
    # Special case for "Wenger David R.": a three-token name ending in 'R.'
    # keeps the middle initial with the first name. In every other case only
    # the final token is the first name.
    has_middle_initial = len(parts) == 3 and parts[2] == 'R.'
    split_at = 1 if has_middle_initial else len(parts) - 1
    last_names.append(' '.join(parts[:split_at]))
    first_names.append(' '.join(parts[split_at:]))
# Turn the split name lists into single-column frames and append them to the
# scraped data (column order: Name, Partei, last name, first name).
vorname = pd.DataFrame(first_names)
nachname = pd.DataFrame(last_names)
df = pd.concat([judges_list, nachname, vorname], axis=1)
# Must be a flat list: a nested list ([[...]]) makes current pandas build a
# one-level MultiIndex, which breaks the column arithmetic below.
df.columns = ['Name', 'Partei', 'Nachname', 'Vorname']
# NOTE: despite the column name, the value is "<Vorname> <Nachname>".
df['Nachname Vorname'] = df['Vorname'] + ' ' + df['Nachname']
df.to_csv('richter_partei.csv', index=False)
# Bare expression; presumably there to display the frame in a notebook cell.
df
 | Name | Partei | Nachname | Vorname | Nachname Vorname |
---|---|---|---|---|---|
0 | Spälti Giannakitsas Nina | SP | Spälti Giannakitsas | Nina | Nina Spälti Giannakitsas |
1 | Bovier Gérald | SVP | Bovier | Gérald | Gérald Bovier |
2 | Brüschweiler Daniela | BDP | Brüschweiler | Daniela | Daniela Brüschweiler |
3 | Cattaneo Daniele | FDP | Cattaneo | Daniele | Daniele Cattaneo |
4 | Cotting-Schalch Claudia | FDP | Cotting-Schalch | Claudia | Claudia Cotting-Schalch |
5 | Felley Yanick | SVP | Felley | Yanick | Yanick Felley |
6 | Scherrer Gérard | parteilos | Scherrer | Gérard | Gérard Scherrer |
7 | Lang Walter | parteilos | Lang | Walter | Walter Lang |
8 | Schürch Hans | FDP | Schürch | Hans | Hans Schürch |
9 | Tellenbach Bendicht | SP | Tellenbach | Bendicht | Bendicht Tellenbach |
10 | Theis Contessina | Grüne | Theis | Contessina | Contessina Theis |
11 | Thurnheer Simon Mathias | SVP | Thurnheer Simon | Mathias | Mathias Thurnheer Simon |
12 | Wespi Thomas | CVP | Wespi | Thomas | Thomas Wespi |
13 | Luterbacher Christa | SP | Luterbacher | Christa | Christa Luterbacher |
14 | Antonioni Luftensteiner Emilia | Grüne | Antonioni Luftensteiner | Emilia | Emilia Antonioni Luftensteiner |
15 | Badoud François | parteilos | Badoud | François | François Badoud |
16 | Balmelli-Mühlematter Barbara | GLP | Balmelli-Mühlematter | Barbara | Barbara Balmelli-Mühlematter |
17 | Beck Kadima Muriel | Grüne | Beck Kadima | Muriel | Muriel Beck Kadima |
18 | Cossy Sylvie | Grüne | Cossy | Sylvie | Sylvie Cossy |
19 | Freihofer Gabriela | SVP | Freihofer | Gabriela | Gabriela Freihofer |
20 | Marti Esther | GLP | Marti | Esther | Esther Marti |
21 | König Markus | SP | König | Markus | Markus König |
22 | Monnet Jean-Pierre | parteilos | Monnet | Jean-Pierre | Jean-Pierre Monnet |
23 | Schenker Senn Regula | SP | Schenker Senn | Regula | Regula Schenker Senn |
24 | Waeber William | SP | Waeber | William | William Waeber |
25 | Wenger David R. | SVP | Wenger | David R. | David R. Wenger |
26 | Willisegger Daniel | SVP | Willisegger | Daniel | Daniel Willisegger |
# Number of judges per party, most frequent first.
partei_counts = df['Partei'].value_counts()
# Build the frame with explicit column names: 'index' holds the party label,
# 'Partei' holds the count. What value_counts().reset_index() names these
# columns differs between pandas versions, so it is not relied on here.
df_partei_count = pd.DataFrame(
    {'index': partei_counts.index, 'Partei': partei_counts.values},
    columns=['index', 'Partei'],
)
# Total number of judges across all parties.
total = int(df_partei_count['Partei'].sum())


def per(x):
    """Return x as a percentage of the module-level `total`."""
    percentage = x / total * 100
    return percentage


# Share of each party, rounded to whole percent.
df_partei_count['percentage'] = df_partei_count['Partei'].apply(per).round()
df_partei_count
 | index | Partei | percentage |
---|---|---|---|
0 | SP | 6 | 22.0 |
1 | SVP | 6 | 22.0 |
2 | Grüne | 4 | 15.0 |
3 | parteilos | 4 | 15.0 |
4 | FDP | 3 | 11.0 |
5 | GLP | 2 | 7.0 |
6 | CVP | 1 | 4.0 |
7 | BDP | 1 | 4.0 |