import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import glob
plt.style.use('ggplot')
import dateutil.parser
import re
import time
from collections import Counter
%matplotlib inline
# Collect the bare file name (last '/'-separated path component) of every
# entry found in the txtfiles/ directory.
whole_list_of_names = [
    path.split('/')[-1] for path in glob.glob('txtfiles/*')
]
len(whole_list_of_names)
28876
In the online database there are more cases mentioned than we analysed. This is due to duplicates in the database and merged cases. We used 'fdupes -dN .' on the command line to locate and remove 300-plus duplicate files.
Every expression is developed three times, as the verdicts are in three languages, French, German and Italian.
def extracting_aktennummer_german(doc):
    """Return the division and case number of a German verdict.

    Looks for 'Abteilung <ROMAN>' followed on the next line by a case
    number such as 'D-3473/2006' and returns both with the newline removed
    (e.g. 'Abteilung IVD-3473/2006').

    Returns None when the pattern is not found or *doc* is not a string.
    """
    if not isinstance(doc, str):
        return None
    match = re.search(r'Abteilung [A-Z]+\n[A-Z]-[0-9]+/[0-9]+', doc)
    if match is None:
        return None
    return match.group().replace('\n', '')
def extracting_aktennummer_french(doc):
    """Return the division and case number of a French verdict.

    Looks for 'Cour <ROMAN>' followed on the next line by a case number
    such as 'E-1461/2011' and returns both with the newline removed
    (e.g. 'Cour VE-1461/2011').

    Returns None when the pattern is not found or *doc* is not a string.
    """
    if not isinstance(doc, str):
        return None
    match = re.search(r'Cour [A-Z]+\n[A-Z]-[0-9]+/[0-9]+', doc)
    if match is None:
        return None
    return match.group().replace('\n', '')
def extracting_aktennummer_italian(doc):
    """Return the division and case number of an Italian verdict.

    Looks for 'Corte <ROMAN>' followed on the next line by a case number
    such as 'D-2873/2009' and returns both with the newline removed
    (e.g. 'Corte IVD-2873/2009').

    Returns None when the pattern is not found or *doc* is not a string.
    """
    if not isinstance(doc, str):
        return None
    match = re.search(r'Corte [A-Z]+\n[A-Z]-[0-9]+/[0-9]+', doc)
    if match is None:
        return None
    return match.group().replace('\n', '')
def extracting_entscheid_italian(doc):
    """Return the first ruling sentence of an Italian verdict.

    After dropping a few stray numbering fragments, the text that follows
    the line after 'Tribunale amministrativo federale pronuncia:' is
    captured up to the next full stop, cleaned of newlines and of the word
    'Oggetto', and truncated to 150 characters.

    Returns None when the introductory phrase is not found or *doc* is not
    a string.
    """
    if not isinstance(doc, str):
        return None
    # Applied sequentially, in this order, exactly as before.
    for fragment in ('\n1. \n', '\n1. \n1.', '\n1.\n1.'):
        doc = doc.replace(fragment, '')
    matches = re.findall(
        r'Tribunale amministrativo federale pronuncia:\n.*([^.]*)', doc)
    if not matches:
        return None
    # Only the first occurrence is used.
    ruling = matches[0].replace('Oggetto', '').replace('\n', '').strip()
    return ruling[:150]
def extracting_entscheid_french(doc):
    """Return the first ruling sentence of a French verdict.

    Stray numbering fragments and all newlines are removed, then the text
    following 'le Tribunal administratif fédéral prononce:1.' is captured
    up to the next full stop and truncated to 150 characters.

    Returns None when the introductory phrase is not found or *doc* is not
    a string. (Previously the raw ``re.findall`` list was returned and the
    150 limit sliced the list instead of the text, so callers testing for
    None never saw a miss.)
    """
    if not isinstance(doc, str):
        return None
    # Applied sequentially, in this order, exactly as before.
    for fragment in ('\n1. \n', '\n1. \n1.', '\n1.\n1.', '\n1.\n\n', '\n'):
        doc = doc.replace(fragment, '')
    matches = re.findall(
        r'le Tribunal administratif fédéral prononce\s*:1.([^.]*)', doc)
    if not matches:
        return None
    # Only the first occurrence is used; strip to match the Italian variant.
    return matches[0].strip()[:150]
def extracting_entscheid_german(doc):
    """Return the first ruling sentence of a German verdict.

    A fixed set of day-number fragments ('4.', '13.', ...) is deleted first
    because the ruling is captured up to the next full stop and an ordinal
    inside the sentence would cut it short. The text following
    'erkennt das Bundesverwaltungsgericht: 1.' is then captured, cleaned of
    newlines and truncated to 150 characters.

    Returns None when the introductory phrase is not found or *doc* is not
    a string. (Previously the raw ``re.findall`` list was returned and the
    150 limit sliced the list instead of the text, so callers testing for
    None never saw a miss.)
    """
    if not isinstance(doc, str):
        return None
    # Order is kept from the original. NOTE(review): because '4.' is removed
    # first, '24.' has already become '2' by the time its own entry runs.
    fragments = (
        '4.', '6.', '13.', '8.', '24.', '25.', '18.', '30.',
        '\n1. \n', '\n1. \n1.', '\n1.\n1.',
    )
    for fragment in fragments:
        doc = doc.replace(fragment, '')
    matches = re.findall(
        r'erkennt das Bundesverwaltungsgericht\s*:\s*1.([^.]*)', doc)
    if not matches:
        return None
    # Only the first occurrence is used; cleaned like the Italian variant.
    return matches[0].replace('\n', '').strip()[:150]
Using findall, because the word might occur several times in the document. This way we can just take the first instance, making sure we are extracting the correct term.
def extracting_gegenstand_italian(doc):
    """Return the subject ('Oggetto') of an Italian verdict.

    Takes the first occurrence of 'Oggetto', skips the rest of that line
    and captures what follows up to the next comma. The result has newlines
    and the word 'Oggetto' removed, is stripped and cut to 84 characters.

    Returns None when 'Oggetto' does not occur or *doc* is not a string.
    """
    if not isinstance(doc, str):
        return None
    matches = re.findall(r'Oggetto.*([^,]*)', doc)
    if not matches:
        return None
    subject = matches[0].replace('Oggetto', '').replace('\n', '').strip()
    return subject[:84]
def extracting_gegenstand_french(doc):
    """Return the subject ('Objet') of a French verdict.

    Takes the first occurrence of 'Objet', skips the rest of that line and
    captures what follows up to the next comma. The result has newlines and
    the word 'Objet' removed, is stripped and cut to 84 characters.

    Returns None when 'Objet' does not occur or *doc* is not a string.
    """
    if not isinstance(doc, str):
        return None
    matches = re.findall(r'Objet.*([^,]*)', doc)
    if not matches:
        return None
    subject = matches[0].replace('Objet', '').replace('\n', '').strip()
    return subject[:84]
def extracting_gegenstand_german(doc):
    """Return the subject ('Gegenstand') of a German verdict.

    Takes the first occurrence of 'Gegenstand', skips the rest of that line
    and captures what follows up to the next comma. The result has newlines
    and the word 'Gegenstand' removed, is stripped and cut to 84 characters.

    Returns None when 'Gegenstand' does not occur or *doc* is not a string.
    """
    if not isinstance(doc, str):
        return None
    matches = re.findall(r'Gegenstand.*([^,]*)', doc)
    if not matches:
        return None
    subject = matches[0].replace('Gegenstand', '').replace('\n', '').strip()
    return subject[:84]
def extracting_date_french(doc):
    """Return the date of a French verdict, e.g. '21 mars 2011'.

    Uses the first 'Arrêt du <day> <month> 20xx' occurrence, drops the
    'Arrêt du' prefix and turns '1er' into '1'.

    Returns the string '[]' when no date is found; callers in this notebook
    compare against that sentinel, so it is kept.
    """
    matches = re.findall(
        r"Arrêt du [0-9]+[er]* [A-Z]*[éèàâæûa-z]+ 20[0-9]+", doc)
    if not matches:
        return '[]'
    date_text = matches[0].replace('Arrêt du', '').strip()
    return date_text.replace('1er', '1')
def extracting_date_german(doc):
    """Return the date of a German verdict, e.g. '20. Februar 2009'.

    Uses the first 'Urteil vom <day>. <Month> 20xx' occurrence, drops the
    'Urteil vom ' prefix and inserts the missing space in '.April'-style
    dates ('.Ap' becomes '. Ap').

    Returns the string '[]' when no date is found; callers in this notebook
    compare against that sentinel, so it is kept.
    """
    matches = re.findall(
        r"Urteil vom [0-9]+.[ ]*[ÄÖÜA-Z][äüöa-z]+ 20[0-9]+", doc)
    if not matches:
        return '[]'
    date_text = matches[0].replace('Urteil vom ', '').strip()
    return date_text.replace(".Ap", '. Ap')
def extracting_date_italian(doc):
    """Return the date of an Italian verdict, e.g. '8 maggio 2009'.

    Uses the first "Sentenza del/dell' <day> <month> 20xx" occurrence,
    drops the prefix and replaces a degree sign after the day with a space.

    Returns the string '[]' when no date is found; callers in this notebook
    compare against that sentinel, so it is kept.
    """
    matches = re.findall(
        r"Sentenza del[l']*[ ]*[0-9]+[ |°][a-z]+ 20[0-9]+", doc)
    if not matches:
        return '[]'
    date_text = matches[0].replace('Sentenza del ', '').replace('°', ' ').strip()
    return date_text.replace("Sentenza dell'", '')
The list of judges was pulled from the website with a separate scraper. This gave us the current judges. Judges from earlier years were researched by hand from documentation of the Swiss parliament.
# Load the judge table from the comma-separated file.
df_richter = pd.read_csv('data/richter_partei.csv', sep=',')
Making the list of judges with their party
# Plain Python list of the values in the 'Nachname' column.
relevant_clean_judges = [nachname for nachname in df_richter['Nachname']]
def lawyers_countries(x):
    """Blank out the parties section so its names are not taken for judges.

    Searches for the heading 'Parteien' (German), then 'Parties' (French),
    then 'Parti' (Italian), each together with up to two following lines.
    The first heading that matches wins, and every occurrence of the
    matched text in *x* is replaced by '|||'.

    Returns the modified text, or *x* unchanged when no heading is found.
    (The French branch used to return an unrelated name instead of the
    modified text, so French verdicts were never cleaned.)
    """
    patterns = (
        'Parteien\n*.*\n*.*',   # German
        'Parties\n*.*\n*.*',    # French
        'Parti\n*.*\n*.*',      # Italian; checked last as it is the shortest
    )
    for pattern in patterns:
        found = re.search(pattern, x)
        if found is not None:
            return x.replace(found.group(), '|||')
    return x
def gerichtsschreiber(x):
    """Blank out the court clerk so the clerk's name is not taken for a judge.

    Four patterns are tried in order: 'Gerichtsschreiber ... .',
    'Gerichtsschreiberin ...', 'cancellier ... ,' and 'Greffier: ...'.
    For the first one that matches, every occurrence of the matched text is
    replaced by '|||' and the result is returned.

    If none matches, each '<surname>, greffi' sequence built from
    ``relevant_clean_judges`` is replaced by '|||' and the text is returned.

    NOTE(review): the indentation of the original cell was lost, so the
    exact nesting of the surname loop is an assumption (loop over all
    surnames, then return) - confirm against the notebook.
    """
    patterns = (
        r'Gerichtsschreiber.*\.',
        r'Gerichtsschreiberin.*\n*',
        r'cancellier.*,',
        r'Greffier:.*',
    )
    for pattern in patterns:
        found = re.search(pattern, x)
        if found is not None:
            return x.replace(found.group(), '|||')

    # French verdicts without a 'Greffier:' label: '<surname>, greffier/ère'.
    for surname in relevant_clean_judges:
        if isinstance(surname, str):  # skip empty CSV cells (NaN)
            x = x.replace(surname + ', greffi', '|||')
    return x
# Searching every verdict for the judges involved and collecting case data.
# Inputs built earlier in the notebook:
#   whole_list_of_names   -- file names of the verdict text files
#   relevant_clean_judges -- surnames of the judges to look for

# Text clean-ups applied to every excerpt, in this order.
_TEXT_FIXUPS = (
    ('E-4432/2006', 'E-4432/20 fsdfasdfaasdfasdfasdfasdfdasfasfasfasdfasfasfasdfsdfasdf'),
    (';', ','),
    ('\n\n', '\n'),
    (':\n1. Die', ':\n1.\nDie'),
    (':\n1. Le', ':\n1.\nLe'),
    (':\n1. Nella', ':\n1.\nNella'),
    ('Demnach erkennt das Bundesverwaltungsgericht: \n1.\n',
     'Demnach erkennt das Bundesverwaltungsgericht:\n1.\n'),
    (':\n1. Il', ':\n1.\nIl'),
)


def _select_by_language(german, french, italian, missing):
    """Pick one of three per-language results; *missing* marks 'not found'.

    Same outcome as the original if/elif/else cascade: the French value
    wins unless it is missing and exactly one of the other two is present.
    """
    if not (french == missing):
        return french
    if italian == missing:
        return german
    if german == missing:
        return italian
    # German and Italian both matched: the original kept the French value.
    return french


main_judge_list = []
case_list = []
vorsitz_list = []

for file_name in whole_list_of_names:  # medium_sample_list
    # Importing the text; the context manager closes the file again.
    with open('txtfiles/' + file_name, 'r') as handle:
        text = handle.read()

    # Only the first 310 characters and a 1815-character window taken from
    # the last 2000 characters are analysed.
    beginning = text[0:310]
    end = text[-2000:][0:1815]
    text = beginning + end

    # Prepping text files
    for old, new in _TEXT_FIXUPS:
        text = text.replace(old, new)

    # Dealing with Gerichtsschreiber
    text = gerichtsschreiber(text)
    # Pulling out lawyers' names, so they don't clash with judges' names
    text = lawyers_countries(text)

    # Making the list of judges named in this verdict. Each surname is used
    # as a regular expression, as before.
    short_judge_list = []
    for judge in relevant_clean_judges:
        try:
            found = re.search(judge, text)
        except (TypeError, re.error):
            # Non-string surname (empty CSV cell) or invalid pattern.
            continue
        if found is not None:
            short_judge_list.append(found.group())

    # Each extractor runs once per file; the language is chosen afterwards.
    date = _select_by_language(
        extracting_date_german(text),
        extracting_date_french(text),
        extracting_date_italian(text),
        '[]')
    gegenstand = _select_by_language(
        extracting_gegenstand_german(text),
        extracting_gegenstand_french(text),
        extracting_gegenstand_italian(text),
        None)
    entscheid = _select_by_language(
        extracting_entscheid_german(text),
        extracting_entscheid_french(text),
        extracting_entscheid_italian(text),
        None)
    aktennummer = _select_by_language(
        extracting_aktennummer_german(text),
        extracting_aktennummer_french(text),
        extracting_aktennummer_italian(text),
        None)

    # One row per (judge, case) ...
    for judge in short_judge_list:
        main_judge_list.append({'judge': judge,
                                'date': date,
                                'gegenstand': gegenstand,
                                'decision': entscheid,
                                'aktennummer': aktennummer,
                                'myfile_number': file_name})
    # ... and one row per case.
    case_list.append({'date': date,
                      'gegenstand': gegenstand,
                      'decision': entscheid,
                      'aktennummer': aktennummer,
                      'myfile_number': file_name})

df_judges = pd.DataFrame(main_judge_list)
df_cases = pd.DataFrame(case_list)
df_judges
aktennummer | date | decision | gegenstand | judge | myfile_number | |
---|---|---|---|---|---|---|
0 | Cour VE-1461/2011 | 21 mars 2011 | None | None | Antonioni | 100.txt |
1 | Cour VE-1461/2011 | 21 mars 2011 | None | None | de Coulon Scuntaro | 100.txt |
2 | Abteilung IVD-3473/2006 | 20. Februar 2009 | Die Beschwerde wird abgewiesen | None | Tellenbach | 1000.txt |
3 | Abteilung IVD-3473/2006 | 20. Februar 2009 | Die Beschwerde wird abgewiesen | None | Bovier | 1000.txt |
4 | Abteilung IVD-3473/2006 | 20. Februar 2009 | Die Beschwerde wird abgewiesen | None | Schmid | 1000.txt |
5 | Cour VE-5900/2010 | 2 février 2012 | Le recours est reje | None | Badoud | 1001.txt |
6 | Cour VE-5900/2010 | 2 février 2012 | Le recours est reje | None | Schenker Senn | 1001.txt |
7 | Cour VE-5900/2010 | 2 février 2012 | Le recours est reje | None | Antonioni | 1001.txt |
8 | Abteilung IVD-3357/2006 | 9. Juli 2009 | Die Beschwerde wird bezüglich des Wegweisungsv... | None | Schürch | 1002.txt |
9 | Abteilung IVD-3357/2006 | 9. Juli 2009 | Die Beschwerde wird bezüglich des Wegweisungsv... | None | Marti | 1002.txt |
10 | Abteilung IVD-3357/2006 | 9. Juli 2009 | Die Beschwerde wird bezüglich des Wegweisungsv... | None | Spälti Giannakitsas | 1002.txt |
11 | Abteilung IVD-3357/2006 | 9. Juli 2009 | Die Beschwerde wird bezüglich des Wegweisungsv... | None | Badoud | 1002.txt |
12 | Abteilung IVD-3357/2006 | 9. Juli 2009 | Die Beschwerde wird bezüglich des Wegweisungsv... | None | Zoller | 1002.txt |
13 | Abteilung IVD-7620/2009 | 3. Mai 2010 | Die Beschwerde wird abgewiesen | None | Freihofer | 1003.txt |
14 | Abteilung IVD-7620/2009 | 3. Mai 2010 | Die Beschwerde wird abgewiesen | None | Bovier | 1003.txt |
15 | Abteilung IVD-7620/2009 | 3. Mai 2010 | Die Beschwerde wird abgewiesen | None | Haefeli | 1003.txt |
16 | Cour VE-6531/2006 | 28 mai 2008 | Le recours est admis | None | Badoud | 1004.txt |
17 | Cour VE-6531/2006 | 28 mai 2008 | Le recours est admis | None | Brodard | 1004.txt |
18 | Cour VE-6531/2006 | 28 mai 2008 | Le recours est admis | None | Weber | 1004.txt |
19 | Cour IVD-1111/2011 | 24 février 2011 | Le recours est rejeté | None | Scherrer | 1005.txt |
20 | Cour IVD-1111/2011 | 24 février 2011 | Le recours est rejeté | None | Bovier | 1005.txt |
21 | Cour VE-868/2007 | 18 octobre 2010 | Le recours est rejeté | None | Freihofer | 1006.txt |
22 | Cour VE-868/2007 | 18 octobre 2010 | Le recours est rejeté | None | Brodard | 1006.txt |
23 | Cour VE-868/2007 | 18 octobre 2010 | Le recours est rejeté | None | de Coulon Scuntaro | 1006.txt |
24 | Cour IVD-7461/2009 | 3 décembre 2009 | Le recours est rejeté | None | Scherrer | 1007.txt |
25 | Cour IVD-7461/2009 | 3 décembre 2009 | Le recours est rejeté | None | Brodard | 1007.txt |
26 | Abteilung IVD-5213/2010 | 26. Juli 2010 | Die Beschwerde wird abgewiesen | None | Wespi | 1008.txt |
27 | Abteilung IVD-5213/2010 | 26. Juli 2010 | Die Beschwerde wird abgewiesen | None | Cattaneo | 1008.txt |
28 | Cour VE-1171/2011 | 30 mars 2011 | Le recours est rejeté | None | Monnet | 1009.txt |
29 | Cour VE-1171/2011 | 30 mars 2011 | Le recours est rejeté | None | Beck Kadima | 1009.txt |
... | ... | ... | ... | ... | ... | ... |
73163 | Cour IVD-2432/2011 | 31 mai 2011 | La demande de révision est rejetée, dans la me... | None | Cotting-Schalch | 9883.txt |
73164 | Cour IVD-2432/2011 | 31 mai 2011 | La demande de révision est rejetée, dans la me... | None | Cattaneo | 9883.txt |
73165 | Abteilung VE-1618/2008 | 28. Februar 2011 | Die Beschwerde wird abgewiesen | None | Schenker Senn | 989.txt |
73166 | Abteilung VE-1618/2008 | 28. Februar 2011 | Die Beschwerde wird abgewiesen | None | Freihofer | 989.txt |
73167 | Abteilung VE-1618/2008 | 28. Februar 2011 | Die Beschwerde wird abgewiesen | None | Huber | 989.txt |
73168 | Abteilung VE-2696/2009 | 7. Mai 2009 | Die Beschwerde wird abgewiesen | None | Schürch | 99.txt |
73169 | Abteilung VE-2696/2009 | 7. Mai 2009 | Die Beschwerde wird abgewiesen | None | Gysi | 99.txt |
73170 | Abteilung VE-2040/2011 | 11. April 2011 | Die Beschwerde wird abgewiesen, soweit darauf ... | None | Freihofer | 990.txt |
73171 | Abteilung VE-2040/2011 | 11. April 2011 | Die Beschwerde wird abgewiesen, soweit darauf ... | None | Gysi | 990.txt |
73172 | Abteilung IVD-3274/2011 | 21. Juni 2011 | Die Beschwerde wird abgewiesen | None | Theis | 991.txt |
73173 | Abteilung IVD-3274/2011 | 21. Juni 2011 | Die Beschwerde wird abgewiesen | None | Cattaneo | 991.txt |
73174 | Abteilung VE-5102/2006 | 31. Mai 2007 | Die Beschwerde wird gutgeheissen und die Verfü... | None | Huber | 992.txt |
73175 | Abteilung VE-5102/2006 | 31. Mai 2007 | Die Beschwerde wird gutgeheissen und die Verfü... | None | Gysi | 992.txt |
73176 | Abteilung VE-5102/2006 | 31. Mai 2007 | Die Beschwerde wird gutgeheissen und die Verfü... | None | Weber | 992.txt |
73177 | Corte IVD-2873/2009 | 8 maggio 2009 | Il ricorso è respinto | None | de Coulon Scuntaro | 993.txt |
73178 | Corte IVD-2873/2009 | 8 maggio 2009 | Il ricorso è respinto | None | Angeli | 993.txt |
73179 | Abteilung IVD-4839/2010 | 2. November 2010 | Die Beschwerde wird abgewiesen | None | Freihofer | 994.txt |
73180 | Abteilung IVD-4839/2010 | 2. November 2010 | Die Beschwerde wird abgewiesen | None | Haefeli | 994.txt |
73181 | Abteilung IVD-3370/2009 | 28. Mai 2009 | Die Beschwerde wird gutgeheissen, soweit sie n... | None | Galliker | 995.txt |
73182 | Abteilung IVD-3370/2009 | 28. Mai 2009 | Die Beschwerde wird gutgeheissen, soweit sie n... | None | Haefeli | 995.txt |
73183 | Corte IVD-926/2010 | 22 febbraio 2010 | Nella misura in cui ammissibile, il ricorso è ... | None | Scherrer | 996.txt |
73184 | Corte IVD-926/2010 | 22 febbraio 2010 | Nella misura in cui ammissibile, il ricorso è ... | None | Angeli | 996.txt |
73185 | Abteilung VE-5292/2009 | 19. November 2009 | Die Beschwerde wird abgewiesen | None | Lang | 997.txt |
73186 | Abteilung VE-5292/2009 | 19. November 2009 | Die Beschwerde wird abgewiesen | None | Huber | 997.txt |
73187 | Abteilung VE-5292/2009 | 19. November 2009 | Die Beschwerde wird abgewiesen | None | Gysi | 997.txt |
73188 | Cour IVD-8772/2010 | 5 avril 2011 | Le recours est rejeté | None | Badoud | 998.txt |
73189 | Cour IVD-8772/2010 | 5 avril 2011 | Le recours est rejeté | None | Cotting-Schalch | 998.txt |
73190 | Cour VE-6364/2008 | 4 novembre 2008 | Le recours est rejeté | None | Monnet | 999.txt |
73191 | Cour VE-6364/2008 | 4 novembre 2008 | Le recours est rejeté | None | Bovier | 999.txt |
73192 | Cour VE-6364/2008 | 4 novembre 2008 | Le recours est rejeté | None | Gysi | 999.txt |
73193 rows × 6 columns
# Tally how many rows each judge has and write the tally to disk.
jugdes_count = df_judges['judge'].value_counts()
judges_count = jugdes_count.to_frame()
judges_count.to_csv('jugdes_full_count.csv')
We have the data. But we can't start analysing it just yet. We need to harmonise the various data points, i.e. dates
Deleting all the rows with no date. If there was no date, then the document wasn't a case file.
# Keep only rows whose date was found ('[]' is the extractors' "no date"
# marker). .copy() makes each result an independent frame, so the column
# assignments further down do not raise SettingWithCopyWarning.
df_judges = df_judges[df_judges['date'] != '[]'].copy()
df_cases = df_cases[df_cases['date'] != '[]'].copy()
Creating a function, to harmonise all the dates
def date_harm(date):
    """Rewrite a German, French or Italian date as 'day.month.year'.

    Every known month name, including its surrounding separator, is
    replaced by the numeric month between dots, e.g. '20. Februar 2009'
    becomes '20.2.2009'. Text without a known month name is returned as is.
    """
    month_tokens = (
        # German
        ('. Januar ', '.1.'),
        ('. Februar ', '.2.'),
        ('. März ', '.3.'),
        ('. April ', '.4.'),
        (' April ', '.4.'),
        ('. Mai ', '.5.'),
        ('. Juni ', '.6.'),
        ('. Juli ', '.7.'),
        ('. August ', '.8.'),
        ('. September ', '.9.'),
        ('. Oktober ', '.10.'),
        ('. November ', '.11.'),
        ('. Dezember ', '.12.'),
        # French
        (' janvier ', '.1.'),
        (' février ', '.2.'),
        (' mars ', '.3.'),
        (' avril ', '.4.'),
        (' mai ', '.5.'),
        (' juin ', '.6.'),
        (' juillet ', '.7.'),
        (' août ', '.8.'),
        (' septembre ', '.9.'),
        (' octobre ', '.10.'),
        (' novembre ', '.11.'),
        (' décembre ', '.12.'),
        # Italian
        (' gennaio ', '.1.'),
        (' febbraio ', '.2.'),
        (' marzo ', '.3.'),
        (' aprile ', '.4.'),
        (' maggio ', '.5.'),
        (' giugno ', '.6.'),
        (' luglio ', '.7.'),
        (' agosto ', '.8.'),
        (' settembre ', '.9.'),
        (' ottobre ', '.10.'),
        (' novembre ', '.11.'),
        (' dicembre ', '.12.'),
    )
    # Applied one after another, in the listed order.
    for month_text, month_number in month_tokens:
        date = date.replace(month_text, month_number)
    return date
# Store the harmonised date text in a new 'date_new' column of both frames.
for frame in (df_judges, df_cases):
    frame['date_new'] = frame['date'].apply(date_harm)
/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy if __name__ == '__main__':
def parse_date(str_date):
    """Parse a 'day.month.year' string into a datetime.

    ``dayfirst=True`` is required: the harmonised dates are written day
    first, and dateutil's default would read an ambiguous date such as
    '9.7.2009' as 7 September instead of 9 July.

    Returns None when the text cannot be parsed.
    """
    try:
        return dateutil.parser.parse(str_date, dayfirst=True)
    except (ValueError, OverflowError, TypeError, AttributeError):
        # Unparseable text or a non-string value.
        return None
# Replace the harmonised date text by the parsed date in both frames.
for frame in (df_cases, df_judges):
    frame['date_new'] = frame['date_new'].apply(parse_date)
/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy from ipykernel import kernelapp as app
# Use the parsed date as the row index of both frames.
for frame in (df_cases, df_judges):
    frame.index = frame['date_new']
Two dates were entered wrongly. Correcting them.
# Correcting wrongly posted dates: 2001-09-30 should read 2011-09-30.
# One .loc assignment replaces the chained df[col][key] = value form, which
# pandas may apply to a temporary copy. The index is rebuilt afterwards so
# that it agrees with the corrected column.
wrong_day = pd.Timestamp('2001-09-30')
right_day = pd.Timestamp('2011-09-30')
for frame in (df_cases, df_judges):
    posted_wrongly = (frame['date_new'] == wrong_day).values
    frame.loc[posted_wrongly, 'date_new'] = right_day
    frame.index = frame['date_new']
/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy from ipykernel import kernelapp as app /usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy app.launch_new_instance() /usr/local/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2885: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy exec(code_obj, self.user_global_ns, self.user_ns)
Again checking for duplicates
# Remove fully identical rows; the first of each group is kept, which is
# the default of drop_duplicates().
df_cases = df_cases.drop_duplicates()
df_judges = df_judges.drop_duplicates()
# Number of verdicts per month, counted on the 'aktennummer' column.
fig, ax = plt.subplots(figsize=(10, 5), facecolor='White')
monthly_verdicts = df_cases.resample('M')['aktennummer'].count()
monthly_verdicts.plot(ax=ax)
ax.set_title("Urteile Bundesverwaltungsgericht 2007 - September 2016",
             fontname='DIN Condensed', fontsize=24)
<matplotlib.text.Text at 0x10ff03240>
Harmonising all the decisions.
def decision_harm_auto(x):
    """Reduce a ruling sentence to 'Gutgeheissen' or 'Abgewiesen'.

    The text is searched for German, French and Italian keywords. Any
    keyword of the upheld group yields 'Gutgeheissen'; otherwise any
    keyword of the dismissed group yields 'Abgewiesen'. The upheld group is
    checked first, so a text containing keywords of both groups counts as
    'Gutgeheissen'.

    Returns the text unchanged when no keyword matches, and None when *x*
    is not a string (e.g. a missing decision).
    """
    if not isinstance(x, str):
        return None

    upheld_patterns = (
        r'utgeheissen',
        r'utheissen',
        r'gutzuheissen',
        r'admis',
        r'ccolto',
        r'ccolta',
        'aufgehoben',
        r'Les causes D-3901/2008, D-3902/2008, D-3903/2008, D-3904/2008 et D-3905/2008 sont jointes',
        r'annulée',
    )
    dismissed_patterns = (
        r'bgewiesen',
        r'ejet',
        r'espint',
        r'irrecevable',
        'nicht eingetreten',
        r'abge-wiesen',
        r'abgwiesen',
        'abgeschrieben',
        'gegenstandslos geworden',
    )

    if any(re.search(pattern, x) for pattern in upheld_patterns):
        return 'Gutgeheissen'
    if any(re.search(pattern, x) for pattern in dismissed_patterns):
        return 'Abgewiesen'
    return x
# Add the harmonised decision as a new column to both frames.
for frame in (df_cases, df_judges):
    frame['decision_harm_auto'] = frame['decision'].apply(decision_harm_auto)
Percentage of cases that weren't considered.
# Cases whose decision is neither 'Abgewiesen' nor 'Gutgeheissen', as a
# percentage of all cases that have an 'aktennummer'.
is_classified = df_cases['decision_harm_auto'].isin(['Abgewiesen', 'Gutgeheissen'])
df_cases_non_harm_count = df_cases[~is_classified.values]
Weitergezogen_oder_vereinigt = df_cases_non_harm_count['aktennummer'].count()
alle_faelle = df_cases['aktennummer'].count()
Prozent_weitergezogen_etc = round((Weitergezogen_oder_vereinigt / alle_faelle) * 100, 1)
Prozent_weitergezogen_etc
3.3999999999999999
# Creating new dfs with decision counts per judge.
# The count columns are named explicitly instead of relying on the column
# names value_counts()/reset_index() generate (these differ between pandas
# versions), and the frame gets a flat column index: assigning a nested
# list [[...]] builds a MultiIndex on current pandas.
df_gutgeheissen = (
    df_judges[df_judges['decision_harm_auto'] == 'Gutgeheissen']['judge']
    .value_counts()
    .rename_axis('judge')
    .reset_index(name='gutgeheissen')
)
df_abgewiesen = (
    df_judges[df_judges['decision_harm_auto'] == 'Abgewiesen']['judge']
    .value_counts()
    .rename_axis('judge')
    .reset_index(name='abgewiesen')
)
# Inner merge: only judges with at least one decision of each kind remain.
df_judge_quota = df_gutgeheissen.merge(df_abgewiesen, on='judge')
# Share of upheld decisions in percent, one decimal.
df_judge_quota['quota'] = (
    df_judge_quota['gutgeheissen']
    / (df_judge_quota['gutgeheissen'] + df_judge_quota['abgewiesen'])
    * 100
).round(1)
Bringing in the parties of the judges. This was scraped from the BVGer site and gathered from documentation of the Swiss Parliament.
# Attach the party table to the quota table (judge surname == 'Nachname')
# and show the five judges with the lowest quota.
df_judge_partei = pd.read_csv('data/richter_partei.csv', sep=',')
df_judge_quota = pd.merge(df_judge_quota, df_judge_partei,
                          left_on='judge', right_on='Nachname')
quota_columns = ['judge', 'Partei', 'gutgeheissen', 'abgewiesen', 'quota']
df_judge_quota[quota_columns].sort_values(by='quota').head(5)
judge | Partei | gutgeheissen | abgewiesen | quota | |
---|---|---|---|---|---|
40 | Wenger | SVP | 33 | 447 | 6.9 |
23 | Haefeli | SVP | 280 | 2553 | 9.9 |
42 | Balmelli | GLP | 14 | 126 | 10.0 |
41 | Brüschweiler | BDP | 30 | 246 | 10.9 |
31 | Willisegger | SVP | 153 | 1175 | 11.5 |
# The five judges with the highest quota.
top_columns = ['judge', 'Partei', 'gutgeheissen', 'abgewiesen', 'quota']
df_judge_quota[top_columns].sort_values(by='quota', ascending=False).head(5)
judge | Partei | gutgeheissen | abgewiesen | quota | |
---|---|---|---|---|---|
19 | Theis | Grüne | 295 | 746 | 28.3 |
34 | Kojic | parteilos | 103 | 264 | 28.1 |
36 | Weber | FDP | 85 | 235 | 26.6 |
0 | Luterbacher | SP | 536 | 1556 | 25.6 |
38 | Dubey | parteilos | 51 | 168 | 23.3 |
# One row per (judge, case), extended by the judge's row of the party table.
df_partei_vergleich = pd.merge(df_judges, df_judge_partei,
                               left_on='judge', right_on='Nachname')
Making sure all cells are stripped
def strip_partei(x):
    """Return *x* without leading and trailing whitespace."""
    return x.strip()
# Strip the party labels, then show the number of rows per party.
partei_stripped = df_partei_vergleich['Partei'].apply(strip_partei)
df_partei_vergleich['Partei'] = partei_stripped
df_partei_vergleich['Partei'].value_counts()
SP 15104 SVP 14653 parteilos 14410 FDP 10713 CVP 9516 Grüne 5183 GLP 3170 BDP 280 Name: Partei, dtype: int64
Creating new dfs with decision counts
# Decision counts per party.
# The count columns are named explicitly instead of relying on the column
# names value_counts()/reset_index() generate (these differ between pandas
# versions), and the frame gets a flat column index: assigning a nested
# list [[...]] builds a MultiIndex on current pandas.
# The party column keeps its original header 'judge'.
df_P_gutgeheissen = (
    df_partei_vergleich[df_partei_vergleich['decision_harm_auto'] == 'Gutgeheissen']['Partei']
    .value_counts()
    .rename_axis('judge')
    .reset_index(name='gutgeheissen')
)
df_P_abgewiesen = (
    df_partei_vergleich[df_partei_vergleich['decision_harm_auto'] == 'Abgewiesen']['Partei']
    .value_counts()
    .rename_axis('judge')
    .reset_index(name='abgewiesen')
)
# Inner merge: only parties with at least one decision of each kind remain.
df_P_quota = df_P_gutgeheissen.merge(df_P_abgewiesen, on='judge')
# Share of upheld decisions in percent, one decimal.
df_P_quota['quota in %'] = (
    df_P_quota['gutgeheissen']
    / (df_P_quota['gutgeheissen'] + df_P_quota['abgewiesen'])
    * 100
).round(1)
df_P_quota.sort_values(by='quota in %', ascending=True)
judge | gutgeheissen | abgewiesen | quota in % | |
---|---|---|---|---|
7 | BDP | 30 | 246 | 10.9 |
2 | SVP | 1850 | 12305 | 13.1 |
3 | FDP | 1596 | 8742 | 15.4 |
4 | CVP | 1470 | 7752 | 15.9 |
6 | GLP | 496 | 2563 | 16.2 |
1 | parteilos | 2295 | 11566 | 16.6 |
0 | SP | 3036 | 11515 | 20.9 |
5 | Grüne | 1049 | 3943 | 21.0 |
# Monthly number of dismissed and upheld cases in one chart, saved as PNG
# and PDF. The chart gets a single title; the earlier interim title was
# overwritten before the figure was saved.
fig, ax = plt.subplots(figsize=(10, 5), facecolor='White')
for outcome in ('Abgewiesen', 'Gutgeheissen'):
    df_cases[df_cases['decision_harm_auto'] == outcome].resample('M')['aktennummer'].count().plot(ax=ax)
ax.set_title("Gutgeheissene vs Abgewiesene Urteile 2007 - 2016", fontname='DIN Condensed', fontsize=24)
plt.savefig('Gutgeheissene vs Abgewiesene Urteile 2007 - 2016.png', transparent=True, bbox_inches='tight')
plt.savefig('Gutgeheissene vs Abgewiesene Urteile 2007 - 2016.pdf', transparent=True, bbox_inches='tight')
# Share of upheld decisions among rows where 'Geschlecht' is 'w', in percent.
df_w = df_partei_vergleich[df_partei_vergleich['Geschlecht'] == 'w']
w_is_upheld = df_w['decision_harm_auto'] == 'Gutgeheissen'
w_is_dismissed = df_w['decision_harm_auto'] == 'Abgewiesen'
w_gutgeheissen = df_w[w_is_upheld]['aktennummer'].count()
w_abgewiesen = df_w[w_is_dismissed]['aktennummer'].count()
w_total = w_gutgeheissen + w_abgewiesen
Prozent_w_gutgeheissen = w_gutgeheissen / w_total * 100
Prozent_w_gutgeheissen
19.034852546916888
# Share of upheld decisions among rows where 'Geschlecht' is 'm', in percent.
df_m = df_partei_vergleich[df_partei_vergleich['Geschlecht'] == 'm']
m_is_upheld = df_m['decision_harm_auto'] == 'Gutgeheissen'
m_is_dismissed = df_m['decision_harm_auto'] == 'Abgewiesen'
m_gutgeheissen = df_m[m_is_upheld]['aktennummer'].count()
m_abgewiesen = df_m[m_is_dismissed]['aktennummer'].count()
m_total = m_gutgeheissen + m_abgewiesen
Prozent_m_gutgeheissen = m_gutgeheissen / m_total * 100
Prozent_m_gutgeheissen
15.759842196104215
# resample documentation:
# http://stackoverflow.com/questions/17001389/pandas-resample-documentation
# Quarterly dismissed and upheld counts for judge Wenger, saved as PNG/PDF.
df_Wenger = df_judges[df_judges['judge'] == 'Wenger']
for outcome in ('Abgewiesen', 'Gutgeheissen'):
    df_Wenger[df_Wenger['decision_harm_auto'] == outcome].resample('Q')['aktennummer'].count().plot()
for target in ('wenger.png', 'wenger.pdf'):
    plt.savefig(target, transparent=True, bbox_inches='tight')
# Quarterly dismissed and upheld counts for judge Haefeli, saved as PNG/PDF.
df_haefeli = df_judges[df_judges['judge'] == 'Haefeli']
for outcome in ('Abgewiesen', 'Gutgeheissen'):
    df_haefeli[df_haefeli['decision_harm_auto'] == outcome].resample('Q')['aktennummer'].count().plot()
for target in ('haefeli.png', 'haefeli.pdf'):
    plt.savefig(target, transparent=True, bbox_inches='tight')
# Quarterly dismissed and upheld counts for judge Theis, saved as PNG/PDF.
df_theis = df_judges[df_judges['judge'] == 'Theis']
for outcome in ('Abgewiesen', 'Gutgeheissen'):
    df_theis[df_theis['decision_harm_auto'] == outcome].resample('Q')['aktennummer'].count().plot()
for target in ('theis.png', 'theis.pdf'):
    plt.savefig(target, transparent=True, bbox_inches='tight')