from __future__ import division, print_function
import pandas as pd
import sys
import calendar
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import numpy as np
init_notebook_mode(connected=True)
def get_calendar_df():
'''Read the parsed court records from a file'''
df = pd.read_csv('calendars_parsed.csv')
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Title'] = df['Title'].astype(str)
df['day_of_week'] = df['Date'].dt.day_name()
df['month'] = df['Date'].dt.month_name()
df['day_of_month'] = df['Date'].dt.day
df['year'] = df['Date'].dt.year
return df[df.Status == 'Confirmed']
def filter_df(df, date_min=None, date_max=None, day_of_week=None,
day_of_month=None, month=None, year=None,
case_number=None, case_title=None, event=None, court=None):
'''Filter a dataframe based on specified criteria'''
if date_min is not None:
df = df.loc[df['Date'] >= date_min]
if date_max is not None:
df = df.loc[df['Date'] <= date_max]
if day_of_week is not None:
df = df[df['day_of_week'].str.lower().str.contains(day_of_week.lower())]
if day_of_month is not None:
df = df[df['day_of_month'] == day_of_month]
if month is not None:
df = df[df['month'].str.lower().str.contains(month.lower())]
if year is not None:
df = df[df['year'] == year]
if case_number is not None:
df = df[df['Case Number'].str.lower().str.contains(case_number.lower())]
if case_title is not None:
df = df[df['Title'].str.lower().str.contains(case_title.lower())]
if event is not None:
df = df[df['Event'].str.lower().str.contains(event.lower())]
if court is not None:
df = df[df['Court'].str.lower().str.contains(court.lower())]
return df
def number_by_month(df):
'''Return a count of the number of rows of the dataframe by month'''
df_by_month = pd.DataFrame(columns=['Month', 'Count'])
min_year = min(df.year)
max_year = max(df.year)
for year in range(min_year, max_year+1):
for month_number in range(1, 13):
month = calendar.month_name[month_number]
count = rows(filter_df(df, month=month, year=year))
df_by_month = df_by_month.append(
{'Month': month + ' ' + str(year), 'Count': count},
ignore_index=True)
first_nonzero_index = df_by_month.Count.replace(0, np.nan).first_valid_index()
last_nonzero_index = df_by_month.Count.replace(0, np.nan).last_valid_index()
return df_by_month.iloc[first_nonzero_index:last_nonzero_index+1]
def number_by_day_of_week(df):
'''Return a count of the number of rows of the dataframe by day of week'''
df_by_day_of_week = pd.DataFrame(columns=['Day of Week', 'Count'])
for day_number in range(1, 8):
day_name = calendar.day_name[day_number]
count = rows(filter_df(df, day_of_week=day_name))
df_by_day_of_week = df_by_day_of_week.append({'Day of Week': day_name, 'Count': count}, ignore_index=True)
return df_by_day_of_week
def number_by_day_of_month(df):
'''Return a count of the number of rows of the dataframe by day of month'''
df_by_day_of_month = pd.DataFrame(columns=['Day of Month', 'Count'])
for day_number in range(1, 32):
count = rows(filter_df(df, day_of_month=day_number))
df_by_day_of_month = df_by_day_of_month.append({'Day of Month': day_number, 'Count': count}, ignore_index=True)
return df_by_day_of_month
def rows(df):
'''Convenience function to return the number of rows of a dataframe'''
return len(df.index)
def bar_chart(labels, counts, xlabel=None, ylabel=None):
'''Make a bar chart'''
percent = [str(round(100*count/sum(counts),1)) + '%' for count in counts]
data = [go.Bar(
x=labels,
y=counts,
text=percent,
textposition = 'auto',
opacity=0.6
)]
layout = go.Layout(xaxis=dict(title=xlabel), yaxis=dict(title=ylabel))
fig = go.Figure(data=data, layout=layout)
iplot(fig)
def line_plot(labels, counts, xlabel=None, ylabel=None):
'''Make a bar chart'''
data = [go.Scatter(
x=labels,
y=counts
)]
layout = go.Layout(xaxis=dict(title=xlabel),
yaxis=dict(title=ylabel, range=[0, 1.1*max(counts)]))
fig = go.Figure(data=data, layout=layout)
iplot(fig)
df = get_calendar_df()
labels = ['Small Claims', 'Civil Collection', 'Misdemeanors', 'Felonies',
'Family', 'Other']
counts = [0]*6
counts[0] = rows(filter_df(df, case_number='SC'))
counts[1] = rows(filter_df(df, case_number='CC'))
counts[2] = rows(filter_df(df, case_number='CM'))
counts[3] = rows(filter_df(df, case_number='-F1-|-F2-|-F3-|-F4-|-F5-|-F6-'))
counts[4] = rows(filter_df(df, case_number='-AD-|-RS-|-JP-|-DC-|-DR-|-GU-'))
counts[5] = rows(df) - sum(counts[0:5])
bar_chart(labels, counts, ylabel='Number of hearings')
labels = ['Felony 1', 'Felony 2', 'Felony 3', 'Felony 4', 'Felony 5', 'Felony 6', 'Misdemeanor', 'Infraction']
counts = [0]*8
df_criminal = filter_df(df, case_number='-CM-|-IF-|-F1-|-F2-|-F3-|-F4-|-F5-|-F6-')
counts[0] = rows(filter_df(df_criminal, case_number='-F1-'))
counts[1] = rows(filter_df(df_criminal, case_number='-F2-'))
counts[2] = rows(filter_df(df_criminal, case_number='-F3-'))
counts[3] = rows(filter_df(df_criminal, case_number='-F4-'))
counts[4] = rows(filter_df(df_criminal, case_number='-F5-'))
counts[5] = rows(filter_df(df_criminal, case_number='-F6-'))
counts[6] = rows(filter_df(df_criminal, case_number='-CM-'))
counts[7] = rows(filter_df(df_criminal, case_number='-IF-'))
bar_chart(labels, counts, ylabel='Number of hearings')
labels = ['Eviction Hearing', 'Damages Hearing', 'Finance System of Richmond', 'Natco Credit Union', 'Other']
df_sc = filter_df(df, case_number='-SC-')
counts = [0]*5
counts[0] = rows(filter_df(df_sc, event='eviction'))
counts[1] = rows(filter_df(df_sc, event='damage'))
counts[2] = rows(filter_df(df_sc, case_title='Fs V|Finance System'))
counts[3] = rows(filter_df(df_sc, case_title='Natco'))
counts[4] = sum(counts[0:4])
bar_chart(labels, counts, ylabel='Number of hearings')
labels = ['Reid Hospital', 'Damages Hearing', 'Finance System of Richmond',
'Natco Credit Union', 'Creditmax', 'Capital One', 'Other']
df_cc = filter_df(df, case_number='-CC-')
counts = [0]*7
counts[0] = rows(filter_df(df_cc, case_title='Reid Hospital'))
counts[1] = rows(filter_df(df_cc, event='damage'))
counts[2] = rows(filter_df(df_cc, case_title='Fs V|Finance System'))
counts[3] = rows(filter_df(df_cc, case_title='Natco'))
counts[4] = rows(filter_df(df_cc, case_title='Creditmax'))
counts[5] = rows(filter_df(df_cc, case_title='Capital One'))
counts[6] = sum(counts[0:6])
bar_chart(labels, counts, ylabel='Number of hearings')
df_evictions = filter_df(df, event='eviction')
df_evictions_by_month = number_by_month(df_evictions)
line_plot(df_evictions_by_month.Month,
df_evictions_by_month.Count,
xlabel=None,
ylabel='Number of eviction hearings')
df_damages = filter_df(df, event='damages hearing')
df_damages = df_damages[df_damages['Case Number'].isin(df_evictions['Case Number'])]
df_damages_by_month = number_by_month(df_damages)
line_plot(df_damages_by_month.Month,
df_damages_by_month.Count,
xlabel=None,
ylabel='Number of eviction-related damages hearings')
Note that damages hearings from evictions typically occur 30-45 days after a property is vacated.
df_damages_filtered = filter_df(df, event='damages hearing', case_title='KT Property Group LLC|Hoosier Investment LLC')
df_damages_filtered = df_damages_filtered[df_damages_filtered['Case Number'].isin(df_evictions['Case Number'])]
df_damages_by_month_filtered = number_by_month(df_damages_filtered)
line_plot(df_damages_by_month.Month,
df_damages_by_month_filtered.Count/df_damages_by_month.Count,
xlabel=None,
ylabel='Fraction of total due to Anderson Rentals and KT Property Group')
df_felony = filter_df(df, case_number='-F1-|-F2-|-F3-|-F4-|-F5-|-F6-')
#print(df_felony.sort_values('Date'))
df_felonies_by_month = number_by_month(df_felony)
line_plot(df_felonies_by_month.Month,
df_felonies_by_month.Count,
xlabel=None,
ylabel='Number of felony hearings')