Notebook

Reports from Court Records¶

In [9]:

from __future__ import division, print_function
import pandas as pd
import sys
import calendar
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import numpy as np
init_notebook_mode(connected=True)


def get_calendar_df():
    '''Read the parsed court records from a file'''
    df = pd.read_csv('calendars_parsed.csv')
    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    df['Title'] = df['Title'].astype(str)
    df['day_of_week'] = df['Date'].dt.day_name()
    df['month'] = df['Date'].dt.month_name()
    df['day_of_month'] = df['Date'].dt.day
    df['year'] = df['Date'].dt.year
    return df[df.Status == 'Confirmed']


def filter_df(df, date_min=None, date_max=None, day_of_week=None,
              day_of_month=None, month=None, year=None,
              case_number=None, case_title=None, event=None, court=None):
    '''Filter a dataframe based on specified criteria'''
    if date_min is not None:
        df = df.loc[df['Date'] >= date_min]
    if date_max is not None:
        df = df.loc[df['Date'] <= date_max]
    if day_of_week is not None:
        df = df[df['day_of_week'].str.lower().str.contains(day_of_week.lower())]
    if day_of_month is not None:
        df = df[df['day_of_month'] == day_of_month]
    if month is not None:
        df = df[df['month'].str.lower().str.contains(month.lower())]
    if year is not None:
        df = df[df['year'] == year]
    if case_number is not None:
        df = df[df['Case Number'].str.lower().str.contains(case_number.lower())]
    if case_title is not None:
        df = df[df['Title'].str.lower().str.contains(case_title.lower())]
    if event is not None:
        df = df[df['Event'].str.lower().str.contains(event.lower())]
    if court is not None:
        df = df[df['Court'].str.lower().str.contains(court.lower())]
    return df


def number_by_month(df):
    '''Return a count of the number of rows of the dataframe by month'''
    df_by_month = pd.DataFrame(columns=['Month', 'Count'])
    min_year = min(df.year)
    max_year = max(df.year)
    for year in range(min_year, max_year+1):
        for month_number in range(1, 13):
            month = calendar.month_name[month_number]
            count = rows(filter_df(df, month=month, year=year))
            df_by_month = df_by_month.append(
                {'Month': month + ' ' + str(year), 'Count': count},
                ignore_index=True)
    first_nonzero_index = df_by_month.Count.replace(0, np.nan).first_valid_index()
    last_nonzero_index = df_by_month.Count.replace(0, np.nan).last_valid_index()
    return df_by_month.iloc[first_nonzero_index:last_nonzero_index+1]


def number_by_day_of_week(df):
    '''Return a count of the number of rows of the dataframe by day of week'''
    df_by_day_of_week = pd.DataFrame(columns=['Day of Week', 'Count'])
    for day_number in range(1, 8):
        day_name = calendar.day_name[day_number]
        count = rows(filter_df(df, day_of_week=day_name))
        df_by_day_of_week = df_by_day_of_week.append({'Day of Week': day_name, 'Count': count}, ignore_index=True)
    return df_by_day_of_week


def number_by_day_of_month(df):
    '''Return a count of the number of rows of the dataframe by day of month'''
    df_by_day_of_month = pd.DataFrame(columns=['Day of Month', 'Count'])
    for day_number in range(1, 32):
        count = rows(filter_df(df, day_of_month=day_number))
        df_by_day_of_month = df_by_day_of_month.append({'Day of Month': day_number, 'Count': count}, ignore_index=True)
    return df_by_day_of_month


def rows(df):
    '''Convenience function to return the number of rows of a dataframe'''
    return len(df.index)


def bar_chart(labels, counts, xlabel=None, ylabel=None):
    '''Make a bar chart'''
    percent = [str(round(100*count/sum(counts),1)) + '%' for count in counts]
    data = [go.Bar(
            x=labels,
            y=counts,
            text=percent,
            textposition = 'auto',
            opacity=0.6
        )]
    layout = go.Layout(xaxis=dict(title=xlabel), yaxis=dict(title=ylabel))
    fig = go.Figure(data=data, layout=layout)
    iplot(fig)


def line_plot(labels, counts, xlabel=None, ylabel=None):
    '''Make a bar chart'''
    data = [go.Scatter(
            x=labels,
            y=counts
        )]
    layout = go.Layout(xaxis=dict(title=xlabel),
                       yaxis=dict(title=ylabel, range=[0, 1.1*max(counts)]))
    fig = go.Figure(data=data, layout=layout)
    iplot(fig)


df = get_calendar_df()

Cases broken down by type¶

In [10]:

labels = ['Small Claims', 'Civil Collection', 'Misdemeanors', 'Felonies',
          'Family', 'Other']
counts = [0]*6
counts[0] = rows(filter_df(df, case_number='SC'))
counts[1] = rows(filter_df(df, case_number='CC'))
counts[2] = rows(filter_df(df, case_number='CM'))
counts[3] = rows(filter_df(df, case_number='-F1-|-F2-|-F3-|-F4-|-F5-|-F6-'))
counts[4] = rows(filter_df(df, case_number='-AD-|-RS-|-JP-|-DC-|-DR-|-GU-'))
counts[5] = rows(df) - sum(counts[0:5])
bar_chart(labels, counts, ylabel='Number of hearings')

Criminal cases by type¶

In [11]:

labels = ['Felony 1', 'Felony 2', 'Felony 3', 'Felony 4', 'Felony 5', 'Felony 6', 'Misdemeanor', 'Infraction']
counts = [0]*8
df_criminal = filter_df(df, case_number='-CM-|-IF-|-F1-|-F2-|-F3-|-F4-|-F5-|-F6-')
counts[0] = rows(filter_df(df_criminal, case_number='-F1-'))
counts[1] = rows(filter_df(df_criminal, case_number='-F2-'))
counts[2] = rows(filter_df(df_criminal, case_number='-F3-'))
counts[3] = rows(filter_df(df_criminal, case_number='-F4-'))
counts[4] = rows(filter_df(df_criminal, case_number='-F5-'))
counts[5] = rows(filter_df(df_criminal, case_number='-F6-'))
counts[6] = rows(filter_df(df_criminal, case_number='-CM-'))
counts[7] = rows(filter_df(df_criminal, case_number='-IF-'))
bar_chart(labels, counts, ylabel='Number of hearings')

Small claims cases by type¶

In [12]:

labels = ['Eviction Hearing', 'Damages Hearing', 'Finance System of Richmond', 'Natco Credit Union', 'Other']
df_sc = filter_df(df, case_number='-SC-')
counts = [0]*5

counts[0] = rows(filter_df(df_sc, event='eviction'))
counts[1] = rows(filter_df(df_sc, event='damage'))
counts[2] = rows(filter_df(df_sc, case_title='Fs V|Finance System'))
counts[3] = rows(filter_df(df_sc, case_title='Natco'))
counts[4] = sum(counts[0:4])

bar_chart(labels, counts, ylabel='Number of hearings')

Civil collection cases by type¶

In [13]:

labels = ['Reid Hospital', 'Damages Hearing', 'Finance System of Richmond',
          'Natco Credit Union', 'Creditmax', 'Capital One', 'Other']
df_cc = filter_df(df, case_number='-CC-')
counts = [0]*7

counts[0] = rows(filter_df(df_cc, case_title='Reid Hospital'))
counts[1] = rows(filter_df(df_cc, event='damage'))
counts[2] = rows(filter_df(df_cc, case_title='Fs V|Finance System'))
counts[3] = rows(filter_df(df_cc, case_title='Natco'))
counts[4] = rows(filter_df(df_cc, case_title='Creditmax'))
counts[5] = rows(filter_df(df_cc, case_title='Capital One'))
counts[6] = sum(counts[0:6])

bar_chart(labels, counts, ylabel='Number of hearings')

Eviction hearings as a function of time¶

In [14]:

df_evictions = filter_df(df, event='eviction')
df_evictions_by_month = number_by_month(df_evictions)
line_plot(df_evictions_by_month.Month,
          df_evictions_by_month.Count,
          xlabel=None,
          ylabel='Number of eviction hearings')

In [27]:

df_damages = filter_df(df, event='damages hearing')
df_damages = df_damages[df_damages['Case Number'].isin(df_evictions['Case Number'])]
df_damages_by_month = number_by_month(df_damages)
line_plot(df_damages_by_month.Month,
          df_damages_by_month.Count,
          xlabel=None,
          ylabel='Number of eviction-related damages hearings')

Note that damages hearings from evictions typically occur 30-45 days after a property is vacated.

In [32]:

df_damages_filtered = filter_df(df, event='damages hearing', case_title='KT Property Group LLC|Hoosier Investment LLC')
df_damages_filtered = df_damages_filtered[df_damages_filtered['Case Number'].isin(df_evictions['Case Number'])]
df_damages_by_month_filtered = number_by_month(df_damages_filtered)
line_plot(df_damages_by_month.Month,
          df_damages_by_month_filtered.Count/df_damages_by_month.Count,
          xlabel=None,
          ylabel='Fraction of total due to Anderson Rentals and KT Property Group')

Felonies over time¶

In [8]:

df_felony = filter_df(df, case_number='-F1-|-F2-|-F3-|-F4-|-F5-|-F6-')
#print(df_felony.sort_values('Date'))
df_felonies_by_month = number_by_month(df_felony)
line_plot(df_felonies_by_month.Month,
          df_felonies_by_month.Count,
          xlabel=None,
          ylabel='Number of felony hearings')

Reports from Court Records¶

Cases broken down by type¶

Criminal cases by type¶

Small claims cases by type¶

Civil collection cases by type¶

Eviction hearings as a function of time¶

Eviction-related damages hearings as a function of time for all landlords¶

Fraction of overall eviction-related damages hearings over time initiated by K&R Hoosier Investments (Anderson Rentals) and KT Property Group¶

Felonies over time¶