In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
%matplotlib inline
In [2]:
import folium
from folium.plugins import FastMarkerCluster
In [3]:
pd.options.display.max_columns = None
# display(HTML("<style>.container { width:100% !important; }</style>"))
In [4]:
CSV_PATH = os.path.join('data', 'hacknight_ticket_sample_data_2015.csv')
df = pd.read_csv(CSV_PATH,low_memory=False, parse_dates=['issue_date', 'ticket_queue_date'])
In [5]:
CSV_PATH = os.path.join('data', 'hacknight_sample_data_geocode.csv')
addrs_df = pd.read_csv(CSV_PATH)
addrs_df['address'] = addrs_df['address'] + ', chicago, il'
In [6]:
geocoded_df = pd.merge(left=df, right=addrs_df, how='inner', on='address')
geocoded_df.head()
Out[6]:
ticket_number issue_date violation_location license_plate_number license_plate_state license_plate_type zipcode violation_code violation_description unit unit_description vehicle_make fine_level1_amount fine_level2_amount current_amount_due total_payments ticket_queue ticket_queue_date notice_level hearing_disposition notice_number officer address lat lng
0 9188814621 2015-05-07 13:52:00 2134 S ARCHER AV 7c189a16ef79db9413c1f46b7e5d1712e5c0c1575be352... MI PAS 48103 0964190A EXP. METER NON-CENTRAL BUSINESS DISTRICT 498 DOF BUIC 50 100 0.0 50.0 Paid 2015-05-13 NaN NaN 0 798 2100 s archer av, chicago, il 41.854262 -87.631986
1 9188417037 2015-01-01 21:29:00 2167 S ARCHER AV ac7f6f4be49dfa223571838e586653aa5cdcb6bdfc6385... IL PAS NaN 0964190A EXP. METER NON-CENTRAL BUSINESS DISTRICT 502 DOF MITS 50 100 0.0 50.0 Paid 2015-01-05 NaN NaN 0 1529 2100 s archer av, chicago, il 41.854262 -87.631986
2 9188598734 2015-03-26 13:14:00 2140 S ARCHER AV a493850f62c8ca5a25787271859f0409c289fb84130f33... OH PAS 45409 0976160A REAR AND FRONT PLATE REQUIRED 498 DOF PORS 60 120 146.4 0.0 Notice 2015-04-08 SEIZ NaN 5191566530 780 2100 s archer av, chicago, il 41.854262 -87.631986
3 9189154639 2015-07-24 14:18:00 2172 S ARCHER AV 38a1ef0c0a325c79e9dba69f6e96652fcbf1e9dcb5db3f... IL PAS 606161514 0964190A EXP. METER NON-CENTRAL BUSINESS DISTRICT 498 DOF MERZ 50 100 0.0 0.0 Dismissed 2015-09-14 VIOL Not Liable 5176435360 796 2100 s archer av, chicago, il 41.854262 -87.631986
4 9188041623 2015-01-17 14:45:00 2107 S ARCHER AV e3c39cce6c30735cc977656a463233ea6f516328828e8d... LA PAS 70072 0964190A EXP. METER NON-CENTRAL BUSINESS DISTRICT 502 DOF HOND 50 100 122.0 0.0 Notice 2015-02-04 SEIZ NaN 5181845840 1461 2100 s archer av, chicago, il 41.854262 -87.631986

The map below shows where the most prolific ticketer (Officer #728) wrote their tickets. As you zoom in on the map, the map will automatically show finer detail as to where the tickets were written.

In [7]:
# Which CPD officers wrote at least 50 tickets in 2015
prolific_CPD = geocoded_df.loc[geocoded_df['unit_description'] == 'CPD', 'officer'].value_counts()\
                [geocoded_df.loc[geocoded_df['unit_description'] == 'CPD','officer'].value_counts() >= 50]
In [8]:
chi_lat = 41.8
chi_lon = -87.75
In [9]:
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)

# Officer #728 is the officer that wrote the most tickets in 2015
subset_df = geocoded_df[geocoded_df['officer'].isin(prolific_CPD.index.tolist())]

chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
Out[9]:
In [10]:
# What violations occurred at least 100 times?
query = geocoded_df['violation_description'].value_counts()
query[(query > 100)]
Out[10]:
EXPIRED PLATES OR TEMPORARY REGISTRATION              8791
STREET CLEANING                                       6273
EXP. METER NON-CENTRAL BUSINESS DISTRICT              5773
NO CITY STICKER VEHICLE UNDER/EQUAL TO 16,000 LBS.    5183
RESIDENTIAL PERMIT PARKING                            4013
PARKING/STANDING PROHIBITED ANYTIME                   3188
REAR AND FRONT PLATE REQUIRED                         2717
EXPIRED METER CENTRAL BUSINESS DISTRICT               2474
NO STANDING/PARKING TIME RESTRICTED                   1793
RUSH HOUR PARKING                                     1593
WITHIN 15' OF FIRE HYDRANT                            1002
PARK OR STAND IN BUS/TAXI/CARRIAGE STAND               876
STOP SIGN OR TRAFFIC SIGNAL                            502
DOUBLE PARKING OR STANDING                             474
TRUCK,RV,BUS, OR TAXI RESIDENTIAL STREET               425
NONCOMPLIANT PLATE(S)                                  406
PARK ALLEY                                             372
PARK OR STAND ON CROSSWALK                             327
DISABLED PARKING ZONE                                  312
BLOCK ACCESS/ALLEY/DRIVEWAY/FIRELANE                   281
STAND, PARK, OR OTHER USE OF BUS LANE                  278
ABANDONED VEH. FOR 7 DAYS OR INOPERABLE                264
WRONG DIRECTION OR 12'' FROM CURB                      212
OBSTRUCT ROADWAY                                       180
SPECIAL EVENTS RESTRICTION                             150
3-7 AM SNOW ROUTE                                      145
TWO HEAD LAMPS REQUIRED VISIBLE 1000'                  145
PARK OR STAND ON SIDEWALK                              125
SAFETY BELTS REQUIRED                                  123
WINDOWS MISSING OR CRACKED BEYOND 6                    118
Name: violation_description, dtype: int64

Where were people ticketed for not having a parking permit?

In [11]:
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)

# Where were people ticketed for not having a parking permit?
subset_df = geocoded_df[geocoded_df['violation_description'].isin(['EXPIRED PLATES OR TEMPORARY REGISTRATION'])]

chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
Out[11]:
In [12]:
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)

# Where were people ticketed for not having a parking permit?
subset_df = geocoded_df[geocoded_df['violation_description'].isin(['EXPIRED PLATES OR TEMPORARY REGISTRATION'])]

chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
Out[12]:
In [13]:
# What are the rare cares that were ticketed? (>=5 to eliminate unknown makers, <=50 to get rare cars)
query = geocoded_df['vehicle_make'].value_counts()
query[(query >= 5) & (query <= 50)]
Out[13]:
HUMM    45
RROV    40
FIAT    33
TESL    29
GEO     28
UNKN    28
HARL    22
GENU    20
YAMA    20
KENW    18
OTHR    16
KAWA    15
MASE    14
SMRT    14
HINO    12
TRIU    11
WORH    11
FERR     8
STRG     8
KYMC     7
BENT     7
ASTO     7
EGIL     6
SPNT     6
THMP     5
PETR     5
VESP     5
Name: vehicle_make, dtype: int64
In [14]:
# What are the rare cares that were ticketed? (>=5 to eliminate randos, <=50 to get rare cars)
rare_cars = query[(query >= 5) & (query <= 50)].index.tolist()

Where were rare cars (as defined above) ticketed?

In [15]:
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)

# Where were rare cars (as defined above) ticketed?
subset_df = geocoded_df[geocoded_df['vehicle_make'].isin(rare_cars)]

chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
Out[15]: