import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.core.display import display, HTML
%matplotlib inline
import folium
from folium.plugins import FastMarkerCluster
pd.options.display.max_columns = None
# display(HTML("<style>.container { width:100% !important; }</style>"))
CSV_PATH = os.path.join('data', 'hacknight_ticket_sample_data_2015.csv')
df = pd.read_csv(CSV_PATH,low_memory=False, parse_dates=['issue_date', 'ticket_queue_date'])
CSV_PATH = os.path.join('data', 'hacknight_sample_data_geocode.csv')
addrs_df = pd.read_csv(CSV_PATH)
addrs_df['address'] = addrs_df['address'] + ', chicago, il'
geocoded_df = pd.merge(left=df, right=addrs_df, how='inner', on='address')
geocoded_df.head()
ticket_number | issue_date | violation_location | license_plate_number | license_plate_state | license_plate_type | zipcode | violation_code | violation_description | unit | unit_description | vehicle_make | fine_level1_amount | fine_level2_amount | current_amount_due | total_payments | ticket_queue | ticket_queue_date | notice_level | hearing_disposition | notice_number | officer | address | lat | lng | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 9188814621 | 2015-05-07 13:52:00 | 2134 S ARCHER AV | 7c189a16ef79db9413c1f46b7e5d1712e5c0c1575be352... | MI | PAS | 48103 | 0964190A | EXP. METER NON-CENTRAL BUSINESS DISTRICT | 498 | DOF | BUIC | 50 | 100 | 0.0 | 50.0 | Paid | 2015-05-13 | NaN | NaN | 0 | 798 | 2100 s archer av, chicago, il | 41.854262 | -87.631986 |
1 | 9188417037 | 2015-01-01 21:29:00 | 2167 S ARCHER AV | ac7f6f4be49dfa223571838e586653aa5cdcb6bdfc6385... | IL | PAS | NaN | 0964190A | EXP. METER NON-CENTRAL BUSINESS DISTRICT | 502 | DOF | MITS | 50 | 100 | 0.0 | 50.0 | Paid | 2015-01-05 | NaN | NaN | 0 | 1529 | 2100 s archer av, chicago, il | 41.854262 | -87.631986 |
2 | 9188598734 | 2015-03-26 13:14:00 | 2140 S ARCHER AV | a493850f62c8ca5a25787271859f0409c289fb84130f33... | OH | PAS | 45409 | 0976160A | REAR AND FRONT PLATE REQUIRED | 498 | DOF | PORS | 60 | 120 | 146.4 | 0.0 | Notice | 2015-04-08 | SEIZ | NaN | 5191566530 | 780 | 2100 s archer av, chicago, il | 41.854262 | -87.631986 |
3 | 9189154639 | 2015-07-24 14:18:00 | 2172 S ARCHER AV | 38a1ef0c0a325c79e9dba69f6e96652fcbf1e9dcb5db3f... | IL | PAS | 606161514 | 0964190A | EXP. METER NON-CENTRAL BUSINESS DISTRICT | 498 | DOF | MERZ | 50 | 100 | 0.0 | 0.0 | Dismissed | 2015-09-14 | VIOL | Not Liable | 5176435360 | 796 | 2100 s archer av, chicago, il | 41.854262 | -87.631986 |
4 | 9188041623 | 2015-01-17 14:45:00 | 2107 S ARCHER AV | e3c39cce6c30735cc977656a463233ea6f516328828e8d... | LA | PAS | 70072 | 0964190A | EXP. METER NON-CENTRAL BUSINESS DISTRICT | 502 | DOF | HOND | 50 | 100 | 122.0 | 0.0 | Notice | 2015-02-04 | SEIZ | NaN | 5181845840 | 1461 | 2100 s archer av, chicago, il | 41.854262 | -87.631986 |
The map below shows where the most prolific ticketer (Officer #728) wrote their tickets. As you zoom in on the map, the map will automatically show finer detail as to where the tickets were written.
# Which CPD officers wrote at least 50 tickets in 2015
prolific_CPD = geocoded_df.loc[geocoded_df['unit_description'] == 'CPD', 'officer'].value_counts()\
[geocoded_df.loc[geocoded_df['unit_description'] == 'CPD','officer'].value_counts() >= 50]
chi_lat = 41.8
chi_lon = -87.75
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Officer #728 is the officer that wrote the most tickets in 2015
subset_df = geocoded_df[geocoded_df['officer'].isin(prolific_CPD.index.tolist())]
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
# What violations occurred at least 100 times?
query = geocoded_df['violation_description'].value_counts()
query[(query > 100)]
EXPIRED PLATES OR TEMPORARY REGISTRATION 8791 STREET CLEANING 6273 EXP. METER NON-CENTRAL BUSINESS DISTRICT 5773 NO CITY STICKER VEHICLE UNDER/EQUAL TO 16,000 LBS. 5183 RESIDENTIAL PERMIT PARKING 4013 PARKING/STANDING PROHIBITED ANYTIME 3188 REAR AND FRONT PLATE REQUIRED 2717 EXPIRED METER CENTRAL BUSINESS DISTRICT 2474 NO STANDING/PARKING TIME RESTRICTED 1793 RUSH HOUR PARKING 1593 WITHIN 15' OF FIRE HYDRANT 1002 PARK OR STAND IN BUS/TAXI/CARRIAGE STAND 876 STOP SIGN OR TRAFFIC SIGNAL 502 DOUBLE PARKING OR STANDING 474 TRUCK,RV,BUS, OR TAXI RESIDENTIAL STREET 425 NONCOMPLIANT PLATE(S) 406 PARK ALLEY 372 PARK OR STAND ON CROSSWALK 327 DISABLED PARKING ZONE 312 BLOCK ACCESS/ALLEY/DRIVEWAY/FIRELANE 281 STAND, PARK, OR OTHER USE OF BUS LANE 278 ABANDONED VEH. FOR 7 DAYS OR INOPERABLE 264 WRONG DIRECTION OR 12'' FROM CURB 212 OBSTRUCT ROADWAY 180 SPECIAL EVENTS RESTRICTION 150 3-7 AM SNOW ROUTE 145 TWO HEAD LAMPS REQUIRED VISIBLE 1000' 145 PARK OR STAND ON SIDEWALK 125 SAFETY BELTS REQUIRED 123 WINDOWS MISSING OR CRACKED BEYOND 6 118 Name: violation_description, dtype: int64
Where were people ticketed for not having a parking permit?
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Where were people ticketed for not having a parking permit?
subset_df = geocoded_df[geocoded_df['violation_description'].isin(['EXPIRED PLATES OR TEMPORARY REGISTRATION'])]
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Where were people ticketed for not having a parking permit?
subset_df = geocoded_df[geocoded_df['violation_description'].isin(['EXPIRED PLATES OR TEMPORARY REGISTRATION'])]
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
# What are the rare cares that were ticketed? (>=5 to eliminate unknown makers, <=50 to get rare cars)
query = geocoded_df['vehicle_make'].value_counts()
query[(query >= 5) & (query <= 50)]
HUMM 45 RROV 40 FIAT 33 TESL 29 GEO 28 UNKN 28 HARL 22 GENU 20 YAMA 20 KENW 18 OTHR 16 KAWA 15 MASE 14 SMRT 14 HINO 12 TRIU 11 WORH 11 FERR 8 STRG 8 KYMC 7 BENT 7 ASTO 7 EGIL 6 SPNT 6 THMP 5 PETR 5 VESP 5 Name: vehicle_make, dtype: int64
# What are the rare cares that were ticketed? (>=5 to eliminate randos, <=50 to get rare cars)
rare_cars = query[(query >= 5) & (query <= 50)].index.tolist()
Where were rare cars (as defined above) ticketed?
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Where were rare cars (as defined above) ticketed?
subset_df = geocoded_df[geocoded_df['vehicle_make'].isin(rare_cars)]
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
Where were Ferraris and Maseratis ticketed?
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Where were Ferraris and Maseratis ticketed?
subset_df = geocoded_df[geocoded_df['vehicle_make'].isin(['FERR', 'MASE'])]
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
geocoded_df['notice_level'].value_counts()
VIOL 14471 SEIZ 9422 DETR 6245 FINL 4294 DLS 2275 Name: notice_level, dtype: int64
Where were vehicles seized?
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Where were vehicles seized?
subset_df = geocoded_df[geocoded_df['notice_level'] == 'SEIZ']
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
query = geocoded_df['license_plate_type'].value_counts()
query[query >= 20]
PAS 42611 TMP 2278 TRK 2201 TXI 560 DLC 213 HCP 202 FFM 182 MCY 158 ENV 97 LIV 94 APP 77 DOM 71 COL 62 PML 46 CBK 44 SOX 38 INT 26 USM 24 BRS 22 PFR 22 Name: license_plate_type, dtype: int64
Where were vehicles with temporary license plates ticketed?
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Where were vehicles with temporary license plates ticketed?
subset_df = geocoded_df[geocoded_df['license_plate_type'] == 'TMP']
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Officer #728 is the officer that wrote the most tickets in 2015
subset_df = geocoded_df[geocoded_df['officer'] == '653']
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map
chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
# Officer #728 is the officer that wrote the most tickets in 2015
subset_df = geocoded_df[geocoded_df['officer'] == '790']
chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))
chi_map