#!/usr/bin/env python
# coding: utf-8
"""Map where Chicago parking tickets were written in 2015.

Exported from a Jupyter notebook; the ``# In[n]:`` markers delimit the
original cells. The script joins the 2015 ticket sample with its geocoded
addresses and builds folium marker-cluster maps for several subsets of the
tickets. Map cells end with a bare ``chi_map`` expression so the map is
displayed when the cell is run in a notebook.
"""

# In[1]:

import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from IPython import get_ipython
from IPython.display import display, HTML

# IPython.get_ipython() returns None outside an IPython session, so the
# inline-plotting magic is only applied when a shell is actually available.
_shell = get_ipython()
if _shell is not None:
    _shell.run_line_magic('matplotlib', 'inline')


# In[2]:

import folium
from folium.plugins import FastMarkerCluster


# In[3]:

# Show every column when a DataFrame is displayed.
pd.options.display.max_columns = None
# display(HTML(""))


# In[4]:

CSV_PATH = os.path.join('data', 'hacknight_ticket_sample_data_2015.csv')
df = pd.read_csv(
    CSV_PATH,
    low_memory=False,
    parse_dates=['issue_date', 'ticket_queue_date'],
)


# In[5]:

CSV_PATH = os.path.join('data', 'hacknight_sample_data_geocode.csv')
addrs_df = pd.read_csv(CSV_PATH)
# Presumably the ticket data's `address` values end with this suffix, so it
# is appended here to make the merge key below line up -- TODO confirm.
addrs_df['address'] = addrs_df['address'] + ', chicago, il'


# In[6]:

# Inner join: only tickets whose address has a geocode row are kept.
geocoded_df = pd.merge(left=df, right=addrs_df, how='inner', on='address')
geocoded_df.head()


# The map below shows where the most prolific CPD ticketers (officers who
# wrote at least 50 tickets) wrote their tickets. As you zoom in on the map,
# the map will automatically show finer detail as to where the tickets were
# written.
# NOTE(review): the original notebook text named Officer #728 as the most
# prolific ticketer, but the cell maps every officer in `prolific_CPD`.

# In[7]:

# Which CPD officers wrote at least 50 tickets in 2015?
cpd_ticket_counts = geocoded_df.loc[
    geocoded_df['unit_description'] == 'CPD', 'officer'
].value_counts()
prolific_CPD = cpd_ticket_counts[cpd_ticket_counts >= 50]


# In[8]:

chi_lat = 41.8
chi_lon = -87.75


def ticket_map(tickets: pd.DataFrame) -> folium.Map:
    """Return a folium map with the given tickets as a marker cluster.

    Args:
        tickets: Rows to plot; the ``lat`` and ``lng`` columns are read.

    Returns:
        A new ``folium.Map`` centered on (``chi_lat``, ``chi_lon``) at zoom
        level 10 with one ``FastMarkerCluster`` child holding the points.
    """
    cluster_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)
    points = tickets[['lat', 'lng']].values.tolist()
    cluster_map.add_child(FastMarkerCluster(points))
    return cluster_map


# In[9]:

# Where did the prolific CPD officers (>= 50 tickets) write their tickets?
subset_df = geocoded_df[geocoded_df['officer'].isin(prolific_CPD.index.tolist())]
chi_map = ticket_map(subset_df)
chi_map


# In[10]:

# What violations occurred at least 100 times?
query = geocoded_df['violation_description'].value_counts()
query[query >= 100]


# **Where were people ticketed for expired plates or temporary registration?**
# NOTE(review): the original heading asked where people were ticketed for not
# having a parking permit, and the cell appeared twice with this same
# expired-plates filter. TODO: add a cell with the permit violation
# description if that map is still wanted.

# In[11]:

subset_df = geocoded_df[
    geocoded_df['violation_description'].isin(
        ['EXPIRED PLATES OR TEMPORARY REGISTRATION']
    )
]
chi_map = ticket_map(subset_df)
chi_map


# In[13]:

# What are the rare cars that were ticketed?
# (>= 5 to eliminate unknown makers, <= 50 to get rare cars)
query = geocoded_df['vehicle_make'].value_counts()
rare_mask = (query >= 5) & (query <= 50)
query[rare_mask]


# In[14]:

# Vehicle makes that count as "rare" under the thresholds above.
rare_cars = query[rare_mask].index.tolist()


# **Where were rare cars (as defined above) ticketed?**

# In[15]:

subset_df = geocoded_df[geocoded_df['vehicle_make'].isin(rare_cars)]
chi_map = ticket_map(subset_df)
chi_map


# **Where were Ferraris and Maseratis ticketed?**

# In[16]:

subset_df = geocoded_df[geocoded_df['vehicle_make'].isin(['FERR', 'MASE'])]
chi_map = ticket_map(subset_df)
chi_map


# In[17]:

geocoded_df['notice_level'].value_counts()


# **Where were vehicles seized?**

# In[18]:

subset_df = geocoded_df[geocoded_df['notice_level'] == 'SEIZ']
chi_map = ticket_map(subset_df)
chi_map


# In[19]:

# Which license plate types appear on at least 20 tickets?
query = geocoded_df['license_plate_type'].value_counts()
query[query >= 20]


# **Where were vehicles with temporary license plates ticketed?**

# In[20]:

subset_df = geocoded_df[geocoded_df['license_plate_type'] == 'TMP']
chi_map = ticket_map(subset_df)
chi_map


# In[21]:

# Where did officer #653 write their tickets?
subset_df = geocoded_df[geocoded_df['officer'] == '653']
chi_map = ticket_map(subset_df)
chi_map


# In[22]:

# Where did officer #790 write their tickets?
subset_df = geocoded_df[geocoded_df['officer'] == '790']
chi_map = ticket_map(subset_df)
chi_map


# In[ ]:


# In[ ]: