Exploring NYC 311 Calls¶

Plotly and IPython Widgets¶

In [14]:

from IPython.display import Image
Image(url='http://i.imgur.com/h5GLIHc.gif')

Out[14]:

In [1]:

import pandas as pd
import datetime

from IPython.html import widgets 
from IPython.display import display, clear_output

import plotly.plotly as py
from plotly.graph_objs import *
import plotly
from plotly.widgets import GraphWidget

/Users/chris/anaconda/lib/python2.7/site-packages/pandas/computation/expressions.py:21: UserWarning: The installed version of numexpr 2.0.1 is not supported in pandas and will be not be used
The minimum supported version is 2.1

  "version is 2.1\n".format(ver=ver), UserWarning)

In [2]:

df = pd.read_csv('https://raw.githubusercontent.com/plotly/widgets/master/ipython-examples/311_150k.csv', parse_dates=True, index_col=1)
df = df
df.head()

/Users/chris/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:1154: DtypeWarning:

Columns (8,39,46,47,48) have mixed types. Specify dtype option on import or set low_memory=False.

Out[2]:

	Unique Key	Closed Date	Agency	Agency Name	Complaint Type	Descriptor	Location Type	Incident Zip	Incident Address	Street Name	...	Bridge Highway Name	Bridge Highway Direction	Road Ramp	Bridge Highway Segment	Garage Lot Name	Ferry Direction	Ferry Terminal Name	Latitude	Longitude	Location
Created Date
2014-11-16 23:46:00	29300358	11/16/2014 11:46:00 PM	DSNY	BCC - Queens East	Derelict Vehicles	14 Derelict Vehicles	Street	11432	80-25 PARSONS BOULEVARD	PARSONS BOULEVARD	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	40.719411	-73.808882	(40.719410639341916, -73.80888158860446)
2014-11-16 02:24:35	29299837	11/16/2014 02:24:35 AM	DOB	Department of Buildings	Building/Use	Illegal Conversion Of Residential Building/Space	NaN	10465	938 HUNTINGTON AVENUE	HUNTINGTON AVENUE	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	40.827862	-73.830641	(40.827862046105416, -73.83064067165407)
2014-11-16 02:17:12	29297857	11/16/2014 02:50:48 AM	NYPD	New York City Police Department	Illegal Parking	Blocked Sidewalk	Street/Sidewalk	11201	229 DUFFIELD STREET	DUFFIELD STREET	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	40.691248	-73.984375	(40.69124772858873, -73.98437529459297)
2014-11-16 02:15:13	29294647	NaN	NYPD	New York City Police Department	Noise - Street/Sidewalk	Loud Music/Party	Street/Sidewalk	10040	128 NAGLE AVENUE	NAGLE AVENUE	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	40.861248	-73.926308	(40.861247930170535, -73.92630783362215)
2014-11-16 02:14:01	29300211	NaN	NYPD	New York City Police Department	Illegal Parking	Commercial Overnight Parking	Street/Sidewalk	10306	625 LINCOLN AVENUE	LINCOLN AVENUE	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	40.570565	-74.092229	(40.57056460126485, -74.09222907551542)

5 rows × 51 columns

In [3]:

grouped = df.resample('24H', how='count')

In [4]:

slider = widgets.IntSliderWidget()
slider.min = 1
slider.value=24
slider.description = 'Window length (hours)'

In [5]:

column_headers_dropdown = widgets.DropdownWidget()
column_headers_dropdown.values  = {column: column for column in df.columns}
column_headers_dropdown.value = 'Complaint Type'
column_headers_dropdown.description = 'Select which column to graph (e.g. Tree)'


search_complaints_text_input = widgets.TextWidget()
search_complaints_text_input.description = 'Search complaint types'

In [6]:

graph_time_series_agg = GraphWidget('https://plot.ly/~chris/4103')
graph_time_series_agg.ranges = {'x': [1413648000000, 1416157200000], 'y': [0, 7744.210526315789]}
graph_complaints = GraphWidget('https://plot.ly/~chris/4103')

In [7]:

def replot(on_zoom=False):
    search_text = search_complaints_text_input.value
    slider_val = slider.value
    column_name = column_headers_dropdown.value

    # Time window that we're looking at
    ranges = graph_time_series_agg.ranges
    xr = ranges['x']
    x1 = datetime.datetime.fromtimestamp(int(xr[0]/1000.))
    x2 = datetime.datetime.fromtimestamp(int(xr[1]/1000.))

    # Text value that we're looking at
    search_idx = df['Complaint Type'].str.contains(search_complaints_text_input.value).fillna(False)
    windowed_idx = search_idx & (df.index>x1) & (df.index<x2)

    if not on_zoom:
        # Replot the time series aggregate on input change
        grouped = df[search_idx].resample('{}H'.format(slider_val), how='count')

        graph_time_series_agg.restyle({
            'x': [grouped['Complaint Type'].index],
            'y': [grouped['Complaint Type']],
            'type': 'bar'
        })

    
    # Replot the value counts aggregate
    vc = df[windowed_idx][column_name].value_counts()
    
    graph_complaints.restyle({'x': [vc.index], 'y': [vc.values], 'type': 'bar'})

def on_trait_change():
    replot()

search_complaints_text_input.on_trait_change(on_trait_change, 'value')
column_headers_dropdown.on_trait_change(on_trait_change, 'value')
slider.on_trait_change(on_trait_change, 'value')

In [8]:

def replot_complaints_on_zoom(_, ranges):
    graph_time_series_agg.ranges = ranges

    replot(True)

graph_time_series_agg.on_zoom(replot_complaints_on_zoom)

In [9]:

display(slider)
display(search_complaints_text_input)
display(column_headers_dropdown)

display(graph_time_series_agg)
display(graph_complaints)

In [10]:

replot()

In [13]:

graph_time_series_agg.relayout({'title': 'Number of complaints over time'})
graph_complaints.relayout({'yaxis.title': 'Number of events'})

In [2]:

# CSS styling within IPython notebook - feel free to re-use
from IPython.core.display import HTML
import urllib2

HTML(urllib2.urlopen('http://bit.ly/1Bf5Hft').read())

Out[2]: