from IPython.display import Image
# Build an IPython Image display object pointing at a remote animated GIF.
# As the last expression of a notebook cell it is rendered as the cell output.
Image(url='http://i.imgur.com/h5GLIHc.gif')
import pandas as pd
import datetime
from IPython.html import widgets
from IPython.display import display, clear_output
import plotly.plotly as py
from plotly.graph_objs import *
import plotly
from plotly.widgets import GraphWidget
/Users/chris/anaconda/lib/python2.7/site-packages/pandas/computation/expressions.py:21: UserWarning: The installed version of numexpr 2.0.1 is not supported in pandas and will be not be used The minimum supported version is 2.1 "version is 2.1\n".format(ver=ver), UserWarning)
# Load the 311 service-request sample straight from GitHub.
# index_col=1 uses the second CSV column as the index and parse_dates=True
# asks pandas to parse that index as dates, so `df` can be resampled by time.
# pandas reports a DtypeWarning (mixed-type columns) for this file; passing
# low_memory=False or an explicit dtype= to read_csv is the suggested remedy.
df = pd.read_csv('https://raw.githubusercontent.com/plotly/widgets/master/ipython-examples/311_150k.csv', parse_dates=True, index_col=1)
# Preview the first rows (rendered when this is the last expression of a cell).
df.head()
/Users/chris/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:1154: DtypeWarning: Columns (8,39,46,47,48) have mixed types. Specify dtype option on import or set low_memory=False.
Unique Key | Closed Date | Agency | Agency Name | Complaint Type | Descriptor | Location Type | Incident Zip | Incident Address | Street Name | ... | Bridge Highway Name | Bridge Highway Direction | Road Ramp | Bridge Highway Segment | Garage Lot Name | Ferry Direction | Ferry Terminal Name | Latitude | Longitude | Location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Created Date | |||||||||||||||||||||
2014-11-16 23:46:00 | 29300358 | 11/16/2014 11:46:00 PM | DSNY | BCC - Queens East | Derelict Vehicles | 14 Derelict Vehicles | Street | 11432 | 80-25 PARSONS BOULEVARD | PARSONS BOULEVARD | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 40.719411 | -73.808882 | (40.719410639341916, -73.80888158860446) |
2014-11-16 02:24:35 | 29299837 | 11/16/2014 02:24:35 AM | DOB | Department of Buildings | Building/Use | Illegal Conversion Of Residential Building/Space | NaN | 10465 | 938 HUNTINGTON AVENUE | HUNTINGTON AVENUE | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 40.827862 | -73.830641 | (40.827862046105416, -73.83064067165407) |
2014-11-16 02:17:12 | 29297857 | 11/16/2014 02:50:48 AM | NYPD | New York City Police Department | Illegal Parking | Blocked Sidewalk | Street/Sidewalk | 11201 | 229 DUFFIELD STREET | DUFFIELD STREET | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 40.691248 | -73.984375 | (40.69124772858873, -73.98437529459297) |
2014-11-16 02:15:13 | 29294647 | NaN | NYPD | New York City Police Department | Noise - Street/Sidewalk | Loud Music/Party | Street/Sidewalk | 10040 | 128 NAGLE AVENUE | NAGLE AVENUE | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 40.861248 | -73.926308 | (40.861247930170535, -73.92630783362215) |
2014-11-16 02:14:01 | 29300211 | NaN | NYPD | New York City Police Department | Illegal Parking | Commercial Overnight Parking | Street/Sidewalk | 10306 | 625 LINCOLN AVENUE | LINCOLN AVENUE | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 40.570565 | -74.092229 | (40.57056460126485, -74.09222907551542) |
5 rows × 51 columns
# Count rows per 24-hour bin of the datetime index.
# NOTE(review): this module-level result is not read again in the visible
# code -- replot() builds its own local `grouped` -- confirm it is still needed.
grouped = df.resample('24H', how='count')
# --- Input controls -------------------------------------------------------
# Integer slider whose value replot() uses as the resampling window in hours.
slider = widgets.IntSliderWidget()
slider.min = 1
slider.value=24
slider.description = 'Window length (hours)'
# Dropdown listing every DataFrame column (label -> same column name); the
# selected column is the one replot() runs value_counts() on.
# `values` is assigned before `value` so the default selection is a valid option.
column_headers_dropdown = widgets.DropdownWidget()
column_headers_dropdown.values = {column: column for column in df.columns}
column_headers_dropdown.value = 'Complaint Type'
# NOTE(review): "(e.g. Tree)" reads like a search-term hint rather than a
# column name -- confirm this description is attached to the intended widget.
column_headers_dropdown.description = 'Select which column to graph (e.g. Tree)'
# Free-text box; replot() matches its value against the 'Complaint Type' column.
search_complaints_text_input = widgets.TextWidget()
search_complaints_text_input.description = 'Search complaint types'
# --- Graphs ---------------------------------------------------------------
# Both graph widgets are created from the same plot.ly figure URL and are
# restyled with real data by replot().
graph_time_series_agg = GraphWidget('https://plot.ly/~chris/4103')
# Initial axis ranges stored on the widget object; replot() reads ranges['x']
# and treats the two values as epoch timestamps in milliseconds.
graph_time_series_agg.ranges = {'x': [1413648000000, 1416157200000], 'y': [0, 7744.210526315789]}
graph_complaints = GraphWidget('https://plot.ly/~chris/4103')
def replot(on_zoom=False):
    """Redraw the graphs from the current state of the input widgets.

    Reads the search text, the slider value and the selected column from the
    module-level widgets, and the visible x-range from
    ``graph_time_series_agg.ranges``.

    The value-counts bar chart (``graph_complaints``) is always redrawn from
    the rows that match the search text and fall strictly inside the x-range.
    The time-series bar chart (``graph_time_series_agg``) is redrawn only when
    ``on_zoom`` is false.

    Args:
        on_zoom: Pass True when the call is a reaction to a zoom on the
            time-series graph; the time-series graph is then left untouched.
    """
    # Snapshot the widget values once so the whole redraw uses the same inputs.
    search_text = search_complaints_text_input.value
    window_hours = slider.value
    column_name = column_headers_dropdown.value

    # Time window that we're looking at. The x-range values are divided by
    # 1000 (milliseconds -> seconds) and converted with fromtimestamp(), which
    # yields naive datetimes in the machine's local timezone.
    xr = graph_time_series_agg.ranges['x']
    x1 = datetime.datetime.fromtimestamp(int(xr[0]/1000.))
    x2 = datetime.datetime.fromtimestamp(int(xr[1]/1000.))

    # Rows whose complaint type matches the search text. str.contains treats
    # the text as a regular expression by default; missing complaint types
    # are counted as non-matching.
    search_idx = df['Complaint Type'].str.contains(search_text).fillna(False)
    windowed_idx = search_idx & (df.index > x1) & (df.index < x2)

    if not on_zoom:
        # Replot the time series aggregate on input change.
        # NOTE(review): `how=` is the resample API of the pandas version this
        # notebook was written against -- confirm before upgrading pandas.
        grouped = df[search_idx].resample('{}H'.format(window_hours), how='count')
        complaint_counts = grouped['Complaint Type']
        graph_time_series_agg.restyle({
            'x': [complaint_counts.index],
            'y': [complaint_counts],
            'type': 'bar',
        })

    # Replot the value counts aggregate for the selected column.
    vc = df[windowed_idx][column_name].value_counts()
    graph_complaints.restyle({'x': [vc.index], 'y': [vc.values], 'type': 'bar'})
def on_trait_change():
    """Run a full redraw (time series and value counts) after an input changes."""
    replot()


# Every input control triggers the same redraw when its `value` trait changes.
for _control in (search_complaints_text_input, column_headers_dropdown, slider):
    _control.on_trait_change(on_trait_change, 'value')
def replot_complaints_on_zoom(_, ranges):
    """Zoom callback for the time-series graph.

    Stores the new axis ranges on ``graph_time_series_agg`` and redraws in
    zoom mode, so the time-series graph itself is not restyled.

    Args:
        _: First callback argument; unused.
        ranges: New axis ranges; ``replot`` reads ``ranges['x']`` from them.
    """
    graph_time_series_agg.ranges = ranges
    replot(on_zoom=True)
# Recompute the value-counts graph whenever the time-series graph is zoomed.
graph_time_series_agg.on_zoom(replot_complaints_on_zoom)

# Show the three input controls first, then the two graphs, in this order.
display(
    slider,
    search_complaints_text_input,
    column_headers_dropdown,
    graph_time_series_agg,
    graph_complaints
)

# First draw with the widgets' default values.
replot()

# Label the graphs once they have been populated.
graph_time_series_agg.relayout(dict(title='Number of complaints over time'))
graph_complaints.relayout({'yaxis.title': 'Number of events'})
# CSS styling within IPython notebook - feel free to re-use
from contextlib import closing

from IPython.core.display import HTML
import urllib2

# Download the styling markup and close the HTTP response explicitly instead
# of leaving the connection open until garbage collection.
# NOTE(review): whatever this short link serves is injected into the notebook
# as raw HTML over plain HTTP -- confirm the target is trusted.
with closing(urllib2.urlopen('http://bit.ly/1Bf5Hft')) as response:
    css_html = response.read()

# Keep the HTML object as the final expression so the notebook renders it.
HTML(css_html)