#!/usr/bin/env python
# coding: utf-8

# ## Visualizing Caltrain Ridership - 2016 Weekday Average
#
# This notebook contains a set of timetable visualizations built with open data published by Caltrain. You can [view the project](https://github.com/invisiblefunnel/learning/blob/master/2016%20Caltrain%20Weekday%20Ridership%20Timetables.ipynb) on GitHub.
#
# - [GTFS feed updated on 2017-04-04](http://transitfeeds.com/p/caltrain/122/20170404)
# - [2016 Annual Passenger Counts by Train – Weekdays](http://www.caltrain.com/Assets/_Marketing/caltrain/xls/2016/2016+Annual+Passenger+Counts+by+Train+$!e2$!80$!93+Weekdays.xls)

# In[1]:


from collections import defaultdict
from gtfslib.dao import Dao
from gtfslib.model import CalendarDate, Route, Trip
from gtfslib.utils import fmttime
from ipy_table import make_table
from itertools import tee
import networkx as nx
from nxpd import draw
import pandas as pd


def pairwise(iterable):
    """Return an iterator of overlapping pairs: s -> (s0,s1), (s1,s2), (s2,s3), ..."""
    a, b = tee(iterable)
    # Advance the second copy by one so zip pairs each item with its successor.
    next(b, None)
    return zip(a, b)


def transpose(list_of_lists):
    """Swap rows and columns, returning a new list of lists.

    Built on ``zip``, so ragged input is truncated to the shortest row.
    """
    return list(map(list, zip(*list_of_lists)))


# In[2]:


dao = Dao()
# Feed includes service from 2016
dao.load_gtfs("./data/Caltrain-2016-08-18.zip", disable_normalization=True)


# In[3]:


fltr = (
    (CalendarDate.date == "2016-06-08")
    & (Trip.direction_id == "1")
    & Route.route_id.in_([
        'Bu-16APR',  # Baby Bullet
        'Li-16APR',  # Limited
        'Lo-16APR',  # Local
    ])
)


# Sort by stop time since all trips begin at the same stop
def trip_sort(trip):
    """Sort key: the first stop time's arrival time, else its departure time."""
    start = trip.stop_times[0]
    return start.arrival_time or start.departure_time


# sorted() consumes the iterable directly; no intermediate list is needed.
trips = sorted(dao.trips(fltr=fltr), key=trip_sort)


# In[4]:


# Each pattern is the ordered tuple of stop ids one trip visits; the set
# collapses trips that share the same stopping pattern.
patterns = {tuple(st.stop_id for st in trip.stop_times) for trip in trips}


# In[5]:


G = nx.DiGraph()

# Build a pattern graph
for pattern in patterns:
    for a, b in pairwise(pattern):
        G.add_edge(a, b)

# Label nodes with stop names
for stop_id, data in G.nodes(data=True):
    stop = dao.stop(stop_id)
    # Keep only the text before "Caltrain" in the stop name.
    data["label"] = stop.stop_name.split("Caltrain")[0].strip()


# In[6]:


df = pd.read_csv("./data/2016 Average Weekday Passenger Counts by Train.csv")

# ridership[trip_short_name][stop_name] -> dict of that CSV row's columns
ridership = {}
for _, row in df.iterrows():
    trip_short_name = str(row["trip_short_name"])
    stop_name = row["stop_name"]
    ridership.setdefault(trip_short_name, {})[stop_name] = dict(row)


# In[7]:


# This only works if the patterns form a directed-acyclic graph
# NOTE(review): `G.node` is the accessor of older networkx releases; newer
# releases expose `G.nodes[...]` instead -- confirm against the pinned version.
header = [G.node[stop_id]["label"] for stop_id in nx.topological_sort(G)]


# In[8]:


draw(G, show='ipynb')


# In[9]:


def build_trip_row(trip, cell_func):
    """Build one timetable row for ``trip``.

    The row starts with the trip's short name, followed by one cell per stop
    in ``header``. ``cell_func(stime, riders)`` is called for every stop that
    appears both in the trip's stop times and in its ridership data; all other
    stops get "-". A falsy cell value (including 0) is also rendered as "-".

    Raises ``KeyError`` if the trip has no entry in ``ridership``.
    """
    row = [trip.trip_short_name]
    stop_ridership = ridership[trip.trip_short_name]

    # assumes one visit per stop
    stimes_by_stop_name = {G.node[st.stop_id]["label"]: st for st in trip.stop_times}

    for stop_name in header:
        if stop_name in stop_ridership and stop_name in stimes_by_stop_name:
            stime = stimes_by_stop_name[stop_name]
            riders = stop_ridership[stop_name]
            value = cell_func(stime, riders)
            row.append(value or "-")
        else:
            row.append("-")

    return row


def build_trip_rows(cell_func):
    """Build a row for every trip in ``trips`` that has ridership data."""
    rows = []
    for trip in trips:
        # Trips missing from the ridership CSV are left out of the table.
        if trip.trip_short_name not in ridership:
            continue
        rows.append(build_trip_row(trip, cell_func))
    return rows


def build_timetable(cell_func):
    """Return the header row (with an empty corner cell) followed by trip rows."""
    return [[""] + header] + build_trip_rows(cell_func)


# ## Timetable

# In[10]:


def stop_time(stime, riders):
    """Cell value: the formatted arrival time, else the departure time."""
    return fmttime(stime.arrival_time or stime.departure_time)


make_table(transpose(build_timetable(stop_time)))


# ## Boardings

# In[11]:


def boardings(stime, riders):
    """Cell value: the rounded "boardings" figure for this stop."""
    return round(riders["boardings"])


make_table(transpose(build_timetable(boardings)))


# ## Alightings

# In[12]:


def alightings(stime, riders):
    """Cell value: the rounded "alightings" figure for this stop."""
    return round(riders["alightings"])


make_table(transpose(build_timetable(alightings)))


# ## Boardings and alightings

# In[13]:


def boardings_and_alightings(stime, riders):
    """Cell value: rounded boardings and alightings formatted as "+B/-A"."""
    return "+%d/-%d" % (round(riders["boardings"]), round(riders["alightings"]))


make_table(transpose(build_timetable(boardings_and_alightings)))


# ## Onboard

# In[14]:


def onboard(stime, riders):
    """Cell value: the rounded "onboard" figure for this stop."""
    return round(riders["onboard"])


make_table(transpose(build_timetable(onboard)))


# Did you enjoy this post? Let me know on [twitter](https://twitter.com/invisiblefunnel). Also, Remix is [hiring](https://www.remix.com/jobs).