#!/usr/bin/env python
# coding: utf-8

# ## Visualizing Caltrain Ridership - 2016 Weekday Average
# 
# This notebook contains a set of timetable visualizations built with open data published by Caltrain. You can [view the project](https://github.com/invisiblefunnel/learning/blob/master/2016%20Caltrain%20Weekday%20Ridership%20Timetables.ipynb) on GitHub.
# 
# - [GTFS feed updated on 2017-04-04](http://transitfeeds.com/p/caltrain/122/20170404)
# - [2016 Annual Passenger Counts by Train – Weekdays](http://www.caltrain.com/Assets/_Marketing/caltrain/xls/2016/2016+Annual+Passenger+Counts+by+Train+$!e2$!80$!93+Weekdays.xls)

# In[1]:


from collections import defaultdict
from gtfslib.dao import Dao
from gtfslib.model import CalendarDate, Route, Trip
from gtfslib.utils import fmttime
from ipy_table import make_table
from itertools import tee
import networkx as nx
from nxpd import draw
import pandas as pd

def pairwise(iterable):
    """Return an iterator of overlapping pairs: s -> (s0, s1), (s1, s2), ...

    Nothing is produced when the input has fewer than two items.
    """
    leading, trailing = tee(iterable, 2)
    # Advance the second copy by one item so that zipping pairs each element
    # with its successor; the default keeps an empty input from raising.
    next(trailing, None)
    return zip(leading, trailing)

def transpose(list_of_lists):
    """Swap rows and columns of a list of lists, returning new lists.

    Rows of unequal length are cut to the shortest one, as zip() does.
    """
    return [list(column) for column in zip(*list_of_lists)]


# In[2]:


# Create the gtfslib data-access object used by every later cell.
# Dao() is called without arguments — presumably an in-memory database;
# confirm against the gtfslib documentation.
dao = Dao()
# Feed includes service from 2016
# NOTE(review): the notebook header links a feed dated 2017-04-04, while the
# file loaded here is named 2016-08-18 — confirm which one is intended.
dao.load_gtfs("./data/Caltrain-2016-08-18.zip", disable_normalization=True)


# In[3]:


# Trip filter passed to dao.trips(): service date 2016-06-08, direction_id
# "1", and a route_id that is one of the three listed below. The clauses are
# combined with `&`, so a trip must satisfy all of them.
# NOTE(review): which travel direction "1" denotes is not stated in this
# file — confirm against the feed.
fltr = (
    (CalendarDate.date=="2016-06-08") &
    (Trip.direction_id=="1") &
    Route.route_id.in_([
        'Bu-16APR', # Baby Bullet
        'Li-16APR', # Limited
        'Lo-16APR'  # Local
    ]))

# Sort by stop time since all trips begin at the same stop
def trip_sort(trip):
    """Return the sort key for a trip: the time at its first stop.

    Uses the first stop time's ``arrival_time`` and falls back to its
    ``departure_time`` only when the arrival is missing (``None``).

    Raises:
        IndexError: if ``trip.stop_times`` is empty.
    """
    start = trip.stop_times[0]
    # Compare against None explicitly: a time value of 0 is falsy but is
    # still a real value, so it must not trigger the departure fallback.
    if start.arrival_time is not None:
        return start.arrival_time
    return start.departure_time

# Materialize the filtered trips, ordered by the time at their first stop.
trips = sorted(dao.trips(fltr=fltr), key=trip_sort)


# In[4]:


# A pattern is the ordered tuple of stop ids that a trip visits; keeping them
# in a set leaves one entry per distinct stop sequence.
patterns = {
    tuple(visit.stop_id for visit in trip.stop_times)
    for trip in trips
}


# In[5]:


# Build a directed graph of the stop patterns: every pair of consecutive
# stops within a pattern becomes an edge.
G = nx.DiGraph()
for stop_sequence in patterns:
    G.add_edges_from(pairwise(stop_sequence))

# Give each node a display label: the stop's name up to the first
# occurrence of "Caltrain", with surrounding whitespace removed.
for node_id, attrs in G.nodes(data=True):
    full_name = dao.stop(node_id).stop_name
    attrs["label"] = full_name.partition("Caltrain")[0].strip()


# In[6]:


df = pd.read_csv("./data/2016 Average Weekday Passenger Counts by Train.csv")

# Index the counts as ridership[trip_short_name][stop_name] -> the CSV row
# as a plain dict. Train numbers are converted to str for use as keys
# (presumably so they compare equal to trip.trip_short_name — confirm).
ridership = {}
for _, record in df.iterrows():
    train = str(record["trip_short_name"])
    ridership.setdefault(train, {})[record["stop_name"]] = dict(record)


# In[7]:


# Column order for the timetables: the node labels taken in a topological
# order of the pattern graph, so a stop is listed after every stop that
# precedes it in some pattern.
# This only works if the patterns form a directed-acyclic graph
# NOTE(review): `G.node` is the older NetworkX accessor; newer releases
# expose `G.nodes[...]` instead — confirm the installed version.
header = [G.node[stop_id]["label"] for stop_id in nx.topological_sort(G)]


# In[8]:


# Draw the pattern graph with nxpd; show='ipynb' requests notebook output.
draw(G, show='ipynb')


# In[9]:


def build_trip_row(trip, cell_func):
    """Build one timetable row for a trip.

    The row starts with the trip's short name, followed by one cell per
    stop name in ``header``. A cell holds ``cell_func(stop_time, riders)``
    when the trip both serves that stop and has ridership data for it.
    When either is missing, or when the computed value is falsy, the cell
    is "-".

    Raises:
        KeyError: if the trip's short name has no entry in ``ridership``.
    """
    counts_by_stop = ridership[trip.trip_short_name]

    # assumes one visit per stop: if a label repeats, the last stop time wins
    visits_by_label = {}
    for visit in trip.stop_times:
        visits_by_label[G.node[visit.stop_id]["label"]] = visit

    cells = [trip.trip_short_name]
    for label in header:
        if label not in counts_by_stop or label not in visits_by_label:
            cells.append("-")
            continue
        value = cell_func(visits_by_label[label], counts_by_stop[label])
        cells.append(value or "-")
    return cells

def build_trip_rows(cell_func):
    """Return one row per trip that has ridership data, in ``trips`` order.

    Trips whose short name has no entry in ``ridership`` are skipped. Each
    remaining trip is turned into a row by ``build_trip_row``.
    """
    return [
        build_trip_row(trip, cell_func)
        for trip in trips
        if trip.trip_short_name in ridership
    ]

def build_timetable(cell_func):
    """Return the full table: a header row followed by one row per trip.

    The header row is an empty corner cell followed by the stop labels in
    ``header``; the trip rows come from ``build_trip_rows(cell_func)``.
    """
    timetable = [[""] + header]
    timetable.extend(build_trip_rows(cell_func))
    return timetable


# ## Timetable

# In[10]:


def stop_time(stime, riders):
    """Cell function: the formatted time at a stop.

    Formats the stop time's ``arrival_time`` with ``fmttime``, falling back
    to ``departure_time`` only when the arrival is ``None``. ``riders`` is
    unused; it is accepted so this function has the same signature as the
    other cell functions passed to ``build_timetable``.
    """
    # Test against None rather than truthiness so that a time value of 0
    # (presumably midnight in gtfslib's representation — confirm) is
    # formatted instead of being replaced by the departure time.
    when = stime.arrival_time
    if when is None:
        when = stime.departure_time
    return fmttime(when)

# Show the scheduled times. build_timetable yields one row per trip, so the
# table is transposed to get one row per stop and one column per trip.
make_table(transpose(build_timetable(stop_time)))


# ## Boardings

# In[11]:


def boardings(stime, riders):
    """Cell function: the "boardings" value for a stop, passed through round().

    ``stime`` is unused; it is accepted so this function has the same
    signature as the other cell functions.
    """
    count = riders["boardings"]
    return round(count)

# Show boardings per stop (rows) and trip (columns); the trip-per-row table
# from build_timetable is transposed first.
make_table(transpose(build_timetable(boardings)))


# ## Alightings

# In[12]:


def alightings(stime, riders):
    """Cell function: the "alightings" value for a stop, passed through round().

    ``stime`` is unused; it is accepted so this function has the same
    signature as the other cell functions.
    """
    count = riders["alightings"]
    return round(count)

# Show alightings per stop (rows) and trip (columns); the trip-per-row table
# from build_timetable is transposed first.
make_table(transpose(build_timetable(alightings)))


# ## Boardings and alightings

# In[13]:


def boardings_and_alightings(stime, riders):
    """Cell function: boardings and alightings combined as "+B/-A".

    Both counts are passed through round() before formatting. ``stime`` is
    unused; it is accepted so this function has the same signature as the
    other cell functions.
    """
    ons = round(riders["boardings"])
    offs = round(riders["alightings"])
    return "+%d/-%d" % (ons, offs)

# Show the combined "+boardings/-alightings" cells per stop (rows) and trip
# (columns); the trip-per-row table from build_timetable is transposed first.
make_table(transpose(build_timetable(boardings_and_alightings)))


# ## Onboard

# In[14]:


def onboard(stime, riders):
    """Cell function: the "onboard" value for a stop, passed through round().

    ``stime`` is unused; it is accepted so this function has the same
    signature as the other cell functions.
    """
    count = riders["onboard"]
    return round(count)

# Show the onboard count per stop (rows) and trip (columns); the
# trip-per-row table from build_timetable is transposed first.
make_table(transpose(build_timetable(onboard)))


# Did you enjoy this post? Let me know on [twitter](https://twitter.com/invisiblefunnel). Also, Remix is [hiring](https://www.remix.com/jobs).