%matplotlib inline import matplotlib.pyplot as plt import numpy as np import psycopg2 import pandas as pd from mpl_toolkits.basemap import Basemap from matplotlib.collections import LineCollection import fiona from shapely.geometry import Point, LineString from shapely.wkt import loads as wkt_load from itertools import izip import math db = psycopg2.connect('dbname=bondis_bahia user=manuel') colors = ['#1f77b4','#ff7f0e','#2ca02c','#d62728','#9467bd','#8c564b','#e377c2','#7f7f7f','#bcbd22','#17becf'] def plot_temporal_profile(weekly_profile, xsize=15): a = np.zeros((7,24)) plt.figure(figsize=(12,24/7.)) ax = plt.gca() plt.xticks(range(0,24)) t = ax.transData canvas = ax.figure.canvas plt.title("Perfil del usuario: %s" % weekly_profile['id_tarjeta'][0]) for dow, hour, trips in zip(weekly_profile['day_of_week'], weekly_profile['hour'] / 100, weekly_profile['trips']): a[dow][int(hour)] = trips text = ax.text(int(hour) - 0.25, dow + 0.1, trips, transform=t, color='black') text.draw(canvas.get_renderer()) plt.imshow(a,interpolation='nearest', cmap='Blues') plot_temporal_profile(pd.read_sql("select * from get_user_temporal_profile(11061406, '1 hour')", db)) plot_temporal_profile(pd.read_sql("select * from get_user_temporal_profile(99019579, '1 hour')", db)) avg_trips_per_hour = pd.read_sql("""with viajes_por_dia_y_hora as ( select date_trunc('day', fechahora) as dia, date_part('hour', fechahora) as hora, sum(pasajeros) as p from omnibus_usuarios where date_part('month', fechahora) = 8 and extract(dow from fechahora) <> 0 and extract(dow from fechahora) <> 6 group by dia, hora) select hora, avg(p) from viajes_por_dia_y_hora group by hora order by hora""", db) avg_trips_per_hour_by_type = pd.read_sql("""select date_trunc('day', fechahora) as dia, date_part('hour', fechahora) as hora, tipo_pasaje, sum(pasajeros) as p from omnibus_usuarios where date_part('month', fechahora) = 8 and extract(dow from fechahora) <> 0 and extract(dow from fechahora) <> 6 group by dia, hora, tipo_pasaje""", db) plt.figure(figsize=(15,15)) plt.subplot2grid((3,2), (0,0), colspan=2) plt.title('Todos los tipos de pasaje') plt.ylim(0,10000) avg_trips_per_hour['avg'].plot(kind='bar', color=colors[0]) tipos_de_pasaje = ('Pesos', 'Frecuente', 'Libre', 'Viaje') for i, t in enumerate(tipos_de_pasaje): plt.subplot2grid((3,2), ((i / 2) + 1,i % 2)) plt.ylim(0,10000) plt.xlim(0,23) plt.title('Tipo de pasaje: %s' % t) avg_trips_per_hour_by_type[avg_trips_per_hour_by_type['tipo_pasaje'] == t].groupby(['hora'])['p'].mean().plot(kind='bar', color=colors[i+1]) plt.savefig('avg_trips.png', format='png') encadenados_cte = """with encadenados as (select o1.fechahora as fechahora1, o1.locacion as locacion1, o1.linea as linea1, o2.fechahora as fechahora2, o2.locacion as locacion2, o2.linea as linea2, st_distance_sphere(o1.locacion, o2.locacion) as distancia from omnibus_usuarios o1 inner join omnibus_usuarios o2 on o2.id_tarjeta = o1.id_tarjeta where abs(extract(epoch from o2.fechahora - o1.fechahora)) < 45*60 -- menos de 45m entre viajes consecutivos and o2.fechahora > o1.fechahora and o1.linea <> o2.linea -- distintas lineas and date_trunc('day', o2.fechahora) = date_trunc('day', o1.fechahora) -- misma fecha order by distancia desc)""" encadenados = pd.read_sql(encadenados_cte + """select linea1, linea2, count(*) as c from encadenados group by linea1, linea2 order by linea1, linea2""", db) lineas = ['500','502','503','504','505','506','507','509','512','513','513E','514','516','517','518','519','519A'] pares = np.zeros((17, 17)) for linea1, linea2, viajes in zip(encadenados['linea1'], encadenados['linea2'], encadenados['c']): pares[lineas.index(linea1)][lineas.index(linea2)] = viajes plt.figure(figsize=(10,10)) plt.xticks(range(0,17), lineas) plt.yticks(range(0,17), lineas) plt.imshow(pares,interpolation='nearest', cmap='Reds') c = plt.colorbar() import itertools def plot_encadenados(linea1, linea2): encadenados_l1_l2 = pd.read_sql(""" with encadenados as (select o1.fechahora as fechahora1, o1.locacion as locacion1, o1.linea as linea1, o2.fechahora as fechahora2, o2.locacion as locacion2, o2.linea as linea2, st_distance_sphere(o1.locacion, o2.locacion) as distancia from omnibus_usuarios o1 inner join omnibus_usuarios o2 on o2.id_tarjeta = o1.id_tarjeta where abs(extract(epoch from o2.fechahora - o1.fechahora)) < 45*60 -- menos de 45m entre viajes consecutivos and o2.fechahora > o1.fechahora and o1.linea = %s and o2.linea = %s -- distintas lineas and date_trunc('day', o2.fechahora) = date_trunc('day', o1.fechahora) -- misma fecha order by distancia desc) select linea1, linea2, date_round(fechahora1, '30 minutes') as fechahora, count(*) as c, st_x(locacion1) as longitud1, st_y(locacion1) as latitud1, st_x(locacion2) as longitud2, st_y(locacion2) as latitud2 from encadenados where linea1 = %s and linea2 = %s and date_part('hour', date_round(fechahora1, '30 minutes')) < 9 group by linea1, linea2, locacion1, locacion2, fechahora """, params=(linea1, linea2, linea1, linea2), con=db) shp = fiona.open('shps/bahia_4326_2.shp') bds = shp.bounds shp.close() extra = 0.01 ll = (bds[0], bds[1]) ur = (bds[2], bds[3]) coords = list(itertools.chain(ll, ur)) w, h = coords[2] - coords[0], coords[3] - coords[1] plt.clf() fig = plt.figure(figsize=(18,18)) ax = fig.add_subplot(111, axisbg='w', frame_on=False) m = Basemap( projection='tmerc', lon_0=-62.2584, lat_0=-38.6879, ellps = 'WGS84', llcrnrlon=coords[0] - extra * w, llcrnrlat=coords[1] - extra + 0.01 * h, urcrnrlon=coords[2] + extra * w, urcrnrlat=coords[3] + extra + 0.01 * h, lat_ts=0, resolution='i', suppress_ticks=True) # transformar puntos a las coordenadas proyectadas del mapa map_points_514 = pd.Series( [Point(m(mapped_x, mapped_y)) for mapped_x, mapped_y in zip(encadenados_l1_l2['longitud1'], encadenados_l1_l2['latitud1'])]) map_points_517 = pd.Series( [Point(m(mapped_x, mapped_y)) for mapped_x, mapped_y in zip(encadenados_l1_l2['longitud2'], encadenados_l1_l2['latitud2'])]) m.scatter( [geom.x for geom in map_points_514], [geom.y for geom in map_points_514], s=[math.sqrt(trips) * 20 for trips in encadenados_l1_l2['c']], marker='o', lw=.55, facecolor='red', edgecolor='w', alpha=1, antialiased=True, zorder=3) m.scatter( [geom.x for geom in map_points_517], [geom.y for geom in map_points_517], s=[math.sqrt(trips) * 15 for trips in encadenados_l1_l2['c']], marker='o', lw=.55, facecolor='blue', edgecolor='w', alpha=1, antialiased=True, zorder=3) x = pd.read_sql("select linea, st_astext(recorrido) as r from omnibus_recorridos where linea = %s or linea = %s", params=(linea1, linea2), con=db) for r,linea in izip([wkt_load(g) for g in x[u'r']], x['linea']): col = LineCollection([[m(*c) for c in r.coords]]) col.set_color((1,0,0,0.5) if linea == linea1 else (0,0,1,0.5)) ax.add_collection(col) d = m.readshapefile( 'shps/bahia_4326_2', 'bahia_4326', color=(0.1,0.1,0.1,0.3), zorder=2) plot_encadenados('517', '514') # plt.savefig('mapa.pdf', format='pdf')