MapD Charting Example with Altair

Let's see if we can replicate this MapD charting example in Python with Altair, Vega Lite, and Vega:

In [1]:
import altair as alt
import jupyterlab_omnisci.vega_ibis
import ibis

alt.data_transformers.enable('ibis')
alt.renderers.enable('ibis')
Out[1]:
RendererRegistry.enable('ibis')
In [2]:
conn = ibis.mapd.connect(
    host='metis.mapd.com', user='mapd', password='HyperInteractive',
    port=443, database='mapd', protocol= 'https'
)
t = conn.table("flights_donotmodify")
In [3]:
t.info()
Table rows: 7009728

Column               Type                                     Non-null #
------               ----                                     ----------
flight_year          int16                                    7009728   
flight_month         int16                                    7009728   
flight_dayofmonth    int16                                    7009728   
flight_dayofweek     int16                                    7009728   
deptime              int16                                    6873482   
crsdeptime           int16                                    7009728   
arrtime              int16                                    6858079   
crsarrtime           int16                                    7009728   
uniquecarrier        String(nullable=True)                    7009728   
flightnum            int16                                    7009728   
tailnum              String(nullable=True)                    6926363   
actualelapsedtime    int16                                    6855029   
crselapsedtime       int16                                    7008884   
airtime              int16                                    6855029   
arrdelay             int16                                    6855029   
depdelay             int16                                    6873482   
origin               String(nullable=True)                    7009728   
dest                 String(nullable=True)                    7009728   
distance             int16                                    7009728   
taxiin               int16                                    6858079   
taxiout              int16                                    6872670   
cancelled            int16                                    7009728   
cancellationcode     String(nullable=True)                    137434    
diverted             int16                                    7009728   
carrierdelay         int16                                    1524735   
weatherdelay         int16                                    1524735   
nasdelay             int16                                    1524735   
securitydelay        int16                                    1524735   
lateaircraftdelay    int16                                    1524735   
dep_timestamp        Timestamp(timezone=None, nullable=True)  6873482   
arr_timestamp        Timestamp(timezone=None, nullable=True)  6858079   
carrier_name         String(nullable=True)                    7009728   
plane_type           String(nullable=True)                    6199619   
plane_manufacturer   String(nullable=True)                    6199619   
plane_issue_date     date                                     6199385   
plane_model          String(nullable=True)                    6199619   
plane_status         String(nullable=True)                    6199619   
plane_aircraft_type  String(nullable=True)                    6199619   
plane_engine_type    String(nullable=True)                    6199619   
plane_year           int16                                    5996236   
origin_name          String(nullable=True)                    7009728   
origin_city          String(nullable=True)                    7009728   
origin_state         String(nullable=True)                    7009728   
origin_country       String(nullable=True)                    7009728   
origin_lat           float32                                  7009728   
origin_lon           float32                                  7009728   
dest_name            String(nullable=True)                    7009728   
dest_city            String(nullable=True)                    7009728   
dest_state           String(nullable=True)                    7009728   
dest_country         String(nullable=True)                    7009728   
dest_lat             float32                                  7009728   
dest_lon             float32                                  7009728   
origin_merc_x        float32                                  7009728   
origin_merc_y        float32                                  7009728   
dest_merc_x          float32                                  7009728   
dest_merc_y          float32                                  7009728   
In [7]:
states = alt.selection_multi(fields=['origin_state'])
airlines = alt.selection_multi(fields=['carrier_name'])

flights_by_state = alt.Chart(
    t[t.origin_state, t.carrier_name],
    title="Total Number of Flights by State"
).transform_filter(
    airlines
).mark_bar().encode(
    x='count()',
    y=alt.Y('origin_state', sort=alt.Sort(encoding='x', order='descending')),
    color=alt.condition(states, alt.ColorValue("steelblue"), alt.ColorValue("grey"))
).add_selection(states).properties(
    height=400,
    width=600
)


carrier_delay = alt.Chart(
    t[t.depdelay, t.arrdelay, t.carrier_name, t.origin_state],
    title="Carrier Departure Delay by Arrival Delay (Minutes)"
).transform_filter(
    states
).transform_aggregate(
    depdelay='mean(depdelay)',
    arrdelay='mean(arrdelay)',
    groupby=["carrier_name"]
).mark_point(filled=True, size=200).encode(
    x='depdelay',
    y='arrdelay',
    color=alt.condition(airlines, alt.ColorValue("steelblue"), alt.ColorValue("grey")),
    tooltip=['carrier_name', 'depdelay', 'arrdelay']
).add_selection(airlines).properties(
    height=400,
    width=600
)


(flights_by_state | carrier_delay)
/usr/local/miniconda3/envs/jupyterlab-omnisci/lib/python3.7/site-packages/altair/utils/core.py:90: UserWarning: I don't know how to infer vegalite type from 'empty'.  Defaulting to nominal.
  "Defaulting to nominal.".format(typ))
Out[7]:
HConcatChart({
  hconcat: [Chart({
    data: Empty DataFrame
    Columns: [origin_state, carrier_name]
    Index: [],
    encoding: FacetedEncoding({
      color: ColorValue({
        condition: ColorValue({
          selection: 'selector001',
          value: 'steelblue'
        }),
        value: 'grey'
      }),
      x: X({
        shorthand: 'count()'
      }),
      y: Y({
        shorthand: 'origin_state',
        sort: Sort({
          encoding: 'x',
          order: 'descending'
        })
      })
    }),
    height: 400,
    mark: 'bar',
    selection: {'selector001': SelectionDef({
      fields: ['origin_state'],
      type: 'multi'
    })},
    title: 'Total Number of Flights by State',
    transform: [FilterTransform({
      filter: {'selection': 'selector002'}
    })],
    width: 600
  }), Chart({
    data: Empty DataFrame
    Columns: [depdelay, arrdelay, carrier_name, origin_state]
    Index: [],
    encoding: FacetedEncoding({
      color: ColorValue({
        condition: ColorValue({
          selection: 'selector002',
          value: 'steelblue'
        }),
        value: 'grey'
      }),
      tooltip: [Tooltip({
        shorthand: 'carrier_name'
      }), Tooltip({
        shorthand: 'depdelay'
      }), Tooltip({
        shorthand: 'arrdelay'
      })],
      x: X({
        shorthand: 'depdelay'
      }),
      y: Y({
        shorthand: 'arrdelay'
      })
    }),
    height: 400,
    mark: MarkDef({
      filled: True,
      size: 200,
      type: 'point'
    }),
    selection: {'selector002': SelectionDef({
      fields: ['carrier_name'],
      type: 'multi'
    })},
    title: 'Carrier Departure Delay by Arrival Delay (Minutes)',
    transform: [FilterTransform({
      filter: {'selection': 'selector001'}
    }), AggregateTransform({
      aggregate: [AggregatedFieldDef({
        as: 'depdelay',
        field: 'depdelay',
        op: 'mean'
      }), AggregatedFieldDef({
        as: 'arrdelay',
        field: 'arrdelay',
        op: 'mean'
      })],
      groupby: ['carrier_name']
    })],
    width: 600
  })]
})
In [ ]:
# jupyterlab_omnisci.vega_ibis.FALLBACK = False

# alt.Chart(t[t.dep_timestamp]).mark_line().encode(
#     x='monthdate(dep_timestamp):O',
#     y='count()'
# )