# HoloViews supplies the elements, containers and option objects used below.
import holoviews as hv
from holoviews import opts, dim
# Select bokeh as the plotting backend for HoloViews.
hv.extension('bokeh')
import pandas as pd
import numpy as np
import itertools
!conda env export --no-builds | grep -v "^prefix: " > ../environments/styling_by_attribute.yml
Consider an example dataframe:
# Long-format example table: `Time` counts 0..99 nine times over, `y` is one
# cumulative sum over 900 normal samples, and every block of 100 rows carries
# a distinct (gridsize, turbulence) pair drawn from {0, 1, 2} x {0, 1, 2}.
df = pd.DataFrame(
    dict(
        Time=np.tile(np.arange(100), 9),
        y=np.cumsum(np.random.randn(900), axis=0),
        # gridsize varies fastest: 0, 1, 2, 0, 1, 2, ...
        gridsize=np.repeat([0, 1, 2] * 3, 100),
        # turbulence varies slowest: 0, 0, 0, 1, 1, 1, ...
        turbulence=np.repeat(np.repeat([0, 1, 2], 3), 100),
    )
)
df.head()
Time | y | gridsize | turbulence | |
---|---|---|---|---|
0 | 0 | -0.177242 | 0 | 0 |
1 | 1 | -0.971227 | 0 | 0 |
2 | 2 | -0.031994 | 0 | 0 |
3 | 3 | 0.082544 | 0 | 0 |
4 | 4 | 0.242916 | 0 | 0 |
Given the Dataframe df
, how do we display all the y
values as a timeseries, while keeping the information about gridsize
and turbulence
in the visual appearance of each line or marker?
There are several possibilities:
Declare additional kdim in hv.Dataset
, then use .to()
to obtain a HoloMap
.
Declare additional vdim, then use dim(...)
as styling parameter.
Pre-filter pandas.DataFrame
or xarray.Dataset
, then set group=
or label=
while looping through the respective Elements. Afterwards, access the group
s as e.g. opts.Scatter('mygroup')
etc.
Given a number of Elements, one can always provide explicit hv.Cycle
s through the various attributes, e.g.:
# Every (color, marker) combination, colors varying slowest.
styles = itertools.product(('blue', 'orange'), ('<', 'o', 's'))
# Transpose the pairs into one tuple of colors and one tuple of markers;
# this consumes the `styles` iterator.
a, b = tuple(zip(*styles))
# Explicit cycles: the n-th Element gets the n-th color and the n-th marker.
opts.Scatter(
    color=hv.Cycle([*a]),
    marker=hv.Cycle([*b]),
)
Options('Scatter', color=Cycle(['blue', 'blue', 'blue', 'orange', 'orange', 'orange']), marker=Cycle(['<', 'o', 's', '<', 'o', 's']))
A more flexible way, which does not assume anything about the order of these Elements, is to push additional key dimensions into an NdOverlay. If you are unsure about how this works, you should read up on dimensioned containers, a really powerful way of faceting the same data by laying them out next to, on top of, or behind each other (using sliders and dropdown menus), see e.g. here: Dimensioned containers
def cycle_kdim_opts(layout, kdim_opts):
    """
    For each given kdim of an Nd holoviews container, create an options dict
    that can be passed into a holoviews `opts` object.

    Every distinct value of a key dimension is assigned one of the style
    alternatives given for that dimension. The returned cycles hold one entry
    per key of `layout.data`, in the order of those keys.

    Parameters
    ----------
    layout : A holoviews Nd container (HoloMap, ...)
        Only `layout.kdims` (objects with a `name`) and the key tuples of
        `layout.data` are read.
    kdim_opts : dict of the form {kdim: {style_option: [alternatives]}}
        Key dimensions of `layout` that are not mentioned are left unstyled.
        A kdim may carry several style options. Alternatives are assigned to
        the distinct kdim values in order of first appearance and are reused
        from the start if there are fewer alternatives than distinct values.
        For an example, see below.

    Returns
    -------
    dict of the form {style_option: hv.Cycle}

    Raises
    ------
    KeyError
        If `kdim_opts` names a dimension that is not a kdim of `layout`.
    ValueError
        If a style option is given an empty list of alternatives.
    """
    # Output shown for:
    # kdim_opts = {
    #     'h': {'color': ['orange', 'cyan']},
    #     'g': {'size': [30, 10]},
    # }

    # Transpose the key tuples into one column of values per key dimension.
    values = {kd.name: list(column)
              for kd, column in zip(layout.kdims, zip(*layout.data.keys()))}
    # print(values)
    # {'g': ['a', 'b', 'b'], 'h': ['d', 'c', 'd']}

    mapped_styles = {}
    for kdim, style_opts in kdim_opts.items():
        if kdim not in values:
            raise KeyError(
                "%r is not a key dimension of the given container "
                "(available: %s)" % (kdim, sorted(values)))
        column = values[kdim]
        # De-duplicate in order of first appearance. A set would make the
        # value -> style assignment depend on hash order, i.e. it could
        # differ from one interpreter run to the next.
        distinct = list(dict.fromkeys(column))
        for style_option, alternatives in style_opts.items():
            alternatives = list(alternatives)
            if not alternatives:
                raise ValueError(
                    "No alternatives given for style option %r of kdim %r"
                    % (style_option, kdim))
            # Wrap around so that surplus distinct values still get a style.
            lookup = {value: alternatives[i % len(alternatives)]
                      for i, value in enumerate(distinct)}
            # print(lookup)
            # 'h': {'d': 'orange', 'c': 'cyan'}, 'g': {'a': 30, 'b': 10}
            mapped_styles[style_option] = hv.Cycle(
                [lookup[value] for value in column])
    # print(mapped_styles)
    # {'color': Cycle(['orange', 'cyan', 'orange']), 'size': Cycle([30, 10, 10])}
    return mapped_styles
# Two small sample tables sharing the columns asset / x / y.
df1 = pd.DataFrame(dict(asset=['A', 'B', 'B'], x=[1., 2., 3.], y=[1., 2., 3.]))
df2 = pd.DataFrame(dict(asset=['A', 'B', 'B', 'C'], x=[1.5, 2.5, 3.5, 4], y=[1., 2., 3., 1.]))
# Tag each table with its origin, then combine them with an outer merge on
# all shared columns.
df = pd.merge(df1.assign(source='exp'), df2.assign(source='mod'), how='outer')
# One text label per row at (x, y), reading "<asset>,\n<source>".
labels = hv.Labels(
    df.assign(l=df['asset'] + ',\n' + df['source']),
    kdims=['x', 'y'],
    vdims='l',
)
# Declare asset and source as extra key dimensions, convert to Points and
# overlay the result so that each Element can be styled separately.
l = hv.Dataset(
    df, kdims=['x', 'y', 'asset', 'source'], vdims=[]
).to(hv.Points).overlay()
# Style alternatives per key dimension, consumed by cycle_kdim_opts.
od = dict(
    source=dict(size=[30, 10]),
    asset=dict(color=['orange', 'cyan', 'yellow']),
)
options = (
    opts.NdOverlay(width=500, show_legend=True, legend_position='right'),
    opts.Points(
        toolbar=None,
        title_format='',
        show_title=False,
        padding=.5,
        **cycle_kdim_opts(l, od),
    ),
)
l.opts(*options) * labels
Even shorter is the following way that became possible after writing the original version of this notebook:
# Same sample data as before: two tables with asset / x / y columns.
df1 = pd.DataFrame({'asset': list('ABB'), 'x': [1.0, 2.0, 3.0], 'y': [1.0, 2.0, 3.0]})
df2 = pd.DataFrame({'asset': list('ABBC'), 'x': [1.5, 2.5, 3.5, 4], 'y': [1.0, 2.0, 3.0, 1.0]})
# Mark where each row came from and outer-merge on the shared columns.
df = df1.assign(source='exp').merge(right=df2.assign(source='mod'), how='outer')
# Text annotations: asset and source, separated by a comma and a line break.
labels = hv.Labels(df.assign(l=df['asset'] + ',\n' + df['source']), ['x', 'y'], 'l')
# asset and source are declared as key dimensions next to x and y.
l = (
    hv.Dataset(df, kdims=['x', 'y', 'asset', 'source'], vdims=[])
    .to(hv.Points)
    .overlay()
)
# Kept as in the original cell; the options below do not read it.
od = dict(
    source={'size': [30, 10]},
    asset={'color': ['orange', 'cyan', 'yellow']},
)
options = (
    opts.NdOverlay(show_legend=True, legend_position='right', width=500),
    opts.Points(
        # Map each category of a dimension directly to a style value.
        color=dim('asset').categorize({'A': 'orange', 'B': 'cyan', 'C': 'yellow'}),
        size=dim('source').categorize({'exp': 30, 'mod': 10}),
        padding=.5,
        show_title=False,
        title_format='',
        toolbar=None,
    ),
)
l.opts(*options) * labels
See the following code shared by @philippjfr: https://github.com/pyviz/holoviews/issues/3534.
For this code to work, you need to declare additional vdims
, which you can then access like so:
# Long-format example table: nine blocks of 100 rows, one block per
# (gridsize, turbulence) combination.
df = pd.DataFrame({
    'Time': np.tile(np.arange(100), 9),
    'y': np.random.randn(900).cumsum(axis=0),
    'gridsize': np.repeat([0, 1, 2, 0, 1, 2, 0, 1, 2], 100),
    'turbulence': np.repeat([0, 0, 0, 1, 1, 1, 2, 2, 2], 100)})
# Build the label from the two columns directly. DataFrame.iterrows() turns
# each row into a single-dtype Series, which upcast the integer levels to
# float (because of `y`) and produced labels such as "Gridsize: 0.0".
df['label'] = [
    'Gridsize: %s, Turbulence: %s' % (gridsize, turbulence)
    for gridsize, turbulence in zip(df['gridsize'], df['turbulence'])
]
# One Curve of y over Time per label; gridsize and turbulence travel along as
# extra value dimensions so that the style options can refer to them.
curves = hv.Dataset(df, kdims=['Time']).to(
    hv.Curve,
    kdims='Time',
    vdims=['y', 'gridsize', 'turbulence'],
    groupby='label',
)
curves.overlay('label').opts(
    opts.Curve(
        # Color encodes gridsize, dash pattern encodes turbulence.
        color=dim('gridsize').categorize({0: 'red', 1: 'blue', 2: 'green'}),
        line_dash=dim('turbulence').categorize({0: 'solid', 1: 'dashed', 2: 'dotdash'}),
    ),
    opts.NdOverlay(width=900, legend_position='right'),
)
# Sample data: two tables with the columns asset, x and y.
df1 = pd.DataFrame(
    {'asset': ['A', 'B', 'B'], 'x': [1., 2., 3.], 'y': [1., 2., 3.]},
)
df2 = pd.DataFrame(
    {'asset': ['A', 'B', 'B', 'C'], 'x': [1.5, 2.5, 3.5, 4], 'y': [1., 2., 3., 1.]},
)
# Add a constant `source` column to each table before the outer merge.
df = pd.merge(
    left=df1.assign(source='exp'),
    right=df2.assign(source='mod'),
    how='outer',
)
# Text annotations built from asset and source.
labels = hv.Labels(
    df.assign(l=df['asset'] + ',\n' + df['source']),
    ['x', 'y'],
    'l',
)
# A single Points element: asset and source are value dimensions here, so
# they can be referenced through dim(...) in the style options.
l = hv.Points(df, kdims=['x', 'y'], vdims=['asset', 'source'])
options = (
    opts.NdOverlay(width=500, legend_position='right', show_legend=True),
    opts.Points(
        color=dim('asset').categorize({'A': 'orange', 'B': 'cyan', 'C': 'yellow'}),
        marker=dim('source').categorize({'exp': 'circle', 'mod': 'diamond'}),
        size=10,
        padding=.5,
        show_title=False,
        show_legend=True,
        toolbar=None,
    ),
)
l.opts(*options) * labels