Overview

This notebook demonstrates how to use Datashader to display large datasets inside a plotly FigureWidget. Change callbacks recompute the Datashader image whenever the axis range or the figure size changes.

Imports

In [1]:
# core
import io
import base64 
import time

# pandas
import pandas as pd

# numpy
import numpy as np

# scikit learn
from sklearn import datasets

# datashader
import datashader as ds
import datashader.transfer_functions as tf
from datashader.colors import inferno

Generate dataset

We will create a large dataset by stacking many copies of the Iris dataset, each perturbed with random Gaussian noise.

In [2]:
num_copies = 7000  # 150 iris rows x 7,000 copies = 1,050,000 rows

# Dedicated, seeded generator so the synthetic dataset is identical on every
# Restart & Run All (and the global numpy random state is left untouched).
rng = np.random.RandomState(0)

iris_data = datasets.load_iris()
feature_names = [name.replace(' (cm)', '').replace(' ', '_') for name in iris_data.feature_names]
iris_df_orig = pd.DataFrame(iris_data.data, columns=feature_names)
target_orig = iris_data.target + 1  # class labels shifted up by one

# frame of features: stack `num_copies` copies of the original rows (copy 0 first,
# then copy 1, ...) and add independent Gaussian noise to every value. Doing this
# on one array avoids building and concatenating thousands of temporary frames.
n_rows = num_copies * len(iris_df_orig)
iris_df = pd.DataFrame(
    np.tile(iris_data.data, (num_copies, 1))
    + rng.normal(scale=0.2, size=(n_rows, len(feature_names))),
    columns=feature_names,
)

# array of targets, repeated in the same copy-by-copy order as the feature rows
target = np.tile(target_orig, num_copies)

# dataframe that includes target as categorical
iris_target_df = pd.concat([iris_df, pd.Series(target, name='target', dtype='category')], axis=1)

iris_df.describe()
Out[2]:
sepal_length sepal_width petal_length petal_width
count 1.050000e+06 1.050000e+06 1.050000e+06 1.050000e+06
mean 5.843271e+00 3.053854e+00 3.758803e+00 1.198912e+00
std 8.493378e-01 4.763162e-01 1.769648e+00 7.867648e-01
min 3.559033e+00 1.114488e+00 2.296150e-01 -8.285020e-01
25% 5.155895e+00 2.740263e+00 1.638386e+00 3.924995e-01
50% 5.802342e+00 3.033030e+00 4.306771e+00 1.316363e+00
75% 6.444157e+00 3.345420e+00 5.142291e+00 1.829025e+00
max 8.717877e+00 5.090993e+00 7.581433e+00 3.250024e+00

Define DataShader image generation function

Define a function that takes the x/y axis ranges and the plot width/height and generates a Datashader image of the dataset. The image is returned as a PIL image object, or None if any of the inputs is None.

In [3]:
def gen_ds_image(x_range, y_range, plot_width, plot_height):
    """Rasterize sepal_length vs. sepal_width of `iris_target_df` with datashader.

    Parameters
    ----------
    x_range, y_range
        Axis extents handed to the datashader canvas.
    plot_width, plot_height
        Size of the canvas to render.

    Returns
    -------
    The shaded image converted with ``to_pil()``, or None when any of the
    four arguments is None.
    """
    required = (x_range, y_range, plot_width, plot_height)
    if any(value is None for value in required):
        return None

    canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height,
                       x_range=x_range, y_range=y_range)

    # Per-pixel point counts, kept separately for each category of 'target'
    category_counts = canvas.points(
        iris_target_df, 'sepal_length', 'sepal_width', agg=ds.count_cat('target'))

    # Colorize the counts, then spread the shaded pixels (circular, up to 5 px)
    shaded = tf.dynspread(tf.shade(category_counts),
                          threshold=0.95, max_px=5, shape='circle')
    return shaded.to_pil()

Define initial ranges and plot size

In [4]:
# Initial view in data coordinates: [min, max] along each axis
x_range = [3, 10]
y_range = [0, 6]

# Initial figure size
plot_width, plot_height = 700, 500
In [5]:
# Try out the image generation function on the initial view;
# the resulting PIL image is displayed in the following cell
initial_img = gen_ds_image(
    x_range=x_range,
    y_range=y_range,
    plot_width=plot_width,
    plot_height=plot_height,
)
In [6]:
# Bare expression as the last line of the cell so the notebook displays the image
initial_img
Out[6]:

Create FigureWidget with background image

In [7]:
import plotly.graph_objs as go
In [8]:
# The single trace is fully transparent: it is there only to initialize the
# axes over the initial ranges and to support autoresize.
f = go.FigureWidget(
    data=[
        dict(
            x=x_range,
            y=y_range,
            mode='markers',
            marker=dict(opacity=0),
        )
    ],
    layout=dict(width=plot_width, height=plot_height),
)
f
In [9]:
# Use the datashader rendering as the figure's background image.
# The image is anchored at (x, y) = (left edge, top edge) in axis coordinates
# and stretched over the full width/height of the initial ranges.
f.layout.images = [
    go.layout.Image(
        source=initial_img,  # plotly now performs auto conversion of PIL image to png data URI
        xref="x",
        yref="y",
        x=x_range[0],
        y=y_range[1],
        sizex=x_range[1] - x_range[0],
        sizey=y_range[1] - y_range[0],
        sizing="stretch",
        layer="below",
    )
]

Install change callback to update image on zoom/resize

In [10]:
def update_ds_image(layout, x_range, y_range, plot_width, plot_height):
    """Re-render the datashader background of figure `f` for a new view.

    Intended as a layout change callback: it receives the layout object
    followed by the current values of the watched properties.

    Parameters
    ----------
    layout
        The layout that changed (unused; the global figure `f` is updated).
    x_range, y_range
        Two-element [min, max] axis ranges of the new view.
    plot_width, plot_height
        New figure size.

    Returns
    -------
    None. When any input is None, or no image could be generated, the
    existing background image is left as it is.
    """
    # gen_ds_image treats a None input as "nothing to render". Mirror that here
    # *before* indexing the ranges, which would raise TypeError on None.
    if x_range is None or y_range is None or plot_width is None or plot_height is None:
        return

    # Render first so the figure is only touched once a new image is available;
    # assigning a None source would blank the background.
    new_source = gen_ds_image(x_range, y_range, plot_width, plot_height)
    if new_source is None:
        return

    img = f.layout.images[0]

    # Update with batch_update so all updates happen simultaneously
    with f.batch_update():
        img.x = x_range[0]
        img.y = y_range[1]
        img.sizex = x_range[1] - x_range[0]
        img.sizey = y_range[1] - y_range[0]
        img.source = new_source

# Install the callback on the layout. It runs exactly once per change event,
# even if several of the watched properties change together. The property
# values are passed to the callback in the order they are listed here.
f.layout.on_change(
    update_ds_image,
    'xaxis.range',  # -> x_range
    'yaxis.range',  # -> y_range
    'width',        # -> plot_width
    'height',       # -> plot_height
)

Image updates on drag zoom

In [11]:
# Select 'zoom' as the drag mode, then display the widget again so the
# zoom-triggered image updates can be tried out
f.layout.dragmode = 'zoom'
f
In [ ]: