#!/usr/bin/env python
# coding: utf-8

# # Overview
# This notebook demonstrates how to use DataShader to display large datasets
# inside a plotly `FigureWidget`. Change callbacks are used to recompute the
# datashader image whenever the axis range or figure size changes.

# ## Imports

# In[1]:


# core
import io
import base64
import time

# pandas
import pandas as pd

# numpy
import numpy as np

# scikit learn
from sklearn import datasets

# datashader
import datashader as ds
import datashader.transfer_functions as tf
from datashader.colors import inferno


# ## Generate dataset
# We will create a large dataset by duplicating the Iris dataset many times
# with random noise.

# In[2]:


num_copies = 7000  # 1,050,000 rows

iris_data = datasets.load_iris()
feature_names = [
    name.replace(' (cm)', '').replace(' ', '_')
    for name in iris_data.feature_names
]
iris_df_orig = pd.DataFrame(iris_data.data, columns=feature_names)
target_orig = iris_data.target + 1

# frame of features: every copy is the original frame plus fresh Gaussian noise
iris_df = pd.concat(
    [
        np.random.normal(scale=0.2, size=iris_df_orig.shape) + iris_df_orig
        for _ in range(num_copies)
    ]
).reset_index(drop=True)

# array of targets, repeated in the same order as the concatenated copies
target = [t for _ in range(num_copies) for t in target_orig]

# dataframe that includes target as categorical
iris_target_df = pd.concat(
    [iris_df, pd.Series(target, name='target', dtype='category')],
    axis=1,
)

iris_df.describe()


# ## Define DataShader image generation function
# Define a function that takes the x/y ranges and the plot width/height and
# generates a DataShader image of the dataset. The image will be returned as
# a PIL image object.

# In[3]:


def gen_ds_image(x_range, y_range, plot_width, plot_height):
    """Render the noisy Iris dataset as a DataShader image.

    Parameters
    ----------
    x_range, y_range : sequence of two numbers, or None
        Data-space extents of the canvas along each axis.
    plot_width, plot_height : int, or None
        Size of the canvas in pixels.

    Returns
    -------
    PIL image, or None
        The shaded scatter image, or ``None`` when any argument is ``None``
        (there is not enough information to build a canvas).
    """
    if (x_range is None or y_range is None
            or plot_width is None or plot_height is None):
        return None

    cvs = ds.Canvas(
        x_range=x_range,
        y_range=y_range,
        plot_height=plot_height,
        plot_width=plot_width,
    )
    # Per-pixel point counts, kept separately for each 'target' category
    agg_scatter = cvs.points(
        iris_target_df, 'sepal_length', 'sepal_width', ds.count_cat('target')
    )
    img = tf.shade(agg_scatter)
    # Enlarge isolated points (up to max_px) so sparse regions remain visible
    img = tf.dynspread(img, threshold=0.95, max_px=5, shape='circle')

    return img.to_pil()


# ## Define initial ranges and plot size

# In[4]:


x_range = [3, 10]
y_range = [0, 6]
plot_height = 500
plot_width = 700


# In[5]:


# Test image generation function and display the PIL image
initial_img = gen_ds_image(x_range, y_range, plot_width, plot_height)


# In[6]:


initial_img


# # Create FigureWidget with background image

# In[7]:


import plotly.graph_objs as go


# In[8]:


f = go.FigureWidget(
    # invisible trace to init axes and to support autoresize
    data=[{
        'x': x_range,
        'y': y_range,
        'mode': 'markers',
        'marker': {'opacity': 0},
    }],
    layout={'width': plot_width, 'height': plot_height},
)
f


# In[9]:


# Set background image
f.layout.images = [go.layout.Image(
    source=initial_img,  # plotly now performs auto conversion of PIL image to png data URI
    xref="x",
    yref="y",
    # The image is placed at the left (x min) / top (y max) of the data range
    # and stretched to cover the full extent of both axes.
    x=x_range[0],
    y=y_range[1],
    sizex=x_range[1] - x_range[0],
    sizey=y_range[1] - y_range[0],
    sizing="stretch",
    layer="below",
)]


# ## Install change callback to update image on zoom/resize

# In[10]:


def update_ds_image(layout, x_range, y_range, plot_width, plot_height):
    """Re-render the background image for the current view.

    Intended as an ``on_change`` callback of ``f.layout``: it receives the
    layout object followed by the current values of the watched properties.

    Parameters
    ----------
    layout
        The layout object that changed (unused; the figure ``f`` is updated
        directly).
    x_range, y_range : sequence of two numbers, or None
        Current ``xaxis.range`` and ``yaxis.range``.
    plot_width, plot_height : int, or None
        Current figure ``width`` and ``height``.
    """
    # gen_ds_image already treats a missing range as possible; indexing it
    # here first would raise TypeError, so keep the current image instead.
    if x_range is None or y_range is None:
        return

    img = f.layout.images[0]

    # Update with batch_update so all updates happen simultaneously
    with f.batch_update():
        img.x = x_range[0]
        img.y = y_range[1]
        img.sizex = x_range[1] - x_range[0]
        img.sizey = y_range[1] - y_range[0]
        img.source = gen_ds_image(x_range, y_range, plot_width, plot_height)


# Install callback to run exactly once if one or more of the following
# properties changes
#  - xaxis range
#  - yaxis range
#  - figure width
#  - figure height
f.layout.on_change(
    update_ds_image, 'xaxis.range', 'yaxis.range', 'width', 'height'
)


# ## Image updates on drag zoom

# In[11]:


f.layout.dragmode = 'zoom'
f


# In[ ]: