#!/usr/bin/env python
# coding: utf-8

# Table of Contents

#
# # Jupyter DataTables 0.3.0 - ChartJS
# ## New features
#
# - **ChartJS** charts (see https://github.com/CermakM/jupyter-datatables/issues/9)
#     - [x] Create `Bar` graph object
#     - [x] Create `CategoricalBar` graph object
#     - [x] [optional] Create `Line` graph object
#     - [x] [optional] Create `Scatter` graph object
#     - [x] Create `Histogram` graph object
#     - [x] Create `TimeSeries` graph object
#           Implemented via `Linear` with timeseries index
#     - [x] ChartJS graphs are persistent
#     - [x] [stretch] There is a link between the table and ChartJS tooltip
#
# - **modular** architecture (see https://github.com/CermakM/jupyter-datatables/issues/10)
#     - [x] it is possible to add custom data type mapping from Jupyter Notebook
#     - [x] it is possible to map data types to custom plotting function directly from Jupyter Notebook
#     - [x] custom graph objects
#
# - interactive **tooltips**
# - static mode is more explanatory
# - sample size includes outliers

# ## Setup

# In[1]:


get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# In[2]:


import sys
import string

import numpy as np
import pandas as pd


# In[3]:


# Make the package in the parent directory importable; this must run before
# the `jupyter_datatables` import below.
sys.path.insert(0, '../')


# In[4]:


from jupyter_datatables import init_datatables_mode


# In[5]:


init_datatables_mode()


# ---

# ## Data

# In[6]:


df = pd.DataFrame(np.random.randn(50, 5), columns=list(string.ascii_uppercase[:5]))
df_long = pd.DataFrame(np.random.randn(int(1e5), 5), columns=list(string.ascii_uppercase[:5]))
df_wide = pd.DataFrame(np.random.randn(50, 20), columns=list(string.ascii_uppercase[:20]))

# Bucket labels "0 - 9", "10 - 19", ..., "90 - 99" for the categorical frame.
labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)]

df_categorical = pd.DataFrame({'value': np.random.randint(0, 100, 20)})
df_categorical['group'] = pd.cut(df_categorical.value, range(0, 105, 10), right=False, labels=labels)


# In[7]:


# Small mixed-dtype frame (float, int, string, timestamp, float32, bool).
dft = pd.DataFrame({
    'A': np.random.rand(5),
    'B': [1, 1, 3, 2, 1],
    'C': 'This is a very long sentence that should automatically be trimmed',
    'D': [
        pd.Timestamp('20010101'),
        pd.Timestamp('20010102'),
        pd.Timestamp('20010103'),
        pd.Timestamp('20010104'),
        pd.Timestamp('20010105'),
    ],
    'E': pd.Series([1.0] * 5).astype('float32'),
    'F': [False, True, False, False, True],
})

dft.D = dft.D.apply(pd.to_datetime)
dft.set_index('D', inplace=True)

# Clear the index name by assignment: `del dft.index.name` raises
# AttributeError on pandas >= 1.0, where `Index.name` is a property without
# a deleter. Assigning `None` works on old and new pandas alike.
dft.index.name = None


# ---

# ## Representation

# In[8]:


df


# In[9]:


df_long


# Notice the automatic sampling, we sampled to 5,902 samples out of 100,000 while still preserving value of the data!
#
# If you wish, however, to disable that feature, you may do so:

# In[10]:


from jupyter_datatables.config import defaults

defaults.sample_size = 1000


# In[11]:


df_long


# And to allow sampling again simply set `sample_size` to `None`:

# In[12]:


defaults.sample_size = None


# Sampling can also be disabled completely (although it is not recommended). The `defaults.limit` specifies the limit above which a sample size is computed.

# In[13]:


defaults.limit = None


# Let's take a sample from the table of size 10,000, otherwise the computation would take a while and will consume quite a lot of resources

# In[14]:


df_long.sample(10000)


# Wide DataTables work as expected:

# In[15]:


df_wide


# ## Support for Indices (including `Date` dtype)

# Let's change the default plot for `num` from `Histogram` to `Line` and check our timeseries-like DataFrame

# In[16]:


get_ipython().run_cell_magic('requirejs', '', "\n$.fn.dataTable.defaults.dTypePlotMap['num'].unshift('Line')\n")


# In[17]:


dft


# ---

# ## Customization

# In[18]:


get_ipython().run_line_magic('load_ext', 'jupyter_require')


# In[19]:


# NOTE(review): the element tag inside `$("...")` was lost when this file was
# extracted (the literal was split by a raw newline, which is a syntax error).
# `<pre>` is presumed because the payload is pretty-printed JSON; confirm
# against the original notebook.
get_ipython().run_cell_magic('requirejs', '', '\nlet defaultElementConfig = $("<pre>").html(JSON.stringify(Chart.defaults.global.elements, null, 4))\n\nelement.append(defaultElementConfig)\n')


# Check out [ChartJS](https://www.chartjs.org/docs/latest/general/) docs for more information about default settings

# ---

# ## Custom Graph Objects

# You can create your custom GraphObjects by implementing a function of the following specification:
# 
# ```ts
# interface Index {
#     data: Array,
#     dtype: string
# }
# 
# function(data: Array, index: Array<Index>, dtype: string): Chart
# ```

# Suppose we want to plot colours and we want a special kind of plot for that

# In[20]:


# Register a custom `ColorPalette` graph object via the `requirejs` cell magic
# (with 'chartjs' passed as the magic's line argument). The JavaScript payload:
#   - `isValidColour(colour)`: assigns the string to the `style.color` of a
#     throw-away `Option` element and treats a non-empty result as valid;
#     otherwise it logs a debug message and yields a falsy value.
#   - `ColorPalette(data, index, dtype)`: returns nothing unless every datum
#     is a string that passes `isValidColour`; otherwise it builds a ChartJS
#     'pie' chart with one equal slice per colour (1 / data.length, formatted
#     by `toFixed(2)`), labelled from `index[0].data` and filled with the
#     colour values themselves.
#   - finally stores the function under
#     `$.fn.dataTable.defaults.graphObjects['ColorPalette']`.
get_ipython().run_cell_magic('requirejs', 'chartjs', '\nlet isValidColour = function(colour) {\n    let s = new Option().style\n    s.color = colour\n    \n    return s.color !== \'\' || console.debug(`Invalid CSS colour: \'${colour}\'.`)\n}\n\nlet ColorPalette = function(data, index, dtype) {\n    const canvas = document.createElement(\'canvas\')\n    const ctx    = canvas.getContext(\'2d\')\n    \n    // perform check if the pattern is correct\n    if ( !data.every( d => typeof(d) === \'string\' && isValidColour(d) ) ) {\n        console.debug("Data does not match colour pattern.")\n        return\n    }\n    \n    // evenly slice the Pie chart by number of colours\n    const slices = new Array(data.length).fill(Number(1 / data.length).toFixed(2))\n    const labels = index[0].data\n    \n    let chart = new Chart(ctx, {\n        type: \'pie\',\n        data: {\n            labels: labels,\n            datasets: [{\n                data: slices,\n                backgroundColor: data,\n            }]\n        },\n    })\n    \n    return chart\n}\n\n// Register the new chart\n$.fn.dataTable.defaults.graphObjects[\'ColorPalette\'] = ColorPalette\n')


# And set it as default for the dtype you want to use it for (in this case `string`):
# 
#     The default setting is:
#    
# ```
#    { 
#        boolean:  ['CategoricalBar', 'Histogram'],
#        date:     ['CategoricalBar', 'Histogram'],
#        num:      ['Histogram', 'CategoricalBar', 'Bar', 'Line'],
#        string:   ['CategoricalBar', 'Histogram'],
# 
#        undefined: ['Bar']
#    }
#     
# ```
# 
#     The order specifies fallback plots.

# In[21]:


# Prepend 'ColorPalette' to the plot list for the `string` dtype so it is
# tried before the other charts in the fallback order.
get_ipython().run_cell_magic(
    magic_name="requirejs",
    line="",
    cell="\n$.fn.dataTable.defaults.dTypePlotMap['string'].unshift('ColorPalette')\n",
)


# In[22]:


# Demo frame for the `ColorPalette` chart: one CSS colour specifier per row,
# indexed by a human-readable colour name.
df_colours = pd.DataFrame([
    {
        "colour": "red",
        "value" : "rgb(255, 99, 132)",
    },
    {
        "colour": "blue",
        "value" : "rgb(54, 162, 235)"
    },
    {
        "colour": "lightyellow",
        "value" : "rgba(255, 205, 86, 0.3)"  # alpha values via `rgba()`
    },
    {
        "colour": "darkorange",
        "value" : "darkorange"  # any valid CSS specifier
    }
])

df_colours.set_index("colour", inplace=True)

# As of v0.3.0, DataTables do not support index names properly, so clear it.
# Assign `None` rather than `del df_colours.index.name`: on pandas >= 1.0
# `Index.name` is a property without a deleter and `del` raises
# AttributeError. Assignment works on old and new pandas alike.
df_colours.index.name = None

df_colours


# We fall back to the default chart if the colour value is invalid based on our check and use the second chart in order:

# In[23]:


# Counter-example frame: two of the four values ("invalid") are not CSS
# colours, so the `ColorPalette` validity check is expected to reject the
# column and the next chart in the fallback order is used instead.
df_other = pd.DataFrame([
    {
        "colour": "red",
        "value" : "red",
    },
    {
        "colour": "green",
        "value" : "invalid",
    },
    {
        "colour": "blue",
        "value" : "blue",
    },
    {
        "colour": "other",
        "value" : "invalid",
    }
])

df_other.set_index("colour", inplace=True)

# Clear the index name by assignment: `del df_other.index.name` raises
# AttributeError on pandas >= 1.0, where `Index.name` is a property without
# a deleter. Assigning `None` works on old and new pandas alike.
df_other.index.name = None

df_other