#!/usr/bin/env python
# coding: utf-8
#
# Table of Contents
#
# # Jupyter DataTables 0.3.0 - ChartJS
#
#
#
# ## New features
#
# - **ChartJS** charts (see https://github.com/CermakM/jupyter-datatables/issues/9)
# - [x] Create `Bar` graph object
# - [x] Create `CategoricalBar` graph object
# - [x] [optional] Create `Line` graph object
# - [x] [optional] Create `Scatter` graph object
# - [x] Create `Histogram` graph object
# - [x] Create `TimeSeries` graph object (implemented via `Linear` with a timeseries index)
# - [x] ChartJS graphs are persistent
# - [x] [stretch] There is a link between the table and ChartJS tooltip
#
# - **modular** architecture (see https://github.com/CermakM/jupyter-datatables/issues/10)
# - [x] it is possible to add custom data type mapping from Jupyter Notebook
# - [x] it is possible to map data types to custom plotting function directly from Jupyter Notebook
# - [x] custom graph objects
#
# - interactive **tooltips**
# - static mode is more explanatory
# - sample size includes outliers
# ## Setup
# In[1]:
# Load IPython's `autoreload` extension and switch it to mode 2, so that
# modules are reloaded before each cell runs (handy while the package under
# demonstration is still being edited).
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
# In[2]:
import sys
import string
import numpy as np
import pandas as pd
# In[3]:
# Put the parent directory (relative to the current working directory) first
# on the import path so that the `jupyter_datatables` import below looks
# there first.
# NOTE(review): presumably this is the local checkout of the package -- confirm.
sys.path.insert(0, '../')
# In[4]:
from jupyter_datatables import init_datatables_mode
# In[5]:
# NOTE(review): presumably switches DataFrame rendering in this notebook to
# interactive DataTables -- confirm against the package documentation.
init_datatables_mode()
# ---
# ## Data
# In[6]:
# Three numeric frames filled with standard-normal noise: a small one, a long
# one (100,000 rows) and a wide one (20 columns), with columns named after
# the leading uppercase ASCII letters.
df = pd.DataFrame(
    np.random.randn(50, 5),
    columns=list(string.ascii_uppercase)[:5],
)
df_long = pd.DataFrame(
    np.random.randn(100000, 5),
    columns=list(string.ascii_uppercase)[:5],
)
df_wide = pd.DataFrame(
    np.random.randn(50, 20),
    columns=list(string.ascii_uppercase)[:20],
)

# Bucket labels "0 - 9", "10 - 19", ..., "90 - 99".
labels = ["{0} - {1}".format(lower, lower + 9) for lower in range(0, 100, 10)]

# Twenty random integers in [0, 100), each assigned to its left-closed
# ten-wide bucket.
df_categorical = pd.DataFrame({'value': np.random.randint(0, 100, size=20)})
df_categorical['group'] = pd.cut(
    df_categorical['value'],
    bins=range(0, 105, 10),
    labels=labels,
    right=False,
)
# In[7]:
# Small mixed-dtype frame used for the timeseries-index demo: random floats,
# integers, one long string (a scalar, broadcast to every row), five
# consecutive daily timestamps, a float32 column and booleans.
dft = pd.DataFrame({
    'A': np.random.rand(5),
    'B': [1, 1, 3, 2, 1],
    'C': 'This is a very long sentence that should automatically be trimmed',
    'D': [
        pd.Timestamp('20010101'),
        pd.Timestamp('20010102'),
        pd.Timestamp('20010103'),
        pd.Timestamp('20010104'),
        pd.Timestamp('20010105'),
    ],
    'E': pd.Series([1.0] * 5).astype('float32'),
    'F': [False, True, False, False, True],
})
# Vectorised conversion of the whole column instead of a per-element `apply`.
dft['D'] = pd.to_datetime(dft['D'])
dft.set_index('D', inplace=True)
# Clear the index name by assignment. `del dft.index.name` raises
# AttributeError on pandas >= 1.0, where `Index.name` is a property without a
# deleter; assigning None works on old and new versions alike.
dft.index.name = None
# ---
# ## Representation
# In[8]:
# A bare expression as the last statement of a notebook cell displays its
# value; when this file is run as a plain script these lines have no visible
# effect.
df
# In[9]:
df_long
# Notice the automatic sampling, we sampled to 5,902 samples out of 100,000 while still preserving value of the data!
#
# If, however, you wish to disable that feature and use a fixed sample size instead, you may do so:
# In[10]:
from jupyter_datatables.config import defaults
# Set the package-wide `sample_size` default to a fixed value of 1000.
defaults.sample_size = 1000
# In[11]:
# Display the long frame again, now with the setting above in effect.
df_long
# And to allow automatic sampling again, simply set `sample_size` back to `None`:
# In[12]:
# Clear the fixed sample size that was set earlier.
defaults.sample_size = None
# Sampling can also be disabled completely (although it is not recommended). The `defaults.limit` setting specifies the limit above which a sample size is computed.
# In[13]:
# Setting the limit to None removes it.
defaults.limit = None
# Let's take a sample of size 10,000 from the table; otherwise the computation would take a while and consume quite a lot of resources
# In[14]:
# Display a random 10,000-row sample drawn from the 100,000-row frame.
df_long.sample(10000)
# Wide DataTables work as expected:
# In[15]:
df_wide
# ## Support for Indices (including `Date` dtype)
# Let's change the default plot for `num` from `Histogram` to `Line` and check our timeseries-like DataFrame
# In[16]:
# Run a JavaScript cell through the `requirejs` cell magic: `unshift` puts
# 'Line' at the front of the plot list registered for the `num` dtype.
# NOTE(review): the `jupyter_require` extension is only loaded explicitly
# further down in this file; presumably `init_datatables_mode()` has already
# registered the magic by this point -- confirm.
get_ipython().run_cell_magic('requirejs', '', "\n$.fn.dataTable.defaults.dTypePlotMap['num'].unshift('Line')\n")
# In[17]:
# Display the frame with the timestamp index.
dft
# ---
# ## Customization
# In[18]:
# Load the `jupyter_require` IPython extension.
# NOTE(review): presumably this is what provides the `requirejs` cell magic
# used below -- confirm.
get_ipython().run_line_magic('load_ext', 'jupyter_require')
# In[19]:
# Show ChartJS' default element configuration as pretty-printed JSON
# (4-space indent) in the cell output.
# The JSON is placed inside a freshly created `<pre>` element: an empty
# selector (`$("")`) yields an empty jQuery set, so `.html()` would have no
# element to fill and nothing would be appended to the output.
get_ipython().run_cell_magic('requirejs', '', '\nlet defaultElementConfig = $("<pre>").html(JSON.stringify(Chart.defaults.global.elements, null, 4))\n\nelement.append(defaultElementConfig)\n')
# Check out [ChartJS](https://www.chartjs.org/docs/latest/general/) docs for more information about default settings
# ---
# ## Custom Graph Objects
# You can create your custom GraphObjects by implementing a function of the following specification:
#
# ```ts
# interface Index {
# data: Array<any>,
# dtype: string
# }
#
# function(data: Array<any>, index: Array<Index>, dtype: string): Chart
# ```
# Suppose we wanna plot colours and we want a special kind of plot for that
# In[20]:
# Define and register a custom `ColorPalette` graph object in JavaScript via
# the `requirejs` cell magic ('chartjs' is passed as the magic's line
# argument -- presumably the module to require; confirm).
#
# The script does the following:
#   * `isValidColour(colour)` assigns the value to the `color` style of a
#     throw-away `Option` element and treats it as valid when the resulting
#     style is non-empty; otherwise it logs a debug message and the result
#     is falsy.
#   * `ColorPalette(data, index, dtype)` returns nothing unless every item in
#     `data` is a string that passes `isValidColour`. Otherwise it builds a
#     ChartJS pie chart with one equally sized slice per item, labelled with
#     `index[0].data` and filled with the colours taken from `data`.
#   * The function is stored under
#     `$.fn.dataTable.defaults.graphObjects['ColorPalette']`.
get_ipython().run_cell_magic('requirejs', 'chartjs', '\nlet isValidColour = function(colour) {\n let s = new Option().style\n s.color = colour\n \n return s.color !== \'\' || console.debug(`Invalid CSS colour: \'${colour}\'.`)\n}\n\nlet ColorPalette = function(data, index, dtype) {\n const canvas = document.createElement(\'canvas\')\n const ctx = canvas.getContext(\'2d\')\n \n // perform check if the pattern is correct\n if ( !data.every( d => typeof(d) === \'string\' && isValidColour(d) ) ) {\n console.debug("Data does not match colour pattern.")\n return\n }\n \n // evenly slice the Pie chart by number of colours\n const slices = new Array(data.length).fill(Number(1 / data.length).toFixed(2))\n const labels = index[0].data\n \n let chart = new Chart(ctx, {\n type: \'pie\',\n data: {\n labels: labels,\n datasets: [{\n data: slices,\n backgroundColor: data,\n }]\n },\n })\n \n return chart\n}\n\n// Register the new chart\n$.fn.dataTable.defaults.graphObjects[\'ColorPalette\'] = ColorPalette\n')
# And set it as default for the dtype you wanna use it for (in this case `string`):
#
# The default setting is:
#
# ```
# {
# boolean: ['CategoricalBar', 'Histogram'],
# date: ['CategoricalBar', 'Histogram'],
# num: ['Histogram', 'CategoricalBar', 'Bar', 'Line'],
# string: ['CategoricalBar', 'Histogram'],
#
# undefined: ['Bar']
# }
#
# ```
#
# The order specifies fallback plots.
# In[21]:
# Put 'ColorPalette' at the front of the plot list registered for the
# `string` dtype, ahead of the entries already in that list.
get_ipython().run_cell_magic('requirejs', '', "\n$.fn.dataTable.defaults.dTypePlotMap['string'].unshift('ColorPalette')\n")
# In[22]:
# Colour names mapped to CSS colour values that all pass the validity check
# of the custom `ColorPalette` graph object.
df_colours = pd.DataFrame([
    {"colour": "red", "value": "rgb(255, 99, 132)"},
    {"colour": "blue", "value": "rgb(54, 162, 235)"},
    {"colour": "lightyellow", "value": "rgba(255, 205, 86, 0.3)"},  # alpha values via `rgba()`
    {"colour": "darkorange", "value": "darkorange"},  # any valid CSS specifier
])
df_colours.set_index("colour", inplace=True)
# As of v0.3.0, DataTables do not support index names properly, so the name
# is cleared. This is done by assignment because `del df_colours.index.name`
# raises AttributeError on pandas >= 1.0 (`Index.name` is a property without
# a deleter).
df_colours.index.name = None
df_colours
# We fall back to the default chart if the colour value is invalid based on our check and use the second chart in order:
# In[23]:
# Same layout as the colour frame, but two of the values are not valid CSS
# colours, so the custom colour check rejects the column.
df_other = pd.DataFrame([
    {"colour": "red", "value": "red"},
    {"colour": "green", "value": "invalid"},
    {"colour": "blue", "value": "blue"},
    {"colour": "other", "value": "invalid"},
])
df_other.set_index("colour", inplace=True)
# Clear the index name by assignment. `del df_other.index.name` raises
# AttributeError on pandas >= 1.0, where `Index.name` is a property without a
# deleter.
df_other.index.name = None
df_other