Visualizing Multipore Zeolite Framework Classifications

In [1]:
import math
import pandas as pd
pd.options.display.max_rows = 240

from bokeh.io import output_notebook
from bokeh.plotting import figure, show, output_file
from bokeh.charts import Bar
from bokeh.charts.attributes import cat
from bokeh.models import HoverTool, ColumnDataSource, FixedTicker
from bokeh.palettes import Category10, Set1, Spectral, Set3, Viridis

output_notebook()
Loading BokehJS ...
In [2]:
from zefram.utils import get_framework_data
In [3]:
# load the framework table that every plot below works on
df = get_framework_data()

# replace the short column keys with the readable labels that
# multid_bar_plot later selects by name
df.rename(columns={'lcd': 'Largest cavity dia.', 'pld': 'Pore limiting dia.',
                   'maxdsi': 'Max dia. of inc. sphere'}, inplace=True)

Prepare the data

In [4]:
def number_of_relevant_rings(rings):
    '''count the ring sizes strictly between 6 and 13, plus one if any ring is larger than 12

    However many extra large rings (size > 12) are present, together they
    add exactly one to the result.
    '''

    regular = sum(1 for size in rings if 6 < size < 13)
    extra_large = 1 if any(size > 12 for size in rings) else 0
    return regular + extra_large

def ring_colors(rings):
    '''translate the ring sizes of one framework into a list of plot colors

    Sizes 7 to 12 are looked up, in ascending order, in the six-color
    Viridis palette. A single grey entry is appended when at least one ring
    is larger than 12. Returns None when no ring qualifies.
    '''

    palette = dict(zip(range(7, 13), Viridis[6]))

    picked = []
    for size in sorted(rings):
        if 6 < size < 13:
            picked.append(palette[size])

    # one grey marker stands for all extra large rings together
    if any(size > 12 for size in rings):
        picked.append('#696969')

    return picked or None

# derive, per framework, how many colors it needs and which ones
df['no_rings'] = df['rings'].apply(number_of_relevant_rings)
df['ring_colors'] = df['rings'].apply(ring_colors)

Plot with circle fractions

In [5]:
import numpy as np
from sklearn.utils.extmath import cartesian
    
def show_plot_arcs(dataframe, width=750, height=300,radius=0.4, alpha=0.6, title=None, showit=True):
    '''
    draw every framework as a labelled circle on a 15-column grid

    Frameworks whose 'channel_dim' is not greater than 1 get an empty white
    circle. The others are filled according to 'no_rings': one color gives a
    full circle, two colors give two half wedges, three colors give three
    equal wedges (colors come from 'ring_colors'). Frameworks with more than
    three colors receive no fill. Frameworks with 'channel_dim' equal to 2 or
    3 additionally get a colored outer line. Each of the subsets may be empty.

    Args:
        dataframe (pd.DataFrame) : table with the columns 'channel_dim',
            'no_rings', 'ring_colors' and 'code'; a copy is used, the
            argument itself is not modified

        width, height : passed to figure() as plot_width / plot_height

        radius : radius of the circles and wedges; neighbouring grid cells
            are 1 apart

        alpha : transparency of the fills and of the outer lines

        title : figure title

        showit : when True the figure is shown and None is returned,
            otherwise the figure is returned without being shown
    '''

    df = dataframe.copy()

    ncols = 15
    nitems = df.shape[0]
    # integer ceiling division keeps nrows an int even when nitems is an
    # exact multiple of ncols
    nrows = (nitems + ncols - 1) // ncols

    # enumerate the grid cells row by row and hand one cell to each framework
    xy = cartesian([np.arange(nrows), np.arange(ncols)])

    df.loc[:, 'x'] = xy[:nitems, 1]
    df.loc[:, 'y'] = xy[:nitems, 0]

    # the y range is given as (max, min), which flips the axis so that the
    # first grid row is drawn at the top
    p = figure(title=title,
               x_range=(df.x.min() - 0.5, df.x.max() + 0.5),
               y_range=(df.y.max() + 0.5, df.y.min() - 0.5),
               plot_width=width, plot_height=height, tools='save',
               toolbar_location="above")

    def colors_by_position(mask):
        'regroup the per-framework color lists into one list per wedge position'
        per_framework = df.loc[mask, 'ring_colors'].values.tolist()
        return [list(position) for position in zip(*per_framework)]

    dmask = df['channel_dim'] > 1

    # empty circles
    x = df.loc[~dmask, 'x'].values
    y = df.loc[~dmask, 'y'].values
    p.circle(x, y, radius=radius, color='#ffffff', fill_alpha=1.0, line_color='#888888')

    # full circles
    mask1 = dmask & (df['no_rings'] == 1)
    if mask1.any():
        x = df.loc[mask1, 'x'].values
        y = df.loc[mask1, 'y'].values
        c = colors_by_position(mask1)[0]
        p.circle(x, y, radius=radius, color=c, fill_alpha=alpha, line_color='#888888')

    # half circles/wedges
    mask2 = dmask & (df['no_rings'] == 2)
    if mask2.any():
        x = df.loc[mask2, 'x'].values
        y = df.loc[mask2, 'y'].values
        c1, c2 = colors_by_position(mask2)
        p.wedge(x, y, radius=radius, start_angle=math.pi/4, end_angle=5*math.pi/4,
                color=c1, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=5*math.pi/4, end_angle=math.pi/4,
                color=c2, alpha=alpha, direction="anticlock")

    # third circles/wedges
    mask3 = dmask & (df['no_rings'] == 3)
    if mask3.any():
        x = df.loc[mask3, 'x'].values
        y = df.loc[mask3, 'y'].values
        c1, c2, c3 = colors_by_position(mask3)
        p.wedge(x, y, radius=radius, start_angle=math.pi/2, end_angle=7*math.pi/6,
                color=c1, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=7*math.pi/6, end_angle=11*math.pi/6,
                color=c2, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=11*math.pi/6, end_angle=math.pi/2,
                color=c3, alpha=alpha, direction="anticlock")

    # outer line: slightly larger, unfilled circle whose line color encodes
    # the channel dimensionality
    outer_colors = {'2d': Set1[5][4], '3d': Set1[5][0]}

    d2mask = df['channel_dim'] == 2
    x = df.loc[d2mask, 'x'].values
    y = df.loc[d2mask, 'y'].values
    p.circle(x, y, radius=radius + 0.02, color='#ffffff', fill_alpha=0.0,
             line_color=outer_colors['2d'], line_width=5.0, line_alpha=alpha)

    d3mask = df['channel_dim'] == 3
    x = df.loc[d3mask, 'x'].values
    y = df.loc[d3mask, 'y'].values
    p.circle(x, y, radius=radius + 0.02, color='#ffffff', fill_alpha=0.0,
             line_color=outer_colors['3d'], line_width=5.0, line_alpha=alpha)

    # legend, drawn on the last grid row at fixed x positions
    # NOTE(review): presumably relies on the last row being only partly
    # filled, otherwise the legend overlaps framework circles - confirm
    maxy = df['y'].max()
    p.circle([i + 5.5 for i in range(7)], [maxy] * 7,
             color=Viridis[6] + ['#696969'], radius=0.1, alpha=alpha)
    p.circle([12.5, 13.5], [maxy] * 2,
             color='#ffffff', radius=0.15, line_color=[outer_colors['2d'], outer_colors['3d']],
             line_width=5.0, line_alpha=alpha)
    p.text([i + 5.7 for i in range(9)], [maxy] * 9,
           text=['7', '8', '9', '10', '11', '12', 'XL', '2D', '3D'],
           text_alpha=0.7, text_align='left', text_baseline='middle')

    # framework codes, centered on every grid cell
    text_props = {
            "text_font": "times",
            "text_font_style": "bold",
            "text_font_size": "13pt",
            "angle": 0,
            "color": "black",
            "text_alpha": 0.7,
            "text_align": "center",
            "text_baseline": "middle"
        }

    p.text(x=df.x.values, y=df.y.values, text=df.code.values, **text_props)

    # strip grid, ticks and axes: the grid coordinates carry no meaning
    p.grid.grid_line_color = None

    p.axis[0].ticker.num_minor_ticks = 0
    p.axis[1].ticker.num_minor_ticks = 0
    p.axis.major_label_text_font_size = '0pt'
    p.axis.major_tick_line_color = None

    p.border_fill_color = 'white'
    p.outline_line_width = 0
    p.axis.visible = False

    if showit:
        show(p)
        return None
    return p
In [7]:
# overview of all frameworks: fill colors encode the ring sizes,
# the outer line encodes 2D / 3D channel systems
show_plot_arcs(df, width=900, height=900, radius=0.38, alpha=0.7)

Multipore zeolites

In [60]:
import numpy as np
from sklearn.utils.extmath import cartesian
    
def has_multipores(rings):
    '''tell whether a framework has at least two rings larger than 6

    Rings of size 7 to 12 and extra large rings (size > 12) are each
    counted individually.
    '''

    n_regular = sum(1 for size in rings if 6 < size < 13)
    n_extra_large = sum(1 for size in rings if size > 12)
    return (n_regular + n_extra_large) > 1
    
def show_plot_arcs_multipore(dataframe, width=750, height=300, radius=0.4, alpha=0.6, title=None, showit=True):
    '''
    draw all frameworks on a 15-column grid, coloring only the multipore ones

    A framework counts as multipore here when its 'channel_dim' is greater
    than 1 and has_multipores() accepts its 'rings'. All other frameworks get
    an empty white circle. Multipore frameworks are filled according to
    'no_rings' (one color: full circle, two: half wedges, three: third
    wedges, colors from 'ring_colors') and get a colored outer line for
    'channel_dim' 2 or 3. Each of the subsets may be empty.

    Args:
        dataframe (pd.DataFrame) : table with the columns 'channel_dim',
            'rings', 'no_rings', 'ring_colors' and 'code'; a copy is used,
            the argument itself is not modified

        width, height : passed to figure() as plot_width / plot_height

        radius : radius of the circles and wedges; neighbouring grid cells
            are 1 apart

        alpha : transparency of the fills and of the outer lines

        title : figure title

        showit : when True the figure is shown and None is returned,
            otherwise the figure is returned without being shown
    '''

    df = dataframe.copy()

    ncols = 15
    nitems = df.shape[0]
    # integer ceiling division keeps nrows an int even when nitems is an
    # exact multiple of ncols
    nrows = (nitems + ncols - 1) // ncols

    # enumerate the grid cells row by row and hand one cell to each framework
    xy = cartesian([np.arange(nrows), np.arange(ncols)])

    df.loc[:, 'x'] = xy[:nitems, 1]
    df.loc[:, 'y'] = xy[:nitems, 0]

    # the y range is given as (max, min), which flips the axis so that the
    # first grid row is drawn at the top
    p = figure(title=title,
               x_range=(df.x.min() - 0.5, df.x.max() + 0.5),
               y_range=(df.y.max() + 0.5, df.y.min() - 0.5),
               plot_width=width, plot_height=height, tools='save',
               toolbar_location="above")

    def colors_by_position(mask):
        'regroup the per-framework color lists into one list per wedge position'
        per_framework = df.loc[mask, 'ring_colors'].values.tolist()
        return [list(position) for position in zip(*per_framework)]

    mmask = (df['channel_dim'] > 1) & df.rings.apply(has_multipores)

    # empty circles
    x = df.loc[~mmask, 'x'].values
    y = df.loc[~mmask, 'y'].values
    p.circle(x, y, radius=radius, color='#ffffff', fill_alpha=1.0, line_color='#888888')

    # full circles: multipore frameworks that still map to a single color
    # (the original author's note mentions the IRY framework)
    mask1 = (df['no_rings'] == 1) & mmask
    if mask1.any():
        x = df.loc[mask1, 'x'].values
        y = df.loc[mask1, 'y'].values
        c = colors_by_position(mask1)[0]
        p.circle(x, y, radius=radius, color=c, fill_alpha=alpha, line_color='#888888')

    # half circles/wedges
    mask2 = mmask & (df['no_rings'] == 2)
    if mask2.any():
        x = df.loc[mask2, 'x'].values
        y = df.loc[mask2, 'y'].values
        c1, c2 = colors_by_position(mask2)
        p.wedge(x, y, radius=radius, start_angle=math.pi/4, end_angle=5*math.pi/4,
                color=c1, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=5*math.pi/4, end_angle=math.pi/4,
                color=c2, alpha=alpha, direction="anticlock")

    # third circles/wedges
    mask3 = mmask & (df['no_rings'] == 3)
    if mask3.any():
        x = df.loc[mask3, 'x'].values
        y = df.loc[mask3, 'y'].values
        c1, c2, c3 = colors_by_position(mask3)
        p.wedge(x, y, radius=radius, start_angle=math.pi/2, end_angle=7*math.pi/6,
                color=c1, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=7*math.pi/6, end_angle=11*math.pi/6,
                color=c2, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=11*math.pi/6, end_angle=math.pi/2,
                color=c3, alpha=alpha, direction="anticlock")

    # outer line: slightly larger, unfilled circle whose line color encodes
    # the channel dimensionality (multipore frameworks only)
    outer_colors = {'2d': Set1[5][4], '3d': Set1[5][0]}

    d2mask = (df['channel_dim'] == 2) & mmask
    x = df.loc[d2mask, 'x'].values
    y = df.loc[d2mask, 'y'].values
    p.circle(x, y, radius=radius + 0.02, color='#ffffff', fill_alpha=0.0,
             line_color=outer_colors['2d'], line_width=5.0, line_alpha=alpha)

    d3mask = (df['channel_dim'] == 3) & mmask
    x = df.loc[d3mask, 'x'].values
    y = df.loc[d3mask, 'y'].values
    p.circle(x, y, radius=radius + 0.02, color='#ffffff', fill_alpha=0.0,
             line_color=outer_colors['3d'], line_width=5.0, line_alpha=alpha)

    # legend, drawn on the last grid row at fixed x positions
    # NOTE(review): presumably relies on the last row being only partly
    # filled, otherwise the legend overlaps framework circles - confirm
    maxy = df['y'].max()
    p.circle([i + 5.5 for i in range(7)], [maxy] * 7,
             color=Viridis[6] + ['#696969'], radius=0.1, alpha=alpha)
    p.circle([12.5, 13.5], [maxy] * 2,
             color='#ffffff', radius=0.15, line_color=[outer_colors['2d'], outer_colors['3d']],
             line_width=5.0, line_alpha=alpha)
    p.text([i + 5.7 for i in range(9)], [maxy] * 9,
           text=['7', '8', '9', '10', '11', '12', 'XL', '2D', '3D'],
           text_alpha=0.7, text_align='left', text_baseline='middle')

    # framework codes, centered on every grid cell
    text_props = {
            "text_font": "times",
            "text_font_style": "bold",
            "text_font_size": "13pt",
            "angle": 0,
            "color": "black",
            "text_alpha": 0.7,
            "text_align": "center",
            "text_baseline": "middle"
        }

    p.text(x=df.x.values, y=df.y.values, text=df.code.values, **text_props)

    # strip grid, ticks and axes: the grid coordinates carry no meaning
    p.grid.grid_line_color = None

    p.axis[0].ticker.num_minor_ticks = 0
    p.axis[1].ticker.num_minor_ticks = 0
    p.axis.major_label_text_font_size = '0pt'
    p.axis.major_tick_line_color = None

    p.border_fill_color = 'white'
    p.outline_line_width = 0
    p.axis.visible = False

    if showit:
        show(p)
        return None
    return p
In [61]:
# same grid of all frameworks, but only those with channel_dim > 1 that
# pass has_multipores are colored
show_plot_arcs_multipore(df, width=900, height=900, radius=0.38, alpha=0.7)
In [52]:
def show_plot_arcs_multipore_only(dataframe, width=750, height=300, radius=0.4, alpha=0.6, title=None, showit=True):
    '''
    draw the given frameworks on a compact 9-column grid with a legend row below

    No filtering happens here: every row of `dataframe` is drawn. Rows are
    filled according to 'no_rings' (one color: full circle, two: half wedges,
    three: third wedges, colors from 'ring_colors'); rows with more than
    three colors receive no fill. Rows with 'channel_dim' equal to 2 or 3
    get a colored outer line. Each of the subsets may be empty.

    Args:
        dataframe (pd.DataFrame) : table with the columns 'channel_dim',
            'no_rings', 'ring_colors' and 'code'; a copy is used, the
            argument itself is not modified

        width, height : passed to figure() as plot_width / plot_height

        radius : radius of the circles and wedges; neighbouring grid cells
            are 1 apart

        alpha : transparency of the fills and of the outer lines

        title : figure title

        showit : when True the figure is shown and None is returned,
            otherwise the figure is returned without being shown
    '''

    df = dataframe.copy()

    ncols = 9
    nitems = df.shape[0]
    # integer ceiling division keeps nrows an int even when nitems is an
    # exact multiple of ncols
    nrows = (nitems + ncols - 1) // ncols

    # enumerate the grid cells row by row and hand one cell to each framework
    xy = cartesian([np.arange(nrows), np.arange(ncols)])

    df.loc[:, 'x'] = xy[:nitems, 1]
    df.loc[:, 'y'] = xy[:nitems, 0]

    # the y range is given as (max, min), which flips the axis so that the
    # first grid row is drawn at the top; one extra row is reserved at the
    # bottom for the legend
    p = figure(title=title,
               x_range=(df.x.min() - 0.5, df.x.max() + 0.5),
               y_range=(df.y.max() + 1.5, df.y.min() - 0.5),
               plot_width=width, plot_height=height, tools='save',
               toolbar_location="above")

    def colors_by_position(mask):
        'regroup the per-framework color lists into one list per wedge position'
        per_framework = df.loc[mask, 'ring_colors'].values.tolist()
        return [list(position) for position in zip(*per_framework)]

    # full circles: frameworks that map to a single color
    # (the original author's note mentions the IRY framework)
    mask1 = (df['no_rings'] == 1)
    if mask1.any():
        x = df.loc[mask1, 'x'].values
        y = df.loc[mask1, 'y'].values
        c = colors_by_position(mask1)[0]
        p.circle(x, y, radius=radius, color=c, fill_alpha=alpha, line_color='#888888')

    # half circles/wedges
    mask2 = (df['no_rings'] == 2)
    if mask2.any():
        x = df.loc[mask2, 'x'].values
        y = df.loc[mask2, 'y'].values
        c1, c2 = colors_by_position(mask2)
        p.wedge(x, y, radius=radius, start_angle=math.pi/4, end_angle=5*math.pi/4,
                color=c1, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=5*math.pi/4, end_angle=math.pi/4,
                color=c2, alpha=alpha, direction="anticlock")

    # third circles/wedges
    mask3 = (df['no_rings'] == 3)
    if mask3.any():
        x = df.loc[mask3, 'x'].values
        y = df.loc[mask3, 'y'].values
        c1, c2, c3 = colors_by_position(mask3)
        p.wedge(x, y, radius=radius, start_angle=math.pi/2, end_angle=7*math.pi/6,
                color=c1, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=7*math.pi/6, end_angle=11*math.pi/6,
                color=c2, alpha=alpha, direction="anticlock")
        p.wedge(x, y, radius=radius, start_angle=11*math.pi/6, end_angle=math.pi/2,
                color=c3, alpha=alpha, direction="anticlock")

    # outer line: slightly larger, unfilled circle whose line color encodes
    # the channel dimensionality
    outer_colors = {'2d': Set1[5][4], '3d': Set1[5][0]}

    d2mask = (df['channel_dim'] == 2)
    x = df.loc[d2mask, 'x'].values
    y = df.loc[d2mask, 'y'].values
    p.circle(x, y, radius=radius + 0.03, color='#ffffff', fill_alpha=0.0,
             line_color=outer_colors['2d'], line_width=7.0, line_alpha=alpha)

    d3mask = (df['channel_dim'] == 3)
    x = df.loc[d3mask, 'x'].values
    y = df.loc[d3mask, 'y'].values
    p.circle(x, y, radius=radius + 0.03, color='#ffffff', fill_alpha=0.0,
             line_color=outer_colors['3d'], line_width=7.0, line_alpha=alpha)

    # legend, placed on its own row below the last grid row
    legend_y = df['y'].max() + 1
    p.circle([i for i in range(7)], [legend_y] * 7,
             color=Viridis[6] + ['#696969'], radius=0.1, alpha=alpha)
    p.circle([7.0, 8.0], [legend_y] * 2,
             color='#ffffff', radius=0.15, line_color=[outer_colors['2d'], outer_colors['3d']],
             line_width=5.0, line_alpha=alpha)
    p.text([i + 0.2 for i in range(9)], [legend_y] * 9,
           text=['7', '8', '9', '10', '11', '12', 'XL', '2D', '3D'],
           text_alpha=0.7, text_align='left', text_baseline='middle')

    # framework codes, centered on every grid cell
    text_props = {
            "text_font": "times",
            "text_font_style": "bold",
            "text_font_size": "13pt",
            "angle": 0,
            "color": "black",
            "text_alpha": 0.7,
            "text_align": "center",
            "text_baseline": "middle"
        }

    p.text(x=df.x.values, y=df.y.values, text=df.code.values, **text_props)

    # strip grid, ticks and axes: the grid coordinates carry no meaning
    p.grid.grid_line_color = None

    p.axis[0].ticker.num_minor_ticks = 0
    p.axis[1].ticker.num_minor_ticks = 0
    p.axis.major_label_text_font_size = '0pt'
    p.axis.major_tick_line_color = None

    p.border_fill_color = 'white'
    p.outline_line_width = 0
    p.axis.visible = False

    if showit:
        show(p)
        return None
    return p
In [53]:
# keep only the frameworks with channel_dim > 1 that pass has_multipores;
# show_plot_arcs_multipore_only draws every row it is given
mask = (df['channel_dim'] > 1) & df.rings.apply(has_multipores)
show_plot_arcs_multipore_only(df[mask], width=900, height=900, radius=0.38, alpha=0.7)

Bar charts

In [28]:
def multid_bar_plot(data, sizes, title=None, showit=True, dropna=True):
    '''
    plot a grouped bar chart of the pore dimensions for a given ring category

    A framework is selected when its 'channel_dim' is greater than 1, its
    rings contain every size in `sizes`, and its largest ring equals the
    largest requested size. The selected frameworks are sorted by
    'Largest cavity dia.' and plotted with one bar per dimension column.

    Args:
        data (pd.DataFrame) : framework table with the columns 'rings',
            'channel_dim', 'code', 'Largest cavity dia.',
            'Pore limiting dia.' and 'Max dia. of inc. sphere'

        sizes: list of ring sizes e.g. [8, 10], [8, 10, 12]

        title : chart title

        showit : when True the chart is shown and None is returned,
            otherwise the chart is returned without being shown

        dropna : when True (default) frameworks with a missing value in any
            of the plotted columns are left out

    '''

    def in_list(rings, sizes):
        'True when rings holds every requested size and no ring larger than the largest of them'
        return set(sizes).issubset(rings) and max(rings) == max(sizes)

    # build the selection from `data` itself, not from the notebook-level
    # `df`, so that the function works on whatever frame it is handed
    ring_mask = data['rings'].apply(in_list, sizes=sizes)
    mask = (data['channel_dim'] > 1) & ring_mask
    cols = ['code', 'Largest cavity dia.', 'Pore limiting dia.', 'Max dia. of inc. sphere']

    selected = data.loc[mask, cols]
    if dropna:
        selected = selected.dropna()

    # long format: one row per (framework, dimension) pair, as Bar expects
    melted = pd.melt(selected.sort_values(by='Largest cavity dia.'),
                     id_vars=['code'], value_vars=cols[1:])

    # sort=False keeps the frameworks in cavity-diameter order on the axis
    p = Bar(melted, label=cat('code', sort=False), values="value", group="variable",
            legend="top_left", xlabel='Framework code', ylabel='Length [Angstrom]',
            plot_width=900, title=title, toolbar_location="above")

    if showit:
        show(p)
        return None
    return p
In [29]:
# frameworks containing both 8- and 10-rings whose largest ring is the 10-ring
multid_bar_plot(df, [8, 10], title='Multipore frameworks with 8 and 10 MR')
In [25]:
# frameworks containing both 8- and 12-rings whose largest ring is the 12-ring
multid_bar_plot(df, [8, 12], title='Multipore frameworks with 8 and 12 MR')
In [26]:
# frameworks containing both 10- and 12-rings whose largest ring is the 12-ring
multid_bar_plot(df, [10, 12], title='Multipore frameworks with 10 and 12 MR')
In [27]:
# frameworks containing 8-, 10- and 12-rings whose largest ring is the 12-ring
multid_bar_plot(df, [8, 10, 12], title='Multipore frameworks with 8, 10, 12 MR')
In [62]:
%version_information bokeh, pandas, numpy, zefram
Out[62]:
SoftwareVersion
Python3.6.1 64bit [GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
IPython6.0.0
OSLinux 3.16.0 4 amd64 x86_64 with debian 8.8
bokeh0.12.5
pandas0.20.1
numpy1.12.1
zefram0.1.2
Tue May 23 22:29:32 2017 CEST