#!/usr/bin/env python
# coding: utf-8

# # Visualizing Zeolite Framework Classifications
#
# Find all zeolite topologies that fit certain criteria:
#
# - largest channel is an 8 MR
# - largest channel is a 10 MR
# - largest channel is a 12 MR
# - realizable as AlPO
# - split plots by dimensionality of the channels, 0D, 1D, 2D, 3D
#
# ### First some important imports

# In[1]:


import math

import pandas as pd
from bokeh.io import output_notebook
from bokeh.plotting import figure, show, output_file
from bokeh.charts import Bar
from bokeh.charts.attributes import cat
from bokeh.models import HoverTool, ColumnDataSource, FixedTicker
from bokeh.palettes import Category10, Set1, Spectral

output_notebook()


# # Obtain the data
#
# The data is accessed from the
# [``zefram``](https://bitbucket.org/lukaszmentel/zefram) package that has a
# collection of [IZA](http://www.iza-structure.org/databases/) and
# [ZEOMICS](http://helios.princeton.edu/zeomics/) databases accessible from
# Python.

# In[2]:


from zefram import get_session, get_table, framework, Framework, RingSize


# The data is retrieved as ``pandas.DataFrame`` allowing convenient
# manipulations and visualizations.

# In[3]:


df = get_table('frameworks')
df.info()


# In[4]:


def assign_label(rings):
    """Return the pore-size label for a collection of ring sizes.

    The label is built from the letters ``'l'`` (12-ring), ``'m'`` (10-ring)
    and ``'s'`` (8-ring), in that order, keeping only the sizes present in
    *rings*. A label is produced only when the largest ring is 8, 10 or 12.

    Args:
        rings: sized collection of ints (ring sizes of one framework).

    Returns:
        One of ``'s'``, ``'m'``, ``'ms'``, ``'l'``, ``'lm'``, ``'ls'``,
        ``'lms'``, or ``None`` when *rings* is empty or its largest ring is
        not 8, 10 or 12.
    """
    if len(rings) == 0:
        # max() of an empty sequence raises; no rings means no label.
        return None

    largest = max(rings)
    if largest not in (8, 10, 12):
        return None

    tags = ((12, 'l'), (10, 'm'), (8, 's'))
    return ''.join(tag for size, tag in tags
                   if size <= largest and size in rings)


def are_all_even(lst):
    """Check whether every element of *lst* is an even number.

    Returns ``True`` for an empty *lst*, since there is no odd element.
    """
    return all(x % 2 == 0 for x in lst)


# In[5]:


# ring sizes of every framework, looked up by its framework code
df['Rings'] = df['code'].apply(
    lambda code: [r.size for r in framework(code).ring_sizes])
# get the largest ring
df['max_ring'] = df['Rings'].apply(max)
# assign a string label
df['ring_label'] = df['Rings'].apply(assign_label)
# flag the frameworks whose rings are all even-membered
df['alpo_possible'] = df['Rings'].apply(are_all_even)

df.rename(columns={'lcd': 'Largest cavity dia.',
                   'pld': 'Pore limiting dia.',
                   'maxdsi': 'Max dia. of inc. sphere'},
          inplace=True)


# ## Histogram
#
# Now we can plot a histogram depicting the distribution of frameworks
# depending on the largest channel.

# In[6]:


p = Bar(df, 'max_ring', values='max_ring', agg='count', group='channel_dim',
        xlabel='Largest channel [#T atoms]', ylabel='Number of frameworks',
        legend='top_right', plot_width=760)
show(p)


# In[7]:


gencols = ['code', 'channel_dim', 'Largest cavity dia.', 'Pore limiting dia.',
           'Max dia. of inc. sphere', 'Rings', 'ring_label', 'alpo_possible']

df0d = df.loc[df['channel_dim'] == 0, gencols].copy()
df1d = df.loc[df['channel_dim'] == 1, gencols].copy()
df2d = df.loc[df['channel_dim'] == 2, gencols].copy()
df3d = df.loc[df['channel_dim'] == 3, gencols].copy()


# ## Plotting function

# In[8]:


import numpy as np
from sklearn.utils.extmath import cartesian


def show_plot(dataframe, width=750, height=300, radius=0.4, alpha=0.6,
              title=None, showit=True):
    """Draw the frameworks of *dataframe* as labelled circles on a grid.

    Frameworks are laid out row by row, 10 per row, in dataframe order. Each
    circle is filled according to ``ring_label``: one colour for a
    one-letter label, two half wedges for a two-letter label, three wedges
    for a three-letter label, and a white circle when there is no label.
    Frameworks with ``alpo_possible`` set get an additional coloured
    outline. A small legend is drawn on the last row and the framework
    ``code`` is written inside every circle.

    Args:
        dataframe (pd.DataFrame): must provide the columns ``code``,
            ``ring_label`` and ``alpo_possible``; the hover tooltips
            additionally read ``Pore limiting dia.``,
            ``Largest cavity dia.``, ``Max dia. of inc. sphere``,
            ``channel_dim`` and ``Rings``. The frame is copied, not
            modified.
        width (int): plot width passed to the figure.
        height (int): plot height passed to the figure.
        radius (float): circle radius in grid units.
        alpha (float): fill opacity of the coloured glyphs.
        title (str): plot title.
        showit (bool): show the figure when true, otherwise return it.

    Returns:
        The bokeh figure when *showit* is false, otherwise ``None``.
    """
    data = dataframe.copy()

    colors = {'s': Spectral[4][0],
              'm': Spectral[4][1],
              'l': Spectral[4][2],
              'alpo': Spectral[4][3]}

    ncols = 10
    nitems = data.shape[0]
    # Integer ceiling division: true division would turn the row count, and
    # with it every grid coordinate, into a float.
    nrows = (nitems + ncols - 1) // ncols

    # (row, column) pairs in row-major order; keep one pair per framework
    xy = cartesian([np.arange(nrows), np.arange(ncols)])
    data['x'] = xy[:nitems, 1]
    data['y'] = xy[:nitems, 0]

    source = ColumnDataSource(data=data)

    # the y range is reversed so that the first row is drawn at the top
    p = figure(title=title,
               x_range=(data.x.min() - 0.5, data.x.max() + 0.5),
               y_range=(data.y.max() + 0.5, data.y.min() - 0.5),
               plot_width=width, plot_height=height,
               tools='save')

    # Transparent circles bound to ``source``: the visible glyphs below are
    # drawn from plain arrays, so the hover tool needs a renderer whose data
    # actually carries the tooltip columns.
    hit_target = p.circle('x', 'y', radius=radius, source=source,
                          fill_alpha=0.0, line_alpha=0.0)
    hover = HoverTool(
        renderers=[hit_target],
        tooltips=[
            # column names containing spaces must be wrapped in braces
            ("Pore limiting diameter", "@{Pore limiting dia.}"),
            ("Largest cavity D.", "@{Largest cavity dia.}"),
            ("Max. D. Sph. I.", "@{Max dia. of inc. sphere}"),
            ("Dimensionality", "@channel_dim"),
            ("Rings", "@Rings"),
        ]
    )
    p.add_tools(hover)

    # number of letters in each label, NaN where there is no label
    label_len = data.ring_label.str.len()

    # empty circles
    mask = label_len.isnull()
    x = data.loc[mask, 'x'].values
    y = data.loc[mask, 'y'].values
    p.circle(x, y, radius=radius, color='#ffffff', fill_alpha=1.0,
             line_color='#888888')

    # full circles
    mask = label_len == 1
    x = data.loc[mask, 'x'].values
    y = data.loc[mask, 'y'].values
    c = [colors[k] for k in data.loc[mask, 'ring_label']]
    p.circle(x, y, radius=radius, color=c, fill_alpha=alpha,
             line_color='#888888')

    # half circles/wedges
    mask = label_len == 2
    x = data.loc[mask, 'x'].values
    y = data.loc[mask, 'y'].values
    c1 = [colors[k] for k in data.loc[mask, 'ring_label'].str[0]]
    c2 = [colors[k] for k in data.loc[mask, 'ring_label'].str[1]]
    p.wedge(x, y, radius=radius, start_angle=math.pi/4,
            end_angle=5*math.pi/4, color=c1, alpha=alpha,
            direction="anticlock")
    p.wedge(x, y, radius=radius, start_angle=5*math.pi/4,
            end_angle=math.pi/4, color=c2, alpha=alpha,
            direction="anticlock")

    # third circles/wedges
    mask = label_len == 3
    x = data.loc[mask, 'x'].values
    y = data.loc[mask, 'y'].values
    p.wedge(x, y, radius=radius, start_angle=math.pi/2,
            end_angle=7*math.pi/6, color=colors['s'], alpha=alpha,
            direction="anticlock")
    p.wedge(x, y, radius=radius, start_angle=7*math.pi/6,
            end_angle=11*math.pi/6, color=colors['m'], alpha=alpha,
            direction="anticlock")
    p.wedge(x, y, radius=radius, start_angle=11*math.pi/6,
            end_angle=math.pi/2, color=colors['l'], alpha=alpha,
            direction="anticlock")

    # coloured outline around the frameworks flagged as alpo_possible
    x = data.loc[data.alpo_possible, 'x'].values
    y = data.loc[data.alpo_possible, 'y'].values
    p.circle(x, y, radius=radius + 0.02, color='#ffffff', fill_alpha=0.0,
             line_color=colors['alpo'], line_width=5.0, line_alpha=0.6)

    # hand-made legend: four small dots with captions on the last row
    maxx = data['x'].max()
    maxy = data['y'].max()

    p.circle([maxx-3.2, maxx-2.2, maxx-1.2, maxx-0.2], [maxy] * 4,
             color=[colors['s'], colors['m'], colors['l'], colors['alpo']],
             radius=0.1, alpha=alpha)
    p.text([maxx-3, maxx-2, maxx-1, maxx-0.05], [maxy] * 4,
           text=['8', '10', '12', 'AlPO'], text_alpha=0.7,
           text_align='left', text_baseline='middle')

    # framework code written in the centre of every circle
    text_props = {
        "text_font": "times",
        "text_font_style": "bold",
        "text_font_size": "13pt",
        "source": source,
        "angle": 0,
        "color": "black",
        "text_alpha": 0.7,
        "text_align": "center",
        "text_baseline": "middle"
    }

    p.text(x="x", y="y", text="code", **text_props)

    # strip grid, ticks, axes and outline so only the circles remain
    p.grid.grid_line_color = None
    p.axis[0].ticker.num_minor_ticks = 0
    p.axis[1].ticker.num_minor_ticks = 0
    p.axis.major_label_text_font_size = '0pt'
    p.axis.major_tick_line_color = None
    p.border_fill_color = 'white'
    p.outline_line_width = 0
    p.axis.visible = False
    p.legend.location = "bottom_right"
    p.legend.orientation = "horizontal"

    if showit:
        show(p)
        return None
    return p


# In[9]:


show_plot(df0d, width=760, height=250, radius=0.44,
          title='Zeolite Topologies')


# In[10]:


show_plot(df1d, width=760, height=530, radius=0.44,
          title='Unidirectional Zeolite Topologies')


# In[11]:


show_plot(df2d, width=760, height=450, radius=0.44,
          title='Bidirectional Zeolite Topologies')


# In[12]:


show_plot(df3d, width=760, height=750, radius=0.44,
          title='Tridirectional Zeolite Topologies')


# # Bar charts with pore sizes

# In[13]:


def diameters_bar_plot(data, category, title=None, showit=True):
    """Plot a grouped bar chart of the diameters for one pore-size category.

    The frameworks whose ``ring_label`` equals *category* are sorted by
    ``Largest cavity dia.`` and, for each framework code, one bar is drawn
    per diameter column.

    Args:
        data (pd.DataFrame): must provide the columns ``code``,
            ``ring_label``, ``Max dia. of inc. sphere``,
            ``Largest cavity dia.`` and ``Pore limiting dia.``.
        category (str): ring label to select: 's', 'm', 'l', 'ms',...
        title (str): chart title.
        showit (bool): show the chart when true, otherwise return it.

    Returns:
        The chart when *showit* is false, otherwise ``None``.
    """
    mask = data.ring_label == category
    cols = ['code', 'Max dia. of inc. sphere', 'Largest cavity dia.',
            'Pore limiting dia.']

    selected = data.loc[mask, cols].sort_values(by='Largest cavity dia.')
    # long format: one row per (code, diameter kind) pair
    melted = pd.melt(selected, id_vars=['code'], value_vars=cols[1:])

    # sort=False keeps the bars in the order established by the sort above
    p = Bar(melted, label=cat('code', sort=False), values="value",
            group="variable", legend="top_left",
            xlabel='Framework code', ylabel='Length [Angstrom]',
            plot_width=750, title=title)

    if showit:
        show(p)
        return None
    return p


# In[14]:


diameters_bar_plot(df, 's', title='Small pore frameworks (8MR)')


# In[15]:


diameters_bar_plot(df, 'm', title='Medium pore frameworks (10MR)')


# In[16]:


diameters_bar_plot(df, 'l', title='Large pore frameworks (12MR)')


# In[17]:


diameters_bar_plot(df, 'ms',
                   title='Small-medium pore frameworks (8 and 10 MR)')


# In[18]:


diameters_bar_plot(df, 'lm',
                   title='Medium-large pore frameworks (10 and 12 MR)')


# In[19]:


diameters_bar_plot(df, 'ls',
                   title='Small-large pore frameworks (8 and 12 MR)')


# In[20]:


diameters_bar_plot(df, 'lms',
                   title='Small-medium-large pore frameworks (8, 10 and 12 MR)')


# In[1]:


get_ipython().run_line_magic('version_information', 'bokeh, pandas, numpy, zefram')