#!/usr/bin/env python
# coding: utf-8

# # Interactive plotting of Data Lab catalog data
# In this notebook, we will retrieve the Hydra II SMASH catalog data and make
# an interactive pair of plots using the Bokeh
# (http://bokeh.pydata.org/en/latest/) library.
#
# ## Data retrieval
# We will use the code example provided in the "How to use the DataLab query
# manager service" notebook to access the data. The columns we will need are
# RA, Dec, g magnitude, r magnitude, and depthflag.
#
# ## Visualization
# Bokeh comes with a number of built-in tools for producing interactive plots.
# In this example, we will make a pair of plots: RA vs. Dec on the left and
# g-r vs. r on the right. We will use Bokeh's linked brushing tools to
# interactively select a set of points on the RA vs. Dec plot, which will
# automatically highlight the same points on the color-magnitude diagram on
# the right. The intended use of this notebook is that the user will start
# with candidate overdensities and then use Bokeh tools to explore them
# interactively.
#
# ### Initialization
#
# We need modules from the Bokeh library, NumPy, and Pandas. For the Data Lab
# query, we need authClient and queryClient from Data Lab's dl library.

# In[1]:

# Progress marker, emitted before the (potentially slow) imports as in the
# original notebook cell. Single-argument print(...) behaves identically on
# Python 2 and Python 3.
print("Start")

import sys

import numpy as np
import pandas as pd
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource
from bokeh.models import LinearAxis, Range1d
from bokeh.plotting import figure, gridplot, output_file, show

# cStringIO only exists on Python 2; fall back to io.StringIO elsewhere so the
# CSV response can still be wrapped in a file-like object for pandas.
# NOTE(review): io.StringIO requires text (str) input -- confirm that
# queryClient.query returns text rather than bytes on Python 3.
try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO

from dl import authClient
from dl import queryClient

# Get the security token for the datalab demo user
token = authClient.login('anonymous')
print("Got token {}".format(token))

# ### Query the SMASH DR1 database
#
# We will query the averaged photometry table from the SMASH catalog and
# select Field 169, which we know contains the Hydra II dwarf.
# In[2]:

field = 169                 # SMASH Field Number to query
depth = 1                   # keep objects whose depthflag is greater than this
raname = 'ra'
decname = 'dec'
mags = 'gmag,rmag'
dbase = 'smash_dr1.object'
fid = 'fieldid'

# Create the query string. The template is assembled first and the two
# numeric placeholders (field id, depth threshold) are filled in afterwards.
# Adjacent string literals are concatenated by the parser, so the pieces
# below form the same text as the original '+'-joined expression.
query = (
    'select ' + raname + ',' + decname + ',' + mags + ',depthflag from ' + dbase +
    ' where (' + fid + " = '%d' AND"
    ' (depthflag > %d) and '
    ' (abs(sharp) < 0.5) and '
    ' (gmag is not null) and '
    ' (gmag between 9 and 25) and '
    ' ((gmag-rmag) between -1.5 and 3.0))'
) % (field, depth)

# Single-argument print(...) gives the same output on Python 2 and Python 3.
print("Your query is: " + query)

# We issue the query through the Query Manager, which connects directly to
# the database.

# In[3]:

# This is the exported form of a `%%time` notebook cell, so it needs an
# IPython session (get_ipython is not defined in a plain Python interpreter).
# The cell source uses print(...) calls so it parses on Python 2 and 3 alike.
get_ipython().run_cell_magic(
    'time',
    '',
    'print("Making query")\n'
    '# Call the Query Manager Service \n'
    'response = queryClient.query(token, adql = query, fmt = \'csv\')\n'
    'df = pd.read_csv(StringIO(response))\n'
    '\n'
    'print("%d objects found." % len(df))\n'
)

# ### Data munging
#
# Next we add a g-r color column to the Pandas dataframe.

# In[4]:

df["g_r"] = df["gmag"] - df["rmag"]
df.tail()   # displayed as cell output in a notebook; has no effect in a script

# ### Setting up the visualization with Bokeh
#
# This function from the Bokeh library triggers embedded plotting output in
# the notebook. Alternatively, we could have used output_file() to save
# output to html for separate viewing.

# In[5]:

output_notebook()

# The ColumnDataSource function packages the data to use in the Bokeh plots.
# The dictionary labels x1, x2, y1, and y2 will be referred to when we set up
# the figure objects.

# In[6]:

source = ColumnDataSource(
    data=dict(
        x1=np.array(df["ra"]),     # left plot, x axis
        x2=np.array(df["g_r"]),    # right plot, x axis
        y1=np.array(df["dec"]),    # left plot, y axis
        y2=np.array(df["rmag"]),   # right plot, y axis
    )
)

# Now we will set up the plots. First, we select the Bokeh tools that we want
# to use. Refer to http://bokeh.pydata.org/en/latest/docs/user_guide/tools.html
# to see the full list of available tools.

# In[7]:

TOOLS = "box_select,lasso_select,pan,wheel_zoom,box_zoom,reset,help"

# Our plot on the left will be RA vs. Dec.
# We first create an instance of the figure object, specifying the toolset,
# the size of the plot, the title, whether to use WebGL acceleration, and the
# "Level of Detail" (lod) decimation factor, which determines how the plot
# behaves when doing interactive panning and zooming. A higher lod_factor
# means less detail is shown momentarily as the plot is updated
# interactively. Setting webgl=True can speed up the interaction
# significantly, but isn't well handled by all browsers. Safari, for
# instance, will show only a blank plot with webgl set to True.

# In[8]:

# Both panels are created with the same toolset, size, and rendering options.
panel_options = dict(
    tools=TOOLS,
    width=400,
    height=400,
    title=None,
    webgl=False,
    lod_factor=100,
)
left = figure(**panel_options)

# Now we run the scatter plot method of the figure object that we created,
# specifying the dictionary labels that contain the x and y axes, the
# ColumnDataSource object that contains the data, the radius of the circles
# used as points, the color of the circles, and the transparency (fill_alpha)
# of the circles. line_color=None is passed so the markers are drawn without
# an outline. We use a cosine Dec scale factor to set the symbol radius, to
# avoid having the symbols change for fields at different declination.

# In[9]:

# Median declination converted from degrees to radians for np.cos.
median_dec_rad = np.median(df["dec"]) / 180 * np.pi
marker_radius = 0.005 / np.cos(median_dec_rad)

left.scatter(
    'x1',
    'y1',
    source=source,
    radius=marker_radius,
    fill_color='red',
    fill_alpha=0.1,
    line_color=None,
)
# The range start is larger than its end, i.e. RA runs from 186.8 down to 183.7.
left.x_range = Range1d(186.8, 183.7)
left.xaxis.axis_label = 'RA'
left.yaxis.axis_label = 'Dec'

# Our plot on the right will be g-r vs. r. We set the range of the g-r axis
# to be -2 < g-r < 3.

# In[10]:

right = figure(**panel_options)
right.scatter(
    'x2',
    'y2',
    source=source,
    radius=0.02,
    fill_color='red',
    fill_alpha=0.5,
    line_color=None,
)
right.x_range = Range1d(-2, 3)
# The magnitude axis range runs from 25 to 14 (start larger than end).
right.y_range = Range1d(25, 14)
right.xaxis.axis_label = 'g-r'
right.yaxis.axis_label = 'r'

# Now we add a plot renderer, in this case gridplot, to be able to show the
# two plots side by side.
# In[11]:

# One grid row holding both figures, so they are laid out next to each other.
panel_row = [left, right]
p = gridplot([panel_row])

# ### The plots
#
# Finally, we render the plot. The figures are interactive, with the ability
# to pan, zoom, and select samples of data that are then updated in the other
# plot. With the large number of points used here, the interaction can be a
# little slow, depending on browser and hardware. Try Box Select on the clump
# of points at lower left, where Hydra II is lurking.

# In[12]:

show(p)