#!/usr/bin/env python # coding: utf-8 # ## Visualize Parquet data # *Use the Measured Parameter Data Access Parquet format to visualize STOQS data* # # Executing this Notebook requires a personal STOQS server - these instructions are for a Docker installation. This Notebook builds on issues raised in https://github.com/stoqs/stoqs/issues/227. # # ### Docker Instructions # Install and start the software as # [detailed in the README](https://github.com/stoqs/stoqs#production-deployment-with-docker). (Note that on MacOS you will need to modify settings in your `docker-compose.yml` and `.env` files — look for comments referencing 'HOST_UID'.) # # Then, from your `$STOQS_HOME/docker` directory start the Jupyter Notebook server - you can query from the remote database or from a copy that you've made to your local system: # # #### Option A: Query from MBARI's master database # Start the Jupyter Notebook server pointing to MBARI's master STOQS database server. (Note: firewall rules limit unprivileged access to such resources): # # docker-compose exec \ # -e DATABASE_URL=postgis://everyone:guest@kraken.shore.mbari.org:5432/stoqs \ # stoqs stoqs/manage.py shell_plus --notebook # # #### Option B: Query from your local Docker Desktop # Restore a database of your choice from https://stoqs.shore.mbari.org/. For example, the commands below make a local copy of the `stoqs_september2013` database from MBARI's server onto your local database and then start the Jupyter Notebook server using the default DATABASE_URL (which should be your local system). Also **make sure that your Docker Desktop has at least 16 GB of RAM allocated to it**: # # cd $STOQS_HOME/docker # docker-compose exec stoqs createdb -U postgres stoqs_september2013 # curl -k https://stoqs.shore.mbari.org/media/pg_dumps/stoqs_september2013.pg_dump | \ # docker exec -i stoqs pg_restore -Fc -U postgres -d stoqs_september2013 # docker-compose exec stoqs stoqs/manage.py shell_plus --notebook # # ### Opening this Notebook # 
# Following execution of the `stoqs/manage.py shell_plus --notebook` command a
# message is displayed giving a URL for you to use in a browser on your host, e.g.:
#
#     http://127.0.0.1:8888/?token=
#
# In the browser window opened to this URL navigate to this file
# (`visualize_parquet.ipynb`) and open it. You will then be able to execute the
# cells and modify the code to suit your needs.
#
# The information in the output cells results from execution on a 2019 MacBook Pro
# with a 2.4 GHz 8-Core Intel Core i9 processor, 32 GB 2667 MHz DDR4 RAM, running
# Docker Desktop 3.1.0 with 4 CPUs and 16 GB allocated.

# In a browser navigate to https://stoqs.mbari.org/stoqs_september2013 and make selections as shown in this screen grab:
# ![Constructing a parquet download URL](https://user-images.githubusercontent.com/1771866/110736610-d15b2180-81e0-11eb-9913-3f8a5f0a94c4.png)
#
# We will attempt to recreate this image from [Issue 227](https://github.com/stoqs/stoqs/issues/227):
# ![biplot](https://raw.githubusercontent.com/stoqs/stoqs/master/doc/papers/AUV2014/LabeledSelectionUI.png)
#
# but this time using Datashader which can handle a lot more data.

# In[1]:


import time

# Wall-clock start, reported by the final cell of the notebook.
t_start = time.time()

# Issuing a STOQS api request from inside the stoqs container - where this
# notebook is running - is not really possible. For testing with a
# host='localhost' url you need to make that request from your system and
# then copy the .parquet file to this directory: stoqs/contrib/notebooks.
# We have to make two downloads as lrauvs and dorado have different Parameter names.
# It's theoretically possible to download all Parameter names from all three
# platforms, but that exceeds the container's RAM in my 16 GB Docker machine.
# It's more efficient to download just what we need.
##host = 'localhost'
host = 'stoqs.shore.mbari.org'

# Query-string pairs must be separated by '&'. Adjacent string literals are
# concatenated, so every fragment except the last ends with '&'.
url_dorado = (f'https://{host}/stoqs_september2013/api/measuredparameter.parquet?'
              'parameter__name=bbp420&parameter__name=fl700_uncorr&'
              'parameter__name=salinity&parameter__name=temperature&'
              'measurement__instantpoint__activity__platform__name=dorado&'
              'collect=name')
##print(url_dorado)  # Uncomment for 'localhost' download from system browser
url_lrauvs = (f'https://{host}/stoqs_september2013/api/measuredparameter.parquet?'
              'parameter__name=bb470&parameter__name=chlorophyll&'
              'parameter__name=salinity&parameter__name=temperature&'
              'measurement__instantpoint__activity__platform__name=daphne&'
              'measurement__instantpoint__activity__platform__name=tethys&'
              'collect=name')
##print(url_lrauvs)  # Uncomment for 'localhost' download from system browser

# IPython expands {url_dorado} / {url_lrauvs} from the notebook namespace before
# handing the command to the shell; the URLs are double-quoted so that the '&'
# separators are not interpreted by the shell.
# NOTE: --no-check-certificate turns off TLS certificate verification in wget.
get_ipython().system('time wget --no-check-certificate -O stoqs_september2013_dorado.parquet "{url_dorado}"')
get_ipython().system('time wget --no-check-certificate -O stoqs_september2013_lrauvs.parquet "{url_lrauvs}"')


# In[2]:


import pandas as pd

# The %time line magic executes the assignment in the notebook namespace,
# which is what defines df_dorado and df_lrauvs for the code below.
get_ipython().run_line_magic('time', "df_dorado = pd.read_parquet('stoqs_september2013_dorado.parquet')")
print(f"dorado data: {df_dorado.shape}")
##print(df_dorado.head(2))
get_ipython().run_line_magic('time', "df_lrauvs = pd.read_parquet('stoqs_september2013_lrauvs.parquet')")
print(f"lrauv data: {df_lrauvs.shape}")
##print(df_lrauvs.head(2))

# Combine into single DataFrame for more generalized follow-on processing.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0; the
# default arguments keep each frame's index and take the union of the columns.
df = pd.concat([df_dorado, df_lrauvs])


# In[3]:


# Commit with do_plots = False, change to True for plots, but don't check it in that way
do_plots = False
plots = None
if do_plots:
    # Plotting dependencies are imported only when plots are requested.
    import colorcet
    import holoviews as hv
    from holoviews.operation.datashader import datashade

    hv.extension("bokeh")
    pts_dorado = hv.Points(df, kdims=['bbp420', 'fl700_uncorr'])
    # NOTE(review): pts_daphne and pts_tethys are built from the same combined
    # DataFrame with the same kdims, so as written the 'daphne' and 'tethys'
    # panels show identical data. Presumably each should be restricted to its
    # own platform - TODO confirm how platform is exposed in the DataFrame.
    pts_daphne = hv.Points(df, kdims=['bb470', 'chlorophyll'])
    pts_tethys = hv.Points(df, kdims=['bb470', 'chlorophyll'])
    plots = (
        datashade(pts_dorado, cmap=colorcet.fire).opts(title='dorado')
        + datashade(pts_daphne, cmap=colorcet.fire).opts(title='daphne')
        + datashade(pts_tethys, cmap=colorcet.fire).opts(title='tethys')
    )
# Bare expression: as the last statement of a notebook cell it displays the
# layout (or nothing when plots is None).
plots


# In[4]:


print(f"Time to execute this notebook: {(time.time() - t_start):.1f} seconds")