#!/usr/bin/env python
# coding: utf-8

# # CM2.6 Ocean Model Analysis
# 
# This notebook shows how to load and analyze ocean data from the GFDL [CM2.6](https://www.gfdl.noaa.gov/cm2-6/) high-resolution climate simulation.
# 
# ![CM2.6 SST](https://www.gfdl.noaa.gov/wp-content/uploads/ih/2012/06/cm2.6.png)
# 
# Right now the only available output is 5-day averages of the 3D fields of horizontal velocity, temperature, and salinity. We hope to add more going forward.
# 
# Thanks to [Stephen Griffies](https://www.gfdl.noaa.gov/stephen-griffies-homepage/) for providing the data.

# In[ ]:

get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import holoviews as hv
import datashader
from holoviews.operation.datashader import regrid, shade, datashade
hv.extension('bokeh', width=100)


# ## Create and Connect to Dask Distributed Cluster
# 
# This will launch a Dask cluster of worker pods in the cloud via Kubernetes.

# In[ ]:

from dask.distributed import Client, progress
from dask_kubernetes import KubeCluster
cluster = KubeCluster(n_workers=40)
cluster
# 👆 Don't forget to click this link to get the cluster dashboard


# In[ ]:

client = Client(cluster)
client


# ## Load CM2.6 Data
# 
# The data is stored in [xarray-zarr](http://xarray.pydata.org/en/latest/io.html#zarr) format in Google Cloud Storage.
# This format is optimized for parallel distributed reads from within the cloud environment.
# 
# It may take up to a minute to initialize the dataset when you run this cell.

# In[ ]:

#experiment = 'one_percent'
experiment = 'control'

# Load from cloud object storage
import gcsfs
gcsmap = gcsfs.mapping.GCSMap('pangeo-data/cm2.6/%s/temp_salt_u_v-5day_avg/' % experiment)
# decode_times=False keeps the time coordinate as raw numbers
# rather than decoding it into datetime objects
ds = xr.open_zarr(gcsmap, decode_cf=True, decode_times=False)

# Print dataset
ds


# ## Visualize Temperature Data with Holoviews and Datashader
# 
# The cells below show how to interactively explore the dataset.
# 
# _**Warning**: it takes ~10-20 seconds to render each image after moving the sliders. Please be patient. There is an open [github issue](https://github.com/bokeh/datashader/issues/598) about improving the performance of datashader with this sort of dataset._

# In[ ]:

hv_ds = hv.Dataset(ds['temp'])
qm = hv_ds.to(hv.QuadMesh, kdims=["xt_ocean", "yt_ocean"], dynamic=True)


# In[ ]:

get_ipython().run_cell_magic('opts', "QuadMesh [width=800 height=500 colorbar=True] (cmap='magma')", 'regrid(qm, precompute=True)\n')


# ## Make an Expensive Calculation
# 
# Here we make a big reduction: the mean of temperature over time and longitude (a zonal mean). This demonstrates how the cluster distributes the reads from storage.

# In[ ]:

temp_zonal_mean = ds.temp.mean(dim=('time', 'xt_ocean'))
temp_zonal_mean


# Depending on the size of your cluster, the next cell will take a while. On a cluster of 40 workers, it took ~12 minutes.

# In[ ]:

get_ipython().run_line_magic('time', 'temp_zonal_mean.load()')


# In[ ]:

fig, ax = plt.subplots(figsize=(16, 8))
temp_zonal_mean.plot.contourf(yincrease=False, levels=np.arange(-2, 30))
plt.title('Naive Zonal Mean Temperature')


# The mean is "naive" because every grid point counts equally, ignoring the
# varying cell sizes of the model's tripolar grid; see the weighted-mean
# sketch at the end of the notebook.
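

# A quick check on the scale of the data helps explain why a distributed
# cluster is needed here: `nbytes` reports the uncompressed, in-memory size
# of a variable. This is a small supplementary check, not part of the
# original analysis above.

# In[ ]:

# Uncompressed size of the full temperature variable, in terabytes
print('temp: %.2f TB' % (ds.temp.nbytes / 1e12))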
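

# A minimal sketch of what a properly weighted zonal mean might look like.
# The true tracer-cell area field is *not* included in this zarr store, so
# `weights` below is just a uniform placeholder built with `ones_like`; it
# only demonstrates the mechanics of xarray's `DataArray.weighted` API
# (available in xarray >= 0.15.1). Substitute the real CM2.6 cell-area field
# from the model grid files to get a physically correct result.

# In[ ]:

# Placeholder weights with one value per grid cell; swap in the real
# tracer-cell area for a correct, area-weighted mean.
weights = xr.ones_like(ds.temp.isel(time=0))
temp_weighted_mean = ds.temp.weighted(weights).mean(dim=('time', 'xt_ocean'))
temp_weighted_mean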
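

# Finally, when you are done, it is good practice to release the cloud
# resources you launched at the top of the notebook (uncomment to run):

# In[ ]:

# client.close()
# cluster.close()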