#!/usr/bin/env python
# coding: utf-8

# # Explore CONUS404 Dataset
# This dataset was created by extracting specified variables from a collection of
# wrf2d output files, rechunking them to better facilitate data extraction for a
# variety of use cases, and adding CF-convention metadata to allow easier analysis,
# visualization, and data extraction using Xarray and HoloViz.

# In[1]:


import os
os.environ['USE_PYGEOS'] = '0'
import fsspec
import xarray as xr
import hvplot.xarray
import intake
import metpy
import cartopy.crs as ccrs


# ## Open Dataset
#
# ### 1) Load data from an Intake catalog
# For this demonstration notebook, we will open a cloud-native dataset. The details
# of how to access it are stored in an `intake` catalog.

# In[2]:


# open the hytest data intake catalog
hytest_cat = intake.open_catalog(
    r"https://raw.githubusercontent.com/hytest-org/hytest/main/dataset_catalog/hytest_intake_catalog.yml"
)
list(hytest_cat)


# In[3]:


# open the conus404 sub-catalog
cat = hytest_cat['conus404-catalog']
list(cat)


# In[16]:


## NOTE: we happen to know this dataset's handle/name.
dataset = 'conus404-hourly-cloud'
## If you did not know this name, you could list the datasets in the catalog
## with the command `list(cat)`.
## But since we do know the name, let's look at its metadata:
cat[dataset]


# ### 2) Parallelize with Dask
# Some of the steps we will take are aware of parallel clustered compute environments
# using `dask`. We're going to start a cluster now so that future steps can take
# advantage of this ability.
#
# This is an optional step, but it speeds up data loading significantly, especially
# when accessing data from the cloud.

# In[5]:


get_ipython().run_line_magic('run', '/shared/users/environment_set_up/Start_Dask_Cluster_Nebari.ipynb')
## If this notebook is not being run on Nebari/ESIP, replace the above
## path name with a helper appropriate to your compute environment. Examples:
# %run ../environment_set_up/Start_Dask_Cluster_Denali.ipynb
# %run ../environment_set_up/Start_Dask_Cluster_Tallgrass.ipynb
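# If none of those helpers apply (for example, you are working on a laptop or a
# standalone server), a local Dask cluster is a reasonable fallback. The cell below
# is a minimal sketch, assuming `dask.distributed` is installed in your environment;
# run it *instead of* the `%run` cell above. It binds the same `client` and `cluster`
# names used in the next cells.

# In[ ]:


from dask.distributed import Client, LocalCluster

cluster = LocalCluster()   # size workers from the local machine's cores and memory
client = Client(cluster)   # make this cluster the default scheduler for dask-aware operations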
# In[6]:


client


# In[7]:


cluster.scale(30)


# ### 3) Explore the dataset

# In[17]:


print(f"Reading {dataset} metadata...", end='')
ds = cat[dataset].to_dask().metpy.parse_cf()
print("done")


# Examine the grid data structure for SNOW:

# In[ ]:


ds.SNOW


# It looks like this dataset is organized along three dimension coordinates
# (x, y, and time), and there is a `metpy_crs` coordinate attached:

# In[14]:


crs = ds['SNOW'].metpy.cartopy_crs
crs


# ## Example A: Load the entire spatial domain for a variable at a specific time step

# In[ ]:


get_ipython().run_cell_magic('time', '', "da = ds.T2.sel(time='2009-12-24 00:00').load()\n### NOTE: the `load()` is dask-aware, so will operate in parallel if\n### a cluster has been started. \n")


# In[ ]:


da.hvplot.quadmesh(x='lon', y='lat', rasterize=True, geo=True, tiles='OSM', cmap='viridis').opts('Image', alpha=0.5)


# ## Example B: Load a time series for a variable at a specific grid cell for a specified time range
# **SIDE NOTE:** To identify a point, we start with its lat/lon coordinates, but the
# data is indexed in Lambert Conformal Conic (x, y) coordinates, so we need to
# transform the point using the built-in `crs` we examined earlier:

# In[ ]:


lat, lon = 39.978322, -105.2772194
x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree())
print(x, y)   # these values are in the dataset's LCC x/y coordinates


# In[ ]:


get_ipython().run_cell_magic('time', '', "da = ds.PREC_ACC_NC.sel(x=x, y=y, method='nearest').sel(time=slice('2013-01-01 00:00','2013-12-31 00:00')).load()\n")


# In[ ]:


da.hvplot(x='time', grid=True)


# ## Example C: Compute the time mean for a variable over the entire domain for a specific time period

# In[20]:


get_ipython().run_cell_magic('time', '', "da = ds.PREC_ACC_NC.sel(time=slice('2016-01-01 00:00','2017-01-01 00:00')).mean(dim='time').compute()\n")


# In[ ]:


## The commented-out line below would instead compute the mean over the full period
## of record; it is left disabled because it takes considerably longer to run.
get_ipython().run_cell_magic('time', '', "#da = ds.PREC_ACC_NC.mean(dim='time').compute()\n")


# In[ ]:


da.hvplot.image(x='x', y='y', rasterize=True, crs=crs, tiles='OSM', alpha=0.66, cmap='viridis')


# ## Stop cluster

# In[ ]:


client.close(); cluster.shutdown()
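# If you want to keep a computed result, such as the 2016 time mean of `PREC_ACC_NC`
# from Example C, it can still be written out after the cluster is shut down, because
# `.compute()` already pulled it into local memory. The cell below is a minimal
# sketch: the output filename is an arbitrary placeholder, and the `metpy_crs`
# coordinate added by `parse_cf()` is dropped first because it holds a Python object
# that NetCDF output generally cannot serialize.

# In[ ]:


# Drop the non-serializable metpy_crs coordinate (if present) and write to NetCDF.
da.drop_vars('metpy_crs', errors='ignore').to_netcdf('prec_acc_nc_mean_2016.nc')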