#!/usr/bin/env python
# coding: utf-8

# # Explore CONUS404 Dataset
# This dataset was created by extracting specified variables from a collection of
# wrf2d output files, rechunking them to better facilitate data extraction for a
# variety of use cases, and adding CF-convention metadata to allow easier analysis,
# visualization, and data extraction using Xarray and HoloViz.

# In[1]:


import os
os.environ['USE_PYGEOS'] = '0'
import fsspec
import xarray as xr
import hvplot.xarray
import intake
import metpy
import cartopy.crs as ccrs


# ## Open Dataset
#
# ### 1) Load data from an Intake catalog
# For this demonstration notebook, we will open a cloud-native dataset. The details
# of how to access it are stored in an `intake` catalog.

# In[2]:


# open the hytest data intake catalog
hytest_cat = intake.open_catalog(
    r"https://raw.githubusercontent.com/hytest-org/hytest/main/dataset_catalog/hytest_intake_catalog.yml"
)
list(hytest_cat)


# In[3]:


# open the conus404 sub-catalog
cat = hytest_cat['conus404-catalog']
list(cat)


# In[16]:


## NOTE: we happen to know this dataset's handle/name.
dataset = 'conus404-hourly-cloud'
## If you did not know this name, you could list the datasets in the catalog
## with the command `list(cat)`.
## But since we do know the name, let's look at its metadata:
cat[dataset]


# ### 2) Parallelize with Dask
# Some of the steps we will take are aware of parallel clustered compute environments
# using `dask`. We're going to start a cluster now so that future steps can take
# advantage of this ability.
#
# This is an optional step, but it speeds up data loading significantly, especially
# when accessing data from the cloud.

# In[5]:


get_ipython().run_line_magic('run', '/shared/users/environment_set_up/Start_Dask_Cluster_Nebari.ipynb')
## If this notebook is not being run on Nebari/ESIP, replace the above
## path name with a helper appropriate to your compute environment. Examples:
# %run ../environment_set_up/Start_Dask_Cluster_Denali.ipynb
# %run ../environment_set_up/Start_Dask_Cluster_Tallgrass.ipynb
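# If none of those helpers apply (for example, you are working on a laptop or a
# standalone server), a local Dask cluster is a reasonable fallback. The cell below
# is a minimal sketch, assuming `dask.distributed` is installed in your environment;
# run it *instead of* the `%run` cell above. It binds the same `client` and `cluster`
# names used in the next cells.

# In[ ]:


from dask.distributed import Client, LocalCluster

cluster = LocalCluster()   # size workers from the local machine's cores and memory
client = Client(cluster)   # make this cluster the default scheduler for dask-aware operations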
# In[6]:


client


# In[7]:


cluster.scale(30)


# ### 3) Explore the dataset

# In[17]:


print(f"Reading {dataset} metadata...", end='')
ds = cat[dataset].to_dask().metpy.parse_cf()
print("done")


# Examine the grid data structure for SNOW:

# In[ ]:


ds.SNOW


# It looks like this dataset is organized along three dimension coordinates
# (x, y, and time), and there is a `metpy_crs` coordinate attached:

# In[14]:


crs = ds['SNOW'].metpy.cartopy_crs
crs


# ## Example A: Load the entire spatial domain for a variable at a specific time step

# In[ ]:


get_ipython().run_cell_magic('time', '', "da = ds.T2.sel(time='2009-12-24 00:00').load()\n### NOTE: the `load()` is dask-aware, so will operate in parallel if\n### a cluster has been started. \n")


# In[ ]:


da.hvplot.quadmesh(x='lon', y='lat', rasterize=True, geo=True, tiles='OSM', cmap='viridis').opts('Image', alpha=0.5)


# ## Example B: Load a time series for a variable at a specific grid cell for a specified time range
# **SIDE NOTE:** To identify a point, we start with its lat/lon coordinates, but the
# data is indexed in Lambert Conformal Conic (x, y) coordinates, so we need to
# transform the point using the built-in `crs` we examined earlier:

# In[ ]:


lat, lon = 39.978322, -105.2772194
x, y = crs.transform_point(lon, lat, src_crs=ccrs.PlateCarree())
print(x, y)   # these values are in the dataset's LCC x/y coordinates


# In[ ]:


get_ipython().run_cell_magic('time', '', "da = ds.PREC_ACC_NC.sel(x=x, y=y, method='nearest').sel(time=slice('2013-01-01 00:00','2013-12-31 00:00')).load()\n")


# In[ ]:


da.hvplot(x='time', grid=True)


# ## Example C: Compute the time mean for a variable over the entire domain for a specific time period

# In[20]:


get_ipython().run_cell_magic('time', '', "da = ds.PREC_ACC_NC.sel(time=slice('2016-01-01 00:00','2017-01-01 00:00')).mean(dim='time').compute()\n")


# In[ ]:


## The commented-out line below would instead compute the mean over the full period
## of record; it is left disabled because it takes considerably longer to run.
get_ipython().run_cell_magic('time', '', "#da = ds.PREC_ACC_NC.mean(dim='time').compute()\n")


# In[ ]:


da.hvplot.image(x='x', y='y', rasterize=True, crs=crs, tiles='OSM', alpha=0.66, cmap='viridis')


# ## Stop cluster

# In[ ]:


client.close(); cluster.shutdown()
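# If you want to keep a computed result, such as the 2016 time mean of `PREC_ACC_NC`
# from Example C, it can still be written out after the cluster is shut down, because
# `.compute()` already pulled it into local memory. The cell below is a minimal
# sketch: the output filename is an arbitrary placeholder, and the `metpy_crs`
# coordinate added by `parse_cf()` is dropped first because it holds a Python object
# that NetCDF output generally cannot serialize.

# In[ ]:


# Drop the non-serializable metpy_crs coordinate (if present) and write to NetCDF.
da.drop_vars('metpy_crs', errors='ignore').to_netcdf('prec_acc_nc_mean_2016.nc')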