#!/usr/bin/env python
# coding: utf-8

# # National Water Model analysis using Zarr and Dask distributed
#
# Notebook export: opens a Zarr dataset from an S3-compatible endpoint with
# xarray, connects to a Dask distributed scheduler, and computes/plots a few
# reductions of one variable. The `get_ipython()` calls require an IPython
# kernel; this file is not runnable under plain `python`.

# In[1]:

# s3fs is needed to map the remote bucket; install it on demand if missing.
# Only a failed import should trigger the install -- a bare `except:` would
# also swallow KeyboardInterrupt/SystemExit and unrelated import-time errors.
try:
    import s3fs
except ImportError:
    get_ipython().system('conda install s3fs -y')
    import s3fs

# In[2]:

get_ipython().run_line_magic('matplotlib', 'inline')
import xarray as xr

# In[3]:

from dask.distributed import Client

# Connect to an already-running scheduler at this address.
client = Client("dask-scheduler:8786")
client

# Dashboard: https://js-157-11.jetstream-cloud.org/user/rsignell-usgs/proxy/8787/status

# In[4]:

# Endpoint of the S3-compatible object store (passed as `endpoint_url` below).
url = 'https://iu.jetstream-cloud.org:8080'

# In[5]:

# Anonymous (unauthenticated) access against the custom endpoint.
fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=url), anon=True)

# In[6]:

# Mapping view over the bucket path, used as the Zarr store.
s3map = s3fs.S3Map('rsignell/nwm/test_week5d', s3=fs)

# In[7]:

ds = xr.open_zarr(s3map)

# In[8]:

ds

# In[9]:

# Name of the dataset variable analysed in the remaining cells.
var = 'T2D'

# In[10]:

# Time coordinate values of the first and last entries along the leading axis.
print('start:', ds[var][0].time.values)
print(' stop:', ds[var][-1].time.values)

# In[11]:

# Size of the whole dataset in gigabytes (10**9 bytes).
ds.nbytes / 1.e9

# In[12]:

# Size of the selected variable alone, in gigabytes.
ds[var].nbytes / 1.e9

# In[13]:

# Timed mean over 'time' on a small 10x10x10 corner of the array.
get_ipython().run_cell_magic('time', '', "mean_var = ds[var][:10,:10,:10].mean(dim='time').compute()\n")

# In[14]:

# Timed mean over 'time' on the full array; overwrites `mean_var` from above.
get_ipython().run_cell_magic('time', '', "mean_var = ds[var][:,:,:].mean(dim='time').compute()\n")

# In[15]:

# Plot every `isub`-th point along both remaining axes of the mean field.
isub = 4
mean_var[::isub, ::isub].plot.imshow(figsize=(8, 6))

# In[16]:

# Timed extraction and plot of all leading-axis entries at index (2000, 2000)
# NOTE(review): presumably a time series at one grid cell -- confirm dim order.
get_ipython().run_cell_magic('time', '', 'ds1d = ds[var][:,2000,2000]\nds1d.plot()\n')