#!/usr/bin/env python
# coding: utf-8

# # Write National Water Model (NWM) model data to Zarr
#
# Notebook export. It opens a few hourly NWM short-range forcing files from a
# THREDDS (dodsC) server, writes the combined dataset to a Zarr store on S3,
# then re-opens that store and plots the `T2D` variable as a read-back check.
# The `get_ipython()` calls below mean this file must be run under
# IPython/Jupyter, not plain `python`.

# In[1]:

from dask.distributed import Client, progress, LocalCluster
import pandas as pd
import xarray as xr
import s3fs

# In[2]:

# Local Dask cluster with default settings; what that gives you
# depends on the machine you are using.
cluster = LocalCluster()
client = Client(cluster)
client

# In[3]:

# Base URL of the short-range forcing files on the THREDDS server.
root = 'http://tds.renci.org:8080/thredds/dodsC/nwm/forcing_short_range/'

# In[4]:

# One timestamp per hourly model cycle to fetch (18:00 through 20:00).
dates = pd.date_range(start='2018-04-07T18:00', end='2018-04-07T20:00', freq='H')

# In[5]:

# Each cycle lives at <root><YYYYMMDD>/nwm.t<HH>z.short_range.forcing.f001.conus.nc
urls = [
    '{}{}/nwm.t{}z.short_range.forcing.f001.conus.nc'.format(
        root, a.strftime('%Y%m%d'), a.strftime('%H'))
    for a in dates
]

# In[6]:

# `concat_dim` only takes effect together with combine='nested'. Without it,
# current xarray releases (default combine='by_coords') reject the call with
# a ValueError, so the combine mode is spelled out explicitly here.
get_ipython().run_cell_magic(
    'time', '',
    "ds = xr.open_mfdataset(urls, combine='nested', concat_dim='time')\n",
)

# In[7]:

ds

# In[8]:

ds['ProjectionCoordinateSystem'].dtype

# In[9]:

#ds = ds.drop(['ProjectionCoordinateSystem'])

# In[10]:

ds

# In[11]:

# anon=False: use real (non-anonymous) AWS credentials, needed for writing.
fs = s3fs.S3FileSystem(anon=False)

# In[12]:

# Target location of the Zarr store, given as "<bucket>/<key prefix>".
f_zarr = 'rsignell/nwm/test02'

# In[13]:

# Mapping view of the S3 location, used as the Zarr store below.
d = s3fs.S3Map(f_zarr, s3=fs)

# In[14]:

# mode='w' replaces whatever is already stored at the target location.
get_ipython().run_line_magic('time', "ds.to_zarr(store=d, mode='w')")

# ## Test to see if we can read what we wrote

# In[15]:

ds2 = xr.open_zarr(d)

# In[ ]:

ds2

# In[ ]:

get_ipython().run_line_magic('matplotlib', 'inline')

# In[ ]:

# Image of the first entry along T2D's leading axis, keeping every
# `isub`-th point along the other two axes.
# NOTE(review): presumably the axes are (time, y, x) -- confirm against `ds2`.
isub = 4
ds2['T2D'][0, ::isub, ::isub].plot.imshow()

# In[ ]:

# Series along the leading axis at fixed index (1000, 1000) on the other two.
ds2['T2D'][:, 1000, 1000].plot()

# In[ ]: