#!/usr/bin/env python
# coding: utf-8

# # Write National Water Model (NWM) model data to Zarr
#
# Notebook export: opens a series of NWM short-range forcing files over
# OPeNDAP, writes the combined dataset to a Zarr store on S3, then reads the
# store back to check the round trip.

# In[1]:

from dask.distributed import Client, progress, LocalCluster
import pandas as pd
import xarray as xr
import s3fs


# In[2]:

# Local dask cluster with default settings; what it provides depends on the
# machine you are using.
cluster = LocalCluster()
client = Client(cluster)
client  # notebook display of the client; no effect when run as a script


# In[3]:

# Base OPeNDAP (THREDDS dodsC) endpoint for the short-range forcing files.
root = 'http://tds.renci.org:8080/thredds/dodsC/nwm/forcing_short_range/'


# In[4]:

# One timestamp per hour, 2018-04-01 18:00 through 2018-04-02 04:00 inclusive.
dates = pd.date_range(
    start='2018-04-01T18:00',
    end='2018-04-02T04:00',
    freq='H',
)


# In[5]:

# One URL per timestamp: <root><YYYYMMDD>/nwm.t<HH>z...f001.conus.nc
# (f001 is presumably the first forecast hour of each cycle -- TODO confirm).
url_template = '{}{}/nwm.t{}z.short_range.forcing.f001.conus.nc'
urls = [
    url_template.format(root, stamp.strftime('%Y%m%d'), stamp.strftime('%H'))
    for stamp in dates
]


# In[6]:

# Target location of the Zarr store (bucket/prefix on S3).
f_zarr = 'rsignell/nwm/test01'


# In[7]:

# %%time cell magic: get_ipython() only exists under IPython/Jupyter.
# The magic string is executed as a cell and defines `ds`.
get_ipython().run_cell_magic(
    'time',
    '',
    "ds = xr.open_mfdataset(urls,concat_dim='time')\n",
)


# In[8]:

ds  # notebook display of the combined dataset


# In[9]:

# anon=False: authenticated (non-anonymous) S3 access.
fs = s3fs.S3FileSystem(anon=False)


# In[10]:

# Mapping view over the S3 prefix, used as the Zarr store for writing.
d = s3fs.S3Map(f_zarr, s3=fs)


# In[11]:

# %time line magic: write the dataset to the store; mode='w' is passed to
# to_zarr for the write.
get_ipython().run_line_magic('time', "ds.to_zarr(store=d, mode='w')")


# ## Test to see if we can read what we wrote

# In[12]:

# Fresh mapping over the same prefix for the read-back check.
s3map = s3fs.S3Map(f_zarr, s3=fs)


# In[13]:

# works if auto_chunk=False
ds2 = xr.open_zarr(s3map, auto_chunk=False)


# In[14]:

ds2  # notebook display of the dataset read back from Zarr


# In[15]:

# Same read with auto_chunk=True. NOTE(review): the comment on the previous
# read suggests this is the case that does not work -- confirm against the
# xarray version in use.
ds3 = xr.open_zarr(s3map, auto_chunk=True)


# In[ ]: