import fsspec
import xarray as xr
# Download the helper script from a gist pinned to a specific revision and save
# it next to the notebook; it is executed with %run in the following cell.
helper_url = 'https://gist.githubusercontent.com/agoodm/25d41ce0c47cd714271be66d0db0459d/raw/34cd54fd4a884979470e4ccd8df7ee5065c1daf9/parquet_refs.py'
helper_path = 'parquet_refs.py'
fs_http = fsspec.filesystem('https')
fs_http.download(helper_url, helper_path)
[None]
# Execute the downloaded script so the names it defines become available in the
# notebook namespace (presumably including ParquetReferenceMapper, which is used
# below without an import -- confirm against the script).
%run parquet_refs.py
# Anonymous S3 access through a custom endpoint. The option dict is defined once
# and reused both for this filesystem and, later, as `t_opts`.
t_opts = {'anon': True, 'client_kwargs':{'endpoint_url':'https://ncsa.osn.xsede.org'}}
fs = fsspec.filesystem('s3', **t_opts)
lazy_refs = 's3://esip/noaa/nwm/zarr_lazy_refs'

# Report how many entries sit under the reference path and their combined size.
ref_listing = fs.ls(lazy_refs)
print(f'Number of reference files: {len(ref_listing)}')
ref_bytes = fs.du(lazy_refs)
print(f'Total size of references: {ref_bytes/1e9} GB')
Number of reference files: 28 Total size of references: 0.756072188 GB
%%time
# Build the mapper from an explicitly requested S3 filesystem instead of the
# global `fs`: `fs` is rebound to the reference filesystem below, so reading it
# here would hand the wrong filesystem to the mapper whenever this cell is
# re-run. `t_opts` holds the same options `fs` was originally created with, and
# fsspec normally caches instances by their arguments, so this is expected to
# reuse the existing S3 filesystem rather than create a new one.
s3_fs = fsspec.filesystem('s3', **t_opts)
mapper = ParquetReferenceMapper(lazy_refs, fs=s3_fs)

# `r_opts` (anonymous S3, no custom endpoint) is passed as remote_options, while
# `t_opts` (which carries the endpoint) is passed as target_options.
r_opts = {'anon': True}
fs = fsspec.filesystem('reference', fo=mapper, remote_protocol='s3',
                       remote_options=r_opts, target_options=t_opts)

# Open the root of the reference filesystem as a Zarr store.
ds = xr.open_dataset(fs.get_mapper(''), engine='zarr')
CPU times: user 1.85 s, sys: 192 ms, total: 2.05 s Wall time: 4.35 s
%%time
# Select the TRAD variable at this timestamp, then read the values into memory.
snapshot = ds['TRAD'].sel(time='1990-01-01 00:00')
da = snapshot.load()
CPU times: user 393 ms, sys: 130 ms, total: 523 ms Wall time: 1.84 s
%%time
# Same selection for a later timestamp; `da` is overwritten with the new data.
snapshot = ds['TRAD'].sel(time='2015-01-01 00:00')
da = snapshot.load()
CPU times: user 392 ms, sys: 96.5 ms, total: 489 ms Wall time: 1.13 s
# Reduce the most recently loaded selection to its mean and display the raw value.
mean_trad = da.mean()
mean_trad.data
array(266.92635398)