import fsspec
import xarray as xr
import hvplot.xarray
import numpy as np
# S3 filesystem handle used for all read-only inspection below.
# requester_pays / skip_instance_cache are forwarded to the 's3' backend
# as given (presumably: bill the caller, and build a fresh instance
# instead of reusing a cached one -- confirm against the fsspec/s3fs docs).
fs_read = fsspec.filesystem(
    's3',
    requester_pays=True,
    skip_instance_cache=True,
)

# The two stores being compared: the '.tmp' store and the '.zarr' store
# that sit side by side under the same rechunk/ prefix.
temp_name = 'esip-qhub/noaa/AORC/rechunk/test06.tmp'
target_name = 'esip-qhub/noaa/AORC/rechunk/test06.zarr'

# Every entry exactly two path levels below the temp store.
flist = fs_read.glob(temp_name + '/*/*')
len(flist)
28808
# Disk usage reported for everything under temp_name, divided by 1e9 so the
# displayed value reads as decimal gigabytes.
fs_read.du(temp_name)/1e9 # dataset size in GB
7.655349661
%%time
# Size in bytes of every temp-store entry, in the same order as flist.
# sizes() is fsspec's bulk counterpart to size(): it returns one size per
# path, and lets the backend batch the lookups instead of this cell issuing
# them one at a time from a Python-level loop.
fsize = fs_read.sizes(flist)
CPU times: user 18.8 s, sys: 971 ms, total: 19.7 s Wall time: 18.9 s
# Wrap the temp-file sizes (bytes scaled by 1e6 -> MB) in a named DataArray
# so that the hvplot accessor can draw them.
da = xr.DataArray(
    np.array(fsize) / 1e6,
    name='size',
)
# Histogram of the per-file sizes; kept as the last expression so the
# notebook renders the plot.
da.hvplot.hist(grid=True, title='Temp files in MB')
# Every entry exactly two path levels below the target store.
flist2 = fs_read.glob(target_name + '/*/*')
len(flist2)
1992
%%time
# Size in bytes of every target-store entry, in the same order as flist2.
# sizes() is fsspec's bulk counterpart to size(): it returns one size per
# path, and lets the backend batch the lookups instead of this cell issuing
# them one at a time from a Python-level loop.
fsize2 = fs_read.sizes(flist2)
CPU times: user 283 ms, sys: 58.1 ms, total: 341 ms Wall time: 293 ms
# Wrap the target-file sizes (bytes scaled by 1e6 -> MB) in a named
# DataArray so that the hvplot accessor can draw them. Note this rebinds `da`.
da = xr.DataArray(
    np.array(fsize2) / 1e6,
    name='size',
)
# Histogram of the per-file sizes; kept as the last expression so the
# notebook renders the plot.
da.hvplot.hist(grid=True, title='Target files in MB')