from dask.distributed import Client, progress, LocalCluster
import xarray as xr
import s3fs
# Start a Dask cluster on the local machine and connect a client to it.
# LocalCluster() is called with no arguments, so what you get depends on
# the machine you are using.
cluster = LocalCluster()
client = Client(cluster)
# Bare expression on the last line of the cell so the notebook displays it.
client
Client
|
Cluster
|
On Pangeo, the Dask dashboard is at: http://pangeo.pydata.org/user/rsignell-usgs/proxy/8787/status
So on this JupyterHub, try the analogous URL: https://jupyter-jetstream.unidata.ucar.edu/user/rsignell/proxy/8787/status
http://pangeo.pydata.org/user/rsignell-usgs/proxy/8787/status
Hmm, that doesn't work...
# Open the Zarr store at f_zarr through s3fs, pointing the S3 client at a
# custom endpoint URL instead of the default one.
bucket_endpoint = 'https://iu.jetstream-cloud.org:8080'
f_zarr = 'rsignell/nwm/test_week'

# anon=False: connect with credentials rather than anonymously.
fs = s3fs.S3FileSystem(
    anon=False,
    client_kwargs={'endpoint_url': bucket_endpoint},
)

# Wrap the store path in a mapping object and hand that to xarray.
d = s3fs.S3Map(f_zarr, s3=fs)
ds = xr.open_zarr(d)

# Bare expression on the last line of the cell so the notebook displays it.
ds
<xarray.Dataset> Dimensions: (nv: 2, reference_time: 146, time: 146, x: 4608, y: 3840) Coordinates: * reference_time (reference_time) datetime64[ns] 2018-04-01 ... * time (time) datetime64[ns] 2018-04-01T01:00:00 ... * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 -2.301e+06 ... * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 -1.917e+06 ... Dimensions without coordinates: nv Data variables: LWDOWN (time, y, x) float64 dask.array<shape=(146, 3840, 4608), chunksize=(1, 3840, 4608)> PSFC (time, y, x) float64 dask.array<shape=(146, 3840, 4608), chunksize=(1, 3840, 4608)> Q2D (time, y, x) float64 dask.array<shape=(146, 3840, 4608), chunksize=(1, 3840, 4608)> RAINRATE (time, y, x) float32 dask.array<shape=(146, 3840, 4608), chunksize=(1, 3840, 4608)> SWDOWN (time, y, x) float64 dask.array<shape=(146, 3840, 4608), chunksize=(1, 3840, 4608)> T2D (time, y, x) float64 dask.array<shape=(146, 3840, 4608), chunksize=(1, 3840, 4608)> U2D (time, y, x) float64 dask.array<shape=(146, 3840, 4608), chunksize=(1, 3840, 4608)> V2D (time, y, x) float64 dask.array<shape=(146, 3840, 4608), chunksize=(1, 3840, 4608)> time_bounds (time, nv) datetime64[ns] dask.array<shape=(146, 2), chunksize=(146, 2)> Attributes: DODS.strlen: 0 DODS_EXTRA.Unlimited_Dimension: time model_initialization_time: 2018-04-01_00:00:00 model_output_valid_time: 2018-04-01_01:00:00
# Size of the whole dataset: nbytes scaled to units of 1e9 bytes (GB).
ds.nbytes/1.e9
155.005819456
# Size of the T2D variable alone, in units of 1e9 bytes (GB).
ds['T2D'].nbytes/1.e9
20.66743296
# Mean of T2D over the 'time' dimension. There is no .compute() here, so the
# cell only displays the resulting (dask-backed) DataArray.
ds['T2D'].mean(dim='time')
<xarray.DataArray 'T2D' (y: 3840, x: 4608)> dask.array<shape=(3840, 4608), dtype=float64, chunksize=(3840, 4608)> Coordinates: * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 -2.301e+06 ... * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 -1.917e+06 ...
%%time
# Keep every isub-th point along the second and third axes of T2D (y and x
# in the dataset's (time, y, x) layout), average over 'time', and call
# .compute() to get the actual values.
isub = 4
mean_temp = ds['T2D'][:,::isub,::isub].mean(dim='time').compute()
CPU times: user 53.1 s, sys: 8.87 s, total: 1min 1s Wall time: 6min 13s
%matplotlib inline
# Show the subsampled time-mean field as an image in a 12x8 figure.
mean_temp.plot.imshow(figsize=(12,8))
<matplotlib.image.AxesImage at 0x7f3881bc0198>
%%time
# Select all times of T2D at index 1000 on each of the two remaining axes
# (a single y/x grid cell), giving a 1-D series. persist() starts the
# computation while keeping the result dask-backed; then plot it.
t1d = ds['T2D'][:,1000,1000].persist()
t1d.plot()
CPU times: user 14.8 s, sys: 2.73 s, total: 17.5 s Wall time: 1min 45s