%matplotlib inline
from dask.distributed import Client, progress, LocalCluster
from dask_kubernetes import KubeCluster
import xarray as xr
import s3fs
# Create a Dask cluster on Kubernetes from the worker template YAML
# (presumably the pod spec for each worker -- confirm against the template file).
cluster = KubeCluster.from_yaml('/home/jovyan/worker-template.yaml')
# Ask the cluster for 10 workers.
cluster.scale(10);
# Bare expression: rendered as the KubeCluster widget when it ends a notebook cell.
cluster
VBox(children=(HTML(value='<h2>KubeCluster</h2>'), HBox(children=(HTML(value='\n<div>\n <style scoped>\n .…
# Connect a distributed client to the `cluster` object so that work is
# scheduled on its workers rather than in the local process.
client = Client(cluster)
# Bare expression: shows the client/cluster summary when it ends a notebook cell.
client
Client
|
Cluster
|
# Alternative source (disabled): Jetstream object store, reached through a
# custom S3 endpoint URL. Uncomment these three lines and comment out the
# AWS lines below to use it.
# url='https://iu.jetstream-cloud.org:8080'
# fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=url), anon=True)
# s3map = s3fs.S3Map('rsignell/nwm/test_week', s3=fs)

# Active source: AWS S3, accessed anonymously (anon=True, no credentials).
fs = s3fs.S3FileSystem(anon=True)
# Mapping over the bucket path; this is what gets handed to xarray as the Zarr store.
s3map = s3fs.S3Map('rsignell/nwm/test_week5c', s3=fs)
# Alternative store path (disabled) -- presumably a smaller test dataset; confirm.
#s3map = s3fs.S3Map('rsignell/nwm/tiny3a', s3=fs)
# Open the store as an xarray Dataset; the repr that follows shows the data
# variables as chunked dask arrays.
ds = xr.open_zarr(s3map)
# Bare expression: displays the Dataset summary when it ends a notebook cell.
ds
<xarray.Dataset> Dimensions: (time: 168, x: 4608, y: 3840) Coordinates: * time (time) datetime64[ns] 2018-04-01T01:00:00 2018-04-01T02:00:00 ... * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 -2.301e+06 ... * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 -1.917e+06 ... Data variables: LWDOWN (time, y, x) float64 dask.array<shape=(168, 3840, 4608), chunksize=(168, 384, 288)> PSFC (time, y, x) float64 dask.array<shape=(168, 3840, 4608), chunksize=(168, 384, 288)> Q2D (time, y, x) float64 dask.array<shape=(168, 3840, 4608), chunksize=(168, 384, 288)> RAINRATE (time, y, x) float32 dask.array<shape=(168, 3840, 4608), chunksize=(168, 384, 288)> SWDOWN (time, y, x) float64 dask.array<shape=(168, 3840, 4608), chunksize=(168, 384, 288)> T2D (time, y, x) float64 dask.array<shape=(168, 3840, 4608), chunksize=(168, 384, 288)> U2D (time, y, x) float64 dask.array<shape=(168, 3840, 4608), chunksize=(168, 384, 288)> V2D (time, y, x) float64 dask.array<shape=(168, 3840, 4608), chunksize=(168, 384, 288)> Attributes: model_initialization_time: 2018-04-01_00:00:00 model_output_valid_time: 2018-04-01_01:00:00
# Name of the data variable analysed in the rest of the notebook.
var = 'T2D'
# Size of that variable in gigabytes (bytes / 10**9).
ds[var].nbytes / 1e9
23.78170368
# Mean of the selected variable along the 'time' dimension. Displayed only to
# inspect the result's shape and chunking; the value is not assigned to anything.
ds[var].mean(dim='time')
<xarray.DataArray 'T2D' (y: 3840, x: 4608)> dask.array<shape=(3840, 4608), dtype=float64, chunksize=(384, 288)> Coordinates: * x (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 -2.301e+06 ... * y (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 -1.917e+06 ...
# Build the time-mean again and persist it, i.e. start the computation and
# keep the result held by the workers for reuse by later cells.
mean_var = ds[var].mean(dim='time').persist()
# Show a progress display for the persisted computation.
progress(mean_var)
VBox()
# Stride used to thin the grid before plotting: keep every `isub`-th cell
# along both y and x (the two dimensions of the time-mean field).
isub = 2
mean_var.isel(
    y=slice(None, None, isub),
    x=slice(None, None, isub),
).plot.imshow(figsize=(8, 6));
%%time
# Full time series of the selected variable at one grid cell, chosen by
# integer position (index 2000 along y and along x), not by coordinate value.
ds1d = ds[var].isel(y=2000, x=2000)
ds1d.plot()
CPU times: user 92 ms, sys: 8 ms, total: 100 ms Wall time: 1.36 s