%matplotlib inline
from dask.distributed import Client, progress, LocalCluster
from dask_kubernetes import KubeCluster
import xarray as xr
import s3fs
Start a dask cluster to crunch the data
cluster = KubeCluster.from_yaml('/home/jovyan/custom-worker-template.yaml')
cluster.scale(30);
client = Client(cluster)
client
Client
|
Cluster
|
# jetstream s3
# url='https://iu.jetstream-cloud.org:8080'
# fs = s3fs.S3FileSystem(client_kwargs=dict(endpoint_url=url), anon=True)
# s3map = s3fs.S3Map('rsignell/adcirc_test01', s3=fs)
# AWS s3
fs = s3fs.S3FileSystem(anon=True)
s3map = s3fs.S3Map('rsignell/adcirc_test01', s3=fs)
ds = xr.open_zarr(s3map)
ds
<xarray.Dataset> Dimensions: (mesh: 1, nbou: 1190, nele: 18300169, neta: 292, node: 9228245, nope: 1, nvel: 157605, nvertex: 3, time: 720) Coordinates: * time (time) datetime64[ns] 2031-08-03T02:10:00 ... x (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> y (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> Dimensions without coordinates: mesh, nbou, nele, neta, node, nope, nvel, nvertex Data variables: adcirc_mesh (mesh) int32 dask.array<shape=(1,), chunksize=(1,)> depth (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> element (nele, nvertex) int32 dask.array<shape=(18300169, 3), chunksize=(18300169, 3)> ibtype (nbou) int32 dask.array<shape=(1190,), chunksize=(1190,)> ibtypee (nope) int32 dask.array<shape=(1,), chunksize=(1,)> max_nvdll int32 ... max_nvell int32 ... nbdv (neta) int32 dask.array<shape=(292,), chunksize=(292,)> nbvv (nvel) int32 dask.array<shape=(157605,), chunksize=(157605,)> nvdll (nope) int32 dask.array<shape=(1,), chunksize=(1,)> nvell (nbou) int32 dask.array<shape=(1190,), chunksize=(1190,)> zeta (time, node) float64 dask.array<shape=(720, 9228245), chunksize=(10, 141973)> Attributes: Conventions: UGRID-0.9.0 _FillValue: -99999.0 a00: 0.35 agrid: grid b00: 0.3 c00: 0.35 cf: 0.0 comments: nocomments contact: clint@ices.utexas.edu convention: none cori: 0.0 creation_date: 2018-06-20 14:26:00 -05:00 description: sl15v3_2007_r7 katrina 20061020 ! 32 CHARACTER ALPH... dramp: 0.5 dt: 1.0 eslm: 50.0 fort.15: ==== Input File Parameters (below) ==== grid_type: Triangular h0: 0.1 history: NetCDF host: UTAUSTIN ics: 2 ihot: 0 institution: UniversityofTexas model: ADCIRC modification_date: 2018-06-20 14:26:00 -05:00 nbfr: 8 ncor: 1 nolibf: 1 nolica: 0 nolicat: 0 nolifa: 2 nramp: 2 ntif: 8 ntip: 1 nwp: 8 nws: 12 references: one reftim: 0.0 rnday: 10.0 rundes: sl15v3_2007_r7 katrina 20061020 ! 32 CHARACTER ALPH... runid: padcirc 46.28sb ! 24 CHARACTER ALPA... sfea0: 29.000000000000004 slam0: 265.5 source: PADCIRC statim: 0.0 tau0: -3.0 title: Scheibe version: 54.dev
ds['zeta']
<xarray.DataArray 'zeta' (time: 720, node: 9228245)> dask.array<shape=(720, 9228245), dtype=float64, chunksize=(10, 141973)> Coordinates: * time (time) datetime64[ns] 2031-08-03T02:10:00 2031-08-03T02:20:00 ... x (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> y (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> Dimensions without coordinates: node Attributes: location: node long_name: water surface elevation above geoid mesh: adcirc_mesh standard_name: sea_surface_height_above_geoid units: m
ds['zeta'].nbytes/1.e9
53.1546912
ds['zeta'].max(dim='time')
<xarray.DataArray 'zeta' (node: 9228245)> dask.array<shape=(9228245,), dtype=float64, chunksize=(141973,)> Coordinates: x (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> y (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> Dimensions without coordinates: node
max_var = ds['zeta'].max(dim='time').persist()
progress(max_var)
VBox()
import numpy as np
import datashader as dshade
import holoviews as hv
import geoviews as gv
import cartopy.crs as ccrs
from holoviews.operation.datashader import datashade, rasterize
from colorcet import cm_n
from matplotlib.cm import jet
datashade.precompute = True
hv.extension('bokeh')
%opts Image RGB VectorField [width=800 height=600]
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
v = np.vstack((ds['x'], ds['y'], max_var)).T
verts = pd.DataFrame(v, columns=['x','y','z'])
points = gv.operation.project_points(gv.Points(verts, vdims=['z']))
tris = pd.DataFrame(ds['element'].values.astype('int')-1, columns=['v0','v1','v2'])
tiles = gv.WMTS('https://maps.wikimedia.org/osm-intl/{Z}/{X}/{Y}@2x.png')
value = 'max water level'
label = '{} (m)'.format(value)
trimesh = gv.TriMesh((tris, points), label=label)
%%opts Image [colorbar=True] (cmap=jet)
error_meshes = rasterize(trimesh,aggregator=dshade.mean('z'))
tiles * error_meshes
%%time
ds['zeta'][:,800000].plot()
CPU times: user 1.35 s, sys: 136 ms, total: 1.48 s Wall time: 3.17 s
[<matplotlib.lines.Line2D at 0x7fbe18054f28>]