%matplotlib inline
from dask.distributed import Client, progress, LocalCluster
from dask_kubernetes import KubeCluster
import xarray as xr
import s3fs
import numpy as np
Start a dask cluster to crunch the data
cluster = KubeCluster.from_yaml('/home/jovyan/custom-worker-template.yaml')
cluster.scale(30);
client = Client(cluster)
client
Client
|
Cluster
|
# AWS s3
fs = s3fs.S3FileSystem(anon=True)
s3map = s3fs.S3Map('rsignell/adcirc_test01', s3=fs)
ds = xr.open_zarr(s3map)
ds
<xarray.Dataset> Dimensions: (mesh: 1, nbou: 1190, nele: 18300169, neta: 292, node: 9228245, nope: 1, nvel: 157605, nvertex: 3, time: 720) Coordinates: * time (time) datetime64[ns] 2008-09-05T12:00:00 ... x (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> y (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> Dimensions without coordinates: mesh, nbou, nele, neta, node, nope, nvel, nvertex Data variables: adcirc_mesh (mesh) int32 dask.array<shape=(1,), chunksize=(1,)> depth (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> element (nele, nvertex) int32 dask.array<shape=(18300169, 3), chunksize=(18300169, 3)> ibtype (nbou) int32 dask.array<shape=(1190,), chunksize=(1190,)> ibtypee (nope) int32 dask.array<shape=(1,), chunksize=(1,)> max_nvdll int32 ... max_nvell int32 ... nbdv (neta) int32 dask.array<shape=(292,), chunksize=(292,)> nbvv (nvel) int32 dask.array<shape=(157605,), chunksize=(157605,)> nvdll (nope) int32 dask.array<shape=(1,), chunksize=(1,)> nvell (nbou) int32 dask.array<shape=(1190,), chunksize=(1190,)> zeta (time, node) float64 dask.array<shape=(720, 9228245), chunksize=(10, 141973)> Attributes: Conventions: UGRID-1.0.0 _FillValue: -99999.0 a00: 0.35 agrid: grid b00: 0.3 c00: 0.35 cf: 0.0 comments: nocomments contact: clint@ices.utexas.edu convention: none cori: 0.0 creation_date: 2018-06-20 14:26:00 -05:00 description: Hurricane Ike, 9 million node mesh dramp: 0.5 dt: 1.0 eslm: 50.0 fort.15: ==== Input File Parameters (below) ==== grid_type: Triangular h0: 0.1 history: NetCDF host: UTAUSTIN ics: 2 ihot: 0 institution: UniversityofTexas model: ADCIRC modification_date: 2018-06-20 14:26:00 -05:00 nbfr: 8 ncor: 1 nolibf: 1 nolica: 0 nolicat: 0 nolifa: 2 nramp: 2 ntif: 8 ntip: 1 nwp: 8 nws: 12 references: one reftim: 0.0 rnday: 10.0 rundes: Hurricane Ike, 9 million node mesh runid: padcirc 46.28sb sfea0: 29.000000000000004 slam0: 265.5 source: PADCIRC statim: 0.0 tau0: -3.0 title: Scheibe version: 54.dev
ds['zeta']
<xarray.DataArray 'zeta' (time: 720, node: 9228245)> dask.array<shape=(720, 9228245), dtype=float64, chunksize=(10, 141973)> Coordinates: * time (time) datetime64[ns] 2008-09-05T12:00:00 2008-09-05T12:10:00 ... x (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> y (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> Dimensions without coordinates: node Attributes: location: node long_name: water surface elevation above geoid mesh: adcirc_mesh standard_name: sea_surface_height_above_geoid units: m
ds['zeta'].nbytes/1.e9
53.1546912
ds['zeta'].max(dim='time')
<xarray.DataArray 'zeta' (node: 9228245)> dask.array<shape=(9228245,), dtype=float64, chunksize=(141973,)> Coordinates: x (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> y (node) float64 dask.array<shape=(9228245,), chunksize=(141973,)> Dimensions without coordinates: node
max_var = ds['zeta'].max(dim='time').persist()
progress(max_var)
VBox()
import numpy as np
import datashader as dshade
import holoviews as hv
import geoviews as gv
import cartopy.crs as ccrs
from holoviews.operation.datashader import datashade, rasterize
from colorcet import cm_n
from matplotlib.cm import jet
datashade.precompute = True
hv.extension('bokeh')
%opts Image RGB VectorField [width=800 height=600]
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
v = np.vstack((ds['x'], ds['y'], max_var)).T
verts = pd.DataFrame(v, columns=['x','y','z'])
points = gv.operation.project_points(gv.Points(verts, vdims=['z']))
tris = pd.DataFrame(ds['element'].values.astype('int')-1, columns=['v0','v1','v2'])
tiles = gv.WMTS('https://maps.wikimedia.org/osm-intl/{Z}/{X}/{Y}@2x.png')
value = 'max water level'
label = '{} (m)'.format(value)
trimesh = gv.TriMesh((tris, points), label=label)
%%opts Image [colorbar=True] (cmap=jet)
meshes = rasterize(trimesh,aggregator=dshade.mean('z'))
tiles * meshes
# find the indices of the points in (x,y) closest to the points in (xi,yi)
def nearxy(x,y,xi,yi):
ind = np.ones(len(xi),dtype=int)
for i in range(len(xi)):
dist = np.sqrt((x-xi[i])**2+(y-yi[i])**2)
ind[i] = dist.argmin()
return ind
#just offshore of Galveston
lat = 29.2329856
lon = -95.1535041
ind = nearxy(ds['x'].values,ds['y'].values,[lon], [lat])
%%time
ds['zeta'][:,ind].plot()
CPU times: user 692 ms, sys: 96 ms, total: 788 ms Wall time: 2.86 s
[<matplotlib.lines.Line2D at 0x7fea441770f0>]