%matplotlib inline
from dask.distributed import Client, progress, LocalCluster
from dask_kubernetes import KubeCluster
import xarray as xr
import gcsfs
import numpy as np
Start a dask cluster to crunch the data
import dask
import os
# Point the first container of the dask-kubernetes worker pod template at the
# image named by the JUPYTER_IMAGE_SPEC environment variable (presumably the
# image this notebook server itself runs -- confirm).
worker_containers = dask.config.config['kubernetes']['worker-template']['spec']['containers']
worker_containers[0]['image'] = os.environ['JUPYTER_IMAGE_SPEC']
# Print every environment variable whose line contains "JUPYTER".
# NOTE: the output includes JUPYTERHUB_API_TOKEN; clear it before sharing.
!env | grep JUPYTER
JUPYTERHUB_HOST= JUPYTERHUB_USER=rsignell-usgs-adcirc_cloud-sdjbl71r JUPYTERHUB_API_URL=http://10.31.254.204:8081/hub/api JUPYTERHUB_OAUTH_CALLBACK_URL=/user/rsignell-usgs-adcirc_cloud-sdjbl71r/oauth_callback JUPYTER_IMAGE_SPEC=gcr.io/pangeo-181919/pangeo-binderrsignell-2dusgs-2dadcirc-5fcloud-ef2345:cf56eda50c89f5030fee8ecc8c50963838daae38 JUPYTERHUB_CLIENT_ID=jupyterhub-user-rsignell-usgs-adcirc_cloud-sdjbl71r JUPYTERHUB_ADMIN_ACCESS=1 JUPYTERHUB_SERVICE_PREFIX=/user/rsignell-usgs-adcirc_cloud-sdjbl71r/ JUPYTERHUB_API_TOKEN=654df3f104c24222aff466c56bc42a91 JUPYTERHUB_BASE_URL=/
# List the installed conda packages whose names match "kubernetes".
!conda list kubernetes
# packages in environment at /srv/conda: # # Name Version Build Channel dask-kubernetes 0.4.0 py_0 conda-forge kubernetes 1.11.2 h81701ea_0 conda-forge python-kubernetes 4.0.0 py36_1 conda-forge
# Start 10 dask workers on Kubernetes.
# The worker template stored in the dask config is a plain dictionary, and
# ``KubeCluster(n_workers=10)`` rejected it in this environment with
# "TypeError: Expected a kubernetes.client.V1Pod object".  As that error
# message advises, build the cluster with ``KubeCluster.from_dict`` so the
# dictionary is converted to a pod object first.
cluster = KubeCluster.from_dict(
    dask.config.get('kubernetes.worker-template'),
    n_workers=10,
)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-5-6c80d44e9da6> in <module>() ----> 1 cluster = KubeCluster(n_workers=10) /srv/conda/lib/python3.6/site-packages/dask_kubernetes/core.py in __init__(self, pod_template, name, namespace, n_workers, host, port, env, **kwargs) 181 **os.environ) 182 --> 183 self.pod_template = clean_pod_template(pod_template) 184 # Default labels that can't be overwritten 185 self.pod_template.metadata.labels['dask.pydata.org/cluster-name'] = name /srv/conda/lib/python3.6/site-packages/dask_kubernetes/objects.py in clean_pod_template(pod_template) 193 'If trying to pass a dictionary specification then use ' 194 'KubeCluster.from_dict') --> 195 raise TypeError(msg % str(pod_template)) 196 197 pod_template = copy.deepcopy(pod_template) TypeError: Expected a kubernetes.client.V1Pod object, got {'metadata': None, 'spec': {'restartPolicy': 'Never', 'containers': [{'args': ['dask-worker', '--nthreads', '2', '--no-bokeh', '--memory-limit', '6GB', '--death-timeout', '60'], 'image': 'gcr.io/pangeo-181919/pangeo-binderrsignell-2dusgs-2dadcirc-5fcloud-ef2345:cf56eda50c89f5030fee8ecc8c50963838daae38', 'name': 'dask-worker', 'resources': {'limits': {'cpu': '1.75', 'memory': '2G'}, 'requests': {'cpu': '1.75', 'memory': '2G'}}}]}}If trying to pass a dictionary specification then use KubeCluster.from_dict
# Display the cluster object.
cluster
# Create a distributed client attached to the cluster.
client = Client(cluster)
# Open the Zarr store at 'pangeo-data/rsignell/adcirc_test01' on Google Cloud
# Storage as an xarray Dataset.
fs = gcsfs.GCSFileSystem(token=None)
gcsmap = gcsfs.mapping.GCSMap('pangeo-data/rsignell/adcirc_test01', gcs=fs, check=False, create=False)
ds = xr.open_zarr(gcsmap)
ds
# Inspect the 'zeta' variable and its size (nbytes divided by 1e9, i.e. GB).
ds['zeta']
ds['zeta'].nbytes/1.e9
# Maximum of 'zeta' over the 'time' dimension.
ds['zeta'].max(dim='time')
# Same reduction, persisted so the result is computed on the cluster and
# reused below; progress() displays the status of that computation.
max_var = ds['zeta'].max(dim='time').persist()
progress(max_var)
import numpy as np
import datashader as dshade
import holoviews as hv
import geoviews as gv
import cartopy.crs as ccrs
from holoviews.operation.datashader import datashade, rasterize
from colorcet import cm_n
from matplotlib.cm import jet
# Turn on the datashade operation's `precompute` option.
# NOTE(review): presumably this caches intermediate data between redraws --
# confirm against the HoloViews documentation.
datashade.precompute = True
# Load the bokeh plotting backend for HoloViews.
hv.extension('bokeh')
# Default plot size (800x600) for Image, RGB and VectorField elements.
%opts Image RGB VectorField [width=800 height=600]
import warnings
# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")
import pandas as pd
# Node table: one row per mesh node holding x, y and the persisted maximum
# of 'zeta' (column 'z').
node_columns = (ds['x'], ds['y'], max_var)
v = np.vstack(node_columns).T
verts = pd.DataFrame(v, columns=['x','y','z'])
node_points = gv.Points(verts, vdims=['z'])
points = gv.operation.project_points(node_points)

# Triangle connectivity, shifted down by one
# (presumably converting 1-based node numbers to 0-based indices -- confirm).
connectivity = ds['element'].values.astype('int') - 1
tris = pd.DataFrame(connectivity, columns=['v0','v1','v2'])

# Background map tiles.
tiles = gv.WMTS('https://maps.wikimedia.org/osm-intl/{Z}/{X}/{Y}@2x.png')

# Labelled triangular mesh built from the connectivity and the node points.
value = 'max water level'
label = '{} (m)'.format(value)
trimesh = gv.TriMesh((tris, points), label=label)
%%opts Image [colorbar=True] (cmap=jet)
# The cell magic above sets Image options for this cell: colorbar on, cmap=jet.
# Rasterize the triangular mesh, aggregating the vertex value 'z' by its mean.
meshes = rasterize(trimesh,aggregator=dshade.mean('z'))
# Overlay the rasterized mesh on the map tiles and display the result.
tiles * meshes
# find the indices of the points in (x,y) closest to the points in (xi,yi)
def nearxy(x, y, xi, yi):
    """Return the index of the (x, y) point nearest to each (xi, yi) point.

    Nearness is the plain Euclidean distance computed directly on the
    coordinate values as given; no projection or unit conversion is applied.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the candidate points.
    xi, yi : scalar or array-like
        Coordinates of the query points.  A scalar is treated as a single
        query point.

    Returns
    -------
    numpy.ndarray
        Integer array with one entry per query point, holding the index into
        ``x``/``y`` of the closest candidate.  Ties resolve to the lowest
        index.

    Raises
    ------
    ValueError
        If ``x``/``y`` are empty while at least one query point is given
        (raised by ``argmin``).
    IndexError
        If ``yi`` has fewer entries than ``xi``.
    """
    # Coerce up front so plain lists/tuples and scalar query points work too.
    x = np.asarray(x)
    y = np.asarray(y)
    xi = np.atleast_1d(xi)
    yi = np.atleast_1d(yi)

    ind = np.ones(len(xi), dtype=int)
    for i, qx in enumerate(xi):
        # The squared distance has the same minimiser as the distance itself,
        # so the square root is unnecessary.
        dist_sq = (x - qx) ** 2 + (y - yi[i]) ** 2
        ind[i] = dist_sq.argmin()
    return ind
# Target point just offshore of Galveston, given as latitude/longitude.
lat, lon = 29.2329856, -95.1535041
# Index of the entry in ds['x'], ds['y'] closest to that point
# (a one-element index array).
ind = nearxy(ds['x'].values, ds['y'].values, [lon], [lat])
%%time
# Plot 'zeta' at the selected index along its second axis, over the full
# first axis (assumes the dimensions are ordered (time, node) -- confirm).
# The %%time cell magic above reports how long the cell takes to run.
ds['zeta'][:,ind].plot()