Process repeat drone-derived elevation maps in Sandwich, MA using Xarray to subset the same region of interest in each map, and using Dask distributed to process in parallel. Data was collected by Chris Sherwood (csherwood@usgs.gov) and is for demonstration only (not yet published).
%matplotlib inline
import json
import numpy as np
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import xarray as xr
import os
from dask.distributed import Client, progress, LocalCluster
pangeo = 'USGS-HPC-YETI'
if pangeo=='ESIP-AWS-S3':
# use direct access from s3 if credentials exist, otherwise use public HTTPs address
if os.environ.get('AWS_ACCESS_KEY_ID') and os.environ.get('AWS_SECRET_ACCESS_KEY'):
fdir = 's3://rsignell/crs_maps'
else:
fdir = 'https://rsignell.s3.amazonaws.com/crs_maps'
from dask_kubernetes import KubeCluster
cluster = KubeCluster.from_yaml('/home/jovyan/worker-template.yaml')
cluster.scale(10);
if pangeo=='USGS-HPC-YETI':
fdir = '/lustre/projects/hazards/cmgp/woodshole/rsignell/crs_maps'
from dask_jobqueue import SLURMCluster
import os
cluster = SLURMCluster(processes=4, threads=1, memory='16GB',
project='woodshole', walltime='01:00:00', queue='normal',
interface='ib0', local_directory=os.getenv('TMPDIR','/tmp'))
cluster.start_workers(10)
cluster
VBox(children=(HTML(value='<h2>SLURMCluster</h2>'), HBox(children=(HTML(value='\n<div>\n <style scoped>\n …
client = Client(cluster)
client
Client
|
Cluster
|
# list of files to read
fnames = (\
"2016-01-22_SandwichTNB_PT_DEM_10cm_trimmed.tif",\
"2016-01-25_SandwichTNB_DEM_10cm_trimmed.tif",\
"2016-02-11_SandwichTNB_DEM_10cm_trimmed.tif",\
"2016-03-30_SandwichTNB_AS_DEM_10cm_trimmed.tif",\
"2016-09-21_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-01-09_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-01-25_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-02-14_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-03-16_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-04-28_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-05-04_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-09-18_SandwichTNB_DEM_10cm_trimmed.tif",\
#"2018-01-03_SandwichTNB_DEM_10cm_trimmed.tif",\ # needs to be padded...not very good control
"2018-01-10_SandwichTNB_DEM_10cm_trimmed.tif",\
"DEM_10cm_selfcal_lokicampos_mid_denseNAD83UTM19.tif")
# list of dates for use as titles / column headers
titles = ([\
"22-Jan-2016",\
"25-Jan-2016",\
"11-Feb-2016",\
"30-Mar-2016",\
"21-Sep-2016",\
"09-Jan-2017",\
"25-Jan-2017",\
"14-Feb-2017",\
"16-Mar-2017",\
"28-Apr-2017",\
"04-May_2017",\
"18-Sep-2017",\
# "03-Jan-2017",\
"10-Jan-2018",\
"09-Mar-2018"]
)
Each geotiff covers a different region, but all files have coordinates in NAD83 UTM Zone 19N meters.
# top left corner of region of interest (UTM Zone 19N meters)
e0 = 376488.; n0 = 4625200.
# size of region in meters
xsize, ysize = 800., 680.
e1 = e0 + xsize
n1 = n0 - ysize
dslist =[]
for fname in fnames:
ds = xr.open_rasterio(f'{fdir}/{fname}',chunks={'band':1, 'y':6800, 'x':8000})
ds = ds.sel(x=slice(e0,e1), y=slice(n0,n1))
if len(dslist) == 0:
xc = ds['x']-ds['x'].values.min()
yc = ds['y']-ds['y'].values.min()
ds['x'] = xc
ds['y'] = yc
dslist.append(ds)
dsa = xr.concat(dslist, dim='band')
dsa
<xarray.DataArray (band: 14, y: 6800, x: 8000)> dask.array<shape=(14, 6800, 8000), dtype=float32, chunksize=(1, 1336, 1080)> Coordinates: * y (y) float64 679.9 679.8 679.7 679.6 679.5 679.4 679.3 679.2 ... * x (x) float64 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 ... * band (band) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 1 Attributes: crs: +init=epsg:6348 is_tiled: 1 nodatavals: (-32767.0,)
dsa.mean(dim='band')
<xarray.DataArray (y: 6800, x: 8000)> dask.array<shape=(6800, 8000), dtype=float32, chunksize=(1336, 1080)> Coordinates: * y (y) float64 679.9 679.8 679.7 679.6 679.5 679.4 679.3 679.2 ... * x (x) float64 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 ...
%%time
zmean = dsa.mean(dim='band').persist()
CPU times: user 1.52 s, sys: 48 ms, total: 1.57 s Wall time: 1.54 s
progress(zmean)
VBox()
%%time
zmean.plot.imshow(vmin=-2, vmax=8);
CPU times: user 22.6 s, sys: 4.61 s, total: 27.2 s Wall time: 29.8 s
<matplotlib.image.AxesImage at 0x7f53e40cfb00>
%%time
# calculate the range
zrange = (dsa.max(dim='band')-dsa.min(dim='band'))
CPU times: user 504 ms, sys: 10 ms, total: 514 ms Wall time: 504 ms
%%time
zrange[::10,::10].plot.imshow(vmin=0, vmax=5);
CPU times: user 36.3 s, sys: 3.41 s, total: 39.7 s Wall time: 42.8 s
<matplotlib.image.AxesImage at 0x7f53e786e668>
tornado.application - ERROR - Exception in callback functools.partial(<function wrap.<locals>.null_wrapper at 0x7f54440b8e18>, <Future finished exception=OSError("Timed out trying to connect to 'tcp://10.12.0.20:39289' after 3 s: connect() didn't finish in time",)>) Traceback (most recent call last): File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/distributed/comm/core.py", line 187, in connect quiet_exceptions=EnvironmentError) File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/tornado/gen.py", line 1099, in run value = future.result() tornado.util.TimeoutError: Timeout During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/tornado/ioloop.py", line 759, in _run_callback ret = callback() File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper return fn(*args, **kwargs) File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/tornado/ioloop.py", line 780, in _discard_future_result future.result() File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/tornado/gen.py", line 1107, in run yielded = self.gen.throw(*exc_info) File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/distributed/diagnostics/progressbar.py", line 189, in listen self.comm = yield connect(self.scheduler) File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/tornado/gen.py", line 1099, in run value = future.result() File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/tornado/gen.py", line 1107, in run yielded = self.gen.throw(*exc_info) File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/distributed/comm/core.py", line 196, in connect _raise(error) File "/home/rsignell/miniconda3/envs/pangeo/lib/python3.6/site-packages/distributed/comm/core.py", line 179, in _raise raise IOError(msg) OSError: Timed out trying to connect to 'tcp://10.12.0.20:39289' after 3 s: connect() didn't finish in time