Process repeat drone-derived elevation maps in Sandwich, MA using Xarray to subset the same region of interest in each map, and using Dask distributed to process in parallel. Data was collected by Chris Sherwood (csherwood@usgs.gov) and is for demonstration only (not yet published).
%matplotlib inline
import json
import numpy as np
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import xarray as xr
from dask.distributed import Client, progress, LocalCluster
pangeo = 'USGS-HPC-YETI'
if pangeo=='ESIP-AWS-S3':
fdir = 's3://rsignell/crs_maps'
from dask_kubernetes import KubeCluster
cluster = KubeCluster.from_yaml('/home/jovyan/worker-template.yaml')
cluster.scale(10);
if pangeo=='USGS-HPC-YETI':
fdir = '/lustre/projects/hazards/cmgp/woodshole/rsignell/crs_maps'
from dask_jobqueue import SLURMCluster
import os
cluster = SLURMCluster(processes=4, threads=1, memory='16GB',
project='woodshole', walltime='01:00:00', queue='normal',
interface='ib0', local_directory=os.getenv('TMPDIR','/tmp'))
cluster.start_workers(10)
cluster
VBox(children=(HTML(value='<h2>SLURMCluster</h2>'), HBox(children=(HTML(value='\n<div>\n <style scoped>\n …
client = Client(cluster)
client
Client
|
Cluster
|
# list of files to read
fnames = (\
"2016-01-22_SandwichTNB_PT_DEM_10cm_trimmed.tif",\
"2016-01-25_SandwichTNB_DEM_10cm_trimmed.tif",\
"2016-02-11_SandwichTNB_DEM_10cm_trimmed.tif",\
"2016-03-30_SandwichTNB_AS_DEM_10cm_trimmed.tif",\
"2016-09-21_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-01-09_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-01-25_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-02-14_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-03-16_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-04-28_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-05-04_SandwichTNB_DEM_10cm_trimmed.tif",\
"2017-09-18_SandwichTNB_DEM_10cm_trimmed.tif",\
#"2018-01-03_SandwichTNB_DEM_10cm_trimmed.tif",\ # needs to be padded...not very good control
"2018-01-10_SandwichTNB_DEM_10cm_trimmed.tif",\
"DEM_10cm_selfcal_lokicampos_mid_denseNAD83UTM19.tif")
# list of dates for use as titles / column headers
titles = ([\
"22-Jan-2016",\
"25-Jan-2016",\
"11-Feb-2016",\
"30-Mar-2016",\
"21-Sep-2016",\
"09-Jan-2017",\
"25-Jan-2017",\
"14-Feb-2017",\
"16-Mar-2017",\
"28-Apr-2017",\
"04-May_2017",\
"18-Sep-2017",\
# "03-Jan-2017",\
"10-Jan-2018",\
"09-Mar-2018"]
)
Each geotiff covers a different region, but all files have coordinates in NAD83 UTM Zone 19N meters.
# top left corner of region of interest (UTM Zone 19N meters)
e0 = 376488.; n0 = 4625200.
# size of region in meters
xsize, ysize = 800., 680.
e1 = e0 + xsize
n1 = n0 - ysize
dslist =[]
for fname in fnames:
ds = xr.open_rasterio(f'{fdir}/{fname}',chunks={'band':1, 'y':6800, 'x':8000})
ds = ds.sel(x=slice(e0,e1), y=slice(n0,n1))
if len(dslist) == 0:
xc = ds['x']-ds['x'].values.min()
yc = ds['y']-ds['y'].values.min()
ds['x'] = xc
ds['y'] = yc
dslist.append(ds)
dsa = xr.concat(dslist, dim='band')
dsa
<xarray.DataArray (band: 14, y: 6800, x: 8000)> dask.array<shape=(14, 6800, 8000), dtype=float32, chunksize=(1, 1336, 1080)> Coordinates: * y (y) float64 679.9 679.8 679.7 679.6 679.5 679.4 679.3 679.2 ... * x (x) float64 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 ... * band (band) int64 1 1 1 1 1 1 1 1 1 1 1 1 1 1 Attributes: crs: +init=epsg:6348 is_tiled: 1 nodatavals: (-32767.0,)
dsa.mean(dim='band')
<xarray.DataArray (y: 6800, x: 8000)> dask.array<shape=(6800, 8000), dtype=float32, chunksize=(1336, 1080)> Coordinates: * y (y) float64 679.9 679.8 679.7 679.6 679.5 679.4 679.3 679.2 ... * x (x) float64 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 ...
%%time
zmean = dsa.mean(dim='band').persist()
CPU times: user 2.24 s, sys: 66 ms, total: 2.3 s Wall time: 2.26 s
%%time
zmean.plot.imshow(vmin=-2, vmax=8);
CPU times: user 20.1 s, sys: 8.2 s, total: 28.3 s Wall time: 58 s
<matplotlib.image.AxesImage at 0x7f679d9b5f28>
%%time
# calculate the range
zrange = (dsa.max(dim='band')-dsa.min(dim='band'))
CPU times: user 262 ms, sys: 13 ms, total: 275 ms Wall time: 267 ms
%%time
zrange[::10,::10].plot.imshow(vmin=0, vmax=5);
CPU times: user 32.5 s, sys: 6.17 s, total: 38.6 s Wall time: 38.6 s
<matplotlib.image.AxesImage at 0x7f67e009a828>