This notebook explores regridding the PFT variables as sparse arrays using the pydata/sparse
package.
Inspired by PFT-Gridding.ipynb
%matplotlib inline
import numpy as np
import xarray as xr
from ctsm_py import utils
datadir = "/glade/p/cgd/tss/people/dll/TRENDY2019_History/"
sim = "S0_control/"
datadir = datadir + sim
simname = "TRENDY2019_S0_control_v2.clm2.h1."
var = "GPP"
years = "170001-201812"
maxval = "True"
print(datadir + simname + var + "." + years + ".nc")
/glade/p/cgd/tss/people/dll/TRENDY2019_History/S0_control/TRENDY2019_S0_control_v2.clm2.h1.GPP.170001-201812.nc
# This is an example copied from Dan's script -- helps to read in multiple variables
# dir =
# sim =
# pref = 'lnd/proc/tseries/month_1'
# suff = ".clm2.h0."
# variables = [" "]
# pattern = dir + sim + proc + pref + '{var}' + suff
# files = [pattern.format(var=var) for var in variables]
# for multiple files, use xr.open_mfdataset; dan also uses utils.time_set_mid to make the time dims work properly
# 365*utils.weighted_annual_mean --> weights by days/month
# timeslice: ix_time = (ds['time.year']>1963) & (ds['time.year']<2014) # note that dan's dataset is 'ds'
# plt.subplot(121) #-->1 row, 2 plots, plot 1
# signal.detrend (?)
data1 = utils.time_set_mid(
xr.open_dataset(
datadir + simname + var + "." + years + ".nc",
decode_times=True,
chunks={"time": 100},
),
"time",
)
data1
<xarray.Dataset> Dimensions: (levgrnd: 25, levlak: 10, levdcmp: 25, lon: 288, lat: 192, gridcell: 21013, landunit: 48359, column: 111429, pft: 166408, time: 3828, hist_interval: 2) Coordinates: * levgrnd (levgrnd) float32 0.01 0.04 0.09 ... 19.48 28.87 42.0 * levlak (levlak) float32 0.05 0.6 2.1 4.6 ... 25.6 34.33 44.78 * levdcmp (levdcmp) float32 0.01 0.04 0.09 ... 19.48 28.87 42.0 * lon (lon) float32 0.0 1.25 2.5 3.75 ... 356.2 357.5 358.8 * lat (lat) float32 -90.0 -89.06 -88.12 ... 88.12 89.06 90.0 * time (time) object 1700-01-16 11:44:59.999993 ... 2018-12-... Dimensions without coordinates: gridcell, landunit, column, pft, hist_interval Data variables: (12/51) area (lat, lon) float32 dask.array<chunksize=(192, 288), meta=np.ndarray> landfrac (lat, lon) float32 dask.array<chunksize=(192, 288), meta=np.ndarray> landmask (lat, lon) float64 dask.array<chunksize=(192, 288), meta=np.ndarray> pftmask (lat, lon) float64 dask.array<chunksize=(192, 288), meta=np.ndarray> nbedrock (lat, lon) float64 dask.array<chunksize=(192, 288), meta=np.ndarray> grid1d_lon (gridcell) float64 dask.array<chunksize=(21013,), meta=np.ndarray> ... ... mscur (time) float64 dask.array<chunksize=(100,), meta=np.ndarray> nstep (time) float64 dask.array<chunksize=(100,), meta=np.ndarray> time_bounds (time, hist_interval) object dask.array<chunksize=(100, 2), meta=np.ndarray> date_written (time) object dask.array<chunksize=(100,), meta=np.ndarray> time_written (time) object dask.array<chunksize=(100,), meta=np.ndarray> GPP (time, pft) float32 dask.array<chunksize=(100, 166408), meta=np.ndarray> Attributes: (12/102) title: CLM History file information comment: NOTE: None of the variables ar... Conventions: CF-1.0 history: created on 09/27/19 16:25:57 source: Community Terrestrial Systems ... hostname: cheyenne ... ... cft_irrigated_tropical_corn: 62 cft_tropical_soybean: 63 cft_irrigated_tropical_soybean: 64 time_period_freq: month_1 Time_constant_3Dvars_filename: ./TRENDY2019_S0_constant_v2.cl... Time_constant_3Dvars: ZSOI:DZSOI:WATSAT:SUCSAT:BSW:H...
array([1.000000e-02, 4.000000e-02, 9.000000e-02, 1.600000e-01, 2.600000e-01, 4.000000e-01, 5.800000e-01, 8.000000e-01, 1.060000e+00, 1.360000e+00, 1.700000e+00, 2.080000e+00, 2.500000e+00, 2.990000e+00, 3.580000e+00, 4.270000e+00, 5.060000e+00, 5.950000e+00, 6.940000e+00, 8.030000e+00, 9.795000e+00, 1.332777e+01, 1.948313e+01, 2.887072e+01, 4.199844e+01], dtype=float32)
array([ 0.05 , 0.6 , 2.1 , 4.6 , 8.1 , 12.6 , 18.6 , 25.6 , 34.325, 44.775], dtype=float32)
array([1.000000e-02, 4.000000e-02, 9.000000e-02, 1.600000e-01, 2.600000e-01, 4.000000e-01, 5.800000e-01, 8.000000e-01, 1.060000e+00, 1.360000e+00, 1.700000e+00, 2.080000e+00, 2.500000e+00, 2.990000e+00, 3.580000e+00, 4.270000e+00, 5.060000e+00, 5.950000e+00, 6.940000e+00, 8.030000e+00, 9.795000e+00, 1.332777e+01, 1.948313e+01, 2.887072e+01, 4.199844e+01], dtype=float32)
array([ 0. , 1.25, 2.5 , ..., 356.25, 357.5 , 358.75], dtype=float32)
array([-90. , -89.057594, -88.11518 , -87.172775, -86.23037 , -85.28796 , -84.34555 , -83.403145, -82.46073 , -81.518326, -80.57591 , -79.63351 , -78.6911 , -77.74869 , -76.80628 , -75.86388 , -74.92146 , -73.97906 , -73.03665 , -72.09424 , -71.15183 , -70.20943 , -69.26701 , -68.32461 , -67.3822 , -66.43979 , -65.49738 , -64.55498 , -63.612564, -62.67016 , -61.72775 , -60.78534 , -59.842934, -58.900524, -57.958115, -57.015705, -56.0733 , -55.13089 , -54.18848 , -53.246075, -52.303665, -51.361256, -50.41885 , -49.47644 , -48.53403 , -47.59162 , -46.649216, -45.706806, -44.764397, -43.82199 , -42.87958 , -41.937172, -40.994766, -40.052357, -39.109947, -38.167538, -37.225132, -36.282722, -35.340313, -34.397907, -33.455498, -32.51309 , -31.57068 , -30.628273, -29.685863, -28.743456, -27.801046, -26.858639, -25.916231, -24.973822, -24.031414, -23.089005, -22.146597, -21.20419 , -20.26178 , -19.319372, -18.376963, -17.434555, -16.492147, -15.549738, -14.607329, -13.664922, -12.722513, -11.780105, -10.837696, -9.895288, -8.95288 , -8.010471, -7.068063, -6.125654, -5.183246, -4.240838, -3.298429, -2.356021, -1.413613, -0.471204, 0.471204, 1.413613, 2.356021, 3.298429, 4.240838, 5.183246, 6.125654, 7.068063, 8.010471, 8.95288 , 9.895288, 10.837696, 11.780105, 12.722513, 13.664922, 14.607329, 15.549738, 16.492147, 17.434555, 18.376963, 19.319372, 20.26178 , 21.20419 , 22.146597, 23.089005, 24.031414, 24.973822, 25.916231, 26.858639, 27.801046, 28.743456, 29.685863, 30.628273, 31.57068 , 32.51309 , 33.455498, 34.397907, 35.340313, 36.282722, 37.225132, 38.167538, 39.109947, 40.052357, 40.994766, 41.937172, 42.87958 , 43.82199 , 44.764397, 45.706806, 46.649216, 47.59162 , 48.53403 , 49.47644 , 50.41885 , 51.361256, 52.303665, 53.246075, 54.18848 , 55.13089 , 56.0733 , 57.015705, 57.958115, 58.900524, 59.842934, 60.78534 , 61.72775 , 62.67016 , 63.612564, 64.55498 , 65.49738 , 66.43979 , 67.3822 , 68.32461 , 69.26701 , 70.20943 , 71.15183 , 72.09424 , 73.03665 , 73.97906 , 74.92146 , 75.86388 , 76.80628 , 77.74869 , 78.6911 , 79.63351 , 80.57591 , 81.518326, 82.46073 , 83.403145, 84.34555 , 85.28796 , 86.23037 , 87.172775, 88.11518 , 89.057594, 90. ], dtype=float32)
array([cftime.DatetimeNoLeap(1700, 1, 16, 11, 44, 59, 999993), cftime.DatetimeNoLeap(1700, 2, 15, 0, 0, 0, 0), cftime.DatetimeNoLeap(1700, 3, 16, 12, 0, 0, 0), ..., cftime.DatetimeNoLeap(2018, 10, 16, 12, 0, 0, 0), cftime.DatetimeNoLeap(2018, 11, 16, 0, 0, 0, 0), cftime.DatetimeNoLeap(2018, 12, 16, 12, 0, 0, 0)], dtype=object)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def to_sparse(data, vegtype, jxy, ixy, shape):
""" Takes an input numpy array and converts it to a sparse array."""
import itertools
import sparse
codes = zip(vegtype, jxy - 1, ixy - 1)
# some magic from https://github.com/pydata/xarray/pull/5577
# This constructs a list of coordinate locations at which data exists
# it works for arbitrary number of dimensions but assumes that the last dimension
# is the "stacked" dimension i.e. "pft"
if data.ndim == 1:
indexes = codes
else:
sizes = list(itertools.product(*[range(s) for s in data.shape[:-1]]))
tuple_indexes = itertools.product(sizes, codes)
indexes = map(lambda x: list(itertools.chain(*x)), tuple_indexes)
return sparse.COO(
coords=np.array(list(indexes)).T,
data=data.ravel(),
shape=data.shape[:-1] + shape,
)
def convert_pft_variables_to_sparse(dataset):
import sparse
import xarray as xr
# extract PFT variables
pfts = xr.Dataset({k: v for k, v in dataset.items() if "pft" in v.dims})
ixy = dataset.pfts1d_ixy.astype(int)
jxy = dataset.pfts1d_jxy.astype(int)
vegtype = dataset.pfts1d_itype_veg.astype(int)
npft = vegtype.max().load().item()
# expected shape of sparse arrays (excludes time)
output_sizes = {
"pft": npft + 1,
"lat": dataset.sizes["lat"],
"lon": dataset.sizes["lon"],
}
result = xr.Dataset()
for var in pfts:
result[var] = xr.apply_ufunc(
to_sparse,
pfts[var],
vegtype,
jxy,
ixy,
input_core_dims=[["pft"]] * 4,
output_core_dims=[["pft", "lat", "lon"]],
exclude_dims=set(("pft",)), # changes size
dask="parallelized",
kwargs=dict(shape=tuple(output_sizes.values())),
dask_gufunc_kwargs=dict(
meta=sparse.COO([]),
output_sizes=output_sizes,
), # lets dask know that we are changing from numpy to sparse
output_dtypes=[pfts[var].dtype],
)
# copy over coordinate variables lat, lon
result = result.update(dataset[["lat", "lon"]])
result["pft"] = np.arange(result.sizes["pft"])
return result
pfts = convert_pft_variables_to_sparse(data1)
pfts
<xarray.Dataset> Dimensions: (lat: 192, lon: 288, pft: 78, time: 3828) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 ... 88.12 89.06 90.0 * lon (lon) float64 0.0 1.25 2.5 3.75 ... 356.2 357.5 358.8 * time (time) object 1700-01-16 11:44:59.999993 ... 2018-12-... * pft (pft) int64 0 1 2 3 4 5 6 7 ... 70 71 72 73 74 75 76 77 Data variables: (12/15) pfts1d_lon (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_lat (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_ixy (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_jxy (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_gi (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_li (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> ... ... pfts1d_wtcol (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_itype_veg (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_itype_col (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_itype_lunit (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> pfts1d_active (pft, lat, lon) float64 dask.array<chunksize=(78, 192, 288), meta=np.ndarray> GPP (time, pft, lat, lon) float64 dask.array<chunksize=(100, 78, 192, 288), meta=np.ndarray>
array([-90. , -89.057594, -88.115181, -87.172775, -86.23037 , -85.287956, -84.345551, -83.403145, -82.460732, -81.518326, -80.575912, -79.633507, -78.691101, -77.748688, -76.806282, -75.863876, -74.921463, -73.979057, -73.036652, -72.094238, -71.151833, -70.209427, -69.267014, -68.324608, -67.382202, -66.439789, -65.497383, -64.554977, -63.612564, -62.670158, -61.727749, -60.785339, -59.842934, -58.900524, -57.958115, -57.015705, -56.073299, -55.13089 , -54.18848 , -53.246075, -52.303665, -51.361256, -50.41885 , -49.47644 , -48.534031, -47.591621, -46.649216, -45.706806, -44.764397, -43.821991, -42.879581, -41.937172, -40.994766, -40.052357, -39.109947, -38.167538, -37.225132, -36.282722, -35.340313, -34.397907, -33.455498, -32.513088, -31.570681, -30.628273, -29.685863, -28.743456, -27.801046, -26.858639, -25.916231, -24.973822, -24.031414, -23.089005, -22.146597, -21.204189, -20.26178 , -19.319372, -18.376963, -17.434555, -16.492147, -15.549738, -14.607329, -13.664922, -12.722513, -11.780105, -10.837696, -9.895288, -8.95288 , -8.010471, -7.068063, -6.125654, -5.183246, -4.240838, -3.298429, -2.356021, -1.413613, -0.471204, 0.471204, 1.413613, 2.356021, 3.298429, 4.240838, 5.183246, 6.125654, 7.068063, 8.010471, 8.95288 , 9.895288, 10.837696, 11.780105, 12.722513, 13.664922, 14.607329, 15.549738, 16.492147, 17.434555, 18.376963, 19.319372, 20.26178 , 21.204189, 22.146597, 23.089005, 24.031414, 24.973822, 25.916231, 26.858639, 27.801046, 28.743456, 29.685863, 30.628273, 31.570681, 32.513088, 33.455498, 34.397907, 35.340313, 36.282722, 37.225132, 38.167538, 39.109947, 40.052357, 40.994766, 41.937172, 42.879581, 43.821991, 44.764397, 45.706806, 46.649216, 47.591621, 48.534031, 49.47644 , 50.41885 , 51.361256, 52.303665, 53.246075, 54.18848 , 55.13089 , 56.073299, 57.015705, 57.958115, 58.900524, 59.842934, 60.785339, 61.727749, 62.670158, 63.612564, 64.554977, 65.497383, 66.439789, 67.382202, 68.324608, 69.267014, 70.209427, 71.151833, 72.094238, 73.036652, 73.979057, 74.921463, 75.863876, 76.806282, 77.748688, 78.691101, 79.633507, 80.575912, 81.518326, 82.460732, 83.403145, 84.345551, 85.287956, 86.23037 , 87.172775, 88.115181, 89.057594, 90. ])
array([ 0. , 1.25, 2.5 , ..., 356.25, 357.5 , 358.75])
array([cftime.DatetimeNoLeap(1700, 1, 16, 11, 44, 59, 999993), cftime.DatetimeNoLeap(1700, 2, 15, 0, 0, 0, 0), cftime.DatetimeNoLeap(1700, 3, 16, 12, 0, 0, 0), ..., cftime.DatetimeNoLeap(2018, 10, 16, 12, 0, 0, 0), cftime.DatetimeNoLeap(2018, 11, 16, 0, 0, 0, 0), cftime.DatetimeNoLeap(2018, 12, 16, 12, 0, 0, 0)], dtype=object)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
xarray cannot handle directly plotting a sparse variable (yet). This will be fixed soon; instead we manually "densify" to a numpy array using .copy(data=gpp.data.todense())
gpp = pfts.GPP.isel(time=3606).compute()
gpp = gpp.copy(data=gpp.data.todense())
gpp.isel(pft=1).plot()
<matplotlib.collections.QuadMesh at 0x2b4d7e8fc1c0>