An update for PyData NYC 2019.
import xarray as xr
xr.__version__
'0.14.0+19.gba48fbcd'
xr.set_options(display_style="html")
xr.tutorial.load_dataset('rasm').chunk()
array([cftime.DatetimeNoLeap(1980, 9, 16, 12, 0, 0, 0, 5, 259), cftime.DatetimeNoLeap(1980, 10, 17, 0, 0, 0, 0, 1, 290), cftime.DatetimeNoLeap(1980, 11, 16, 12, 0, 0, 0, 3, 320), cftime.DatetimeNoLeap(1980, 12, 17, 0, 0, 0, 0, 6, 351), cftime.DatetimeNoLeap(1981, 1, 17, 0, 0, 0, 0, 2, 17), cftime.DatetimeNoLeap(1981, 2, 15, 12, 0, 0, 0, 3, 46), cftime.DatetimeNoLeap(1981, 3, 17, 0, 0, 0, 0, 5, 76), cftime.DatetimeNoLeap(1981, 4, 16, 12, 0, 0, 0, 0, 106), cftime.DatetimeNoLeap(1981, 5, 17, 0, 0, 0, 0, 3, 137), cftime.DatetimeNoLeap(1981, 6, 16, 12, 0, 0, 0, 5, 167), cftime.DatetimeNoLeap(1981, 7, 17, 0, 0, 0, 0, 1, 198), cftime.DatetimeNoLeap(1981, 8, 17, 0, 0, 0, 0, 4, 229), cftime.DatetimeNoLeap(1981, 9, 16, 12, 0, 0, 0, 6, 259), cftime.DatetimeNoLeap(1981, 10, 17, 0, 0, 0, 0, 2, 290), cftime.DatetimeNoLeap(1981, 11, 16, 12, 0, 0, 0, 4, 320), cftime.DatetimeNoLeap(1981, 12, 17, 0, 0, 0, 0, 0, 351), cftime.DatetimeNoLeap(1982, 1, 17, 0, 0, 0, 0, 3, 17), cftime.DatetimeNoLeap(1982, 2, 15, 12, 0, 0, 0, 4, 46), cftime.DatetimeNoLeap(1982, 3, 17, 0, 0, 0, 0, 6, 76), cftime.DatetimeNoLeap(1982, 4, 16, 12, 0, 0, 0, 1, 106), cftime.DatetimeNoLeap(1982, 5, 17, 0, 0, 0, 0, 4, 137), cftime.DatetimeNoLeap(1982, 6, 16, 12, 0, 0, 0, 6, 167), cftime.DatetimeNoLeap(1982, 7, 17, 0, 0, 0, 0, 2, 198), cftime.DatetimeNoLeap(1982, 8, 17, 0, 0, 0, 0, 5, 229), cftime.DatetimeNoLeap(1982, 9, 16, 12, 0, 0, 0, 0, 259), cftime.DatetimeNoLeap(1982, 10, 17, 0, 0, 0, 0, 3, 290), cftime.DatetimeNoLeap(1982, 11, 16, 12, 0, 0, 0, 5, 320), cftime.DatetimeNoLeap(1982, 12, 17, 0, 0, 0, 0, 1, 351), cftime.DatetimeNoLeap(1983, 1, 17, 0, 0, 0, 0, 4, 17), cftime.DatetimeNoLeap(1983, 2, 15, 12, 0, 0, 0, 5, 46), cftime.DatetimeNoLeap(1983, 3, 17, 0, 0, 0, 0, 0, 76), cftime.DatetimeNoLeap(1983, 4, 16, 12, 0, 0, 0, 2, 106), cftime.DatetimeNoLeap(1983, 5, 17, 0, 0, 0, 0, 5, 137), cftime.DatetimeNoLeap(1983, 6, 16, 12, 0, 0, 0, 0, 167), cftime.DatetimeNoLeap(1983, 7, 17, 0, 0, 0, 0, 3, 198), 
cftime.DatetimeNoLeap(1983, 8, 17, 0, 0, 0, 0, 6, 229)], dtype=object)
|
|
|
import sparse
# Build a 5x5 sparse COO array holding the values 10..50 on the main diagonal:
# the two coordinate lists give, per dimension, the index of each stored value.
coords = [[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4]]
data = [10, 20, 30, 40, 50]
s = sparse.COO(coords, data, shape=(5, 5))
# Wrap the sparse array in a DataArray, naming its two dimensions.
das = xr.DataArray(s, dims=['lat', 'lon'])
das
<COO: shape=(5, 5), dtype=int64, nnz=5, fill_value=0>
das.mean(dim='lon')
<COO: shape=(5,), dtype=float64, nnz=5, fill_value=0.0>
Put it inside a dask array.
das.chunk()
|
Create a sparse array from a pandas MultiIndex.
import pandas as pd
import numpy as np
# Four (first, second) label pairs; not every combination of labels is present.
tuples = [('a', 0), ('a', 2), ('b', 1), ('c', 3)]
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
# Series of zeros with one entry per MultiIndex row.
# NOTE: this rebinds `s`, which previously held the sparse.COO array.
s = pd.Series(np.zeros(len(index)), index=index)
s
first second a 0 0.0 2 0.0 b 1 0.0 c 3 0.0 dtype: float64
xr.DataArray.from_series(s)
array([[ 0., nan, 0., nan], [nan, 0., nan, nan], [nan, nan, nan, 0.]])
array(['a', 'b', 'c'], dtype=object)
array([0, 1, 2, 3])
das = xr.DataArray.from_series(s, sparse=True)
das
<COO: shape=(3, 4), dtype=float64, nnz=4, fill_value=nan>
array(['a', 'b', 'c'], dtype=object)
array([0, 1, 2, 3])
das.sel(first='a', second=0).data.todense()
array(0.)
Hypothetically, this should also work for CuPy arrays, Pint arrays, etc.
# 3600 consecutive integer day offsets (0 .. 3599).
day = np.arange(0, 360*10)
# Attach CF-style 'units' and 'calendar' attributes to the 'time' coordinate
# so that decode_cf can interpret the raw integers.
ds = xr.Dataset(coords={'time': ('time', day,
{'units': 'days since 4000-01-01',
'calendar': '360_day'})})
# Decode the offsets using those attributes; with the '360_day' calendar the
# time values become cftime.Datetime360Day objects.
ds = xr.decode_cf(ds)
ds
array([cftime.Datetime360Day(4000, 1, 1, 0, 0, 0, 0, 2, 1), cftime.Datetime360Day(4000, 1, 2, 0, 0, 0, 0, 3, 2), cftime.Datetime360Day(4000, 1, 3, 0, 0, 0, 0, 4, 3), ..., cftime.Datetime360Day(4009, 12, 28, 0, 0, 0, 0, 1, 358), cftime.Datetime360Day(4009, 12, 29, 0, 0, 0, 0, 2, 359), cftime.Datetime360Day(4009, 12, 30, 0, 0, 0, 0, 3, 360)], dtype=object)
ds.indexes
time: CFTimeIndex([4000-01-01 00:00:00, 4000-01-02 00:00:00, 4000-01-03 00:00:00, 4000-01-04 00:00:00, 4000-01-05 00:00:00, 4000-01-06 00:00:00, 4000-01-07 00:00:00, 4000-01-08 00:00:00, 4000-01-09 00:00:00, 4000-01-10 00:00:00, ... 4009-12-21 00:00:00, 4009-12-22 00:00:00, 4009-12-23 00:00:00, 4009-12-24 00:00:00, 4009-12-25 00:00:00, 4009-12-26 00:00:00, 4009-12-27 00:00:00, 4009-12-28 00:00:00, 4009-12-29 00:00:00, 4009-12-30 00:00:00], dtype='object', name='time', length=3600)
ds.groupby('time.month')
DatasetGroupBy, grouped over 'month' 12 groups with labels 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12.