Notebook

What's new in xarray?¶

An update for PyData NYC 2019.

In [1]:

import xarray as xr
# Display the installed xarray version string.
xr.__version__

Out[1]:

'0.14.0+19.gba48fbcd'

Rich HTML repr¶

In [2]:

# Switch xarray's display style to the rich HTML repr.
xr.set_options(display_style="html")
# Load the 'rasm' tutorial dataset, then chunk it so the variables are
# shown as dask arrays in the repr below.
rasm = xr.tutorial.load_dataset('rasm')
rasm.chunk()

Out[2]:

xarray.Dataset

Dimensions:
- time: 36
- x: 275
- y: 205

Coordinates: (3)

time

(time)

object

1980-09-16 12:00:00 ... 1983-08-17 00:00:00

long_name :: time
type_preferred :: int

array([cftime.DatetimeNoLeap(1980, 9, 16, 12, 0, 0, 0, 5, 259),
       cftime.DatetimeNoLeap(1980, 10, 17, 0, 0, 0, 0, 1, 290),
       cftime.DatetimeNoLeap(1980, 11, 16, 12, 0, 0, 0, 3, 320),
       cftime.DatetimeNoLeap(1980, 12, 17, 0, 0, 0, 0, 6, 351),
       cftime.DatetimeNoLeap(1981, 1, 17, 0, 0, 0, 0, 2, 17),
       cftime.DatetimeNoLeap(1981, 2, 15, 12, 0, 0, 0, 3, 46),
       cftime.DatetimeNoLeap(1981, 3, 17, 0, 0, 0, 0, 5, 76),
       cftime.DatetimeNoLeap(1981, 4, 16, 12, 0, 0, 0, 0, 106),
       cftime.DatetimeNoLeap(1981, 5, 17, 0, 0, 0, 0, 3, 137),
       cftime.DatetimeNoLeap(1981, 6, 16, 12, 0, 0, 0, 5, 167),
       cftime.DatetimeNoLeap(1981, 7, 17, 0, 0, 0, 0, 1, 198),
       cftime.DatetimeNoLeap(1981, 8, 17, 0, 0, 0, 0, 4, 229),
       cftime.DatetimeNoLeap(1981, 9, 16, 12, 0, 0, 0, 6, 259),
       cftime.DatetimeNoLeap(1981, 10, 17, 0, 0, 0, 0, 2, 290),
       cftime.DatetimeNoLeap(1981, 11, 16, 12, 0, 0, 0, 4, 320),
       cftime.DatetimeNoLeap(1981, 12, 17, 0, 0, 0, 0, 0, 351),
       cftime.DatetimeNoLeap(1982, 1, 17, 0, 0, 0, 0, 3, 17),
       cftime.DatetimeNoLeap(1982, 2, 15, 12, 0, 0, 0, 4, 46),
       cftime.DatetimeNoLeap(1982, 3, 17, 0, 0, 0, 0, 6, 76),
       cftime.DatetimeNoLeap(1982, 4, 16, 12, 0, 0, 0, 1, 106),
       cftime.DatetimeNoLeap(1982, 5, 17, 0, 0, 0, 0, 4, 137),
       cftime.DatetimeNoLeap(1982, 6, 16, 12, 0, 0, 0, 6, 167),
       cftime.DatetimeNoLeap(1982, 7, 17, 0, 0, 0, 0, 2, 198),
       cftime.DatetimeNoLeap(1982, 8, 17, 0, 0, 0, 0, 5, 229),
       cftime.DatetimeNoLeap(1982, 9, 16, 12, 0, 0, 0, 0, 259),
       cftime.DatetimeNoLeap(1982, 10, 17, 0, 0, 0, 0, 3, 290),
       cftime.DatetimeNoLeap(1982, 11, 16, 12, 0, 0, 0, 5, 320),
       cftime.DatetimeNoLeap(1982, 12, 17, 0, 0, 0, 0, 1, 351),
       cftime.DatetimeNoLeap(1983, 1, 17, 0, 0, 0, 0, 4, 17),
       cftime.DatetimeNoLeap(1983, 2, 15, 12, 0, 0, 0, 5, 46),
       cftime.DatetimeNoLeap(1983, 3, 17, 0, 0, 0, 0, 0, 76),
       cftime.DatetimeNoLeap(1983, 4, 16, 12, 0, 0, 0, 2, 106),
       cftime.DatetimeNoLeap(1983, 5, 17, 0, 0, 0, 0, 5, 137),
       cftime.DatetimeNoLeap(1983, 6, 16, 12, 0, 0, 0, 0, 167),
       cftime.DatetimeNoLeap(1983, 7, 17, 0, 0, 0, 0, 3, 198),
       cftime.DatetimeNoLeap(1983, 8, 17, 0, 0, 0, 0, 6, 229)], dtype=object)

(y, x)

float64

dask.array<chunksize=(205, 275), meta=np.ndarray>

long_name :: longitude of grid cell center
units :: degrees_east
bounds :: xv





  
      Array  Chunk 
  
  
     Bytes  451.00 kB   451.00 kB 
     Shape  (205, 275)   (205, 275) 
     Count  1 Tasks  1 Chunks 
     Type  float64  numpy.ndarray

(y, x)

float64

dask.array<chunksize=(205, 275), meta=np.ndarray>

long_name :: latitude of grid cell center
units :: degrees_north
bounds :: yv





  
      Array  Chunk 
  
  
     Bytes  451.00 kB   451.00 kB 
     Shape  (205, 275)   (205, 275) 
     Count  1 Tasks  1 Chunks 
     Type  float64  numpy.ndarray

Data variables: (1)

Tair

(time, y, x)

float64

dask.array<chunksize=(36, 205, 275), meta=np.ndarray>

units :: C
long_name :: Surface air temperature
type_preferred :: double
time_rep :: instantaneous





  
      Array  Chunk 
  
  
     Bytes  16.24 MB   16.24 MB 
     Shape  (36, 205, 275)   (36, 205, 275) 
     Count  1 Tasks  1 Chunks 
     Type  float64  numpy.ndarray

Attributes: (11)
title :
/workspace/jhamman/processed/R1002RBRxaaa01a/lnd/temp/R1002RBRxaaa01a.vic.ha.1979-09-01.nc
institution :
U.W.
source :
RACM R1002RBRxaaa01a
output_frequency :
daily
output_mode :
averaged
convention :
CF-1.4
references :
Based on the initial model of Liang et al., 1994, JGR, 99, 14,415- 14,429.
comment :
Output from the Variable Infiltration Capacity (VIC) model.
nco_openmp_thread_number :
1
NCO :
"4.6.0"
history :
Tue Dec 27 14:15:22 2016: ncatted -a dimensions,,d,, rasm.nc rasm.nc Tue Dec 27 13:38:40 2016: ncks -3 rasm.nc rasm.nc history deleted for brevity

Flexible Array Support (NEP18)¶

Create a sparse array and put it in an xarray DataArray.

In [3]:

import sparse

# Five non-zero values on the main diagonal of a 5x5 matrix: the first
# coordinate list indexes the first axis, the second list the second axis.
coords = [list(range(5)) for _ in range(2)]
data = list(range(10, 60, 10))
s = sparse.COO(coords, data, shape=(5, 5))
# Wrap the sparse array directly; the dimensions are named 'lat' and 'lon'.
das = xr.DataArray(data=s, dims=['lat', 'lon'])
das

Out[3]:

xarray.DataArray

lat: 5
lon: 5

<COO: nnz=5, fill_value=0>

<COO: shape=(5, 5), dtype=int64, nnz=5, fill_value=0>

Coordinates: (0)
Attributes: (0)

In [4]:

# Reduce along 'lon'; the result below is still backed by a sparse COO array.
das.mean(dim='lon')

Out[4]:

xarray.DataArray

lat: 5

<COO: nnz=5, fill_value=0.0>

<COO: shape=(5,), dtype=float64, nnz=5, fill_value=0.0>

Coordinates: (0)
Attributes: (0)

Put it inside a dask array.

In [5]:

# Chunk the sparse-backed DataArray; the repr below reports meta=sparse.COO.
das.chunk()

Out[5]:

xarray.DataArray

lat: 5
lon: 5

dask.array<chunksize=(5, 5), meta=sparse.COO>





  
      Array  Chunk 
  
  
     Bytes  200 B   200 B 
     Shape  (5, 5)   (5, 5) 
     Count  2 Tasks  1 Chunks 
     Type  int64  sparse.COO

Coordinates: (0)
Attributes: (0)

Create a sparse array from a pandas Series with a MultiIndex.

In [6]:

import pandas as pd
import numpy as np

# Only four of the 3 x 4 possible (first, second) label pairs are present.
tuples = [
    ('a', 0),
    ('a', 2),
    ('b', 1),
    ('c', 3),
]
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
# One float zero per index entry.
s = pd.Series(data=np.zeros(len(index)), index=index)
s

Out[6]:

first  second
a      0         0.0
       2         0.0
b      1         0.0
c      3         0.0
dtype: float64

In [7]:

# Default conversion: the result below is a dense 3x4 array in which the
# (first, second) pairs absent from the Series appear as nan.
xr.DataArray.from_series(s)

Out[7]:

xarray.DataArray

first: 3
second: 4

0.0 nan 0.0 nan nan 0.0 nan nan nan nan nan 0.0

array([[ 0., nan,  0., nan],
       [nan,  0., nan, nan],
       [nan, nan, nan,  0.]])

Coordinates: (2)
- first
  (first)
  object
  'a' 'b' 'c'
```
array(['a', 'b', 'c'], dtype=object)
```
- second
  (second)
  int64
  0 1 2 3
```
array([0, 1, 2, 3])
```
Attributes: (0)

In [8]:

# Same conversion with sparse=True: the repr below shows a COO array
# with nnz=4 and fill_value=nan.
das = xr.DataArray.from_series(s, sparse=True)
das

Out[8]:

xarray.DataArray

first: 3
second: 4

<COO: nnz=4, fill_value=nan>

<COO: shape=(3, 4), dtype=float64, nnz=4, fill_value=nan>

Coordinates: (2)
- first
  (first)
  object
  'a' 'b' 'c'
```
array(['a', 'b', 'c'], dtype=object)
```
- second
  (second)
  int64
  0 1 2 3
```
array([0, 1, 2, 3])
```
Attributes: (0)

In [9]:

# Label-based selection on the sparse-backed array; .todense() converts the
# selected sparse element to a dense array for display.
das.sel(first='a', second=0).data.todense()

Out[9]:

array(0.)

In principle, the same approach should also work for other NEP 18-compatible array types, such as cupy arrays and pint arrays.

CFTimeIndex¶

For non-standard calendars.

In [10]:

# Ten years of daily offsets on a 360-day calendar.
day = np.arange(360 * 10)
# CF-style attributes describing how the integer offsets encode time.
time_attrs = {'units': 'days since 4000-01-01', 'calendar': '360_day'}
# Build the dataset with the encoded coordinate, then decode it following
# CF conventions.
ds = xr.decode_cf(xr.Dataset(coords={'time': ('time', day, time_attrs)}))
ds

Out[10]:

xarray.Dataset

Dimensions:
- time: 3600

Coordinates: (1)

time

(time)

object

4000-01-01 00:00:00 ... 4009-12-30 00:00:00

array([cftime.Datetime360Day(4000, 1, 1, 0, 0, 0, 0, 2, 1),
       cftime.Datetime360Day(4000, 1, 2, 0, 0, 0, 0, 3, 2),
       cftime.Datetime360Day(4000, 1, 3, 0, 0, 0, 0, 4, 3), ...,
       cftime.Datetime360Day(4009, 12, 28, 0, 0, 0, 0, 1, 358),
       cftime.Datetime360Day(4009, 12, 29, 0, 0, 0, 0, 2, 359),
       cftime.Datetime360Day(4009, 12, 30, 0, 0, 0, 0, 3, 360)], dtype=object)

Data variables: (0)
Attributes: (0)

In [11]:

# Show the indexes; the decoded 'time' coordinate is backed by a CFTimeIndex.
ds.indexes

Out[11]:

time: CFTimeIndex([4000-01-01 00:00:00, 4000-01-02 00:00:00, 4000-01-03 00:00:00,
                   4000-01-04 00:00:00, 4000-01-05 00:00:00, 4000-01-06 00:00:00,
                   4000-01-07 00:00:00, 4000-01-08 00:00:00, 4000-01-09 00:00:00,
                   4000-01-10 00:00:00,
                   ...
                   4009-12-21 00:00:00, 4009-12-22 00:00:00, 4009-12-23 00:00:00,
                   4009-12-24 00:00:00, 4009-12-25 00:00:00, 4009-12-26 00:00:00,
                   4009-12-27 00:00:00, 4009-12-28 00:00:00, 4009-12-29 00:00:00,
                   4009-12-30 00:00:00],
                  dtype='object', name='time', length=3600)

In [12]:

# Group by the month component of the cftime-based 'time' coordinate.
ds.groupby('time.month')

Out[12]:

DatasetGroupBy, grouped over 'month' 
12 groups with labels 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12.

In [ ]: