This notebooks shows how to search and load data via Earth System Grid Federation infrastructure. This infrastructure works great and is the foundation of the CMIP6 distribution system.
The main technologies used here are the ESGF search API, used to figure out what data we want, and OPeNDAP, a remote data access protocol over HTTP.
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
xr.set_options(display_style='html')
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
#!/usr/bin/env python
from __future__ import print_function
import requests
import xml.etree.ElementTree as ET
import numpy
# Author: Unknown
# I got the original version from a word document published by ESGF
# https://docs.google.com/document/d/1pxz1Kd3JHfFp8vR2JCVBfApbsHmbUQQstifhGNdc6U0/edit?usp=sharing
# API AT: https://github.com/ESGF/esgf.github.io/wiki/ESGF_Search_REST_API#results-pagination
def esgf_search(server="https://esgf-node.llnl.gov/esg-search/search",
files_type="OPENDAP", local_node=True, project="CMIP6",
verbose=False, format="application%2Fsolr%2Bjson",
use_csrf=False, **search):
client = requests.session()
payload = search
payload["project"] = project
payload["type"]= "File"
if local_node:
payload["distrib"] = "false"
if use_csrf:
client.get(server)
if 'csrftoken' in client.cookies:
# Django 1.6 and up
csrftoken = client.cookies['csrftoken']
else:
# older versions
csrftoken = client.cookies['csrf']
payload["csrfmiddlewaretoken"] = csrftoken
payload["format"] = format
offset = 0
numFound = 10000
all_files = []
files_type = files_type.upper()
while offset < numFound:
payload["offset"] = offset
url_keys = []
for k in payload:
url_keys += ["{}={}".format(k, payload[k])]
url = "{}/?{}".format(server, "&".join(url_keys))
print(url)
r = client.get(url)
r.raise_for_status()
resp = r.json()["response"]
numFound = int(resp["numFound"])
resp = resp["docs"]
offset += len(resp)
for d in resp:
if verbose:
for k in d:
print("{}: {}".format(k,d[k]))
url = d["url"]
for f in d["url"]:
sp = f.split("|")
if sp[-1] == files_type:
all_files.append(sp[0].split(".html")[0])
return sorted(all_files)
result = esgf_search(activity_id='CMIP', table_id='Amon', variable_id='tas', experiment_id='historical',
institution_id="NCAR", source_id="CESM2", member_id="r10i1p1f1")
result
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&variable_id=tas&experiment_id=historical&institution_id=NCAR&source_id=CESM2&member_id=r10i1p1f1&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0 https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&variable_id=tas&experiment_id=historical&institution_id=NCAR&source_id=CESM2&member_id=r10i1p1f1&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=10
['http://aims3.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_185001-189912.nc', 'http://aims3.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_190001-194912.nc', 'http://aims3.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_195001-199912.nc', 'http://aims3.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_200001-201412.nc', 'http://esgf-data.ucar.edu/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_185001-189912.nc', 'http://esgf-data.ucar.edu/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_190001-194912.nc', 'http://esgf-data.ucar.edu/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_195001-199912.nc', 'http://esgf-data.ucar.edu/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_200001-201412.nc', 'http://esgf-data3.diasjp.net/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_185001-189912.nc', 'http://esgf-data3.diasjp.net/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_190001-194912.nc', 'http://esgf-data3.diasjp.net/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_195001-199912.nc', 'http://esgf-data3.diasjp.net/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCAR/CESM2/historical/r10i1p1f1/Amon/tas/gn/v20190313/tas_Amon_CESM2_historical_r10i1p1f1_gn_200001-201412.nc']
These are OPeNDAP endpoints. Xarray, together with the netCDF4 python library, allow lazy loading.
# there are mulitple sources of the same data--need to pick one
files_to_open = result[-4:]
ds = xr.open_mfdataset(files_to_open, combine='by_coords')
ds
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/conventions.py:500: SerializationWarning: variable 'tas' has multiple fill values {1e+20, 1e+20}, decoding all values to NaN. decode_timedelta=decode_timedelta,
<xarray.Dataset> Dimensions: (lat: 192, lon: 288, nbnd: 2, time: 1980) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0 * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8 * time (time) object 1850-01-15 12:00:00 ... 2014-12-15 12:00:00 Dimensions without coordinates: nbnd Data variables: time_bnds (time, nbnd) object dask.array<chunksize=(600, 2), meta=np.ndarray> lat_bnds (time, lat, nbnd) float64 dask.array<chunksize=(600, 192, 2), meta=np.ndarray> lon_bnds (time, lon, nbnd) float64 dask.array<chunksize=(600, 288, 2), meta=np.ndarray> tas (time, lat, lon) float32 dask.array<chunksize=(600, 192, 288), meta=np.ndarray> Attributes: Conventions: CF-1.7 CMIP-6.2 activity_id: CMIP branch_method: standard branch_time_in_child: 674885.0 branch_time_in_parent: 306600.0 case_id: 24 cesm_casename: b.e21.BHIST.f09_g17.CMIP6-historical.010 contact: cesm_cmip6@ucar.edu creation_date: 2019-03-12T06:39:18Z data_specs_version: 01.00.29 experiment: Simulation of recent past (1850 to 2014)... experiment_id: historical external_variables: areacella forcing_index: 1 frequency: mon further_info_url: https://furtherinfo.es-doc.org/CMIP6.NCA... grid: native 0.9x1.25 finite volume grid (192x... grid_label: gn initialization_index: 1 institution: National Center for Atmospheric Research... institution_id: NCAR license: CMIP6 model data produced by <The Nation... mip_era: CMIP6 model_doi_url: https://doi.org/10.5065/D67H1H0V nominal_resolution: 100 km parent_activity_id: CMIP parent_experiment_id: piControl parent_mip_era: CMIP6 parent_source_id: CESM2 parent_time_units: days since 0001-01-01 00:00:00 parent_variant_label: r1i1p1f1 physics_index: 1 product: model-output realization_index: 10 realm: atmos source: CESM2 (2017): atmosphere: CAM6 (0.9x1.25... source_id: CESM2 source_type: AOGCM BGC sub_experiment: none sub_experiment_id: none table_id: Amon tracking_id: hdl:21.14100/e47b79db-3925-45a7-9c0a-679... variable_id: tas variant_info: CMIP6 20th century experiments (1850-201... variant_label: r10i1p1f1 DODS_EXTRA.Unlimited_Dimension: time
array([-90. , -89.057592, -88.115183, -87.172775, -86.230366, -85.287958, -84.34555 , -83.403141, -82.460733, -81.518325, -80.575916, -79.633508, -78.691099, -77.748691, -76.806283, -75.863874, -74.921466, -73.979058, -73.036649, -72.094241, -71.151832, -70.209424, -69.267016, -68.324607, -67.382199, -66.439791, -65.497382, -64.554974, -63.612565, -62.670157, -61.727749, -60.78534 , -59.842932, -58.900524, -57.958115, -57.015707, -56.073298, -55.13089 , -54.188482, -53.246073, -52.303665, -51.361257, -50.418848, -49.47644 , -48.534031, -47.591623, -46.649215, -45.706806, -44.764398, -43.82199 , -42.879581, -41.937173, -40.994764, -40.052356, -39.109948, -38.167539, -37.225131, -36.282723, -35.340314, -34.397906, -33.455497, -32.513089, -31.570681, -30.628272, -29.685864, -28.743455, -27.801047, -26.858639, -25.91623 , -24.973822, -24.031414, -23.089005, -22.146597, -21.204188, -20.26178 , -19.319372, -18.376963, -17.434555, -16.492147, -15.549738, -14.60733 , -13.664921, -12.722513, -11.780105, -10.837696, -9.895288, -8.95288 , -8.010471, -7.068063, -6.125654, -5.183246, -4.240838, -3.298429, -2.356021, -1.413613, -0.471204, 0.471204, 1.413613, 2.356021, 3.298429, 4.240838, 5.183246, 6.125654, 7.068063, 8.010471, 8.95288 , 9.895288, 10.837696, 11.780105, 12.722513, 13.664921, 14.60733 , 15.549738, 16.492147, 17.434555, 18.376963, 19.319372, 20.26178 , 21.204188, 22.146597, 23.089005, 24.031414, 24.973822, 25.91623 , 26.858639, 27.801047, 28.743455, 29.685864, 30.628272, 31.570681, 32.513089, 33.455497, 34.397906, 35.340314, 36.282723, 37.225131, 38.167539, 39.109948, 40.052356, 40.994764, 41.937173, 42.879581, 43.82199 , 44.764398, 45.706806, 46.649215, 47.591623, 48.534031, 49.47644 , 50.418848, 51.361257, 52.303665, 53.246073, 54.188482, 55.13089 , 56.073298, 57.015707, 57.958115, 58.900524, 59.842932, 60.78534 , 61.727749, 62.670157, 63.612565, 64.554974, 65.497382, 66.439791, 67.382199, 68.324607, 69.267016, 70.209424, 71.151832, 72.094241, 73.036649, 73.979058, 74.921466, 75.863874, 76.806283, 77.748691, 78.691099, 79.633508, 80.575916, 81.518325, 82.460733, 83.403141, 84.34555 , 85.287958, 86.230366, 87.172775, 88.115183, 89.057592, 90. ])
array([ 0. , 1.25, 2.5 , ..., 356.25, 357.5 , 358.75])
array([cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0), cftime.DatetimeNoLeap(1850, 2, 14, 0, 0, 0, 0), cftime.DatetimeNoLeap(1850, 3, 15, 12, 0, 0, 0), ..., cftime.DatetimeNoLeap(2014, 10, 15, 12, 0, 0, 0), cftime.DatetimeNoLeap(2014, 11, 15, 0, 0, 0, 0), cftime.DatetimeNoLeap(2014, 12, 15, 12, 0, 0, 0)], dtype=object)
|
|
|
|
Plot a map from a specific date.
ds.tas.sel(time='1950-01').squeeze().plot()
<matplotlib.collections.QuadMesh at 0x7f511a086f10>
Create a timeseries of global-average surface air temperature. For this we need the area weighting factor for each gridpoint.
files_area = esgf_search(variable_id='areacella', activity_id='CMIP',
experiment_id='historical', institution_id="NCAR", source_id="CESM2")
ds_area = xr.open_dataset(files_area[0])
ds_area
https://esgf-node.llnl.gov/esg-search/search/?variable_id=areacella&activity_id=CMIP&experiment_id=historical&institution_id=NCAR&source_id=CESM2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0 https://esgf-node.llnl.gov/esg-search/search/?variable_id=areacella&activity_id=CMIP&experiment_id=historical&institution_id=NCAR&source_id=CESM2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=10 https://esgf-node.llnl.gov/esg-search/search/?variable_id=areacella&activity_id=CMIP&experiment_id=historical&institution_id=NCAR&source_id=CESM2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=20 https://esgf-node.llnl.gov/esg-search/search/?variable_id=areacella&activity_id=CMIP&experiment_id=historical&institution_id=NCAR&source_id=CESM2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=30
/srv/conda/envs/notebook/lib/python3.7/site-packages/xarray/conventions.py:500: SerializationWarning: variable 'areacella' has multiple fill values {1e+20, 1e+20}, decoding all values to NaN. decode_timedelta=decode_timedelta,
<xarray.Dataset> Dimensions: (lat: 192, lon: 288, nbnd: 2) Coordinates: * lat (lat) float64 -90.0 -89.06 -88.12 -87.17 ... 88.12 89.06 90.0 * lon (lon) float64 0.0 1.25 2.5 3.75 5.0 ... 355.0 356.2 357.5 358.8 Dimensions without coordinates: nbnd Data variables: lat_bnds (lat, nbnd) float64 -90.0 -89.53 -89.53 ... 89.53 89.53 90.0 lon_bnds (lon, nbnd) float64 -0.625 0.625 0.625 ... 358.1 358.1 359.4 areacella (lat, lon) float32 29948368.0 29948368.0 ... 29948368.0 Attributes: _NCProperties: version=1|netcdflibversion=4.4.1.1|hdf5libversion... Conventions: CF-1.7 CMIP-6.2 activity_id: CMIP branch_method: standard branch_time_in_child: 674885.0 branch_time_in_parent: 306600.0 case_id: 24 cesm_casename: b.e21.BHIST.f09_g17.CMIP6-historical.010 contact: cesm_cmip6@ucar.edu creation_date: 2019-03-12T06:40:59Z data_specs_version: 01.00.29 experiment: Simulation of recent past (1850 to 2014). Impose ... experiment_id: historical forcing_index: 1 frequency: fx further_info_url: https://furtherinfo.es-doc.org/CMIP6.NCAR.CESM2.h... grid: native 0.9x1.25 finite volume grid (192x288 latxlon) grid_label: gn initialization_index: 1 institution: National Center for Atmospheric Research, Climate... institution_id: NCAR license: CMIP6 model data produced by <The National Center... mip_era: CMIP6 model_doi_url: https://doi.org/10.5065/D67H1H0V nominal_resolution: 100 km parent_activity_id: CMIP parent_experiment_id: piControl parent_mip_era: CMIP6 parent_source_id: CESM2 parent_time_units: days since 0001-01-01 00:00:00 parent_variant_label: r1i1p1f1 physics_index: 1 product: model-output realization_index: 10 realm: atmos land source: CESM2 (2017): atmosphere: CAM6 (0.9x1.25 finite v... source_id: CESM2 source_type: AOGCM BGC sub_experiment: none sub_experiment_id: none table_id: fx tracking_id: hdl:21.14100/54fe4682-51a1-43ca-a7d5-66b7bca3f8c8 variable_id: areacella variant_info: CMIP6 20th century experiments (1850-2014) with C... variant_label: r10i1p1f1
array([-90. , -89.057592, -88.115183, -87.172775, -86.230366, -85.287958, -84.34555 , -83.403141, -82.460733, -81.518325, -80.575916, -79.633508, -78.691099, -77.748691, -76.806283, -75.863874, -74.921466, -73.979058, -73.036649, -72.094241, -71.151832, -70.209424, -69.267016, -68.324607, -67.382199, -66.439791, -65.497382, -64.554974, -63.612565, -62.670157, -61.727749, -60.78534 , -59.842932, -58.900524, -57.958115, -57.015707, -56.073298, -55.13089 , -54.188482, -53.246073, -52.303665, -51.361257, -50.418848, -49.47644 , -48.534031, -47.591623, -46.649215, -45.706806, -44.764398, -43.82199 , -42.879581, -41.937173, -40.994764, -40.052356, -39.109948, -38.167539, -37.225131, -36.282723, -35.340314, -34.397906, -33.455497, -32.513089, -31.570681, -30.628272, -29.685864, -28.743455, -27.801047, -26.858639, -25.91623 , -24.973822, -24.031414, -23.089005, -22.146597, -21.204188, -20.26178 , -19.319372, -18.376963, -17.434555, -16.492147, -15.549738, -14.60733 , -13.664921, -12.722513, -11.780105, -10.837696, -9.895288, -8.95288 , -8.010471, -7.068063, -6.125654, -5.183246, -4.240838, -3.298429, -2.356021, -1.413613, -0.471204, 0.471204, 1.413613, 2.356021, 3.298429, 4.240838, 5.183246, 6.125654, 7.068063, 8.010471, 8.95288 , 9.895288, 10.837696, 11.780105, 12.722513, 13.664921, 14.60733 , 15.549738, 16.492147, 17.434555, 18.376963, 19.319372, 20.26178 , 21.204188, 22.146597, 23.089005, 24.031414, 24.973822, 25.91623 , 26.858639, 27.801047, 28.743455, 29.685864, 30.628272, 31.570681, 32.513089, 33.455497, 34.397906, 35.340314, 36.282723, 37.225131, 38.167539, 39.109948, 40.052356, 40.994764, 41.937173, 42.879581, 43.82199 , 44.764398, 45.706806, 46.649215, 47.591623, 48.534031, 49.47644 , 50.418848, 51.361257, 52.303665, 53.246073, 54.188482, 55.13089 , 56.073298, 57.015707, 57.958115, 58.900524, 59.842932, 60.78534 , 61.727749, 62.670157, 63.612565, 64.554974, 65.497382, 66.439791, 67.382199, 68.324607, 69.267016, 70.209424, 71.151832, 72.094241, 73.036649, 73.979058, 74.921466, 75.863874, 76.806283, 77.748691, 78.691099, 79.633508, 80.575916, 81.518325, 82.460733, 83.403141, 84.34555 , 85.287958, 86.230366, 87.172775, 88.115183, 89.057592, 90. ])
array([ 0. , 1.25, 2.5 , ..., 356.25, 357.5 , 358.75])
array([[-90. , -89.528796], [-89.528796, -88.586387], [-88.586387, -87.643979], ..., [ 87.643979, 88.586387], [ 88.586387, 89.528796], [ 89.528796, 90. ]])
array([[ -0.625, 0.625], [ 0.625, 1.875], [ 1.875, 3.125], ..., [355.625, 356.875], [356.875, 358.125], [358.125, 359.375]])
array([[2.994837e+07, 2.994837e+07, 2.994837e+07, ..., 2.994837e+07, 2.994837e+07, 2.994837e+07], [2.395748e+08, 2.395748e+08, 2.395748e+08, ..., 2.395748e+08, 2.395748e+08, 2.395748e+08], [4.790848e+08, 4.790848e+08, 4.790848e+08, ..., 4.790848e+08, 4.790848e+08, 4.790848e+08], ..., [4.790848e+08, 4.790848e+08, 4.790848e+08, ..., 4.790848e+08, 4.790848e+08, 4.790848e+08], [2.395748e+08, 2.395748e+08, 2.395748e+08, ..., 2.395748e+08, 2.395748e+08, 2.395748e+08], [2.994837e+07, 2.994837e+07, 2.994837e+07, ..., 2.994837e+07, 2.994837e+07, 2.994837e+07]], dtype=float32)
total_area = ds_area.areacella.sum(dim=['lon', 'lat'])
ta_timeseries = (ds.tas * ds_area.areacella).sum(dim=['lon', 'lat']) / total_area
ta_timeseries
<xarray.DataArray (time: 1980)> dask.array<truediv, shape=(1980,), dtype=float32, chunksize=(600,), chunktype=numpy.ndarray> Coordinates: * time (time) object 1850-01-15 12:00:00 ... 2014-12-15 12:00:00
|
array([cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0), cftime.DatetimeNoLeap(1850, 2, 14, 0, 0, 0, 0), cftime.DatetimeNoLeap(1850, 3, 15, 12, 0, 0, 0), ..., cftime.DatetimeNoLeap(2014, 10, 15, 12, 0, 0, 0), cftime.DatetimeNoLeap(2014, 11, 15, 0, 0, 0, 0), cftime.DatetimeNoLeap(2014, 12, 15, 12, 0, 0, 0)], dtype=object)
By default the data are loaded lazily, as Dask arrays. Here we trigger computation explicitly.
%time ta_timeseries.load()
CPU times: user 3.35 s, sys: 1.48 s, total: 4.82 s Wall time: 52.8 s
<xarray.DataArray (time: 1980)> array([284.99948, 285.23215, 285.85364, ..., 288.54376, 287.61884, 287.06284], dtype=float32) Coordinates: * time (time) object 1850-01-15 12:00:00 ... 2014-12-15 12:00:00
array([284.99948, 285.23215, 285.85364, ..., 288.54376, 287.61884, 287.06284], dtype=float32)
array([cftime.DatetimeNoLeap(1850, 1, 15, 12, 0, 0, 0), cftime.DatetimeNoLeap(1850, 2, 14, 0, 0, 0, 0), cftime.DatetimeNoLeap(1850, 3, 15, 12, 0, 0, 0), ..., cftime.DatetimeNoLeap(2014, 10, 15, 12, 0, 0, 0), cftime.DatetimeNoLeap(2014, 11, 15, 0, 0, 0, 0), cftime.DatetimeNoLeap(2014, 12, 15, 12, 0, 0, 0)], dtype=object)
ta_timeseries.plot(label='monthly')
ta_timeseries.rolling(time=12).mean().plot(label='12 month rolling mean')
plt.legend()
plt.title('Global Mean Surface Air Temperature')
Text(0.5, 1.0, 'Global Mean Surface Air Temperature')
ds.nbytes / 1e6 / 40
11.330052