Authors: A. Radhakrishnan (GFDL). Acknowledgments: Anderson Banihirwe for intake-esm updates, and GFDL colleagues for the data.
from netCDF4 import Dataset
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import xarray as xr
import intake,yaml
import intake_esm
import numpy as np
%matplotlib inline
esgf-world.json is the ESM collection spec file for the netCDF data in the S3 bucket esgf-world. For now, the catalog is updated on an on-demand basis. Refer to https://github.com/aradhakrishnanGFDL/gfdl-aws-analysis/tree/community/esm-collection-spec-examples for the most recent catalogs. More examples can be found in https://github.com/aradhakrishnanGFDL/gfdl-aws-analysis/tree/community/examples
# URL of the ESM collection (intake-esm catalog) describing the netCDF
# holdings in the public esgf-world S3 bucket.
col_url = "https://cmip6-nc.s3.us-east-2.amazonaws.com/esgf-world.json"
# Alternate, GitHub-hosted copy of the same catalog:
#col_url = "https://raw.githubusercontent.com/aradhakrishnanGFDL/gfdl-aws-analysis/community/esm-collection-spec-examples/esgf-world.json"

# Open the datastore and display its underlying pandas DataFrame.
col = intake.open_esm_datastore(col_url)
col.df
project | institution_id | source_id | experiment_id | frequency | modeling_realm | table_id | member_id | grid_label | variable_id | temporal_subset | version | path | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | CMIP6 | AS-RCEC | TaiESM1 | histSST-piNTCF | NaN | NaN | AERmon | r1i1p1f1 | gn | ps | 185001-201412 | v20200318 | s3://esgf-world/CMIP6/AerChemMIP/AS-RCEC/TaiES... |
1 | CMIP6 | AS-RCEC | TaiESM1 | histSST-piNTCF | NaN | NaN | CFmon | r1i1p1f1 | gn | ta | 185001-201412 | v20200318 | s3://esgf-world/CMIP6/AerChemMIP/AS-RCEC/TaiES... |
2 | CMIP6 | AS-RCEC | TaiESM1 | histSST-piNTCF | NaN | NaN | LImon | r1i1p1f1 | gn | snc | 185002-201412 | v20200318 | s3://esgf-world/CMIP6/AerChemMIP/AS-RCEC/TaiES... |
3 | CMIP6 | AS-RCEC | TaiESM1 | histSST-piNTCF | NaN | NaN | LImon | r1i1p1f1 | gn | snd | 185002-201412 | v20200318 | s3://esgf-world/CMIP6/AerChemMIP/AS-RCEC/TaiES... |
4 | CMIP6 | AS-RCEC | TaiESM1 | histSST-piNTCF | NaN | NaN | LImon | r1i1p1f1 | gn | snw | 185002-201412 | v20200318 | s3://esgf-world/CMIP6/AerChemMIP/AS-RCEC/TaiES... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1151014 | CMIP6 | UA | MCM-UA-1-0 | ssp585 | mon | atmos | Amon | r1i1p1f2 | gn | rlut | 201501-210012 | v20190731 | s3://esgf-world/CMIP6/ScenarioMIP/UA/MCM-UA-1-... |
1151015 | CMIP6 | UA | MCM-UA-1-0 | ssp585 | mon | atmos | Amon | r1i1p1f2 | gn | rtmt | 201501-210012 | v20190731 | s3://esgf-world/CMIP6/ScenarioMIP/UA/MCM-UA-1-... |
1151016 | CMIP6 | UA | MCM-UA-1-0 | ssp585 | mon | ocean | Omon | r1i1p1f2 | gn | sos | 201501-210012 | v20190731 | s3://esgf-world/CMIP6/ScenarioMIP/UA/MCM-UA-1-... |
1151017 | CMIP6 | UA | MCM-UA-1-0 | ssp585 | mon | ocean | Omon | r1i1p1f2 | gn | tos | 201501-210012 | v20190731 | s3://esgf-world/CMIP6/ScenarioMIP/UA/MCM-UA-1-... |
1151018 | CMIP6 | UA | MCM-UA-1-0 | ssp585 | NaN | NaN | fx | r1i1p1f2 | gn | areacella | NaN | v20190731 | s3://esgf-world/CMIP6/ScenarioMIP/UA/MCM-UA-1-... |
1151019 rows × 13 columns
# Example: search the catalog for a single model / experiment / variable.
expname_filter = ['historical']     # experiment_id values to match
table_id_filter = 'Amon'            # monthly atmospheric table
model_filter = 'GFDL-ESM4'          # source_id
variable_id_filter = "tas"          # near-surface air temperature
# NOTE(review): ens_filter is defined but never passed to search() below
# (the catalog summary shows only one member anyway) — confirm intent.
ens_filter = "r1i1p1f1"
version_filter = "v20190726"

# Fix: pass version_filter instead of re-typing the literal "v20190726",
# so editing the variable above cannot silently drift from the query.
cat = col.search(experiment_id=expname_filter,
                 table_id=table_id_filter,
                 source_id=model_filter,
                 variable_id=variable_id_filter,
                 version=version_filter)
cat
gfdltest catalog with 1 dataset(s) from 2 asset(s):
unique | |
---|---|
project | 1 |
institution_id | 1 |
source_id | 1 |
experiment_id | 1 |
frequency | 1 |
modeling_realm | 1 |
table_id | 1 |
member_id | 1 |
grid_label | 1 |
variable_id | 1 |
temporal_subset | 2 |
version | 1 |
path | 2 |
# Open the matching assets as a dict of xarray datasets.  S3 access is
# anonymous; chunk one time step at a time so data loads lazily via dask.
open_kwargs = {'chunks': {'time': 1}}
dset_dict = cat.to_dataset_dict(cdf_kwargs=open_kwargs,
                                storage_options={'anon': True})
--> The keys in the returned dictionary of datasets are constructed as follows: 'project.institution_id.source_id.experiment_id.table_id'
dset_dict
{'CMIP6.NOAA-GFDL.GFDL-ESM4.historical.Amon': <xarray.Dataset> Dimensions: (bnds: 2, lat: 180, lon: 288, member_id: 1, time: 1980) Coordinates: * bnds (bnds) float64 1.0 2.0 height float64 ... * lat (lat) float64 -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5 * lon (lon) float64 0.625 1.875 3.125 4.375 ... 355.6 356.9 358.1 359.4 * time (time) object 1850-01-16 12:00:00 ... 2014-12-16 12:00:00 * member_id (member_id) <U8 'r1i1p1f1' Data variables: lat_bnds (lat, bnds) float64 dask.array<chunksize=(180, 2), meta=np.ndarray> lon_bnds (lon, bnds) float64 dask.array<chunksize=(288, 2), meta=np.ndarray> tas (member_id, time, lat, lon) float32 dask.array<chunksize=(1, 1, 180, 288), meta=np.ndarray> time_bnds (time, bnds) object dask.array<chunksize=(1, 2), meta=np.ndarray> Attributes: (12/47) history: File was processed by fremetar (GFDL analog of C... experiment_id: historical forcing_index: [1] data_specs_version: 01.00.27 frequency: mon parent_mip_era: CMIP6 ... ... branch_time_in_parent: [36500.] external_variables: areacella experiment: all-forcing simulation of the recent past source_type: AOGCM AER CHEM BGC branch_time_in_child: [0.] intake_esm_dataset_key: CMIP6.NOAA-GFDL.GFDL-ESM4.historical.Amon}
# Grab the GFDL-ESM4 historical dataset by its intake-esm key and
# inspect the tas (near-surface air temperature) variable.
dset_key = "CMIP6.NOAA-GFDL.GFDL-ESM4.historical.Amon"
hxr_gfdl_esm4 = dset_dict[dset_key]
hxr_gfdl_esm4.tas
<xarray.DataArray 'tas' (member_id: 1, time: 1980, lat: 180, lon: 288)> dask.array<broadcast_to, shape=(1, 1980, 180, 288), dtype=float32, chunksize=(1, 1, 180, 288), chunktype=numpy.ndarray> Coordinates: height float64 ... * lat (lat) float64 -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5 * lon (lon) float64 0.625 1.875 3.125 4.375 ... 355.6 356.9 358.1 359.4 * time (time) object 1850-01-16 12:00:00 ... 2014-12-16 12:00:00 * member_id (member_id) <U8 'r1i1p1f1' Attributes: long_name: Near-Surface Air Temperature units: K cell_methods: area: time: mean cell_measures: area: areacella standard_name: air_temperature interp_method: conserve_order2 original_name: tas
|
array(2.)
array([-89.5, -88.5, -87.5, -86.5, -85.5, -84.5, -83.5, -82.5, -81.5, -80.5, -79.5, -78.5, -77.5, -76.5, -75.5, -74.5, -73.5, -72.5, -71.5, -70.5, -69.5, -68.5, -67.5, -66.5, -65.5, -64.5, -63.5, -62.5, -61.5, -60.5, -59.5, -58.5, -57.5, -56.5, -55.5, -54.5, -53.5, -52.5, -51.5, -50.5, -49.5, -48.5, -47.5, -46.5, -45.5, -44.5, -43.5, -42.5, -41.5, -40.5, -39.5, -38.5, -37.5, -36.5, -35.5, -34.5, -33.5, -32.5, -31.5, -30.5, -29.5, -28.5, -27.5, -26.5, -25.5, -24.5, -23.5, -22.5, -21.5, -20.5, -19.5, -18.5, -17.5, -16.5, -15.5, -14.5, -13.5, -12.5, -11.5, -10.5, -9.5, -8.5, -7.5, -6.5, -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5, 21.5, 22.5, 23.5, 24.5, 25.5, 26.5, 27.5, 28.5, 29.5, 30.5, 31.5, 32.5, 33.5, 34.5, 35.5, 36.5, 37.5, 38.5, 39.5, 40.5, 41.5, 42.5, 43.5, 44.5, 45.5, 46.5, 47.5, 48.5, 49.5, 50.5, 51.5, 52.5, 53.5, 54.5, 55.5, 56.5, 57.5, 58.5, 59.5, 60.5, 61.5, 62.5, 63.5, 64.5, 65.5, 66.5, 67.5, 68.5, 69.5, 70.5, 71.5, 72.5, 73.5, 74.5, 75.5, 76.5, 77.5, 78.5, 79.5, 80.5, 81.5, 82.5, 83.5, 84.5, 85.5, 86.5, 87.5, 88.5, 89.5])
array([ 0.625, 1.875, 3.125, ..., 356.875, 358.125, 359.375])
array([cftime.DatetimeNoLeap(1850, 1, 16, 12, 0, 0, 0, has_year_zero=True), cftime.DatetimeNoLeap(1850, 2, 15, 0, 0, 0, 0, has_year_zero=True), cftime.DatetimeNoLeap(1850, 3, 16, 12, 0, 0, 0, has_year_zero=True), ..., cftime.DatetimeNoLeap(2014, 10, 16, 12, 0, 0, 0, has_year_zero=True), cftime.DatetimeNoLeap(2014, 11, 16, 0, 0, 0, 0, has_year_zero=True), cftime.DatetimeNoLeap(2014, 12, 16, 12, 0, 0, 0, has_year_zero=True)], dtype=object)
array(['r1i1p1f1'], dtype='<U8')
For reference, the equivalent Ferret command for the global mean is: `let hgtas = tas[x=@ave,y=@ave,d=1,l=529:780@ave]`
# Latitude band (degrees north) for the Arctic average computed below.
lat1 = 60
lat2 = 90

# Area weights for the regular lat/lon grid, proportional to cos(latitude).
tas = hxr_gfdl_esm4.tas
weights = np.cos(np.radians(tas.lat))  # np.radians is an alias of np.deg2rad
weights.name = "weights"
Compute the baseline (1994-2014) mean near-surface air temperature (tas).
# Baseline (1994-2014) global mean tas: select the period, apply the
# cos(lat) area weights, then average over longitude, latitude and time.
hgtas = (
    hxr_gfdl_esm4.tas
    .sel(time=slice("1994", "2014"))
    .weighted(weights)
    .mean(dim=['lon', 'lat', 'time'])
    .values
)
print(hgtas)
[287.16173045]
# Same baseline average, restricted to the lat1..lat2 (Arctic) band.
hatas = (
    hxr_gfdl_esm4.tas
    .sel(time=slice("1994", "2014"), lat=slice(lat1, lat2))
    .weighted(weights)
    .mean(dim=['lon', 'lat', 'time'])
    .values
)
print(hatas)
[264.3833672]
# Quick look at the 1994-2014 subset.  Note: .plot() on a DataArray with
# more than two dimensions falls back to a histogram of all values.
hgtas_hist = tas.sel(time=slice("1994", "2014"))
hgtas_hist.plot()
(array([ 54238., 294657., 313952., 822221., 868236., 1102418., 2664606., 2776262., 4056464., 110626.]), array([199.64807, 211.30887, 222.96968, 234.63048, 246.29129, 257.9521 , 269.61288, 281.27368, 292.9345 , 304.5953 , 316.2561 ], dtype=float32), <BarContainer object of 10 artists>)
# Lat/lon map of tas at time index 1 (the second time step).
second_step = tas.isel(time=1)
hgtas2d = second_step.plot()