NASA's Common Metadata Repository (CMR) now has a STAC endpoint (https://github.com/nasa/cmr-stac). In theory, we can use Intake-STAC to browse NASA's entire data catalog!
This example is experimental as the cmr-stac service is very new.
import intake
import os
import hvplot.xarray
import pandas as pd
# testing remote reading of netcdf data
import fsspec
import aiohttp
import netrc
import xarray as xr
# Search not currently working:
# https://github.com/sat-utils/sat-search/issues/106
#import satsearch
#URL = 'https://cmr.earthdata.nasa.gov/cmr-stac/ASF'
#results = satsearch.Search.search(url=URL,
# collections=['C1595422627-ASF']
# )
#items = results.items()
#print('%s items' % len(items))
# Value sent as the `limit` query parameter; raise it to get more results back
limit = 500
catalog_url = f'https://cmr.earthdata.nasa.gov/cmr-stac/ASF/collections?limit={limit}'
cat = intake.open_stac_catalog(catalog_url)
# Tabulate the collection records found in the catalog metadata
collections = cat.metadata['collections']
col_info = pd.DataFrame(collections)
print(len(col_info))
col_info.head()
# Opening item collection directly also not working
# https://github.com/sat-utils/sat-stac/issues/65
#from satstac import ItemCollection
#col = 'C1595422627-ASF'
#limit=10
#url = f'https://cmr.earthdata.nasa.gov/cmr-stac/ASF/collections/{col}/items?limit={limit}'
#print(url)
#items = ItemCollection.open(url)
#print(len(items))
# Open a single item directly (works, but the item id must be known in advance)
item_url = 'https://cmr.earthdata.nasa.gov/cmr-stac/ASF/collections/C1595422627-ASF/items/G1636018550-ASF'
item = intake.open_stac_item(item_url)
list(item)
# Thumbnails don't always load: NASA Earthdata may require authentication
browse_asset = item.browse
browse_asset.plot.thumbnail()
%%time
# Open the netCDF file directly over HTTP (not efficient or pretty, but it works).
# NOTE(review): the original author suspected this reads the entire file into
# memory -- unconfirmed.
# Earthdata Login credentials are looked up in the user's netrc file.
credentials = netrc.netrc().authenticators("urs.earthdata.nasa.gov")
if credentials is None:
    # authenticators() returns None when there is no matching (or default) entry;
    # fail with a clear message instead of an opaque tuple-unpacking TypeError.
    raise RuntimeError("No 'urs.earthdata.nasa.gov' entry found in the netrc file")
(username, account, password) = credentials
fs = fsspec.filesystem('http', client_kwargs={'auth': aiohttp.BasicAuth(username, password)})
with fs.open(item.data.urlpath) as f:
    da = xr.open_dataset(f, group='/science/grids/data', engine='h5netcdf', chunks={})
# NOTE(review): `chunks={}` presumably leaves the variables lazy; computing them
# after `f` has been closed above may fail -- confirm before relying on `da`.
da
da['amplitude'].data
%%time
# Alternative: download the 'data' asset and open the local copy instead
localPath = item._stac_obj.download('data')
open_kwargs = dict(group='/science/grids/data', engine='h5netcdf', chunks={})
da = xr.open_dataset(localPath, **open_kwargs)
da
# Note that it is faster to download the entire netcdf file and open it up than to read remotely (lots of network requests)
# Open an item from the NSIDC_ECS provider and inspect the entry keyed '0'
item_url = 'https://cmr.earthdata.nasa.gov/cmr-stac/NSIDC_ECS/collections/C1908075185-NSIDC_ECS/items/G1921160945-NSIDC_ECS'
item = intake.open_stac_item(item_url)
#print(item.yaml())
entry_names = list(item)
print(entry_names)
entry = item['0']
print(entry.yaml())
%%time
# Assets with RasterIOSource Driver require these GDAL environment variables
# and a properly configured .netrc with NASA EarthData credentials
os.environ.update({
    'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
    'GDAL_HTTP_COOKIEFILE': '.urs_cookies',
    'GDAL_HTTP_COOKIEJAR': '.urs_cookies',
})
da = item['0'].to_dask()
da
# NOTE: reading the cloud-optimized GeoTIFF remotely is very fast! Only metadata is read to initialize it.