# Requires the development version of fsspec_reference_maker:
#     pip install --user git+https://github.com/intake/fsspec-reference-maker
import json
import fsspec
from fsspec_reference_maker.grib2 import scan_grib
import os
# GRIB message selector: keep only the 2 m above-ground fields.
filter = {'typeOfLevel': 'heightAboveGround', 'level': 2}

# ~1 GB of hourly HRRR surface f01 forecasts forming a time-series:
# 2019-01-01 22Z/23Z followed by 2019-01-02 00Z..06Z.
files = [
    f's3://noaa-hrrr-bdp-pds/hrrr.{day}/conus/hrrr.t{hh:02d}z.wrfsfcf01.grib2'
    for day, hours in (('20190101', (22, 23)), ('20190102', range(7)))
    for hh in hours
]

# Anonymous S3 access, readahead caching for sequential GRIB scanning.
so = {"anon": True, "default_cache_type": "readahead"}

# Coordinate variables shared across every message.
common = ['time', 'step', 'latitude', 'longitude', 'valid_time']
def create_jsons(files, common_vars=common, storage_options=so, msg_filter=filter):
    """Scan each remote GRIB2 file and write a reference JSON beside the CWD.

    Parameters
    ----------
    files : list of str
        S3 URLs of GRIB2 files to scan.
    common_vars : list of str
        Coordinate variables shared by every message (defaults to the
        module-level ``common``, so ``create_jsons(files)`` is unchanged).
    storage_options : dict
        fsspec storage options for reading the GRIB files (defaults to ``so``).
    msg_filter : dict
        cfgrib-style message filter (defaults to the module-level ``filter``).
    """
    for url in files:
        # NOTE: 'inline_threashold' (sic) is the actual keyword name in this
        # version of fsspec_reference_maker.grib2.scan_grib — do not "fix" it.
        out = scan_grib(url, common_vars, storage_options,
                        inline_threashold=100, filter=msg_filter)
        # hrrr.tXXz.wrfsfcf01.grib2 -> hrrr.tXXz.wrfsfcf01.json
        out_name = os.path.basename(url).replace("grib2", "json")
        with open(out_name, "w") as f:
            json.dump(out, f)

create_jsons(files)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) /tmp/ipykernel_252/2518840911.py in <module> ----> 1 create_jsons(files) NameError: name 'create_jsons' is not defined
# Use MultiZarrToZarr() to combine the per-file references into a single reference.
from glob import glob
# Collect the per-file reference JSONs written above, sorted into time order.
json_list = glob('./hrrr.t*.json')
json_list.sort()

from fsspec_reference_maker.combine import MultiZarrToZarr

# Merge the single-file references into one logical dataset, concatenating
# the individual hourly files along the 'time' dimension.
mzz = MultiZarrToZarr(
    json_list,
    remote_protocol="s3",
    remote_options={"anon": True},
    xarray_concat_args={"dim": 'time'},
)
mzz.translate("hrrr.total.json")
import xarray as xr
from fsspec_reference_maker.grib2 import GRIBCodec
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import pandas as pd
# Open the combined reference set as a single lazy xarray Dataset.  The
# reference store has no consolidated Zarr metadata, so pass
# consolidated=False explicitly — this skips the slow consolidated-metadata
# probe and silences the RuntimeWarning it otherwise emits.
fs = fsspec.filesystem('reference', fo='./hrrr.total.json', remote_protocol='s3', remote_options={'anon': True})
m = fs.get_mapper('')
ds = xr.open_dataset(m, engine='zarr', consolidated=False)
/tmp/ipykernel_68/1688709781.py:3: RuntimeWarning: Failed to open Zarr store with consolidated metadata, falling back to try reading non-consolidated metadata. This is typically much slower for opening a dataset. To silence this warning, consider: 1. Consolidating metadata in this existing store with zarr.consolidate_metadata(). 2. Explicitly setting consolidated=False, to avoid trying to read consolidate metadata, or 3. Explicitly setting consolidated=True, to raise an error in this case instead of falling back to try reading non-consolidated metadata. ds = xr.open_dataset(m, engine='zarr')
ds
<xarray.Dataset> Dimensions: (y: 1059, x: 1799, time: 9) Coordinates: heightAboveGround float64 1e+03 latitude (y, x) float64 ... longitude (y, x) float64 ... step timedelta64[ns] 01:00:00 * time (time) datetime64[us] 2019-01-02 ... 2019-01-01T23:00:00 valid_time (time) datetime64[ns] 2019-01-02T01:00:00 ... NaT Dimensions without coordinates: y, x Data variables: refd (time, y, x) float32 ... si10 (time, y, x) float32 ... u (time, y, x) float32 ... u10 (time, y, x) float32 ... unknown (time, y, x) float32 ... v (time, y, x) float32 ... v10 (time, y, x) float32 ... Attributes: Conventions: CF-1.7 GRIB_centre: kwbc GRIB_centreDescription: US National Weather Service - NCEP GRIB_edition: 2 GRIB_subCentre: 0 history: 2021-08-09T15:34 GRIB to CDM+CF via cfgrib-0.9.9... institution: US National Weather Service - NCEP
array(1000.)
[1905141 values with dtype=float64]
[1905141 values with dtype=float64]
array(3600000000000, dtype='timedelta64[ns]')
array(['2019-01-02T00:00:00.000000', '2019-01-02T01:00:00.000000', '2019-01-02T02:00:00.000000', '2019-01-02T03:00:00.000000', '2019-01-02T04:00:00.000000', '2019-01-02T05:00:00.000000', '2019-01-02T06:00:00.000000', '2019-01-01T22:00:00.000000', '2019-01-01T23:00:00.000000'], dtype='datetime64[us]')
array(['2019-01-02T01:00:00.000000000', '2019-01-02T02:00:00.000000000', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT'], dtype='datetime64[ns]')
[17146269 values with dtype=float32]
[17146269 values with dtype=float32]
[17146269 values with dtype=float32]
[17146269 values with dtype=float32]
[17146269 values with dtype=float32]
[17146269 values with dtype=float32]
[17146269 values with dtype=float32]
i = 0  # index into the 'time' dimension to plot
fig = plt.figure(figsize=(6, 6))
# Lambert Conformal axes; data coordinates are plain lat/lon (PlateCarree).
ax = fig.add_subplot(1, 1, 1, projection=ccrs.LambertConformal())
p = ax.pcolormesh(
    ds.longitude,
    ds.latitude,
    ds.si10.isel(time=i),
    transform=ccrs.PlateCarree(),
)
ax.coastlines()
ax.add_feature(cfeature.STATES)
# Human-readable timestamp for the selected time step.
time = pd.to_datetime(ds.time[i].data).strftime("%Y-%m-%d %H%M UTC")
ax.set_title("10m Wind Speed\n" + time)
plt.colorbar(p, orientation='horizontal', label='m/s')
<matplotlib.colorbar.Colorbar at 0x7f178ffeb790>