Using the British Met Office Pysssix S3 FUSE driver, we can read any publicly readable bucket using /s3/{bucket}.
# Name of the public S3 bucket explored below.
root = 'noaa-nwm-pds'
We want to see if there are 24 files per day (t00z, t01z, ..., t23z) for this pattern:
/s3/noaa-nwm-pds/nwm.20180615/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc
import pandas as pd
import s3fs
# Open the S3 filesystem anonymously (no credentials supplied).
fs = s3fs.S3FileSystem(anon=True)
# List the top-level entries of the bucket.
fs.ls('noaa-nwm-pds')
['noaa-nwm-pds/nwm.20180616', 'noaa-nwm-pds/nwm.20180617', 'noaa-nwm-pds/nwm.20180618', 'noaa-nwm-pds/nwm.20180619', 'noaa-nwm-pds/nwm.20180620', 'noaa-nwm-pds/nwm.20180621', 'noaa-nwm-pds/nwm.20180622', 'noaa-nwm-pds/nwm.20180623', 'noaa-nwm-pds/nwm.20180624', 'noaa-nwm-pds/nwm.20180625', 'noaa-nwm-pds/nwm.20180626', 'noaa-nwm-pds/nwm.20180627', 'noaa-nwm-pds/nwm.20180628', 'noaa-nwm-pds/nwm.20180629', 'noaa-nwm-pds/nwm.20180630', 'noaa-nwm-pds/nwm.20180701', 'noaa-nwm-pds/nwm.20180702', 'noaa-nwm-pds/nwm.20180703', 'noaa-nwm-pds/nwm.20180704', 'noaa-nwm-pds/nwm.20180705', 'noaa-nwm-pds/nwm.20180706', 'noaa-nwm-pds/nwm.20180707', 'noaa-nwm-pds/nwm.20180708', 'noaa-nwm-pds/nwm.20180709', 'noaa-nwm-pds/nwm.20180710', 'noaa-nwm-pds/nwm.20180711', 'noaa-nwm-pds/nwm.20180712', 'noaa-nwm-pds/nwm.20180713', 'noaa-nwm-pds/nwm.20180714']
# Daily timestamps from 2018-07-01 through 2018-07-07.
dates = pd.date_range(start='2018-07-01', end='2018-07-07', freq='D')
def forcing_files(date, bucket='noaa-nwm-pds'):
    """List the short-range forcing entries stored for one day.

    Parameters
    ----------
    date : datetime-like
        Any object providing ``strftime`` (for example an element of a
        ``pd.date_range``); it is formatted as ``YYYYMMDD``.
    bucket : str, optional
        Bucket that holds the ``nwm.YYYYMMDD`` day folders. Defaults to
        ``'noaa-nwm-pds'``, the bucket this function previously hard-coded.

    Returns
    -------
    The result of ``fs.ls`` on
    ``{bucket}/nwm.YYYYMMDD/forcing_short_range``.

    Notes
    -----
    Relies on the module-level filesystem object ``fs``.
    """
    day = date.strftime('%Y%m%d')
    return fs.ls('{}/nwm.{}/forcing_short_range'.format(bucket, day))
def daily_files(files, forecast='f001'):
    """Return the paths whose file name contains the forecast tag.

    Parameters
    ----------
    files : iterable of str
        Object keys / paths using ``/`` as the separator.
    forecast : str, optional
        Tag to look for in the file name, e.g. ``'f001'``.

    Returns
    -------
    list of str
        The matching paths, in their original order.
    """
    # Test only the last path component: a tag that also occurs in the bucket
    # or a directory name would otherwise match every path under it.
    return [f for f in files if forecast in f.rsplit('/', 1)[-1]]
# Count the f001 forcing files found for each day of 2018-07-01 .. 2018-07-07.
dates = pd.date_range(start='2018-07-01', end='2018-07-07', freq='D')
# One directory listing per day.
d = list(map(forcing_files, dates))
# Keep only the f001 entries of each day's listing.
f001 = [daily_files(listing, forecast='f001') for listing in d]
# Number of f001 files per day.
list(map(len, f001))
[24, 24, 24, 24, 24, 24, 24]
# Repeat the per-day f001 count for 2018-06-21 .. 2018-06-27.
dates = pd.date_range(start='2018-06-21', end='2018-06-27', freq='D')
# One directory listing per day.
d = list(map(forcing_files, dates))
# Keep only the f001 entries of each day's listing.
f001 = [daily_files(listing, forecast='f001') for listing in d]
# Number of f001 files per day.
list(map(len, f001))
[23, 23, 23, 23, 23, 24, 24]
# Show the first day's f001 paths in sorted order to see which hourly cycle is absent.
sorted(f001[0])
['noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t01z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t02z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t03z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t04z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t05z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t06z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t07z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t08z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t09z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t10z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t11z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t12z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t13z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t14z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t15z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t16z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t17z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t18z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t19z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t20z.short_range.forcing.f001.conus.nc', 
'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t21z.short_range.forcing.f001.conus.nc', 'noaa-nwm-pds/nwm.20180621/forcing_short_range/nwm.t22z.short_range.forcing.f001.conus.nc']