Read netcdf4 files using xarray, so that we can read the attributes
import xarray as xr import numpy as np import h5pyd as h5py
infile = '/notebooks/rsignell/data/CFSR/tmp2m_2months.nc'
outfile = '/home/rsignell/tmp2m_2months_rechunked.nc'
#outfile = '/notebooks/rsignell/data/CFSR/foo.nc'
ds = xr.open_dataset(infile, decode_cf=False)
ds.variables
Frozen(OrderedDict([('latitude', <xarray.IndexVariable 'latitude' (latitude: 880)> array([-89.843515, -89.640798, -89.436886, ..., 89.436886, 89.640798, 89.843515]) Attributes: units: degrees_north long_name: latitude), ('longitude', <xarray.IndexVariable 'longitude' (longitude: 1760)> array([ 0.000000e+00, 2.045452e-01, 4.090904e-01, ..., 3.593859e+02, 3.595905e+02, 3.597950e+02]) Attributes: units: degrees_east long_name: longitude), ('time', <xarray.IndexVariable 'time' (time: 1416)> array([ 1.483232e+09, 1.483236e+09, 1.483240e+09, ..., 1.488319e+09, 1.488323e+09, 1.488326e+09]) Attributes: units: seconds since 1970-01-01 00:00:00.0 0:00 long_name: verification time generated by wgrib2 functi... reference_time: 1483228800.0 reference_time_type: 0 reference_date: 2017.01.01 00:00:00 UTC reference_time_description: kind of product unclear, reference date is v... time_step_setting: auto time_step: 3600.0), ('TMP_2maboveground', <xarray.Variable (time: 1416, latitude: 880, longitude: 1760)> [2193100800 values with dtype=float32] Attributes: _FillValue: 9.999e+20 short_name: TMP_2maboveground long_name: Temperature level: 2 m above ground units: K)]))
f = h5py.File(outfile, 'w')
for key, val in ds.attrs.items():
if isinstance(val,str):
f.attrs[key]=val
else:
f.attrs.create(key, val, (), dtype=val.dtype)
f.attrs['Conventions']
'COARDS'
ds['latitude'].data.shape
(880,)
ds['latitude'].data.dtype
dtype('float64')
for key, val in ds.variables.items():
print(key, val.shape, val.chunks)
latitude (880,) None longitude (1760,) None time (1416,) None TMP_2maboveground (1416, 880, 1760) None
Just specify the chunk sizes for those vars that need rechunking
ds['TMP_2maboveground'].attrs['chunks'] = (4, 220, 440)
for key, val in ds.variables.items():
print(key)
dset = f.create_dataset(key, data=val.data, chunks=val.chunks)
for k,v in val.attrs.items():
print(k,v)
if isinstance(v,str):
dset.attrs[k] = v
else:
dset.attrs.create(k, np.array(v))
latitude units degrees_north long_name latitude longitude units degrees_east long_name longitude time units seconds since 1970-01-01 00:00:00.0 0:00 long_name verification time generated by wgrib2 function verftime() reference_time 1483228800.0 reference_time_type 0 reference_date 2017.01.01 00:00:00 UTC reference_time_description kind of product unclear, reference date is variable, min found reference date is given time_step_setting auto time_step 3600.0 TMP_2maboveground
# Creating dimension scales
dset.dims.set_scale(f['/latitude'])
dset.dims.set_scale(f['/time'])
dset.dims.set_scale(f['/longitude'])
# Attaching dimension scales to dataset: /TMP_2maboveground
f['/TMP_2maboveground'].dims[0].attach_scale(f['/time'])
f['/TMP_2maboveground'].dims[1].attach_scale(f['/latitude'])
f['/TMP_2maboveground'].dims[2].attach_scale(f['/longitude'])
f.close()