#!/usr/bin/env python
# coding: utf-8

# # Assess time series extraction methods for a large gridded dataset (CFSR)
#
# Each section below extracts a point time series at the same location
# through a different access path, and runs the plot call under `%time`
# so the reported timings can be compared.

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import xarray as xr
import pandas as pd


# ### Specify time range and location for extracted time series

# In[2]:


start = '2017-01-01'
stop = '2017-03-01'
# West longitude shifted by 360 (presumably the grids use 0-360 longitudes
# -- confirm against the datasets).
lon0 = -71.6 + 360
lat0 = 41.55
# One slice object shared by every xarray time selection below.
time_window = slice(start, stop)


# ### Extract time series using xarray from HSDS dataset

# In[3]:


# URL of the 12 month dataset
url_hsds = 'http://149.165.156.174:5101/home/rsignell/tmp2m_2017.nc'
# Open without CF decoding first: the time units are missing, so they are
# added back by hand and the dataset is decoded afterwards.
ds_hsds = xr.open_dataset(url_hsds, engine='h5netcdf', decode_cf=False)
ds_hsds['time'].attrs['units'] = 'seconds since 1970-01-01 00:00'
ds_hsds = xr.decode_cf(ds_hsds)
# Nearest grid point to the requested location, then the time window.
loc_hsds = ds_hsds.sel(longitude=lon0, latitude=lat0, method='nearest')
d_hsds = loc_hsds.sel(time=time_window)
get_ipython().run_line_magic('time', "d_hsds['TMP_2maboveground'].plot();")


# In[4]:


# Open the same HSDS URL directly with h5pyd and look at the `chunks`
# attribute of the temperature variable.
import h5pyd
f = h5pyd.File(url_hsds, 'r')
ds = f['TMP_2maboveground']
ds.chunks


# ### Extract time series using xarray from concatenated netcdf file via OPeNDAP

# In[5]:


url_dap = 'http://js-170-55.jetstream-cloud.org/thredds/dodsC/data/CFSR/2017/tmp2m_2months.nc'
ds_dap = xr.open_dataset(url_dap)
loc_dap = ds_dap.sel(longitude=lon0, latitude=lat0, method='nearest')
d_dap = loc_dap.sel(time=time_window)
get_ipython().run_line_magic('time', "d_dap['TMP_2maboveground'].plot();")


# ### Extract time series using xarray from grib aggregation via OPeNDAP

# In[7]:


url_dap_grb2 = 'http://js-170-55.jetstream-cloud.org/thredds/dodsC/grib/CFSr_v2/tmp2m/Best'
ds_dap_grb2 = xr.open_dataset(url_dap_grb2)
# This aggregation names its coordinates `lon`/`lat` rather than
# `longitude`/`latitude`.
loc_dap_grb2 = ds_dap_grb2.sel(lon=lon0, lat=lat0, method='nearest')
d_dap_grb2 = loc_dap_grb2.sel(time=time_window)
get_ipython().run_line_magic('time', "d_dap_grb2['Temperature_height_above_ground'].plot();")


# ### Extract time series from grib aggregation using NCSS (NetCDF Subset Service)

# In[8]:


# Query-string template; the four `{}` slots are filled, in order, with
# latitude, longitude, start date and stop date.
ncss_template = (
    'https://js-170-55.jetstream-cloud.org/thredds'
    '/ncss/grib/CFSr_v2/tmp2m/Best?var=Temperature_height_above_ground&'
    'latitude={}&longitude={}&time_start={}T01%3A00%3A00Z&time_end={}T00%3A00%3A00Z&'
    'vertCoord=&accept=csv'
)
url = ncss_template.format(lat0, lon0, start, stop)
# The CSV response is read straight into a DataFrame indexed by `time`.
get_ipython().run_line_magic('time', "df = pd.read_csv(url, parse_dates=True, index_col='time')")
df['Temperature_height_above_ground[unit="K"]'].plot()