#!/usr/bin/env python # coding: utf-8 # # Convert time series collection of GeoTIFF images to NetCDF # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import glob import pandas as pd import xarray as xr # We have a collection of remote sensing data over time on the same grid stored in GeoTIFF files. # We want to convert them to NetCDF, and rechunk the data with time chunked > 1 so that extraction of time series at specific locations is faster # In[2]: filenames = glob.glob('/sand/usgs/users/rsignell/data/josef/20NRKsS1-EBD/20NRKsS1_A_vh_mtfil/*.tif') # In[3]: print(len(filenames)) filenames[0:2] # Make a function to create timestamps from file names # In[4]: def time_index_from_filenames(filenames): '''helper function to create a pandas DatetimeIndex Filename example: 20NRKsS1_A_vh_20171006_0164_B_mtfil_amp.tif''' return pd.DatetimeIndex([pd.Timestamp(f.split('/')[-1][14:22]) for f in filenames]) # In[5]: time = xr.Variable('time', time_index_from_filenames(filenames)) # Specify reading of all GeoTIFF images. Specifying chunks means that Dask arrays will be used, so no data is read until needed # In[6]: chunks = {'x': 5490, 'y': 5490, 'band': 1} da = xr.concat([xr.open_rasterio(f, chunks=chunks).squeeze() for f in filenames], dim=time) # Provide a name for the data array variable # In[7]: da.name='sar' # Don't need the singleton dimension called 'band', so drop it # In[8]: da = da.drop(labels='band') # Rechunk the Dask array # In[9]: da = da.chunk(chunks={'time':10, 'x':100, 'y':100}) # Let's see what we have now # In[10]: da # Write the NetCDF file, specifying the same chunking. This is where the data finally get read! # In[11]: encoding={'sar':{'chunksizes': (10,100,100)}} da.to_netcdf('agg_chunked.nc', encoding=encoding, format='NETCDF4', engine='netcdf4', mode='w') # Now open the NetCDF file we just wrote # In[12]: ds = xr.open_dataset('agg_chunked.nc') # In[13]: ds # Extract data at a specific time # In[16]: dst = ds.sel(time='2017-09-24').squeeze() # Plot it up # In[18]: dst['sar'].plot.imshow(vmin=0, vmax=4000); # In[ ]: