%matplotlib inline
import os
import subprocess
import earthpy as et
import earthpy.plot as ep
import earthpy.spatial as es
import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import rasterio
from rasterio.enums import Resampling
plt.style.use("ggplot")
/opt/conda/lib/python3.8/site-packages/geopandas/_compat.py:84: UserWarning: The Shapely GEOS version (3.8.0-CAPI-1.13.1 ) is incompatible with the GEOS version PyGEOS was compiled with (3.9.0-CAPI-1.16.2). Conversions between both will be slow. warnings.warn(
The raw RGB dataset is supplied as a single, 4-band (RGBA) GeoTiff with 2.2 cm cell resolution and 8-bit uint
data type. It is located here:
K:\Avdeling\Mar\HAN\2_Prosjekter\2019c_DRONING2019\FriskOslofjord\For ML analysis_James-Harry\2019-08-27_Flight9_RGB
The alpha band in this dataset provides the "no data" mask, with values of 0 corresponding to NoData and values of 255 for valid data.
The multispectral dataset is stored as separate bands here:
K:\Avdeling\Mar\HAN\2_Prosjekter\2019c_DRONING2019\FriskOslofjord\For ML analysis_James-Harry\2019-08-27_Flight6_MS
These bands have a 9.3 cm cell resolution and values are stored as 32-bit float
data type. Instead of using an "alpha mask" to represent no data, these bands use a NoData value of -10000.
Note that due to the difference in data types, values in the RGB dataset range from 0 to 255, whereas those in the multispectral data are in the range from 0 to 1. The aim of this notebook is to create a single, consistent dataset containing all the bands of interest.
Using a 5 cm cell resolution seems like a reasonable compromise between the two datasets. I have manually created a 5 cm "snap raster" named bounding_box.tif
, based on a shapefile with the same name. To code below resamples and aligns all the images.
# Snap raster
bounding_box = "/home/jovyan/shared/drones/frisk_oslofjord/raster/bounding_box.tif"
# Multi-spectral images
flist = [
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/2019-08-27_Flight6_MS_Reprocess_transparent_reflectance_blue.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/2019-08-27_Flight6_MS_Reprocess_transparent_reflectance_green.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/2019-08-27_Flight6_MS_Reprocess_transparent_reflectance_red.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/2019-08-27_Flight6_MS_Reprocess_transparent_reflectance_nir.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/2019-08-27_Flight6_MS_Reprocess_transparent_reflectance_rededge.tif",
]
# Loop over datasets
for fpath in flist:
name = os.path.split(fpath)[1].split("_")[-1]
out_path = (
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_"
+ name
)
print("Processing:", out_path)
# Use rio warp. See https://rasterio.readthedocs.io/en/stable/cli.html#warp
# Syntax: rio warp input.tif output.tif --like template.tif
args = [
"rio",
"warp",
fpath,
out_path,
"--like",
bounding_box,
"--overwrite",
]
subprocess.check_call(args)
# RGB images
fpath = "/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/2019-08-27_Flight9_RGB_transparent_mosaic_group1.tif"
out_path = "/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_all.tif"
print("Processing:", out_path)
# Use rio warp. See https://rasterio.readthedocs.io/en/stable/cli.html#warp
# Syntax: rio warp input.tif output.tif --like template.tif
args = [
"rio",
"warp",
fpath,
out_path,
"--like",
bounding_box,
"--overwrite",
]
res = subprocess.check_call(args)
Processing: /home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_blue.tif Processing: /home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_green.tif Processing: /home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_red.tif Processing: /home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_nir.tif Processing: /home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_rededge.tif Processing: /home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_all.tif
The code here extracts each of the 4 RGB bands as a single band raster for further processing.
# Open multiband raster
with rasterio.open(
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_all.tif"
) as src:
meta = src.meta
nbands = src.count
# Write outputs with single band
meta.update(count=1)
# Bands ordered as RGBA
names = ["red", "green", "blue", "alpha"]
# Loop over bands
for idx in range(nbands):
print("Processing:", names[idx], "band")
out_tif = f"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_{names[idx]}.tif"
with rasterio.open(out_tif, "w", **meta) as dst:
dst.write_band(1, src.read(idx + 1))
To keep file sizes manageable, it seems best to work with 8-bit unit
images (like the RGB data), rather 32-bit float
images (like the multispectral dataset), which are four times larger. Converting from 32-bit to 8-bit involves some loss of information, which may lead to issues such as lack of "dynamic range" in the output images. However, I think 8-bit images should provide sufficient range for the ML algorithms and the savings in data size will be worth it.
Converting from 32-bit to 8-bit is simple enough, except we need to choose an appropriate NoData value in the range between 0 and 255 (note that we can't use an alpha band here, as the NoData masks are different between the RGB and multispectral datasets). The code below performs a direct conversion from 32-bit to 8-bit, by multiplying the float values by 255 and rounding to the nearest integer.
# List of 5 cm multispec images
flist = [
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_blue.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_green.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_red.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_nir.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_rededge.tif",
]
# Loop over images
for fpath in flist:
ds = rasterio.open(fpath)
data = ds.read(1)
# Set no data and rescale to range [0, 255]
data[data == -10000] = np.nan
data = np.round(data * 255, decimals=0)
print(np.nanmin(data), np.nanmax(data))
Based on this, it looks possible to use the value 255
to indicate NoData, since it's not needed in the pixel values themselves. Does this also work for the RGB bands?
# List of 5 cm rgb images
flist = [
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_blue.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_green.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_red.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_alpha.tif",
]
# Loop over images
for fpath in flist:
ds = rasterio.open(fpath)
data = ds.read(1)
print(np.nanmin(data), np.nanmax(data))
Pixel values in the RGB data do seem to include both 0 and 255, which may indicate lack of dynamic range. The code below takes a closer look at one of these images.
# Show RGB red band
with rasterio.open(
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_red.tif"
) as src:
data = src.read(1)
plt.imshow(data)
# Setup plot
fig, axes = plt.subplots(ncols=4, nrows=1, figsize=(15, 6))
# Loop over images
for idx, fpath in enumerate(flist):
name = os.path.split(fpath)[1].split("_")[-1][:-4]
ds = rasterio.open(fpath)
data = ds.read(1)
if name == "alpha":
axes[idx].imshow(data)
else:
axes[idx].imshow(data != 0)
axes[idx].set_title(name)
The code above checks zeros in the three RGB colour bands against the alpha mask (which shows the true location of NoData pixels). It looks as though the value 0 could be used as NoData in these bands without any major issues, since there are only a few cells with values of zero within the alpha mask.
Based on this, I suggest the following strategy for creating a combined dataset:
Discard the alpha mask from the RGB dataset. Use values of zero in these bands to indicate NoData
For the multispectral data, convert from 32-bit to 8-bit range by multiplying by 255 and rounding to the nearest integer. Values of zero should be artificially set to one, and zero can then be used as the NoData value here as well. This is a slight adjustment/fudge of the raw pixel values, but any effects on the subsequent inference process should be negligible
# List of all 5 cm images
flist = [
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_red.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_green.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/rgb/flight9_rgb_blue.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_red.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_green.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_blue.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_nir.tif",
"/home/jovyan/shared/drones/frisk_oslofjord/raster/multi_spec/flight6_multispec_rededge.tif",
]
# Output
out_tif = "/home/jovyan/shared/drones/frisk_oslofjord/raster/ne_akeroya_5cm_all.tif"
# Read metadata of first file
with rasterio.open(flist[0]) as src:
meta = src.meta
# Update meta to reflect the number of layers in output and NoData value
meta.update(count=len(flist), nodata=0)
# Read each layer and write it to stack
with rasterio.open(out_tif, "w", **meta) as dst:
for idx, fpath in enumerate(flist, start=1):
# Get attributes from file name
spec, name = os.path.split(fpath)[1].split("_")[1:]
name = name[:-4]
print("Processing:", spec, name)
with rasterio.open(fpath) as src:
# Read array
data = src.read(1)
if spec == "multispec": # RGB data can be written directly
# Set NoData and round
data[data == -10000] = np.nan
data = np.round(data * 255, decimals=0)
# Artificially adjust genuine zeros to ones
data[data == 0] = 1
# Set NoData as zero
data[np.isnan(data)] = 0
# Convert to uint8
data = data.astype(np.uint8)
# Save to output
dst.write_band(idx, data)
# Add band description
dst.set_band_description(idx, f"{spec}_{name}")
The code below explores and checks the integrated dataset.
# Output path
out_tif = "/home/jovyan/shared/drones/frisk_oslofjord/raster/ne_akeroya_5cm_all.tif"
# Read downsampled version for better rendering speed
resamp_fac = 4
# Print dataset properties
with rasterio.open(out_tif) as src:
band_titles = src.descriptions
data = src.read(
out_shape=(src.count, src.height // resamp_fac, src.width // resamp_fac),
resampling=Resampling.average,
)
meta = src.meta
for key in meta.keys():
print(f"{key:10}:{meta[key]}")
# Plot all bands using earthpy
ep.plot_bands(data, title=band_titles, cbar=False)
The multispectral data looks very dark compared to the RGB data. Let's take a look at histograms for each band.
# Histograms of band values
ep.hist(data, cols=3, title=band_titles)
It is clear from the histograms above that the multispectral data is using only a small portion of the available range compared to the RGB data. Applying a "linear stretch" to these data should therefore give a better aesthetic appearance. This might also help the ML algorithm - consider incorporating this change permanently?
# Plot composite images
fig, axes = plt.subplots(ncols=2, nrows=1, figsize=(15, 10))
ep.plot_rgb(data, rgb=[0, 1, 2], ax=axes[0], title="RGB composite")
ep.plot_rgb(
data,
rgb=[3, 4, 5],
ax=axes[1],
stretch=True, # Only needed for multispec
title="RGB composite from multispectral data",
)
plt.tight_layout()