This script preprocesses geodata to be used later in Matlab for further modelling. From existing raster files (DEM (digital elevation model), obstacles, aspect, slope, forest cover) with differing extents, smaller rasters with the same extent are extracted. The new extent is given by a polygon vector dataset (Perimeter). The "obstacles" dataset first has to be transformed from line vector datasets (railway lines, high-voltage power lines) to raster. The forest dataset is derived from a polygon vector dataset. Aspect and slope both are derived from the DEM. An additional requirement is that the raster coordinates be integers (multiples of 10) as shown below (example header):
ncols 2216
nrows 2273
xllcorner 668820.0
yllcorner 5270130.0
cellsize 10.0
NODATA_value -9999.0
Some functions that will be used in the preprocessing need to be defined first:
import geopandas as gpd
import numpy as np
from osgeo import gdal
import requests
import io
import zipfile
# Define function RasterizePolygon that is used to turn a vector dataset into a raster dataset:
def RasterizePolygon(path_shp_file, reference, path_output):
from osgeo import gdal
from osgeo import ogr
x_res = reference.RasterXSize
y_res = reference.RasterYSize
geo_transform = reference.GetGeoTransform()
mb_v = ogr.Open(path_shp_file)
mb_l = mb_v.GetLayer()
target_ds = gdal.GetDriverByName('GTiff').Create(path_output, x_res, y_res, 1, gdal.GDT_Byte)
target_ds.SetGeoTransform(geo_transform)
band = target_ds.GetRasterBand(1)
NoData_value = -9999
band.SetNoDataValue(NoData_value)
band.FlushCache()
gdal.RasterizeLayer(target_ds, [1], mb_l, options=['ALL_TOUCHED=TRUE'])
return target_ds
# Define function WriteASCII that is used to convert raster datasets into ASCII files:
def WriteASCII(pathout, arr, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value):
## write new file:
TheFile=open(pathout,"w")
TheFile.write("ncols "+str(ncols)+"\n")
TheFile.write("nrows "+str(nrows)+"\n")
TheFile.write("xllcorner "+str(xllcorner)+"\n")
TheFile.write("yllcorner "+str(yllcorner)+"\n")
TheFile.write("cellsize "+str(cellsize)+"\n")
TheFile.write("NODATA_value "+str(NODATA_value)+"\n")
TheFormat="{0} "
for i in range(0,nrows):
for j in range(0,ncols):
TheFile.write(TheFormat.format(arr[i,j]))
TheFile.write("\n")
TheFile.close()
# Define functione TransferHeaderGeoTiffToHeaderASCII which is used to transform a GeoTiff header into an ASCII file header:
def TransferHeaderGeoTiffToHeaderASCII(gdata):
import numpy as np
band = gdata.GetRasterBand(1)
# arr = band.ReadAsArray()
width = gdata.RasterXSize
height = gdata.RasterYSize
gt = gdata.GetGeoTransform()
minx = gt[0]
miny = gt[3] + width*gt[4] + height*gt[5]
maxx = gt[0] + width*gt[1] + height*gt[2]
maxy = gt[3]
xllcorner = min(minx,maxx)
yllcorner = min(miny,maxy)
cellsize = gt[1]
ncols = gdata.RasterXSize
nrows = gdata.RasterYSize
band = gdata.GetRasterBand(1)
NODATA_value = band.GetNoDataValue()
names = ['ncols', 'nrows', 'xllcorner', 'yllcorner', 'cellsize', 'NODATA_value']
formats = ['int','int','float','float','float','float']
# Definition des Datentypes:
header_info = np.zeros(1, dtype={'names':names, 'formats':formats})
header_info['ncols'] = int(ncols)
header_info['nrows'] = int(nrows)
header_info['xllcorner'] = xllcorner
header_info['yllcorner'] = yllcorner
header_info['cellsize'] = cellsize
header_info['NODATA_value'] = NODATA_value
return header_info, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value
# Define function CreateAndWriteRaster that is used to turn an array into a raster:
def CreateAndWriteRaster(Path_Raster, ReferenceRaster, ArrayToWrite, NoDataValue = -9999, datatype = 'Float32'):
## Datatypes: https://naturalatlas.github.io/node-gdal/classes/Constants%20(GDT).html
referenceProj = ReferenceRaster.GetProjection()
geotransform = ReferenceRaster.GetGeoTransform()
# create the 3-band raster file
number_of_bands = 1
if datatype == 'Float32':
raster = gdal.GetDriverByName('GTiff').Create(Path_Raster, ReferenceRaster.RasterXSize, ReferenceRaster.RasterYSize, number_of_bands, gdal.GDT_Float32)
if datatype == 'Int16':
raster = gdal.GetDriverByName('GTiff').Create(Path_Raster, ReferenceRaster.RasterXSize, ReferenceRaster.RasterYSize, number_of_bands, gdal.GDT_Int16)
raster.SetGeoTransform(geotransform) # specify coords
raster.GetRasterBand(1).WriteArray(ArrayToWrite) # write r-band to the raster
# raster.GetRasterBand(1).SetNoDataValue(255)
raster.GetRasterBand(1).SetNoDataValue(NoDataValue)
print ('NoData Value: ',raster.GetRasterBand(1).GetNoDataValue())
raster.SetProjection(referenceProj)
raster.FlushCache() # write to disk
return(raster)
# Define function Vector2Raster which is used to convert vector data into raster data:
def Vector2Raster(gdf, ReferenceRaster,path_WriteRASTER):
from osgeo import ogr
from shapely.wkt import loads
def segmentize(geom):
wkt = geom.wkt # shapely Polygon to wkt
geom = ogr.CreateGeometryFromWkt(wkt) # create ogr geometry
geom.Segmentize(2) # densify geometry
wkt2 = geom.ExportToWkt() # ogr geometry to wkt
new = loads(wkt2) # wkt to shapely Polygon
return new
## https://stackoverflow.com/questions/28995146/matlab-ind2sub-equivalent-in-python
def sub2ind(array_shape, rows, cols):
return rows*array_shape[1] + cols
def ind2sub(array_shape, ind):
## Zeile
#rows = (ind.astype('int') / array_shape[1])
rows = np.floor(ind.astype('int') / array_shape[1]).astype('int')
## Spalte
cols = (ind.astype('int') % array_shape[1]) # or numpy.mod(ind.astype('int'), array_shape[1])
return (rows, cols)
# gdf.plot()
gdf['geometry'] = gdf['geometry'].map(segmentize)
for j in range(len(gdf)):
x_coords = np.array(gdf['geometry'][j].coords.xy[0])
y_coords = np.array(gdf['geometry'][j].coords.xy[1])
if j == 0:
x_collector = x_coords
y_collector = y_coords
else:
x_collector = np.concatenate((x_collector,x_coords))
y_collector = np.concatenate((y_collector,y_coords))
points = np.concatenate(([x_collector],[y_collector]),axis = 0).T
upx, xres, xskew, upy, yskew, yres = ReferenceRaster.GetGeoTransform()
xmin = upx
xmax = upx+xres*ReferenceRaster.RasterXSize
ymin = upy
ymax = upy+yres*ReferenceRaster.RasterYSize
anz_y = ReferenceRaster.RasterYSize
anz_x = ReferenceRaster.RasterXSize
referenceProj = ReferenceRaster.GetProjection()
print (xmin,xmax,ymin,ymax)
if xres == yres:
cellsize = xres
else:
cellsize = np.nan
print ('ERROR: Cellsize differs in x and y direction')
print (xres, yres)
values_within_boundaries = np.logical_and(np.logical_and(points[:,0] > xmin, points[:,0] < xmax),np.logical_and(points[:,1] > ymin, points[:,1] < ymax))
points = points[values_within_boundaries,:]
print ((xmax - xmin)/cellsize)
# size_x = ((xmax - xmin)/cellsize).astype(int)
# size_y = ((ymax - ymin)/cellsize).astype(int)
# lb_x = np.arange(0,xmax - xmin,cellsize)
# ub_x = lb_x + cellsize
#
# ind_x = np.arange(len(lb_x))
points_ind_x=np.floor((points[:,0]-xmin)/cellsize).astype(int)
points_ind_y=np.floor((points[:,1]-ymin)/cellsize).astype(int)
points_ind_lin_ind = sub2ind([anz_x,anz_y], points_ind_x, points_ind_y)
## Unique
points_ind_lin_ind_unique = np.unique(points_ind_lin_ind)
## Write to Raster
rows, cols = ind2sub([anz_x,anz_y], points_ind_lin_ind_unique)
image_size = (int(anz_y),int(anz_x))
v_pixels = np.zeros(image_size).astype(int)
print(cols)
print(rows)
v_pixels[cols,rows] = 1
# xres = cellsize
# yres = cellsize
# geotransform = (xmin, xres, 0, ymin, 0, yres)
geotransform = ReferenceRaster.GetGeoTransform()
# create the 3-band raster file
number_of_bands = 1
rasterfromvector = gdal.GetDriverByName('GTiff').Create(path_WriteRASTER, int(anz_x), int(anz_y), number_of_bands, gdal.GDT_Int16)
rasterfromvector.SetGeoTransform(geotransform) # specify coords
rasterfromvector.GetRasterBand(1).WriteArray(v_pixels) # write r-band to the raster
rasterfromvector.GetRasterBand(1).SetNoDataValue(255)
rasterfromvector.SetProjection(referenceProj)
rasterfromvector.FlushCache() # write to disk
return(rasterfromvector)
# Define function Extract_Url_Zipfiles:
def Extract_Url_Zipfiles(url, name, path):
# file name stays the same
import requests
from zipfile import ZipFile
r = requests.get(url)
with open(name, "wb") as code:
code.write(r.content)
z = name
zf = ZipFile(z,'r')
zf.extractall(path)
Small datasets can be saved in memory. Larger datasets are downloaded and saved to disk, as shown here. The download links refer to the data available on the Envidat website.
url_forest = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/16db97c5-5546-4f80-9cb9-e562637b589c/download/osm_forest_export_25832.zip'
url_rail = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/a787f798-0c3d-4cd3-a9d1-c93f51176465/download/osm_railways_export_25832.zip'
url_dem = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/1bd80d45-a8fd-44dc-b153-7abfa6345d1c/download/eudem_ogdbayern_25m_25832.tif'
url_powerlines = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/c567f81c-26c9-4122-8fbd-045c55fb73c0/download/osm_powerlines_export_25832.zip'
url_perimeter = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/710a065d-d50e-4a3d-a993-2a65197cef7b/download/digitized_sample_perimeter.zip'
Define the cell size of the output rasters:
delta = 10
Define path and filenames for Shapefiles:
basic_path_input_shapefiles = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Shapefiles'
# Filenames:
name_perimeter = 'digitized_sample_perimeter.shp'
name_bahn = 'osm_railways_export_25832.shp'
name_strom = 'osm_powerlines_export_25832.shp'
name_waldlayer = 'osm_forest_export_25832.shp'
path_perimeter = basic_path_input_shapefiles + '\\' + name_perimeter
path_bahn = basic_path_input_shapefiles + '\\' + name_bahn
path_strom = basic_path_input_shapefiles + '\\' + name_strom
path_Wald_shp_original = basic_path_input_shapefiles+'\\'+ name_waldlayer
Download, save and unzip the shapefiles: (MF)
path = basic_path_input_shapefiles
# Perimeter:
Extract_Url_Zipfiles(url_perimeter, name_perimeter, basic_path_input_shapefiles)
# Forest:
Extract_Url_Zipfiles(url_forest, name_waldlayer, basic_path_input_shapefiles)
# Railway lines:
Extract_Url_Zipfiles(url_rail, name_bahn, basic_path_input_shapefiles)
# Power lines:
Extract_Url_Zipfiles(url_powerlines, name_strom, basic_path_input_shapefiles)
print ('Shapefiles saved.')
Shapefiles saved.
Path and filename for the DEM raster data are also defined:
basic_path_input_rasterfiles = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Rasterfiles'
# Filenames:
name_DEM_original = 'eudem_ogdbayern_25m_25832.tif'
path_DEM_original = basic_path_input_rasterfiles + '\\' + name_DEM_original
Output path:
basic_path_out = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Output'
Download and save DEM:
DEM = requests.get(url_dem)
open(path_DEM_original, 'wb').write(DEM.content)
1695063
Next we define the filenames for the rasters derived from the input datasets:
name_NULLRASTER = 'NULL_Raster.tif'
name_DEM = 'dem_wo_nodata.tif'
name_DEM_incl_NODATA = 'dem.tif'
name_RASTER_Bahn = 'Bahn_Raster.tif'
name_RASTER_Strom = 'Strom_Raster.tif'
name_RASTER_Obstacles = 'Obstacles_Raster.tif'
name_RASTER_Slope_Degree = 'Slope_Degree.tif'
name_RASTER_Slope_Percent_direct = 'Slope_Percent_dir.tif'
name_RASTER_Slope_Percent = 'Slope_Percent.tif'
name_RASTER_aspect_direct = 'Aspect_dir.tif'
name_RASTER_aspect = 'Aspect.tif'
name_RASTER_aspect_n0 = 'Aspect_n0.tif'
name_RASTER_Wald = 'Wald.tif'
path_NULLRASTER = basic_path_out + '\\'+ name_NULLRASTER
path_DEM = basic_path_out + '\\'+ name_DEM
path_DEM_incl_NODATA = basic_path_out + '\\'+ name_DEM_incl_NODATA
path_RASTER_Bahn = basic_path_out + '\\'+ name_RASTER_Bahn
path_RASTER_Strom = basic_path_out + '\\'+ name_RASTER_Strom
path_RASTER_Obstacles = basic_path_out + '\\'+ name_RASTER_Obstacles
path_RASTER_Slope_Degree = basic_path_out + '\\'+ name_RASTER_Slope_Degree
path_RASTER_Slope_Percent_direct = basic_path_out + '\\'+ name_RASTER_Slope_Percent_direct
path_RASTER_Slope_Percent = basic_path_out + '\\'+ name_RASTER_Slope_Percent
path_RASTER_Wald = basic_path_out + '\\'+ name_RASTER_Wald
path_RASTER_aspect_direct = basic_path_out + '\\'+ name_RASTER_aspect_direct
path_RASTER_aspect = basic_path_out + '\\'+ name_RASTER_aspect
path_RASTER_aspect_n0 = basic_path_out + '\\'+ name_RASTER_aspect_n0
The raster files will be saved in the .tif and .txt format.
ins = '.tif'
outs = '.txt'
path_DEM_incl_NODATA_asc = path_DEM_incl_NODATA.replace(ins, outs)
path_RASTER_Obstacles_asc = path_RASTER_Obstacles.replace(ins, outs)
path_RASTER_Slope_Percent_asc = path_RASTER_Slope_Percent.replace(ins, outs)
path_RASTER_Wald_asc = path_RASTER_Wald.replace(ins, outs)
path_RASTER_aspect_asc = path_RASTER_aspect.replace(ins, outs)
path_RASTER_aspect_n0_asc = path_RASTER_aspect_n0.replace(ins, outs)
Open the original DEM file:
raster_DEM_original = gdal.Open(path_DEM_original)
We now create an empty reference raster with the correct extent and save it as a 3-band raster:
perimeter_df = gpd.read_file(path_perimeter)
bbox = np.array(perimeter_df.total_bounds)
bbox_f = np.floor(bbox / delta)*delta-delta/2.
bbox_c = np.ceil(bbox / delta)*delta+delta/2.
X = np.array((bbox_f[0],bbox_c[2]))
Y = np.array((bbox_f[1],bbox_c[3]))
xmin, ymin, xmax, ymax = [min(X)-delta/2, min(Y)-delta/2, max(X)+delta/2, max(Y)+delta/2]
anz_x = (xmax - xmin) / delta
anz_y = (ymax - ymin) / delta
image_size = (int(anz_y),int(anz_x))
v_pixels = np.zeros(image_size).astype(int)
x_index = np.arange(anz_x).astype(int)
y_index = np.arange(anz_y).astype(int)
x_values = np.arange(xmin,xmax+delta,delta).astype(int)
y_values = np.arange(ymin,ymax+delta,delta).astype(int)
xres = delta
yres = delta
geotransform = (xmin, xres, 0, ymin, 0, yres)
referenceProj = raster_DEM_original.GetProjection()
# create the 3-band raster file of the Empty Reference Raster
number_of_bands = 1
reference = gdal.GetDriverByName('GTiff').Create(path_NULLRASTER, int(anz_x), int(anz_y), number_of_bands, gdal.GDT_Float32)
reference.SetGeoTransform(geotransform) # specify coords
reference.GetRasterBand(1).WriteArray(v_pixels) # write r-band to the raster
reference.GetRasterBand(1).SetNoDataValue(-9999)
reference.SetProjection(referenceProj)
reference.FlushCache() # write to disk
The original DEM is resampled to 10 m resolution and converted into a numpy array:
## Resampling of the DEM:
from osgeo import gdal, gdalconst
inputf = raster_DEM_original
inputProj = inputf.GetProjection()
inputTrans = inputf.GetGeoTransform()
referenceProj = reference.GetProjection()
referenceTrans = reference.GetGeoTransform()
bandreference = reference.GetRasterBand(1)
x = reference.RasterXSize
y = reference.RasterYSize
driver= gdal.GetDriverByName('GTiff')
output = driver.Create(path_DEM,x,y,1,bandreference.DataType)
output.SetGeoTransform(referenceTrans)
output.SetProjection(referenceProj)
gdal.ReprojectImage(inputf,output,inputProj,referenceProj,gdalconst.GRA_Bilinear)
driver= gdal.GetDriverByName('GTiff')
output = driver.Create(path_DEM,x,y,1,bandreference.DataType)
output.SetGeoTransform(referenceTrans)
output.SetProjection(referenceProj)
gdal.ReprojectImage(inputf,output,inputProj,referenceProj,gdalconst.GRA_Bilinear)
## Convert the raster to a numpy array
arraydem = np.array(output.GetRasterBand(1).ReadAsArray())
arraydem[arraydem==0]=-9999
DEM = CreateAndWriteRaster(path_DEM_incl_NODATA, reference, arraydem)
print('Resampling finished.')
The obstacle raster is created from the railway and cable-line datasets:
gdf_bahn = gpd.read_file(path_bahn)
rasterbahn = Vector2Raster(gdf_bahn, output,path_RASTER_Bahn)
arraybahn = np.array(rasterbahn.GetRasterBand(1).ReadAsArray())
gdf_strom = gpd.read_file(path_strom)
rasterstrom = Vector2Raster(gdf_strom, output,path_RASTER_Strom)
arraystrom = np.array(rasterstrom.GetRasterBand(1).ReadAsArray())
array_obstacles = arraystrom + arraybahn
array_obstacles[array_obstacles>0.1]=1
obstacle = CreateAndWriteRaster(path_RASTER_Obstacles, reference, array_obstacles,datatype = 'Int16')
print('Obstacle raster saved.')
Calculate the slope raster:
slope_degree = gdal.DEMProcessing(path_RASTER_Slope_Degree, DEM, 'slope')
slope_degree.GetRasterBand(1).ReadAsArray()
slope_percent = gdal.DEMProcessing(path_RASTER_Slope_Percent_direct, DEM, 'slope', slopeFormat = 'percent')
slope_percent.GetRasterBand(1).ReadAsArray()
slope_raster_def = CreateAndWriteRaster(path_RASTER_Slope_Percent, reference, slope_percent.GetRasterBand(1).ReadAsArray())
print('Slope raster calculated.')
Create the forest raster from the forest shapefile:
wald_raster = RasterizePolygon(path_Wald_shp_original, reference, path_RASTER_Wald)
Calculate the aspect raster:
aspect = gdal.DEMProcessing(path_RASTER_aspect_direct, DEM, 'aspect')
aspect.GetRasterBand(1).ReadAsArray()
aspect_nord_0 = aspect.GetRasterBand(1).ReadAsArray()
aspect_nord_0 = aspect_nord_0+180
aspect_nord_0[aspect_nord_0>360] = (aspect_nord_0-360)[aspect_nord_0>360]
aspect_nord_0 = 360 - aspect_nord_0
aspect_nord_0[aspect.GetRasterBand(1).ReadAsArray()==-9999]=-1
aspect_raster_def = CreateAndWriteRaster(path_RASTER_aspect, reference, aspect.GetRasterBand(1).ReadAsArray())
aspect_raster_n0 = CreateAndWriteRaster(path_RASTER_aspect_n0, reference, aspect_nord_0)
print('Aspect raster calculated.')
Convert the raster files to ASCII format:
print('Start writing ASCII files...')
header_info, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value = TransferHeaderGeoTiffToHeaderASCII(reference)
pathout = path_DEM_incl_NODATA_asc
arrfl = np.flipud(arraydem)
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)
pathout = path_RASTER_Obstacles_asc
arrfl = np.flipud(array_obstacles)
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)
pathout = path_RASTER_Slope_Percent_asc
arrfl = np.flipud(slope_percent.GetRasterBand(1).ReadAsArray())
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)
pathout = path_RASTER_Wald_asc
wald_raster_0_1 = wald_raster.GetRasterBand(1).ReadAsArray()
wald_raster_0_1[wald_raster_0_1>0]=1
arrfl = np.flipud(wald_raster_0_1)
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)
pathout = path_RASTER_aspect_asc
arrfl = np.flipud(aspect_raster_n0.GetRasterBand(1).ReadAsArray())
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)
print('ASCII files saved.')
We are done, all datasets are ready for Matlab modelling!