This script preprocesses geodata to be used later in Matlab for further modelling. From existing raster files (DEM (digital elevation model), obstacles, aspect, slope, forest cover) with differing extents, smaller rasters with the same extent are extracted. The new extent is given by a polygon vector dataset (Perimeter). The "obstacles" dataset first has to be transformed from line vector datasets (railway lines, high-voltage power lines) to raster. The forest dataset is derived from a polygon vector dataset. Aspect and slope both are derived from the DEM. An additional requirement is that the raster coordinates be integers (multiples of 10) as shown below (example header):

ncols 2216

nrows 2273

xllcorner 668820.0

yllcorner 5270130.0

cellsize 10.0

NODATA_value -9999.0

Some functions that will be used in the preprocessing need to be defined first:

  • RasterizePolygon (used to turn a vector dataset into a raster dataset)
  • WriteASCII (used to convert raster datasets into ASCII files)
  • TransferHeaderGeoTiffToHeaderASCII (used to transform a GeoTiff header into an ASCII file header)
  • CreateAndWriteRaster (used to turn an array into a raster)
  • Vector2Raster (used to convert vector data into raster data)
  • Extract_Url_Zipfiles (used to download zipped data from an online source and extract it)
In [22]:
import geopandas as gpd
import numpy as np
from osgeo import gdal
import requests
import io
import zipfile

# Define function RasterizePolygon that is used to turn a vector dataset into a raster dataset:

def RasterizePolygon(path_shp_file, reference, path_output):
    
    from osgeo import gdal
    from osgeo import ogr
    
    x_res = reference.RasterXSize
    y_res = reference.RasterYSize

    geo_transform = reference.GetGeoTransform()
    
    mb_v = ogr.Open(path_shp_file)
    mb_l = mb_v.GetLayer()
    target_ds = gdal.GetDriverByName('GTiff').Create(path_output, x_res, y_res, 1, gdal.GDT_Byte)
    target_ds.SetGeoTransform(geo_transform)
    
    band = target_ds.GetRasterBand(1)
    NoData_value = -9999
    band.SetNoDataValue(NoData_value)
    band.FlushCache()
    gdal.RasterizeLayer(target_ds, [1], mb_l, options=['ALL_TOUCHED=TRUE'])

    return target_ds

# Define function WriteASCII that is used to convert raster datasets into ASCII files:

def WriteASCII(pathout, arr, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value):
    
    ## write new file:

    TheFile=open(pathout,"w")
    TheFile.write("ncols "+str(ncols)+"\n")
    TheFile.write("nrows "+str(nrows)+"\n")
    TheFile.write("xllcorner     "+str(xllcorner)+"\n")
    TheFile.write("yllcorner     "+str(yllcorner)+"\n")
    TheFile.write("cellsize      "+str(cellsize)+"\n")
    TheFile.write("NODATA_value  "+str(NODATA_value)+"\n")
    
    TheFormat="{0} "
      
    for i in range(0,nrows):
        for j in range(0,ncols):
            TheFile.write(TheFormat.format(arr[i,j]))
        TheFile.write("\n")
    TheFile.close()

# Define functione TransferHeaderGeoTiffToHeaderASCII which is used to transform a GeoTiff header into an ASCII file header:

def TransferHeaderGeoTiffToHeaderASCII(gdata):

    import numpy as np
    
    band = gdata.GetRasterBand(1)
    # arr = band.ReadAsArray()
    
    width = gdata.RasterXSize
    height = gdata.RasterYSize
    gt = gdata.GetGeoTransform()
    minx = gt[0]
    miny = gt[3] + width*gt[4] + height*gt[5] 
    maxx = gt[0] + width*gt[1] + height*gt[2]
    maxy = gt[3] 
    
    xllcorner = min(minx,maxx)
    yllcorner = min(miny,maxy)
    
    cellsize = gt[1]
    ncols = gdata.RasterXSize
    nrows = gdata.RasterYSize        
    band = gdata.GetRasterBand(1)
    NODATA_value = band.GetNoDataValue()    
    
    names = ['ncols', 'nrows', 'xllcorner', 'yllcorner', 'cellsize', 'NODATA_value']
    formats = ['int','int','float','float','float','float']
    # Definition des Datentypes:    
    header_info = np.zeros(1, dtype={'names':names, 'formats':formats})
        
    header_info['ncols'] = int(ncols)
    header_info['nrows'] = int(nrows)
    header_info['xllcorner'] = xllcorner
    header_info['yllcorner'] = yllcorner
    header_info['cellsize'] = cellsize
    header_info['NODATA_value'] = NODATA_value

    return header_info, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value

# Define function CreateAndWriteRaster that is used to turn an array into a raster:

def CreateAndWriteRaster(Path_Raster, ReferenceRaster, ArrayToWrite, NoDataValue = -9999, datatype = 'Float32'):
    
## Datatypes: https://naturalatlas.github.io/node-gdal/classes/Constants%20(GDT).html    

    referenceProj = ReferenceRaster.GetProjection()
    geotransform = ReferenceRaster.GetGeoTransform()
    
    # create the 3-band raster file
    number_of_bands = 1
    if datatype == 'Float32':
        raster = gdal.GetDriverByName('GTiff').Create(Path_Raster, ReferenceRaster.RasterXSize, ReferenceRaster.RasterYSize, number_of_bands, gdal.GDT_Float32)
    if datatype == 'Int16':
        raster = gdal.GetDriverByName('GTiff').Create(Path_Raster, ReferenceRaster.RasterXSize, ReferenceRaster.RasterYSize, number_of_bands, gdal.GDT_Int16)
    
    raster.SetGeoTransform(geotransform)    # specify coords
    raster.GetRasterBand(1).WriteArray(ArrayToWrite)   # write r-band to the raster
    # raster.GetRasterBand(1).SetNoDataValue(255)
    
    raster.GetRasterBand(1).SetNoDataValue(NoDataValue)
    
    print ('NoData Value: ',raster.GetRasterBand(1).GetNoDataValue())
   
    raster.SetProjection(referenceProj)
    raster.FlushCache()                     # write to disk

    return(raster)

# Define function Vector2Raster which is used to convert vector data into raster data:

def Vector2Raster(gdf, ReferenceRaster,path_WriteRASTER):

    from osgeo import ogr
    from shapely.wkt import loads
        
    def segmentize(geom):
        wkt = geom.wkt  # shapely Polygon to wkt
        geom = ogr.CreateGeometryFromWkt(wkt)  # create ogr geometry
        geom.Segmentize(2)  # densify geometry
        wkt2 = geom.ExportToWkt()  # ogr geometry to wkt
        new = loads(wkt2)  # wkt to shapely Polygon
        return new

    ## https://stackoverflow.com/questions/28995146/matlab-ind2sub-equivalent-in-python
    
    def sub2ind(array_shape, rows, cols):
        return rows*array_shape[1] + cols
    
    def ind2sub(array_shape, ind):
        ## Zeile
        #rows = (ind.astype('int') / array_shape[1])
        rows = np.floor(ind.astype('int') / array_shape[1]).astype('int')
        ## Spalte
        cols = (ind.astype('int') % array_shape[1]) # or numpy.mod(ind.astype('int'), array_shape[1])
        return (rows, cols)

    # gdf.plot()
    
    gdf['geometry'] = gdf['geometry'].map(segmentize)  
    
    for j in range(len(gdf)):
        x_coords = np.array(gdf['geometry'][j].coords.xy[0])
        y_coords = np.array(gdf['geometry'][j].coords.xy[1])
        if j == 0:
            x_collector = x_coords
            y_collector = y_coords
        else:
            x_collector = np.concatenate((x_collector,x_coords))
            y_collector = np.concatenate((y_collector,y_coords))
                  
    points = np.concatenate(([x_collector],[y_collector]),axis = 0).T
    
    upx, xres, xskew, upy, yskew, yres = ReferenceRaster.GetGeoTransform()
    xmin  = upx
    xmax  = upx+xres*ReferenceRaster.RasterXSize
    ymin  = upy
    ymax  = upy+yres*ReferenceRaster.RasterYSize
    anz_y = ReferenceRaster.RasterYSize
    anz_x = ReferenceRaster.RasterXSize
    referenceProj = ReferenceRaster.GetProjection()

    print (xmin,xmax,ymin,ymax)
    
    if xres == yres:
        cellsize = xres
    else:
        cellsize = np.nan
        print ('ERROR: Cellsize differs in x and y direction')
        print (xres, yres)

    values_within_boundaries = np.logical_and(np.logical_and(points[:,0] > xmin, points[:,0] < xmax),np.logical_and(points[:,1] > ymin, points[:,1] < ymax))
       
    points = points[values_within_boundaries,:]
        
    print ((xmax - xmin)/cellsize)
    
#    size_x = ((xmax - xmin)/cellsize).astype(int)
#    size_y = ((ymax - ymin)/cellsize).astype(int)
#    lb_x = np.arange(0,xmax - xmin,cellsize)
#    ub_x = lb_x + cellsize
#    
#    ind_x = np.arange(len(lb_x))
    
    points_ind_x=np.floor((points[:,0]-xmin)/cellsize).astype(int)
    points_ind_y=np.floor((points[:,1]-ymin)/cellsize).astype(int)    
    
    points_ind_lin_ind = sub2ind([anz_x,anz_y], points_ind_x, points_ind_y)
       
    ## Unique
    points_ind_lin_ind_unique = np.unique(points_ind_lin_ind)
       
    ## Write to Raster   
    rows, cols = ind2sub([anz_x,anz_y], points_ind_lin_ind_unique) 
    image_size = (int(anz_y),int(anz_x))     
    v_pixels = np.zeros(image_size).astype(int)  

    print(cols)
    print(rows)                
    v_pixels[cols,rows] = 1
                         
#    xres = cellsize
#    yres = cellsize
    
    # geotransform = (xmin, xres, 0, ymin, 0, yres)
    geotransform = ReferenceRaster.GetGeoTransform()
    
    # create the 3-band raster file
    number_of_bands = 1
    rasterfromvector = gdal.GetDriverByName('GTiff').Create(path_WriteRASTER, int(anz_x), int(anz_y), number_of_bands, gdal.GDT_Int16)
    rasterfromvector.SetGeoTransform(geotransform)    # specify coords
    rasterfromvector.GetRasterBand(1).WriteArray(v_pixels)   # write r-band to the raster
    rasterfromvector.GetRasterBand(1).SetNoDataValue(255)
    rasterfromvector.SetProjection(referenceProj)
    rasterfromvector.FlushCache()                     # write to disk

    return(rasterfromvector)

# Define function Extract_Url_Zipfiles:

def Extract_Url_Zipfiles(url, name, path):
# file name stays the same
    
    import requests
    from zipfile import ZipFile

    r = requests.get(url)
    with open(name, "wb") as code:
        code.write(r.content)
    z = name
    zf = ZipFile(z,'r')
    zf.extractall(path)
    

Small datasets can be saved in memory. Larger datasets are downloaded and saved to disk, as shown here. The download links refer to the data available on the Envidat website.

In [23]:
url_forest = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/16db97c5-5546-4f80-9cb9-e562637b589c/download/osm_forest_export_25832.zip'
url_rail = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/a787f798-0c3d-4cd3-a9d1-c93f51176465/download/osm_railways_export_25832.zip'
url_dem = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/1bd80d45-a8fd-44dc-b153-7abfa6345d1c/download/eudem_ogdbayern_25m_25832.tif'
url_powerlines = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/c567f81c-26c9-4122-8fbd-045c55fb73c0/download/osm_powerlines_export_25832.zip'
url_perimeter = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/710a065d-d50e-4a3d-a993-2a65197cef7b/download/digitized_sample_perimeter.zip'

Define the cell size of the output rasters:

In [24]:
delta = 10 

Define path and filenames for Shapefiles:

In [25]:
basic_path_input_shapefiles = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Shapefiles'

# Filenames:
name_perimeter      = 'digitized_sample_perimeter.shp'
name_bahn           = 'osm_railways_export_25832.shp'
name_strom          = 'osm_powerlines_export_25832.shp'
name_waldlayer      = 'osm_forest_export_25832.shp'

path_perimeter  = basic_path_input_shapefiles + '\\' + name_perimeter
path_bahn       = basic_path_input_shapefiles + '\\' + name_bahn
path_strom      = basic_path_input_shapefiles + '\\' + name_strom
path_Wald_shp_original = basic_path_input_shapefiles+'\\'+ name_waldlayer

Download, save and unzip the shapefiles: (MF)

In [26]:
path = basic_path_input_shapefiles

# Perimeter:
Extract_Url_Zipfiles(url_perimeter, name_perimeter, basic_path_input_shapefiles)

# Forest:
Extract_Url_Zipfiles(url_forest, name_waldlayer, basic_path_input_shapefiles)

# Railway lines:
Extract_Url_Zipfiles(url_rail, name_bahn, basic_path_input_shapefiles)

# Power lines:
Extract_Url_Zipfiles(url_powerlines, name_strom, basic_path_input_shapefiles)

print ('Shapefiles saved.')
Shapefiles saved.

Path and filename for the DEM raster data are also defined:

In [27]:
basic_path_input_rasterfiles = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Rasterfiles'

# Filenames:
name_DEM_original = 'eudem_ogdbayern_25m_25832.tif'
path_DEM_original = basic_path_input_rasterfiles + '\\' + name_DEM_original

Output path:

In [28]:
basic_path_out = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Output'

Download and save DEM:

In [29]:
DEM = requests.get(url_dem) 
open(path_DEM_original, 'wb').write(DEM.content)
Out[29]:
1695063

Next we define the filenames for the rasters derived from the input datasets:

In [ ]:
name_NULLRASTER         = 'NULL_Raster.tif'
name_DEM                = 'dem_wo_nodata.tif'
name_DEM_incl_NODATA    = 'dem.tif'

name_RASTER_Bahn        = 'Bahn_Raster.tif'
name_RASTER_Strom       = 'Strom_Raster.tif'
name_RASTER_Obstacles   = 'Obstacles_Raster.tif'

name_RASTER_Slope_Degree            = 'Slope_Degree.tif'
name_RASTER_Slope_Percent_direct    = 'Slope_Percent_dir.tif'
name_RASTER_Slope_Percent           = 'Slope_Percent.tif'

name_RASTER_aspect_direct       = 'Aspect_dir.tif'
name_RASTER_aspect              = 'Aspect.tif'
name_RASTER_aspect_n0           = 'Aspect_n0.tif'
name_RASTER_Wald                = 'Wald.tif'


path_NULLRASTER         = basic_path_out + '\\'+ name_NULLRASTER
path_DEM                = basic_path_out + '\\'+ name_DEM
path_DEM_incl_NODATA    = basic_path_out + '\\'+ name_DEM_incl_NODATA
path_RASTER_Bahn        = basic_path_out + '\\'+ name_RASTER_Bahn
path_RASTER_Strom       = basic_path_out + '\\'+ name_RASTER_Strom
path_RASTER_Obstacles   = basic_path_out + '\\'+ name_RASTER_Obstacles
path_RASTER_Slope_Degree  = basic_path_out + '\\'+ name_RASTER_Slope_Degree
path_RASTER_Slope_Percent_direct = basic_path_out + '\\'+ name_RASTER_Slope_Percent_direct
path_RASTER_Slope_Percent = basic_path_out + '\\'+ name_RASTER_Slope_Percent
path_RASTER_Wald = basic_path_out + '\\'+ name_RASTER_Wald

path_RASTER_aspect_direct = basic_path_out + '\\'+ name_RASTER_aspect_direct
path_RASTER_aspect = basic_path_out + '\\'+ name_RASTER_aspect
path_RASTER_aspect_n0 = basic_path_out + '\\'+ name_RASTER_aspect_n0

The raster files will be saved in the .tif and .txt format.

In [ ]:
ins = '.tif'
outs = '.txt'
path_DEM_incl_NODATA_asc = path_DEM_incl_NODATA.replace(ins, outs)
path_RASTER_Obstacles_asc = path_RASTER_Obstacles.replace(ins, outs)
path_RASTER_Slope_Percent_asc = path_RASTER_Slope_Percent.replace(ins, outs)
path_RASTER_Wald_asc = path_RASTER_Wald.replace(ins, outs)
path_RASTER_aspect_asc = path_RASTER_aspect.replace(ins, outs)
path_RASTER_aspect_n0_asc = path_RASTER_aspect_n0.replace(ins, outs)

Open the original DEM file:

In [ ]:
raster_DEM_original = gdal.Open(path_DEM_original)

We now create an empty reference raster with the correct extent and save it as a 3-band raster:

In [ ]:
perimeter_df = gpd.read_file(path_perimeter)

bbox = np.array(perimeter_df.total_bounds)
bbox_f = np.floor(bbox / delta)*delta-delta/2.
bbox_c = np.ceil(bbox / delta)*delta+delta/2.

X = np.array((bbox_f[0],bbox_c[2]))
Y = np.array((bbox_f[1],bbox_c[3]))

xmin, ymin, xmax, ymax = [min(X)-delta/2, min(Y)-delta/2, max(X)+delta/2, max(Y)+delta/2]         

anz_x = (xmax - xmin) / delta  
anz_y = (ymax - ymin) / delta           
image_size = (int(anz_y),int(anz_x))
        
v_pixels = np.zeros(image_size).astype(int)                   
x_index = np.arange(anz_x).astype(int)
y_index = np.arange(anz_y).astype(int)
x_values = np.arange(xmin,xmax+delta,delta).astype(int)
y_values = np.arange(ymin,ymax+delta,delta).astype(int)

xres = delta
yres = delta

geotransform = (xmin, xres, 0, ymin, 0, yres)
referenceProj = raster_DEM_original.GetProjection()

# create the 3-band raster file of the Empty Reference Raster    
number_of_bands = 1
reference = gdal.GetDriverByName('GTiff').Create(path_NULLRASTER, int(anz_x), int(anz_y), number_of_bands, gdal.GDT_Float32)
reference.SetGeoTransform(geotransform)    # specify coords
reference.GetRasterBand(1).WriteArray(v_pixels)   # write r-band to the raster
reference.GetRasterBand(1).SetNoDataValue(-9999)
reference.SetProjection(referenceProj)
reference.FlushCache()                     # write to disk

The original DEM is resampled to 10 m resolution and converted into a numpy array:

In [ ]:
## Resampling of the DEM:

from osgeo import gdal, gdalconst

inputf = raster_DEM_original 
inputProj = inputf.GetProjection()
inputTrans = inputf.GetGeoTransform()

referenceProj = reference.GetProjection()
referenceTrans = reference.GetGeoTransform()
bandreference = reference.GetRasterBand(1)  
  
x = reference.RasterXSize 
y = reference.RasterYSize

driver= gdal.GetDriverByName('GTiff')
output = driver.Create(path_DEM,x,y,1,bandreference.DataType)
output.SetGeoTransform(referenceTrans)
output.SetProjection(referenceProj)
gdal.ReprojectImage(inputf,output,inputProj,referenceProj,gdalconst.GRA_Bilinear)

driver= gdal.GetDriverByName('GTiff')
output = driver.Create(path_DEM,x,y,1,bandreference.DataType)
output.SetGeoTransform(referenceTrans)
output.SetProjection(referenceProj)

gdal.ReprojectImage(inputf,output,inputProj,referenceProj,gdalconst.GRA_Bilinear)

## Convert the raster to a numpy array
arraydem = np.array(output.GetRasterBand(1).ReadAsArray())
arraydem[arraydem==0]=-9999

DEM = CreateAndWriteRaster(path_DEM_incl_NODATA, reference, arraydem)

print('Resampling finished.')

The obstacle raster is created from the railway and cable-line datasets:

In [ ]:
gdf_bahn = gpd.read_file(path_bahn)
rasterbahn = Vector2Raster(gdf_bahn, output,path_RASTER_Bahn)
arraybahn = np.array(rasterbahn.GetRasterBand(1).ReadAsArray())

gdf_strom = gpd.read_file(path_strom)
rasterstrom = Vector2Raster(gdf_strom, output,path_RASTER_Strom)
arraystrom = np.array(rasterstrom.GetRasterBand(1).ReadAsArray())

array_obstacles = arraystrom + arraybahn 
array_obstacles[array_obstacles>0.1]=1

obstacle = CreateAndWriteRaster(path_RASTER_Obstacles, reference, array_obstacles,datatype = 'Int16')

print('Obstacle raster saved.')

Calculate the slope raster:

In [ ]:
slope_degree = gdal.DEMProcessing(path_RASTER_Slope_Degree, DEM, 'slope')
slope_degree.GetRasterBand(1).ReadAsArray()

slope_percent = gdal.DEMProcessing(path_RASTER_Slope_Percent_direct, DEM, 'slope', slopeFormat = 'percent')
slope_percent.GetRasterBand(1).ReadAsArray()

slope_raster_def = CreateAndWriteRaster(path_RASTER_Slope_Percent, reference, slope_percent.GetRasterBand(1).ReadAsArray())
print('Slope raster calculated.')

Create the forest raster from the forest shapefile:

In [ ]:
wald_raster = RasterizePolygon(path_Wald_shp_original, reference, path_RASTER_Wald)

Calculate the aspect raster:

In [ ]:
aspect = gdal.DEMProcessing(path_RASTER_aspect_direct, DEM, 'aspect')
aspect.GetRasterBand(1).ReadAsArray()

aspect_nord_0 = aspect.GetRasterBand(1).ReadAsArray()
aspect_nord_0 = aspect_nord_0+180
aspect_nord_0[aspect_nord_0>360] = (aspect_nord_0-360)[aspect_nord_0>360]
aspect_nord_0 = 360 - aspect_nord_0
aspect_nord_0[aspect.GetRasterBand(1).ReadAsArray()==-9999]=-1

aspect_raster_def = CreateAndWriteRaster(path_RASTER_aspect, reference, aspect.GetRasterBand(1).ReadAsArray())

aspect_raster_n0 = CreateAndWriteRaster(path_RASTER_aspect_n0, reference, aspect_nord_0)
print('Aspect raster calculated.')

Convert the raster files to ASCII format:

In [ ]:
print('Start writing ASCII files...')

header_info, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value = TransferHeaderGeoTiffToHeaderASCII(reference)

pathout = path_DEM_incl_NODATA_asc
arrfl = np.flipud(arraydem)
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)

pathout = path_RASTER_Obstacles_asc
arrfl = np.flipud(array_obstacles)
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)

pathout = path_RASTER_Slope_Percent_asc
arrfl = np.flipud(slope_percent.GetRasterBand(1).ReadAsArray())
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)

pathout = path_RASTER_Wald_asc
wald_raster_0_1 = wald_raster.GetRasterBand(1).ReadAsArray()
wald_raster_0_1[wald_raster_0_1>0]=1
arrfl = np.flipud(wald_raster_0_1)
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)

pathout = path_RASTER_aspect_asc
arrfl = np.flipud(aspect_raster_n0.GetRasterBand(1).ReadAsArray())
WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value)

print('ASCII files saved.')

We are done, all datasets are ready for Matlab modelling!