#!/usr/bin/env python # coding: utf-8 # This script preprocesses geodata to be used later in Matlab for further modelling. From existing raster files (DEM (digital elevation model), obstacles, aspect, slope, forest cover) with differing extents, smaller rasters with the same extent are extracted. The new extent is given by a polygon vector dataset (Perimeter). # The "obstacles" dataset first has to be transformed from line vector datasets (railway lines, high-voltage power lines) to raster. The forest dataset is derived from a polygon vector dataset. Aspect and slope both are derived from the DEM. # An additional requirement is that the raster coordinates be integers (multiples of 10) as shown below (example header): # # ncols 2216 # # nrows 2273 # # xllcorner 668820.0 # # yllcorner 5270130.0 # # cellsize 10.0 # # NODATA_value -9999.0 # # Some functions that will be used in the preprocessing need to be defined first: # - RasterizePolygon (used to turn a vector dataset into a raster dataset) # - WriteASCII (used to convert raster datasets into ASCII files) # - TransferHeaderGeoTiffToHeaderASCII (used to transform a GeoTiff header into an ASCII file header) # - CreateAndWriteRaster (used to turn an array into a raster) # - Vector2Raster (used to convert vector data into raster data) # - Extract_Url_Zipfiles (used to download zipped data from an online source and extract it) # In[22]: import geopandas as gpd import numpy as np from osgeo import gdal import requests import io import zipfile # Define function RasterizePolygon that is used to turn a vector dataset into a raster dataset: def RasterizePolygon(path_shp_file, reference, path_output): from osgeo import gdal from osgeo import ogr x_res = reference.RasterXSize y_res = reference.RasterYSize geo_transform = reference.GetGeoTransform() mb_v = ogr.Open(path_shp_file) mb_l = mb_v.GetLayer() target_ds = gdal.GetDriverByName('GTiff').Create(path_output, x_res, y_res, 1, gdal.GDT_Byte) target_ds.SetGeoTransform(geo_transform) band = target_ds.GetRasterBand(1) NoData_value = -9999 band.SetNoDataValue(NoData_value) band.FlushCache() gdal.RasterizeLayer(target_ds, [1], mb_l, options=['ALL_TOUCHED=TRUE']) return target_ds # Define function WriteASCII that is used to convert raster datasets into ASCII files: def WriteASCII(pathout, arr, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value): ## write new file: TheFile=open(pathout,"w") TheFile.write("ncols "+str(ncols)+"\n") TheFile.write("nrows "+str(nrows)+"\n") TheFile.write("xllcorner "+str(xllcorner)+"\n") TheFile.write("yllcorner "+str(yllcorner)+"\n") TheFile.write("cellsize "+str(cellsize)+"\n") TheFile.write("NODATA_value "+str(NODATA_value)+"\n") TheFormat="{0} " for i in range(0,nrows): for j in range(0,ncols): TheFile.write(TheFormat.format(arr[i,j])) TheFile.write("\n") TheFile.close() # Define functione TransferHeaderGeoTiffToHeaderASCII which is used to transform a GeoTiff header into an ASCII file header: def TransferHeaderGeoTiffToHeaderASCII(gdata): import numpy as np band = gdata.GetRasterBand(1) # arr = band.ReadAsArray() width = gdata.RasterXSize height = gdata.RasterYSize gt = gdata.GetGeoTransform() minx = gt[0] miny = gt[3] + width*gt[4] + height*gt[5] maxx = gt[0] + width*gt[1] + height*gt[2] maxy = gt[3] xllcorner = min(minx,maxx) yllcorner = min(miny,maxy) cellsize = gt[1] ncols = gdata.RasterXSize nrows = gdata.RasterYSize band = gdata.GetRasterBand(1) NODATA_value = band.GetNoDataValue() names = ['ncols', 'nrows', 'xllcorner', 'yllcorner', 'cellsize', 'NODATA_value'] formats = ['int','int','float','float','float','float'] # Definition des Datentypes: header_info = np.zeros(1, dtype={'names':names, 'formats':formats}) header_info['ncols'] = int(ncols) header_info['nrows'] = int(nrows) header_info['xllcorner'] = xllcorner header_info['yllcorner'] = yllcorner header_info['cellsize'] = cellsize header_info['NODATA_value'] = NODATA_value return header_info, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value # Define function CreateAndWriteRaster that is used to turn an array into a raster: def CreateAndWriteRaster(Path_Raster, ReferenceRaster, ArrayToWrite, NoDataValue = -9999, datatype = 'Float32'): ## Datatypes: https://naturalatlas.github.io/node-gdal/classes/Constants%20(GDT).html referenceProj = ReferenceRaster.GetProjection() geotransform = ReferenceRaster.GetGeoTransform() # create the 3-band raster file number_of_bands = 1 if datatype == 'Float32': raster = gdal.GetDriverByName('GTiff').Create(Path_Raster, ReferenceRaster.RasterXSize, ReferenceRaster.RasterYSize, number_of_bands, gdal.GDT_Float32) if datatype == 'Int16': raster = gdal.GetDriverByName('GTiff').Create(Path_Raster, ReferenceRaster.RasterXSize, ReferenceRaster.RasterYSize, number_of_bands, gdal.GDT_Int16) raster.SetGeoTransform(geotransform) # specify coords raster.GetRasterBand(1).WriteArray(ArrayToWrite) # write r-band to the raster # raster.GetRasterBand(1).SetNoDataValue(255) raster.GetRasterBand(1).SetNoDataValue(NoDataValue) print ('NoData Value: ',raster.GetRasterBand(1).GetNoDataValue()) raster.SetProjection(referenceProj) raster.FlushCache() # write to disk return(raster) # Define function Vector2Raster which is used to convert vector data into raster data: def Vector2Raster(gdf, ReferenceRaster,path_WriteRASTER): from osgeo import ogr from shapely.wkt import loads def segmentize(geom): wkt = geom.wkt # shapely Polygon to wkt geom = ogr.CreateGeometryFromWkt(wkt) # create ogr geometry geom.Segmentize(2) # densify geometry wkt2 = geom.ExportToWkt() # ogr geometry to wkt new = loads(wkt2) # wkt to shapely Polygon return new ## https://stackoverflow.com/questions/28995146/matlab-ind2sub-equivalent-in-python def sub2ind(array_shape, rows, cols): return rows*array_shape[1] + cols def ind2sub(array_shape, ind): ## Zeile #rows = (ind.astype('int') / array_shape[1]) rows = np.floor(ind.astype('int') / array_shape[1]).astype('int') ## Spalte cols = (ind.astype('int') % array_shape[1]) # or numpy.mod(ind.astype('int'), array_shape[1]) return (rows, cols) # gdf.plot() gdf['geometry'] = gdf['geometry'].map(segmentize) for j in range(len(gdf)): x_coords = np.array(gdf['geometry'][j].coords.xy[0]) y_coords = np.array(gdf['geometry'][j].coords.xy[1]) if j == 0: x_collector = x_coords y_collector = y_coords else: x_collector = np.concatenate((x_collector,x_coords)) y_collector = np.concatenate((y_collector,y_coords)) points = np.concatenate(([x_collector],[y_collector]),axis = 0).T upx, xres, xskew, upy, yskew, yres = ReferenceRaster.GetGeoTransform() xmin = upx xmax = upx+xres*ReferenceRaster.RasterXSize ymin = upy ymax = upy+yres*ReferenceRaster.RasterYSize anz_y = ReferenceRaster.RasterYSize anz_x = ReferenceRaster.RasterXSize referenceProj = ReferenceRaster.GetProjection() print (xmin,xmax,ymin,ymax) if xres == yres: cellsize = xres else: cellsize = np.nan print ('ERROR: Cellsize differs in x and y direction') print (xres, yres) values_within_boundaries = np.logical_and(np.logical_and(points[:,0] > xmin, points[:,0] < xmax),np.logical_and(points[:,1] > ymin, points[:,1] < ymax)) points = points[values_within_boundaries,:] print ((xmax - xmin)/cellsize) # size_x = ((xmax - xmin)/cellsize).astype(int) # size_y = ((ymax - ymin)/cellsize).astype(int) # lb_x = np.arange(0,xmax - xmin,cellsize) # ub_x = lb_x + cellsize # # ind_x = np.arange(len(lb_x)) points_ind_x=np.floor((points[:,0]-xmin)/cellsize).astype(int) points_ind_y=np.floor((points[:,1]-ymin)/cellsize).astype(int) points_ind_lin_ind = sub2ind([anz_x,anz_y], points_ind_x, points_ind_y) ## Unique points_ind_lin_ind_unique = np.unique(points_ind_lin_ind) ## Write to Raster rows, cols = ind2sub([anz_x,anz_y], points_ind_lin_ind_unique) image_size = (int(anz_y),int(anz_x)) v_pixels = np.zeros(image_size).astype(int) print(cols) print(rows) v_pixels[cols,rows] = 1 # xres = cellsize # yres = cellsize # geotransform = (xmin, xres, 0, ymin, 0, yres) geotransform = ReferenceRaster.GetGeoTransform() # create the 3-band raster file number_of_bands = 1 rasterfromvector = gdal.GetDriverByName('GTiff').Create(path_WriteRASTER, int(anz_x), int(anz_y), number_of_bands, gdal.GDT_Int16) rasterfromvector.SetGeoTransform(geotransform) # specify coords rasterfromvector.GetRasterBand(1).WriteArray(v_pixels) # write r-band to the raster rasterfromvector.GetRasterBand(1).SetNoDataValue(255) rasterfromvector.SetProjection(referenceProj) rasterfromvector.FlushCache() # write to disk return(rasterfromvector) # Define function Extract_Url_Zipfiles: def Extract_Url_Zipfiles(url, name, path): # file name stays the same import requests from zipfile import ZipFile r = requests.get(url) with open(name, "wb") as code: code.write(r.content) z = name zf = ZipFile(z,'r') zf.extractall(path) # Small datasets can be saved in memory. Larger datasets are downloaded and saved to disk, as shown here. # The download links refer to the data available on the Envidat website. # In[23]: url_forest = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/16db97c5-5546-4f80-9cb9-e562637b589c/download/osm_forest_export_25832.zip' url_rail = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/a787f798-0c3d-4cd3-a9d1-c93f51176465/download/osm_railways_export_25832.zip' url_dem = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/1bd80d45-a8fd-44dc-b153-7abfa6345d1c/download/eudem_ogdbayern_25m_25832.tif' url_powerlines = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/c567f81c-26c9-4122-8fbd-045c55fb73c0/download/osm_powerlines_export_25832.zip' url_perimeter = 'https://www.envidat.ch/dataset/d28614a0-0825-4040-bc1b-e0455b1e4df6/resource/710a065d-d50e-4a3d-a993-2a65197cef7b/download/digitized_sample_perimeter.zip' # Define the cell size of the output rasters: # In[24]: delta = 10 # Define path and filenames for Shapefiles: # In[25]: basic_path_input_shapefiles = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Shapefiles' # Filenames: name_perimeter = 'digitized_sample_perimeter.shp' name_bahn = 'osm_railways_export_25832.shp' name_strom = 'osm_powerlines_export_25832.shp' name_waldlayer = 'osm_forest_export_25832.shp' path_perimeter = basic_path_input_shapefiles + '\\' + name_perimeter path_bahn = basic_path_input_shapefiles + '\\' + name_bahn path_strom = basic_path_input_shapefiles + '\\' + name_strom path_Wald_shp_original = basic_path_input_shapefiles+'\\'+ name_waldlayer # Download, save and unzip the shapefiles: (MF) # In[26]: path = basic_path_input_shapefiles # Perimeter: Extract_Url_Zipfiles(url_perimeter, name_perimeter, basic_path_input_shapefiles) # Forest: Extract_Url_Zipfiles(url_forest, name_waldlayer, basic_path_input_shapefiles) # Railway lines: Extract_Url_Zipfiles(url_rail, name_bahn, basic_path_input_shapefiles) # Power lines: Extract_Url_Zipfiles(url_powerlines, name_strom, basic_path_input_shapefiles) print ('Shapefiles saved.') # Path and filename for the DEM raster data are also defined: # In[27]: basic_path_input_rasterfiles = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Rasterfiles' # Filenames: name_DEM_original = 'eudem_ogdbayern_25m_25832.tif' path_DEM_original = basic_path_input_rasterfiles + '\\' + name_DEM_original # Output path: # In[28]: basic_path_out = r'H:\Erschliessung\Envidat_Jupyter\2019\Data\Output' # Download and save DEM: # In[29]: DEM = requests.get(url_dem) open(path_DEM_original, 'wb').write(DEM.content) # Next we define the filenames for the rasters derived from the input datasets: # In[ ]: name_NULLRASTER = 'NULL_Raster.tif' name_DEM = 'dem_wo_nodata.tif' name_DEM_incl_NODATA = 'dem.tif' name_RASTER_Bahn = 'Bahn_Raster.tif' name_RASTER_Strom = 'Strom_Raster.tif' name_RASTER_Obstacles = 'Obstacles_Raster.tif' name_RASTER_Slope_Degree = 'Slope_Degree.tif' name_RASTER_Slope_Percent_direct = 'Slope_Percent_dir.tif' name_RASTER_Slope_Percent = 'Slope_Percent.tif' name_RASTER_aspect_direct = 'Aspect_dir.tif' name_RASTER_aspect = 'Aspect.tif' name_RASTER_aspect_n0 = 'Aspect_n0.tif' name_RASTER_Wald = 'Wald.tif' path_NULLRASTER = basic_path_out + '\\'+ name_NULLRASTER path_DEM = basic_path_out + '\\'+ name_DEM path_DEM_incl_NODATA = basic_path_out + '\\'+ name_DEM_incl_NODATA path_RASTER_Bahn = basic_path_out + '\\'+ name_RASTER_Bahn path_RASTER_Strom = basic_path_out + '\\'+ name_RASTER_Strom path_RASTER_Obstacles = basic_path_out + '\\'+ name_RASTER_Obstacles path_RASTER_Slope_Degree = basic_path_out + '\\'+ name_RASTER_Slope_Degree path_RASTER_Slope_Percent_direct = basic_path_out + '\\'+ name_RASTER_Slope_Percent_direct path_RASTER_Slope_Percent = basic_path_out + '\\'+ name_RASTER_Slope_Percent path_RASTER_Wald = basic_path_out + '\\'+ name_RASTER_Wald path_RASTER_aspect_direct = basic_path_out + '\\'+ name_RASTER_aspect_direct path_RASTER_aspect = basic_path_out + '\\'+ name_RASTER_aspect path_RASTER_aspect_n0 = basic_path_out + '\\'+ name_RASTER_aspect_n0 # The raster files will be saved in the *.tif and *.txt format. # In[ ]: ins = '.tif' outs = '.txt' path_DEM_incl_NODATA_asc = path_DEM_incl_NODATA.replace(ins, outs) path_RASTER_Obstacles_asc = path_RASTER_Obstacles.replace(ins, outs) path_RASTER_Slope_Percent_asc = path_RASTER_Slope_Percent.replace(ins, outs) path_RASTER_Wald_asc = path_RASTER_Wald.replace(ins, outs) path_RASTER_aspect_asc = path_RASTER_aspect.replace(ins, outs) path_RASTER_aspect_n0_asc = path_RASTER_aspect_n0.replace(ins, outs) # Open the original DEM file: # In[ ]: raster_DEM_original = gdal.Open(path_DEM_original) # We now create an empty reference raster with the correct extent and save it as a 3-band raster: # In[ ]: perimeter_df = gpd.read_file(path_perimeter) bbox = np.array(perimeter_df.total_bounds) bbox_f = np.floor(bbox / delta)*delta-delta/2. bbox_c = np.ceil(bbox / delta)*delta+delta/2. X = np.array((bbox_f[0],bbox_c[2])) Y = np.array((bbox_f[1],bbox_c[3])) xmin, ymin, xmax, ymax = [min(X)-delta/2, min(Y)-delta/2, max(X)+delta/2, max(Y)+delta/2] anz_x = (xmax - xmin) / delta anz_y = (ymax - ymin) / delta image_size = (int(anz_y),int(anz_x)) v_pixels = np.zeros(image_size).astype(int) x_index = np.arange(anz_x).astype(int) y_index = np.arange(anz_y).astype(int) x_values = np.arange(xmin,xmax+delta,delta).astype(int) y_values = np.arange(ymin,ymax+delta,delta).astype(int) xres = delta yres = delta geotransform = (xmin, xres, 0, ymin, 0, yres) referenceProj = raster_DEM_original.GetProjection() # create the 3-band raster file of the Empty Reference Raster number_of_bands = 1 reference = gdal.GetDriverByName('GTiff').Create(path_NULLRASTER, int(anz_x), int(anz_y), number_of_bands, gdal.GDT_Float32) reference.SetGeoTransform(geotransform) # specify coords reference.GetRasterBand(1).WriteArray(v_pixels) # write r-band to the raster reference.GetRasterBand(1).SetNoDataValue(-9999) reference.SetProjection(referenceProj) reference.FlushCache() # write to disk # The original DEM is resampled to 10 m resolution and converted into a numpy array: # In[ ]: ## Resampling of the DEM: from osgeo import gdal, gdalconst inputf = raster_DEM_original inputProj = inputf.GetProjection() inputTrans = inputf.GetGeoTransform() referenceProj = reference.GetProjection() referenceTrans = reference.GetGeoTransform() bandreference = reference.GetRasterBand(1) x = reference.RasterXSize y = reference.RasterYSize driver= gdal.GetDriverByName('GTiff') output = driver.Create(path_DEM,x,y,1,bandreference.DataType) output.SetGeoTransform(referenceTrans) output.SetProjection(referenceProj) gdal.ReprojectImage(inputf,output,inputProj,referenceProj,gdalconst.GRA_Bilinear) driver= gdal.GetDriverByName('GTiff') output = driver.Create(path_DEM,x,y,1,bandreference.DataType) output.SetGeoTransform(referenceTrans) output.SetProjection(referenceProj) gdal.ReprojectImage(inputf,output,inputProj,referenceProj,gdalconst.GRA_Bilinear) ## Convert the raster to a numpy array arraydem = np.array(output.GetRasterBand(1).ReadAsArray()) arraydem[arraydem==0]=-9999 DEM = CreateAndWriteRaster(path_DEM_incl_NODATA, reference, arraydem) print('Resampling finished.') # The obstacle raster is created from the railway and cable-line datasets: # In[ ]: gdf_bahn = gpd.read_file(path_bahn) rasterbahn = Vector2Raster(gdf_bahn, output,path_RASTER_Bahn) arraybahn = np.array(rasterbahn.GetRasterBand(1).ReadAsArray()) gdf_strom = gpd.read_file(path_strom) rasterstrom = Vector2Raster(gdf_strom, output,path_RASTER_Strom) arraystrom = np.array(rasterstrom.GetRasterBand(1).ReadAsArray()) array_obstacles = arraystrom + arraybahn array_obstacles[array_obstacles>0.1]=1 obstacle = CreateAndWriteRaster(path_RASTER_Obstacles, reference, array_obstacles,datatype = 'Int16') print('Obstacle raster saved.') # Calculate the slope raster: # In[ ]: slope_degree = gdal.DEMProcessing(path_RASTER_Slope_Degree, DEM, 'slope') slope_degree.GetRasterBand(1).ReadAsArray() slope_percent = gdal.DEMProcessing(path_RASTER_Slope_Percent_direct, DEM, 'slope', slopeFormat = 'percent') slope_percent.GetRasterBand(1).ReadAsArray() slope_raster_def = CreateAndWriteRaster(path_RASTER_Slope_Percent, reference, slope_percent.GetRasterBand(1).ReadAsArray()) print('Slope raster calculated.') # Create the forest raster from the forest shapefile: # In[ ]: wald_raster = RasterizePolygon(path_Wald_shp_original, reference, path_RASTER_Wald) # Calculate the aspect raster: # In[ ]: aspect = gdal.DEMProcessing(path_RASTER_aspect_direct, DEM, 'aspect') aspect.GetRasterBand(1).ReadAsArray() aspect_nord_0 = aspect.GetRasterBand(1).ReadAsArray() aspect_nord_0 = aspect_nord_0+180 aspect_nord_0[aspect_nord_0>360] = (aspect_nord_0-360)[aspect_nord_0>360] aspect_nord_0 = 360 - aspect_nord_0 aspect_nord_0[aspect.GetRasterBand(1).ReadAsArray()==-9999]=-1 aspect_raster_def = CreateAndWriteRaster(path_RASTER_aspect, reference, aspect.GetRasterBand(1).ReadAsArray()) aspect_raster_n0 = CreateAndWriteRaster(path_RASTER_aspect_n0, reference, aspect_nord_0) print('Aspect raster calculated.') # Convert the raster files to ASCII format: # In[ ]: print('Start writing ASCII files...') header_info, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value = TransferHeaderGeoTiffToHeaderASCII(reference) pathout = path_DEM_incl_NODATA_asc arrfl = np.flipud(arraydem) WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value) pathout = path_RASTER_Obstacles_asc arrfl = np.flipud(array_obstacles) WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value) pathout = path_RASTER_Slope_Percent_asc arrfl = np.flipud(slope_percent.GetRasterBand(1).ReadAsArray()) WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value) pathout = path_RASTER_Wald_asc wald_raster_0_1 = wald_raster.GetRasterBand(1).ReadAsArray() wald_raster_0_1[wald_raster_0_1>0]=1 arrfl = np.flipud(wald_raster_0_1) WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value) pathout = path_RASTER_aspect_asc arrfl = np.flipud(aspect_raster_n0.GetRasterBand(1).ReadAsArray()) WriteASCII(pathout, arrfl, ncols, nrows, xllcorner, yllcorner, cellsize, NODATA_value) print('ASCII files saved.') # We are done, all datasets are ready for Matlab modelling!