#!/usr/bin/env python
# coding: utf-8

# # Water Mask

# #### Objective: Mapping waterlogged areas during the monsoon season

# The monsoon season brings the majority of the annual rainfall to the wet tropical regions of Asia. In a normal year in Bangladesh, nearly 20% of the country will be underwater after heavy rains and snowmelt cause coastal regions, rivers, and other areas to flood during the June to October monsoon season.
#
# To examine the changes in water occurrence over the region, we'll use both multispectral and Synthetic Aperture Radar (SAR) data available through the DL Platform. Our goal is to produce a snapshot of water coverage using a simple classification method to identify water.

# In[1]:

import descarteslabs as dl
import numpy as np
import json
import matplotlib.pyplot as plt
import sklearn
import sklearn.ensemble
import sklearn.preprocessing

# ### Find imagery

# In selecting classification features to identify water, we make several assumptions about the spectral response of water relative to other land cover types:
#
# * We expect SAR backscatter over standing water and submerged areas to be very low in all polarizations.
# * We expect NDVI to be low relative to vegetated areas.
# * We expect NDWI to be high relative to unsubmerged areas.

# We create a tile within Bangladesh, our study region, as a prototype for this demonstration.

# In[2]:

lat, lon = 23.7859405, 90.34804269999995

# In[3]:

tile = dl.raster.dltile_from_latlon(lat, lon, resolution=20.0, tilesize=1024, pad=0)

# In[4]:

avail_scenes, ctx = dl.scenes.search(tile['geometry'],
                                     products=["sentinel-2:L1C"],
                                     start_datetime='2017-01-01',
                                     end_datetime='2017-12-31')
avail_scenes

# ### Develop classification model

# ##### Define classification features to calculate from the imagery

# In[5]:

product_dic = {'sentinel-1:GRD': {'bands': ['vv', 'vh'], 'stats': 'min'},
               'sentinel-2:L1C': {'bands': ['ndvi', 'ndwi', 'cloud-mask', 'alpha'], 'stats': 'max'}}

# ##### Calculate and visualize a sample of the data

# In[6]:

date = ['2017-01-01', '2017-01-31']

all_stats = []

for product in sorted(product_dic.keys()):
    ''' DL Metadata search for imagery over our AOI '''
    avail_scenes, ctx = dl.scenes.search(tile['geometry'],
                                         products=[product],
                                         start_datetime=date[0],
                                         end_datetime=date[1])
    ids = avail_scenes.each.properties.id
    bands = product_dic[product]['bands']

    image_stack = None
    for i, idd in enumerate(ids):
        image, meta = dl.raster.ndarray(idd, bands=bands, dltile=tile)

        # Allocate the stack once we know the tile dimensions
        if image_stack is None:
            rx = image.shape[0]
            ry = image.shape[1]
            n_images = len(avail_scenes)
            n_bands = 2
            image_stack = np.zeros((n_images, rx, ry, n_bands))

        ''' Mask the clouds '''
        if "cloud-mask" in bands:
            cloud_mask = image[:, :, 2]
            image[cloud_mask == 1] = 0
            image_stack[i, :, :, :] = image[:, :, :2]
        else:
            image_stack[i, :, :, :] = image

    if product_dic[product]['stats'] == 'min':
        # Sentinel-1: per-pixel minimum backscatter over the period, ignoring nodata (zeros)
        ma = np.ma.masked_equal(image_stack, 0, copy=False)
        all_stats.append(np.ma.min(ma, axis=0))
    else:
        # Sentinel-2: per-pixel maximum NDVI/NDWI after zeroing cloudy pixels
        all_stats.append(np.max(image_stack, axis=0)[:, :, :2])

# By passing the same `dltile` to `dl.raster.ndarray`, we resample disparate datasets to a common resolution, projection, and pixel grid. Each of the resulting arrays therefore has the same shape and size.
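# The features above use the Platform's derived `ndvi` and `ndwi` bands directly. For reference, the two indices can also be computed from raw reflectance bands. The sketch below shows one way to do that for a single scene; the band names ('red', 'green', 'nir') and the single-scene call are illustrative assumptions, not part of the workflow above.

def spectral_indices(scene_id, tile):
    """Sketch: compute NDVI and NDWI for one scene on the tile grid."""
    # Assumed band names; adjust to the product's actual band list.
    img, _ = dl.raster.ndarray(scene_id, bands=['red', 'green', 'nir'], dltile=tile)
    red, green, nir = (img[:, :, i].astype('float64') for i in range(3))
    eps = 1e-9  # guard against division by zero over nodata pixels
    ndvi = (nir - red) / (nir + red + eps)      # high over vegetation, low over water
    ndwi = (green - nir) / (green + nir + eps)  # high over open water
    return ndvi, ndwi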
# In[7]:

print('Sentinel-1 data shape: {}'.format(all_stats[0].shape))
print('Sentinel-2 data shape: {}'.format(all_stats[1].shape))

# In[8]:

# combine the stats into a single array
all_stats = np.dstack(all_stats).data

fig = plt.figure(figsize=(10, 10))
labels = ['Sentinel-1 Min VV', 'Sentinel-1 Min VH', 'Sentinel-2 Max NDVI', 'Sentinel-2 Max NDWI']
for i in range(4):
    ax = plt.subplot(2, 2, i + 1)
    ax.imshow(all_stats[:, :, i])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(labels[i])
plt.tight_layout()

# #### Build a simple decision rule based on image thresholds

# In[9]:

# 1 where the thresholds suggest water, 0 elsewhere
mask = np.where(((all_stats[:, :, 0] < 60) &
                 (all_stats[:, :, 1] < 60) &
                 (all_stats[:, :, 2] < 35000) &
                 (all_stats[:, :, 3] > 35000)), 1, 0)

fig = plt.figure(figsize=(6, 6))
plt.imshow(mask, cmap='Blues')
plt.xticks([])
plt.yticks([])
plt.tight_layout()

# #### Build a simple, supervised machine learning model

# There are several ways to obtain training data with the Descartes Labs Platform:
# 1. Load existing reference information and extract image data
# 2. Use an existing Platform product (e.g. the Cropland Data Layer) to sample reference data
# 3. Digitize reference data in Viewer and export it as GeoJSON

# In[10]:

with open('water_train.json') as f:
    train_data = json.load(f)

# In[11]:

train_features = [[feature['properties']['vv'],
                   feature['properties']['vh'],
                   feature['properties']['ndvi'],
                   feature['properties']['ndwi']]
                  for feature in train_data['features']]

labels = [feature['properties']['class'] for feature in train_data['features']]
le = sklearn.preprocessing.LabelEncoder()
labels = le.fit_transform(labels)

clf = sklearn.ensemble.RandomForestClassifier(n_estimators=10,
                                              criterion='gini',
                                              min_samples_split=2,
                                              min_samples_leaf=1,
                                              min_weight_fraction_leaf=0.0,
                                              max_features='auto',
                                              bootstrap=True,
                                              n_jobs=1)
clf.fit(train_features, labels)

print('feature importances\n')
for fi, label in zip(clf.feature_importances_, ['vv', 'vh', 'ndvi', 'ndwi']):
    print("{:<10}: {:5f}".format(label, fi))

# ##### Test predictions

# In[12]:

features = all_stats.reshape(all_stats.shape[0] * all_stats.shape[1], all_stats.shape[2])
predictions = clf.predict(features)
predictions = predictions.reshape(all_stats.shape[0], all_stats.shape[1])

fig = plt.figure(figsize=(6, 6))
plt.imshow(predictions, cmap='Blues')
plt.xticks([])
plt.yticks([])
plt.tight_layout()
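# The forest above is fit on every labeled point, so its feature importances say nothing about generalization. Below is a minimal held-out evaluation and a rough area estimate, as a sketch only: the 80/20 split, the random_state, and the assumption that class 1 encodes water are illustrative choices, not part of the original workflow.

import sklearn.model_selection
import sklearn.metrics

# Held-out evaluation of the same model configuration
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    train_features, labels, test_size=0.2, random_state=0)
clf_eval = sklearn.ensemble.RandomForestClassifier(n_estimators=10, n_jobs=1)
clf_eval.fit(X_train, y_train)
y_pred = clf_eval.predict(X_test)
print('held-out accuracy: {:.3f}'.format(sklearn.metrics.accuracy_score(y_test, y_pred)))

# Rough water-area estimate from the tile-wide predictions: at 20 m resolution
# each pixel covers 400 m^2. Assumes class 1 is water; check le.classes_ to
# confirm the label encoding before trusting this number.
water_pixels = int((predictions == 1).sum())
print('approx. water area: {:.1f} km^2'.format(water_pixels * 400 / 1e6))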