Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using WOfS Classifier on sentinel 2 #438

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,093 changes: 1,093 additions & 0 deletions Real_world_examples/Water_extent_sentinel_2-wofs.ipynb

Large diffs are not rendered by default.

248 changes: 248 additions & 0 deletions Tools/deafrica_tools/wofsclassifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
# wofs classifier
import gc
import numpy as np
import xarray as xr


def wofs_classify(dataset_in, clean_mask=None, x_coord='x', y_coord='y',
time_coord='time', no_data=np.nan, mosaic=False, enforce_float64=False):
"""
Description:
Performs WOfS algorithm on given dataset.
Assumption:
- The WOfS algorithm is defined for Landsat 5/Landsat 7
References:
- Mueller, et al. (2015) "Water observations from space: Mapping surface water from
25 years of Landsat imagery across Australia." Remote Sensing of Environment.
- https://github.com/GeoscienceAustralia/eo-tools/blob/stable/eotools/water_classifier.py
-----
Inputs:
dataset_in (xarray.Dataset) - dataset retrieved from the Data Cube; should contain
coordinates: time, latitude, longitude
variables: blue, green, red, nir, swir1, swir2
x_coord, y_coord, time_coord: (str) - Names of DataArrays in `dataset_in` to use as x, y,
and time coordinates.
Optional Inputs:
clean_mask (nd numpy array with dtype boolean) - true for values user considers clean;
if user does not provide a clean mask, all values will be considered clean
no_data (int/float) - no data pixel value; default: -9999
mosaic (boolean) - flag to indicate if dataset_in is a mosaic. If mosaic = False, dataset_in
should have a time coordinate and wofs will run over each time slice; otherwise, dataset_in
should not have a time coordinate and wofs will run over the single mosaicked image
enforce_float64 (boolean) - flag to indicate whether or not to enforce float64 calculations;
will use float32 if false
Output:
dataset_out (xarray.DataArray) - wofs water classification results: 0 - not water; 1 - water
Throws:
ValueError - if dataset_in is an empty xarray.Dataset.
"""

def _band_ratio(a, b):
"""
Calculates a normalized ratio index
"""
return (a - b) / (a + b)

def _run_regression(band1, band2, band3, band4, band5, band7):
"""
Regression analysis based on Australia's training data
TODO: Return type
"""

# Compute normalized ratio indices
ndi_52 = _band_ratio(band5, band2)
ndi_43 = _band_ratio(band4, band3)
ndi_72 = _band_ratio(band7, band2)

#classified = np.ones(shape, dtype='uint8')

classified = np.full(shape, no_data, dtype='uint8')

# Start with the tree's left branch, finishing nodes as needed

# Left branch
r1 = ndi_52 <= -0.01

r2 = band1 <= 2083.5
classified[r1 & ~r2] = 0 #Node 3

r3 = band7 <= 323.5
_tmp = r1 & r2
_tmp2 = _tmp & r3
_tmp &= ~r3

r4 = ndi_43 <= 0.61
classified[_tmp2 & r4] = 1 #Node 6
classified[_tmp2 & ~r4] = 0 #Node 7

r5 = band1 <= 1400.5
_tmp2 = _tmp & ~r5

r6 = ndi_43 <= -0.01
classified[_tmp2 & r6] = 1 #Node 10
classified[_tmp2 & ~r6] = 0 #Node 11

_tmp &= r5

r7 = ndi_72 <= -0.23
_tmp2 = _tmp & ~r7

r8 = band1 <= 379
classified[_tmp2 & r8] = 1 #Node 14
classified[_tmp2 & ~r8] = 0 #Node 15

_tmp &= r7

r9 = ndi_43 <= 0.22
classified[_tmp & r9] = 1 #Node 17
_tmp &= ~r9

r10 = band1 <= 473
classified[_tmp & r10] = 1 #Node 19
classified[_tmp & ~r10] = 0 #Node 20

# Left branch complete; cleanup
del r2, r3, r4, r5, r6, r7, r8, r9, r10
gc.collect()

# Right branch of regression tree
r1 = ~r1

r11 = ndi_52 <= 0.23
_tmp = r1 & r11

r12 = band1 <= 334.5
_tmp2 = _tmp & ~r12
classified[_tmp2] = 0 #Node 23

_tmp &= r12

r13 = ndi_43 <= 0.54
_tmp2 = _tmp & ~r13
classified[_tmp2] = 0 #Node 25

_tmp &= r13

r14 = ndi_52 <= 0.12
_tmp2 = _tmp & r14
classified[_tmp2] = 1 #Node 27

_tmp &= ~r14

r15 = band3 <= 364.5
_tmp2 = _tmp & r15

r16 = band1 <= 129.5
classified[_tmp2 & r16] = 1 #Node 31
classified[_tmp2 & ~r16] = 0 #Node 32

_tmp &= ~r15

r17 = band1 <= 300.5
_tmp2 = _tmp & ~r17
_tmp &= r17
classified[_tmp] = 1 #Node 33
classified[_tmp2] = 0 #Node 34

_tmp = r1 & ~r11

r18 = ndi_52 <= 0.34
classified[_tmp & ~r18] = 0 #Node 36
_tmp &= r18

r19 = band1 <= 249.5
classified[_tmp & ~r19] = 0 #Node 38
_tmp &= r19

r20 = ndi_43 <= 0.45
classified[_tmp & ~r20] = 0 #Node 40
_tmp &= r20

r21 = band3 <= 364.5
classified[_tmp & ~r21] = 0 #Node 42
_tmp &= r21

r22 = band1 <= 129.5
classified[_tmp & r22] = 1 #Node 44
classified[_tmp & ~r22] = 0 #Node 45

# Completed regression tree

return classified

# Default to masking nothing.
# if clean_mask is None:
# clean_mask = create_default_clean_mask(dataset_in)

# Extract dataset bands needed for calculations
# blue = dataset_in.blue
# green = dataset_in.green
# red = dataset_in.red
# nir = dataset_in.nir
# swir1 = dataset_in.swir_1
# swir2 = dataset_in.swir_2
blue = dataset_in.blue
green = dataset_in.green
red = dataset_in.red
nir = dataset_in.nir
swir1 = dataset_in.swir_1
swir2 = dataset_in.swir_2

# Enforce float calculations - float64 if user specified, otherwise float32 will do
dtype = blue.values.dtype # This assumes all dataset bands will have
# the same dtype (should be a reasonable
# assumption)

if enforce_float64:
if dtype != 'float64':
blue.values = blue.values.astype('float64')
green.values = green.values.astype('float64')
red.values = red.values.astype('float64')
nir.values = nir.values.astype('float64')
swir1.values = swir1.values.astype('float64')
swir2.values = swir2.values.astype('float64')
else:
if dtype == 'float64':
pass
elif dtype != 'float32':
blue.values = blue.values.astype('float32')
green.values = green.values.astype('float32')
red.values = red.values.astype('float32')
nir.values = nir.values.astype('float32')
swir1.values = swir1.values.astype('float32')
swir2.values = swir2.values.astype('float32')

shape = blue.values.shape
#print('decision time!')
classified = _run_regression(blue.values, green.values, red.values, nir.values, swir1.values, swir2.values)

classified_clean = np.full(classified.shape, no_data, dtype='float64')
classified_clean[clean_mask] = classified[clean_mask] # Contains data for clear pixels

# Create xarray of data
x_coords = dataset_in[x_coord]
y_coords = dataset_in[y_coord]

time = None
coords = None
dims = None

if mosaic:
coords = [y_coords, x_coords]
dims = [y_coord, x_coord]
else:
time_coords = dataset_in[time_coord]
coords = [time_coords, y_coords, x_coords]
dims = [time_coord, y_coord, x_coord]

data_array = xr.DataArray(classified_clean, coords=coords, dims=dims)

if mosaic:
dataset_out = xr.Dataset({'wofs': data_array},
coords={y_coord: y_coords, x_coord: x_coords})
else:
dataset_out = xr.Dataset(
{'wofs': data_array},
coords={time_coord: time_coords, y_coord: y_coords, x_coord: x_coords})

return dataset_out