Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a function for subsampling functionality #141

Merged
merged 8 commits into from
Jun 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions tests/test_spatial_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import geoutils as gu
import numpy as np
import pytest
import rasterio as rio

import xdem
Expand Down Expand Up @@ -176,3 +177,59 @@ def make_gdal_hillshade(filepath) -> np.ndarray:

# Make sure that this doesn't create weird division warnings.
xdem.spatial_tools.hillshade(dem.data, dem.res)


class TestSubsample:
"""
Different examples of 1D to 3D arrays with masked values for testing.
"""

# Case 1 - 1D array, 1 masked value
array1D = np.ma.masked_array(np.arange(10), mask=np.zeros(10))
array1D.mask[3] = True
assert np.ndim(array1D) == 1
assert np.count_nonzero(array1D.mask) > 0

# Case 2 - 2D array, 1 masked value
array2D = np.ma.masked_array(np.arange(9).reshape((3, 3)), mask=np.zeros((3, 3)))
array2D.mask[0, 1] = True
assert np.ndim(array2D) == 2
assert np.count_nonzero(array2D.mask) > 0

# Case 3 - 3D array, 1 masked value
array3D = np.ma.masked_array(np.arange(9).reshape((1, 3, 3)), mask=np.zeros((1, 3, 3)))
array3D = np.ma.vstack((array3D, array3D + 10))
array3D.mask[0, 0, 1] = True
assert np.ndim(array3D) == 3
assert np.count_nonzero(array3D.mask) > 0

@pytest.mark.parametrize("array", [array1D, array2D, array3D])
def test_subsample(self, array):
"""
Test xdem.spatial_tools.subsample_raster.
"""
# Test that subsample > 1 works as expected, i.e. output 1D array, with no masked values, or selected size
for npts in np.arange(2, np.size(array)):
random_values = xdem.spatial_tools.subsample_raster(array, subsample=npts)
assert np.ndim(random_values) == 1
assert np.size(random_values) == npts
assert np.count_nonzero(random_values.mask) == 0

# Test if subsample > number of valid values => return all
random_values = xdem.spatial_tools.subsample_raster(array, subsample=np.size(array) + 3)
assert np.all(np.sort(random_values) == array[~array.mask])

# Test if subsample = 1 => return all valid values
random_values = xdem.spatial_tools.subsample_raster(array, subsample=1)
assert np.all(np.sort(random_values) == array[~array.mask])

# Test if subsample < 1
random_values = xdem.spatial_tools.subsample_raster(array, subsample=0.5)
assert np.size(random_values) == int(np.size(array) * 0.5)

# Test with optional argument return_indices
indices = xdem.spatial_tools.subsample_raster(array, subsample=0.3, return_indices=True)
assert np.ndim(indices) == 2
assert len(indices) == np.ndim(array)
assert np.ndim(array[indices]) == 1
assert np.size(array[indices]) == int(np.size(array) * 0.3)
47 changes: 44 additions & 3 deletions xdem/spatial_tools.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
"""

A set of basic operations to be run on 2D arrays and DEMs.
"""

from __future__ import annotations

from typing import Callable, Union

import geoutils as gu
import numpy as np
import rasterio as rio
import rasterio.warp
from tqdm import tqdm

import geoutils as gu


def get_mask(array: Union[np.ndarray, np.ma.masked_array]) -> np.ndarray:
"""
Expand Down Expand Up @@ -396,3 +397,43 @@ def hillshade(dem: Union[np.ndarray, np.ma.masked_array], resolution: Union[floa
# Return the hillshade, scaled to uint8 ranges.
# The output is scaled by "(x + 0.6) / 1.84" to make it more similar to GDAL.
return np.clip(255 * (shaded + 0.6) / 1.84, 0, 255).astype("float32")


def subsample_raster(
array: Union[np.ndarray, np.ma.masked_array], subsample: Union[float, int], return_indices: bool = False
) -> np.ndarray:
"""
Randomly subsample a 1D or 2D array by a subsampling factor, taking only non NaN/masked values.

:param subsample: If <= 1, will be considered a fraction of valid pixels to extract.
If > 1 will be considered the number of pixels to extract.
:param return_indices: If set to True, will return the extracted indices only.

:returns: The subsampled array (1D) or the indices to extract (same shape as input array)
"""
# Get number of points to extract
if (subsample <= 1) & (subsample > 0):
npoints = int(subsample * np.size(array))
elif subsample > 1:
npoints = int(subsample)
else:
raise ValueError("`subsample` must be > 0")

# Remove invalid values and flatten array
mask = get_mask(array) # -> need to remove .squeeze in get_mask
valids = np.argwhere(~mask.flatten()).squeeze()

# Checks that array and npoints are correct
assert np.ndim(valids) == 1, "Something is wrong with array dimension, check input data and shape"
if npoints > np.size(valids):
npoints = np.size(valids)

# Randomly extract npoints without replacement
indices = np.random.choice(valids, npoints, replace=False)
unraveled_indices = np.unravel_index(indices, array.shape)

if return_indices:
return unraveled_indices

else:
return array[unraveled_indices]