Skip to content

Commit

Permalink
Add robust polynomial, sum of sinusoids fitting (#151)
Browse files Browse the repository at this point in the history
* add robust polynomial fit + tests

* add array dimension to docs

* streamline test polynomial fit

* fix polynomial fit tests

* import scikit-learn as optional dependency

* use new subsample function + small fixes

* fix test

* add comments

* fixes with Eriks comments

* improve tests with Erik comments

* fix test

* add draft for robust scaling using ML methods

* draft robust sum of sin basinhopping

* move nd binning to spatial_tools

* finish basinhopping for sumfit

* add tests for sum of sins

* move tests for nd binning

* fix typing error

* fix tests

* rewrite tests with pytest.approx

* use np.polyval instead of writing out the polynomial

* rest of amaury comments

* add fit module, refactor nmad into spatialstats

* fix tests

* finish refactor nmad, fix tests

* increase error margin of test

* try fixing test

* add print statement to check values in CI

* move print statement to the right place

* streamline comments

* further streamline comments

* remove print statement

* subdivide scipy and sklearn into wrapper functions for reuse and clarity

* skip randomly failing test

* fix skip syntax
  • Loading branch information
rhugonnet authored Sep 8, 2021
1 parent cdfc8fd commit bd40f92
Show file tree
Hide file tree
Showing 13 changed files with 614 additions and 45 deletions.
4 changes: 2 additions & 2 deletions docs/source/code/coregistration_plot_nuth_kaab.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
ddem_pre = (dem_2009.data - dem_1990.data).filled(np.nan).squeeze()
ddem_post = (dem_2009.data - dem_coreg).filled(np.nan).squeeze()

nmad_pre = xdem.spatial_tools.nmad(ddem_pre[inlier_mask.squeeze()])
nmad_post = xdem.spatial_tools.nmad(ddem_post[inlier_mask.squeeze()])
nmad_pre = xdem.spatialstats.nmad(ddem_pre[inlier_mask.squeeze()])
nmad_post = xdem.spatialstats.nmad(ddem_post[inlier_mask.squeeze()])

vlim = 20
plt.figure(figsize=(8, 5))
Expand Down
4 changes: 2 additions & 2 deletions examples/plot_blockwise_coreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,5 @@
# %%
# We can compare the :ref:`spatial_stats_nmad` to validate numerically that there was an improvment:

print(f"Error before: {xdem.spatial_tools.nmad(diff_before):.2f} m")
print(f"Error after: {xdem.spatial_tools.nmad(diff_after):.2f} m")
print(f"Error before: {xdem.spatialstats.nmad(diff_before):.2f} m")
print(f"Error after: {xdem.spatialstats.nmad(diff_after):.2f} m")
2 changes: 1 addition & 1 deletion examples/plot_norm_regional_hypso.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@

difference = (ddem_filled - ddem).squeeze()[random_nans].filled(np.nan)
median = np.nanmedian(difference)
nmad = xdem.spatial_tools.nmad(difference)
nmad = xdem.spatialstats.nmad(difference)

plt.title(f"Median: {median:.2f} m, NMAD: {nmad:.2f} m")
plt.hist(difference, bins=np.linspace(-15, 15, 100))
Expand Down
4 changes: 2 additions & 2 deletions examples/plot_nuth_kaab.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@
# %%
# We can compare the :ref:`spatial_stats_nmad` to validate numerically that there was an improvment:

print(f"Error before: {xdem.spatial_tools.nmad(diff_before):.2f} m")
print(f"Error after: {xdem.spatial_tools.nmad(diff_after):.2f} m")
print(f"Error before: {xdem.spatialstats.nmad(diff_before):.2f} m")
print(f"Error after: {xdem.spatialstats.nmad(diff_after):.2f} m")

# %%
# In the plot above, one may notice a positive (blue) tendency toward the east.
Expand Down
12 changes: 6 additions & 6 deletions tests/test_coreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

with warnings.catch_warnings():
warnings.simplefilter("ignore")
from xdem import coreg, examples, spatial_tools, misc
from xdem import coreg, examples, spatial_tools, spatialstats, misc
import xdem


Expand Down Expand Up @@ -698,7 +698,7 @@ def test_apply_matrix():
# Check that the median is very close to zero
assert np.abs(np.nanmedian(diff)) < 0.01
# Check that the NMAD is low
assert spatial_tools.nmad(diff) < 0.01
assert spatialstats.nmad(diff) < 0.01

def rotation_matrix(rotation=30):
rotation = np.deg2rad(rotation)
Expand All @@ -721,7 +721,7 @@ def rotation_matrix(rotation=30):
)
# Make sure that the rotated DEM is way off, but is centered around the same approximate point.
assert np.abs(np.nanmedian(rotated_dem - ref.data.data)) < 1
assert spatial_tools.nmad(rotated_dem - ref.data.data) > 500
assert spatialstats.nmad(rotated_dem - ref.data.data) > 500

# Apply a rotation in the opposite direction
unrotated_dem = coreg.apply_matrix(
Expand Down Expand Up @@ -775,8 +775,8 @@ def rotation_matrix(rotation=30):
# Check that the median is very close to zero
assert np.abs(np.nanmedian(diff)) < 0.5
# Check that the NMAD is low
assert spatial_tools.nmad(diff) < 5
print(np.nanmedian(diff), spatial_tools.nmad(diff))
assert spatialstats.nmad(diff) < 5
print(np.nanmedian(diff), spatialstats.nmad(diff))


def test_warp_dem():
Expand Down Expand Up @@ -874,7 +874,7 @@ def test_warp_dem():
)
# Validate that the DEM is now more or less the same as the original.
# Due to the randomness, the threshold is quite high, but would be something like 10+ if it was incorrect.
assert spatial_tools.nmad(dem - untransformed_dem) < 0.5
assert spatialstats.nmad(dem - untransformed_dem) < 0.5

if False:
import matplotlib.pyplot as plt
Expand Down
143 changes: 143 additions & 0 deletions tests/test_fit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
"""
Functions to test the fitting tools.
"""
import numpy as np
import pandas as pd
import pytest

import xdem

from sklearn.metrics import mean_squared_error, median_absolute_error

class TestRobustFitting:

@pytest.mark.parametrize("pkg_estimator", [('sklearn','Linear'), ('scipy','Linear'), ('sklearn','Theil-Sen'),
('sklearn','RANSAC'),('sklearn','Huber')])
def test_robust_polynomial_fit(self, pkg_estimator: str):

# Define x vector
x = np.linspace(1, 10, 1000)
# Define exact polynomial
true_coefs = [-100, 5, 3, 2]
y = np.polyval(np.flip(true_coefs), x)

# Run fit
coefs, deg = xdem.fit.robust_polynomial_fit(x, y, linear_pkg=pkg_estimator[0], estimator_name=pkg_estimator[1], random_state=42)

# Check coefficients are constrained
assert deg == 3 or deg == 4
error_margins = [100, 5, 2, 1]
for i in range(4):
assert coefs[i] == pytest.approx(true_coefs[i], abs=error_margins[i])

def test_robust_polynomial_fit_noise_and_outliers(self):

np.random.seed(42)

# Define x vector
x = np.linspace(1,10,1000)
# Define an exact polynomial
true_coefs = [-100, 5, 3, 2]
y = np.polyval(np.flip(true_coefs), x)
# Add some noise on top
y += np.random.normal(loc=0, scale=3, size=1000)
# Add some outliers
y[50:75] = 0
y[900:925] = 1000

# Run with the "Linear" estimator
coefs, deg = xdem.fit.robust_polynomial_fit(x,y, estimator_name='Linear', linear_pkg='scipy',
loss='soft_l1', f_scale=0.5)

# Scipy solution should be quite robust to outliers/noise (with the soft_l1 method and f_scale parameter)
# However, it is subject to random processes inside the scipy function (couldn't find how to fix those...)
# It can find a degree 3, or 4 with coefficient close to 0
assert deg in [3, 4]
acceptable_scipy_linear_margins = [3, 3, 1, 1]
for i in range(4):
assert coefs[i] == pytest.approx(true_coefs[i], abs=acceptable_scipy_linear_margins[i])

# The sklearn Linear solution with MSE cost function will not be robust
coefs2, deg2 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='Linear', linear_pkg='sklearn',
cost_func=mean_squared_error, margin_improvement=50)
# It won't find the right degree because of the outliers and noise
assert deg2 != 3
# Using the median absolute error should improve the fit
coefs3, deg3 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='Linear', linear_pkg='sklearn',
cost_func=median_absolute_error, margin_improvement=50)
# Will find the right degree, but won't find the right coefficients because of the outliers and noise
assert deg3 == 3
sklearn_linear_error = [50, 10, 5, 0.5]
for i in range(4):
assert np.abs(coefs3[i] - true_coefs[i]) > sklearn_linear_error[i]

# Now, the robust estimators
# Theil-Sen should have better coefficients
coefs4, deg4 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='Theil-Sen', random_state=42)
assert deg4 == 3
# High degree coefficients should be well constrained
assert coefs4[2] == pytest.approx(true_coefs[2], abs=1)
assert coefs4[3] == pytest.approx(true_coefs[3], abs=1)

# RANSAC is not always optimal, here it does not work well
coefs5, deg5 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='RANSAC', random_state=42)
assert deg5 != 3

# Huber should perform well, close to the scipy robust solution
coefs6, deg6 = xdem.fit.robust_polynomial_fit(x, y, estimator_name='Huber')
assert deg6 == 3
for i in range(3):
assert coefs6[i+1] == pytest.approx(true_coefs[i+1], abs=1)

@pytest.mark.skip('This test randomly fails in CI: issue opened.')
def test_robust_sumsin_fit(self):

# Define X vector
x = np.linspace(0, 10, 1000)
# Define exact sum of sinusoid signal
true_coefs = np.array([(5, 1, np.pi),(3, 0.3, 0)]).flatten()
y = xdem.fit._sumofsinval(x, params=true_coefs)

# Check that the function runs
coefs, deg = xdem.fit.robust_sumsin_fit(x, y, random_state=42)

# Check that the estimated sum of sinusoid correspond to the input
for i in range(2):
assert coefs[3*i] == pytest.approx(true_coefs[3*i], abs=0.02)

# Check that using custom arguments does not trigger an error
bounds = [(3,7),(0.1,3),(0,2*np.pi),(1,7),(0.1,1),(0,2*np.pi),(0,1),(0.1,1),(0,2*np.pi)]
coefs, deg = xdem.fit.robust_sumsin_fit(x, y, bounds_amp_freq_phase=bounds, nb_frequency_max=2,
hop_length=0.01, random_state=42)

def test_robust_simsin_fit_noise_and_outliers(self):

# Check robustness to outliers
np.random.seed(42)
# Define X vector
x = np.linspace(0, 10, 1000)
# Define exact sum of sinusoid signal
true_coefs = np.array([(5, 1, np.pi), (3, 0.3, 0)]).flatten()
y = xdem.fit._sumofsinval(x, params=true_coefs)

# Add some noise
y += np.random.normal(loc=0, scale=0.25, size=1000)
# Add some outliers
y[50:75] = -10
y[900:925] = 10

# Define first guess for bounds and run
bounds = [(3, 7), (0.1, 3), (0, 2 * np.pi), (1, 7), (0.1, 1), (0, 2 * np.pi), (0, 1), (0.1, 1), (0, 2 * np.pi)]
coefs, deg = xdem.fit.robust_sumsin_fit(x, y, random_state=42, bounds_amp_freq_phase=bounds)

# Should be less precise, but still on point
# We need to re-order output coefficient to match input
if coefs[3] > coefs[0]:
coefs = np.concatenate((coefs[3:],coefs[0:3]))

# Check values
for i in range(2):
assert coefs[3*i] == pytest.approx(true_coefs[3*i], abs=0.2)
assert coefs[3 * i +1] == pytest.approx(true_coefs[3 * i +1], abs=0.2)
error_phase = min(np.abs(coefs[3 * i + 2] - true_coefs[ 3* i + 2]), np.abs(2* np.pi - (coefs[3 * i + 2] - true_coefs[3* i + 2])))
assert error_phase < 0.2
5 changes: 2 additions & 3 deletions tests/test_spatial_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Functions to test the spatial tools.
"""
from __future__ import annotations

import os
import shutil
import subprocess
Expand All @@ -27,7 +28,6 @@ def test_dem_subtraction():

assert np.nanmean(np.abs(diff.data)) < 100


class TestMerging:
"""
Test cases for stacking and merging DEMs
Expand Down Expand Up @@ -211,7 +211,6 @@ def test_get_array_and_mask(
else:
assert not np.shares_memory(array, arr_view)


class TestSubsample:
"""
Different examples of 1D to 3D arrays with masked values for testing.
Expand Down Expand Up @@ -265,4 +264,4 @@ def test_subsample(self, array):
assert np.ndim(indices) == 2
assert len(indices) == np.ndim(array)
assert np.ndim(array[indices]) == 1
assert np.size(array[indices]) == int(np.size(array) * 0.3)
assert np.size(array[indices]) == int(np.size(array) * 0.3)
2 changes: 1 addition & 1 deletion tests/test_terrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_attribute_functions(self, attribute: str) -> None:
diff = (attr_xdem - attr_gdal).filled(np.nan)
try:
assert np.nanmean(diff) < 5
assert xdem.spatial_tools.nmad(diff) < 5
assert xdem.spatialstats.nmad(diff) < 5
except Exception as exception:

if PLOT:
Expand Down
2 changes: 1 addition & 1 deletion xdem/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import coreg, dem, examples, spatial_tools, spatialstats, volume, filters, terrain
from . import coreg, dem, examples, spatial_tools, spatialstats, volume, filters, fit, terrain
from .ddem import dDEM
from .dem import DEM
from .demcollection import DEMCollection
8 changes: 4 additions & 4 deletions xdem/coreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def ramp(x_coordinates: np.ndarray, y_coordinates: np.ndarray) -> np.ndarray:
if metadata is not None:
metadata["deramp"] = {
"coefficients": coefficients,
"nmad": xdem.spatial_tools.nmad(residuals(coefficients, valid_diffs, valid_x_coords, valid_y_coords, degree))
"nmad": xdem.spatialstats.nmad(residuals(coefficients, valid_diffs, valid_x_coords, valid_y_coords, degree))
}

# Return the function which can be used later.
Expand Down Expand Up @@ -744,7 +744,7 @@ def error(self, reference_dem: Union[np.ndarray, np.ma.masked_array],
inlier_mask=inlier_mask, transform=transform)

error_functions = {
"nmad": xdem.spatial_tools.nmad,
"nmad": xdem.spatialstats.nmad,
"median": np.median,
"mean": np.mean,
"std": np.std,
Expand Down Expand Up @@ -1246,7 +1246,7 @@ def _fit_func(self, ref_dem: np.ndarray, tba_dem: np.ndarray, transform: Optiona
# Calculate initial dDEM statistics
elevation_difference = ref_dem - aligned_dem
bias = np.nanmedian(elevation_difference)
nmad_old = xdem.spatial_tools.nmad(elevation_difference)
nmad_old = xdem.spatialstats.nmad(elevation_difference)
if verbose:
print(" Statistics on initial dh:")
print(" Median = {:.2f} - NMAD = {:.2f}".format(bias, nmad_old))
Expand Down Expand Up @@ -1291,7 +1291,7 @@ def _fit_func(self, ref_dem: np.ndarray, tba_dem: np.ndarray, transform: Optiona
# Update statistics
elevation_difference = ref_dem - aligned_dem
bias = np.nanmedian(elevation_difference)
nmad_new = xdem.spatial_tools.nmad(elevation_difference)
nmad_new = xdem.spatialstats.nmad(elevation_difference)
nmad_gain = (nmad_new - nmad_old) / nmad_old*100

if verbose:
Expand Down
Loading

0 comments on commit bd40f92

Please sign in to comment.