Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🧹 Clean Up Tests + More Explicit Example #28

Merged
merged 7 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ tsdownsample welcomes contributions in the form of Pull Requests. For small chan

### Prerequisites

tsdownsample is written in Rust. You'll need to install the [Rust toolchain](https://www.rust-lang.org/tools/install) for development.
tsdownsample is written in Rust. You'll need to install the [Rust toolchain](https://www.rust-lang.org/tools/install) for development.

This project uses the nightly version of Rust. You can install it with:

Expand All @@ -22,7 +22,7 @@ and then set it as the default toolchain with:
rustup default nightly
```

### tsdownsample
### tsdownsample

The structure of the tsdownsample project is as follows:

Expand Down Expand Up @@ -69,7 +69,7 @@ To run the tests and linting:
make lint
```

### Formatting
### Formatting

We use [black](https://github.com/psf/black) and [isort](https://github.com/PyCQA/isort) to format the Python code.

Expand Down
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Extremely fast **time series downsampling 📈** for visualization, written in R

## Features ✨

* **Fast**: written in Rust with PyO3 bindings
* **Fast**: written in Rust with PyO3 bindings
- leverages optimized [argminmax](https://github.com/jvdd/argminmax) - which is SIMD accelerated with runtime feature detection
- scales linearly with the number of data points
<!-- TODO check if it scales sublinearly -->
Expand Down Expand Up @@ -59,15 +59,22 @@ x = np.arange(len(y))
# Downsample to 1000 points (assuming constant sampling rate)
s_ds = MinMaxLTTBDownsampler().downsample(y, n_out=1000)

# Select downsampled data
downsampled_y = y[s_ds]

# Downsample to 1000 points using the (possibly irregularly spaced) x-data
s_ds = MinMaxLTTBDownsampler().downsample(x, y, n_out=1000)

# Select downsampled data
downsampled_x = x[s_ds]
downsampled_y = y[s_ds]
```

## Downsampling algorithms & API
## Downsampling algorithms & API

### Downsampling API 📑

Each downsampling algorithm is implemented as a class that implements a `downsample` method.
Each downsampling algorithm is implemented as a class that implements a `downsample` method.
The signature of the `downsample` method:

```
Expand All @@ -84,7 +91,7 @@ downsample([x], y, n_out, **kwargs) -> ndarray[uint64]

**Returns**: a `ndarray[uint64]` of indices that can be used to index the original data.

<sup>*</sup><i>When there are gaps in the time series, fewer than `n_out` indices may be returned.</i>
<sup>*</sup><i>When there are gaps in the time series, fewer than `n_out` indices may be returned.</i>
<sup>**</sup><i>`parallel` is not supported for `LTTBDownsampler`.</i>
### Downsampling algorithms 📈

Expand Down
220 changes: 95 additions & 125 deletions tests/test_tsdownsample.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Iterable

import numpy as np
import pytest
from test_config import supported_dtypes_x, supported_dtypes_y
Expand All @@ -9,99 +11,81 @@
MinMaxDownsampler,
MinMaxLTTBDownsampler,
)
from tsdownsample.downsampling_interface import AbstractDownsampler

# TODO: Improve tests
# - compare implementations with existing plotly_resampler implementations


def test_m4_downsampler():
"""Test M4 downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = M4Downsampler().downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == len(arr) - 1
RUST_DOWNSAMPLERS = [
MinMaxDownsampler(),
M4Downsampler(),
LTTBDownsampler(),
MinMaxLTTBDownsampler(),
]

OTHER_DOWNSAMPLERS = [EveryNthDownsampler()]

def test_minmax_downsampler():
"""Test MinMax downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = MinMaxDownsampler().downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == len(arr) - 1

def generate_rust_downsamplers() -> Iterable[AbstractDownsampler]:
for downsampler in RUST_DOWNSAMPLERS:
yield downsampler

def test_lttb_downsampler():
"""Test LTTB downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = LTTBDownsampler().downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == len(arr) - 1

def generate_all_downsamplers() -> Iterable[AbstractDownsampler]:
for downsampler in RUST_DOWNSAMPLERS + OTHER_DOWNSAMPLERS:
yield downsampler


def test_minmaxlttb_downsampler():
"""Test MinMaxLTTB downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = MinMaxLTTBDownsampler().downsample(arr, n_out=100)
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_rust_downsampler(downsampler: AbstractDownsampler):
"""Test the Rust downsamplers."""
arr = np.arange(10_000)
s_downsampled = downsampler.downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == len(arr) - 1


def test_everynth_downsampler():
"""Test EveryNth downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = EveryNthDownsampler().downsample(arr, n_out=100)
arr = np.arange(10_000)
downsampler = EveryNthDownsampler()
s_downsampled = downsampler.downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == 9_900
jvdd marked this conversation as resolved.
Show resolved Hide resolved


## Parallel downsampling

rust_downsamplers = [
MinMaxDownsampler(),
M4Downsampler(),
LTTBDownsampler(),
MinMaxLTTBDownsampler(),
]


def test_parallel_downsampling():
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_parallel_downsampling(downsampler: AbstractDownsampler):
"""Test parallel downsampling."""
arr = np.random.randn(10_000).astype(np.float32)
for downsampler in rust_downsamplers:
s_downsampled = downsampler.downsample(arr, n_out=100, parallel=False)
s_downsampled_p = downsampler.downsample(arr, n_out=100, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)
s_downsampled = downsampler.downsample(arr, n_out=100, parallel=False)
s_downsampled_p = downsampler.downsample(arr, n_out=100, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)


def test_parallel_downsampling_with_x():
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_parallel_downsampling_with_x(downsampler: AbstractDownsampler):
"""Test parallel downsampling with x."""
arr = np.random.randn(10_001).astype(np.float32) # 10_001 to test edge case
idx = np.arange(len(arr))
for downsampler in rust_downsamplers:
s_downsampled = downsampler.downsample(idx, arr, n_out=100, parallel=False)
s_downsampled_p = downsampler.downsample(idx, arr, n_out=100, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)


## Using x

all_downsamplers = rust_downsamplers + [EveryNthDownsampler()]
s_downsampled = downsampler.downsample(idx, arr, n_out=100, parallel=False)
s_downsampled_p = downsampler.downsample(idx, arr, n_out=100, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)


def test_downsampling_with_x():
@pytest.mark.parametrize("downsampler", generate_all_downsamplers())
def test_downsampling_with_x(downsampler: AbstractDownsampler):
"""Test downsampling with x."""
arr = np.random.randn(2_001).astype(np.float32) # 2_001 to test edge case
idx = np.arange(len(arr))
for downsampler in all_downsamplers:
s_downsampled = downsampler.downsample(arr, n_out=100)
s_downsampled_x = downsampler.downsample(idx, arr, n_out=100)
assert np.all(s_downsampled == s_downsampled_x)

s_downsampled = downsampler.downsample(arr, n_out=100)
s_downsampled_x = downsampler.downsample(idx, arr, n_out=100)
assert np.all(s_downsampled == s_downsampled_x)

## Gaps in x


def test_downsampling_with_gaps_in_x():
@pytest.mark.parametrize("downsampler", generate_all_downsamplers())
def test_downsampling_with_gaps_in_x(downsampler: AbstractDownsampler):
"""Test downsampling with gaps in x.

With gap we do NOT mean a NaN in the array, but a large gap in the x values.
Expand All @@ -111,89 +95,81 @@ def test_downsampling_with_gaps_in_x():
arr = np.random.randn(10_000).astype(np.float32)
idx = np.arange(len(arr))
idx[: len(idx) // 2] += len(idx) // 2 # add large gap in x
for downsampler in all_downsamplers:
s_downsampled = downsampler.downsample(idx, arr, n_out=100)
assert len(s_downsampled) <= 100
assert len(s_downsampled) >= 66
s_downsampled = downsampler.downsample(idx, arr, n_out=100)
assert len(s_downsampled) <= 100
assert len(s_downsampled) >= 66


## Data types
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_downsampling_different_dtypes(downsampler: AbstractDownsampler):
"""Test downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=10_000)
res = []
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(arr, n_out=100)
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


def test_downsampling_different_dtypes():
"""Test downsampling with different data types."""
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_downsampling_different_dtypes_with_x(downsampler: AbstractDownsampler):
"""Test downsampling with x with different data types."""
arr_orig = np.random.randint(0, 100, size=10_000)
for downsampler in rust_downsamplers:
idx_orig = np.arange(len(arr_orig))
for dtype_x in supported_dtypes_x:
res = []
for dtype in supported_dtypes_y:
arr = arr_orig.astype(dtype)
s_downsampled = downsampler.downsample(arr, n_out=100)
if dtype is not np.bool_:
idx = idx_orig.astype(dtype_x)
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(idx, arr, n_out=100)
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


def test_downsampling_different_dtypes_with_x():
"""Test downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=10_000)
idx_orig = np.arange(len(arr_orig))
for downsampler in rust_downsamplers:
for dtype_x in supported_dtypes_x:
res = []
idx = idx_orig.astype(dtype_x)
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(idx, arr, n_out=100)
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


### Check no out of bounds indexing
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_downsampling_no_out_of_bounds_different_dtypes(
downsampler: AbstractDownsampler,
):
"""Test no out of bounds issues when downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=100)
res = []
for dtype in supported_dtypes_y:
arr = arr_orig.astype(dtype)
s_downsampled = downsampler.downsample(arr, n_out=76)
s_downsampled_p = downsampler.downsample(arr, n_out=76, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)
if dtype is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


def test_downsampling_no_out_of_bounds_different_dtypes():
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_downsampling_no_out_of_bounds_different_dtypes_with_x(
downsampler: AbstractDownsampler,
):
"""Test no out of bounds issues when downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=100)
for downsampler in rust_downsamplers:
idx_orig = np.arange(len(arr_orig))
for dtype_x in supported_dtypes_x:
res = []
for dtype in supported_dtypes_y:
arr = arr_orig.astype(dtype)
s_downsampled = downsampler.downsample(arr, n_out=76)
s_downsampled_p = downsampler.downsample(arr, n_out=76, parallel=True)
idx = idx_orig.astype(dtype_x)
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(idx, arr, n_out=76)
s_downsampled_p = downsampler.downsample(idx, arr, n_out=76, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)
if dtype is not np.bool_:
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


def test_downsampling_no_out_of_bounds_different_dtypes_with_x():
"""Test no out of bounds issues when downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=100)
idx_orig = np.arange(len(arr_orig))
for downsampler in rust_downsamplers:
for dtype_x in supported_dtypes_x:
res = []
idx = idx_orig.astype(dtype_x)
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(idx, arr, n_out=76)
s_downsampled_p = downsampler.downsample(
idx, arr, n_out=76, parallel=True
)
assert np.all(s_downsampled == s_downsampled_p)
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


### Check no overflow when calculating average


def test_lttb_no_overflow():
"""Test no overflow when calculating average."""
### THIS SHOULD NOT OVERFLOW & HAVE THE SAME RESULT
Expand All @@ -217,9 +193,6 @@ def test_lttb_no_overflow():
# (just as in numpy).


### Invalid n_out


def test_invalid_nout():
"""Test invalid n_out."""
arr = np.random.randint(0, 100, size=10_000)
Expand All @@ -233,9 +206,6 @@ def test_invalid_nout():
M4Downsampler().downsample(arr, n_out=34)


### Unsupported dtype


def test_error_unsupported_dtype():
"""Test unsupported dtype."""
arr = np.random.randint(0, 100, size=10_000)
Expand Down