Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🧹 Clean Up Tests + More Explicit Example #28

Merged
merged 7 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ tsdownsample welcomes contributions in the form of Pull Requests. For small chan

### Prerequisites

tsdownsample is written in Rust. You'll need to install the [Rust toolchain](https://www.rust-lang.org/tools/install) for development.
tsdownsample is written in Rust. You'll need to install the [Rust toolchain](https://www.rust-lang.org/tools/install) for development.

This project uses the nightly version of Rust. You can install it with:

Expand All @@ -22,7 +22,7 @@ and then set it as the default toolchain with:
rustup default nightly
```

### tsdownsample
### tsdownsample

The structure of the tsdownsample project is as follows:

Expand Down Expand Up @@ -69,7 +69,7 @@ To run the tests and linting:
make lint
```

### Formatting
### Formatting

We use [black](https://github.com/psf/black) and [isort](https://github.com/PyCQA/isort) to format the Python code.

Expand Down
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Extremely fast **time series downsampling 📈** for visualization, written in R

## Features ✨

* **Fast**: written in Rust with PyO3 bindings
* **Fast**: written in Rust with PyO3 bindings
- leverages optimized [argminmax](https://github.com/jvdd/argminmax) - which is SIMD accelerated with runtime feature detection
- scales linearly with the number of data points
<!-- TODO check if it scales sublinearly -->
Expand Down Expand Up @@ -59,15 +59,22 @@ x = np.arange(len(y))
# Downsample to 1000 points (assuming constant sampling rate)
s_ds = MinMaxLTTBDownsampler().downsample(y, n_out=1000)

# Select downsampled data
downsampled_y = y[s_ds]

# Downsample to 1000 points using the (possibly irregularly spaced) x-data
s_ds = MinMaxLTTBDownsampler().downsample(x, y, n_out=1000)

# Select downsampled data
downsampled_x = x[s_ds]
downsampled_y = y[s_ds]
```

## Downsampling algorithms & API
## Downsampling algorithms & API

### Downsampling API 📑

Each downsampling algorithm is implemented as a class that implements a `downsample` method.
Each downsampling algorithm is implemented as a class that implements a `downsample` method.
The signature of the `downsample` method:

```
Expand All @@ -84,7 +91,7 @@ downsample([x], y, n_out, **kwargs) -> ndarray[uint64]

**Returns**: a `ndarray[uint64]` of indices that can be used to index the original data.

<sup>*</sup><i>When there are gaps in the time series, fewer than `n_out` indices may be returned.</i>
<sup>*</sup><i>When there are gaps in the time series, fewer than `n_out` indices may be returned.</i>
<sup>**</sup><i>`parallel` is not supported for `LTTBDownsampler`.</i>
### Downsampling algorithms 📈

Expand Down
220 changes: 95 additions & 125 deletions tests/test_tsdownsample.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Iterable

import numpy as np
import pytest
from test_config import supported_dtypes_x, supported_dtypes_y
Expand All @@ -9,99 +11,81 @@
MinMaxDownsampler,
MinMaxLTTBDownsampler,
)
from tsdownsample.downsampling_interface import AbstractDownsampler

# TODO: Improve tests
# - compare implementations with existing plotly_resampler implementations


def test_m4_downsampler():
"""Test M4 downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = M4Downsampler().downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == len(arr) - 1
RUST_DOWNSAMPLERS = [
MinMaxDownsampler(),
M4Downsampler(),
LTTBDownsampler(),
MinMaxLTTBDownsampler(),
]

OTHER_DOWNSAMPLERS = [EveryNthDownsampler()]

def test_minmax_downsampler():
"""Test MinMax downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = MinMaxDownsampler().downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == len(arr) - 1

def generate_rust_downsamplers() -> Iterable[AbstractDownsampler]:
for downsampler in RUST_DOWNSAMPLERS:
yield downsampler

def test_lttb_downsampler():
"""Test LTTB downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = LTTBDownsampler().downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == len(arr) - 1

def generate_all_downsamplers() -> Iterable[AbstractDownsampler]:
for downsampler in RUST_DOWNSAMPLERS + OTHER_DOWNSAMPLERS:
yield downsampler


def test_minmaxlttb_downsampler():
"""Test MinMaxLTTB downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = MinMaxLTTBDownsampler().downsample(arr, n_out=100)
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_rust_downsampler(downsampler: AbstractDownsampler):
"""Test the Rust downsamplers."""
arr = np.arange(10_000)
s_downsampled = downsampler.downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == len(arr) - 1


def test_everynth_downsampler():
"""Test EveryNth downsampler."""
arr = np.array(np.arange(10_000))
s_downsampled = EveryNthDownsampler().downsample(arr, n_out=100)
arr = np.arange(10_000)
downsampler = EveryNthDownsampler()
s_downsampled = downsampler.downsample(arr, n_out=100)
assert s_downsampled[0] == 0
assert s_downsampled[-1] == 9_900
jvdd marked this conversation as resolved.
Show resolved Hide resolved


## Parallel downsampling

rust_downsamplers = [
MinMaxDownsampler(),
M4Downsampler(),
LTTBDownsampler(),
MinMaxLTTBDownsampler(),
]


def test_parallel_downsampling():
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_parallel_downsampling(downsampler: AbstractDownsampler):
"""Test parallel downsampling."""
arr = np.random.randn(10_000).astype(np.float32)
for downsampler in rust_downsamplers:
s_downsampled = downsampler.downsample(arr, n_out=100, parallel=False)
s_downsampled_p = downsampler.downsample(arr, n_out=100, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)
s_downsampled = downsampler.downsample(arr, n_out=100, parallel=False)
s_downsampled_p = downsampler.downsample(arr, n_out=100, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)


def test_parallel_downsampling_with_x():
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_parallel_downsampling_with_x(downsampler: AbstractDownsampler):
"""Test parallel downsampling with x."""
arr = np.random.randn(10_001).astype(np.float32) # 10_001 to test edge case
idx = np.arange(len(arr))
for downsampler in rust_downsamplers:
s_downsampled = downsampler.downsample(idx, arr, n_out=100, parallel=False)
s_downsampled_p = downsampler.downsample(idx, arr, n_out=100, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)


## Using x

all_downsamplers = rust_downsamplers + [EveryNthDownsampler()]
s_downsampled = downsampler.downsample(idx, arr, n_out=100, parallel=False)
s_downsampled_p = downsampler.downsample(idx, arr, n_out=100, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)


def test_downsampling_with_x():
@pytest.mark.parametrize("downsampler", generate_all_downsamplers())
def test_downsampling_with_x(downsampler: AbstractDownsampler):
"""Test downsampling with x."""
arr = np.random.randn(2_001).astype(np.float32) # 2_001 to test edge case
idx = np.arange(len(arr))
for downsampler in all_downsamplers:
s_downsampled = downsampler.downsample(arr, n_out=100)
s_downsampled_x = downsampler.downsample(idx, arr, n_out=100)
assert np.all(s_downsampled == s_downsampled_x)

s_downsampled = downsampler.downsample(arr, n_out=100)
s_downsampled_x = downsampler.downsample(idx, arr, n_out=100)
assert np.all(s_downsampled == s_downsampled_x)

## Gaps in x


def test_downsampling_with_gaps_in_x():
@pytest.mark.parametrize("downsampler", generate_all_downsamplers())
def test_downsampling_with_gaps_in_x(downsampler: AbstractDownsampler):
"""Test downsampling with gaps in x.

With gap we do NOT mean a NaN in the array, but a large gap in the x values.
Expand All @@ -111,89 +95,81 @@ def test_downsampling_with_gaps_in_x():
arr = np.random.randn(10_000).astype(np.float32)
idx = np.arange(len(arr))
idx[: len(idx) // 2] += len(idx) // 2 # add large gap in x
for downsampler in all_downsamplers:
s_downsampled = downsampler.downsample(idx, arr, n_out=100)
assert len(s_downsampled) <= 100
assert len(s_downsampled) >= 66
s_downsampled = downsampler.downsample(idx, arr, n_out=100)
assert len(s_downsampled) <= 100
assert len(s_downsampled) >= 66


## Data types
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_downsampling_different_dtypes(downsampler: AbstractDownsampler):
"""Test downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=10_000)
res = []
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(arr, n_out=100)
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


def test_downsampling_different_dtypes():
"""Test downsampling with different data types."""
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_downsampling_different_dtypes_with_x(downsampler: AbstractDownsampler):
"""Test downsampling with x with different data types."""
arr_orig = np.random.randint(0, 100, size=10_000)
for downsampler in rust_downsamplers:
idx_orig = np.arange(len(arr_orig))
for dtype_x in supported_dtypes_x:
res = []
for dtype in supported_dtypes_y:
arr = arr_orig.astype(dtype)
s_downsampled = downsampler.downsample(arr, n_out=100)
if dtype is not np.bool_:
idx = idx_orig.astype(dtype_x)
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(idx, arr, n_out=100)
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


def test_downsampling_different_dtypes_with_x():
"""Test downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=10_000)
idx_orig = np.arange(len(arr_orig))
for downsampler in rust_downsamplers:
for dtype_x in supported_dtypes_x:
res = []
idx = idx_orig.astype(dtype_x)
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(idx, arr, n_out=100)
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


### Check no out of bounds indexing
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_downsampling_no_out_of_bounds_different_dtypes(
downsampler: AbstractDownsampler,
):
"""Test no out of bounds issues when downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=100)
res = []
for dtype in supported_dtypes_y:
arr = arr_orig.astype(dtype)
s_downsampled = downsampler.downsample(arr, n_out=76)
s_downsampled_p = downsampler.downsample(arr, n_out=76, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)
if dtype is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


def test_downsampling_no_out_of_bounds_different_dtypes():
@pytest.mark.parametrize("downsampler", generate_rust_downsamplers())
def test_downsampling_no_out_of_bounds_different_dtypes_with_x(
downsampler: AbstractDownsampler,
):
"""Test no out of bounds issues when downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=100)
for downsampler in rust_downsamplers:
idx_orig = np.arange(len(arr_orig))
for dtype_x in supported_dtypes_x:
res = []
for dtype in supported_dtypes_y:
arr = arr_orig.astype(dtype)
s_downsampled = downsampler.downsample(arr, n_out=76)
s_downsampled_p = downsampler.downsample(arr, n_out=76, parallel=True)
idx = idx_orig.astype(dtype_x)
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(idx, arr, n_out=76)
s_downsampled_p = downsampler.downsample(idx, arr, n_out=76, parallel=True)
assert np.all(s_downsampled == s_downsampled_p)
if dtype is not np.bool_:
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


def test_downsampling_no_out_of_bounds_different_dtypes_with_x():
"""Test no out of bounds issues when downsampling with different data types."""
arr_orig = np.random.randint(0, 100, size=100)
idx_orig = np.arange(len(arr_orig))
for downsampler in rust_downsamplers:
for dtype_x in supported_dtypes_x:
res = []
idx = idx_orig.astype(dtype_x)
for dtype_y in supported_dtypes_y:
arr = arr_orig.astype(dtype_y)
s_downsampled = downsampler.downsample(idx, arr, n_out=76)
s_downsampled_p = downsampler.downsample(
idx, arr, n_out=76, parallel=True
)
assert np.all(s_downsampled == s_downsampled_p)
if dtype_y is not np.bool_:
res += [s_downsampled]
for i in range(1, len(res)):
assert np.all(res[0] == res[i])


### Check no overflow when calculating average


def test_lttb_no_overflow():
"""Test no overflow when calculating average."""
### THIS SHOULD NOT OVERFLOW & HAVE THE SAME RESULT
Expand All @@ -217,9 +193,6 @@ def test_lttb_no_overflow():
# (just as in numpy).


### Invalid n_out


def test_invalid_nout():
"""Test invalid n_out."""
arr = np.random.randint(0, 100, size=10_000)
Expand All @@ -233,9 +206,6 @@ def test_invalid_nout():
M4Downsampler().downsample(arr, n_out=34)


### Unsupported dtype


def test_error_unsupported_dtype():
"""Test unsupported dtype."""
arr = np.random.randint(0, 100, size=10_000)
Expand Down