Skip to content

Commit

Permalink
Fix #695 Add "subseq_isconstant" param to API (#789)
Browse files Browse the repository at this point in the history
* add T_subseq_isconstant param to naive stump

* add test for new param in naive, expected error

* add param to a core function to increase flexibility for user

* add new param to public API to increase flexibility for user

* fix decorator

* add custom_func for rolling_isconstant

* fix if block

* change black minimum version to resolve trailing-comma issue

* fix format with latest version of black

* retrieve setting for black minimum version

* replace array with a custom function

* replace array with func as new param for determining constant subseqs

* revise core functions to have param isconstant_custom_func

* update stump and test_stump

* add param custom_func to naive rolling_isconstant

* add an example for isconstant custom func to naive

* add test function for isconstant custom func

* add test function for isconstant custom function

* update minimum version of black

* fix docstrings

* fix format

* revise a naive function and its name

* fix minor bug

* allow param to accept type np.ndarray or function

* minor fixes

* fixed minor issues

* fix bugs and including type functools.partial

* fix decorator

* update function preprocess_diagonal

* update function preprocess_diagonal

* remove if block

* fix format

* update naive by removing unnecessary if block

* update docstring

* add pragma nocover

* increase coverage by enhancing test function

* undo changes to black minimum version

* remove unnecessary comment

* revise docstrings

* improve readability

* move if checks to the function rolling_isconstant

* fix black format

* revise docstring

* revise if block structure to improve readability

* add function to consider isfinite in computing isconstant

* add warning when isconstant=True for non-finite subseq

* minor changes

* add docstring and add function to preprocess

* add test function for fix_isconstant

* remove error when subseq_isconstant does not cover all stddev=0 cases

* remove unnecessary if condition

* minor changes

* Add pragma nocover comments

* reduce number of imports

* re-design function rolling_isconstant

* improve readability of comment

* added new test function

* wrap a function around func signature check and add unit test

* change output of function and update functions and tests accordingly

* remove unnecessary test function

* add param isconstant to stumped and update unit test

* fixed non_normalized  decorator

* Add param subseq_isconstant to gpu_stump and add its test function

* minor fixes

* avoid random behavior of argsort when values are the same by passing kind

* update if block

* update naive function rolling_isconstant

* add comment

* minor updates

* fix naive stump

* change function from public to private

* fix coverage
  • Loading branch information
NimaSarajpoor authored Mar 12, 2023
1 parent c7d5321 commit c06f0e9
Show file tree
Hide file tree
Showing 9 changed files with 640 additions and 51 deletions.
279 changes: 249 additions & 30 deletions stumpy/core.py

Large diffs are not rendered by default.

41 changes: 37 additions & 4 deletions stumpy/gpu_stump.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import math
import multiprocessing as mp
import os

import numpy as np
from numba import cuda

Expand Down Expand Up @@ -477,7 +476,16 @@ def _gpu_stump(

@core.non_normalized(gpu_aamp)
def gpu_stump(
T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True, p=2.0, k=1
T_A,
m,
T_B=None,
ignore_trivial=True,
device_id=0,
normalize=True,
p=2.0,
k=1,
T_A_subseq_isconstant=None,
T_B_subseq_isconstant=None,
):
"""
Compute the z-normalized matrix profile with one or more GPU devices
Expand Down Expand Up @@ -525,6 +533,26 @@ def gpu_stump(
Note that this will increase the total computational time and memory usage
when k > 1.
T_A_subseq_isconstant : numpy.ndarray or function, default None
A boolean array that indicates whether a subsequence in `T_A` is constant
(True). Alternatively, a custom, user-defined function that returns a
boolean array that indicates whether a subsequence in `T_A` is constant
(True). The function must only take two arguments, `a`, a 1-D array,
and `w`, the window size, while additional arguments may be specified
by currying the user-defined function using `functools.partial`. Any
subsequence with at least one np.nan/np.inf will automatically have its
corresponding value set to False in this boolean array.
T_B_subseq_isconstant : numpy.ndarray or function, default None
A boolean array that indicates whether a subsequence in `T_B` is constant
(True). Alternatively, a custom, user-defined function that returns a
boolean array that indicates whether a subsequence in `T_B` is constant
(True). The function must only take two arguments, `a`, a 1-D array,
and `w`, the window size, while additional arguments may be specified
by currying the user-defined function using `functools.partial`. Any
subsequence with at least one np.nan/np.inf will automatically have its
corresponding value set to False in this boolean array.
Returns
-------
out : numpy.ndarray
Expand Down Expand Up @@ -592,9 +620,14 @@ def gpu_stump(
if T_B is None: # Self join!
T_B = T_A
ignore_trivial = True
T_B_subseq_isconstant = T_A_subseq_isconstant

T_A, M_T, Σ_T, T_subseq_isconstant = core.preprocess(T_A, m)
T_B, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess(T_B, m)
T_A, M_T, Σ_T, T_subseq_isconstant = core.preprocess(
T_A, m, T_subseq_isconstant=T_A_subseq_isconstant
)
T_B, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess(
T_B, m, T_subseq_isconstant=T_B_subseq_isconstant
)

if T_A.ndim != 1: # pragma: no cover
raise ValueError(
Expand Down
44 changes: 39 additions & 5 deletions stumpy/stump.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,21 @@ def _stump(
)


@core.non_normalized(aamp)
def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
@core.non_normalized(
aamp,
exclude=["normalize", "p", "T_A_subseq_isconstant", "T_B_subseq_isconstant"],
)
def stump(
T_A,
m,
T_B=None,
ignore_trivial=True,
normalize=True,
p=2.0,
k=1,
T_A_subseq_isconstant=None,
T_B_subseq_isconstant=None,
):
"""
Compute the z-normalized matrix profile
Expand Down Expand Up @@ -551,6 +564,26 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
when k > 1. If you have access to a GPU device, then you may be able to
leverage `gpu_stump` for better performance and scalability.
T_A_subseq_isconstant : numpy.ndarray or function, default None
A boolean array that indicates whether a subsequence in `T_A` is constant
(True). Alternatively, a custom, user-defined function that returns a
boolean array that indicates whether a subsequence in `T_A` is constant
(True). The function must only take two arguments, `a`, a 1-D array,
and `w`, the window size, while additional arguments may be specified
by currying the user-defined function using `functools.partial`. Any
subsequence with at least one np.nan/np.inf will automatically have its
corresponding value set to False in this boolean array.
T_B_subseq_isconstant : numpy.ndarray or function, default None
A boolean array that indicates whether a subsequence in `T_B` is constant
(True). Alternatively, a custom, user-defined function that returns a
boolean array that indicates whether a subsequence in `T_B` is constant
(True). The function must only take two arguments, `a`, a 1-D array,
and `w`, the window size, while additional arguments may be specified
by currying the user-defined function using `functools.partial`. Any
subsequence with at least one np.nan/np.inf will automatically have its
corresponding value set to False in this boolean array.
Returns
-------
out : numpy.ndarray
Expand Down Expand Up @@ -629,8 +662,9 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
[0.11633857113691416, 0, 0, -1]], dtype=object)
"""
if T_B is None:
T_B = T_A
ignore_trivial = True
T_B = T_A
T_B_subseq_isconstant = T_A_subseq_isconstant

(
T_A,
Expand All @@ -639,7 +673,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
μ_Q_m_1,
T_A_subseq_isfinite,
T_A_subseq_isconstant,
) = core.preprocess_diagonal(T_A, m)
) = core.preprocess_diagonal(T_A, m, T_subseq_isconstant=T_A_subseq_isconstant)

(
T_B,
Expand All @@ -648,7 +682,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
M_T_m_1,
T_B_subseq_isfinite,
T_B_subseq_isconstant,
) = core.preprocess_diagonal(T_B, m)
) = core.preprocess_diagonal(T_B, m, T_subseq_isconstant=T_B_subseq_isconstant)

if T_A.ndim != 1: # pragma: no cover
raise ValueError(
Expand Down
38 changes: 35 additions & 3 deletions stumpy/stumped.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,18 @@ def _dask_stumped(


@core.non_normalized(aamped)
def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1):
def stumped(
client,
T_A,
m,
T_B=None,
ignore_trivial=True,
normalize=True,
p=2.0,
k=1,
T_A_subseq_isconstant=None,
T_B_subseq_isconstant=None,
):
"""
Compute the z-normalized matrix profile with a distributed dask/ray cluster
Expand Down Expand Up @@ -251,6 +262,26 @@ def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0
when k > 1. If you have access to a GPU device, then you may be able to
leverage `gpu_stump` for better performance and scalability.
T_A_subseq_isconstant : numpy.ndarray or function, default None
A boolean array that indicates whether a subsequence in `T_A` is constant
(True). Alternatively, a custom, user-defined function that returns a
boolean array that indicates whether a subsequence in `T_A` is constant
(True). The function must only take two arguments, `a`, a 1-D array,
and `w`, the window size, while additional arguments may be specified
by currying the user-defined function using `functools.partial`. Any
subsequence with at least one np.nan/np.inf will automatically have its
corresponding value set to False in this boolean array.
T_B_subseq_isconstant : numpy.ndarray or function, default None
A boolean array that indicates whether a subsequence in `T_B` is constant
(True). Alternatively, a custom, user-defined function that returns a
boolean array that indicates whether a subsequence in `T_B` is constant
(True). The function must only take two arguments, `a`, a 1-D array,
and `w`, the window size, while additional arguments may be specified
by currying the user-defined function using `functools.partial`. Any
subsequence with at least one np.nan/np.inf will automatically have its
corresponding value set to False in this boolean array.
Returns
-------
out : numpy.ndarray
Expand Down Expand Up @@ -340,6 +371,7 @@ def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0
if T_B is None:
T_B = T_A
ignore_trivial = True
T_B_subseq_isconstant = T_A_subseq_isconstant

(
T_A,
Expand All @@ -348,7 +380,7 @@ def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0
μ_Q_m_1,
T_A_subseq_isfinite,
T_A_subseq_isconstant,
) = core.preprocess_diagonal(T_A, m)
) = core.preprocess_diagonal(T_A, m, T_subseq_isconstant=T_A_subseq_isconstant)

(
T_B,
Expand All @@ -357,7 +389,7 @@ def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0
M_T_m_1,
T_B_subseq_isfinite,
T_B_subseq_isconstant,
) = core.preprocess_diagonal(T_B, m)
) = core.preprocess_diagonal(T_B, m, T_subseq_isconstant=T_B_subseq_isconstant)

if T_A.ndim != 1: # pragma: no cover
raise ValueError(
Expand Down
65 changes: 59 additions & 6 deletions tests/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,32 @@
from stumpy import core, config


def rolling_isconstant(a, w):
    """
    Flag the rolling windows of `a` that are both finite and constant.

    Parameters
    ----------
    a : numpy.ndarray
        Input array whose last axis is windowed

    w : int
        Window size

    Returns
    -------
    out : numpy.ndarray
        Element-wise logical AND of `core.rolling_isfinite(a, w)` and a mask
        that is True where the peak-to-peak range of the window (taken along
        the last axis of `core.rolling_window(a, w)`) equals zero
    """
    isfinite = core.rolling_isfinite(a, w)
    # A window whose max and min coincide has a peak-to-peak range of zero
    ptp = np.ptp(core.rolling_window(a, w), axis=-1)

    return np.logical_and(isfinite, ptp == 0)
def is_ptp_zero_1d(a, w):
    """
    Flag each length-`w` window of the 1-D sequence `a` whose maximum and
    minimum are equal.

    Parameters
    ----------
    a : numpy.ndarray
        A 1-D sequence

    w : int
        Window size

    Returns
    -------
    out : numpy.ndarray
        Boolean array of length `len(a) - w + 1`; entry `i` is True when
        `max(a[i : i + w]) - min(a[i : i + w])` equals zero. A window whose
        difference evaluates to NaN compares unequal to zero and is therefore
        reported as False.
    """
    n_windows = len(a) - w + 1
    ptp = np.empty(n_windows)
    for start in range(n_windows):
        window = a[start : start + w]
        ptp[start] = np.max(window) - np.min(window)

    return ptp == 0


def rolling_isconstant(a, w, a_subseq_isconstant=None):
    """
    Resolve `a_subseq_isconstant` into a boolean "is constant" result for the
    rolling windows of `a`.

    Parameters
    ----------
    a : numpy.ndarray
        Input array; a callable `a_subseq_isconstant` is applied to each 1-D
        slice taken along its last axis

    w : int
        Window size

    a_subseq_isconstant : numpy.ndarray or function, default None
        Either a precomputed result or a callable invoked as `func(a_row, w)`.
        When None, `is_ptp_zero_1d` is used as the callable.

    Returns
    -------
    out : numpy.ndarray
        When `a_subseq_isconstant` is not callable, it is returned unchanged
        (no finiteness masking is applied here). Otherwise, the element-wise
        logical AND of `core.rolling_isfinite(a, w)` and the callable's output.
    """
    if a_subseq_isconstant is None:
        a_subseq_isconstant = is_ptp_zero_1d

    if not callable(a_subseq_isconstant):
        # Precomputed result supplied by the caller: pass it straight through
        return a_subseq_isconstant

    isfinite = core.rolling_isfinite(a, w)
    # `w` is forwarded positionally, so the callable is invoked as func(a_row, w)
    isconstant = np.apply_along_axis(a_subseq_isconstant, -1, a, w)

    return np.logical_and(isfinite, isconstant)


def rolling_nanstd(a, w):
Expand Down Expand Up @@ -178,7 +200,16 @@ def searchsorted_right(a, v):
return len(a)


def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):
def stump(
T_A,
m,
T_B=None,
exclusion_zone=None,
row_wise=False,
k=1,
T_A_subseq_isconstant=None,
T_B_subseq_isconstant=None,
):
"""
Traverse distance matrix diagonally and update the top-k matrix profile and
matrix profile indices if the parameter `row_wise` is set to `False`. If the
Expand All @@ -190,13 +221,26 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):
[distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)]
)
T_B = T_A.copy()
T_B_subseq_isconstant = T_A_subseq_isconstant
else:
ignore_trivial = False
distance_matrix = np.array(
[distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)]
)

T_A_subseq_isconstant = rolling_isconstant(T_A, m, T_A_subseq_isconstant)
T_B_subseq_isconstant = rolling_isconstant(T_B, m, T_B_subseq_isconstant)

distance_matrix[np.isnan(distance_matrix)] = np.inf
for i in range(distance_matrix.shape[0]):
for j in range(distance_matrix.shape[1]):
if np.isfinite(distance_matrix[i, j]):
if T_A_subseq_isconstant[i] and T_B_subseq_isconstant[j]:
distance_matrix[i, j] = 0.0
elif T_A_subseq_isconstant[i] or T_B_subseq_isconstant[j]:
distance_matrix[i, j] = np.sqrt(m)
else: # pragma: no cover
pass

n_A = T_A.shape[0]
n_B = T_B.shape[0]
Expand All @@ -215,7 +259,7 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1):

for i, D in enumerate(distance_matrix): # D: distance profile
# self-join / AB-join: matrix profile and indices
indices = np.argsort(D)[:k]
indices = np.argsort(D, kind="mergesort")[:k]
P[i, :k] = D[indices]
indices[P[i, :k] == np.inf] = -1
I[i, :k] = indices
Expand Down Expand Up @@ -2032,3 +2076,12 @@ def find_matches(D, excl_zone, max_distance, max_matches=None):
matches = [x for x in matches if x < idx - excl_zone or x > idx + excl_zone]

return np.array(result[:max_matches], dtype=object)


def isconstant_func_stddev_threshold(a, w, quantile_threshold=0):
    """
    Example custom "is constant" function based on the rolling standard
    deviation returned by `rolling_nanstd(a, w)`.

    Parameters
    ----------
    a : numpy.ndarray
        Input array passed to `rolling_nanstd`

    w : int
        Window size

    quantile_threshold : float, default 0
        When 0, a window is flagged only if its rolling standard deviation is
        exactly zero. Otherwise, the value is handed to `np.quantile` to derive
        a cutoff from the rolling standard deviations, and windows at or below
        that cutoff are flagged.

    Returns
    -------
    out : numpy.ndarray
        Boolean mask with one entry per rolling standard deviation value
    """
    stddev = rolling_nanstd(a, w)
    if quantile_threshold != 0:
        cutoff = np.quantile(stddev, quantile_threshold)
        return stddev <= cutoff

    return stddev == 0
Loading

0 comments on commit c06f0e9

Please sign in to comment.