From 5ab291c2a80b75cfd6fa36a74cecb57717a2f834 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 28 Jan 2023 02:27:17 -0500 Subject: [PATCH 01/72] add T_subseq_isconstant param to naive stump --- tests/naive.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 222b85637..0e5acc9ab 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -178,7 +178,16 @@ def searchsorted_right(a, v): return len(a) -def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): +def stump( + T_A, + m, + T_B=None, + exclusion_zone=None, + row_wise=False, + k=1, + T_A_subseq_isconstant=None, + T_B_subseq_isconstant=None, +): """ Traverse distance matrix diagonally and update the top-k matrix profile and matrix profile indices if the parameter `row_wise` is set to `False`. If the @@ -196,7 +205,21 @@ def stump(T_A, m, T_B=None, exclusion_zone=None, row_wise=False, k=1): [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] ) + if T_A_subseq_isconstant is None: + T_A_subseq_isconstant = rolling_isconstant(T_A, m) + if T_B_subseq_isconstant is None: + T_B_subseq_isconstant = rolling_isconstant(T_B, m) + distance_matrix[np.isnan(distance_matrix)] = np.inf + for i in range(distance_matrix.shape[0]): + for j in range(distance_matrix.shape[1]): + if np.isfinite(distance_matrix[i, j]): + if T_A_subseq_isconstant[i] and T_B_subseq_isconstant[j]: + distance_matrix[i, j] = 0.0 + elif T_A_subseq_isconstant[i] or T_B_subseq_isconstant[j]: + distance_matrix[i, j] = np.sqrt(m) + else: + continue n_A = T_A.shape[0] n_B = T_B.shape[0] From 3c930798df48efbc5c1731547e6ee160ca3179f7 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 28 Jan 2023 10:02:46 -0500 Subject: [PATCH 02/72] add test for new param in naive, expected error --- tests/test_stump.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_stump.py b/tests/test_stump.py index e08746758..3f7573aa9 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -271,3 +271,22 @@ def test_stump_A_B_join_KNN(T_A, T_B): comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False, k=k) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stump_self_join_subseq_isconstant(T_A, T_B): + T_A = T_B + m = 3 + l = T_A.shape[0] - m + 1 + T_A_subseq_isconstant = np.full(l, 0, dtype=bool) + + full_indices_range = np.arange(l) + for i in range(l + 1): + IDX = np.random.choice(full_indices_range, i, replace=False) + T_A_subseq_isconstant[IDX] = True + + ref_mp = naive.stump(T_A, m=m, T_A_subseq_isconstant=T_A_subseq_isconstant) + comp_mp = stump(T_A, m, T_A_subseq_isconstant=T_A_subseq_isconstant) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From 8da13b91cd448a7e9353a5cea513f7124f5cfbdb Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 28 Jan 2023 10:24:58 -0500 Subject: [PATCH 03/72] add param to a core function to increase flexibility for user --- stumpy/core.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index d8d212c19..e1c324534 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1904,7 +1904,7 @@ def preprocess_non_normalized(T, m): return T, T_subseq_isfinite -def preprocess_diagonal(T, m): +def preprocess_diagonal(T, m, T_subseq_isconstant=None): """ Preprocess a time series that is to be used when traversing the diagonals of a distance matrix. @@ -1927,6 +1927,9 @@ def preprocess_diagonal(T, m): m : int Window size + T_subseq_isconstant : numpy.ndarray, default None + A boolean array that indicates whether a subsequence in `T` is constant (True) + Returns ------- T : numpy.ndarray @@ -1952,11 +1955,18 @@ def preprocess_diagonal(T, m): check_window_size(m, max_size=T.shape[-1]) T_subseq_isfinite = rolling_isfinite(T, m) T[~np.isfinite(T)] = np.nan - T_subseq_isconstant = rolling_isconstant(T, m) + if T_subseq_isconstant is None: + T_subseq_isconstant = rolling_isconstant(T, m) T[np.isnan(T)] = 0 M_T, Σ_T = compute_mean_std(T, m) Σ_T[T_subseq_isconstant] = 1.0 # Avoid divide by zero in next inversion step + if np.any(Σ_T == 0.0): # pragma nocover + raise ValueError( + "The sliding standard deviation of input contains 0.0 at indices" + "where T_subseq_isconstant is False. Try to set those indices to" + "True in `T_subseq_isconstant`." + ) Σ_T_inverse = 1.0 / Σ_T M_T_m_1, _ = compute_mean_std(T, m - 1) From 9d4f7809774218f03505595ff128ce4ad47f4605 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 28 Jan 2023 10:43:58 -0500 Subject: [PATCH 04/72] add new param to public API to increase flexibility for user --- stumpy/stump.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 361be64b8..25bb48c79 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -511,7 +511,17 @@ def _stump( @core.non_normalized(aamp) -def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): +def stump( + T_A, + m, + T_B=None, + ignore_trivial=True, + normalize=True, + p=2.0, + k=1, + T_A_subseq_isconstant=None, + T_B_subseq_isconstant=None, +): """ Compute the z-normalized matrix profile @@ -551,6 +561,12 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): when k > 1. If you have access to a GPU device, then you may be able to leverage `gpu_stump` for better performance and scalability. + T_A_subseq_isconstant : numpy.ndarray, default None + A boolean array that indicates whether a subsequence in `T_A` is constant (True) + + T_B_subseq_isconstant : numpy.ndarray, default None + A boolean array that indicates whether a subsequence in `T_B` is constant (True) + Returns ------- out : numpy.ndarray @@ -639,7 +655,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): μ_Q_m_1, T_A_subseq_isfinite, T_A_subseq_isconstant, - ) = core.preprocess_diagonal(T_A, m) + ) = core.preprocess_diagonal(T_A, m, T_A_subseq_isconstant) ( T_B, @@ -648,7 +664,7 @@ def stump(T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): M_T_m_1, T_B_subseq_isfinite, T_B_subseq_isconstant, - ) = core.preprocess_diagonal(T_B, m) + ) = core.preprocess_diagonal(T_B, m, T_B_subseq_isconstant) if T_A.ndim != 1: # pragma: no cover raise ValueError( From 555ae13d69d6b2245357f58c3a7c48d35396aab4 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 28 Jan 2023 11:38:37 -0500 Subject: [PATCH 05/72] fix decorator --- stumpy/stump.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 25bb48c79..22656d2a1 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -510,7 +510,10 @@ def _stump( ) -@core.non_normalized(aamp) +@core.non_normalized( + aamp, + exclude=["normalize", "p", "T_A_subseq_isconstant", "T_B_subseq_isconstant"], +) def stump( T_A, m, From 3b2097f82a138516c79532c42835d6bcabf76b03 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 2 Feb 2023 01:14:02 -0500 Subject: [PATCH 06/72] add custom_func for rolling_isconstant --- stumpy/core.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index e1c324534..49973a5b4 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2221,7 +2221,7 @@ def rolling_isfinite(a, w): @njit(parallel=True, fastmath={"nsz", "arcp", "contract", "afn", "reassoc"}) def _rolling_isconstant(a, w): """ - Compute the rolling isconstant for 1-D and 2-D arrays. + Compute the rolling isconstant for 1-D array. This is accomplished by comparing the min and max within each window and assigning `True` when the min and max are equal and `False` otherwise. If @@ -2248,7 +2248,7 @@ def _rolling_isconstant(a, w): return np.where(out == 0.0, True, False) -def rolling_isconstant(a, w): +def rolling_isconstant(a, w, custom_func=None): """ Compute the rolling isconstant for 1-D and 2-D arrays. @@ -2264,14 +2264,28 @@ def rolling_isconstant(a, w): w : numpy.ndarray The rolling window size + custom_func : object, default None + A custom, user defined function that returns boolean numpy ndarray that indicate + if a subsequence is constant or not. This function takes 1-D array time series, + a window size, and keyword arguments. + Returns ------- output : numpy.ndarray Rolling window isconstant. """ + rolling_isconstant_func = _rolling_isconstant + if custom_func is not None: + custom_func_args = set(inspect.signature(custom_func).parameters.keys()) + if len(set(["a", "w"]).difference(custom_func_args)): + rolling_isconstant_func = custom_func + else: + msg = "Incompatible parameters found in `custom_func`" + warnings.warn(msg) + axis = a.ndim - 1 return np.apply_along_axis( - lambda a_row, w: _rolling_isconstant(a_row, w), axis=axis, arr=a, w=w + lambda a_row, w: rolling_isconstant_func(a_row, w), axis=axis, arr=a, w=w ) From 5be22a706c083c619f87673341b5375a2d60339a Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 2 Feb 2023 01:32:23 -0500 Subject: [PATCH 07/72] fix if block --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 49973a5b4..c15de6d2f 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2278,10 +2278,10 @@ def rolling_isconstant(a, w, custom_func=None): if custom_func is not None: custom_func_args = set(inspect.signature(custom_func).parameters.keys()) if len(set(["a", "w"]).difference(custom_func_args)): - rolling_isconstant_func = custom_func - else: msg = "Incompatible parameters found in `custom_func`" warnings.warn(msg) + else: + rolling_isconstant_func = custom_func axis = a.ndim - 1 return np.apply_along_axis( From 3bce443d91d1076a92b64369ee205d31a3a6ac25 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 4 Feb 2023 00:56:23 -0500 Subject: [PATCH 08/72] change black minimum version to resolve trailing-comma issue --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 2a16ac6da..64a227201 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - pandas>=0.20.0 - flake8>=3.7.7 - flake8-docstrings>=1.5.0 - - black>=22.1.0 + - black>=22.8.0 - pytest-cov>=2.10.0 - dask>=1.2.2 - distributed>=1.28.1 From 9fb636b4ae0647da675b370c9183cc2eb45fa376 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 4 Feb 2023 01:08:47 -0500 Subject: [PATCH 09/72] fix format with latest version of black --- stumpy/aamp_ostinato.py | 12 ++++++++++-- stumpy/gpu_aamp_ostinato.py | 6 +++++- stumpy/gpu_ostinato.py | 6 +++++- stumpy/ostinato.py | 12 ++++++++++-- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/stumpy/aamp_ostinato.py b/stumpy/aamp_ostinato.py index 89a7996b6..e38a26c60 100644 --- a/stumpy/aamp_ostinato.py +++ b/stumpy/aamp_ostinato.py @@ -306,7 +306,11 @@ def aamp_ostinato(Ts, m, p=2.0): Ts, m, Ts_subseq_isfinite, p ) - (central_radius, central_Ts_idx, central_subseq_idx,) = _get_aamp_central_motif( + ( + central_radius, + central_Ts_idx, + central_subseq_idx, + ) = _get_aamp_central_motif( Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m, Ts_subseq_isfinite, p ) @@ -389,7 +393,11 @@ def aamp_ostinatoed(client, Ts, m, p=2.0): mp_func=aamped, ) - (central_radius, central_Ts_idx, central_subseq_idx,) = _get_aamp_central_motif( + ( + central_radius, + central_Ts_idx, + central_subseq_idx, + ) = _get_aamp_central_motif( Ts, bsf_radius, bsf_Ts_idx, diff --git a/stumpy/gpu_aamp_ostinato.py b/stumpy/gpu_aamp_ostinato.py index 61f75d4d2..7dd5a46fa 100644 --- a/stumpy/gpu_aamp_ostinato.py +++ b/stumpy/gpu_aamp_ostinato.py @@ -83,7 +83,11 @@ def gpu_aamp_ostinato(Ts, m, device_id=0, p=2.0): mp_func=gpu_aamp, ) - (central_radius, central_Ts_idx, central_subseq_idx,) = _get_aamp_central_motif( + ( + central_radius, + central_Ts_idx, + central_subseq_idx, + ) = _get_aamp_central_motif( Ts, bsf_radius, bsf_Ts_idx, diff --git a/stumpy/gpu_ostinato.py b/stumpy/gpu_ostinato.py index 5c03f69bf..c6e554bd5 100644 --- a/stumpy/gpu_ostinato.py +++ b/stumpy/gpu_ostinato.py @@ -104,7 +104,11 @@ def gpu_ostinato(Ts, m, device_id=0, normalize=True, p=2.0): Ts, m, M_Ts, Σ_Ts, Ts_subseq_isconstant, device_id=device_id, mp_func=gpu_stump ) - (central_radius, central_Ts_idx, central_subseq_idx,) = _get_central_motif( + ( + central_radius, + central_Ts_idx, + central_subseq_idx, + ) = _get_central_motif( Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m, M_Ts, Σ_Ts, Ts_subseq_isconstant ) diff --git a/stumpy/ostinato.py b/stumpy/ostinato.py index cae6f9485..3f18b3a56 100644 --- a/stumpy/ostinato.py +++ b/stumpy/ostinato.py @@ -356,7 +356,11 @@ def ostinato(Ts, m, normalize=True, p=2.0): Ts, m, M_Ts, Σ_Ts, Ts_subseq_isconstant ) - (central_radius, central_Ts_idx, central_subseq_idx,) = _get_central_motif( + ( + central_radius, + central_Ts_idx, + central_subseq_idx, + ) = _get_central_motif( Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m, M_Ts, Σ_Ts, Ts_subseq_isconstant ) @@ -463,7 +467,11 @@ def ostinatoed(client, Ts, m, normalize=True, p=2.0): mp_func=stumped, ) - (central_radius, central_Ts_idx, central_subseq_idx,) = _get_central_motif( + ( + central_radius, + central_Ts_idx, + central_subseq_idx, + ) = _get_central_motif( Ts, bsf_radius, bsf_Ts_idx, bsf_subseq_idx, m, M_Ts, Σ_Ts, Ts_subseq_isconstant ) From 00a7d151cf26f1b8f4419b6d93d7d0e46cb83e71 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 4 Feb 2023 01:13:04 -0500 Subject: [PATCH 10/72] retreive setting for black minimum version --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 64a227201..2a16ac6da 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - pandas>=0.20.0 - flake8>=3.7.7 - flake8-docstrings>=1.5.0 - - black>=22.8.0 + - black>=22.1.0 - pytest-cov>=2.10.0 - dask>=1.2.2 - distributed>=1.28.1 From e7160afd9635f9142216144a10585adf438deecd Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 07:22:09 -0500 Subject: [PATCH 11/72] replace array with a custom function --- stumpy/core.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index c15de6d2f..55a1dec33 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1809,7 +1809,15 @@ def _preprocess(T, copy=True): return T -def preprocess(T, m, copy=True, M_T=None, Σ_T=None, T_subseq_isconstant=None): +def preprocess( + T, + m, + copy=True, + M_T=None, + Σ_T=None, + T_subseq_isconstant=None, + isconstant_custom_func=None, +): """ Creates a copy of the time series where all NaN and inf values are replaced with zero. Also computes mean and standard deviation @@ -1843,6 +1851,13 @@ def preprocess(T, m, copy=True, M_T=None, Σ_T=None, T_subseq_isconstant=None): A boolean array that indicates whether a subsequence in `T` is constant (True) + isconstant_custom_func : object, default None + A custom, user-defined function that determines if a subsequence is + constant or not. It takes two arguments, `a`, a 1-D array, and `w`, + the window size, and may have keyword arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. This + parameter is used when `T_subseq_isconstant` is not provied. + Returns ------- T : numpy.ndarray @@ -1860,7 +1875,7 @@ def preprocess(T, m, copy=True, M_T=None, Σ_T=None, T_subseq_isconstant=None): T[np.isinf(T)] = np.nan if T_subseq_isconstant is None: - T_subseq_isconstant = rolling_isconstant(T, m) + T_subseq_isconstant = rolling_isconstant(T, m, isconstant_custom_func) if M_T is None or Σ_T is None: M_T, Σ_T = compute_mean_std(T, m) T[np.isnan(T)] = 0 @@ -1904,7 +1919,7 @@ def preprocess_non_normalized(T, m): return T, T_subseq_isfinite -def preprocess_diagonal(T, m, T_subseq_isconstant=None): +def preprocess_diagonal(T, m, isconstant_custom_func=None): """ Preprocess a time series that is to be used when traversing the diagonals of a distance matrix. @@ -1927,8 +1942,12 @@ def preprocess_diagonal(T, m, T_subseq_isconstant=None): m : int Window size - T_subseq_isconstant : numpy.ndarray, default None - A boolean array that indicates whether a subsequence in `T` is constant (True) + isconstant_custom_func : object, default None + A custom, user-defined function that determines if a subsequence is + constant or not. It takes two arguments, `a`, a 1-D array, and `w`, + the window size, and may have keyword arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. This + parameter is used when `T_subseq_isconstant` is not provied. Returns ------- @@ -1955,8 +1974,7 @@ def preprocess_diagonal(T, m, T_subseq_isconstant=None): check_window_size(m, max_size=T.shape[-1]) T_subseq_isfinite = rolling_isfinite(T, m) T[~np.isfinite(T)] = np.nan - if T_subseq_isconstant is None: - T_subseq_isconstant = rolling_isconstant(T, m) + T_subseq_isconstant = rolling_isconstant(T, m, isconstant_custom_func) T[np.isnan(T)] = 0 M_T, Σ_T = compute_mean_std(T, m) @@ -2266,8 +2284,8 @@ def rolling_isconstant(a, w, custom_func=None): custom_func : object, default None A custom, user defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. This function takes 1-D array time series, - a window size, and keyword arguments. + if a subsequence is constant or not. This function takes `a`, a 1-D array time + series, `w`, a window size, and it may also have keyword arguments. Returns ------- From ed0c2d93564fb0837d3ac83afa38fcadaf03a337 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 07:49:04 -0500 Subject: [PATCH 12/72] replace array with func as new param for determining constant subseqs --- stumpy/core.py | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 55a1dec33..15f6e00aa 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1439,6 +1439,7 @@ def _mass(Q, T, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconst "T_subseq_isfinite", "p", "T_subseq_isconstant", + "isconstant_custom_func", ], replace={"M_T": "T_subseq_isfinite", "Σ_T": None}, ) @@ -1451,6 +1452,7 @@ def mass( p=2.0, T_subseq_isfinite=None, T_subseq_isconstant=None, + isconstant_custom_func=None, ): """ Compute the distance profile using the MASS algorithm @@ -1488,6 +1490,13 @@ def mass( T_subseq_isconstant : numpy.ndarray, default None A boolean array that indicates whether a subsequence in `T` is constant (True) + isconstant_custom_func : object, default None + A custom, user-defined function that determines if a subsequence is + constant or not. It takes two arguments, `a`, a 1-D array, and `w`, + the window size, and may have keyword arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. This + parameter is used when `T_subseq_isconstant` is not provied. + Returns ------- distance_profile : numpy.ndarray @@ -1549,11 +1558,17 @@ def mass( distance_profile[:] = np.inf else: T, M_T, Σ_T, T_subseq_isconstant = preprocess( - T, m, copy=False, M_T=M_T, Σ_T=Σ_T, T_subseq_isconstant=T_subseq_isconstant + T, + m, + copy=False, + M_T=M_T, + Σ_T=Σ_T, + T_subseq_isconstant=T_subseq_isconstant, + isconstant_custom_func=isconstant_custom_func, ) QT = sliding_dot_product(Q, T) - Q_subseq_isconstant = rolling_isconstant(Q, m)[0] + Q_subseq_isconstant = rolling_isconstant(Q, m, isconstant_custom_func)[0] μ_Q, σ_Q = [arr[0] for arr in compute_mean_std(Q, m)] distance_profile[:] = _mass( Q, T, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconstant @@ -1662,14 +1677,29 @@ def mass_distance_matrix( T_subseq_isconstant : numpy.ndarray, default None A boolean array that indicates whether a subsequence in `T` is constant (True) + isconstant_custom_func : object, default None + A custom, user-defined function that determines if a subsequence is + constant or not. It takes two arguments, `a`, a 1-D array, and `w`, + the window size, and may have keyword arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. This + does not recompute `T_subseq_isconstant` if it is already provided. + Returns ------- None """ - Q, μ_Q, σ_Q, Q_subseq_isconstant = preprocess(Q, m) + Q, μ_Q, σ_Q, Q_subseq_isconstant = preprocess( + Q, m, isconstant_custom_func=isconstant_custom_func + ) T, M_T, Σ_T, T_subseq_isconstant = preprocess( - T, m, copy=True, M_T=M_T, Σ_T=Σ_T, T_subseq_isconstant=T_subseq_isconstant + T, + m, + copy=True, + M_T=M_T, + Σ_T=Σ_T, + T_subseq_isconstant=T_subseq_isconstant, + isconstant_custom_func=isconstant_custom_func, ) check_window_size(m, max_size=min(Q.shape[-1], T.shape[-1])) From c25fdd809d8e99f9925746660d2cdb972d49bba9 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 08:00:52 -0500 Subject: [PATCH 13/72] revise core functions to have param isconstant_custom_func --- stumpy/core.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 15f6e00aa..4124ff867 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1648,7 +1648,14 @@ def _mass_distance_matrix( def mass_distance_matrix( - Q, T, m, distance_matrix, M_T=None, Σ_T=None, T_subseq_isconstant=None + Q, + T, + m, + distance_matrix, + M_T=None, + Σ_T=None, + T_subseq_isconstant=None, + isconstant_custom_func=None, ): """ Compute the full distance matrix between all of the subsequences of `Q` and `T` From 890e42e5caa27c686fc8bc729af71ad9221f7744 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 08:04:27 -0500 Subject: [PATCH 14/72] update stump and test_stump --- stumpy/stump.py | 19 +++++++++---------- tests/test_stump.py | 19 ------------------- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index 22656d2a1..f4942fa95 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -512,7 +512,7 @@ def _stump( @core.non_normalized( aamp, - exclude=["normalize", "p", "T_A_subseq_isconstant", "T_B_subseq_isconstant"], + exclude=["normalize", "p", "isconstant_custom_func"], ) def stump( T_A, @@ -522,8 +522,7 @@ def stump( normalize=True, p=2.0, k=1, - T_A_subseq_isconstant=None, - T_B_subseq_isconstant=None, + isconstant_custom_func=None, ): """ Compute the z-normalized matrix profile @@ -564,11 +563,11 @@ def stump( when k > 1. If you have access to a GPU device, then you may be able to leverage `gpu_stump` for better performance and scalability. - T_A_subseq_isconstant : numpy.ndarray, default None - A boolean array that indicates whether a subsequence in `T_A` is constant (True) - - T_B_subseq_isconstant : numpy.ndarray, default None - A boolean array that indicates whether a subsequence in `T_B` is constant (True) + isconstant_custom_func : object, default None + A custom, user-defined function that determines if a subsequence is + constant or not. It takes two arguments, `a`, a 1-D array, and `w`, + the window size, and may have keyword arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. Returns ------- @@ -658,7 +657,7 @@ def stump( μ_Q_m_1, T_A_subseq_isfinite, T_A_subseq_isconstant, - ) = core.preprocess_diagonal(T_A, m, T_A_subseq_isconstant) + ) = core.preprocess_diagonal(T_A, m, isconstant_custom_func) ( T_B, @@ -667,7 +666,7 @@ def stump( M_T_m_1, T_B_subseq_isfinite, T_B_subseq_isconstant, - ) = core.preprocess_diagonal(T_B, m, T_B_subseq_isconstant) + ) = core.preprocess_diagonal(T_B, m, isconstant_custom_func) if T_A.ndim != 1: # pragma: no cover raise ValueError( diff --git a/tests/test_stump.py b/tests/test_stump.py index 3f7573aa9..e08746758 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -271,22 +271,3 @@ def test_stump_A_B_join_KNN(T_A, T_B): comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False, k=k) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) - - -@pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join_subseq_isconstant(T_A, T_B): - T_A = T_B - m = 3 - l = T_A.shape[0] - m + 1 - T_A_subseq_isconstant = np.full(l, 0, dtype=bool) - - full_indices_range = np.arange(l) - for i in range(l + 1): - IDX = np.random.choice(full_indices_range, i, replace=False) - T_A_subseq_isconstant[IDX] = True - - ref_mp = naive.stump(T_A, m=m, T_A_subseq_isconstant=T_A_subseq_isconstant) - comp_mp = stump(T_A, m, T_A_subseq_isconstant=T_A_subseq_isconstant) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) From 807f15d98cb0615cbaaeb93776050d8ea0bd8fb1 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 08:13:03 -0500 Subject: [PATCH 15/72] add param custom_func to naive rolling_isconstant --- tests/naive.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 0e5acc9ab..466a60fc9 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -6,9 +6,19 @@ from stumpy import core, config -def rolling_isconstant(a, w): +def ptp_1d(a, w): # `a` is 1-D + return np.ptp(core.rolling_window(a, w), axis=1) == 0 + + +def rolling_isconstant(a, w, custom_func=None): + if custom_func is None: + custom_func = ptp_1d + return np.logical_and( - core.rolling_isfinite(a, w), np.ptp(core.rolling_window(a, w), axis=-1) == 0 + core.rolling_isfinite(a, w), + np.apply_along_axis( + lambda a_row, w: custom_func(a_row, w), axis=-1, arr=a, w=w + ), ) From 40cd400fca96d2edfef3e60faa6bc1946abed020 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 08:30:29 -0500 Subject: [PATCH 16/72] add an example for isconstant custom func to naive --- tests/naive.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/naive.py b/tests/naive.py index 466a60fc9..3798a23d4 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -2065,3 +2065,12 @@ def find_matches(D, excl_zone, max_distance, max_matches=None): matches = [x for x in matches if x < idx - excl_zone or x > idx + excl_zone] return np.array(result[:max_matches], dtype=object) + + +def isconstant_func_stddev_threshold(a, w, quantile_threshold=0): + sliding_stddev = rolling_nanstd(a, w) + if quantile_threshold == 0: + return sliding_stddev == 0 + else: + stddev_threshold = np.quantile(sliding_stddev, quantile_threshold) + return sliding_stddev <= stddev_threshold From 186fccb5e37b01bc56a04989a5c0b03f2f6ccbae Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 08:32:32 -0500 Subject: [PATCH 17/72] add test function for isconstant custom func --- tests/test_core.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index d86daab55..9cba0673e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -8,6 +8,7 @@ from unittest.mock import patch import os import math +import functools import naive @@ -1035,6 +1036,19 @@ def test_rolling_isconstant(): npt.assert_almost_equal(ref, comp) +def test_rolling_isconstant_custom_func(): + a = np.random.rand(100) + for w in range(3, 5): + for q in [0, 0.01, 0.05, 0.1]: + custom_func = functools.partial( + naive.isconstant_func_stddev_threshold, quantile_threshold=q + ) + ref = naive.rolling_isconstant(a, w, custom_func) + comp = core.rolling_isconstant(a, w, custom_func) + + npt.assert_almost_equal(ref, comp) + + def test_compare_parameters(): assert ( core._compare_parameters(core.rolling_window, core.z_norm, exclude=[]) is False From 3ac00ef177d921eb2544c0f5a2dc5b75b9d32f22 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 08:39:46 -0500 Subject: [PATCH 18/72] add test function for isconstant custom function --- tests/naive.py | 9 +++------ tests/test_stump.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 3798a23d4..ceb3762e3 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -195,8 +195,7 @@ def stump( exclusion_zone=None, row_wise=False, k=1, - T_A_subseq_isconstant=None, - T_B_subseq_isconstant=None, + isconstant_custom_func=None, ): """ Traverse distance matrix diagonally and update the top-k matrix profile and @@ -215,10 +214,8 @@ def stump( [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] ) - if T_A_subseq_isconstant is None: - T_A_subseq_isconstant = rolling_isconstant(T_A, m) - if T_B_subseq_isconstant is None: - T_B_subseq_isconstant = rolling_isconstant(T_B, m) + T_A_subseq_isconstant = rolling_isconstant(T_A, m, isconstant_custom_func) + T_B_subseq_isconstant = rolling_isconstant(T_B, m, isconstant_custom_func) distance_matrix[np.isnan(distance_matrix)] = np.inf for i in range(distance_matrix.shape[0]): diff --git a/tests/test_stump.py b/tests/test_stump.py index e08746758..8f399b30d 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -4,6 +4,7 @@ from stumpy import stump, config import pytest import naive +import functools test_data = [ @@ -271,3 +272,21 @@ def test_stump_A_B_join_KNN(T_A, T_B): comp_mp = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False, k=k) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stump_self_join_custom_func(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) + isconstant_custom_func = functools.partial( + naive.isconstant_func_stddev_threshold, quantile_threshold=0.05 + ) + ref_mp = naive.stump( + T_B, m, exclusion_zone=zone, isconstant_custom_func=isconstant_custom_func + ) + comp_mp = stump( + T_B, m, ignore_trivial=True, isconstant_custom_func=isconstant_custom_func + ) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From 6aa504b03bc12fbd47a6e6439bfac2c0834be93a Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 6 Feb 2023 08:43:05 -0500 Subject: [PATCH 19/72] update minimum version of black --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 2a16ac6da..8e97beef5 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - pandas>=0.20.0 - flake8>=3.7.7 - flake8-docstrings>=1.5.0 - - black>=22.1.0 + - black>=23.1.0 - pytest-cov>=2.10.0 - dask>=1.2.2 - distributed>=1.28.1 From 35a280b3ade333834519c865c3c77fbf32a795c8 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Tue, 7 Feb 2023 21:58:37 -0500 Subject: [PATCH 20/72] fix docstrings --- stumpy/core.py | 46 +++++++++++++++++++++++----------------------- stumpy/stump.py | 6 +++--- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 4124ff867..0cde37bdc 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1491,11 +1491,11 @@ def mass( A boolean array that indicates whether a subsequence in `T` is constant (True) isconstant_custom_func : object, default None - A custom, user-defined function that determines if a subsequence is - constant or not. It takes two arguments, `a`, a 1-D array, and `w`, - the window size, and may have keyword arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This - parameter is used when `T_subseq_isconstant` is not provied. + A custom, user-defined function that returns boolean numpy ndarray that indicate + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + and `w`, the window size, and may have default arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. This parameter + does not recompute `T_subseq_isconstant`. Returns ------- @@ -1685,11 +1685,11 @@ def mass_distance_matrix( A boolean array that indicates whether a subsequence in `T` is constant (True) isconstant_custom_func : object, default None - A custom, user-defined function that determines if a subsequence is - constant or not. It takes two arguments, `a`, a 1-D array, and `w`, - the window size, and may have keyword arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This - does not recompute `T_subseq_isconstant` if it is already provided. + A custom, user-defined function that returns boolean numpy ndarray that indicate + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + and `w`, the window size, and may have default arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. This parameter + does not recompute `T_subseq_isconstant`. Returns ------- @@ -1889,11 +1889,11 @@ def preprocess( is constant (True) isconstant_custom_func : object, default None - A custom, user-defined function that determines if a subsequence is - constant or not. It takes two arguments, `a`, a 1-D array, and `w`, - the window size, and may have keyword arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This - parameter is used when `T_subseq_isconstant` is not provied. + A custom, user-defined function that returns boolean numpy ndarray that indicate + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + and `w`, the window size, and may have default arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. This parameter + does not recompute `T_subseq_isconstant`. Returns ------- @@ -1980,11 +1980,10 @@ def preprocess_diagonal(T, m, isconstant_custom_func=None): Window size isconstant_custom_func : object, default None - A custom, user-defined function that determines if a subsequence is - constant or not. It takes two arguments, `a`, a 1-D array, and `w`, - the window size, and may have keyword arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This - parameter is used when `T_subseq_isconstant` is not provied. + A custom, user-defined function that returns boolean numpy ndarray that indicate + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + and `w`, the window size, and may have default arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. Returns ------- @@ -2320,9 +2319,10 @@ def rolling_isconstant(a, w, custom_func=None): The rolling window size custom_func : object, default None - A custom, user defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. This function takes `a`, a 1-D array time - series, `w`, a window size, and it may also have keyword arguments. + A custom, user-defined function that returns boolean numpy ndarray that indicate + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + and `w`, the window size, and may have default arguments if needed. When `None`, + this will be default to the function `core._rolling_isconstant`. Returns ------- diff --git a/stumpy/stump.py b/stumpy/stump.py index f4942fa95..72407bb73 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -564,9 +564,9 @@ def stump( leverage `gpu_stump` for better performance and scalability. isconstant_custom_func : object, default None - A custom, user-defined function that determines if a subsequence is - constant or not. It takes two arguments, `a`, a 1-D array, and `w`, - the window size, and may have keyword arguments if needed. When `None`, + A custom, user-defined function that returns boolean numpy ndarray that indicate + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + and `w`, the window size, and may have default arguments if needed. When `None`, this will be default to the function `core._rolling_isconstant`. Returns From 58c6f963cc7b16c9ecabc0fd09b4db629fca3246 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Tue, 7 Feb 2023 22:17:43 -0500 Subject: [PATCH 21/72] fix format --- stumpy/core.py | 16 ++++++++-------- stumpy/stump.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 0cde37bdc..4eb58611e 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1492,9 +1492,9 @@ def mass( isconstant_custom_func : object, default None A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This parameter + this will be default to the function `core._rolling_isconstant`. This parameter does not recompute `T_subseq_isconstant`. Returns @@ -1686,9 +1686,9 @@ def mass_distance_matrix( isconstant_custom_func : object, default None A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This parameter + this will be default to the function `core._rolling_isconstant`. This parameter does not recompute `T_subseq_isconstant`. Returns @@ -1890,9 +1890,9 @@ def preprocess( isconstant_custom_func : object, default None A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This parameter + this will be default to the function `core._rolling_isconstant`. This parameter does not recompute `T_subseq_isconstant`. Returns @@ -1981,7 +1981,7 @@ def preprocess_diagonal(T, m, isconstant_custom_func=None): isconstant_custom_func : object, default None A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, and `w`, the window size, and may have default arguments if needed. When `None`, this will be default to the function `core._rolling_isconstant`. @@ -2320,7 +2320,7 @@ def rolling_isconstant(a, w, custom_func=None): custom_func : object, default None A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, and `w`, the window size, and may have default arguments if needed. When `None`, this will be default to the function `core._rolling_isconstant`. diff --git a/stumpy/stump.py b/stumpy/stump.py index 72407bb73..c56d2ab17 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -565,7 +565,7 @@ def stump( isconstant_custom_func : object, default None A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, + if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, and `w`, the window size, and may have default arguments if needed. When `None`, this will be default to the function `core._rolling_isconstant`. From 339deea4fc32d07ef0d0fbf4f70ee5722ec077cc Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 18 Feb 2023 23:08:17 -0500 Subject: [PATCH 22/72] revise a naive function and its name --- tests/naive.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index e9c6c0220..91ef365bd 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -6,13 +6,17 @@ from stumpy import core, config -def ptp_1d(a, w): # `a` is 1-D - return np.ptp(core.rolling_window(a, w), axis=1) == 0 +def is_ptp_zero_1d(a, w): # `a` is 1-D + n = a - w + 1 + out = np.empty(n) + for i in range(n): + out[i] = np.max(a[i : i + w]) - np.min(a[i : i + w]) + return out == 0 def rolling_isconstant(a, w, custom_func=None): if custom_func is None: - custom_func = ptp_1d + custom_func = is_ptp_zero_1d return np.logical_and( core.rolling_isfinite(a, w), From 35127d703c6c83e81f450ef27dbf82f310e46c12 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 18 Feb 2023 23:11:41 -0500 Subject: [PATCH 23/72] fix minor bug --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index 91ef365bd..b25c0aaa1 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -7,7 +7,7 @@ def is_ptp_zero_1d(a, w): # `a` is 1-D - n = a - w + 1 + n = len(a) - w + 1 out = np.empty(n) for i in range(n): out[i] = np.max(a[i : i + w]) - np.min(a[i : i + w]) From a1024a90544fa5617fdbbd631d23b0b6013a0348 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 02:46:52 -0500 Subject: [PATCH 24/72] allow param to accept type np.ndarray or function --- stumpy/core.py | 133 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 92 insertions(+), 41 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 4eb58611e..b32cdd304 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -5,6 +5,7 @@ import warnings import functools import inspect +import types import numpy as np from numba import njit, cuda, prange @@ -1439,7 +1440,6 @@ def _mass(Q, T, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconst "T_subseq_isfinite", "p", "T_subseq_isconstant", - "isconstant_custom_func", ], replace={"M_T": "T_subseq_isfinite", "Σ_T": None}, ) @@ -1452,7 +1452,7 @@ def mass( p=2.0, T_subseq_isfinite=None, T_subseq_isconstant=None, - isconstant_custom_func=None, + Q_subseq_isconstant=None, ): """ Compute the distance profile using the MASS algorithm @@ -1487,15 +1487,25 @@ def mass( `np.nan`/`np.inf` value (False). This parameter is ignored when `normalize=True`. - T_subseq_isconstant : numpy.ndarray, default None - A boolean array that indicates whether a subsequence in `T` is constant (True) - - isconstant_custom_func : object, default None - A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, - and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This parameter - does not recompute `T_subseq_isconstant`. + T_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one nan/inf value will be enforced to have + the corresponding value `False` in this boolean array. + + Q_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `Q` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `Q` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one nan/inf value will be enforced to have + the corresponding value `False` in this boolean array. Returns ------- @@ -1564,14 +1574,26 @@ def mass( M_T=M_T, Σ_T=Σ_T, T_subseq_isconstant=T_subseq_isconstant, - isconstant_custom_func=isconstant_custom_func, ) QT = sliding_dot_product(Q, T) - Q_subseq_isconstant = rolling_isconstant(Q, m, isconstant_custom_func)[0] - μ_Q, σ_Q = [arr[0] for arr in compute_mean_std(Q, m)] + Q, μ_Q, σ_Q, Q_subseq_isconstant = preprocess( + Q, + m, + copy=False, + T_subseq_isconstant=Q_subseq_isconstant, + ) + distance_profile[:] = _mass( - Q, T, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconstant + Q, + T, + QT, + μ_Q[0], + σ_Q[0], + M_T, + Σ_T, + Q_subseq_isconstant[0], + T_subseq_isconstant, ) return distance_profile @@ -1655,7 +1677,7 @@ def mass_distance_matrix( M_T=None, Σ_T=None, T_subseq_isconstant=None, - isconstant_custom_func=None, + Q_subseq_isconstant=None, ): """ Compute the full distance matrix between all of the subsequences of `Q` and `T` @@ -1681,32 +1703,41 @@ def mass_distance_matrix( Σ_T : numpy.ndarray, default None Sliding standard deviation of `T` - T_subseq_isconstant : numpy.ndarray, default None - A boolean array that indicates whether a subsequence in `T` is constant (True) - - isconstant_custom_func : object, default None - A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, - and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This parameter - does not recompute `T_subseq_isconstant`. + T_subseq_isconstant : numpy.ndarray, function, default None + A boolean array that indicates whether a subsequence in `T` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one nan/inf value will be enforced to have + the corresponding value `False` in this boolean array. + + Q_subseq_isconstant : numpy.ndarray, function, default None + A boolean array that indicates whether a subsequence in `Q` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `Q` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one nan/inf value will be enforced to have + the corresponding value `False` in this boolean array. Returns ------- None """ Q, μ_Q, σ_Q, Q_subseq_isconstant = preprocess( - Q, m, isconstant_custom_func=isconstant_custom_func + Q, m, T_subseq_isconstant=Q_subseq_isconstant ) T, M_T, Σ_T, T_subseq_isconstant = preprocess( T, m, - copy=True, + copy=True, # TEMP comment: to be consitent with Q, remove this. M_T=M_T, Σ_T=Σ_T, T_subseq_isconstant=T_subseq_isconstant, - isconstant_custom_func=isconstant_custom_func, ) check_window_size(m, max_size=min(Q.shape[-1], T.shape[-1])) @@ -1853,7 +1884,6 @@ def preprocess( M_T=None, Σ_T=None, T_subseq_isconstant=None, - isconstant_custom_func=None, ): """ Creates a copy of the time series where all NaN and inf values @@ -1884,16 +1914,15 @@ def preprocess( Σ_T : numpy.ndarray, default None Rolling standard deviation - T_subseq_isconstant : numpy.ndarray, default None - A boolean array that indicates whether a subsequence in `T` - is constant (True) - - isconstant_custom_func : object, default None - A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, - and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. This parameter - does not recompute `T_subseq_isconstant`. + T_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one nan/inf value will be enforced to have + the corresponding value `False` in this boolean array. Returns ------- @@ -1911,8 +1940,30 @@ def preprocess( check_window_size(m, max_size=T.shape[-1]) T[np.isinf(T)] = np.nan - if T_subseq_isconstant is None: - T_subseq_isconstant = rolling_isconstant(T, m, isconstant_custom_func) + + isconstant_custom_func = None + if T_subseq_isconstant is not None: + if type(T_subseq_isconstant) not in {np.ndarray, types.FunctionType}: + msg = ( + "The acceptable types for `T_subseq_isconstant`" + + " are np.ndarray or function." + ) + raise ValueError(msg) + + if isinstance(T_subseq_isconstant, types.FunctionType): + isconstant_custom_func = T_subseq_isconstant + + if T_subseq_isconstant is None or isinstance( + T_subseq_isconstant, types.FunctionType + ): + T_subseq_isconstant = rolling_isconstant( + T, m, custom_func=isconstant_custom_func + ) + + # Enforced subseqs with a non-finite value correspond to + # `False` regarding being constant. + T_subseq_isconstant[~np.isfinite(T_subseq_isconstant)] = (False,) + if M_T is None or Σ_T is None: M_T, Σ_T = compute_mean_std(T, m) T[np.isnan(T)] = 0 From 00fcccdd9c503f964ad02e17407a312924efc2a0 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 03:44:57 -0500 Subject: [PATCH 25/72] minor fixes --- stumpy/core.py | 4 ---- stumpy/stump.py | 47 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index b32cdd304..8b7106742 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1960,10 +1960,6 @@ def preprocess( T, m, custom_func=isconstant_custom_func ) - # Enforced subseqs with a non-finite value correspond to - # `False` regarding being constant. - T_subseq_isconstant[~np.isfinite(T_subseq_isconstant)] = (False,) - if M_T is None or Σ_T is None: M_T, Σ_T = compute_mean_std(T, m) T[np.isnan(T)] = 0 diff --git a/stumpy/stump.py b/stumpy/stump.py index 305cfe711..f23da7eb9 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -512,7 +512,7 @@ def _stump( @core.non_normalized( aamp, - exclude=["normalize", "p", "isconstant_custom_func"], + exclude=["normalize", "p", "T_A_subseq_isconstant", "T_B_subseq_isconstant"], ) def stump( T_A, @@ -522,7 +522,8 @@ def stump( normalize=True, p=2.0, k=1, - isconstant_custom_func=None, + T_A_subseq_isconstant=None, + T_B_subseq_isconstant=None, ): """ Compute the z-normalized matrix profile @@ -563,11 +564,27 @@ def stump( when k > 1. If you have access to a GPU device, then you may be able to leverage `gpu_stump` for better performance and scalability. - isconstant_custom_func : object, default None - A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, - and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. + T_A_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T_A` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T_A` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one nan/inf value will be enforced to have + the corresponding value `False` in this boolean array. + + T_B_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T_B` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T_B` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one nan/inf value will be enforced to have + the corresponding value `False` in this boolean array. When + `T_A_subseq_isconstant` is provided by the user, `T_B_subseq_isconstant` + must be provided as well unless T_B is None. Returns ------- @@ -647,8 +664,18 @@ def stump( [0.11633857113691416, 0, 0, -1]], dtype=object) """ if T_B is None: - T_B = T_A ignore_trivial = True + T_B = T_A + else: + if T_A_subseq_isconstant is not None and T_B_subseq_isconstant is None: + msg = ( + "`T_B_subseq_isconstant` is not provided. For details, see" + + "the docstring." + ) + raise ValueError(msg) + + if T_B_subseq_isconstant is None: + T_B_subseq_isconstant = T_A_subseq_isconstant ( T_A, @@ -657,7 +684,7 @@ def stump( μ_Q_m_1, T_A_subseq_isfinite, T_A_subseq_isconstant, - ) = core.preprocess_diagonal(T_A, m, isconstant_custom_func) + ) = core.preprocess_diagonal(T_A, m, T_A_subseq_isconstant) ( T_B, @@ -666,7 +693,7 @@ def stump( M_T_m_1, T_B_subseq_isfinite, T_B_subseq_isconstant, - ) = core.preprocess_diagonal(T_B, m, isconstant_custom_func) + ) = core.preprocess_diagonal(T_B, m, T_B_subseq_isconstant) if T_A.ndim != 1: # pragma: no cover raise ValueError( From d37385df49bfbeba8e30ac8336fdcd3499847027 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 04:50:17 -0500 Subject: [PATCH 26/72] fixed minor issues --- tests/naive.py | 22 +++++++++++++++++++--- tests/test_stump.py | 4 ++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index b25c0aaa1..2c1d64b30 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -4,6 +4,7 @@ from scipy.spatial.distance import cdist from scipy.stats import norm from stumpy import core, config +import types def is_ptp_zero_1d(a, w): # `a` is 1-D @@ -199,7 +200,8 @@ def stump( exclusion_zone=None, row_wise=False, k=1, - isconstant_custom_func=None, + T_A_subseq_isconstant=None, + T_B_subseq_isconstant=None, ): """ Traverse distance matrix diagonally and update the top-k matrix profile and @@ -212,14 +214,28 @@ def stump( [distance_profile(Q, T_A, m) for Q in core.rolling_window(T_A, m)] ) T_B = T_A.copy() + T_B_subseq_isconstant = T_A_subseq_isconstant else: ignore_trivial = False distance_matrix = np.array( [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] ) + if T_A_subseq_isconstant is not None: + msg = ( + "T_B_subseq_isconstant must be provided when T_B is not None" + + " and T_A_subseq_isconstant is provided." + ) + raise ValueError(msg) + + if T_A_subseq_isconstant is None or isinstance( + T_A_subseq_isconstant, types.FunctionType + ): + T_A_subseq_isconstant = rolling_isconstant(T_A, m, T_A_subseq_isconstant) - T_A_subseq_isconstant = rolling_isconstant(T_A, m, isconstant_custom_func) - T_B_subseq_isconstant = rolling_isconstant(T_B, m, isconstant_custom_func) + if T_B_subseq_isconstant is None or isinstance( + T_B_subseq_isconstant, types.FunctionType + ): + T_B_subseq_isconstant = rolling_isconstant(T_A, m, T_B_subseq_isconstant) distance_matrix[np.isnan(distance_matrix)] = np.inf for i in range(distance_matrix.shape[0]): diff --git a/tests/test_stump.py b/tests/test_stump.py index 8f399b30d..69376632a 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -282,10 +282,10 @@ def test_stump_self_join_custom_func(T_A, T_B): naive.isconstant_func_stddev_threshold, quantile_threshold=0.05 ) ref_mp = naive.stump( - T_B, m, exclusion_zone=zone, isconstant_custom_func=isconstant_custom_func + T_B, m, exclusion_zone=zone, T_A_subseq_isconstant=isconstant_custom_func ) comp_mp = stump( - T_B, m, ignore_trivial=True, isconstant_custom_func=isconstant_custom_func + T_B, m, ignore_trivial=True, T_A_subseq_isconstant=isconstant_custom_func ) naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) From 3babc1fd0cd0cb504069be750dd37276546c1419 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 17:19:46 -0500 Subject: [PATCH 27/72] fix bugs and including type functools.partial --- stumpy/core.py | 14 ++++++++------ stumpy/stump.py | 4 +--- tests/naive.py | 18 ++++++++---------- 3 files changed, 17 insertions(+), 19 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 8b7106742..d10733fe4 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1943,19 +1943,21 @@ def preprocess( isconstant_custom_func = None if T_subseq_isconstant is not None: - if type(T_subseq_isconstant) not in {np.ndarray, types.FunctionType}: + if type(T_subseq_isconstant) not in { + np.ndarray, + types.FunctionType, + functools.partial, + }: msg = ( "The acceptable types for `T_subseq_isconstant`" - + " are np.ndarray or function." + + " are np.ndarray, function, or functools.partial" ) raise ValueError(msg) - if isinstance(T_subseq_isconstant, types.FunctionType): + if callable(T_subseq_isconstant): isconstant_custom_func = T_subseq_isconstant - if T_subseq_isconstant is None or isinstance( - T_subseq_isconstant, types.FunctionType - ): + if T_subseq_isconstant is None or callable(T_subseq_isconstant): T_subseq_isconstant = rolling_isconstant( T, m, custom_func=isconstant_custom_func ) diff --git a/stumpy/stump.py b/stumpy/stump.py index f23da7eb9..dcb21f14b 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -666,6 +666,7 @@ def stump( if T_B is None: ignore_trivial = True T_B = T_A + T_B_subseq_isconstant = T_A_subseq_isconstant else: if T_A_subseq_isconstant is not None and T_B_subseq_isconstant is None: msg = ( @@ -674,9 +675,6 @@ def stump( ) raise ValueError(msg) - if T_B_subseq_isconstant is None: - T_B_subseq_isconstant = T_A_subseq_isconstant - ( T_A, μ_Q, diff --git a/tests/naive.py b/tests/naive.py index 2c1d64b30..5daa84b7b 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -4,7 +4,6 @@ from scipy.spatial.distance import cdist from scipy.stats import norm from stumpy import core, config -import types def is_ptp_zero_1d(a, w): # `a` is 1-D @@ -220,28 +219,27 @@ def stump( distance_matrix = np.array( [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] ) - if T_A_subseq_isconstant is not None: + if T_A_subseq_isconstant is not None and T_B_subseq_isconstant is None: msg = ( "T_B_subseq_isconstant must be provided when T_B is not None" + " and T_A_subseq_isconstant is provided." ) raise ValueError(msg) - if T_A_subseq_isconstant is None or isinstance( - T_A_subseq_isconstant, types.FunctionType - ): + if T_A_subseq_isconstant is None or callable(T_A_subseq_isconstant): T_A_subseq_isconstant = rolling_isconstant(T_A, m, T_A_subseq_isconstant) - if T_B_subseq_isconstant is None or isinstance( - T_B_subseq_isconstant, types.FunctionType - ): - T_B_subseq_isconstant = rolling_isconstant(T_A, m, T_B_subseq_isconstant) + if T_B_subseq_isconstant is None or callable(T_B_subseq_isconstant): + T_B_subseq_isconstant = rolling_isconstant(T_B, m, T_B_subseq_isconstant) distance_matrix[np.isnan(distance_matrix)] = np.inf for i in range(distance_matrix.shape[0]): for j in range(distance_matrix.shape[1]): if np.isfinite(distance_matrix[i, j]): - if T_A_subseq_isconstant[i] and T_B_subseq_isconstant[j]: + if ( + T_A_subseq_isconstant[i] # a comment to make line longer + and T_B_subseq_isconstant[j] + ): distance_matrix[i, j] = 0.0 elif T_A_subseq_isconstant[i] or T_B_subseq_isconstant[j]: distance_matrix[i, j] = np.sqrt(m) From 69853b907cebab296eff31644492f997b955ac62 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 18:44:29 -0500 Subject: [PATCH 28/72] fix decorator --- stumpy/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/stumpy/core.py b/stumpy/core.py index d10733fe4..ab916bb48 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1440,6 +1440,7 @@ def _mass(Q, T, QT, μ_Q, σ_Q, M_T, Σ_T, Q_subseq_isconstant, T_subseq_isconst "T_subseq_isfinite", "p", "T_subseq_isconstant", + "Q_subseq_isconstant", ], replace={"M_T": "T_subseq_isfinite", "Σ_T": None}, ) From d4c956f52949817668e1ba0a26120f6be36fcfd2 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 22:00:38 -0500 Subject: [PATCH 29/72] update function preprocess_diagonal --- stumpy/core.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index ab916bb48..d315f8af1 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2006,7 +2006,7 @@ def preprocess_non_normalized(T, m): return T, T_subseq_isfinite -def preprocess_diagonal(T, m, isconstant_custom_func=None): +def preprocess_diagonal(T, m, T_subseq_isconstant=None): """ Preprocess a time series that is to be used when traversing the diagonals of a distance matrix. @@ -2029,11 +2029,15 @@ def preprocess_diagonal(T, m, isconstant_custom_func=None): m : int Window size - isconstant_custom_func : object, default None - A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, - and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. + T_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one nan/inf value will be enforced to have + the corresponding value `False` in this boolean array. Returns ------- From b7b5b5c88afd2028c0f27f02968074a4559b75d0 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 22:18:53 -0500 Subject: [PATCH 30/72] update function preprocess_diagonal --- stumpy/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index d315f8af1..1b90daf89 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2064,7 +2064,10 @@ def preprocess_diagonal(T, m, T_subseq_isconstant=None): check_window_size(m, max_size=T.shape[-1]) T_subseq_isfinite = rolling_isfinite(T, m) T[~np.isfinite(T)] = np.nan - T_subseq_isconstant = rolling_isconstant(T, m, isconstant_custom_func) + if T_subseq_isconstant is None or callable(T_subseq_isconstant): + T_subseq_isconstant = rolling_isconstant(T, m, T_subseq_isconstant) + else: + T_subseq_isconstant = np.logical_and(T_subseq_isconstant, T_subseq_isfinite) T[np.isnan(T)] = 0 M_T, Σ_T = compute_mean_std(T, m) From d76f4f681aa2d863351cf94ccbdc4201418c1973 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 23:19:35 -0500 Subject: [PATCH 31/72] remove if block --- stumpy/stump.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index dcb21f14b..d6f7333a9 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -667,14 +667,7 @@ def stump( ignore_trivial = True T_B = T_A T_B_subseq_isconstant = T_A_subseq_isconstant - else: - if T_A_subseq_isconstant is not None and T_B_subseq_isconstant is None: - msg = ( - "`T_B_subseq_isconstant` is not provided. For details, see" - + "the docstring." - ) - raise ValueError(msg) - + ( T_A, μ_Q, From 9beeaca436a30052d37099c49fa57647b2f44126 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 23:23:26 -0500 Subject: [PATCH 32/72] fix format --- stumpy/stump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index d6f7333a9..bffc8ba4e 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -667,7 +667,7 @@ def stump( ignore_trivial = True T_B = T_A T_B_subseq_isconstant = T_A_subseq_isconstant - + ( T_A, μ_Q, From 0550185d03194d45171655b00efd6783da79852e Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 19 Feb 2023 23:35:44 -0500 Subject: [PATCH 33/72] update naive by removing unnecessary if block --- tests/naive.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 5daa84b7b..9c2f0db8b 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -219,12 +219,6 @@ def stump( distance_matrix = np.array( [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] ) - if T_A_subseq_isconstant is not None and T_B_subseq_isconstant is None: - msg = ( - "T_B_subseq_isconstant must be provided when T_B is not None" - + " and T_A_subseq_isconstant is provided." - ) - raise ValueError(msg) if T_A_subseq_isconstant is None or callable(T_A_subseq_isconstant): T_A_subseq_isconstant = rolling_isconstant(T_A, m, T_A_subseq_isconstant) From 958b8442bbf8a7e5c488f635a385845a1d6ec05d Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 20 Feb 2023 00:18:57 -0500 Subject: [PATCH 34/72] update docstring --- stumpy/stump.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/stumpy/stump.py b/stumpy/stump.py index bffc8ba4e..9fcdb2d93 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -582,9 +582,7 @@ def stump( and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. When - `T_A_subseq_isconstant` is provided by the user, `T_B_subseq_isconstant` - must be provided as well unless T_B is None. + the corresponding value `False` in this boolean array. Returns ------- From 87faef8c99db8b9d44d81b4f1b62d99688cf9355 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 20 Feb 2023 01:50:50 -0500 Subject: [PATCH 35/72] add pragma nocover --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 1b90daf89..d7f4fba5a 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2389,7 +2389,7 @@ def rolling_isconstant(a, w, custom_func=None): rolling_isconstant_func = _rolling_isconstant if custom_func is not None: custom_func_args = set(inspect.signature(custom_func).parameters.keys()) - if len(set(["a", "w"]).difference(custom_func_args)): + if len(set(["a", "w"]).difference(custom_func_args)): # pragma: no cover msg = "Incompatible parameters found in `custom_func`" warnings.warn(msg) else: From bf71871bdcb0dd43e61c505384007f38831a2f47 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 20 Feb 2023 02:04:59 -0500 Subject: [PATCH 36/72] increase coverage by enhancing test function --- tests/test_stump.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_stump.py b/tests/test_stump.py index 69376632a..6b37352d3 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -6,7 +6,6 @@ import naive import functools - test_data = [ ( np.array([9, 8100, -60, 7], dtype=np.float64), @@ -275,7 +274,7 @@ def test_stump_A_B_join_KNN(T_A, T_B): @pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join_custom_func(T_A, T_B): +def test_stump_self_join_custom_func_(T_A, T_B): m = 3 zone = int(np.ceil(m / 4)) isconstant_custom_func = functools.partial( @@ -290,3 +289,12 @@ def test_stump_self_join_custom_func(T_A, T_B): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + # testing when `subseq_isconstant` being passed as boolean array + T_B_subseq_isconstant = naive.rolling_isconstant(T_B, m, isconstant_custom_func) + ref_mp = naive.stump( + T_B, m, exclusion_zone=zone, T_A_subseq_isconstant=T_B_subseq_isconstant + ) + comp_mp = stump( + T_B, m, ignore_trivial=True, T_A_subseq_isconstant=T_B_subseq_isconstant + ) From 1181c371fb0567261707f8028e2d30ef6d313e81 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 20 Feb 2023 02:18:11 -0500 Subject: [PATCH 37/72] undo changes to black minimum version --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 8e97beef5..2a16ac6da 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: - pandas>=0.20.0 - flake8>=3.7.7 - flake8-docstrings>=1.5.0 - - black>=23.1.0 + - black>=22.1.0 - pytest-cov>=2.10.0 - dask>=1.2.2 - distributed>=1.28.1 From 29e484becb5fd77692b07880b85a0a0f5a0badce Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Mon, 20 Feb 2023 02:19:25 -0500 Subject: [PATCH 38/72] remove unnecessary comment --- tests/naive.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 9c2f0db8b..4787302dd 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -230,10 +230,7 @@ def stump( for i in range(distance_matrix.shape[0]): for j in range(distance_matrix.shape[1]): if np.isfinite(distance_matrix[i, j]): - if ( - T_A_subseq_isconstant[i] # a comment to make line longer - and T_B_subseq_isconstant[j] - ): + if T_A_subseq_isconstant[i] and T_B_subseq_isconstant[j]: distance_matrix[i, j] = 0.0 elif T_A_subseq_isconstant[i] or T_B_subseq_isconstant[j]: distance_matrix[i, j] = np.sqrt(m) From f33b3ee6598b3f168e103d52dedbef383d7dea4e Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 23 Feb 2023 21:58:14 -0500 Subject: [PATCH 39/72] revise docstrings --- stumpy/core.py | 24 ++++++++++++------------ stumpy/stump.py | 8 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index d7f4fba5a..7bef03698 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1495,8 +1495,8 @@ def mass( (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any - subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. Q_subseq_isconstant : numpy.ndarray or function, default None A boolean array that indicates whether a subsequence in `Q` is constant @@ -1505,8 +1505,8 @@ def mass( (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any - subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. Returns ------- @@ -1711,8 +1711,8 @@ def mass_distance_matrix( (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any - subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. Q_subseq_isconstant : numpy.ndarray, function, default None A boolean array that indicates whether a subsequence in `Q` is constant @@ -1721,8 +1721,8 @@ def mass_distance_matrix( (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any - subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. Returns ------- @@ -1922,8 +1922,8 @@ def preprocess( (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any - subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. Returns ------- @@ -2036,8 +2036,8 @@ def preprocess_diagonal(T, m, T_subseq_isconstant=None): (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any - subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. Returns ------- diff --git a/stumpy/stump.py b/stumpy/stump.py index 9fcdb2d93..9af5e7296 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -571,8 +571,8 @@ def stump( (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any - subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. T_B_subseq_isconstant : numpy.ndarray or function, default None A boolean array that indicates whether a subsequence in `T_B` is constant @@ -581,8 +581,8 @@ def stump( (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. Any - subsequence with at least one nan/inf value will be enforced to have - the corresponding value `False` in this boolean array. + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. Returns ------- From 7586dba14f3cbfab864e898b4b8f02e243ce37af Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 23 Feb 2023 22:00:23 -0500 Subject: [PATCH 40/72] improve redability --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 7bef03698..9602c2a33 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1729,13 +1729,13 @@ def mass_distance_matrix( None """ Q, μ_Q, σ_Q, Q_subseq_isconstant = preprocess( - Q, m, T_subseq_isconstant=Q_subseq_isconstant + T=Q, m=m, copy=True, T_subseq_isconstant=Q_subseq_isconstant ) T, M_T, Σ_T, T_subseq_isconstant = preprocess( T, m, - copy=True, # TEMP comment: to be consitent with Q, remove this. + copy=True, M_T=M_T, Σ_T=Σ_T, T_subseq_isconstant=T_subseq_isconstant, From 207dda8b534cc4f078fe8a6b727b6f7399558abb Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 01:54:17 -0500 Subject: [PATCH 41/72] move if checks to the function rolling_isconstant --- stumpy/core.py | 87 +++++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 9602c2a33..f800d6f2a 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -5,7 +5,7 @@ import warnings import functools import inspect -import types +from inspect import Parameter import numpy as np from numba import njit, cuda, prange @@ -1942,26 +1942,7 @@ def preprocess( T[np.isinf(T)] = np.nan - isconstant_custom_func = None - if T_subseq_isconstant is not None: - if type(T_subseq_isconstant) not in { - np.ndarray, - types.FunctionType, - functools.partial, - }: - msg = ( - "The acceptable types for `T_subseq_isconstant`" - + " are np.ndarray, function, or functools.partial" - ) - raise ValueError(msg) - - if callable(T_subseq_isconstant): - isconstant_custom_func = T_subseq_isconstant - - if T_subseq_isconstant is None or callable(T_subseq_isconstant): - T_subseq_isconstant = rolling_isconstant( - T, m, custom_func=isconstant_custom_func - ) + T_subseq_isconstant = rolling_isconstant(T, m, custom=T_subseq_isconstant) if M_T is None or Σ_T is None: M_T, Σ_T = compute_mean_std(T, m) @@ -2064,10 +2045,7 @@ def preprocess_diagonal(T, m, T_subseq_isconstant=None): check_window_size(m, max_size=T.shape[-1]) T_subseq_isfinite = rolling_isfinite(T, m) T[~np.isfinite(T)] = np.nan - if T_subseq_isconstant is None or callable(T_subseq_isconstant): - T_subseq_isconstant = rolling_isconstant(T, m, T_subseq_isconstant) - else: - T_subseq_isconstant = np.logical_and(T_subseq_isconstant, T_subseq_isfinite) + T_subseq_isconstant = rolling_isconstant(T, m, custom=T_subseq_isconstant) T[np.isnan(T)] = 0 M_T, Σ_T = compute_mean_std(T, m) @@ -2359,7 +2337,7 @@ def _rolling_isconstant(a, w): return np.where(out == 0.0, True, False) -def rolling_isconstant(a, w, custom_func=None): +def rolling_isconstant(a, w, custom=None): """ Compute the rolling isconstant for 1-D and 2-D arrays. @@ -2375,7 +2353,7 @@ def rolling_isconstant(a, w, custom_func=None): w : numpy.ndarray The rolling window size - custom_func : object, default None + custom : np.ndarray or function, default None A custom, user-defined function that returns boolean numpy ndarray that indicate if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, and `w`, the window size, and may have default arguments if needed. When `None`, @@ -2384,21 +2362,50 @@ def rolling_isconstant(a, w, custom_func=None): Returns ------- output : numpy.ndarray - Rolling window isconstant. + Rolling window isconstant """ - rolling_isconstant_func = _rolling_isconstant - if custom_func is not None: - custom_func_args = set(inspect.signature(custom_func).parameters.keys()) - if len(set(["a", "w"]).difference(custom_func_args)): # pragma: no cover - msg = "Incompatible parameters found in `custom_func`" - warnings.warn(msg) - else: - rolling_isconstant_func = custom_func - axis = a.ndim - 1 - return np.apply_along_axis( - lambda a_row, w: rolling_isconstant_func(a_row, w), axis=axis, arr=a, w=w - ) + if custom is None: + custom = _rolling_isconstant + + if not (isinstance(custom, np.ndarray) or callable(custom)): + msg = ( + "The `custom` must be of type `np.ndarray` or a callable object. " + + f"Found {type(custom)} instead." + ) + raise ValueError(msg) + + if isinstance(custom, np.ndarray): + if not issubclass(custom.dtype.type, np.bool_): + msg = ( + f"the dtype of `custom` is {custom.dtype}" + + " but dtype `np.bool` was expected" + ) + raise ValueError(msg) + + out = custom + + else: + custom_args = [] + for arg_name, arg in inspect.signature(custom).parameters.items(): + if arg.default == Parameter.empty: + custom_args.append(arg_name) + + incomp_args = set(custom_args).difference({"a", "w"}) + if len(incomp_args) > 0: + msg = ( + f"Incompatible arguments {incomp_args} found in `custom_func`. " + + "Please provide a `custom_func` with arguments `a`, a 1-D array, " + + "and `w`, the window size." + ) + raise ValueError(msg) + + axis = a.ndim - 1 + out = np.apply_along_axis( + lambda a_row, w: custom(a_row, w), axis=axis, arr=a, w=w + ) + + return out def _get_partial_mp_func(mp_func, client=None, device_id=None): From f85d0728232601361f831e78071ea08f378b572b Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 02:02:08 -0500 Subject: [PATCH 42/72] fix black format --- stumpy/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index f800d6f2a..530ce32aa 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2364,7 +2364,6 @@ def rolling_isconstant(a, w, custom=None): output : numpy.ndarray Rolling window isconstant """ - if custom is None: custom = _rolling_isconstant From 4f96f43ef53c2ce1326c0e0e1ae4a9382aeeab75 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 20:47:14 -0500 Subject: [PATCH 43/72] revise docstring --- stumpy/core.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 530ce32aa..d7a34521d 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2354,10 +2354,13 @@ def rolling_isconstant(a, w, custom=None): The rolling window size custom : np.ndarray or function, default None - A custom, user-defined function that returns boolean numpy ndarray that indicate - if a subsequence is constant or not. It takes two arguments, `a`, a 1-D array, - and `w`, the window size, and may have default arguments if needed. When `None`, - this will be default to the function `core._rolling_isconstant`. + A boolean array that indicates whether a subsequence in `T` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. When + None, this defaults to `_rolling_isconstant`. Returns ------- From 3cb8919256ecda91bd3dcb66eb0442ac0a1ae926 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 20:53:14 -0500 Subject: [PATCH 44/72] reivse if block structure to improve readability --- stumpy/core.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index d7a34521d..f6ba653b1 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2359,7 +2359,7 @@ def rolling_isconstant(a, w, custom=None): boolean array that indicates whether a subsequence in `T` is constant (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified - by currying the user-defined function using `functools.partial`. When + by currying the user-defined function using `functools.partial`. When None, this defaults to `_rolling_isconstant`. Returns @@ -2370,24 +2370,7 @@ def rolling_isconstant(a, w, custom=None): if custom is None: custom = _rolling_isconstant - if not (isinstance(custom, np.ndarray) or callable(custom)): - msg = ( - "The `custom` must be of type `np.ndarray` or a callable object. " - + f"Found {type(custom)} instead." - ) - raise ValueError(msg) - - if isinstance(custom, np.ndarray): - if not issubclass(custom.dtype.type, np.bool_): - msg = ( - f"the dtype of `custom` is {custom.dtype}" - + " but dtype `np.bool` was expected" - ) - raise ValueError(msg) - - out = custom - - else: + if callable(custom): custom_args = [] for arg_name, arg in inspect.signature(custom).parameters.items(): if arg.default == Parameter.empty: @@ -2407,6 +2390,24 @@ def rolling_isconstant(a, w, custom=None): lambda a_row, w: custom(a_row, w), axis=axis, arr=a, w=w ) + elif isinstance(custom, np.ndarray): + if not issubclass(custom.dtype.type, np.bool_): + msg = ( + f"the dtype of `custom` is {custom.dtype}" + + " but dtype `np.bool` was expected" + ) + raise ValueError(msg) + + out = custom + + else: + if not (isinstance(custom, np.ndarray) or callable(custom)): + msg = ( + "The `custom` must be of type `np.ndarray` or a callable object. " + + f"Found {type(custom)} instead." + ) + raise ValueError(msg) + return out From d2fdc5e9a762a8f2151ade5e990dbd69c5776ffe Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 21:13:07 -0500 Subject: [PATCH 45/72] add function to consider isfinite in computing isconstant --- stumpy/core.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/stumpy/core.py b/stumpy/core.py index f6ba653b1..895e33472 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2411,6 +2411,24 @@ def rolling_isconstant(a, w, custom=None): return out +def fix_isconstant_isfinite_conflicts( + T, m, T_subseq_isconstant, T_subseq_isfinite=None +): + if T_subseq_isfinite is None: + T_subseq_isfinite = rolling_isfinite(T, m) + + fixed = np.logical_and(T_subseq_isconstant, T_subseq_isfinite) + msg = ( + "Found indices where T_subseq_isconstant is True for subsequence " + + "with at least one np.nan/np.inf. Their corresponding value in " + + "T_subseq_isconstant is changed to False. The indices are: \n " + + f"{np.where(fixed != T_subseq_isconstant)}" + ) + warnings.warn(msg) + + return fixed + + def _get_partial_mp_func(mp_func, client=None, device_id=None): """ A convenience function for creating a `functools.partial` matrix profile function From abd63e1d96dfdd8d540eadfe4e6834db5b91dbbd Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 21:28:55 -0500 Subject: [PATCH 46/72] add warning when isconstant=True for non-finite subseq --- stumpy/core.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 895e33472..d7c8267bd 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2418,13 +2418,16 @@ def fix_isconstant_isfinite_conflicts( T_subseq_isfinite = rolling_isfinite(T, m) fixed = np.logical_and(T_subseq_isconstant, T_subseq_isfinite) - msg = ( - "Found indices where T_subseq_isconstant is True for subsequence " - + "with at least one np.nan/np.inf. Their corresponding value in " - + "T_subseq_isconstant is changed to False. The indices are: \n " - + f"{np.where(fixed != T_subseq_isconstant)}" - ) - warnings.warn(msg) + + conflicts = fixed != T_subseq_isconstant + if np.any(conflicts): + msg = ( + "Found indices where T_subseq_isconstant is True for subsequence " + + "with at least one np.nan/np.inf. Their corresponding value in " + + "T_subseq_isconstant is changed to False. The indices are: \n " + + f"{np.nonzero(conflicts)}" + ) + warnings.warn(msg) return fixed From b3100f5e939818410fd9a3b718d78525908e56b0 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 21:34:01 -0500 Subject: [PATCH 47/72] minor changes --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index d7c8267bd..96aa8e330 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2424,8 +2424,8 @@ def fix_isconstant_isfinite_conflicts( msg = ( "Found indices where T_subseq_isconstant is True for subsequence " + "with at least one np.nan/np.inf. Their corresponding value in " - + "T_subseq_isconstant is changed to False. The indices are: \n " - + f"{np.nonzero(conflicts)}" + + "T_subseq_isconstant is changed to False. The affected indices " + + f" are: \n {np.nonzero(conflicts)}" ) warnings.warn(msg) From 689f09629c0a6f5e3c297b38762525c14260ad07 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 21:57:21 -0500 Subject: [PATCH 48/72] add docstring and add function to preprocess --- stumpy/core.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 96aa8e330..6b84d751f 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1943,7 +1943,7 @@ def preprocess( T[np.isinf(T)] = np.nan T_subseq_isconstant = rolling_isconstant(T, m, custom=T_subseq_isconstant) - + T_subseq_isconstant = fix_isconstant_isfinite_conflicts(T, m, T_subseq_isconstant) if M_T is None or Σ_T is None: M_T, Σ_T = compute_mean_std(T, m) T[np.isnan(T)] = 0 @@ -2046,6 +2046,9 @@ def preprocess_diagonal(T, m, T_subseq_isconstant=None): T_subseq_isfinite = rolling_isfinite(T, m) T[~np.isfinite(T)] = np.nan T_subseq_isconstant = rolling_isconstant(T, m, custom=T_subseq_isconstant) + T_subseq_isconstant = fix_isconstant_isfinite_conflicts( + T, m, T_subseq_isconstant, T_subseq_isfinite + ) T[np.isnan(T)] = 0 M_T, Σ_T = compute_mean_std(T, m) @@ -2414,6 +2417,32 @@ def rolling_isconstant(a, w, custom=None): def fix_isconstant_isfinite_conflicts( T, m, T_subseq_isconstant, T_subseq_isfinite=None ): + """ + Fix `T_subseq_isconstant` by setting its element to False if their + corresponding value in `T_subseq_isfinite` is False. + + Parameters + ---------- + T : numpy.ndarray + Time series + + m : int + Subsequence window size + + T_subseq_isconstant : numpy.ndarray + A numpy array `dtype` of boolean that indicates whether a subsequence + is constant (True) or not (False). + + T_subseq_isfinite : numpy.ndarray, default None + A boolean array that indicates whether a subsequence in `T` contains a + `np.nan`/`np.inf` value (False) + + Returns + ------- + fixed : numpy.ndarray + The same as input `T_subseq_isconstant` but with indices set to False + if their corresponding subsequence are not finite. + """ if T_subseq_isfinite is None: T_subseq_isfinite = rolling_isfinite(T, m) From f400e9504773f5d8e9cb7405e440910aa042b169 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 23:05:18 -0500 Subject: [PATCH 49/72] add test function for fix_isconstant --- tests/test_core.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 4ac2e1722..618eef4dc 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1527,3 +1527,22 @@ def test_gpu_searchsorted(): def test_client_to_func(): with pytest.raises(NotImplementedError): core._client_to_func(core) + + +def test_fix_isconstant_isfinite_conflicts(): + T = np.full(12, 0.0, dtype=np.float64) + nan_indices = [1, 5, 9] + for idx in nan_indices: + T[idx] = np.nan + + m = 3 + + n = len(T) - m + 1 + T_subseq_isconstant = np.full(n, 1, dtype=bool) + + ref = T_subseq_isconstant.copy() + ref[~core.rolling_isfinite(T, m)] = False + + comp = core.fix_isconstant_isfinite_conflicts(T, m, T_subseq_isconstant) + + npt.assert_almost_equal(ref, comp) From bbf3b666276819b9025452e233ae3974cd972cc1 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 25 Feb 2023 23:14:16 -0500 Subject: [PATCH 50/72] remove error when subseq_isconstant does not cover all stddev=0 cases --- stumpy/core.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 6b84d751f..57ce770eb 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2053,12 +2053,6 @@ def preprocess_diagonal(T, m, T_subseq_isconstant=None): M_T, Σ_T = compute_mean_std(T, m) Σ_T[T_subseq_isconstant] = 1.0 # Avoid divide by zero in next inversion step - if np.any(Σ_T == 0.0): # pragma nocover - raise ValueError( - "The sliding standard deviation of input contains 0.0 at indices" - "where T_subseq_isconstant is False. Try to set those indices to" - "True in `T_subseq_isconstant`." - ) Σ_T_inverse = 1.0 / Σ_T M_T_m_1, _ = compute_mean_std(T, m - 1) From 9e86b27210c62dcee738604cb8d21f95489fe117 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 26 Feb 2023 00:09:48 -0500 Subject: [PATCH 51/72] remove unnecessary if condition --- stumpy/core.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 57ce770eb..ba0de554a 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2398,12 +2398,11 @@ def rolling_isconstant(a, w, custom=None): out = custom else: - if not (isinstance(custom, np.ndarray) or callable(custom)): - msg = ( - "The `custom` must be of type `np.ndarray` or a callable object. " - + f"Found {type(custom)} instead." - ) - raise ValueError(msg) + msg = ( + "The `custom` must be of type `np.ndarray` or a callable object. " + + f"Found {type(custom)} instead." + ) + raise ValueError(msg) return out From 791d65d00dd039a13b4ffb335cf1f3c328debef5 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 26 Feb 2023 01:18:52 -0500 Subject: [PATCH 52/72] minor changes --- stumpy/core.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index ba0de554a..5921f41e2 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2361,7 +2361,7 @@ def rolling_isconstant(a, w, custom=None): Returns ------- - output : numpy.ndarray + T_subseq_isconstant : numpy.ndarray Rolling window isconstant """ if custom is None: @@ -2383,19 +2383,12 @@ def rolling_isconstant(a, w, custom=None): raise ValueError(msg) axis = a.ndim - 1 - out = np.apply_along_axis( + T_subseq_isconstant = np.apply_along_axis( lambda a_row, w: custom(a_row, w), axis=axis, arr=a, w=w ) elif isinstance(custom, np.ndarray): - if not issubclass(custom.dtype.type, np.bool_): - msg = ( - f"the dtype of `custom` is {custom.dtype}" - + " but dtype `np.bool` was expected" - ) - raise ValueError(msg) - - out = custom + T_subseq_isconstant = custom else: msg = ( @@ -2404,7 +2397,14 @@ def rolling_isconstant(a, w, custom=None): ) raise ValueError(msg) - return out + if not issubclass(T_subseq_isconstant.dtype.type, np.bool_): + msg = ( + f"The dtype of `T_subseq_isconstant` is {T_subseq_isconstant.dtype} " + + "but dtype `np.bool` was expected" + ) + raise ValueError(msg) + + return T_subseq_isconstant def fix_isconstant_isfinite_conflicts( From 34aafe8c1970ec9bb96e1158dd2c16859c4f75d6 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 26 Feb 2023 02:22:07 -0500 Subject: [PATCH 53/72] Add pragma nocover comments --- stumpy/core.py | 6 +++--- tests/naive.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 5921f41e2..ed95c23f0 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2374,7 +2374,7 @@ def rolling_isconstant(a, w, custom=None): custom_args.append(arg_name) incomp_args = set(custom_args).difference({"a", "w"}) - if len(incomp_args) > 0: + if len(incomp_args) > 0: # pragma: no cover msg = ( f"Incompatible arguments {incomp_args} found in `custom_func`. " + "Please provide a `custom_func` with arguments `a`, a 1-D array, " @@ -2390,14 +2390,14 @@ def rolling_isconstant(a, w, custom=None): elif isinstance(custom, np.ndarray): T_subseq_isconstant = custom - else: + else: # pragma: no cover msg = ( "The `custom` must be of type `np.ndarray` or a callable object. " + f"Found {type(custom)} instead." ) raise ValueError(msg) - if not issubclass(T_subseq_isconstant.dtype.type, np.bool_): + if not issubclass(T_subseq_isconstant.dtype.type, np.bool_): # pragma: no cover msg = ( f"The dtype of `T_subseq_isconstant` is {T_subseq_isconstant.dtype} " + "but dtype `np.bool` was expected" diff --git a/tests/naive.py b/tests/naive.py index 4787302dd..cf6b4ed4f 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -235,7 +235,7 @@ def stump( elif T_A_subseq_isconstant[i] or T_B_subseq_isconstant[j]: distance_matrix[i, j] = np.sqrt(m) else: - continue + pass n_A = T_A.shape[0] n_B = T_B.shape[0] From c327a6ab98a08be358ed56be35f0ee6b37ac428d Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 26 Feb 2023 02:52:08 -0500 Subject: [PATCH 54/72] reduce number of imports --- stumpy/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index ed95c23f0..ff1093102 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -5,7 +5,6 @@ import warnings import functools import inspect -from inspect import Parameter import numpy as np from numba import njit, cuda, prange @@ -2370,7 +2369,7 @@ def rolling_isconstant(a, w, custom=None): if callable(custom): custom_args = [] for arg_name, arg in inspect.signature(custom).parameters.items(): - if arg.default == Parameter.empty: + if arg.default == inspect.Parameter.empty: custom_args.append(arg_name) incomp_args = set(custom_args).difference({"a", "w"}) From 8b54ed8b79a0185408690f68d1d8fae2a2f47a78 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Wed, 1 Mar 2023 03:07:18 -0500 Subject: [PATCH 55/72] re-design function rolling_isconstant --- stumpy/core.py | 54 ++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index ff1093102..2b29a1632 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -1941,7 +1941,7 @@ def preprocess( T[np.isinf(T)] = np.nan - T_subseq_isconstant = rolling_isconstant(T, m, custom=T_subseq_isconstant) + T_subseq_isconstant = rolling_isconstant(T, m, T_subseq_isconstant) T_subseq_isconstant = fix_isconstant_isfinite_conflicts(T, m, T_subseq_isconstant) if M_T is None or Σ_T is None: M_T, Σ_T = compute_mean_std(T, m) @@ -2044,7 +2044,7 @@ def preprocess_diagonal(T, m, T_subseq_isconstant=None): check_window_size(m, max_size=T.shape[-1]) T_subseq_isfinite = rolling_isfinite(T, m) T[~np.isfinite(T)] = np.nan - T_subseq_isconstant = rolling_isconstant(T, m, custom=T_subseq_isconstant) + T_subseq_isconstant = rolling_isconstant(T, m, T_subseq_isconstant) T_subseq_isconstant = fix_isconstant_isfinite_conflicts( T, m, T_subseq_isconstant, T_subseq_isfinite ) @@ -2333,7 +2333,7 @@ def _rolling_isconstant(a, w): return np.where(out == 0.0, True, False) -def rolling_isconstant(a, w, custom=None): +def rolling_isconstant(a, w, T_subseq_isconstant=None): """ Compute the rolling isconstant for 1-D and 2-D arrays. @@ -2349,7 +2349,7 @@ def rolling_isconstant(a, w, custom=None): w : numpy.ndarray The rolling window size - custom : np.ndarray or function, default None + T_subseq_isconstant : np.ndarray or function, default None A boolean array that indicates whether a subsequence in `T` is constant (True). Alternatively, a custom, user-defined function that returns a boolean array that indicates whether a subsequence in `T` is constant @@ -2363,42 +2363,48 @@ def rolling_isconstant(a, w, custom=None): T_subseq_isconstant : numpy.ndarray Rolling window isconstant """ - if custom is None: - custom = _rolling_isconstant + if T_subseq_isconstant is None: + T_subseq_isconstant = _rolling_isconstant - if callable(custom): - custom_args = [] - for arg_name, arg in inspect.signature(custom).parameters.items(): + isconstant_func = None + if callable(T_subseq_isconstant): + non_default_args = [] + for arg_name, arg in inspect.signature(T_subseq_isconstant).parameters.items(): + # inspect.signature(functools.partial(f)) returns all arguments + # including the ones with default values. the following if block + # is to find non-default arguments. if arg.default == inspect.Parameter.empty: - custom_args.append(arg_name) + non_default_args.append(arg_name) - incomp_args = set(custom_args).difference({"a", "w"}) + incomp_args = set(non_default_args).difference({"a", "w"}) if len(incomp_args) > 0: # pragma: no cover msg = ( - f"Incompatible arguments {incomp_args} found in `custom_func`. " - + "Please provide a `custom_func` with arguments `a`, a 1-D array, " - + "and `w`, the window size." + f"Incompatible arguments {incomp_args} found in `T_subseq_isconstant`. " + + "Please provide the custom function `T_subseq_isconstant` with " + + "arguments `a`, a 1-D array, and `w`, the window size." ) raise ValueError(msg) - axis = a.ndim - 1 - T_subseq_isconstant = np.apply_along_axis( - lambda a_row, w: custom(a_row, w), axis=axis, arr=a, w=w - ) - - elif isinstance(custom, np.ndarray): - T_subseq_isconstant = custom + isconstant_func = T_subseq_isconstant + elif isinstance(T_subseq_isconstant, np.ndarray): + isconstant_func = None else: # pragma: no cover msg = ( - "The `custom` must be of type `np.ndarray` or a callable object. " - + f"Found {type(custom)} instead." + "`T_subseq_isconstant` must be of type `np.ndarray` or a callable " + + f"function. Found {type(T_subseq_isconstant)} instead." ) raise ValueError(msg) + if isconstant_func is not None: + axis = a.ndim - 1 + T_subseq_isconstant = np.apply_along_axis( + lambda a_row, w: isconstant_func(a_row, w), axis=axis, arr=a, w=w + ) + if not issubclass(T_subseq_isconstant.dtype.type, np.bool_): # pragma: no cover msg = ( - f"The dtype of `T_subseq_isconstant` is {T_subseq_isconstant.dtype} " + f"The output dtype of `T_subseq_isconstant` is {T_subseq_isconstant.dtype} " + "but dtype `np.bool` was expected" ) raise ValueError(msg) From 9c4e2f58e11a65a0fd956bdf72437750666d7747 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Wed, 1 Mar 2023 03:11:15 -0500 Subject: [PATCH 56/72] improve readability of comment --- stumpy/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 2b29a1632..556aacc76 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2449,10 +2449,10 @@ def fix_isconstant_isfinite_conflicts( conflicts = fixed != T_subseq_isconstant if np.any(conflicts): msg = ( - "Found indices where T_subseq_isconstant is True for subsequence " - + "with at least one np.nan/np.inf. Their corresponding value in " - + "T_subseq_isconstant is changed to False. The affected indices " - + f" are: \n {np.nonzero(conflicts)}" + f"Subsequences located at indices {np.nonzero(conflicts)} contain one " + + "or more np.nan/np.inf and so their corresponding values in " + + "`T_subseq_isconstant` have been automatically switched from True " + + " to False." ) warnings.warn(msg) From 66f42d29cf8544b313bcfe41d9784721a894c5cd Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 4 Mar 2023 20:00:58 -0500 Subject: [PATCH 57/72] added new test function --- tests/test_stump.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/test_stump.py b/tests/test_stump.py index 6b37352d3..9b52086a3 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -274,7 +274,27 @@ def test_stump_A_B_join_KNN(T_A, T_B): @pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join_custom_func_(T_A, T_B): +def test_stump_self_join_custom_isconstant_as_arr(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) + + subseq_isconstant = np.random.choice( + [True, False], size=len(T_B) - m + 1, replace=True + ) + + ref_mp = naive.stump( + T_A=T_B, m=m, exclusion_zone=zone, T_A_subseq_isconstant=subseq_isconstant + ) + comp_mp = stump( + T_A=T_B, m=m, ignore_trivial=True, T_A_subseq_isconstant=subseq_isconstant + ) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stump_self_join_custom_isconstant_as_func(T_A, T_B): m = 3 zone = int(np.ceil(m / 4)) isconstant_custom_func = functools.partial( From acbc499ec2609af4552cb774620f290347b6e107 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 4 Mar 2023 20:46:11 -0500 Subject: [PATCH 58/72] wrap a function around func signature check and add unit test --- stumpy/core.py | 33 +++++++++++++++++++++++++++++++++ tests/test_core.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/stumpy/core.py b/stumpy/core.py index 556aacc76..5d3cdae5a 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -3518,3 +3518,36 @@ def _client_to_func(client): func = getattr(module, prefix + calling_func) return func + + +def is_signature_compatible(func, required_args): + """ + For a given `func` and `requried_args`, return True if the non-default + arguments in `func` is a subset of `required_args` + + Parameters + ---------- + func : object, callable + A callable object + + required_args : set + A set of strings, containing the name of required arguments + + Returns + ------- + out : bool + True if the non-default arguments in `func` is a subset of + required arguments + """ + if not isinstance(required_args, list): + required_args = list(required_args) + + non_default_args = [] + for arg_name, arg in inspect.signature(func).parameters.items(): + # inspect.signature(functools.partial(func)) returns all arguments + # including the ones with default values. the following if block + # is to find non-default arguments. + if arg.default == inspect.Parameter.empty: + non_default_args.append(arg_name) + + return set(non_default_args).issubset(set(required_args)) diff --git a/tests/test_core.py b/tests/test_core.py index 618eef4dc..637c1c470 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1546,3 +1546,39 @@ def test_fix_isconstant_isfinite_conflicts(): comp = core.fix_isconstant_isfinite_conflicts(T, m, T_subseq_isconstant) npt.assert_almost_equal(ref, comp) + + +def test_is_signature_compatible(): + required_args = ("x", "y") + + def func_case1(x, y): + pass + + assert core.is_signature_compatible(func_case1, required_args) + + def func_case2(x, y=None): + pass + + assert core.is_signature_compatible(func_case2, required_args) + + def func_case3(x=None, y=None): + pass + + assert core.is_signature_compatible(func_case3, required_args) + + def func_case4(x, y, z=None): + pass + + assert core.is_signature_compatible(func_case4, required_args) + + def func_case5(x, y, z): + pass + + assert core.is_signature_compatible( + functools.partial(func_case5, z=None), required_args + ) + + def func_case6(x, y, z): + pass + + assert not core.is_signature_compatible(func_case6, required_args) From 7e7b6048922b79911b8330208756fd0c10d9f32e Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 4 Mar 2023 23:02:33 -0500 Subject: [PATCH 59/72] change output of function and update functions and tests accordingly --- stumpy/core.py | 28 ++++++++++------------------ tests/test_core.py | 45 ++++++++++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 5d3cdae5a..867bf51b5 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2368,15 +2368,7 @@ def rolling_isconstant(a, w, T_subseq_isconstant=None): isconstant_func = None if callable(T_subseq_isconstant): - non_default_args = [] - for arg_name, arg in inspect.signature(T_subseq_isconstant).parameters.items(): - # inspect.signature(functools.partial(f)) returns all arguments - # including the ones with default values. the following if block - # is to find non-default arguments. - if arg.default == inspect.Parameter.empty: - non_default_args.append(arg_name) - - incomp_args = set(non_default_args).difference({"a", "w"}) + incomp_args = find_incompatible_args(T_subseq_isconstant, ["a", "w"]) if len(incomp_args) > 0: # pragma: no cover msg = ( f"Incompatible arguments {incomp_args} found in `T_subseq_isconstant`. " @@ -3520,24 +3512,24 @@ def _client_to_func(client): return func -def is_signature_compatible(func, required_args): +def find_incompatible_args(func, required_args): """ - For a given `func` and `requried_args`, return True if the non-default - arguments in `func` is a subset of `required_args` + For a given `func` and `requried_args`, return non-default + arguments in `func` that are not in `required_args` Parameters ---------- func : object, callable A callable object - required_args : set - A set of strings, containing the name of required arguments + required_args : list + A lis containing the name of required arguments. Returns ------- - out : bool - True if the non-default arguments in `func` is a subset of - required arguments + out : set + A set of non-default arguments in `func` which are not in + required_args """ if not isinstance(required_args, list): required_args = list(required_args) @@ -3550,4 +3542,4 @@ def is_signature_compatible(func, required_args): if arg.default == inspect.Parameter.empty: non_default_args.append(arg_name) - return set(non_default_args).issubset(set(required_args)) + return set(non_default_args).difference(set(required_args)) diff --git a/tests/test_core.py b/tests/test_core.py index 637c1c470..06bb1492b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1548,37 +1548,44 @@ def test_fix_isconstant_isfinite_conflicts(): npt.assert_almost_equal(ref, comp) -def test_is_signature_compatible(): - required_args = ("x", "y") - +def test_find_incompatible_args(): + # case1: having exact required argument def func_case1(x, y): - pass + return - assert core.is_signature_compatible(func_case1, required_args) + assert core.find_incompatible_args(func_case1, required_args=("x", "y")) == set() + # case2: one argument has default value. def func_case2(x, y=None): - pass + return - assert core.is_signature_compatible(func_case2, required_args) + assert core.find_incompatible_args(func_case2, required_args=("x", "y")) == set() + # case3: both argument has default values. def func_case3(x=None, y=None): - pass + return - assert core.is_signature_compatible(func_case3, required_args) + assert core.find_incompatible_args(func_case3, required_args=("x", "y")) == set() - def func_case4(x, y, z=None): - pass + # case4: having one extra argument `z` + def func_case4(x, y, z): + return - assert core.is_signature_compatible(func_case4, required_args) + assert core.find_incompatible_args(func_case4, required_args=("x", "y")) == {"z"} - def func_case5(x, y, z): - pass + # case5: having one extra argument `z`, but with default + def func_case5(x, y, z=None): + return - assert core.is_signature_compatible( - functools.partial(func_case5, z=None), required_args - ) + assert core.find_incompatible_args(func_case5, required_args=("x", "y")) == set() + # case6: one extra argument `z`, and using functools.partial def func_case6(x, y, z): - pass + return - assert not core.is_signature_compatible(func_case6, required_args) + assert ( + core.find_incompatible_args( + functools.partial(func_case6, z=None), required_args=("x", "y") + ) + == set() + ) From f0968d4d47510e9688826b3fbf3bcf4e4f1dc04b Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 4 Mar 2023 23:19:07 -0500 Subject: [PATCH 60/72] remove unnecessary test function --- tests/test_stump.py | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/tests/test_stump.py b/tests/test_stump.py index 9b52086a3..a10854859 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -274,32 +274,23 @@ def test_stump_A_B_join_KNN(T_A, T_B): @pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join_custom_isconstant_as_arr(T_A, T_B): +def test_stump_self_join_custom_isconstant(T_A, T_B): m = 3 zone = int(np.ceil(m / 4)) - - subseq_isconstant = np.random.choice( - [True, False], size=len(T_B) - m + 1, replace=True + isconstant_custom_func = functools.partial( + naive.isconstant_func_stddev_threshold, quantile_threshold=0.05 ) + # case 1: custom isconstant is a boolean array + T_B_subseq_isconstant = naive.rolling_isconstant(T_B, m, isconstant_custom_func) ref_mp = naive.stump( - T_A=T_B, m=m, exclusion_zone=zone, T_A_subseq_isconstant=subseq_isconstant + T_B, m, exclusion_zone=zone, T_A_subseq_isconstant=T_B_subseq_isconstant ) comp_mp = stump( - T_A=T_B, m=m, ignore_trivial=True, T_A_subseq_isconstant=subseq_isconstant + T_B, m, ignore_trivial=True, T_A_subseq_isconstant=T_B_subseq_isconstant ) - naive.replace_inf(ref_mp) - naive.replace_inf(comp_mp) - npt.assert_almost_equal(ref_mp, comp_mp) - -@pytest.mark.parametrize("T_A, T_B", test_data) -def test_stump_self_join_custom_isconstant_as_func(T_A, T_B): - m = 3 - zone = int(np.ceil(m / 4)) - isconstant_custom_func = functools.partial( - naive.isconstant_func_stddev_threshold, quantile_threshold=0.05 - ) + # case 2: custom isconstant is func ref_mp = naive.stump( T_B, m, exclusion_zone=zone, T_A_subseq_isconstant=isconstant_custom_func ) @@ -309,12 +300,3 @@ def test_stump_self_join_custom_isconstant_as_func(T_A, T_B): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) - - # testing when `subseq_isconstant` being passed as boolean array - T_B_subseq_isconstant = naive.rolling_isconstant(T_B, m, isconstant_custom_func) - ref_mp = naive.stump( - T_B, m, exclusion_zone=zone, T_A_subseq_isconstant=T_B_subseq_isconstant - ) - comp_mp = stump( - T_B, m, ignore_trivial=True, T_A_subseq_isconstant=T_B_subseq_isconstant - ) From 747c0e681b73b835c08a34348c095c07dc55ff61 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 5 Mar 2023 00:50:48 -0500 Subject: [PATCH 61/72] add param isconstant to stumped and update unit test --- stumpy/stumped.py | 38 ++++++++++++++++++++++++++--- tests/test_stump.py | 9 +++++-- tests/test_stumped.py | 57 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 97 insertions(+), 7 deletions(-) diff --git a/stumpy/stumped.py b/stumpy/stumped.py index 5dfec2169..c40468866 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -206,7 +206,18 @@ def _dask_stumped( @core.non_normalized(aamped) -def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0, k=1): +def stumped( + client, + T_A, + m, + T_B=None, + ignore_trivial=True, + normalize=True, + p=2.0, + k=1, + T_A_subseq_isconstant=None, + T_B_subseq_isconstant=None, +): """ Compute the z-normalized matrix profile with a distributed dask/ray cluster @@ -251,6 +262,26 @@ def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0 when k > 1. If you have access to a GPU device, then you may be able to leverage `gpu_stump` for better performance and scalability. + T_A_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T_A` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T_A` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. + + T_B_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T_B` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T_B` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. + Returns ------- out : numpy.ndarray @@ -340,6 +371,7 @@ def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0 if T_B is None: T_B = T_A ignore_trivial = True + T_B_subseq_isconstant = T_A_subseq_isconstant ( T_A, @@ -348,7 +380,7 @@ def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0 μ_Q_m_1, T_A_subseq_isfinite, T_A_subseq_isconstant, - ) = core.preprocess_diagonal(T_A, m) + ) = core.preprocess_diagonal(T_A, m, T_A_subseq_isconstant) ( T_B, @@ -357,7 +389,7 @@ def stumped(client, T_A, m, T_B=None, ignore_trivial=True, normalize=True, p=2.0 M_T_m_1, T_B_subseq_isfinite, T_B_subseq_isconstant, - ) = core.preprocess_diagonal(T_B, m) + ) = core.preprocess_diagonal(T_B, m, T_B_subseq_isconstant) if T_A.ndim != 1: # pragma: no cover raise ValueError( diff --git a/tests/test_stump.py b/tests/test_stump.py index a10854859..a7987da20 100644 --- a/tests/test_stump.py +++ b/tests/test_stump.py @@ -1,10 +1,12 @@ import numpy as np import numpy.testing as npt import pandas as pd -from stumpy import stump, config +import functools import pytest + +from stumpy import stump, config import naive -import functools + test_data = [ ( @@ -289,6 +291,9 @@ def test_stump_self_join_custom_isconstant(T_A, T_B): comp_mp = stump( T_B, m, ignore_trivial=True, T_A_subseq_isconstant=T_B_subseq_isconstant ) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) # case 2: custom isconstant is func ref_mp = naive.stump( diff --git a/tests/test_stumped.py b/tests/test_stumped.py index 9181d81c8..066dabc99 100644 --- a/tests/test_stumped.py +++ b/tests/test_stumped.py @@ -1,9 +1,11 @@ import numpy as np import numpy.testing as npt import pandas as pd -from stumpy import config, stumped -from dask.distributed import Client, LocalCluster +import functools import pytest +from dask.distributed import Client, LocalCluster + +from stumpy import config, stumped import naive @@ -641,3 +643,54 @@ def test_stumped_A_B_join_KNN(T_A, T_B, dask_cluster): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.filterwarnings("ignore:numpy.dtype size changed") +@pytest.mark.filterwarnings("ignore:numpy.ufunc size changed") +@pytest.mark.filterwarnings("ignore:numpy.ndarray size changed") +@pytest.mark.filterwarnings("ignore:\\s+Port 8787 is already in use:UserWarning") +@pytest.mark.parametrize("T_A, T_B", test_data) +def test_stumped_self_join_custom_isconstant(T_A, T_B, dask_cluster): + m = 3 + zone = int(np.ceil(m / 4)) + isconstant_custom_func = functools.partial( + naive.isconstant_func_stddev_threshold, quantile_threshold=0.05 + ) + + with Client(dask_cluster) as dask_client: + # case 1: custom isconstant is a boolean array + T_B_subseq_isconstant = naive.rolling_isconstant(T_B, m, isconstant_custom_func) + ref_mp = naive.stump( + T_A=T_B, + m=m, + exclusion_zone=zone, + T_A_subseq_isconstant=T_B_subseq_isconstant, + ) + comp_mp = stumped( + dask_client, + T_A=T_B, + m=m, + ignore_trivial=True, + T_A_subseq_isconstant=T_B_subseq_isconstant, + ) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + # case 2: custom isconstant is func + ref_mp = naive.stump( + T_A=T_B, + m=m, + exclusion_zone=zone, + T_A_subseq_isconstant=isconstant_custom_func, + ) + comp_mp = stumped( + dask_client, + T_A=T_B, + m=m, + ignore_trivial=True, + T_A_subseq_isconstant=isconstant_custom_func, + ) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From bd2dc53c1d34429790b25555168b476e6fe1bbaf Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 5 Mar 2023 04:24:20 -0500 Subject: [PATCH 62/72] fixed non_normalized decorator --- stumpy/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/core.py b/stumpy/core.py index 867bf51b5..1bef9cb5d 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -121,7 +121,7 @@ def norm_func(Q, T, A_norm=None, other_norm=None, normalize=True, p=2.0): The desired z-normalized/non-normalized function (or class) """ if exclude is None: - exclude = ["normalize", "p"] + exclude = ["normalize", "p", "T_A_subseq_isconstant", "T_B_subseq_isconstant"] @functools.wraps(non_norm) def outer_wrapper(norm): From 0dc10a0720985d04ce81c5c9c290c7c63618ecff Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 5 Mar 2023 14:16:53 -0500 Subject: [PATCH 63/72] Add param subseq_isconstant to gpu_stump and add its test function --- stumpy/gpu_stump.py | 37 ++++++++++++++++++++++++--- tests/test_gpu_stump.py | 55 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 4 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 073b75c14..661d954dd 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -4,7 +4,6 @@ import math import multiprocessing as mp import os - import numpy as np from numba import cuda @@ -477,7 +476,16 @@ def _gpu_stump( @core.non_normalized(gpu_aamp) def gpu_stump( - T_A, m, T_B=None, ignore_trivial=True, device_id=0, normalize=True, p=2.0, k=1 + T_A, + m, + T_B=None, + ignore_trivial=True, + device_id=0, + normalize=True, + p=2.0, + k=1, + T_A_subseq_isconstant=None, + T_B_subseq_isconstant=None, ): """ Compute the z-normalized matrix profile with one or more GPU devices @@ -525,6 +533,26 @@ def gpu_stump( Note that this will increase the total computational time and memory usage when k > 1. + T_A_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T_A` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T_A` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. + + T_B_subseq_isconstant : numpy.ndarray or function, default None + A boolean array that indicates whether a subsequence in `T_B` is constant + (True). Alternatively, a custom, user-defined function that returns a + boolean array that indicates whether a subsequence in `T_B` is constant + (True). The function must only take two arguments, `a`, a 1-D array, + and `w`, the window size, while additional arguments may be specified + by currying the user-defined function using `functools.partial`. Any + subsequence with at least one np.nan/np.inf will automatically have its + corresponding value set to False in this boolean array. + Returns ------- out : numpy.ndarray @@ -592,9 +620,10 @@ def gpu_stump( if T_B is None: # Self join! T_B = T_A ignore_trivial = True + T_B_subseq_isconstant = T_A_subseq_isconstant - T_A, M_T, Σ_T, T_subseq_isconstant = core.preprocess(T_A, m) - T_B, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess(T_B, m) + T_A, M_T, Σ_T, T_subseq_isconstant = core.preprocess(T_A, m, T_A_subseq_isconstant) + T_B, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess(T_B, m, T_B_subseq_isconstant) if T_A.ndim != 1: # pragma: no cover raise ValueError( diff --git a/tests/test_gpu_stump.py b/tests/test_gpu_stump.py index c7517929d..c2eca608e 100644 --- a/tests/test_gpu_stump.py +++ b/tests/test_gpu_stump.py @@ -5,6 +5,7 @@ from stumpy import config from numba import cuda from unittest.mock import patch +import functools try: from numba.errors import NumbaPerformanceWarning @@ -396,3 +397,57 @@ def test_gpu_stump_A_B_join_KNN(T_A, T_B): naive.replace_inf(ref_mp) naive.replace_inf(comp_mp) npt.assert_almost_equal(ref_mp, comp_mp) + + +@pytest.mark.filterwarnings("ignore", category=NumbaPerformanceWarning) +@pytest.mark.parametrize("T_A, T_B", test_data) +@patch("stumpy.config.STUMPY_THREADS_PER_BLOCK", TEST_THREADS_PER_BLOCK) +def test_gpu_stump_self_join_custom_isconstant(T_A, T_B): + m = 3 + zone = int(np.ceil(m / 4)) + isconstant_custom_func = functools.partial( + naive.isconstant_func_stddev_threshold, quantile_threshold=0.05 + ) + + # case 1: custom isconstant is a boolean array + T_B_subseq_isconstant = naive.rolling_isconstant(T_B, m, isconstant_custom_func) + for k in range(2, 4): + ref_mp = naive.stump( + T_A=T_B, + m=m, + exclusion_zone=zone, + row_wise=True, + k=k, + T_A_subseq_isconstant=T_B_subseq_isconstant, + ) + comp_mp = gpu_stump( + T_A=T_B, + m=m, + ignore_trivial=True, + k=k, + T_A_subseq_isconstant=T_B_subseq_isconstant, + ) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) + + # case 2: custom isconstant is func + for k in range(2, 4): + ref_mp = naive.stump( + T_A=T_B, + m=m, + exclusion_zone=zone, + row_wise=True, + k=k, + T_A_subseq_isconstant=isconstant_custom_func, + ) + comp_mp = gpu_stump( + T_A=T_B, + m=m, + ignore_trivial=True, + k=k, + T_A_subseq_isconstant=isconstant_custom_func, + ) + naive.replace_inf(ref_mp) + naive.replace_inf(comp_mp) + npt.assert_almost_equal(ref_mp, comp_mp) From 4cdf90d842f07043ec2263fa6574680a4287e6f9 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 5 Mar 2023 14:25:20 -0500 Subject: [PATCH 64/72] minor fixxes --- stumpy/gpu_stump.py | 8 ++++++-- stumpy/stump.py | 4 ++-- stumpy/stumped.py | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 661d954dd..9a8f34ecd 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -622,8 +622,12 @@ def gpu_stump( ignore_trivial = True T_B_subseq_isconstant = T_A_subseq_isconstant - T_A, M_T, Σ_T, T_subseq_isconstant = core.preprocess(T_A, m, T_A_subseq_isconstant) - T_B, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess(T_B, m, T_B_subseq_isconstant) + T_A, M_T, Σ_T, T_subseq_isconstant = core.preprocess( + T_A, m, T_subseq_isconstant=T_A_subseq_isconstant + ) + T_B, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess( + T_B, m, T_subseq_isconstant=T_B_subseq_isconstant + ) if T_A.ndim != 1: # pragma: no cover raise ValueError( diff --git a/stumpy/stump.py b/stumpy/stump.py index 9af5e7296..66dac4361 100644 --- a/stumpy/stump.py +++ b/stumpy/stump.py @@ -673,7 +673,7 @@ def stump( μ_Q_m_1, T_A_subseq_isfinite, T_A_subseq_isconstant, - ) = core.preprocess_diagonal(T_A, m, T_A_subseq_isconstant) + ) = core.preprocess_diagonal(T_A, m, T_subseq_isconstant=T_A_subseq_isconstant) ( T_B, @@ -682,7 +682,7 @@ def stump( M_T_m_1, T_B_subseq_isfinite, T_B_subseq_isconstant, - ) = core.preprocess_diagonal(T_B, m, T_B_subseq_isconstant) + ) = core.preprocess_diagonal(T_B, m, T_subseq_isconstant=T_B_subseq_isconstant) if T_A.ndim != 1: # pragma: no cover raise ValueError( diff --git a/stumpy/stumped.py b/stumpy/stumped.py index c40468866..59e946290 100644 --- a/stumpy/stumped.py +++ b/stumpy/stumped.py @@ -380,7 +380,7 @@ def stumped( μ_Q_m_1, T_A_subseq_isfinite, T_A_subseq_isconstant, - ) = core.preprocess_diagonal(T_A, m, T_A_subseq_isconstant) + ) = core.preprocess_diagonal(T_A, m, T_subseq_isconstant=T_A_subseq_isconstant) ( T_B, @@ -389,7 +389,7 @@ def stumped( M_T_m_1, T_B_subseq_isfinite, T_B_subseq_isconstant, - ) = core.preprocess_diagonal(T_B, m, T_B_subseq_isconstant) + ) = core.preprocess_diagonal(T_B, m, T_subseq_isconstant=T_B_subseq_isconstant) if T_A.ndim != 1: # pragma: no cover raise ValueError( From 74e163736bb65d7dbc9e8b225c33b425fdfaf30a Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sun, 5 Mar 2023 15:26:08 -0500 Subject: [PATCH 65/72] avoid random behavior of argsort when values are the same by passing kind --- tests/naive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/naive.py b/tests/naive.py index cf6b4ed4f..b253f3cc6 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -254,7 +254,7 @@ def stump( for i, D in enumerate(distance_matrix): # D: distance profile # self-join / AB-join: matrix profile and indices - indices = np.argsort(D)[:k] + indices = np.argsort(D, kind="mergesort")[:k] P[i, :k] = D[indices] indices[P[i, :k] == np.inf] = -1 I[i, :k] = indices From b0374c3bab46d904d0d1f0a2a8e9208ae47d73fe Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 9 Mar 2023 01:02:45 -0500 Subject: [PATCH 66/72] update if block --- stumpy/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index f9dda2aee..98dee92ad 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2592,8 +2592,8 @@ def _idx_to_mp(I, T, m, normalize=True, p=2.0, T_subseq_isconstant=None): I = I.astype(np.int64) T = T.copy() - if normalize and T_subseq_isconstant is None: - T_subseq_isconstant = rolling_isconstant(T, m) + if normalize: + T_subseq_isconstant = rolling_isconstant(T, m, T_subseq_isconstant) T_isfinite = np.isfinite(T) T_subseq_isfinite = np.all(rolling_window(T_isfinite, m), axis=1) From 3f49999700bc98a6f5b9cca0bf2d85fea972f279 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 9 Mar 2023 01:14:31 -0500 Subject: [PATCH 67/72] update naive function rolling_isconstant --- tests/naive.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index b253f3cc6..25076cd9f 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -14,16 +14,23 @@ def is_ptp_zero_1d(a, w): # `a` is 1-D return out == 0 -def rolling_isconstant(a, w, custom_func=None): - if custom_func is None: - custom_func = is_ptp_zero_1d - - return np.logical_and( - core.rolling_isfinite(a, w), - np.apply_along_axis( - lambda a_row, w: custom_func(a_row, w), axis=-1, arr=a, w=w - ), - ) +def rolling_isconstant(a, w, a_subseq_isconstant=None): + if a_subseq_isconstant is None: + a_subseq_isconstant = is_ptp_zero_1d + + custom_func = None + if callable(a_subseq_isconstant): + custom_func = a_subseq_isconstant + + if custom_func is not None: + a_subseq_isconstant = np.logical_and( + core.rolling_isfinite(a, w), + np.apply_along_axis( + lambda a_row, w: custom_func(a_row, w), axis=-1, arr=a, w=w + ), + ) + + return a_subseq_isconstant def rolling_nanstd(a, w): From 5ca79edc6c16b3ce84039a5600cb4e07ed94f0ad Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 9 Mar 2023 01:16:22 -0500 Subject: [PATCH 68/72] add comment --- tests/naive.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/naive.py b/tests/naive.py index 25076cd9f..afc69fd18 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -15,6 +15,7 @@ def is_ptp_zero_1d(a, w): # `a` is 1-D def rolling_isconstant(a, w, a_subseq_isconstant=None): + # a_subseq_isconstant can be numpy.ndarray or function if a_subseq_isconstant is None: a_subseq_isconstant = is_ptp_zero_1d From 11aa7c99f4bc7532e5aa4633ed1690e30104d858 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 9 Mar 2023 01:23:09 -0500 Subject: [PATCH 69/72] minor updates --- stumpy/core.py | 32 ++++++++++++++++---------------- tests/naive.py | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index 98dee92ad..d10d3c004 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2333,7 +2333,7 @@ def _rolling_isconstant(a, w): return np.where(out == 0.0, True, False) -def rolling_isconstant(a, w, T_subseq_isconstant=None): +def rolling_isconstant(a, w, a_subseq_isconstant=None): """ Compute the rolling isconstant for 1-D and 2-D arrays. @@ -2349,10 +2349,10 @@ def rolling_isconstant(a, w, T_subseq_isconstant=None): w : numpy.ndarray The rolling window size - T_subseq_isconstant : np.ndarray or function, default None - A boolean array that indicates whether a subsequence in `T` is constant + a_subseq_isconstant : np.ndarray or function, default None + A boolean array that indicates whether a subsequence in `a` is constant (True). Alternatively, a custom, user-defined function that returns a - boolean array that indicates whether a subsequence in `T` is constant + boolean array that indicates whether a subsequence in `a` is constant (True). The function must only take two arguments, `a`, a 1-D array, and `w`, the window size, while additional arguments may be specified by currying the user-defined function using `functools.partial`. When @@ -2360,15 +2360,15 @@ def rolling_isconstant(a, w, T_subseq_isconstant=None): Returns ------- - T_subseq_isconstant : numpy.ndarray + a_subseq_isconstant : numpy.ndarray Rolling window isconstant """ - if T_subseq_isconstant is None: - T_subseq_isconstant = _rolling_isconstant + if a_subseq_isconstant is None: + a_subseq_isconstant = _rolling_isconstant isconstant_func = None - if callable(T_subseq_isconstant): - incomp_args = find_incompatible_args(T_subseq_isconstant, ["a", "w"]) + if callable(a_subseq_isconstant): + incomp_args = find_incompatible_args(a_subseq_isconstant, ["a", "w"]) if len(incomp_args) > 0: # pragma: no cover msg = ( f"Incompatible arguments {incomp_args} found in `T_subseq_isconstant`. " @@ -2377,31 +2377,31 @@ def rolling_isconstant(a, w, T_subseq_isconstant=None): ) raise ValueError(msg) - isconstant_func = T_subseq_isconstant + isconstant_func = a_subseq_isconstant - elif isinstance(T_subseq_isconstant, np.ndarray): + elif isinstance(a_subseq_isconstant, np.ndarray): isconstant_func = None else: # pragma: no cover msg = ( "`T_subseq_isconstant` must be of type `np.ndarray` or a callable " - + f"function. Found {type(T_subseq_isconstant)} instead." + + f"function. Found {type(a_subseq_isconstant)} instead." ) raise ValueError(msg) if isconstant_func is not None: axis = a.ndim - 1 - T_subseq_isconstant = np.apply_along_axis( + a_subseq_isconstant = np.apply_along_axis( lambda a_row, w: isconstant_func(a_row, w), axis=axis, arr=a, w=w ) - if not issubclass(T_subseq_isconstant.dtype.type, np.bool_): # pragma: no cover + if not issubclass(a_subseq_isconstant.dtype.type, np.bool_): # pragma: no cover msg = ( - f"The output dtype of `T_subseq_isconstant` is {T_subseq_isconstant.dtype} " + f"The output dtype of `T_subseq_isconstant` is {a_subseq_isconstant.dtype} " + "but dtype `np.bool` was expected" ) raise ValueError(msg) - return T_subseq_isconstant + return a_subseq_isconstant def fix_isconstant_isfinite_conflicts( diff --git a/tests/naive.py b/tests/naive.py index afc69fd18..037511e59 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -228,10 +228,10 @@ def stump( [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] ) - if T_A_subseq_isconstant is None or callable(T_A_subseq_isconstant): + if T_A_subseq_isconstant is None: T_A_subseq_isconstant = rolling_isconstant(T_A, m, T_A_subseq_isconstant) - if T_B_subseq_isconstant is None or callable(T_B_subseq_isconstant): + if T_B_subseq_isconstant is None: T_B_subseq_isconstant = rolling_isconstant(T_B, m, T_B_subseq_isconstant) distance_matrix[np.isnan(distance_matrix)] = np.inf From 353e4b296573d7e89b0eac051b334b033aecab2e Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 9 Mar 2023 02:36:19 -0500 Subject: [PATCH 70/72] fix naive stump --- tests/naive.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index 037511e59..cfebae720 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -228,11 +228,8 @@ def stump( [distance_profile(Q, T_B, m) for Q in core.rolling_window(T_A, m)] ) - if T_A_subseq_isconstant is None: - T_A_subseq_isconstant = rolling_isconstant(T_A, m, T_A_subseq_isconstant) - - if T_B_subseq_isconstant is None: - T_B_subseq_isconstant = rolling_isconstant(T_B, m, T_B_subseq_isconstant) + T_A_subseq_isconstant = rolling_isconstant(T_A, m, T_A_subseq_isconstant) + T_B_subseq_isconstant = rolling_isconstant(T_B, m, T_B_subseq_isconstant) distance_matrix[np.isnan(distance_matrix)] = np.inf for i in range(distance_matrix.shape[0]): From 4c9d5c5cee35b1e16c2d481327dc0eea9e053c4c Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Thu, 9 Mar 2023 23:26:30 -0500 Subject: [PATCH 71/72] change function from public to private --- stumpy/core.py | 4 ++-- tests/test_core.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/stumpy/core.py b/stumpy/core.py index d10d3c004..4d13b9cd6 100644 --- a/stumpy/core.py +++ b/stumpy/core.py @@ -2368,7 +2368,7 @@ def rolling_isconstant(a, w, a_subseq_isconstant=None): isconstant_func = None if callable(a_subseq_isconstant): - incomp_args = find_incompatible_args(a_subseq_isconstant, ["a", "w"]) + incomp_args = _find_incompatible_args(a_subseq_isconstant, ["a", "w"]) if len(incomp_args) > 0: # pragma: no cover msg = ( f"Incompatible arguments {incomp_args} found in `T_subseq_isconstant`. " @@ -3531,7 +3531,7 @@ def _client_to_func(client): return func -def find_incompatible_args(func, required_args): +def _find_incompatible_args(func, required_args): """ For a given `func` and `requried_args`, return non-default arguments in `func` that are not in `required_args` diff --git a/tests/test_core.py b/tests/test_core.py index 4cd61e405..611f86e6b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1576,38 +1576,38 @@ def test_find_incompatible_args(): def func_case1(x, y): return - assert core.find_incompatible_args(func_case1, required_args=("x", "y")) == set() + assert core._find_incompatible_args(func_case1, required_args=("x", "y")) == set() # case2: one argument has default value. def func_case2(x, y=None): return - assert core.find_incompatible_args(func_case2, required_args=("x", "y")) == set() + assert core._find_incompatible_args(func_case2, required_args=("x", "y")) == set() # case3: both argument has default values. def func_case3(x=None, y=None): return - assert core.find_incompatible_args(func_case3, required_args=("x", "y")) == set() + assert core._find_incompatible_args(func_case3, required_args=("x", "y")) == set() # case4: having one extra argument `z` def func_case4(x, y, z): return - assert core.find_incompatible_args(func_case4, required_args=("x", "y")) == {"z"} + assert core._find_incompatible_args(func_case4, required_args=("x", "y")) == {"z"} # case5: having one extra argument `z`, but with default def func_case5(x, y, z=None): return - assert core.find_incompatible_args(func_case5, required_args=("x", "y")) == set() + assert core._find_incompatible_args(func_case5, required_args=("x", "y")) == set() # case6: one extra argument `z`, and using functools.partial def func_case6(x, y, z): return assert ( - core.find_incompatible_args( + core._find_incompatible_args( functools.partial(func_case6, z=None), required_args=("x", "y") ) == set() From f1519ea4cbf298b3a014a44d9018b216f7b108d0 Mon Sep 17 00:00:00 2001 From: nimasarajpoor Date: Sat, 11 Mar 2023 21:14:30 -0500 Subject: [PATCH 72/72] fix coverage --- tests/naive.py | 2 +- tests/test_core.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/naive.py b/tests/naive.py index cfebae720..d9942c4ce 100644 --- a/tests/naive.py +++ b/tests/naive.py @@ -239,7 +239,7 @@ def stump( distance_matrix[i, j] = 0.0 elif T_A_subseq_isconstant[i] or T_B_subseq_isconstant[j]: distance_matrix[i, j] = np.sqrt(m) - else: + else: # pragma: no cover pass n_A = T_A.shape[0] diff --git a/tests/test_core.py b/tests/test_core.py index f9c27d817..7b5c3d6c4 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1573,37 +1573,37 @@ def test_fix_isconstant_isfinite_conflicts(): def test_find_incompatible_args(): # case1: having exact required argument - def func_case1(x, y): + def func_case1(x, y): # pragma: no cover return assert core._find_incompatible_args(func_case1, required_args=("x", "y")) == set() # case2: one argument has default value. - def func_case2(x, y=None): + def func_case2(x, y=None): # pragma: no cover return assert core._find_incompatible_args(func_case2, required_args=("x", "y")) == set() # case3: both argument has default values. - def func_case3(x=None, y=None): + def func_case3(x=None, y=None): # pragma: no cover return assert core._find_incompatible_args(func_case3, required_args=("x", "y")) == set() # case4: having one extra argument `z` - def func_case4(x, y, z): + def func_case4(x, y, z): # pragma: no cover return assert core._find_incompatible_args(func_case4, required_args=("x", "y")) == {"z"} # case5: having one extra argument `z`, but with default - def func_case5(x, y, z=None): + def func_case5(x, y, z=None): # pragma: no cover return assert core._find_incompatible_args(func_case5, required_args=("x", "y")) == set() # case6: one extra argument `z`, and using functools.partial - def func_case6(x, y, z): + def func_case6(x, y, z): # pragma: no cover return assert (