Skip to content

Commit

Permalink
merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
NimaSarajpoor committed Sep 2, 2022
2 parents e105710 + 11bb86d commit dbaccab
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 111 deletions.
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@
],
}

html_context = {"default_mode": "light"}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
Expand Down
51 changes: 12 additions & 39 deletions stumpy/aamp_motifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _aamp_motifs(
motif_distances = []

candidate_idx = np.argmin(P[-1])
for i in range(l):
for _ in range(l):
if len(motif_indices) >= max_motifs:
break

Expand Down Expand Up @@ -357,29 +357,17 @@ def aamp_match(
to `Q` are less than or equal to `max_distance`, sorted by distance (lowest to
highest). The second column consists of the corresponding indices in `T`.
"""
if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
raise ValueError("Q contains illegal values (NaN or inf)")

if len(Q.shape) == 1:
Q = Q[np.newaxis, :]
if len(T.shape) == 1:
T = T[np.newaxis, :]

d, n = T.shape
m = Q.shape[1]

excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
if max_matches is None: # pragma: no cover
max_matches = np.inf

if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
raise ValueError("Q contains illegal values (NaN or inf)")

if max_distance is None: # pragma: no cover

def max_distance(D):
D_copy = D.copy().astype(np.float64)
D_copy[np.isinf(D_copy)] = np.nan
return np.nanmax(
[np.nanmean(D_copy) - 2.0 * np.nanstd(D_copy), np.nanmin(D_copy)]
)

if T_subseq_isfinite is None:
T, T_subseq_isfinite = core.preprocess_non_normalized(T, m)
Expand All @@ -389,28 +377,13 @@ def max_distance(D):
D = np.empty((d, n - m + 1))
for i in range(d):
D[i, :] = core.mass_absolute(Q[i], T[i], T_subseq_isfinite[i], p=p)

D = np.mean(D, axis=0)
if not isinstance(max_distance, float):
max_distance = max_distance(D)

matches = []

if query_idx is not None:
candidate_idx = query_idx
else:
candidate_idx = np.argmin(D)

for i in range(len(D)):
if (
D[candidate_idx] > atol + max_distance
or ~np.isfinite(D[candidate_idx])
or len(matches) >= max_matches
):
break

matches.append([D[candidate_idx], candidate_idx])
core.apply_exclusion_zone(D, candidate_idx, excl_zone, np.inf)
candidate_idx = np.argmin(D)

return np.array(matches, dtype=object)
return core._find_matches(
D,
excl_zone,
max_distance=max_distance,
max_matches=max_matches,
query_idx=query_idx,
atol=atol,
)
102 changes: 97 additions & 5 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1465,11 +1465,11 @@ def _mass_distance_matrix(Q, T, m, distance_matrix, μ_Q, σ_Q, M_T, Σ_T):
distance_matrix : numpy.ndarray
The full output distance matrix. This is mandatory since it may be reused.
μ_Q : float
Mean of `Q`
μ_Q : numpy.ndarray
Sliding mean of `Q`
σ_Q : float
Standard deviation of `Q`
σ_Q : numpy.ndarray
Sliding standard deviation of `Q`
M_T : numpy.ndarray
Sliding mean of `T`
Expand Down Expand Up @@ -2542,7 +2542,7 @@ def _select_P_ABBA_value(P_ABBA, k, custom_func=None):
k : int
Specify the `k`th value in the concatenated matrix profiles to return. This
parameter is ignored when `k_func` is not None.
parameter is ignored when `custom_func` is not None.
custom_func : object, default None
A custom user defined function for selecting the desired value from the
Expand Down Expand Up @@ -2592,3 +2592,95 @@ def _check_P(P, threshold=1e-6):
if are_distances_too_small(P, threshold=threshold): # pragma: no cover
logger.warning(f"A large number of values in `P` are smaller than {threshold}.")
logger.warning("For a self-join, try setting `ignore_trivial=True`.")


def _find_matches(
D, excl_zone, max_distance=None, max_matches=None, query_idx=None, atol=1e-8
):
"""
Find all matches of a query `Q` whose distance profile with `T` is `D`.
Parameters
----------
D : numpy.ndarray
The distance profile of `Q` with `T`. It is a 1D numpy array of size
`len(T)-len(Q)+1`, where `D[i]` is the distance between query `Q` and
`T[i : i + len(Q)]`.
excl_zone : int
Size of the exclusion zone. That is, after finding the next-best-match
located at index `idx`, we ignore subsequences with start index in range
(idx - excl_zone, idx + excl_zone + 1).
max_distance : float or function, default None
Maximum distance between `Q` and a subsequence `S` for `S` to be considered a
match.
If a function, then it has to be a function of one argument `D`, which will be
the distance profile of `Q` with `T` (a 1D numpy array of size `n-m+1`).
If None, this defaults to
`np.nanmax([np.nanmean(D) - 2 * np.nanstd(D), np.nanmin(D)])` (i.e. at
least the closest match will be returned).
max_matches : int, default None
The maximum amount of similar occurrences to be returned. The resulting
occurrences are sorted by distance, so a value of `10` means that the
indices of the most similar `10` subsequences is returned. If `None`, then all
occurrences are returned.
query_idx : int, default None
This is the index position along the time series, `T`, where the query
subsequence, `Q`, is located.
`query_idx` should only be used when the matrix profile is a self-join and
should be set to `None` for matrix profiles computed from AB-joins.
If `query_idx` is set to a specific integer value, then this will help ensure
that the self-match will be returned first.
atol : float, default 1e-8
The absolute tolerance parameter. This value will be added to `max_distance`
when comparing distances between subsequences.
Returns
-------
out : numpy.ndarray
The first column consists of values selected from `D`. These are the distances
of subsequences of `T` whose distances to `Q` are less than or equal to
`max_distance`, sorted by distance (lowest to highest). The second column
consists of the corresponding indices in `D`. These are in fact the start index
of susequences in `T` selected as the match of `Q`.
"""
D = D.copy()
if max_distance is None:

def max_distance(D):
D_copy = D.copy().astype(np.float64)
D_copy[np.isinf(D_copy)] = np.nan
return np.nanmax(
[np.nanmean(D_copy) - 2.0 * np.nanstd(D_copy), np.nanmin(D_copy)]
)

if not isinstance(max_distance, float):
max_distance = max_distance(D)

if max_matches is None:
max_matches = np.inf

if query_idx is not None:
candidate_idx = query_idx
else:
candidate_idx = np.argmin(D)

matches = []
for _ in range(len(D)):
if (
D[candidate_idx] > atol + max_distance
or ~np.isfinite(D[candidate_idx])
or len(matches) >= max_matches
):
break

matches.append([D[candidate_idx], candidate_idx])
apply_exclusion_zone(D, candidate_idx, excl_zone, np.inf)
candidate_idx = np.argmin(D)

return np.array(matches, dtype=object)
51 changes: 12 additions & 39 deletions stumpy/motifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _motifs(
motif_distances = []

candidate_idx = np.argmin(P[-1])
for i in range(l):
for _ in range(l):
if len(motif_indices) >= max_motifs:
break

Expand Down Expand Up @@ -422,29 +422,17 @@ def match(
Q = core._preprocess(Q)
T = core._preprocess(T)

if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
raise ValueError("Q contains illegal values (NaN or inf)")

if len(Q.shape) == 1:
Q = Q[np.newaxis, :]
if len(T.shape) == 1:
T = T[np.newaxis, :]

d, n = T.shape
m = Q.shape[1]

excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
if max_matches is None: # pragma: no cover
max_matches = np.inf

if np.any(np.isnan(Q)) or np.any(np.isinf(Q)): # pragma: no cover
raise ValueError("Q contains illegal values (NaN or inf)")

if max_distance is None: # pragma: no cover

def max_distance(D):
D_copy = D.copy().astype(np.float64)
D_copy[np.isinf(D_copy)] = np.nan
return np.nanmax(
[np.nanmean(D_copy) - 2.0 * np.nanstd(D_copy), np.nanmin(D_copy)]
)

if M_T is None or Σ_T is None: # pragma: no cover
T, M_T, Σ_T = core.preprocess(T, m)
Expand All @@ -456,28 +444,13 @@ def max_distance(D):
D = np.empty((d, n - m + 1))
for i in range(d):
D[i, :] = core.mass(Q[i], T[i], M_T[i], Σ_T[i])

D = np.mean(D, axis=0)
if not isinstance(max_distance, float):
max_distance = max_distance(D)

matches = []

if query_idx is not None:
candidate_idx = query_idx
else:
candidate_idx = np.argmin(D)

for i in range(len(D)):
if (
D[candidate_idx] > atol + max_distance
or ~np.isfinite(D[candidate_idx])
or len(matches) >= max_matches
):
break

matches.append([D[candidate_idx], candidate_idx])
core.apply_exclusion_zone(D, candidate_idx, excl_zone, np.inf)
candidate_idx = np.argmin(D)

return np.array(matches, dtype=object)
return core._find_matches(
D,
excl_zone,
max_distance=max_distance,
max_matches=max_matches,
query_idx=query_idx,
atol=atol,
)
18 changes: 13 additions & 5 deletions stumpy/mpdist.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,14 @@ def _mpdist_vect(
Time series or sequence
m : int
Window size
Window size that will be used for calculating the mpdist between Q and
any subsequence in T (of size `len(Q)`)
μ_Q : float
Mean of `Q`
μ_Q : numpy.ndarray
Sliding mean of `Q`
σ_Q : float
Standard deviation of `Q`
σ_Q : numpy.ndarray
Sliding standard deviation of `Q`
M_T : numpy.ndarray
Sliding mean of `T`
Expand All @@ -222,6 +223,13 @@ def _mpdist_vect(
and should take `P_ABBA` as its only input parameter and return a single
`MPdist` value. The `percentage` and `k` parameters are ignored when
`custom_func` is not None.
Returns
-------
MPdist_vect : numpy.ndarray
The mpdist-based distance profile of `Q` with `T`. It is a 1D array of
size `len(T) - len(Q) + 1`. MPdist_vect[i] is the mpdist distance between
`Q` and subsequence `T[i : i + len(Q)]`.
"""
j = Q.shape[0] - m + 1 # `k` is reserved for `P_ABBA` selection
l = T.shape[0] - m + 1
Expand Down
11 changes: 8 additions & 3 deletions stumpy/scraamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,10 @@ def prescraamp(T_A, m, T_B=None, s=None, p=2.0):
l = n_A - m + 1

if s is None: # pragma: no cover
s = excl_zone
if excl_zone is not None: # self-join
s = excl_zone
else: # AB-join
s = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

indices = np.random.permutation(range(0, l, s)).astype(np.int64)
P, I = _prescraamp(
Expand Down Expand Up @@ -509,9 +512,11 @@ def __init__(
self._I[:, :] = -1

self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))

if s is None:
s = self._excl_zone
if self._excl_zone is not None: # self-join
s = self._excl_zone
else: # pragma: no cover # AB-join
s = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))

if pre_scraamp:
if self._ignore_trivial:
Expand Down
11 changes: 8 additions & 3 deletions stumpy/scrump.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,10 @@ def prescrump(T_A, m, T_B=None, s=None, normalize=True, p=2.0):
l = n_A - m + 1

if s is None: # pragma: no cover
s = excl_zone
if excl_zone is not None: # self-join
s = excl_zone
else: # AB-join
s = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

indices = np.random.permutation(range(0, l, s)).astype(np.int64)
P, I = _prescrump(
Expand Down Expand Up @@ -578,9 +581,11 @@ def __init__(
self._I[:, :] = -1

self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))

if s is None:
s = self._excl_zone
if self._excl_zone is not None: # self-join
s = self._excl_zone
else: # pragma: no cover # AB-join
s = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))

if pre_scrump:
if self._ignore_trivial:
Expand Down
22 changes: 22 additions & 0 deletions tests/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1759,3 +1759,25 @@ def _total_diagonal_ndists(tile_lower_diag, tile_upper_diag, tile_height, tile_w
)

return total_ndists


def find_matches(D, excl_zone, max_distance, max_matches=None):
    """
    Naive reference implementation for selecting matches from a distance profile.

    Parameters
    ----------
    D : numpy.ndarray
        1D distance profile.

    excl_zone : int
        Half-width of the exclusion zone around each selected index. Indices `x`
        with `idx - excl_zone <= x <= idx + excl_zone` are discarded once `idx`
        has been selected.

    max_distance : float
        Only indices whose distance is less than or equal to this value are
        considered.

    max_matches : int, default None
        Maximum number of rows to return. If `None`, `len(D)` is used.

    Returns
    -------
    out : numpy.ndarray
        Object array whose rows are `[distance, index]`, ordered by increasing
        distance.
    """
    limit = len(D) if max_matches is None else max_matches

    # Every index within the distance threshold, ordered best-first. `sorted` is
    # stable, so ties keep their ascending index order.
    candidates = sorted(
        (i for i in range(D.size) if D[i] <= max_distance),
        key=lambda i: D[i],
    )

    # Greedily accept the best remaining candidate and discard everything that
    # lies inside its exclusion zone (including the candidate itself).
    selected = []
    while candidates:
        best = candidates[0]
        selected.append([D[best], best])
        lower, upper = best - excl_zone, best + excl_zone
        candidates = [j for j in candidates if not (lower <= j <= upper)]

    return np.array(selected[:limit], dtype=object)
Loading

0 comments on commit dbaccab

Please sign in to comment.