Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #998: Speed up stumpi and aampi #1001

Merged
merged 24 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b4b950e
wrap njit-decorated function around hot spot to improve performance
NimaSarajpoor Jul 8, 2024
7eccc26
Merge branch 'main' into ENH_stumpi
NimaSarajpoor Jul 8, 2024
5f7b348
Merge branch 'main' into ENH_stumpi
NimaSarajpoor Jul 20, 2024
ca49a74
improve docstring
NimaSarajpoor Jul 20, 2024
625d1f8
Move function to stumpy.core
NimaSarajpoor Jul 20, 2024
384ea32
Add test function
NimaSarajpoor Jul 20, 2024
f8a8a82
refactored aampi._update_egress
NimaSarajpoor Jul 20, 2024
d823148
refactored stumpi._update
NimaSarajpoor Jul 20, 2024
3c3e835
refactored using newly-created function
NimaSarajpoor Jul 23, 2024
c9e361d
Rename function
NimaSarajpoor Jul 26, 2024
43eb4ce
test top-k feature in test function
NimaSarajpoor Jul 27, 2024
b0c567e
use name of parameter when passing value to be more explicit
NimaSarajpoor Jul 27, 2024
b99c3e2
update branch and resolve conflict
NimaSarajpoor Jul 27, 2024
d97e184
add comment and new case in the test function
NimaSarajpoor Jul 28, 2024
b54a9e8
revise test functions
NimaSarajpoor Aug 9, 2024
f766504
minor enhancement
NimaSarajpoor Aug 10, 2024
906bef7
extra test that causes an error
NimaSarajpoor Aug 10, 2024
7e8eeb2
Fixed error caused by loss of precision
NimaSarajpoor Aug 12, 2024
4f2c99b
revise test function and improve comments
NimaSarajpoor Aug 24, 2024
9e8a430
Revised docstring and comment
NimaSarajpoor Sep 11, 2024
b9b9104
Fixed flake8 format
NimaSarajpoor Sep 11, 2024
3e757cf
Minor changes
NimaSarajpoor Sep 11, 2024
47041d7
Fixed black format
NimaSarajpoor Sep 11, 2024
8fc35ea
replace random with np.random
NimaSarajpoor Sep 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 9 additions & 39 deletions stumpy/aampi.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,26 +247,9 @@ def _update_egress(self, t):
if np.any(~self._T_isfinite[-self._m :]):
D[:] = np.inf

core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)

update_idx = np.argwhere(D < self._P[:, -1]).flatten()
for i in update_idx:
idx = np.searchsorted(self._P[i], D[i], side="right")
core._shift_insert_at_index(self._P[i], idx, D[i])
core._shift_insert_at_index(
self._I[i], idx, D.shape[0] + self._n_appended - 1
)
# D.shape[0] is base-1

# Calculate the (top-k) matrix profile values/indices for the last subsequence
# by using its corresponding distance profile `D`
self._P[-1] = np.inf
self._I[-1] = -1
for i, d in enumerate(D):
if d < self._P[-1, -1]:
idx = np.searchsorted(self._P[-1], d, side="right")
core._shift_insert_at_index(self._P[-1], idx, d)
core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
core._update_incremental_PI(
NimaSarajpoor marked this conversation as resolved.
Show resolved Hide resolved
D, self._P, self._I, self._excl_zone, n_appended=self._n_appended
)

# All neighbors of the last subsequence are on its left. So, its (top-1)
# matrix profile value/index and its left matrix profile value/index must
Expand Down Expand Up @@ -322,30 +305,17 @@ def _update(self, t):
if np.any(~self._T_isfinite[-self._m :]):
D[:] = np.inf

core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)

update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
NimaSarajpoor marked this conversation as resolved.
Show resolved Hide resolved
for i in update_idx:
idx = np.searchsorted(self._P[i], D[i], side="right")
core._shift_insert_at_index(self._P[i], idx, D[i])
core._shift_insert_at_index(self._I[i], idx, l)

# Calculating top-k matrix profile and (top-1) left matrix profile (and their
# corresponding indices) for new subsequence whose distance profile is `D`
P_new = np.full(self._k, np.inf, dtype=np.float64)
I_new = np.full(self._k, -1, dtype=np.int64)
for i, d in enumerate(D):
if d < P_new[-1]: # maximum value in sorted array P_new
idx = np.searchsorted(P_new, d, side="right")
core._shift_insert_at_index(P_new, idx, d)
core._shift_insert_at_index(I_new, idx, i)
self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)

core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0)

left_I_new = I_new[0]
left_P_new = P_new[0]
left_I_new = self._I[-1, 0]
left_P_new = self._P[-1, 0]

self._T = T_new
self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
self._left_P = np.append(self._left_P, left_P_new)
self._left_I = np.append(self._left_I, left_I_new)
self._p_norm = p_norm_new
Expand Down
57 changes: 57 additions & 0 deletions stumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4368,3 +4368,60 @@ def get_ray_nworkers(ray_client):
Total number of Ray workers
"""
return int(ray_client.cluster_resources().get("CPU"))


@njit
def _update_incremental_PI(D, P, I, excl_zone, n_appended=0):
NimaSarajpoor marked this conversation as resolved.
Show resolved Hide resolved
"""
Given the 1D array distance profile, `D`, of the last subsequence of T,
update (in-place) the (top-k) matrix profile, `P`, and the matrix profile
index, I, of T.

Parameters
----------
D : numpy.ndarray
A 1D array (with dtype float) representing the distance profile of
the last subsequence of T

P : numpy.ndarray
A 2D array representing the matrix profile of T,
with shape (len(T) - m + 1, k), where `m` is the window size.
P[-1, :] should be set to np.inf

I : numpy.ndarray
A 2D array representing the matrix profile index of T,
with shape (len(T) - m + 1, k), where `m` is the window size
I[-1, :] should be set to -1.

excl_zone : int
Size of the exclusion zone. That is, after finding the next-best-match
located at index `idx`, we ignore subsequences with start index in range
(idx - excl_zone, idx + excl_zone + 1).

n_appended : int
NimaSarajpoor marked this conversation as resolved.
Show resolved Hide resolved
Number of times the timeseries start point is shifted one to the right

Returns
-------
None
"""
_apply_exclusion_zone(D, D.shape[0] - 1, excl_zone, np.inf)

update_idx = np.argwhere(D < P[:, -1]).flatten()
for i in update_idx:
idx = np.searchsorted(P[i], D[i], side="right")
_shift_insert_at_index(P[i], idx, D[i])
_shift_insert_at_index(I[i], idx, D.shape[0] + n_appended - 1)
# D.shape[0] is base-1

# Calculate the (top-k) matrix profile values/indices for the last subsequence
# by using its corresponding distance profile `D`
P[-1] = np.inf
I[-1] = -1
for i, d in enumerate(D):
if d < P[-1, -1]:
idx = np.searchsorted(P[-1], d, side="right")
_shift_insert_at_index(P[-1], idx, d)
_shift_insert_at_index(I[-1], idx, i + n_appended)

return
49 changes: 10 additions & 39 deletions stumpy/stumpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,26 +354,9 @@ def _update_egress(self, t):
if np.any(~self._T_isfinite[-self._m :]):
D[:] = np.inf

core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)

update_idx = np.argwhere(D < self._P[:, -1]).flatten()
for i in update_idx:
idx = np.searchsorted(self._P[i], D[i], side="right")
core._shift_insert_at_index(self._P[i], idx, D[i])
core._shift_insert_at_index(
self._I[i], idx, D.shape[0] + self._n_appended - 1
)
# D.shape[0] is base-1

# Calculate the (top-k) matrix profile values/indices for the last subsequence
# by using its corresponding distance profile `D`
self._P[-1] = np.inf
self._I[-1] = -1
for i, d in enumerate(D):
if d < self._P[-1, -1]:
idx = np.searchsorted(self._P[-1], d, side="right")
core._shift_insert_at_index(self._P[-1], idx, d)
core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
core._update_incremental_PI(
D, self._P, self._I, self._excl_zone, n_appended=self._n_appended
)

# All neighbors of the last subsequence are on its left. So, its (top-1)
# matrix profile value/index and its left matrix profile value/index must
Expand Down Expand Up @@ -440,30 +423,18 @@ def _update(self, t):
if np.any(~self._T_isfinite[-self._m :]):
D[:] = np.inf

core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)

update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
for i in update_idx:
idx = np.searchsorted(self._P[i], D[i], side="right")
core._shift_insert_at_index(self._P[i], idx, D[i])
core._shift_insert_at_index(self._I[i], idx, l)

# Calculating top-k matrix profile and (top-1) left matrix profile (and their
# corresponding indices) for new subsequence whose distance profile is `D`
P_new = np.full(self._k, np.inf, dtype=np.float64)
I_new = np.full(self._k, -1, dtype=np.int64)
for i, d in enumerate(D):
if d < P_new[-1]: # maximum value in sorted array P_new
idx = np.searchsorted(P_new, d, side="right")
core._shift_insert_at_index(P_new, idx, d)
core._shift_insert_at_index(I_new, idx, i)
self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)

core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0)

left_I_new = I_new[0]
left_P_new = P_new[0]
left_I_new = self._I[-1, 0]
left_P_new = self._P[-1, 0]

self._T = T_new
self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)

self._left_P = np.append(self._left_P, left_P_new)
self._left_I = np.append(self._left_I, left_I_new)
self._QT = QT_new
Expand Down
71 changes: 71 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1778,3 +1778,74 @@ def test_process_isconstant_1d_default():
T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant=None)

npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp)


def test_update_incremental_PI():
T = np.random.rand(64)
m = 3
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))

# case 1: For a given time series `T`, obtain its matrix profile `P` and
# matrix profile indices `I` based on the matrix profile and matrix profile
# indices of the time series `T[:-1]`.
# In the following test: n_appended = 0
for k in range(1, 4):
# ref
mp_ref = naive.stump(T, m, row_wise=True, k=k)
P_ref = mp_ref[:, :k].astype(np.float64)
I_ref = mp_ref[:, k : 2 * k].astype(np.int64)

# comp
P_comp = np.full_like(P_ref, np.inf)
I_comp = np.full_like(I_ref, -1)

mp_comp = naive.stump(T[:-1], m, row_wise=True, k=k)
P_comp[:-1, :] = mp_comp[:, :k].astype(np.float64)
I_comp[:-1, :] = mp_comp[:, k : 2 * k].astype(np.int64)

D = core.mass(T[-m:], T)
core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=0)

# check
npt.assert_almost_equal(P_ref, P_comp)
npt.assert_almost_equal(I_ref, I_comp)

# case 2: For a given time series `T`, obtain the matrix profile `P` and
# matrix profile indices `I` of `T[1:]` based on the matrix profile and
# matrix profile indices of `T[:-1]`.
# In the following test: n_appended = 1
for k in range(1, 4):
# ref
T1 = T.copy()
T1[-1] = np.nan
mp_1 = naive.stump(T1, m, row_wise=True, k=k)
P_1 = mp_1[:, :k].astype(np.float64)
I_1 = mp_1[:, k : 2 * k].astype(np.int64)

T2 = T.copy()
T2[0] = np.nan
mp_2 = naive.stump(T2, m, row_wise=True, k=k)
P_2 = mp_2[:, :k].astype(np.float64)
I_2 = mp_2[:, k : 2 * k].astype(np.int64)

core._merge_topk_PI(P_1, P_2, I_1, I_2)
P_ref = P_1[1:]
I_ref = I_1[1:]

# comp
mp_comp = naive.stump(T[:-1], m, row_wise=True, k=k)
P_comp = mp_comp[:, :k].astype(np.float64)
I_comp = mp_comp[:, k : 2 * k].astype(np.int64)

P_comp[:-1] = P_comp[1:]
P_comp[-1] = np.inf

I_comp[:-1] = I_comp[1:]
I_comp[-1] = -1

D = core.mass(T[-m:], T[1:])
core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=1)

# check
npt.assert_almost_equal(P_ref, P_comp)
npt.assert_almost_equal(I_ref, I_comp)
Loading