TDAmeritrade · seanlaw · Sep 13, 2024 · Jul 8, 2024 · Jul 8, 2024 · Jul 20, 2024
@@ -247,26 +247,9 @@ def _update_egress(self, t):
         if np.any(~self._T_isfinite[-self._m :]):
             D[:] = np.inf
 
-        core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
-
-        update_idx = np.argwhere(D < self._P[:, -1]).flatten()
-        for i in update_idx:
-            idx = np.searchsorted(self._P[i], D[i], side="right")
-            core._shift_insert_at_index(self._P[i], idx, D[i])
-            core._shift_insert_at_index(
-                self._I[i], idx, D.shape[0] + self._n_appended - 1
-            )
-            # D.shape[0] is base-1
-
-        # Calculate the (top-k) matrix profile values/indices for the last subsequence
-        # by using its corresponding distance profile `D`
-        self._P[-1] = np.inf
-        self._I[-1] = -1
-        for i, d in enumerate(D):
-            if d < self._P[-1, -1]:
-                idx = np.searchsorted(self._P[-1], d, side="right")
-                core._shift_insert_at_index(self._P[-1], idx, d)
-                core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
+        core._update_incremental_PI(
+            D, self._P, self._I, self._excl_zone, n_appended=self._n_appended
+        )
 
         # All neighbors of the last subsequence are on its left. So, its (top-1)
         # matrix profile value/index and its left matrix profile value/index must
@@ -322,30 +305,17 @@ def _update(self, t):
         if np.any(~self._T_isfinite[-self._m :]):
             D[:] = np.inf
 
-        core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
-
-        update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
-        for i in update_idx:
-            idx = np.searchsorted(self._P[i], D[i], side="right")
-            core._shift_insert_at_index(self._P[i], idx, D[i])
-            core._shift_insert_at_index(self._I[i], idx, l)
-
-        # Calculating top-k matrix profile and (top-1) left matrix profile (and their
-        # corresponding indices) for new subsequence whose distance profile is `D`
         P_new = np.full(self._k, np.inf, dtype=np.float64)
         I_new = np.full(self._k, -1, dtype=np.int64)
-        for i, d in enumerate(D):
-            if d < P_new[-1]:  # maximum value in sorted array P_new
-                idx = np.searchsorted(P_new, d, side="right")
-                core._shift_insert_at_index(P_new, idx, d)
-                core._shift_insert_at_index(I_new, idx, i)
+        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
+        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
+
+        core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0)
 
-        left_I_new = I_new[0]
-        left_P_new = P_new[0]
+        left_I_new = self._I[-1, 0]
+        left_P_new = self._P[-1, 0]
 
         self._T = T_new
-        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
-        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
         self._left_P = np.append(self._left_P, left_P_new)
         self._left_I = np.append(self._left_I, left_I_new)
         self._p_norm = p_norm_new

@@ -4368,3 +4368,60 @@ def get_ray_nworkers(ray_client):
         Total number of Ray workers
     """
     return int(ray_client.cluster_resources().get("CPU"))
+
+
+@njit
+def _update_incremental_PI(D, P, I, excl_zone, n_appended=0):
+    """
+    Given the 1D array distance profile, `D`, of the last subsequence of T,
+    update (in-place) the (top-k) matrix profile, `P`, and the matrix profile
+    index, I, of T.
+
+    Parameters
+    ----------
+    D : numpy.ndarray
+        A 1D array (with dtype float) representing the distance profile of
+        the last subsequence of T
+
+    P : numpy.ndarray
+        A 2D array representing the matrix profile of T,
+        with shape (len(T) - m + 1, k), where `m` is the window size.
+        P[-1, :] should be set to np.inf
+
+    I : numpy.ndarray
+        A 2D array representing the matrix profile index of T,
+        with shape (len(T) - m + 1, k), where `m` is the window size
+        I[-1, :] should be set to -1.
+
+    excl_zone : int
+        Size of the exclusion zone. That is, after finding the next-best-match
+        located at index `idx`, we ignore subsequences with start index in range
+        (idx -  excl_zone, idx + excl_zone + 1).
+
+    n_appended : int
+        Number of times the timeseries start point is shifted one to the right
+
+    Returns
+    -------
+    None
+    """
+    _apply_exclusion_zone(D, D.shape[0] - 1, excl_zone, np.inf)
+
+    update_idx = np.argwhere(D < P[:, -1]).flatten()
+    for i in update_idx:
+        idx = np.searchsorted(P[i], D[i], side="right")
+        _shift_insert_at_index(P[i], idx, D[i])
+        _shift_insert_at_index(I[i], idx, D.shape[0] + n_appended - 1)
+        # D.shape[0] is base-1
+
+    # Calculate the (top-k) matrix profile values/indices for the last subsequence
+    # by using its corresponding distance profile `D`
+    P[-1] = np.inf
+    I[-1] = -1
+    for i, d in enumerate(D):
+        if d < P[-1, -1]:
+            idx = np.searchsorted(P[-1], d, side="right")
+            _shift_insert_at_index(P[-1], idx, d)
+            _shift_insert_at_index(I[-1], idx, i + n_appended)
+
+    return
@@ -354,26 +354,9 @@ def _update_egress(self, t):
         if np.any(~self._T_isfinite[-self._m :]):
             D[:] = np.inf
 
-        core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
-
-        update_idx = np.argwhere(D < self._P[:, -1]).flatten()
-        for i in update_idx:
-            idx = np.searchsorted(self._P[i], D[i], side="right")
-            core._shift_insert_at_index(self._P[i], idx, D[i])
-            core._shift_insert_at_index(
-                self._I[i], idx, D.shape[0] + self._n_appended - 1
-            )
-            # D.shape[0] is base-1
-
-        # Calculate the (top-k) matrix profile values/indices for the last subsequence
-        # by using its corresponding distance profile `D`
-        self._P[-1] = np.inf
-        self._I[-1] = -1
-        for i, d in enumerate(D):
-            if d < self._P[-1, -1]:
-                idx = np.searchsorted(self._P[-1], d, side="right")
-                core._shift_insert_at_index(self._P[-1], idx, d)
-                core._shift_insert_at_index(self._I[-1], idx, i + self._n_appended)
+        core._update_incremental_PI(
+            D, self._P, self._I, self._excl_zone, n_appended=self._n_appended
+        )
 
         # All neighbors of the last subsequence are on its left. So, its (top-1)
         # matrix profile value/index and its left matrix profile value/index must
@@ -440,30 +423,18 @@ def _update(self, t):
         if np.any(~self._T_isfinite[-self._m :]):
             D[:] = np.inf
 
-        core.apply_exclusion_zone(D, D.shape[0] - 1, self._excl_zone, np.inf)
-
-        update_idx = np.argwhere(D[:l] < self._P[:l, -1]).flatten()
-        for i in update_idx:
-            idx = np.searchsorted(self._P[i], D[i], side="right")
-            core._shift_insert_at_index(self._P[i], idx, D[i])
-            core._shift_insert_at_index(self._I[i], idx, l)
-
-        # Calculating top-k matrix profile and (top-1) left matrix profile (and their
-        # corresponding indices) for new subsequence whose distance profile is `D`
         P_new = np.full(self._k, np.inf, dtype=np.float64)
         I_new = np.full(self._k, -1, dtype=np.int64)
-        for i, d in enumerate(D):
-            if d < P_new[-1]:  # maximum value in sorted array P_new
-                idx = np.searchsorted(P_new, d, side="right")
-                core._shift_insert_at_index(P_new, idx, d)
-                core._shift_insert_at_index(I_new, idx, i)
+        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
+        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
+
+        core._update_incremental_PI(D, self._P, self._I, self._excl_zone, n_appended=0)
 
-        left_I_new = I_new[0]
-        left_P_new = P_new[0]
+        left_I_new = self._I[-1, 0]
+        left_P_new = self._P[-1, 0]
 
         self._T = T_new
-        self._P = np.append(self._P, P_new.reshape(1, -1), axis=0)
-        self._I = np.append(self._I, I_new.reshape(1, -1), axis=0)
+
         self._left_P = np.append(self._left_P, left_P_new)
         self._left_I = np.append(self._left_I, left_I_new)
         self._QT = QT_new

@@ -1778,3 +1778,74 @@ def test_process_isconstant_1d_default():
     T_subseq_isconstant_comp = core.process_isconstant(T, m, T_subseq_isconstant=None)
 
     npt.assert_array_equal(T_subseq_isconstant_ref, T_subseq_isconstant_comp)
+
+
+def test_update_incremental_PI():
+    T = np.random.rand(64)
+    m = 3
+    excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
+
+    # case 1: For a given time series `T`, obtain its matrix profile `P` and
+    # matrix profile indices `I` based on the matrix profile and matrix profile
+    # indices of the time series `T[:-1]`.
+    # In the following test: n_appended = 0
+    for k in range(1, 4):
+        # ref
+        mp_ref = naive.stump(T, m, row_wise=True, k=k)
+        P_ref = mp_ref[:, :k].astype(np.float64)
+        I_ref = mp_ref[:, k : 2 * k].astype(np.int64)
+
+        # comp
+        P_comp = np.full_like(P_ref, np.inf)
+        I_comp = np.full_like(I_ref, -1)
+
+        mp_comp = naive.stump(T[:-1], m, row_wise=True, k=k)
+        P_comp[:-1, :] = mp_comp[:, :k].astype(np.float64)
+        I_comp[:-1, :] = mp_comp[:, k : 2 * k].astype(np.int64)
+
+        D = core.mass(T[-m:], T)
+        core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=0)
+
+        # check
+        npt.assert_almost_equal(P_ref, P_comp)
+        npt.assert_almost_equal(I_ref, I_comp)
+
+    # case 2: For a given time series `T`, obtain the matrix profile `P` and
+    # matrix profile indices `I` of `T[1:]` based on the matrix profile and
+    # matrix profile indices of `T[:-1]`.
+    # In the following test: n_appended = 1
+    for k in range(1, 4):
+        # ref
+        T1 = T.copy()
+        T1[-1] = np.nan
+        mp_1 = naive.stump(T1, m, row_wise=True, k=k)
+        P_1 = mp_1[:, :k].astype(np.float64)
+        I_1 = mp_1[:, k : 2 * k].astype(np.int64)
+
+        T2 = T.copy()
+        T2[0] = np.nan
+        mp_2 = naive.stump(T2, m, row_wise=True, k=k)
+        P_2 = mp_2[:, :k].astype(np.float64)
+        I_2 = mp_2[:, k : 2 * k].astype(np.int64)
+
+        core._merge_topk_PI(P_1, P_2, I_1, I_2)
+        P_ref = P_1[1:]
+        I_ref = I_1[1:]
+
+        # comp
+        mp_comp = naive.stump(T[:-1], m, row_wise=True, k=k)
+        P_comp = mp_comp[:, :k].astype(np.float64)
+        I_comp = mp_comp[:, k : 2 * k].astype(np.int64)
+
+        P_comp[:-1] = P_comp[1:]
+        P_comp[-1] = np.inf
+
+        I_comp[:-1] = I_comp[1:]
+        I_comp[-1] = -1
+
+        D = core.mass(T[-m:], T[1:])
+        core._update_incremental_PI(D, P_comp, I_comp, excl_zone, n_appended=1)
+
+        # check
+        npt.assert_almost_equal(P_ref, P_comp)
+        npt.assert_almost_equal(I_ref, I_comp)