Merge branch 'main' into imbalance

# Conflicts:
#	aeon/testing/estimator_checking/_yield_estimator_checks.py
aeon-toolkit · Feb 3, 2025 · 2366305 · 2366305
2 parents 460a378 + 874478c
commit 2366305
Show file tree

Hide file tree

Showing 9 changed files with 36 additions and 36 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
         args: [ "--create", "--python-folders", "aeon" ]
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.3
+    rev: v0.9.4
     hooks:
       - id: ruff
         args: [ "--fix"]
@@ -41,7 +41,7 @@ repos:
         args: [ "--py39-plus" ]
 
   - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
+    rev: 6.0.0
     hooks:
       - id: isort
         name: isort
@@ -55,7 +55,7 @@ repos:
         args: [ "--max-line-length=88", "--extend-ignore=E203" ]
 
   - repo: https://github.com/psf/black
-    rev: 24.10.0
+    rev: 25.1.0
     hooks:
       - id: black
         language_version: python3

diff --git a/aeon/clustering/averaging/_averaging.py b/aeon/clustering/averaging/_averaging.py
@@ -38,7 +38,7 @@ def mean_average(X: np.ndarray, **kwargs) -> np.ndarray:
 
 
 def _resolve_average_callable(
-    averaging_method: Union[str, Callable[[np.ndarray, dict], np.ndarray]]
+    averaging_method: Union[str, Callable[[np.ndarray, dict], np.ndarray]],
 ) -> Callable[[np.ndarray, dict], np.ndarray]:
     """Resolve a string or callable to a averaging callable.
 

diff --git a/aeon/datasets/_tsad_data_loaders.py b/aeon/datasets/_tsad_data_loaders.py
@@ -269,7 +269,7 @@ def load_from_timeeval_csv_file(path: Path) -> tuple[np.ndarray, np.ndarray]:
 
 
 def load_kdd_tsad_135(
-    split: Literal["train", "test"] = "test"
+    split: Literal["train", "test"] = "test",
 ) -> tuple[np.ndarray, np.ndarray]:
     """Load the KDD-TSAD 135 UCR_Anomaly_Internal_Bleeding16 univariate dataset.
 
@@ -363,7 +363,7 @@ def load_daphnet_s06r02e0() -> tuple[np.ndarray, np.ndarray]:
 def load_ecg_diff_count_3(
     learning_type: Literal[
         "unsupervised", "semi-supervised", "supervised"
-    ] = "unsupervised"
+    ] = "unsupervised",
 ) -> Union[
     tuple[np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
 ]:

diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py
@@ -498,7 +498,7 @@ def get_distance_function(method: Union[str, DistanceFunction]) -> DistanceFunct
 
 
 def get_pairwise_distance_function(
-    method: Union[str, PairwiseFunction]
+    method: Union[str, PairwiseFunction],
 ) -> PairwiseFunction:
     """Get the pairwise distance function for a given method string or callable.
 

diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py
@@ -9,7 +9,6 @@
 
 import joblib
 import numpy as np
-from numpy.testing import assert_array_almost_equal
 from sklearn.exceptions import NotFittedError
 
 from aeon.anomaly_detection.base import BaseAnomalyDetector
@@ -625,23 +624,19 @@ def check_persistence_via_pickle(estimator, datatype):
     for method in NON_STATE_CHANGING_METHODS_ARRAYLIKE:
         if hasattr(estimator, method) and callable(getattr(estimator, method)):
             output = _run_estimator_method(estimator, method, datatype, "test")
-            assert_array_almost_equal(
-                output,
-                results[i],
-                err_msg=f"Running {method} after fit twice with test "
-                f"parameters gives different results.",
-            )
+            same, msg = deep_equals(output, results[i], return_msg=True)
+            if not same:
+                raise ValueError(
+                    f"Running {method} after serialisation gives different "
+                    f"results. "
+                    f"{type(estimator)} returns data as {type(output)}: test "
+                    f"equivalence message: {msg}"
+                )
             i += 1
 
 
 def check_fit_deterministic(estimator, datatype):
-    """Test that fit is deterministic.
-
-    Check that calling fit twice is equivalent to calling it once, in terms of the
-    output of non-state changing methods such as predict and transform. Calls
-    fit, then calls all non-state changing methods, then calls fit and non-state
-    changing methods again, checking the output is the same.
-    """
+    """Check that calling fit twice is equivalent to calling it once."""
     estimator = _clone_estimator(estimator, random_state=0)
     _run_estimator_method(estimator, "fit", datatype, "train")
 
@@ -651,17 +646,20 @@ def check_fit_deterministic(estimator, datatype):
             output = _run_estimator_method(estimator, method, datatype, "test")
             results.append(output)
 
-    # run fit and other methods a second time
+    # run fit a second time
     _run_estimator_method(estimator, "fit", datatype, "train")
 
+    # check output of predict/transform etc does not change
     i = 0
     for method in NON_STATE_CHANGING_METHODS_ARRAYLIKE:
         if hasattr(estimator, method) and callable(getattr(estimator, method)):
             output = _run_estimator_method(estimator, method, datatype, "test")
-            assert_array_almost_equal(
-                output,
-                results[i],
-                err_msg=f"Running {method} after fit twice with test "
-                f"parameters gives different results.",
-            )
+            same, msg = deep_equals(output, results[i], return_msg=True)
+            if not same:
+                raise ValueError(
+                    f"Running {method} with test parameters after two calls to fit "
+                    f"gives different results. "
+                    f"{type(estimator)} returns data as {type(output)}: test "
+                    f"equivalence message: {msg}"
+                )
             i += 1
diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py
@@ -23,7 +23,8 @@
 NUMBA_DISABLED = os.environ.get("NUMBA_DISABLE_JIT") == "1"
 
 # exclude estimators here for short term fixes
-EXCLUDE_ESTIMATORS = ["REDCOMETS"]
+# Hydra excluded because it returns a pytorch Tensor
+EXCLUDE_ESTIMATORS = ["REDCOMETS", "HydraTransformer"]
 
 # Exclude specific tests for estimators here
 EXCLUDED_TESTS = {
@@ -49,7 +50,6 @@
     "RSASTClassifier": ["check_fit_deterministic"],
     "SAST": ["check_fit_deterministic"],
     "RSAST": ["check_fit_deterministic"],
-    "SFA": ["check_persistence_via_pickle", "check_fit_deterministic"],
     # missed in legacy testing, changes state in predict/transform
     "FLUSSSegmenter": ["check_non_state_changing_method"],
     "InformationGainSegmenter": ["check_non_state_changing_method"],

diff --git a/aeon/testing/utils/deep_equals.py b/aeon/testing/utils/deep_equals.py
@@ -72,7 +72,7 @@ def _deep_equals(x, y, depth, ignore_index):
         eq = np.isnan(y)
         msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}"
         return eq, msg
-    elif isinstance(x == y, bool):
+    elif isinstance(x == y, (bool, np.bool_)):
         eq = x == y
         msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}"
         return eq, msg
@@ -131,9 +131,11 @@ def _dataframe_equals(x, y, depth, ignore_index):
 def _numpy_equals(x, y, depth):
     if x.dtype != y.dtype:
         return False, f"x.dtype ({x.dtype}) != y.dtype ({y.dtype})"
-
-    eq = np.allclose(x, y, equal_nan=True)
-    msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}"
+    if x.dtype == "object":
+        eq, msg = _deep_equals(x.tolist(), y.tolist(), depth, ignore_index=True)
+    else:
+        eq = np.allclose(x, y, equal_nan=True)
+        msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}"
     return eq, msg
 
 

diff --git a/aeon/testing/utils/tests/test_deep_equals.py b/aeon/testing/utils/tests/test_deep_equals.py
@@ -14,7 +14,7 @@
     42,
     [],
     (()),
-    [([([([()])])])],
+    [[[[()]]]],
     np.array([2, 3, 4]),
     np.array([2, 4, 5]),
     3.5,

diff --git a/docs/conf.py b/docs/conf.py
@@ -388,7 +388,7 @@ def _does_not_start_with_underscore(input_string):
 
             # For case where tag is not included output as not supported
             if not _val or _val is None:
-                data[abbrevation].append("\u274C")
+                data[abbrevation].append("\u274c")
             else:
                 data[abbrevation].append("\u2705")