DHI · ecomodeller · Feb 8, 2022 · Jan 31, 2022 · Jan 31, 2022 · Jan 31, 2022
diff --git a/tests/test_detectors.py b/tests/test_detectors.py
@@ -2,8 +2,9 @@
 import pytest
 import numpy as np
 import pandas as pd
+import os
 
-from tsod.custom_exceptions import WrongInputDataType
+from tsod.custom_exceptions import WrongInputDataTypeError
 from tsod.detectors import (
     RangeDetector,
     DiffDetector,
@@ -89,7 +90,7 @@ def test_base_detector_exceptions(range_data, range_data_series):
     data_series, _, _ = range_data_series
 
     detector = RangeDetector()
-    pytest.raises(WrongInputDataType, detector.fit, data)
+    pytest.raises(WrongInputDataTypeError, detector.fit, data)
 
 
 def test_range_detector(range_data_series):
@@ -175,7 +176,9 @@ def test_diff_detector_autoset(range_data_series):
 
 
 def test_combined_detector():
-    df = pd.read_csv("tests/data/example.csv", parse_dates=True, index_col=0)
+    tests_dir = os.path.dirname(os.path.abspath(__file__))  # this file's folder, independent of cwd and path names
+    df = pd.read_csv(os.path.join(tests_dir, 'data', 'example.csv'),
+                     parse_dates=True, index_col=0)
     combined = CombinedDetector(
         [
             ConstantValueDetector(),
@@ -229,7 +232,7 @@ def test_hampel_detector(data_series):
     assert all(i in expected_anomalies_indices for i in anomalies_indices)
 
 
-def test_autoencoder_detector(data_series):
+def test_auto_encoder_detector(data_series):
     data_with_anomalies, expected_anomalies_indices, normal_data = data_series
     detector = AutoEncoder(
         hidden_neurons=[1, 1, 1, 1], epochs=1
@@ -239,15 +242,16 @@ def test_autoencoder_detector(data_series):
     anomalies_indices = np.array(np.where(anomalies)).flatten()
     # Validate if the found anomalies are also in the expected anomaly set
     # NB Not necessarily all of them
-    # assert all(i in expected_anomalies_indices for i in anomalies_indices)
+    assert np.mean(np.array([i in expected_anomalies_indices for i in anomalies_indices])) > 0.4
 
 
-def test_autoencoderlstm_detector(data_series):
+def test_auto_encoder_lstm_detector(data_series):
     data_with_anomalies, expected_anomalies_indices, normal_data = data_series
     detector = AutoEncoderLSTM()
     detector.fit(data_with_anomalies)
     anomalies = detector.detect(data_with_anomalies)
     anomalies_indices = np.array(np.where(anomalies)).flatten()
+    assert np.mean(np.array([i in expected_anomalies_indices for i in anomalies_indices])) > 0.01
 
 
 def test_constant_value_detector(constant_data_series):
@@ -371,7 +375,7 @@ def test_create_dataset(data_series):
     data_with_anomalies.name = "y"
     data = data_with_anomalies.to_frame()
     time_steps = 2
-    X, y = create_dataset(data[["y"]], data.y, time_steps)
+    predictors, y = create_dataset(data[["y"]], data.y, time_steps)
     assert len(y) == len(data) - time_steps
-    assert X.shape[0] == len(data) - time_steps
-    assert X.shape[1] == time_steps
+    assert predictors.shape[0] == len(data) - time_steps
+    assert predictors.shape[1] == time_steps
diff --git a/tests/test_mvdetectors.py b/tests/test_mvdetectors.py
@@ -0,0 +1,161 @@
+import pytest
+import pandas as pd
+import numpy as np
+
+from tsod.custom_exceptions import InvalidArgumentError
+from tsod.mvdetectors import MVRangeDetector
+
+
+@pytest.fixture
+def range_data():
+    """Build a (normal, abnormal) pair of 3 x 15 frames of uniform draws with NaNs, boundary values and outliers."""
+    normal = pd.DataFrame(np.random.uniform(size=(3, 15)))
+    normal.iloc[2, [2, 8]] = np.nan
+    normal.iloc[:, 13] = 1
+    normal.iloc[:, 14] = 0
+    abnormal = pd.DataFrame(np.random.uniform(size=(3, 15)))
+    abnormal.iloc[0, [2, 3, 7]] = 5
+    abnormal.iloc[1, [2, 12]] = -2
+    abnormal.iloc[0, [8]] = np.nan
+    abnormal.iloc[2, [8, 9]] = np.nan
+    return normal, abnormal
+
+
+@pytest.fixture
+def range_data_time_series_specific_ranges():
+    """Build a (normal, abnormal) pair of 3 x 15 frames in which every row is drawn from its own [low, high) range."""
+    lows = [-1, -0.5, 0]
+    highs = [2, 3, 4]
+    normal = pd.DataFrame(np.random.uniform(low=lows, high=highs, size=(15, len(lows))).T)
+    normal.iloc[2, [2, 8]] = np.nan
+    normal.iloc[:, 13] = lows
+    normal.iloc[:, 14] = highs
+    abnormal = pd.DataFrame(np.random.uniform(low=lows, high=highs, size=(15, len(lows))).T)
+    abnormal.iloc[0, [2, 3, 7]] = 5
+    abnormal.iloc[1, [2, 12]] = -2
+    abnormal.iloc[0, [8]] = np.nan
+    abnormal.iloc[2, [8, 9]] = np.nan
+    return normal, abnormal
+
+
+@pytest.mark.parametrize("detector, expected_anomalies_list", [
+    (MVRangeDetector(min_value=0.0, max_value=1.0),
+     [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+      [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]]),
+    (MVRangeDetector(max_value=1.0),
+     [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]]),
+    (MVRangeDetector(min_value=0.0),
+     [[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False],
+      [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]])
+])
+def test_single_range_detector_detection(range_data, detector, expected_anomalies_list):
+    """Scalar bounds must flag exactly the expected cells of the abnormal frame and none of the normal one."""
+    normal, abnormal = range_data
+    expected = pd.DataFrame(expected_anomalies_list, columns=abnormal.columns, index=abnormal.index)
+    pd.testing.assert_frame_equal(expected, detector.detect(abnormal))
+
+    # The normal frame must come back without a single flag.
+    assert not detector.detect(normal).to_numpy().any()
+
+
+def test_single_range_detector_fitting(range_data):
+    """A detector fitted on the normal frame must flag the expected cells of the abnormal frame only."""
+    normal, abnormal = range_data
+    detector = MVRangeDetector()
+    detector.fit(normal)
+    expected = pd.DataFrame(
+        [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+         [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+         [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]],
+        columns=abnormal.columns, index=abnormal.index)
+    pd.testing.assert_frame_equal(expected, detector.detect(abnormal))
+
+    # The data the detector was fitted on must come back without a single flag.
+    assert not detector.detect(normal).to_numpy().any()
+
+
+@pytest.mark.parametrize("detector, expected_anomalies_list", [
+    (MVRangeDetector(min_value=[0.0, 0.0, 0.0], max_value=1.0),
+     [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+      [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]]),
+    (MVRangeDetector(min_value=0.0, max_value=[1.0, 1.0, 1.0]),
+     [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+      [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]]),
+    (MVRangeDetector(min_value=[0.0, 0.0, 0.0], max_value=[1.0, 1.0, 1.0]),
+     [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+      [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]])
+])
+def test_multi_range_detector_detection(range_data, detector, expected_anomalies_list):
+    """List-valued bounds, alone or mixed with scalar ones, must give the same flags as the scalar 0..1 bounds."""
+    normal, abnormal = range_data
+    detected = detector.detect(abnormal)
+    expected = pd.DataFrame(expected_anomalies_list, columns=abnormal.columns, index=abnormal.index)
+    pd.testing.assert_frame_equal(expected, detected)
+
+    # The normal frame must come back without a single flag.
+    assert not detector.detect(normal).to_numpy().any()
+
+
+@pytest.mark.parametrize("detector, expected_anomalies_list", [
+    (MVRangeDetector(min_value=[-1, -0.5, 0], max_value=[2, 3, 4]),
+     [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+      [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]]),
+    (MVRangeDetector(max_value=[2, 3, 4]),
+     [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]]),
+    (MVRangeDetector(min_value=[-1, -0.5, 0]),
+     [[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False],
+      [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+      [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]])
+])
+def test_multiple_ranges_detector_detection(range_data_time_series_specific_ranges, detector, expected_anomalies_list):
+    """Bounds that differ per row must flag exactly the expected cells of the abnormal frame and none of the normal one."""
+    normal, abnormal = range_data_time_series_specific_ranges
+    expected = pd.DataFrame(expected_anomalies_list, columns=abnormal.columns, index=abnormal.index)
+    pd.testing.assert_frame_equal(expected, detector.detect(abnormal))
+
+    # The normal frame must come back without a single flag.
+    assert not detector.detect(normal).to_numpy().any()
+
+
+def test_multiple_ranges_detector_fitting(range_data_time_series_specific_ranges):
+    """A detector fitted on the row-specific normal frame must flag the expected cells of the abnormal frame only."""
+    normal, abnormal = range_data_time_series_specific_ranges
+    detector = MVRangeDetector()
+    detector.fit(normal)
+    expected = pd.DataFrame(
+        [[False, False, True, True, False, False, False, True, False, False, False, False, False, False, False],
+         [False, False, True, False, False, False, False, False, False, False, False, False, True, False, False],
+         [False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]],
+        columns=abnormal.columns, index=abnormal.index)
+    pd.testing.assert_frame_equal(expected, detector.detect(abnormal))
+
+    # The data the detector was fitted on must come back without a single flag.
+    assert not detector.detect(normal).to_numpy().any()
+
+
+@pytest.mark.parametrize("min_value, max_value", [
+    (3, 2), ([0, 0, 3], 2), ([[0], [0], [0]], 1), (-1, [[0], [0], [0]]),
+])
+def test_invalid_argument_raised_min_max(min_value, max_value):
+    """Each parametrized min/max combination must be rejected by the constructor with InvalidArgumentError."""
+    with pytest.raises(InvalidArgumentError):
+        MVRangeDetector(min_value=min_value, max_value=max_value)
+
+
+@pytest.mark.parametrize("quantile_prob_cut_offs", [
+    [0.5, 1.1], [-0.5, 1.1], [-0.5, 0.9],
+])
+def test_invalid_argument_raised_quantiles(quantile_prob_cut_offs):
+    """Each cut-off pair holds a probability outside [0, 1] and must be rejected with InvalidArgumentError."""
+    with pytest.raises(InvalidArgumentError):
+        MVRangeDetector(quantiles=quantile_prob_cut_offs)
diff --git a/tests/test_persistence.py b/tests/test_persistence.py
@@ -21,8 +21,8 @@ def test_save_and_load(tmp_path):
 
 
 def test_load():
-
-    filename = os.path.join("tests", "data", "combined.joblib")
+    tests_dir = os.path.dirname(os.path.abspath(__file__))  # this file's folder, independent of cwd and path names
+    filename = os.path.join(tests_dir, "data", "combined.joblib")
 
     loaded = tsod.load(filename)
 
@@ -43,4 +43,4 @@ def test_save_and_load_filename(tmpdir):
 
     loaded = tsod.load(filename)
 
-    assert isinstance(loaded, CombinedDetector)
+    assert isinstance(loaded, CombinedDetector)
diff --git a/tsod/base.py b/tsod/base.py
@@ -5,10 +5,9 @@
 import joblib
 
 import pandas as pd
-import numpy as np
 
 
-from .custom_exceptions import WrongInputDataType
+from .custom_exceptions import WrongInputDataTypeError
 
 
 def load(path: Union[str, Path]):
@@ -29,7 +28,7 @@ class Detector(ABC):
     def __init__(self):
         pass
 
-    def fit(self, data: pd.Series):
+    def fit(self, data: Union[pd.Series, pd.DataFrame]):
         """Set detector parameters based on data.
 
         Parameters
@@ -41,11 +40,11 @@ def fit(self, data: pd.Series):
         self._fit(data)
         return self
 
-    def _fit(self, data: pd.Series):
+    def _fit(self, data: Union[pd.Series, pd.DataFrame]):
         # Default implementation is a NoOp
         return self
 
-    def detect(self, data: pd.Series) -> pd.Series:
+    def detect(self, data: Union[pd.Series, pd.DataFrame]) -> Union[pd.Series, pd.DataFrame]:
         """Detect anomalies
 
         Parameters
@@ -63,25 +62,25 @@ def detect(self, data: pd.Series) -> pd.Series:
         pred = self._detect(data)
         return self._postprocess(pred)
 
-    def _postprocess(self, pred: pd.Series) -> pd.Series:
+    def _postprocess(self, pred: Union[pd.Series, pd.DataFrame]) -> Union[pd.Series, pd.DataFrame]:
         # TODO implement
         return pred
 
     @abstractmethod
-    def _detect(self, data: pd.Series) -> pd.Series:
-        "Detect anomalies"
-        NotImplementedError()
+    def _detect(self, data: Union[pd.Series, pd.DataFrame]) -> Union[pd.Series, pd.DataFrame]:
+        """Detect anomalies in ``data``; abstract, so every concrete detector must override it."""
+        pass
 
-    def validate(self, data: pd.Series) -> pd.Series:
+    def validate(self, data: Union[pd.Series, pd.DataFrame]) -> Union[pd.Series, pd.DataFrame]:
         """Check that input data is in correct format and possibly adjust"""
-        if not isinstance(data, pd.Series):
-            raise WrongInputDataType()
+        if not isinstance(data, (pd.Series, pd.DataFrame)):
+            raise WrongInputDataTypeError("Input data must be a pandas.Series or pandas.DataFrame.")
         return data
 
-    def _gradient(self, data: pd.Series, periods: int = 1) -> pd.Series:
+    def _gradient(self, data: Union[pd.Series, pd.DataFrame], periods: int = 1) -> Union[pd.Series, pd.DataFrame]:
         dt = data.index.to_series().diff().dt.total_seconds()
         if dt.min() < 1e-15:
-            raise ValueError("Input must be monotonic increasing")
+            raise ValueError("Index must be monotonically increasing")
 
-        gradient = data.diff(periods=periods) / dt
+        gradient = data.diff(periods=periods).div(dt, axis=0)  # axis=0: align dt on the row index, also for DataFrames
         return gradient

diff --git a/tsod/custom_exceptions.py b/tsod/custom_exceptions.py
@@ -1,4 +1,4 @@
-class WrongInputDataType(Exception):
+class WrongInputDataTypeError(Exception):
     def __init__(self, message="Input data must be a pandas.Series."):
         self.message = message
         super().__init__(self.message)
@@ -15,18 +15,22 @@ def __init__(self, message="Or specify min/max range when instantiating detector
         super().__init__(message)
 
 
-class InvalidArgument(Exception):
+class InvalidArgumentError(Exception):
     def __init__(self, argument_name, requirement):
         self.message = f"{argument_name} must be {requirement}."
         super().__init__(self.message)
 
 
-class NotInteger(InvalidArgument):
+class NotIntegerError(InvalidArgumentError):
     def __init__(self, argument_name):
         super().__init__(argument_name, "an integer")
 
 
-class NonUniqueTimeStamps(Exception):
+class NonUniqueTimeStampsError(Exception):
     def __init__(self, message="Found multiple values at the same time stamp."):
         self.message = message
         super().__init__(self.message)
+
+
+class WrongInputSizeError(ValueError):
+    pass