SeldonIO · mauicv · Feb 27, 2023 · Feb 27, 2023 · Feb 27, 2023 · Feb 28, 2023
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
@@ -0,0 +1,11 @@
+# Linting and docstring fixes (formatting-only commits for `git blame` to ignore)
+ba8801f3eeb4c462b63d731c4a3d25450d2f81cb
+b68898bfc86bc5c327bd05133f45939a65e482f8
+cb038bcc442ee3d487380b1370536e4ca25858fa
+c088b3076b0d0782c224d2c509dbc902f6ab1328
+25de434a89f1ce3a95855fa37acb3ec9045dd532
+f9783aa4d08aba6878000fc48f821f247b138d7a
+513cbd1bec9abf7fb03ec43230c3ca6da440787d
+4714b38b5456cece1d33d20dfa032102b3feeb62
+ee5bf1d475664cc4611191ef17591f8d9657e7bb
+c8dff32ce8aee3b624e7e2117ddd25159eb714f6
diff --git a/alibi_detect/ad/adversarialae.py b/alibi_detect/ad/adversarialae.py
@@ -291,8 +291,8 @@ def predict(self, X: np.ndarray, batch_size: int = int(1e10), return_instance_sc
  Returns
  -------
  Dictionary containing 'meta' and 'data' dictionaries.
- 'meta' has the model's metadata.
- 'data' contains the adversarial predictions and instance level adversarial scores.
+  - 'meta' has the model's metadata.
+  - 'data' contains the adversarial predictions and instance level adversarial scores.
  """
  adv_score = self.score(X, batch_size=batch_size)
 

diff --git a/alibi_detect/ad/model_distillation.py b/alibi_detect/ad/model_distillation.py
@@ -167,7 +167,6 @@ def score(self, X: np.ndarray, batch_size: int = int(1e10), return_predictions:
  -------
  Array with adversarial scores for each instance in the batch.
  """
-
  # model predictions
  y = predict_batch(X, self.model, batch_size=batch_size)
  y_distilled = predict_batch(X, self.distilled_model, batch_size=batch_size)
@@ -208,8 +207,8 @@ def predict(self, X: np.ndarray, batch_size: int = int(1e10), return_instance_sc
  Returns
  -------
  Dictionary containing 'meta' and 'data' dictionaries.
- 'meta' has the model's metadata.
- 'data' contains the adversarial predictions and instance level adversarial scores.
+  - 'meta' has the model's metadata.
+  - 'data' contains the adversarial predictions and instance level adversarial scores.
  """
  score = self.score(X, batch_size=batch_size)
 

diff --git a/alibi_detect/base.py b/alibi_detect/base.py
@@ -56,7 +56,7 @@ def concept_drift_dict():
 
 
 class BaseDetector(ABC):
- """ Base class for outlier, adversarial and drift detection algorithms. """
+ """Base class for outlier, adversarial and drift detection algorithms."""
 
  def __init__(self):
  self.meta = copy.deepcopy(DEFAULT_META)
@@ -104,9 +104,8 @@ def infer_threshold(self, *args, **kwargs) -> None:
 
 
 class DriftConfigMixin:
- """
- A mixin class containing methods related to a drift detector's configuration dictionary.
- """
+ """A mixin class containing methods related to a drift detector's configuration dictionary."""
+
  config: Optional[dict] = None
 
  def get_config(self) -> dict: # TODO - move to BaseDetector once config save/load implemented for non-drift
@@ -190,9 +189,7 @@ def _set_config(self, inputs: dict): # TODO - move to BaseDetector once config
 
  @property
  def _nested_detector(self):
- """
- The low-level nested detector.
- """
+ """The low-level nested detector."""
  detector = self._detector if hasattr(self, '_detector') else self # type: ignore[attr-defined]
  detector = detector._detector if hasattr(detector, '_detector') else detector # type: ignore[attr-defined]
  return detector
@@ -204,10 +201,12 @@ class Detector(Protocol):
 
  Used for typing legacy save and load functionality in `alibi_detect.saving._tensorflow.saving.py`.
 
- Note:
+ Note
+ ----
  This exists to distinguish between detectors with and without support for config saving and loading. Once all
  detector support this then this protocol will be removed.
  """
+
  meta: Dict
 
  def predict(self) -> Any: ...
@@ -219,6 +218,7 @@ class ConfigurableDetector(Detector, Protocol):
 
  Used for typing save and load functionality in `alibi_detect.saving.saving`.
  """
+
  def get_config(self) -> dict: ...
 
  @classmethod
@@ -233,6 +233,7 @@ class StatefulDetectorOnline(ConfigurableDetector, Protocol):
 
  Used for typing save and load functionality in `alibi_detect.saving.saving`.
  """
+
  t: int = 0
 
  def save_state(self, filepath: Union[str, os.PathLike]): ...

diff --git a/alibi_detect/cd/_domain_clf.py b/alibi_detect/cd/_domain_clf.py
@@ -15,6 +15,7 @@ class _DomainClf(ABC):
  test). They should predict propensity scores (probability of being test instances) as output.
  Classifiers should possess a calibrate method to calibrate the propensity scores.
  """
+
  @abstractmethod
  def __init__(self, *args, **kwargs: dict):
  raise NotImplementedError()

diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py
@@ -135,10 +135,12 @@ def __init__(
  def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[Union[np.ndarray, list], Union[np.ndarray, list]]:
  """
  Data preprocessing before computing the drift scores.
+
  Parameters
  ----------
  x
  Batch of instances.
+
  Returns
  -------
  Preprocessed reference data and new instances.
@@ -394,10 +396,12 @@ def __init__(
  def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[Union[np.ndarray, list], Union[np.ndarray, list]]:
  """
  Data preprocessing before computing the drift scores.
+
  Parameters
  ----------
  x
  Batch of instances.
+
  Returns
  -------
  Preprocessed reference data and new instances.
@@ -418,17 +422,18 @@ def get_splits(self, x_ref: Union[np.ndarray, list], x: Union[np.ndarray, list])
  """
  Split reference and test data into two splits -- one of which to learn test locations
  and parameters and one to use for tests.
+
  Parameters
  ----------
  x_ref
  Data used as reference distribution.
  x
  Batch of instances.
+
  Returns
  -------
- Tuple containing split train data and tuple containing split test data
+ Tuple containing split train data and tuple containing split test data.
  """
-
  n_ref, n_cur = len(x_ref), len(x)
  perm_ref, perm_cur = np.random.permutation(n_ref), np.random.permutation(n_cur)
  idx_ref_tr, idx_ref_te = perm_ref[:int(n_ref * self.train_size)], perm_ref[int(n_ref * self.train_size):]
@@ -586,10 +591,12 @@ def __init__(
  def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray]:
  """
  Data preprocessing before computing the drift scores.
+
  Parameters
  ----------
  x
  Batch of instances.
+
  Returns
  -------
  Preprocessed reference data and new instances.
@@ -748,10 +755,12 @@ def __init__(
  def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray]:
  """
  Data preprocessing before computing the drift scores.
+
  Parameters
  ----------
  x
  Batch of instances.
+
  Returns
  -------
  Preprocessed reference data and new instances.
@@ -1136,10 +1145,12 @@ def __init__(
  def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray]:
  """
  Data preprocessing before computing the drift scores.
+
  Parameters
  ----------
  x
  Batch of instances.
+
  Returns
  -------
  Preprocessed reference data and new instances.
@@ -1182,9 +1193,9 @@ def predict(self, # type: ignore[override]
  Returns
  -------
  Dictionary containing 'meta' and 'data' dictionaries.
- 'meta' has the model's metadata.
- 'data' contains the drift prediction and optionally the p-value, threshold, conditional MMD test statistic
- and coupling matrices.
+  - 'meta' has the model's metadata.
+  - 'data' contains the drift prediction and optionally the p-value, threshold, conditional MMD test \
+  statistic and coupling matrices.
  """
  # compute drift scores
  p_val, dist, distance_threshold, coupling = self.score(x, c)

diff --git a/alibi_detect/cd/base_online.py b/alibi_detect/cd/base_online.py
@@ -165,9 +165,7 @@ def reset(self) -> None:
  'to its initial state use `reset_state`.', DeprecationWarning)
 
  def reset_state(self) -> None:
- """
- Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds.
- """
+ """Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds."""
  self._initialise_state()
 
  def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,
@@ -185,8 +183,8 @@ def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,
  Returns
  -------
  Dictionary containing 'meta' and 'data' dictionaries.
- 'meta' has the model's metadata.
- 'data' contains the drift prediction and optionally the test-statistic and threshold.
+  - 'meta' has the model's metadata.
+  - 'data' contains the drift prediction and optionally the test-statistic and threshold.
  """
  # Compute test stat and check for drift
  test_stat = self.score(x_t)
@@ -422,9 +420,7 @@ def reset(self) -> None:
  'to its initial state use `reset_state`.', DeprecationWarning)
 
  def reset_state(self) -> None:
- """
- Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds.
- """
+ """Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds."""
  self._initialise_state()
 
  def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,

diff --git a/alibi_detect/cd/chisquare.py b/alibi_detect/cd/chisquare.py
@@ -131,7 +131,5 @@ def feature_score(self, x_ref: np.ndarray, x: np.ndarray) -> Tuple[np.ndarray, n
  return p_val, dist
 
  def _get_counts(self, x: np.ndarray, categories: Dict[int, List[int]]) -> Dict[int, List[int]]:
- """
- Utility method for getting the counts of categories for each categorical variable.
- """
+ """Utility method for getting the counts of categories for each categorical variable."""
  return {f: [(x[:, f] == v).sum() for v in vals] for f, vals in categories.items()}
diff --git a/alibi_detect/cd/classifier.py b/alibi_detect/cd/classifier.py
@@ -186,8 +186,7 @@ def __init__(
  def predict(self, x: Union[np.ndarray, list], return_p_val: bool = True,
  return_distance: bool = True, return_probs: bool = True, return_model: bool = True) \
  -> Dict[str, Dict[str, Union[str, int, float, Callable]]]:
- """
- Predict whether a batch of data has drifted from the reference data.
+ """Predict whether a batch of data has drifted from the reference data.
 
  Parameters
  ----------
@@ -207,10 +206,8 @@ def predict(self, x: Union[np.ndarray, list], return_p_val: bool = True,
  Returns
  -------
  Dictionary containing 'meta' and 'data' dictionaries
-
- - 'meta' - has the model's metadata.
-
- - 'data' - contains the drift prediction and optionally the p-value, performance of the classifier \
+ - 'meta' has the model's metadata.
+ - 'data' contains the drift prediction and optionally the p-value, performance of the classifier \
  relative to its expectation under the no-change null, the out-of-fold classifier model \
  prediction probabilities on the reference and test data, and the trained model. \
  """

diff --git a/alibi_detect/cd/context_aware.py b/alibi_detect/cd/context_aware.py
@@ -142,9 +142,9 @@ def predict(self, x: Union[np.ndarray, list], c: np.ndarray,
  Returns
  -------
  Dictionary containing 'meta' and 'data' dictionaries.
- 'meta' has the model's metadata.
- 'data' contains the drift prediction and optionally the p-value, threshold, conditional MMD test statistic
- and coupling matrices.
+  - 'meta' has the model's metadata.
+  - 'data' contains the drift prediction and optionally the p-value, threshold, conditional MMD test \
+  statistic and coupling matrices.
  """
  return self._detector.predict(x, c, return_p_val, return_distance, return_coupling)
 
@@ -162,8 +162,8 @@ def score(self, x: Union[np.ndarray, list], c: np.ndarray) -> Tuple[float, float
 
  Returns
  -------
- p-value obtained from the conditional permutation test, the conditional MMD test statistic, the test
- statistic threshold above which drift is flagged, and a tuple containing the coupling matrices
- (W_{ref,ref}, W_{test,test}, W_{ref,test}).
+ p-value obtained from the conditional permutation test, the conditional MMD test statistic, the test \
+ statistic threshold above which drift is flagged, and a tuple containing the coupling matrices \
+ :math:`(W_{ref,ref}, W_{test,test}, W_{ref,test})`.
  """
  return self._detector.score(x, c)
diff --git a/alibi_detect/cd/cvm_online.py b/alibi_detect/cd/cvm_online.py
@@ -25,16 +25,16 @@ def __init__(
  input_shape: Optional[tuple] = None,
  data_type: Optional[str] = None
  ) -> None:
- """
+ r"""
  Online Cramer-von Mises (CVM) data drift detector using preconfigured thresholds, which tests for
  any change in the distribution of continuous univariate data. This detector is an adaption of that
  proposed by :cite:t:`Ross2012a`.
 
  For multivariate data, the detector makes a correction similar to the Bonferroni correction used for
  the offline detector. Given :math:`d` features, the detector configures thresholds by
- targeting the :math:`1-\\beta` quantile of test statistics over the simulated streams, where
- :math:`\\beta = 1 - (1-(1/ERT))^{(1/d)}`. For the univariate case, this simplifies to
- :math:`\\beta = 1/ERT`. At prediction time, drift is flagged if the test statistic of any feature stream
+ targeting the :math:`1-\beta` quantile of test statistics over the simulated streams, where
+ :math:`\beta = 1 - (1-(1/ERT))^{(1/d)}`. For the univariate case, this simplifies to
+ :math:`\beta = 1/ERT`. At prediction time, drift is flagged if the test statistic of any feature stream
  exceed the thresholds.
 
  Note
@@ -99,9 +99,7 @@ def __init__(
  self._configure_ref()
 
  def _configure_ref(self) -> None:
- """
- Configure the reference data.
- """
+ """Configure the reference data."""
  ids_ref_ref = self.x_ref[None, :, :] >= self.x_ref[:, None, :]
  self.ref_cdf_ref = np.sum(ids_ref_ref, axis=0) / self.n
 
@@ -133,7 +131,7 @@ def _configure_thresholds(self) -> None:
  max_stats = np.nanmax(stats, -1)
  # Now loop through each t and find threshold (at each t) that satisfies eqn. (2) in Ross et al.
  thresholds = np.full((t_max, 1), np.nan)
- for t in range(np.min(self.window_sizes)-1, t_max):
+ for t in range(np.min(self.window_sizes) - 1, t_max):
  # Compute (1-beta) quantile of max_stats at a given t, over all streams
  threshold = quantile(max_stats[:, t], 1 - beta)
  # Remove streams for which a change point has already been detected
@@ -164,7 +162,7 @@ def _simulate_streams(self, t_max: int) -> np.ndarray:
 
  # Remove stats prior to windows being full
  for k, ws in enumerate(self.window_sizes):
- stats[:, :ws-1, k] = np.nan
+ stats[:, :ws - 1, k] = np.nan
  return stats
 
  def _update_state(self, x_t: np.ndarray):
@@ -201,9 +199,7 @@ def _update_state(self, x_t: np.ndarray):
  )
 
  def _initialise_state(self) -> None:
- """
- Initialise online state (the stateful attributes updated by `score` and `predict`).
- """
+ """Initialise online state (the stateful attributes updated by `score` and `predict`)."""
  super()._initialise_state()
  self.ids_ref_wins = np.array([])
  self.ids_wins_ref = np.array([])
@@ -266,9 +262,7 @@ def _check_drift(self, test_stats: np.ndarray, thresholds: np.ndarray) -> int:
 
 @nb.njit(parallel=False, cache=True)
 def _normalise_stats(stats: np.ndarray, n: int, ws: int) -> np.ndarray:
- """
- See Eqns 3 & 14 of https://www.projecteuclid.org/euclid.aoms/1177704477.
- """
+ """See Eqns 3 & 14 of https://www.projecteuclid.org/euclid.aoms/1177704477."""
  mu = 1 / 6 + 1 / (6 * (n + ws))
  var_num = (n + ws + 1) * (4 * n * ws * (n + ws) - 3 * (n * n + ws * ws) - 2 * n * ws)
  var_denom = 45 * (n + ws) * (n + ws) * 4 * n * ws
@@ -293,7 +287,7 @@ def _ids_to_stats(
  for b in nb.prange(n_bootstraps):
  ref_cdf_all = np.sum(ids_ref_all[b], axis=0) / n
 
- cumsums = np.zeros((t_max+1, n_all))
+ cumsums = np.zeros((t_max + 1, n_all))
  for i in range(n_all):
  cumsums[1:, i] = np.cumsum(ids_stream_all[b, :, i])
 
@@ -303,8 +297,8 @@ def _ids_to_stats(
  cdf_diffs_on_ref = np.empty_like(win_cdf_ref)
  for j in range(win_cdf_ref.shape[0]): # Need to loop through as can't broadcast in njit parallel
  cdf_diffs_on_ref[j, :] = ref_cdf_all[:n] - win_cdf_ref[j, :]
- stats[b, (ws-1):, k] = np.sum(cdf_diffs_on_ref * cdf_diffs_on_ref, axis=-1)
- for t in range(ws-1, t_max):
+ stats[b, (ws - 1):, k] = np.sum(cdf_diffs_on_ref * cdf_diffs_on_ref, axis=-1)
+ for t in range(ws - 1, t_max):
  win_cdf_win = (cumsums[t + 1, n + t - ws:n + t] -
  cumsums[t + 1 - ws, n + t - ws:n + t]) / ws
  cdf_diffs_on_win = ref_cdf_all[n + t - ws:n + t] - win_cdf_win