Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add flake8-docstrings to CI #748

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Linting errors and Docstrings Fixes
ba8801f3eeb4c462b63d731c4a3d25450d2f81cb
b68898bfc86bc5c327bd05133f45939a65e482f8
cb038bcc442ee3d487380b1370536e4ca25858fa
c088b3076b0d0782c224d2c509dbc902f6ab1328
25de434a89f1ce3a95855fa37acb3ec9045dd532
f9783aa4d08aba6878000fc48f821f247b138d7a
513cbd1bec9abf7fb03ec43230c3ca6da440787d
4714b38b5456cece1d33d20dfa032102b3feeb62
ee5bf1d475664cc4611191ef17591f8d9657e7bb
c8dff32ce8aee3b624e7e2117ddd25159eb714f6
4 changes: 2 additions & 2 deletions alibi_detect/ad/adversarialae.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,8 @@ def predict(self, X: np.ndarray, batch_size: int = int(1e10), return_instance_sc
Returns
-------
Dictionary containing 'meta' and 'data' dictionaries.
'meta' has the model's metadata.
'data' contains the adversarial predictions and instance level adversarial scores.
- 'meta' has the model's metadata.
- 'data' contains the adversarial predictions and instance level adversarial scores.
"""
adv_score = self.score(X, batch_size=batch_size)

Expand Down
5 changes: 2 additions & 3 deletions alibi_detect/ad/model_distillation.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ def score(self, X: np.ndarray, batch_size: int = int(1e10), return_predictions:
-------
Array with adversarial scores for each instance in the batch.
"""

# model predictions
y = predict_batch(X, self.model, batch_size=batch_size)
y_distilled = predict_batch(X, self.distilled_model, batch_size=batch_size)
Expand Down Expand Up @@ -208,8 +207,8 @@ def predict(self, X: np.ndarray, batch_size: int = int(1e10), return_instance_sc
Returns
-------
Dictionary containing 'meta' and 'data' dictionaries.
'meta' has the model's metadata.
'data' contains the adversarial predictions and instance level adversarial scores.
- 'meta' has the model's metadata.
- 'data' contains the adversarial predictions and instance level adversarial scores.
"""
score = self.score(X, batch_size=batch_size)

Expand Down
17 changes: 9 additions & 8 deletions alibi_detect/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def concept_drift_dict():


class BaseDetector(ABC):
""" Base class for outlier, adversarial and drift detection algorithms. """
"""Base class for outlier, adversarial and drift detection algorithms."""

def __init__(self):
self.meta = copy.deepcopy(DEFAULT_META)
Expand Down Expand Up @@ -104,9 +104,8 @@ def infer_threshold(self, *args, **kwargs) -> None:


class DriftConfigMixin:
"""
A mixin class containing methods related to a drift detector's configuration dictionary.
"""
"""A mixin class containing methods related to a drift detector's configuration dictionary."""

config: Optional[dict] = None

def get_config(self) -> dict: # TODO - move to BaseDetector once config save/load implemented for non-drift
Expand Down Expand Up @@ -190,9 +189,7 @@ def _set_config(self, inputs: dict): # TODO - move to BaseDetector once config

@property
def _nested_detector(self):
"""
The low-level nested detector.
"""
"""The low-level nested detector."""
detector = self._detector if hasattr(self, '_detector') else self # type: ignore[attr-defined]
detector = detector._detector if hasattr(detector, '_detector') else detector # type: ignore[attr-defined]
return detector
Expand All @@ -204,10 +201,12 @@ class Detector(Protocol):

Used for typing legacy save and load functionality in `alibi_detect.saving._tensorflow.saving.py`.

Note:
Note
----
This exists to distinguish between detectors with and without support for config saving and loading. Once all
detectors support this, this protocol will be removed.
"""

meta: Dict

def predict(self) -> Any: ...
Expand All @@ -219,6 +218,7 @@ class ConfigurableDetector(Detector, Protocol):

Used for typing save and load functionality in `alibi_detect.saving.saving`.
"""

def get_config(self) -> dict: ...

@classmethod
Expand All @@ -233,6 +233,7 @@ class StatefulDetectorOnline(ConfigurableDetector, Protocol):

Used for typing save and load functionality in `alibi_detect.saving.saving`.
"""

t: int = 0

def save_state(self, filepath: Union[str, os.PathLike]): ...
Expand Down
1 change: 1 addition & 0 deletions alibi_detect/cd/_domain_clf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class _DomainClf(ABC):
test). They should predict propensity scores (probability of being test instances) as output.
Classifiers should possess a calibrate method to calibrate the propensity scores.
"""

@abstractmethod
def __init__(self, *args, **kwargs: dict):
raise NotImplementedError()
Expand Down
21 changes: 16 additions & 5 deletions alibi_detect/cd/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,12 @@ def __init__(
def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[Union[np.ndarray, list], Union[np.ndarray, list]]:
"""
Data preprocessing before computing the drift scores.

Parameters
----------
x
Batch of instances.

Returns
-------
Preprocessed reference data and new instances.
Expand Down Expand Up @@ -394,10 +396,12 @@ def __init__(
def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[Union[np.ndarray, list], Union[np.ndarray, list]]:
"""
Data preprocessing before computing the drift scores.

Parameters
----------
x
Batch of instances.

Returns
-------
Preprocessed reference data and new instances.
Expand All @@ -418,17 +422,18 @@ def get_splits(self, x_ref: Union[np.ndarray, list], x: Union[np.ndarray, list])
"""
Split reference and test data into two splits -- one of which to learn test locations
and parameters and one to use for tests.

Parameters
----------
x_ref
Data used as reference distribution.
x
Batch of instances.

Returns
-------
Tuple containing split train data and tuple containing split test data
Tuple containing split train data and tuple containing split test data.
"""

n_ref, n_cur = len(x_ref), len(x)
perm_ref, perm_cur = np.random.permutation(n_ref), np.random.permutation(n_cur)
idx_ref_tr, idx_ref_te = perm_ref[:int(n_ref * self.train_size)], perm_ref[int(n_ref * self.train_size):]
Expand Down Expand Up @@ -586,10 +591,12 @@ def __init__(
def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray]:
"""
Data preprocessing before computing the drift scores.

Parameters
----------
x
Batch of instances.

Returns
-------
Preprocessed reference data and new instances.
Expand Down Expand Up @@ -748,10 +755,12 @@ def __init__(
def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray]:
"""
Data preprocessing before computing the drift scores.

Parameters
----------
x
Batch of instances.

Returns
-------
Preprocessed reference data and new instances.
Expand Down Expand Up @@ -1136,10 +1145,12 @@ def __init__(
def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray]:
"""
Data preprocessing before computing the drift scores.

Parameters
----------
x
Batch of instances.

Returns
-------
Preprocessed reference data and new instances.
Expand Down Expand Up @@ -1182,9 +1193,9 @@ def predict(self, # type: ignore[override]
Returns
-------
Dictionary containing 'meta' and 'data' dictionaries.
'meta' has the model's metadata.
'data' contains the drift prediction and optionally the p-value, threshold, conditional MMD test statistic
and coupling matrices.
- 'meta' has the model's metadata.
- 'data' contains the drift prediction and optionally the p-value, threshold, conditional MMD test \
statistic and coupling matrices.
"""
# compute drift scores
p_val, dist, distance_threshold, coupling = self.score(x, c)
Expand Down
12 changes: 4 additions & 8 deletions alibi_detect/cd/base_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,7 @@ def reset(self) -> None:
'to its initial state use `reset_state`.', DeprecationWarning)

def reset_state(self) -> None:
"""
Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds.
"""
"""Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds."""
self._initialise_state()

def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,
Expand All @@ -185,8 +183,8 @@ def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,
Returns
-------
Dictionary containing 'meta' and 'data' dictionaries.
'meta' has the model's metadata.
'data' contains the drift prediction and optionally the test-statistic and threshold.
- 'meta' has the model's metadata.
- 'data' contains the drift prediction and optionally the test-statistic and threshold.
"""
# Compute test stat and check for drift
test_stat = self.score(x_t)
Expand Down Expand Up @@ -422,9 +420,7 @@ def reset(self) -> None:
'to its initial state use `reset_state`.', DeprecationWarning)

def reset_state(self) -> None:
"""
Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds.
"""
"""Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds."""
self._initialise_state()

def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,
Expand Down
4 changes: 1 addition & 3 deletions alibi_detect/cd/chisquare.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,5 @@ def feature_score(self, x_ref: np.ndarray, x: np.ndarray) -> Tuple[np.ndarray, n
return p_val, dist

def _get_counts(self, x: np.ndarray, categories: Dict[int, List[int]]) -> Dict[int, List[int]]:
"""
Utility method for getting the counts of categories for each categorical variable.
"""
"""Utility method for getting the counts of categories for each categorical variable."""
return {f: [(x[:, f] == v).sum() for v in vals] for f, vals in categories.items()}
9 changes: 3 additions & 6 deletions alibi_detect/cd/classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,7 @@ def __init__(
def predict(self, x: Union[np.ndarray, list], return_p_val: bool = True,
return_distance: bool = True, return_probs: bool = True, return_model: bool = True) \
-> Dict[str, Dict[str, Union[str, int, float, Callable]]]:
"""
Predict whether a batch of data has drifted from the reference data.
"""Predict whether a batch of data has drifted from the reference data.

Parameters
----------
Expand All @@ -207,10 +206,8 @@ def predict(self, x: Union[np.ndarray, list], return_p_val: bool = True,
Returns
-------
Dictionary containing 'meta' and 'data' dictionaries

- 'meta' - has the model's metadata.

- 'data' - contains the drift prediction and optionally the p-value, performance of the classifier \
- 'meta' - has the model's metadata.
- 'data' - contains the drift prediction and optionally the p-value, performance of the classifier \
relative to its expectation under the no-change null, the out-of-fold classifier model \
prediction probabilities on the reference and test data, and the trained model. \
"""
Expand Down
12 changes: 6 additions & 6 deletions alibi_detect/cd/context_aware.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,9 @@ def predict(self, x: Union[np.ndarray, list], c: np.ndarray,
Returns
-------
Dictionary containing 'meta' and 'data' dictionaries.
'meta' has the model's metadata.
'data' contains the drift prediction and optionally the p-value, threshold, conditional MMD test statistic
and coupling matrices.
- 'meta' has the model's metadata.
- 'data' contains the drift prediction and optionally the p-value, threshold, conditional MMD test \
statistic and coupling matrices.
"""
return self._detector.predict(x, c, return_p_val, return_distance, return_coupling)

Expand All @@ -162,8 +162,8 @@ def score(self, x: Union[np.ndarray, list], c: np.ndarray) -> Tuple[float, float

Returns
-------
p-value obtained from the conditional permutation test, the conditional MMD test statistic, the test
statistic threshold above which drift is flagged, and a tuple containing the coupling matrices
(W_{ref,ref}, W_{test,test}, W_{ref,test}).
p-value obtained from the conditional permutation test, the conditional MMD test statistic, the test \
statistic threshold above which drift is flagged, and a tuple containing the coupling matrices \
:math:`(W_{ref,ref}, W_{test,test}, W_{ref,test})`.
"""
return self._detector.score(x, c)
30 changes: 12 additions & 18 deletions alibi_detect/cd/cvm_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ def __init__(
input_shape: Optional[tuple] = None,
data_type: Optional[str] = None
) -> None:
"""
r"""
Online Cramer-von Mises (CVM) data drift detector using preconfigured thresholds, which tests for
any change in the distribution of continuous univariate data. This detector is an adaption of that
proposed by :cite:t:`Ross2012a`.

For multivariate data, the detector makes a correction similar to the Bonferroni correction used for
the offline detector. Given :math:`d` features, the detector configures thresholds by
targeting the :math:`1-\\beta` quantile of test statistics over the simulated streams, where
:math:`\\beta = 1 - (1-(1/ERT))^{(1/d)}`. For the univariate case, this simplifies to
:math:`\\beta = 1/ERT`. At prediction time, drift is flagged if the test statistic of any feature stream
targeting the :math:`1-\beta` quantile of test statistics over the simulated streams, where
:math:`\beta = 1 - (1-(1/ERT))^{(1/d)}`. For the univariate case, this simplifies to
:math:`\beta = 1/ERT`. At prediction time, drift is flagged if the test statistic of any feature stream
exceeds the thresholds.

Note
Expand Down Expand Up @@ -99,9 +99,7 @@ def __init__(
self._configure_ref()

def _configure_ref(self) -> None:
"""
Configure the reference data.
"""
"""Configure the reference data."""
ids_ref_ref = self.x_ref[None, :, :] >= self.x_ref[:, None, :]
self.ref_cdf_ref = np.sum(ids_ref_ref, axis=0) / self.n

Expand Down Expand Up @@ -133,7 +131,7 @@ def _configure_thresholds(self) -> None:
max_stats = np.nanmax(stats, -1)
# Now loop through each t and find threshold (at each t) that satisfies eqn. (2) in Ross et al.
thresholds = np.full((t_max, 1), np.nan)
for t in range(np.min(self.window_sizes)-1, t_max):
for t in range(np.min(self.window_sizes) - 1, t_max):
# Compute (1-beta) quantile of max_stats at a given t, over all streams
threshold = quantile(max_stats[:, t], 1 - beta)
# Remove streams for which a change point has already been detected
Expand Down Expand Up @@ -164,7 +162,7 @@ def _simulate_streams(self, t_max: int) -> np.ndarray:

# Remove stats prior to windows being full
for k, ws in enumerate(self.window_sizes):
stats[:, :ws-1, k] = np.nan
stats[:, :ws - 1, k] = np.nan
return stats

def _update_state(self, x_t: np.ndarray):
Expand Down Expand Up @@ -201,9 +199,7 @@ def _update_state(self, x_t: np.ndarray):
)

def _initialise_state(self) -> None:
"""
Initialise online state (the stateful attributes updated by `score` and `predict`).
"""
"""Initialise online state (the stateful attributes updated by `score` and `predict`)."""
super()._initialise_state()
self.ids_ref_wins = np.array([])
self.ids_wins_ref = np.array([])
Expand Down Expand Up @@ -266,9 +262,7 @@ def _check_drift(self, test_stats: np.ndarray, thresholds: np.ndarray) -> int:

@nb.njit(parallel=False, cache=True)
def _normalise_stats(stats: np.ndarray, n: int, ws: int) -> np.ndarray:
"""
See Eqns 3 & 14 of https://www.projecteuclid.org/euclid.aoms/1177704477.
"""
"""See Eqns 3 & 14 of https://www.projecteuclid.org/euclid.aoms/1177704477."""
mu = 1 / 6 + 1 / (6 * (n + ws))
var_num = (n + ws + 1) * (4 * n * ws * (n + ws) - 3 * (n * n + ws * ws) - 2 * n * ws)
var_denom = 45 * (n + ws) * (n + ws) * 4 * n * ws
Expand All @@ -293,7 +287,7 @@ def _ids_to_stats(
for b in nb.prange(n_bootstraps):
ref_cdf_all = np.sum(ids_ref_all[b], axis=0) / n

cumsums = np.zeros((t_max+1, n_all))
cumsums = np.zeros((t_max + 1, n_all))
for i in range(n_all):
cumsums[1:, i] = np.cumsum(ids_stream_all[b, :, i])

Expand All @@ -303,8 +297,8 @@ def _ids_to_stats(
cdf_diffs_on_ref = np.empty_like(win_cdf_ref)
for j in range(win_cdf_ref.shape[0]): # Need to loop through as can't broadcast in njit parallel
cdf_diffs_on_ref[j, :] = ref_cdf_all[:n] - win_cdf_ref[j, :]
stats[b, (ws-1):, k] = np.sum(cdf_diffs_on_ref * cdf_diffs_on_ref, axis=-1)
for t in range(ws-1, t_max):
stats[b, (ws - 1):, k] = np.sum(cdf_diffs_on_ref * cdf_diffs_on_ref, axis=-1)
for t in range(ws - 1, t_max):
win_cdf_win = (cumsums[t + 1, n + t - ws:n + t] -
cumsums[t + 1 - ws, n + t - ws:n + t]) / ws
cdf_diffs_on_win = ref_cdf_all[n + t - ws:n + t] - win_cdf_win
Expand Down
Loading