Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix inner data type in interval scorer and unify cost names #50

Merged
merged 11 commits (base and head branch names not captured in this extract) on Dec 11, 2024
Merged
2 changes: 1 addition & 1 deletion docs/source/api_reference/costs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Costs

BaseCost
L2Cost
GaussianVarCost
GaussianCost
GaussianCovCost

Utility functions
Expand Down
3 changes: 1 addition & 2 deletions skchange/anomaly_detectors/anomalisers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ def _fit(self, X: pd.DataFrame, y: Optional[pd.DataFrame] = None):
-------
self : returns a reference to self
"""
self.change_detector_: ChangeDetector
self.change_detector_ = self.change_detector.clone()
self.change_detector_: BaseChangeDetector = self.change_detector.clone()
self.change_detector_.fit(X, y)
return self

Expand Down
17 changes: 8 additions & 9 deletions skchange/anomaly_scores/from_cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from typing import Union

import numpy as np
from numpy.typing import ArrayLike

from skchange.anomaly_scores.base import BaseLocalAnomalyScore, BaseSaving
from skchange.costs import BaseCost, L2Cost
Expand Down Expand Up @@ -81,13 +80,13 @@ def get_param_size(self, p: int) -> int:
"""
return self.optimised_cost.get_param_size(p)

def _fit(self, X: ArrayLike, y=None):
def _fit(self, X: np.ndarray, y=None):
"""Fit the saving scorer.

Parameters
----------
X : array-like
Input data.
X : np.ndarray
Data to evaluate. Must be a 2D array.
y : None
Ignored. Included for API consistency by convention.

Expand Down Expand Up @@ -218,13 +217,13 @@ def min_size(self) -> int:
"""Minimum valid size of the interval to evaluate."""
return self.cost.min_size

def _fit(self, X: ArrayLike, y=None):
def _fit(self, X: np.ndarray, y=None):
"""Fit the saving scorer.

Parameters
----------
X : array-like
Input data.
X : np.ndarray
Data to evaluate. Must be a 2D array.
y : None
Ignored. Included for API consistency by convention.

Expand Down Expand Up @@ -339,10 +338,10 @@ def get_test_params(cls, parameter_set="default"):
`MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
`create_test_instance` uses the first (or only) dictionary in `params`
"""
from skchange.costs import GaussianVarCost
from skchange.costs import GaussianCost

params = [
{"cost": L2Cost()},
{"cost": GaussianVarCost()},
{"cost": GaussianCost()},
]
return params
13 changes: 5 additions & 8 deletions skchange/anomaly_scores/l2_saving.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
__author__ = ["Tveten"]

import numpy as np
from numpy.typing import ArrayLike

from skchange.anomaly_scores.base import BaseSaving
from skchange.utils.numba import njit
from skchange.utils.numba.stats import col_cumsum
from skchange.utils.validation.data import as_2d_array


@njit
Expand Down Expand Up @@ -80,13 +78,13 @@ def get_param_size(self, p: int) -> int:
"""
return p

def _fit(self, X: ArrayLike, y=None):
def _fit(self, X: np.ndarray, y=None):
"""Fit the saving evaluator.

Parameters
----------
X : array-like
Input data.
X : np.ndarray
Data to evaluate. Must be a 2D array.
y : None
Ignored. Included for API consistency by convention.

Expand All @@ -95,8 +93,7 @@ def _fit(self, X: ArrayLike, y=None):
self :
Reference to self.
"""
X = as_2d_array(X)
self.sums_ = col_cumsum(X, init_zero=True)
self._sums = col_cumsum(X, init_zero=True)
return self

def _evaluate(self, cuts: np.ndarray) -> np.ndarray:
Expand All @@ -120,7 +117,7 @@ def _evaluate(self, cuts: np.ndarray) -> np.ndarray:
"""
starts = cuts[:, 0]
ends = cuts[:, 1]
return l2_saving(starts, ends, self.sums_)
return l2_saving(starts, ends, self._sums)

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down
12 changes: 6 additions & 6 deletions skchange/base/base_interval_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def fit(self, X, y=None):
Parameters
----------
X : pd.Series, pd.DataFrame, or np.ndarray
Data to score.
Data to evaluate.
y : None
Ignored. Included for API consistency by convention.

Expand All @@ -86,22 +86,22 @@ def fit(self, X, y=None):
Updates the fitted model and sets attributes ending in "_".
"""
X = check_series(X, allow_index_names=True)
self._X = X
self._X = as_2d_array(X)

self._fit(X=X, y=y)
self._fit(X=self._X, y=y)
self._is_fitted = True
return self

def _fit(self, X, y=None):
def _fit(self, X: np.ndarray, y=None):
"""Fit the interval scorer to training data.

The core logic of fitting an interval scorer to training data is implemented
here.

Parameters
----------
X : pd.Series, pd.DataFrame or np.ndarray
Data to score.
X : np.ndarray
Data to evaluate. Must be a 2D array.
y : None
Ignored. Included for API consistency by convention.

Expand Down
2 changes: 1 addition & 1 deletion skchange/base/tests/test_base_interval_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def _evaluate(self, cuts):

def test_fit():
evaluator = ConcreteIntervalEvaluator()
X = np.array([1, 2, 3, 4, 5])
X = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
evaluator.fit(X)
assert evaluator._is_fitted
assert np.array_equal(evaluator._X, X)
Expand Down
13 changes: 5 additions & 8 deletions skchange/change_scores/cusum.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
__author__ = ["Tveten"]

import numpy as np
from numpy.typing import ArrayLike

from skchange.change_scores.base import BaseChangeScore
from skchange.utils.numba import njit
from skchange.utils.numba.stats import col_cumsum
from skchange.utils.validation.data import as_2d_array


@njit
Expand Down Expand Up @@ -74,13 +72,13 @@ def min_size(self) -> int:
"""Minimum size of the interval to evaluate."""
return 1

def _fit(self, X: ArrayLike, y=None):
def _fit(self, X: np.ndarray, y=None):
"""Fit the change score evaluator.

Parameters
----------
X : array-like
Input data.
X : np.ndarray
Data to evaluate. Must be a 2D array.
y : None
Ignored. Included for API consistency by convention.

Expand All @@ -89,8 +87,7 @@ def _fit(self, X: ArrayLike, y=None):
self :
Reference to self.
"""
X = as_2d_array(X)
self.sums_ = col_cumsum(X, init_zero=True)
self._sums = col_cumsum(X, init_zero=True)
return self

def _evaluate(self, cuts: np.ndarray):
Expand All @@ -117,7 +114,7 @@ def _evaluate(self, cuts: np.ndarray):
starts = cuts[:, 0]
splits = cuts[:, 1]
ends = cuts[:, 2]
return cusum_score(starts, ends, splits, self.sums_)
return cusum_score(starts, ends, splits, self._sums)

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down
11 changes: 5 additions & 6 deletions skchange/change_scores/from_cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from typing import Union

import numpy as np
from numpy.typing import ArrayLike

from skchange.change_scores.base import BaseChangeScore
from skchange.costs.base import BaseCost
Expand Down Expand Up @@ -57,13 +56,13 @@ def min_size(self) -> int:
"""Minimum valid size of an interval to evaluate."""
return self.cost.min_size

def _fit(self, X: ArrayLike, y=None):
def _fit(self, X: np.ndarray, y=None):
"""Fit the change score.

Parameters
----------
X : array-like
Input data.
X : np.ndarray
Data to evaluate. Must be a 2D array.
y : None
Ignored. Included for API consistency by convention.

Expand Down Expand Up @@ -124,10 +123,10 @@ def get_test_params(cls, parameter_set="default"):
`MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
`create_test_instance` uses the first (or only) dictionary in `params`
"""
from skchange.costs import GaussianVarCost, L2Cost
from skchange.costs import GaussianCost, L2Cost

params = [
{"cost": L2Cost()},
{"cost": GaussianVarCost()},
{"cost": GaussianCost()},
]
return params
12 changes: 5 additions & 7 deletions skchange/change_scores/multivariate_gaussian_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
__all__ = ["MultivariateGaussianScore"]

import numpy as np
from numpy.typing import ArrayLike

from skchange.change_scores.base import BaseChangeScore
from skchange.costs.multivariate_gaussian_cost import MultivariateGaussianCost
from skchange.utils.numba import njit
from skchange.utils.validation.data import as_2d_array


@njit
Expand Down Expand Up @@ -176,13 +174,13 @@ def min_size(self) -> int:
else:
return None

def _fit(self, X: ArrayLike, y=None):
def _fit(self, X: np.ndarray, y=None):
"""Fit the multivariate Gaussian change score evaluator.

Parameters
----------
X : array-like
Input data.
X : np.ndarray
Data to evaluate. Must be a 2D array.
y : None
Ignored. Included for API consistency by convention.

Expand All @@ -191,7 +189,7 @@ def _fit(self, X: ArrayLike, y=None):
self :
Reference to self.
"""
self._cost.fit(as_2d_array(X))
self._cost.fit(X)
return self

def _evaluate(self, cuts: np.ndarray):
Expand Down Expand Up @@ -229,7 +227,7 @@ def _evaluate(self, cuts: np.ndarray):
bartlett_corrections = compute_bartlett_corrections(
sequence_lengths=segment_lengths,
cut_points=segment_splits,
dimension=self._cost.data_dimension_,
dimension=self._X.shape[1],
)
return bartlett_corrections * raw_scores
else:
Expand Down
4 changes: 2 additions & 2 deletions skchange/costs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Cost functions for cost-based change and anomaly detection."""

from skchange.costs.base import BaseCost
from skchange.costs.gaussian_var_cost import GaussianVarCost
from skchange.costs.gaussian_cost import GaussianCost
from skchange.costs.l2_cost import L2Cost
from skchange.costs.multivariate_gaussian_cost import MultivariateGaussianCost

Expand All @@ -10,7 +10,7 @@
]
COSTS = [
MultivariateGaussianCost,
GaussianVarCost,
GaussianCost,
L2Cost,
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
from typing import Union

import numpy as np
from numpy.typing import ArrayLike

from skchange.costs.base import BaseCost
from skchange.costs.utils import MeanType, VarType, check_mean, check_var
from skchange.utils.numba import njit
from skchange.utils.numba.general import truncate_below
from skchange.utils.numba.stats import col_cumsum
from skchange.utils.validation.data import as_2d_array


@njit
Expand Down Expand Up @@ -119,7 +117,7 @@ def gaussian_var_cost_fixed(
return -log_likelihood


class GaussianVarCost(BaseCost):
class GaussianCost(BaseCost):
"""Univariate Gaussian likelihood cost.

Parameters
Expand Down Expand Up @@ -172,23 +170,22 @@ def get_param_size(self, p: int) -> int:
"""
return 2 * p

def _fit(self, X: ArrayLike, y=None):
def _fit(self, X: np.ndarray, y=None):
"""Fit the cost.

This method precomputes quantities that speed up the cost evaluation.

Parameters
----------
X : array-like
Input data.
X : np.ndarray
Data to evaluate. Must be a 2D array.
y: None
Ignored. Included for API consistency by convention.
"""
X = as_2d_array(X)
self._param = self._check_param(self.param, X)

self.sums_ = col_cumsum(X, init_zero=True)
self.sums2_ = col_cumsum(X**2, init_zero=True)
self._sums = col_cumsum(X, init_zero=True)
self._sums2 = col_cumsum(X**2, init_zero=True)
return self

def _evaluate_optim_param(self, starts: np.ndarray, ends: np.ndarray) -> np.ndarray:
Expand All @@ -207,7 +204,7 @@ def _evaluate_optim_param(self, starts: np.ndarray, ends: np.ndarray) -> np.ndar
A 2D array of costs. One row for each interval. The number of
columns is 1 since the GaussianCovCost is inherently multivariate.
"""
return gaussian_var_cost_optim(starts, ends, self.sums_, self.sums2_)
return gaussian_var_cost_optim(starts, ends, self._sums, self._sums2)

def _evaluate_fixed_param(self, starts, ends):
"""Evaluate the cost for the fixed parameter.
Expand All @@ -226,7 +223,7 @@ def _evaluate_fixed_param(self, starts, ends):
columns is 1 since the GaussianCovCost is inherently multivariate.
"""
mean, var = self._param
return gaussian_var_cost_fixed(starts, ends, self.sums_, self.sums2_, mean, var)
return gaussian_var_cost_fixed(starts, ends, self._sums, self._sums2, mean, var)

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down
Loading
Loading