diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d73015dcee..553d69de415 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,8 +29,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Removed

--
-
+- Removed deprecated `BinnedAveragePrecision`, `BinnedPrecisionRecallCurve`, `BinnedRecallAtFixedPrecision` ([#1251](https://github.com/Lightning-AI/metrics/pull/1251))
+- Removed deprecated `LabelRankingAveragePrecision`, `LabelRankingLoss` and `CoverageError` ([#1251](https://github.com/Lightning-AI/metrics/pull/1251))
+- Removed deprecated `KLDivergence` and `AUC` ([#1251](https://github.com/Lightning-AI/metrics/pull/1251))

 ### Fixed

@@ -74,6 +75,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Improved performance of retrieval metrics ([#1242](https://github.com/Lightning-AI/metrics/pull/1242))
 - Changed `SSIM` and `MSSSIM` update to be online to reduce memory usage ([#1231](https://github.com/Lightning-AI/metrics/pull/1231))

+### Deprecated
+
+- Deprecated `BinnedAveragePrecision`, `BinnedPrecisionRecallCurve`, `BinnedRecallAtFixedPrecision` ([#1163](https://github.com/Lightning-AI/metrics/pull/1163))
+  * `BinnedAveragePrecision` -> use `AveragePrecision` with `thresholds` arg
+  * `BinnedPrecisionRecallCurve` -> use `PrecisionRecallCurve` with `thresholds` arg
+  * `BinnedRecallAtFixedPrecision` -> use `RecallAtFixedPrecision` with `thresholds` arg
+- Renamed and refactored `LabelRankingAveragePrecision`, `LabelRankingLoss` and `CoverageError` ([#1167](https://github.com/Lightning-AI/metrics/pull/1167))
+  * `LabelRankingAveragePrecision` -> `MultilabelRankingAveragePrecision`
+  * `LabelRankingLoss` -> `MultilabelRankingLoss`
+  * `CoverageError` -> `MultilabelCoverageError`
+- Deprecated `KLDivergence` and `AUC` from classification package ([#1189](https://github.com/Lightning-AI/metrics/pull/1189))
+  * `KLDivergence` moved to `regression` package
+  * Instead of `AUC` use `torchmetrics.utilities.compute.auc`

 ### Fixed

diff --git a/docs/source/classification/auc.rst b/docs/source/classification/auc.rst
deleted file mode 100644
index 8f95bbd594c..00000000000
--- a/docs/source/classification/auc.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-.. customcarditem::
-   :header: Area Under the Curve (AUC)
-   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg
-   :tags: Classification
-
-###
-AUC
-###
-
-Module Interface
-________________
-
-.. autoclass:: torchmetrics.AUC
-    :noindex:
-
-Functional Interface
-____________________
-
-.. autofunction:: torchmetrics.functional.auc
-    :noindex:
diff --git a/docs/source/classification/binned_average_precision.rst b/docs/source/classification/binned_average_precision.rst
deleted file mode 100644
index 344263052c7..00000000000
--- a/docs/source/classification/binned_average_precision.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-.. customcarditem::
-   :header: Binned Average Precision
-   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg
-   :tags: Classification
-
-########################
-Binned Average Precision
-########################
-
-Module Interface
-________________
-
-.. autoclass:: torchmetrics.BinnedAveragePrecision
-    :noindex:
diff --git a/docs/source/classification/binned_precision_recall_curve.rst b/docs/source/classification/binned_precision_recall_curve.rst
deleted file mode 100644
index 6c056068aa9..00000000000
--- a/docs/source/classification/binned_precision_recall_curve.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-..
customcarditem:: - :header: Binned Precision Recall Curve - :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg - :tags: Classification - -############################# -Binned Precision Recall Curve -############################# - -Module Interface -________________ - -.. autoclass:: torchmetrics.BinnedPrecisionRecallCurve - :noindex: diff --git a/docs/source/classification/binned_recall_fixed_precision.rst b/docs/source/classification/binned_recall_fixed_precision.rst deleted file mode 100644 index 6f169744ae7..00000000000 --- a/docs/source/classification/binned_recall_fixed_precision.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. customcarditem:: - :header: Binned Recall At Fixed Precision - :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg - :tags: Classification - -################################ -Binned Recall At Fixed Precision -################################ - -Module Interface -________________ - -.. autoclass:: torchmetrics.BinnedRecallAtFixedPrecision - :noindex: diff --git a/docs/source/classification/coverage_error.rst b/docs/source/classification/coverage_error.rst index 16db100c474..29f979ab456 100644 --- a/docs/source/classification/coverage_error.rst +++ b/docs/source/classification/coverage_error.rst @@ -10,17 +10,11 @@ Coverage Error Module Interface ________________ -.. autoclass:: torchmetrics.CoverageError - :noindex: - .. autoclass:: torchmetrics.classification.MultilabelCoverageError :noindex: Functional Interface ____________________ -.. autofunction:: torchmetrics.functional.coverage_error - :noindex: - .. autofunction:: torchmetrics.functional.classification.multilabel_coverage_error :noindex: diff --git a/docs/source/classification/label_ranking_average_precision.rst b/docs/source/classification/label_ranking_average_precision.rst index 32f1b0867b5..1f44bfbbfda 100644 --- a/docs/source/classification/label_ranking_average_precision.rst +++ b/docs/source/classification/label_ranking_average_precision.rst @@ -10,9 +10,6 @@ Label Ranking Average Precision Module Interface ________________ -.. autoclass:: torchmetrics.LabelRankingAveragePrecision - :noindex: - .. autoclass:: torchmetrics.classification.MultilabelRankingAveragePrecision :noindex: @@ -20,8 +17,5 @@ ________________ Functional Interface ____________________ -.. autofunction:: torchmetrics.functional.label_ranking_average_precision - :noindex: - .. autofunction:: torchmetrics.functional.classification.multilabel_ranking_average_precision :noindex: diff --git a/docs/source/classification/label_ranking_loss.rst b/docs/source/classification/label_ranking_loss.rst index 168b2c80ceb..ae7f9567a7d 100644 --- a/docs/source/classification/label_ranking_loss.rst +++ b/docs/source/classification/label_ranking_loss.rst @@ -10,18 +10,11 @@ Label Ranking Loss Module Interface ________________ -.. autoclass:: torchmetrics.LabelRankingLoss - :noindex: - - .. autoclass:: torchmetrics.classification.MultilabelRankingLoss :noindex: Functional Interface ____________________ -.. autofunction:: torchmetrics.functional.label_ranking_loss - :noindex: - .. autofunction:: torchmetrics.functional.classification.multilabel_ranking_loss :noindex: diff --git a/docs/source/index.rst b/docs/source/index.rst index f3bc95a8ae9..1977f03e303 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -132,7 +132,6 @@ Or directly from conda pages/overview pages/implement pages/lightning - pages/classification pages/retrieval .. 
toctree:: diff --git a/docs/source/pages/classification.rst b/docs/source/pages/classification.rst deleted file mode 100644 index 457222b07c0..00000000000 --- a/docs/source/pages/classification.rst +++ /dev/null @@ -1,101 +0,0 @@ -**************************** -Using Classification Metrics -**************************** - -Input types -~~~~~~~~~~~ - -For the purposes of classification metrics, inputs (predictions and targets) are split -into these categories (``N`` stands for the batch size and ``C`` for number of classes): - -.. csv-table:: \*dtype ``binary`` means integers that are either 0 or 1 - :header: "Type", "preds shape", "preds dtype", "target shape", "target dtype" - :widths: 20, 10, 10, 10, 10 - - "Binary", "(N,)", "``float``", "(N,)", "``binary``\*" - "Multi-class", "(N,)", "``int``", "(N,)", "``int``" - "Multi-class with logits or probabilities", "(N, C)", "``float``", "(N,)", "``int``" - "Multi-label", "(N, ...)", "``float``", "(N, ...)", "``binary``\*" - "Multi-dimensional multi-class", "(N, ...)", "``int``", "(N, ...)", "``int``" - "Multi-dimensional multi-class with logits or probabilities", "(N, C, ...)", "``float``", "(N, ...)", "``int``" - -.. note:: - All dimensions of size 1 (except ``N``) are "squeezed out" at the beginning, so - that, for example, a tensor of shape ``(N, 1)`` is treated as ``(N, )``. - -When predictions or targets are integers, it is assumed that class labels start at 0, i.e. -the possible class labels are 0, 1, 2, 3, etc. Below are some examples of different input types - -.. testcode:: - - # Binary inputs - binary_preds = torch.tensor([0.6, 0.1, 0.9]) - binary_target = torch.tensor([1, 0, 2]) - - # Multi-class inputs - mc_preds = torch.tensor([0, 2, 1]) - mc_target = torch.tensor([0, 1, 2]) - - # Multi-class inputs with probabilities - mc_preds_probs = torch.tensor([[0.8, 0.2, 0], [0.1, 0.2, 0.7], [0.3, 0.6, 0.1]]) - mc_target_probs = torch.tensor([0, 1, 2]) - - # Multi-label inputs - ml_preds = torch.tensor([[0.2, 0.8, 0.9], [0.5, 0.6, 0.1], [0.3, 0.1, 0.1]]) - ml_target = torch.tensor([[0, 1, 1], [1, 0, 0], [0, 0, 0]]) - - -Using the multiclass parameter ------------------------------- - -In some cases, you might have inputs which appear to be (multi-dimensional) multi-class -but are actually binary/multi-label - for example, if both predictions and targets are -integer (binary) tensors. Or it could be the other way around, you want to treat -binary/multi-label inputs as 2-class (multi-dimensional) multi-class inputs. - -For these cases, the metrics where this distinction would make a difference, expose the -``multiclass`` argument. Let's see how this is used on the example of -:class:`~torchmetrics.StatScores` metric. - -First, let's consider the case with label predictions with 2 classes, which we want to -treat as binary. - -.. testcode:: - - from torchmetrics.functional import stat_scores - - # These inputs are supposed to be binary, but appear as multi-class - preds = torch.tensor([0, 1, 0]) - target = torch.tensor([1, 1, 0]) - -As you can see below, by default the inputs are treated -as multi-class. We can set ``multiclass=False`` to treat the inputs as binary - -which is the same as converting the predictions to float beforehand. - -.. 
doctest:: - - >>> stat_scores(preds, target, reduce='macro', num_classes=2) - tensor([[1, 1, 1, 0, 1], - [1, 0, 1, 1, 2]]) - >>> stat_scores(preds, target, reduce='macro', num_classes=1, multiclass=False) - tensor([[1, 0, 1, 1, 2]]) - >>> stat_scores(preds.float(), target, reduce='macro', num_classes=1) - tensor([[1, 0, 1, 1, 2]]) - -Next, consider the opposite example: inputs are binary (as predictions are probabilities), -but we would like to treat them as 2-class multi-class, to obtain the metric for both classes. - -.. testcode:: - - preds = torch.tensor([0.2, 0.7, 0.3]) - target = torch.tensor([1, 1, 0]) - -In this case we can set ``multiclass=True``, to treat the inputs as multi-class. - -.. doctest:: - - >>> stat_scores(preds, target, reduce='macro', num_classes=1) - tensor([[1, 0, 1, 1, 2]]) - >>> stat_scores(preds, target, reduce='macro', num_classes=2, multiclass=True) - tensor([[1, 1, 1, 0, 1], - [1, 0, 1, 1, 2]]) diff --git a/src/torchmetrics/__init__.py b/src/torchmetrics/__init__.py index 8aa046a9938..514b7365ab3 100644 --- a/src/torchmetrics/__init__.py +++ b/src/torchmetrics/__init__.py @@ -21,26 +21,19 @@ SignalNoiseRatio, ) from torchmetrics.classification import ( # noqa: E402 - AUC, AUROC, ROC, Accuracy, AveragePrecision, - BinnedAveragePrecision, - BinnedPrecisionRecallCurve, - BinnedRecallAtFixedPrecision, CalibrationError, CohenKappa, ConfusionMatrix, - CoverageError, Dice, F1Score, FBetaScore, HammingDistance, HingeLoss, JaccardIndex, - LabelRankingAveragePrecision, - LabelRankingLoss, MatthewsCorrCoef, Precision, PrecisionRecallCurve, @@ -113,12 +106,8 @@ __all__ = [ "functional", "Accuracy", - "AUC", "AUROC", "AveragePrecision", - "BinnedAveragePrecision", - "BinnedPrecisionRecallCurve", - "BinnedRecallAtFixedPrecision", "BLEUScore", "BootStrapper", "CalibrationError", @@ -130,7 +119,6 @@ "CohenKappa", "ConfusionMatrix", "CosineSimilarity", - "CoverageError", "Dice", "TweedieDevianceScore", "ErrorRelativeGlobalDimensionlessSynthesis", @@ -142,8 +130,6 @@ "HingeLoss", "JaccardIndex", "KLDivergence", - "LabelRankingAveragePrecision", - "LabelRankingLoss", "MatchErrorRate", "MatthewsCorrCoef", "MaxMetric", diff --git a/src/torchmetrics/classification/__init__.py b/src/torchmetrics/classification/__init__.py index 862a6677655..29185666b5c 100644 --- a/src/torchmetrics/classification/__init__.py +++ b/src/torchmetrics/classification/__init__.py @@ -29,9 +29,7 @@ MultilabelStatScores, StatScores, ) - from torchmetrics.classification.accuracy import Accuracy, BinaryAccuracy, MulticlassAccuracy, MultilabelAccuracy -from torchmetrics.classification.auc import AUC from torchmetrics.classification.auroc import AUROC, BinaryAUROC, MulticlassAUROC, MultilabelAUROC from torchmetrics.classification.average_precision import ( AveragePrecision, @@ -39,11 +37,6 @@ MulticlassAveragePrecision, MultilabelAveragePrecision, ) -from torchmetrics.classification.binned_precision_recall import ( - BinnedAveragePrecision, - BinnedPrecisionRecallCurve, - BinnedRecallAtFixedPrecision, -) from torchmetrics.classification.calibration_error import ( BinaryCalibrationError, CalibrationError, @@ -92,9 +85,6 @@ Recall, ) from torchmetrics.classification.ranking import ( - CoverageError, - LabelRankingAveragePrecision, - LabelRankingLoss, MultilabelCoverageError, MultilabelRankingAveragePrecision, MultilabelRankingLoss, diff --git a/src/torchmetrics/classification/accuracy.py b/src/torchmetrics/classification/accuracy.py index 79051ffe3f2..5f564e1d77a 100644 --- 
a/src/torchmetrics/classification/accuracy.py +++ b/src/torchmetrics/classification/accuracy.py @@ -350,8 +350,6 @@ class Accuracy(StatScores): changed to subset accuracy (which requires all labels or sub-samples in the sample to be correctly predicted) by setting ``subset_accuracy=True``. - Accepts all input types listed in :ref:`pages/classification:input types`. - Args: num_classes: Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. @@ -387,11 +385,10 @@ class Accuracy(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -409,9 +406,7 @@ class Accuracy(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. subset_accuracy: Whether to compute subset accuracy for multi-label and multi-dimensional @@ -557,9 +552,7 @@ def __init__( self.add_state("total", default=tensor(0), dist_reduce_fx="sum") def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore - """Update state with predictions and targets. See - :ref:`pages/classification:input types` for more information on input - types. + """Update state with predictions and targets. Args: preds: Predictions from model (logits, probabilities, or labels) diff --git a/src/torchmetrics/classification/auc.py b/src/torchmetrics/classification/auc.py deleted file mode 100644 index f1a4ee719e7..00000000000 --- a/src/torchmetrics/classification/auc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Any, List, Optional - -from torch import Tensor - -from torchmetrics.metric import Metric -from torchmetrics.utilities import rank_zero_warn -from torchmetrics.utilities.compute import _auc_compute, _auc_format_inputs -from torchmetrics.utilities.data import dim_zero_cat - - -class AUC(Metric): - r"""Computes Area Under the Curve (AUC) using the trapezoidal rule. - - Forward accepts two input tensors that should be 1D and have the same number - of elements - - .. note:: - This metric has been deprecated in v0.10 and will be removed in v0.11. - - Args: - reorder: AUC expects its first input to be sorted. 
If this is not the case, - setting this argument to ``True`` will use a stable sorting algorithm to - sort the input in descending order - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - """ - is_differentiable: bool = False - higher_is_better: Optional[bool] = None - full_state_update: bool = False - x: List[Tensor] - y: List[Tensor] - - def __init__( - self, - reorder: bool = False, - **kwargs: Any, - ) -> None: - super().__init__(**kwargs) - rank_zero_warn( - "`torchmetrics.classification.AUC` has been deprecated in v0.10 and will be removed in v0.11." - "A functional version is still available in `torchmetrics.utilities.compute`", - DeprecationWarning, - ) - - self.reorder = reorder - - self.add_state("x", default=[], dist_reduce_fx="cat") - self.add_state("y", default=[], dist_reduce_fx="cat") - - rank_zero_warn( - "Metric `AUC` will save all targets and predictions in buffer." - " For large datasets this may lead to large memory footprint." - ) - - def update(self, preds: Tensor, target: Tensor) -> None: - """Update state with predictions and targets. - - Args: - preds: Predictions from model (probabilities, or labels) - target: Ground truth labels - """ - x, y = _auc_format_inputs(preds, target) - - self.x.append(x) - self.y.append(y) - - def compute(self) -> Tensor: - """Computes AUC based on inputs passed in to ``update`` previously.""" - x = dim_zero_cat(self.x) - y = dim_zero_cat(self.y) - return _auc_compute(x, y, reorder=self.reorder) diff --git a/src/torchmetrics/classification/binned_precision_recall.py b/src/torchmetrics/classification/binned_precision_recall.py deleted file mode 100644 index d7253527ae3..00000000000 --- a/src/torchmetrics/classification/binned_precision_recall.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Any, List, Optional, Tuple, Union - -import torch -from torch import Tensor - -from torchmetrics.functional.classification.average_precision import _average_precision_compute_with_precision_recall -from torchmetrics.metric import Metric -from torchmetrics.utilities.data import METRIC_EPS, to_onehot -from torchmetrics.utilities.prints import rank_zero_warn - - -def _recall_at_precision( - precision: Tensor, - recall: Tensor, - thresholds: Tensor, - min_precision: float, -) -> Tuple[Tensor, Tensor]: - try: - max_recall, _, best_threshold = max( - (r, p, t) for p, r, t in zip(precision, recall, thresholds) if p >= min_precision - ) - - except ValueError: - max_recall = torch.tensor(0.0, device=recall.device, dtype=recall.dtype) - best_threshold = torch.tensor(0) - - if max_recall == 0.0: - best_threshold = torch.tensor(1e6, device=thresholds.device, dtype=thresholds.dtype) - - return max_recall, best_threshold - - -class BinnedPrecisionRecallCurve(Metric): - """Computes precision-recall pairs for different thresholds. Works for both binary and multiclass problems. 
In - the case of multiclass, the values will be calculated based on a one-vs-the-rest approach. - - Computation is performed in constant-memory by computing precision and recall - for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). - - .. warn: - This metric has been deprecated in v0.10 and will be removed in v0.11. - Instead use `PrecisionRecallCurve` metric with the `thresholds` argument set accordingly. - - Forward accepts - - - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor - with probabilities, where C is the number of classes. - - - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels - - Args: - num_classes: integer with number of classes. For binary, set to 1. - thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. - It is used for computation will lead to more detailed curve and accurate estimates, - but will be slower and consume more memory. - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Raises: - ValueError: - If ``thresholds`` is not a ``int``, ``list`` or ``tensor`` - - Example (binary case): - >>> from torchmetrics import BinnedPrecisionRecallCurve - >>> pred = torch.tensor([0, 0.1, 0.8, 0.4]) - >>> target = torch.tensor([0, 1, 1, 0]) - >>> pr_curve = BinnedPrecisionRecallCurve(num_classes=1, thresholds=5) - >>> precision, recall, thresholds = pr_curve(pred, target) - >>> precision - tensor([0.5000, 0.5000, 1.0000, 1.0000, 1.0000, 1.0000]) - >>> recall - tensor([1.0000, 0.5000, 0.5000, 0.5000, 0.0000, 0.0000]) - >>> thresholds - tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000]) - - Example (multiclass case): - >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], - ... [0.05, 0.75, 0.05, 0.05, 0.05], - ... [0.05, 0.05, 0.75, 0.05, 0.05], - ... [0.05, 0.05, 0.05, 0.75, 0.05]]) - >>> target = torch.tensor([0, 1, 3, 2]) - >>> pr_curve = BinnedPrecisionRecallCurve(num_classes=5, thresholds=3) - >>> precision, recall, thresholds = pr_curve(pred, target) - >>> precision - [tensor([0.2500, 1.0000, 1.0000, 1.0000]), - tensor([0.2500, 1.0000, 1.0000, 1.0000]), - tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]), - tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]), - tensor([2.5000e-07, 1.0000e+00, 1.0000e+00, 1.0000e+00])] - >>> recall - [tensor([1.0000, 1.0000, 0.0000, 0.0000]), - tensor([1.0000, 1.0000, 0.0000, 0.0000]), - tensor([1.0000, 0.0000, 0.0000, 0.0000]), - tensor([1.0000, 0.0000, 0.0000, 0.0000]), - tensor([0., 0., 0., 0.])] - >>> thresholds - [tensor([0.0000, 0.5000, 1.0000]), - tensor([0.0000, 0.5000, 1.0000]), - tensor([0.0000, 0.5000, 1.0000]), - tensor([0.0000, 0.5000, 1.0000]), - tensor([0.0000, 0.5000, 1.0000])] - """ - - is_differentiable: bool = False - higher_is_better: Optional[bool] = None - full_state_update: bool = False - TPs: Tensor - FPs: Tensor - FNs: Tensor - - def __init__( - self, - num_classes: int, - thresholds: Union[int, Tensor, List[float]] = 100, - **kwargs: Any, - ) -> None: - rank_zero_warn( - "Metric `BinnedPrecisionRecallCurve` has been deprecated in v0.10 and will be completly removed in v0.11." 
- " Instead, use the refactored version of `PrecisionRecallCurve` by specifying the `thresholds` argument.", - DeprecationWarning, - ) - super().__init__(**kwargs) - - self.num_classes = num_classes - if isinstance(thresholds, int): - self.num_thresholds = thresholds - self.thresholds = torch.linspace(0, 1.0, thresholds) - - elif thresholds is not None: - if not isinstance(thresholds, (list, Tensor)): - raise ValueError("Expected argument `thresholds` to either be an integer, list of floats or a tensor") - self.thresholds = torch.tensor(thresholds) if isinstance(thresholds, list) else thresholds - self.num_thresholds = self.thresholds.numel() - - for name in ("TPs", "FPs", "FNs"): - self.add_state( - name=name, - default=torch.zeros(num_classes, self.num_thresholds, dtype=torch.float32), - dist_reduce_fx="sum", - ) - - def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore - """ - Args - preds: (n_samples, n_classes) tensor - target: (n_samples, n_classes) tensor - """ - # binary case - if len(preds.shape) == len(target.shape) == 1: - preds = preds.reshape(-1, 1) - target = target.reshape(-1, 1) - - if len(preds.shape) == len(target.shape) + 1: - target = to_onehot(target, num_classes=self.num_classes) - - target = target == 1 - # Iterate one threshold at a time to conserve memory - for i in range(self.num_thresholds): - predictions = preds >= self.thresholds[i] - self.TPs[:, i] += (target & predictions).sum(dim=0) - self.FPs[:, i] += ((~target) & predictions).sum(dim=0) - self.FNs[:, i] += (target & (~predictions)).sum(dim=0) - - def compute(self) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: - """Returns float tensor of size n_classes.""" - precisions = (self.TPs + METRIC_EPS) / (self.TPs + self.FPs + METRIC_EPS) - recalls = self.TPs / (self.TPs + self.FNs + METRIC_EPS) - - # Need to guarantee that last precision=1 and recall=0, similar to precision_recall_curve - t_ones = torch.ones(self.num_classes, 1, dtype=precisions.dtype, device=precisions.device) - precisions = torch.cat([precisions, t_ones], dim=1) - t_zeros = torch.zeros(self.num_classes, 1, dtype=recalls.dtype, device=recalls.device) - recalls = torch.cat([recalls, t_zeros], dim=1) - if self.num_classes == 1: - return precisions[0, :], recalls[0, :], self.thresholds - return list(precisions), list(recalls), [self.thresholds for _ in range(self.num_classes)] - - -class BinnedAveragePrecision(BinnedPrecisionRecallCurve): - """Computes the average precision score, which summarises the precision recall curve into one number. Works for - both binary and multiclass problems. In the case of multiclass, the values will be calculated based on a one- - vs-the-rest approach. - - Computation is performed in constant-memory by computing precision and recall - for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). - - .. warn: - This metric has been deprecated in v0.10 and will be removed in v0.11. - Instead use `AveragePrecision` metric with the `thresholds` argument set accordingly. - - Forward accepts - - - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor - with probabilities, where C is the number of classes. - - - ``target`` (long tensor): ``(N, ...)`` with integer labels - - Args: - num_classes: integer with number of classes. Not nessesary to provide for binary problems. - thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. 
- It is used for computation will lead to more detailed curve and accurate estimates, - but will be slower and consume more memory - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Raises: - ValueError: - If ``thresholds`` is not a ``list`` or ``tensor`` - - Example (binary case): - >>> from torchmetrics import BinnedAveragePrecision - >>> pred = torch.tensor([0, 1, 2, 3]) - >>> target = torch.tensor([0, 1, 1, 1]) - >>> average_precision = BinnedAveragePrecision(num_classes=1, thresholds=10) - >>> average_precision(pred, target) - tensor(1.0000) - - Example (multiclass case): - >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], - ... [0.05, 0.75, 0.05, 0.05, 0.05], - ... [0.05, 0.05, 0.75, 0.05, 0.05], - ... [0.05, 0.05, 0.05, 0.75, 0.05]]) - >>> target = torch.tensor([0, 1, 3, 2]) - >>> average_precision = BinnedAveragePrecision(num_classes=5, thresholds=10) - >>> average_precision(pred, target) - [tensor(1.0000), tensor(1.0000), tensor(0.2500), tensor(0.2500), tensor(-0.)] - """ - - def __init__( - self, - num_classes: int, - thresholds: Union[int, Tensor, List[float]] = 100, - **kwargs: Any, - ) -> None: - rank_zero_warn( - "Metric `BinnedAveragePrecision` has been deprecated in v0.10 and will be completly removed in v0.11." - " Instead, use the refactored version of `AveragePrecision` by specifying the `thresholds` argument.", - DeprecationWarning, - ) - super().__init__(num_classes=num_classes, thresholds=thresholds, **kwargs) - - def compute(self) -> Union[List[Tensor], Tensor]: # type: ignore - precisions, recalls, _ = super().compute() - return _average_precision_compute_with_precision_recall(precisions, recalls, self.num_classes, average=None) - - -class BinnedRecallAtFixedPrecision(BinnedPrecisionRecallCurve): - """Computes the higest possible recall value given the minimum precision thresholds provided. - - Computation is performed in constant-memory by computing precision and recall - for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). - - .. warn: - This metric has been deprecated in v0.10 and will be removed in v0.11. - Instead use `RecallAtFixedPrecision` metric with the `thresholds` argument set accordingly. - - Forward accepts - - - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor - with probabilities, where C is the number of classes. - - - ``target`` (long tensor): ``(N, ...)`` with integer labels - - Args: - num_classes: integer with number of classes. Provide 1 for binary problems. - min_precision: float value specifying minimum precision threshold. - thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. - It is used for computation will lead to more detailed curve and accurate estimates, - but will be slower and consume more memory - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Raises: - ValueError: - If ``thresholds`` is not a list or tensor - - Example (binary case): - >>> from torchmetrics import BinnedRecallAtFixedPrecision - >>> pred = torch.tensor([0, 0.2, 0.5, 0.8]) - >>> target = torch.tensor([0, 1, 1, 0]) - >>> average_precision = BinnedRecallAtFixedPrecision(num_classes=1, thresholds=10, min_precision=0.5) - >>> average_precision(pred, target) - (tensor(1.0000), tensor(0.1111)) - - Example (multiclass case): - >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], - ... [0.05, 0.75, 0.05, 0.05, 0.05], - ... [0.05, 0.05, 0.75, 0.05, 0.05], - ... 
[0.05, 0.05, 0.05, 0.75, 0.05]]) - >>> target = torch.tensor([0, 1, 3, 2]) - >>> average_precision = BinnedRecallAtFixedPrecision(num_classes=5, thresholds=10, min_precision=0.5) - >>> average_precision(pred, target) - (tensor([1.0000, 1.0000, 0.0000, 0.0000, 0.0000]), - tensor([6.6667e-01, 6.6667e-01, 1.0000e+06, 1.0000e+06, 1.0000e+06])) - """ - - def __init__( - self, - num_classes: int, - min_precision: float, - thresholds: Union[int, Tensor, List[float]] = 100, - **kwargs: Any, - ) -> None: - rank_zero_warn( - "Metric `BinnedRecallAtFixedPrecision` has been deprecated in v0.10 and will be completly removed in v0.11." - " Instead, use the refactored version of `RecallAtFixedPrecision` by specifying the `thresholds` argument.", - DeprecationWarning, - ) - super().__init__(num_classes=num_classes, thresholds=thresholds, **kwargs) - self.min_precision = min_precision - - def compute(self) -> Tuple[Tensor, Tensor]: # type: ignore - """Returns float tensor of size n_classes.""" - precisions, recalls, thresholds = super().compute() - - if self.num_classes == 1: - return _recall_at_precision(precisions, recalls, thresholds, self.min_precision) - - recalls_at_p = torch.zeros(self.num_classes, device=recalls[0].device, dtype=recalls[0].dtype) - thresholds_at_p = torch.zeros(self.num_classes, device=thresholds[0].device, dtype=thresholds[0].dtype) - for i in range(self.num_classes): - recalls_at_p[i], thresholds_at_p[i] = _recall_at_precision( - precisions[i], recalls[i], thresholds[i], self.min_precision - ) - return recalls_at_p, thresholds_at_p diff --git a/src/torchmetrics/classification/dice.py b/src/torchmetrics/classification/dice.py index 771318851b1..9e025dac564 100644 --- a/src/torchmetrics/classification/dice.py +++ b/src/torchmetrics/classification/dice.py @@ -33,7 +33,7 @@ class Dice(StatScores): The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: num_classes: @@ -69,11 +69,11 @@ class Dice(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -90,9 +90,7 @@ class Dice(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
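
For users of the removed binned metrics, the deprecation warnings in the deleted `binned_precision_recall.py` above point at the refactored metrics with a `thresholds` argument. Below is a minimal migration sketch, assuming the binary task classes `BinaryPrecisionRecallCurve`, `BinaryAveragePrecision` and `BinaryRecallAtFixedPrecision` accept an integer `thresholds` for the same constant-memory binning; the inputs mirror the deleted docstring examples and are illustrative only.

    >>> import torch
    >>> from torchmetrics.classification import (
    ...     BinaryAveragePrecision,
    ...     BinaryPrecisionRecallCurve,
    ...     BinaryRecallAtFixedPrecision,
    ... )
    >>> preds = torch.tensor([0.0, 0.1, 0.8, 0.4])
    >>> target = torch.tensor([0, 1, 1, 0])
    >>> # thresholds=5 evaluates the curve at 5 evenly spaced thresholds (constant memory),
    >>> # roughly matching the old BinnedPrecisionRecallCurve(num_classes=1, thresholds=5);
    >>> # Multiclass*/Multilabel* variants are assumed to exist for the non-binary cases.
    >>> precision, recall, thresholds = BinaryPrecisionRecallCurve(thresholds=5)(preds, target)
    >>> ap_score = BinaryAveragePrecision(thresholds=5)(preds, target)
    >>> max_recall, best_thr = BinaryRecallAtFixedPrecision(min_precision=0.5, thresholds=5)(preds, target)
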
diff --git a/src/torchmetrics/classification/f_beta.py b/src/torchmetrics/classification/f_beta.py index dca2d378170..d968810ea9d 100644 --- a/src/torchmetrics/classification/f_beta.py +++ b/src/torchmetrics/classification/f_beta.py @@ -773,11 +773,10 @@ class FBetaScore(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -795,9 +794,7 @@ class FBetaScore(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. @@ -958,11 +955,10 @@ class F1Score(FBetaScore): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -979,9 +975,7 @@ class F1Score(FBetaScore): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. diff --git a/src/torchmetrics/classification/hamming.py b/src/torchmetrics/classification/hamming.py index cd8bf36e291..ef6df5d9f65 100644 --- a/src/torchmetrics/classification/hamming.py +++ b/src/torchmetrics/classification/hamming.py @@ -340,8 +340,6 @@ class HammingDistance(Metric): treats each possible label separately - meaning that, for example, multi-class data is treated as if it were multi-label. - Accepts all input types listed in :ref:`pages/classification:input types`. - Args: threshold: Threshold for transforming probability or logit predictions to binary ``(0,1)`` predictions, in the case @@ -423,8 +421,6 @@ def __init__( def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore """Update state with predictions and targets. - See :ref:`pages/classification:input types` for more information on input types. 
- Args: preds: Predictions from model (probabilities, logits or labels) target: Ground truth labels diff --git a/src/torchmetrics/classification/kl_divergence.py b/src/torchmetrics/classification/kl_divergence.py deleted file mode 100644 index deaaaa4d303..00000000000 --- a/src/torchmetrics/classification/kl_divergence.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Any - -from typing_extensions import Literal - -from torchmetrics.regression.kl_divergence import KLDivergence as _KLDivergence -from torchmetrics.utilities.prints import rank_zero_warn - - -class KLDivergence(_KLDivergence): - r"""Computes the `KL divergence`_: - - .. math:: - D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q{x}} - - Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution - over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence - is a non-symetrical metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`. - - Args: - p: data distribution with shape ``[N, d]`` - q: prior or approximate distribution with shape ``[N, d]`` - log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities, - will normalize to make sure the distributes sum to 1. - reduction: - Determines how to reduce over the ``N``/batch dimension: - - - ``'mean'`` [default]: Averages score across samples - - ``'sum'``: Sum score across samples - - ``'none'`` or ``None``: Returns score per sample - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - .. note:: - This metric have been moved to the regression package in v0.10 and this version will be removed in v0.11. - - Raises: - TypeError: - If ``log_prob`` is not an ``bool``. - ValueError: - If ``reduction`` is not one of ``'mean'``, ``'sum'``, ``'none'`` or ``None``. - - .. note:: - Half precision is only support on GPU for this metric - - Example: - >>> import torch - >>> from torchmetrics.functional import kl_divergence - >>> p = torch.tensor([[0.36, 0.48, 0.16]]) - >>> q = torch.tensor([[1/3, 1/3, 1/3]]) - >>> kl_divergence(p, q) - tensor(0.0853) - """ - - def __init__( - self, - log_prob: bool = False, - reduction: Literal["mean", "sum", "none", None] = "mean", - **kwargs: Any, - ) -> None: - super().__init__(log_prob, reduction, **kwargs) - rank_zero_warn( - "`torchmetrics.classification.KLDivergence` have been moved to `torchmetrics.regression.KLDivergence`" - " from v0.10 and this version will be removed in v0.11. 
Please update import paths.", - DeprecationWarning, - ) diff --git a/src/torchmetrics/classification/precision_recall.py b/src/torchmetrics/classification/precision_recall.py index d05a38f3e99..cb2874d1efa 100644 --- a/src/torchmetrics/classification/precision_recall.py +++ b/src/torchmetrics/classification/precision_recall.py @@ -623,7 +623,7 @@ class Precision(StatScores): The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: num_classes: @@ -657,11 +657,11 @@ class Precision(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -678,9 +678,7 @@ class Precision(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. @@ -813,7 +811,7 @@ class Recall(StatScores): The reduction method (how the recall scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: num_classes: @@ -846,11 +844,10 @@ class Recall(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -868,9 +865,7 @@ class Recall(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
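
The ranking metrics deleted in the next file were renamed rather than dropped (`CoverageError` -> `MultilabelCoverageError`, `LabelRankingAveragePrecision` -> `MultilabelRankingAveragePrecision`, `LabelRankingLoss` -> `MultilabelRankingLoss`), and the classification `KLDivergence` shim removed above now lives only in the regression package. A rough migration sketch follows, assuming the refactored multilabel classes take a `num_labels` argument like the other refactored multilabel metrics; the inputs mirror the deleted docstring examples.

    >>> import torch
    >>> from torchmetrics.classification import (
    ...     MultilabelCoverageError,
    ...     MultilabelRankingAveragePrecision,
    ...     MultilabelRankingLoss,
    ... )
    >>> from torchmetrics.regression import KLDivergence  # moved out of the classification package
    >>> _ = torch.manual_seed(42)
    >>> preds = torch.rand(10, 5)
    >>> target = torch.randint(2, (10, 5))
    >>> coverage = MultilabelCoverageError(num_labels=5)(preds, target)        # was CoverageError()
    >>> lrap = MultilabelRankingAveragePrecision(num_labels=5)(preds, target)  # was LabelRankingAveragePrecision()
    >>> ranking_loss = MultilabelRankingLoss(num_labels=5)(preds, target)      # was LabelRankingLoss()
    >>> p = torch.tensor([[0.36, 0.48, 0.16]])
    >>> q = torch.tensor([[1/3, 1/3, 1/3]])
    >>> kl = KLDivergence()(p, q)
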
diff --git a/src/torchmetrics/classification/ranking.py b/src/torchmetrics/classification/ranking.py index 3b7aba9ba34..863f191fbd1 100644 --- a/src/torchmetrics/classification/ranking.py +++ b/src/torchmetrics/classification/ranking.py @@ -17,12 +17,6 @@ from torch import Tensor from torchmetrics.functional.classification.ranking import ( - _coverage_error_compute, - _coverage_error_update, - _label_ranking_average_precision_compute, - _label_ranking_average_precision_update, - _label_ranking_loss_compute, - _label_ranking_loss_update, _multilabel_confusion_matrix_arg_validation, _multilabel_confusion_matrix_format, _multilabel_coverage_error_update, @@ -243,171 +237,3 @@ def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore def compute(self) -> Tensor: return _ranking_reduce(self.measure, self.total) - - -class CoverageError(Metric): - """Computes multilabel coverage error [1]. The score measure how far we need to go through the ranked scores to - cover all true labels. The best value is equal to the average number of labels in the target tensor per sample. - - Args: - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Example: - >>> from torchmetrics import CoverageError - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> metric = CoverageError() - >>> metric(preds, target) - tensor(3.9000) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. - """ - - higher_is_better: bool = False - is_differentiable: bool = False - full_state_update: bool = False - coverage: Tensor - numel: Tensor - weight: Tensor - - def __init__(self, **kwargs: Any) -> None: - super().__init__(**kwargs) - self.add_state("coverage", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("numel", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("weight", torch.tensor(0.0), dist_reduce_fx="sum") - - def update(self, preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> None: # type: ignore - """ - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - """ - coverage, numel, sample_weight = _coverage_error_update(preds, target, sample_weight) - self.coverage += coverage - self.numel += numel - if sample_weight is not None: - self.weight += sample_weight - - def compute(self) -> Tensor: - """Computes the multilabel coverage error.""" - return _coverage_error_compute(self.coverage, self.numel, self.weight) - - -class LabelRankingAveragePrecision(Metric): - """Computes label ranking average precision score for multilabel data [1]. - - The score is the average over each ground truth label assigned to each sample of the ratio of true vs. - total labels with lower score. Best score is 1. - - Args: - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
- - Example: - >>> from torchmetrics import LabelRankingAveragePrecision - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> metric = LabelRankingAveragePrecision() - >>> metric(preds, target) - tensor(0.7744) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. - """ - - score: Tensor - numel: Tensor - sample_weight: Tensor - higher_is_better: bool = True - is_differentiable: bool = False - full_state_update: bool = False - - def __init__(self, **kwargs: Any) -> None: - super().__init__(**kwargs) - self.add_state("score", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("numel", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("sample_weight", torch.tensor(0.0), dist_reduce_fx="sum") - - def update(self, preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> None: # type: ignore - """ - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - """ - score, numel, sample_weight = _label_ranking_average_precision_update(preds, target, sample_weight) - self.score += score - self.numel += numel - if sample_weight is not None: - self.sample_weight += sample_weight - - def compute(self) -> Tensor: - """Computes the label ranking average precision score.""" - return _label_ranking_average_precision_compute(self.score, self.numel, self.sample_weight) - - -class LabelRankingLoss(Metric): - """Computes the label ranking loss for multilabel data [1]. The score is corresponds to the average number of - label pairs that are incorrectly ordered given some predictions weighted by the size of the label set and the - number of labels not in the label set. The best score is 0. - - Args: - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Example: - >>> from torchmetrics import LabelRankingLoss - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> metric = LabelRankingLoss() - >>> metric(preds, target) - tensor(0.4167) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. - """ - - loss: Tensor - numel: Tensor - sample_weight: Tensor - higher_is_better: bool = False - is_differentiable: bool = False - full_state_update: bool = False - - def __init__(self, **kwargs: Any) -> None: - super().__init__(**kwargs) - self.add_state("loss", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("numel", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("sample_weight", torch.tensor(0.0), dist_reduce_fx="sum") - - def update(self, preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> None: # type: ignore - """ - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. 
Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - """ - loss, numel, sample_weight = _label_ranking_loss_update(preds, target, sample_weight) - self.loss += loss - self.numel += numel - if sample_weight is not None: - self.sample_weight += sample_weight - - def compute(self) -> Tensor: - """Computes the label ranking loss.""" - return _label_ranking_loss_compute(self.loss, self.numel, self.sample_weight) diff --git a/src/torchmetrics/classification/specificity.py b/src/torchmetrics/classification/specificity.py index d7966653e70..0e618c5fccb 100644 --- a/src/torchmetrics/classification/specificity.py +++ b/src/torchmetrics/classification/specificity.py @@ -314,7 +314,7 @@ class Specificity(StatScores): The reduction method (how the specificity scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: num_classes: @@ -348,11 +348,10 @@ class Specificity(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -371,9 +370,7 @@ class Specificity(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. diff --git a/src/torchmetrics/classification/stat_scores.py b/src/torchmetrics/classification/stat_scores.py index c358ca63262..53d9b414653 100644 --- a/src/torchmetrics/classification/stat_scores.py +++ b/src/torchmetrics/classification/stat_scores.py @@ -503,7 +503,7 @@ class StatScores(Metric): ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the multi-dimensional multi-class case. - Accepts all inputs listed in :ref:`pages/classification:input types`. + Args: threshold: @@ -539,7 +539,7 @@ class StatScores(Metric): mdmc_reduce: Defines how the multi-dimensional multi-class inputs are handeled. Should be one of the following: - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional - multi-class (see :ref:`pages/classification:input types` for the definition of input types). + multi-class. - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then the outputs are concatenated together. 
In each @@ -553,9 +553,7 @@ class StatScores(Metric): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. @@ -690,8 +688,6 @@ def __init__( def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore """Update state with predictions and targets. - See :ref:`pages/classification:input types` for more information on input types. - Args: preds: Predictions from model (probabilities, logits or labels) target: Ground truth values diff --git a/src/torchmetrics/functional/__init__.py b/src/torchmetrics/functional/__init__.py index 44245a2d6cd..ca83c1c5f75 100644 --- a/src/torchmetrics/functional/__init__.py +++ b/src/torchmetrics/functional/__init__.py @@ -15,7 +15,6 @@ from torchmetrics.functional.audio.sdr import scale_invariant_signal_distortion_ratio, signal_distortion_ratio from torchmetrics.functional.audio.snr import scale_invariant_signal_noise_ratio, signal_noise_ratio from torchmetrics.functional.classification.accuracy import accuracy -from torchmetrics.functional.classification.auc import auc from torchmetrics.functional.classification.auroc import auroc from torchmetrics.functional.classification.average_precision import average_precision from torchmetrics.functional.classification.calibration_error import calibration_error @@ -29,11 +28,6 @@ from torchmetrics.functional.classification.matthews_corrcoef import matthews_corrcoef from torchmetrics.functional.classification.precision_recall import precision, precision_recall, recall from torchmetrics.functional.classification.precision_recall_curve import precision_recall_curve -from torchmetrics.functional.classification.ranking import ( - coverage_error, - label_ranking_average_precision, - label_ranking_loss, -) from torchmetrics.functional.classification.roc import roc from torchmetrics.functional.classification.specificity import specificity from torchmetrics.functional.classification.stat_scores import stat_scores @@ -96,7 +90,6 @@ __all__ = [ "accuracy", - "auc", "auroc", "average_precision", "bleu_score", @@ -107,7 +100,6 @@ "cohen_kappa", "confusion_matrix", "cosine_similarity", - "coverage_error", "tweedie_deviance_score", "dice_score", "dice", @@ -121,8 +113,6 @@ "image_gradients", "jaccard_index", "kl_divergence", - "label_ranking_average_precision", - "label_ranking_loss", "match_error_rate", "matthews_corrcoef", "mean_absolute_error", diff --git a/src/torchmetrics/functional/classification/__init__.py b/src/torchmetrics/functional/classification/__init__.py index 82932c0d6e3..e772f93aa80 100644 --- a/src/torchmetrics/functional/classification/__init__.py +++ b/src/torchmetrics/functional/classification/__init__.py @@ -17,7 +17,6 @@ multiclass_accuracy, multilabel_accuracy, ) -from torchmetrics.functional.classification.auc import auc # noqa: F401 from torchmetrics.functional.classification.auroc import ( # noqa: F401 auroc, binary_auroc, @@ -99,9 +98,6 @@ precision_recall_curve, ) from torchmetrics.functional.classification.ranking import ( # noqa: F401 - coverage_error, - label_ranking_average_precision, - label_ranking_loss, multilabel_coverage_error, multilabel_ranking_average_precision, multilabel_ranking_loss, diff --git a/src/torchmetrics/functional/classification/accuracy.py 
b/src/torchmetrics/functional/classification/accuracy.py index fa641ba59c7..9fa0ddc64d9 100644 --- a/src/torchmetrics/functional/classification/accuracy.py +++ b/src/torchmetrics/functional/classification/accuracy.py @@ -660,8 +660,6 @@ def accuracy( changed to subset accuracy (which requires all labels or sub-samples in the sample to be correctly predicted) by setting ``subset_accuracy=True``. - Accepts all input types listed in :ref:`pages/classification:input types`. - Args: preds: Predictions from model (probabilities, logits or labels) target: Ground truth labels @@ -693,11 +691,11 @@ def accuracy( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -715,9 +713,7 @@ def accuracy( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. ignore_index: Integer specifying a target class to ignore. If given, this class index does not contribute to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` diff --git a/src/torchmetrics/functional/classification/auc.py b/src/torchmetrics/functional/classification/auc.py deleted file mode 100644 index 4d62697c9ac..00000000000 --- a/src/torchmetrics/functional/classification/auc.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -from torch import Tensor - -from torchmetrics.utilities.compute import auc as _auc -from torchmetrics.utilities.prints import rank_zero_warn - - -def auc(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: - """Computes Area Under the Curve (AUC) using the trapezoidal rule. - - .. note:: - This metric have been moved to `torchmetrics.utilities.compute` in v0.10 this version will be removed in v0.11. - - Args: - x: x-coordinates, must be either increasing or decreasing - y: y-coordinates - reorder: if True, will reorder the arrays to make it either increasing or decreasing - - Return: - Tensor containing AUC score - - Raises: - ValueError: - If both ``x`` and ``y`` tensors are not ``1d``. - ValueError: - If both ``x`` and ``y`` don't have the same numnber of elements. 
- ValueError: - If ``x`` tesnsor is neither increasing nor decreasing. - - Example: - >>> from torchmetrics.functional import auc - >>> x = torch.tensor([0, 1, 2, 3]) - >>> y = torch.tensor([0, 1, 2, 2]) - >>> auc(x, y) - tensor(4.) - >>> auc(x, y, reorder=True) - tensor(4.) - """ - rank_zero_warn( - "`torchmetrics.functional.auc` has been move to `torchmetrics.utilities.compute` in v0.10" - " and will be removed in v0.11.", - DeprecationWarning, - ) - return _auc(x, y, reorder=reorder) diff --git a/src/torchmetrics/functional/classification/dice.py b/src/torchmetrics/functional/classification/dice.py index 602d4a1d1d4..3449c182913 100644 --- a/src/torchmetrics/functional/classification/dice.py +++ b/src/torchmetrics/functional/classification/dice.py @@ -178,7 +178,7 @@ def dice( The reduction method (how the recall scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -213,11 +213,10 @@ def dice( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -240,9 +239,7 @@ def dice( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter diff --git a/src/torchmetrics/functional/classification/f_beta.py b/src/torchmetrics/functional/classification/f_beta.py index 7448b828ca0..9e142741ec5 100644 --- a/src/torchmetrics/functional/classification/f_beta.py +++ b/src/torchmetrics/functional/classification/f_beta.py @@ -822,7 +822,7 @@ def fbeta_score( The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -856,10 +856,9 @@ def fbeta_score( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. 
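The deprecation shim deleted above already pointed callers at `torchmetrics.utilities.compute.auc`; here is a minimal sketch of the replacement call, reusing the example values from the removed docstring (the import path and the `reorder` keyword are taken directly from the deleted shim, everything else is illustrative):

```python
import torch
from torchmetrics.utilities.compute import auc  # new home of the trapezoidal AUC helper

x = torch.tensor([0, 1, 2, 3])
y = torch.tensor([0, 1, 2, 2])

# Same result as the removed torchmetrics.functional.auc: trapezoidal rule over (x, y).
print(auc(x, y))                 # tensor(4.)
print(auc(x, y, reorder=True))   # reorder sorts x first; no effect here, x is already increasing
```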
- ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -880,9 +879,7 @@ def fbeta_score( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter @@ -992,7 +989,7 @@ def f1_score( The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -1026,11 +1023,10 @@ def f1_score( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -1054,9 +1050,7 @@ def f1_score( multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter diff --git a/src/torchmetrics/functional/classification/hamming.py b/src/torchmetrics/functional/classification/hamming.py index 2d0b0ef7f8d..9433777f529 100644 --- a/src/torchmetrics/functional/classification/hamming.py +++ b/src/torchmetrics/functional/classification/hamming.py @@ -462,8 +462,6 @@ def hamming_distance( treats each possible label separately - meaning that, for example, multi-class data is treated as if it were multi-label. - Accepts all input types listed in :ref:`pages/classification:input types`. - Args: preds: Predictions from model (probabilities, logits or labels) target: Ground truth diff --git a/src/torchmetrics/functional/classification/kl_divergence.py b/src/torchmetrics/functional/classification/kl_divergence.py deleted file mode 100644 index 69853be74f4..00000000000 --- a/src/torchmetrics/functional/classification/kl_divergence.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from torch import Tensor -from typing_extensions import Literal - -from torchmetrics.functional.regression.kl_divergence import kl_divergence as _kl_divergence -from torchmetrics.utilities.prints import rank_zero_warn - - -def kl_divergence( - p: Tensor, q: Tensor, log_prob: bool = False, reduction: Literal["mean", "sum", "none", None] = "mean" -) -> Tensor: - r"""Computes `KL divergence`_ - - .. math:: - D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q{x}} - - Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution - over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence - is a non-symetrical metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`. - - .. note:: - This metric have been moved to the regression package in v0.10 and this version will be removed in v0.11. - - Args: - p: data distribution with shape ``[N, d]`` - q: prior or approximate distribution with shape ``[N, d]`` - log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities, - will normalize to make sure the distributes sum to 1 - reduction: - Determines how to reduce over the ``N``/batch dimension: - - - ``'mean'`` [default]: Averages score across samples - - ``'sum'``: Sum score across samples - - ``'none'`` or ``None``: Returns score per sample - - Example: - >>> import torch - >>> p = torch.tensor([[0.36, 0.48, 0.16]]) - >>> q = torch.tensor([[1/3, 1/3, 1/3]]) - >>> kl_divergence(p, q) - tensor(0.0853) - """ - rank_zero_warn( - "`torchmetrics.functional.classification.kl_divergence` have been moved to" - "`torchmetrics.functional.regression.kl_divergence` from v0.10 and this version will be removed in v0.11." - "Please update import paths.", - DeprecationWarning, - ) - return _kl_divergence(p, q, log_prob, reduction) diff --git a/src/torchmetrics/functional/classification/precision_recall.py b/src/torchmetrics/functional/classification/precision_recall.py index 4fa6801b0c5..8052504c9de 100644 --- a/src/torchmetrics/functional/classification/precision_recall.py +++ b/src/torchmetrics/functional/classification/precision_recall.py @@ -738,7 +738,7 @@ def precision( The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -771,11 +771,10 @@ def precision( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. 
- ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -798,9 +797,7 @@ def precision( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter @@ -969,7 +966,7 @@ def recall( The reduction method (how the recall scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -1003,11 +1000,10 @@ def recall( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -1030,9 +1026,7 @@ def recall( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter @@ -1139,7 +1133,7 @@ def precision_recall( The reduction method (how the recall scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -1172,11 +1166,10 @@ def precision_recall( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. 
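To make the `mdmc_average` wording in these hunks concrete, a small hedged sketch of the two reduction modes on a multi-dimensional multi-class input, assuming the pre-0.11 keyword-based `precision`/`recall` signatures documented here (the toy tensors are illustrative only):

```python
import torch
from torchmetrics.functional import precision, recall

# N=2 samples, each with 4 extra positions (the flattened ``...`` axis).
preds  = torch.tensor([[0, 1, 2, 2], [1, 1, 0, 2]])
target = torch.tensor([[0, 2, 2, 2], [1, 0, 0, 1]])

# 'global': the N and ``...`` axes are flattened into one sample axis before averaging.
p_global = precision(preds, target, average="macro", num_classes=3, mdmc_average="global")
# 'samplewise': each sample is scored over its own ``...`` axis, then scores are averaged.
p_sample = precision(preds, target, average="macro", num_classes=3, mdmc_average="samplewise")
r_global = recall(preds, target, average="macro", num_classes=3, mdmc_average="global")
print(p_global, p_sample, r_global)
```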
@@ -1197,9 +1190,7 @@ def precision_recall( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The function returns a tuple with two elements: precision and recall. Their shape diff --git a/src/torchmetrics/functional/classification/ranking.py b/src/torchmetrics/functional/classification/ranking.py index 635d6ac1b8a..51b941c9163 100644 --- a/src/torchmetrics/functional/classification/ranking.py +++ b/src/torchmetrics/functional/classification/ranking.py @@ -256,219 +256,3 @@ def multilabel_ranking_loss( ) loss, n_elements = _multilabel_ranking_loss_update(preds, target) return _ranking_reduce(loss, n_elements) - - -def _check_ranking_input(preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> Tensor: - """Check that ranking input have the correct dimensions.""" - if preds.ndim != 2 or target.ndim != 2: - raise ValueError( - "Expected both predictions and target to matrices of shape `[N,C]`" - f" but got {preds.ndim} and {target.ndim}" - ) - if preds.shape != target.shape: - raise ValueError("Expected both predictions and target to have same shape") - if sample_weight is not None: - if sample_weight.ndim != 1 or sample_weight.shape[0] != preds.shape[0]: - raise ValueError( - "Expected sample weights to be 1 dimensional and have same size" - f" as the first dimension of preds and target but got {sample_weight.shape}" - ) - - -def _coverage_error_update( - preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None -) -> Tuple[Tensor, int, Optional[Tensor]]: - """Accumulate state for coverage error - Args: - preds: tensor with predictions - target: tensor with ground truth labels - sample_weight: optional tensor with weight for each sample - - """ - _check_ranking_input(preds, target, sample_weight) - offset = torch.zeros_like(preds) - offset[target == 0] = preds.min().abs() + 10 # Any number >1 works - preds_mod = preds + offset - preds_min = preds_mod.min(dim=1)[0] - coverage = (preds >= preds_min[:, None]).sum(dim=1).to(torch.float32) - if isinstance(sample_weight, Tensor): - coverage *= sample_weight - sample_weight = sample_weight.sum() - return coverage.sum(), coverage.numel(), sample_weight - - -def _coverage_error_compute(coverage: Tensor, n_elements: int, sample_weight: Optional[Tensor] = None) -> Tensor: - if sample_weight is not None and sample_weight != 0.0: - return coverage / sample_weight - return coverage / n_elements - - -def coverage_error(preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> Tensor: - """Computes multilabel coverage error [1]. The score measure how far we need to go through the ranked scores to - cover all true labels. The best value is equal to the average number of labels in the target tensor per sample. - - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. 
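The ranking import block above keeps only the `multilabel_*` variants, so the `coverage_error` removed here maps onto `multilabel_coverage_error`. A hedged migration sketch follows, assuming the new functional takes a `num_labels` argument and no longer accepts `sample_weight`; it reuses the same seeded data as the original Example kept just below:

```python
import torch
from torchmetrics.functional.classification import multilabel_coverage_error

_ = torch.manual_seed(42)
preds = torch.rand(10, 5)            # probabilities or logits, shape [N, L]
target = torch.randint(2, (10, 5))   # binary labels, shape [N, L]

# Replacement for the removed coverage_error(preds, target); per-sample weighting is gone.
print(multilabel_coverage_error(preds, target, num_labels=5))
```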
- - Example: - >>> from torchmetrics.functional import coverage_error - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> coverage_error(preds, target) - tensor(3.9000) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. - """ - coverage, n_elements, sample_weight = _coverage_error_update(preds, target, sample_weight) - return _coverage_error_compute(coverage, n_elements, sample_weight) - - -def _label_ranking_average_precision_update( - preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None -) -> Tuple[Tensor, int, Optional[Tensor]]: - """Accumulate state for label ranking average precision. - - Args: - preds: tensor with predictions - target: tensor with ground truth labels - sample_weight: optional tensor with weight for each sample - """ - _check_ranking_input(preds, target, sample_weight) - # Invert so that the highest score receives rank 1 - neg_preds = -preds - - score = torch.tensor(0.0, device=neg_preds.device) - n_preds, n_labels = neg_preds.shape - for i in range(n_preds): - relevant = target[i] == 1 - ranking = _rank_data(neg_preds[i][relevant]).float() - if len(ranking) > 0 and len(ranking) < n_labels: - rank = _rank_data(neg_preds[i])[relevant].float() - score_idx = (ranking / rank).mean() - else: - score_idx = 1.0 - - if sample_weight is not None: - score_idx *= sample_weight[i] - - score += score_idx - - return score, n_preds, sample_weight.sum() if isinstance(sample_weight, Tensor) else sample_weight - - -def _label_ranking_average_precision_compute( - score: Tensor, n_elements: int, sample_weight: Optional[Tensor] = None -) -> Tensor: - """Computes the final label ranking average precision score.""" - if sample_weight is not None and sample_weight != 0.0: - return score / sample_weight - return score / n_elements - - -def label_ranking_average_precision(preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> Tensor: - """Computes label ranking average precision score for multilabel data [1]. The score is the average over each - ground truth label assigned to each sample of the ratio of true vs. total labels with lower score. Best score - is 1. - - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - - Example: - >>> from torchmetrics.functional import label_ranking_average_precision - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> label_ranking_average_precision(preds, target) - tensor(0.7744) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. 
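Likewise, the removed `label_ranking_average_precision` has a direct counterpart in the surviving `multilabel_ranking_average_precision`; a minimal sketch under the same `num_labels` assumption:

```python
import torch
from torchmetrics.functional.classification import multilabel_ranking_average_precision

_ = torch.manual_seed(42)
preds = torch.rand(10, 5)            # higher score means higher rank for that label
target = torch.randint(2, (10, 5))   # binary relevance per label

# Best value is 1.0, as for the removed functional.
print(multilabel_ranking_average_precision(preds, target, num_labels=5))
```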
- """ - score, n_elements, sample_weight = _label_ranking_average_precision_update(preds, target, sample_weight) - return _label_ranking_average_precision_compute(score, n_elements, sample_weight) - - -def _label_ranking_loss_update( - preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None -) -> Tuple[Tensor, int, Optional[Tensor]]: - """Accumulate state for label ranking loss. - - Args: - preds: tensor with predictions - target: tensor with ground truth labels - sample_weight: optional tensor with weight for each sample - """ - _check_ranking_input(preds, target, sample_weight) - n_preds, n_labels = preds.shape - relevant = target == 1 - n_relevant = relevant.sum(dim=1) - - # Ignore instances where number of true labels is 0 or n_labels - mask = (n_relevant > 0) & (n_relevant < n_labels) - preds = preds[mask] - relevant = relevant[mask] - n_relevant = n_relevant[mask] - - # Nothing is relevant - if len(preds) == 0: - return torch.tensor(0.0, device=preds.device), 1, sample_weight - - inverse = preds.argsort(dim=1).argsort(dim=1) - per_label_loss = ((n_labels - inverse) * relevant).to(torch.float32) - correction = 0.5 * n_relevant * (n_relevant + 1) - denom = n_relevant * (n_labels - n_relevant) - loss = (per_label_loss.sum(dim=1) - correction) / denom - if isinstance(sample_weight, Tensor): - loss *= sample_weight[mask] - sample_weight = sample_weight.sum() - return loss.sum(), n_preds, sample_weight - - -def _label_ranking_loss_compute(loss: Tensor, n_elements: int, sample_weight: Optional[Tensor] = None) -> Tensor: - """Computes the final label ranking loss.""" - if sample_weight is not None and sample_weight != 0.0: - return loss / sample_weight - return loss / n_elements - - -def label_ranking_loss(preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> Tensor: - """Computes the label ranking loss for multilabel data [1]. The score is corresponds to the average number of - label pairs that are incorrectly ordered given some predictions weighted by the size of the label set and the - number of labels not in the label set. The best score is 0. - - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - - Example: - >>> from torchmetrics.functional import label_ranking_loss - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> label_ranking_loss(preds, target) - tensor(0.4167) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. 
- """ - loss, n_element, sample_weight = _label_ranking_loss_update(preds, target, sample_weight) - return _label_ranking_loss_compute(loss, n_element, sample_weight) diff --git a/src/torchmetrics/functional/classification/specificity.py b/src/torchmetrics/functional/classification/specificity.py index b78757d0be1..d155b1b9dcf 100644 --- a/src/torchmetrics/functional/classification/specificity.py +++ b/src/torchmetrics/functional/classification/specificity.py @@ -437,7 +437,7 @@ def specificity( The reduction method (how the specificity scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, or labels) @@ -470,11 +470,10 @@ def specificity( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -496,9 +495,7 @@ def specificity( Should be left unset (``None``) for inputs with label predictions. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter diff --git a/src/torchmetrics/functional/classification/stat_scores.py b/src/torchmetrics/functional/classification/stat_scores.py index ab41fc6dee8..9909f095b1c 100644 --- a/src/torchmetrics/functional/classification/stat_scores.py +++ b/src/torchmetrics/functional/classification/stat_scores.py @@ -1107,7 +1107,7 @@ def stat_scores( The reduction method (how the statistics are aggregated) is controlled by the ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -1147,7 +1147,7 @@ def stat_scores( one of the following: - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional - multi-class (see :ref:`pages/classification:input types` for the definition of input types). + multi-class. - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then the outputs are concatenated together. In each @@ -1161,9 +1161,7 @@ def stat_scores( multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. 
Return: The metric returns a tensor of shape ``(..., 5)``, where the last dimension corresponds diff --git a/src/torchmetrics/wrappers/multioutput.py b/src/torchmetrics/wrappers/multioutput.py index c51cc0cb02f..ffc3d9ead3c 100644 --- a/src/torchmetrics/wrappers/multioutput.py +++ b/src/torchmetrics/wrappers/multioutput.py @@ -65,17 +65,6 @@ class MultioutputWrapper(Metric): >>> r2score = MultioutputWrapper(R2Score(), 2) >>> r2score(preds, target) [tensor(0.9654), tensor(0.9082)] - >>> # Classification metric where prediction and label tensors have different shapes. - >>> from torchmetrics import BinnedAveragePrecision - >>> target = torch.tensor([[1, 2], [2, 0], [1, 2]]) - >>> preds = torch.tensor([ - ... [[.1, .8], [.8, .05], [.1, .15]], - ... [[.1, .1], [.2, .3], [.7, .6]], - ... [[.002, .4], [.95, .45], [.048, .15]] - ... ]) - >>> binned_avg_precision = MultioutputWrapper(BinnedAveragePrecision(3, thresholds=5), 2) - >>> binned_avg_precision(preds, target) - [[tensor(-0.), tensor(1.0000), tensor(1.0000)], [tensor(0.3333), tensor(-0.), tensor(0.6667)]] """ is_differentiable = False diff --git a/tests/unittests/classification/test_binned_precision_recall.py b/tests/unittests/classification/test_binned_precision_recall.py deleted file mode 100644 index ba909a3e0a2..00000000000 --- a/tests/unittests/classification/test_binned_precision_recall.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
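With the `BinnedAveragePrecision` doctest dropped from `MultioutputWrapper` above and its test module deleted below, the binned behaviour is obtained by passing `thresholds` to the task-specific average-precision classes. A hedged sketch, assuming `MulticlassAveragePrecision` in torchmetrics 0.10 accepts `num_classes`, `thresholds` and `average`:

```python
import torch
from torchmetrics.classification import MulticlassAveragePrecision

# A fixed threshold grid gives the constant-memory ("binned") evaluation
# previously provided by BinnedAveragePrecision.
ap = MulticlassAveragePrecision(num_classes=3, thresholds=5, average=None)

preds = torch.tensor([[0.1, 0.8, 0.1],
                      [0.6, 0.3, 0.1],
                      [0.2, 0.3, 0.5]])
target = torch.tensor([1, 0, 2])
print(ap(preds, target))   # one average-precision value per class
```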
- -from functools import partial -from typing import Tuple - -import numpy as np -import pytest -import torch -from sklearn.metrics import average_precision_score as _sk_average_precision_score -from sklearn.metrics import precision_recall_curve as _sk_precision_recall_curve -from torch import Tensor - -from torchmetrics.classification.binned_precision_recall import BinnedAveragePrecision, BinnedRecallAtFixedPrecision -from unittests.classification.inputs import _input_binary_prob -from unittests.classification.inputs import _input_binary_prob_plausible as _input_binary_prob_ok -from unittests.classification.inputs import _input_multilabel_prob as _input_mlb_prob -from unittests.classification.inputs import _input_multilabel_prob_plausible as _input_mlb_prob_ok -from unittests.helpers import seed_all -from unittests.helpers.testers import NUM_CLASSES, MetricTester - -seed_all(42) - - -def recall_at_precision_x_multilabel(predictions: Tensor, targets: Tensor, min_precision: float) -> Tuple[float, float]: - precision, recall, thresholds = _sk_precision_recall_curve(targets, predictions) - - try: - tuple_all = [(r, p, t) for p, r, t in zip(precision, recall, thresholds) if p >= min_precision] - max_recall, _, best_threshold = max(tuple_all) - except ValueError: - max_recall, best_threshold = 0, 1e6 - - return float(max_recall), float(best_threshold) - - -def _sk_prec_recall_mclass_prob(predictions, targets, num_classes, min_precision): - max_recalls = torch.zeros(num_classes) - best_thresholds = torch.zeros(num_classes) - - for i in range(num_classes): - max_recalls[i], best_thresholds[i] = recall_at_precision_x_multilabel( - predictions[:, i], targets[:, i], min_precision - ) - return max_recalls, best_thresholds - - -def _sk_prec_recall_binary_prob(predictions, targets, num_classes, min_precision): - return recall_at_precision_x_multilabel(predictions, targets, min_precision) - - -def _sk_avg_prec_multiclass(predictions, targets, num_classes): - # replace nan with 0 - return np.nan_to_num(_sk_average_precision_score(targets, predictions, average=None)) - - -@pytest.mark.parametrize( - "preds, target, sk_metric, num_classes", - [ - (_input_binary_prob.preds, _input_binary_prob.target, _sk_prec_recall_binary_prob, 1), - (_input_binary_prob_ok.preds, _input_binary_prob_ok.target, _sk_prec_recall_binary_prob, 1), - (_input_mlb_prob_ok.preds, _input_mlb_prob_ok.target, _sk_prec_recall_mclass_prob, NUM_CLASSES), - (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_prec_recall_mclass_prob, NUM_CLASSES), - ], -) -class TestBinnedRecallAtPrecision(MetricTester): - atol = 0.02 - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - @pytest.mark.parametrize("min_precision", [0.05, 0.1, 0.3, 0.5, 0.8, 0.95]) - def test_binned_recall_at_precision( - self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, min_precision - ): - # rounding will simulate binning for both implementations - preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=BinnedRecallAtFixedPrecision, - sk_metric=partial(sk_metric, num_classes=num_classes, min_precision=min_precision), - dist_sync_on_step=dist_sync_on_step, - metric_args={ - "num_classes": num_classes, - "min_precision": min_precision, - "thresholds": 101, - }, - ) - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - 
@pytest.mark.parametrize("min_precision", [0.05, 0.1, 0.3, 0.5, 0.8, 0.95]) - def test_binned_recall_at_precision_default_thresholds( - self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, min_precision - ): - # rounding will simulate binning for both implementations - preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=BinnedRecallAtFixedPrecision, - sk_metric=partial(sk_metric, num_classes=num_classes, min_precision=min_precision), - dist_sync_on_step=dist_sync_on_step, - metric_args={ - "num_classes": num_classes, - "min_precision": min_precision, - }, - ) - - -@pytest.mark.parametrize( - "preds, target, sk_metric, num_classes", - [ - (_input_binary_prob.preds, _input_binary_prob.target, _sk_avg_prec_multiclass, 1), - (_input_binary_prob_ok.preds, _input_binary_prob_ok.target, _sk_avg_prec_multiclass, 1), - (_input_mlb_prob_ok.preds, _input_mlb_prob_ok.target, _sk_avg_prec_multiclass, NUM_CLASSES), - (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_avg_prec_multiclass, NUM_CLASSES), - ], -) -class TestBinnedAveragePrecision(MetricTester): - atol = 0.002 - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - @pytest.mark.parametrize("thresholds", (301, torch.linspace(0.0, 1.0, 101))) - def test_binned_average_precision(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, thresholds): - # rounding will simulate binning for both implementations - preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=BinnedAveragePrecision, - sk_metric=partial(sk_metric, num_classes=num_classes), - dist_sync_on_step=dist_sync_on_step, - metric_args={"num_classes": num_classes, "thresholds": thresholds}, - ) - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - def test_binned_average_precision_default_thresholds( - self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step - ): - # rounding will simulate binning for both implementations - preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=BinnedAveragePrecision, - sk_metric=partial(sk_metric, num_classes=num_classes), - dist_sync_on_step=dist_sync_on_step, - metric_args={"num_classes": num_classes}, - )