From 3a56a6024e0d8b239801cce558381807c24ba3d0 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 19 Mar 2021 22:48:52 +0100 Subject: [PATCH] Prune metrics: other classification 7/n (#6584) * confusion_matrix * iou * f_beta * hamming_distance * stat_scores * tests * flake8 * chlog --- CHANGELOG.md | 2 + .../classification/confusion_matrix.py | 90 +---- .../metrics/classification/f_beta.py | 180 +--------- .../classification/hamming_distance.py | 85 +---- .../metrics/classification/iou.py | 83 +---- .../metrics/classification/stat_scores.py | 239 +------------- .../metrics/functional/confusion_matrix.py | 75 +---- .../metrics/functional/f_beta.py | 120 +------ .../metrics/functional/hamming_distance.py | 58 +--- pytorch_lightning/metrics/functional/iou.py | 88 +---- .../metrics/functional/stat_scores.py | 271 +-------------- tests/metrics/classification/__init__.py | 0 tests/metrics/classification/inputs.py | 66 ---- .../classification/test_confusion_matrix.py | 128 ------- tests/metrics/classification/test_f_beta.py | 153 --------- .../classification/test_hamming_distance.py | 80 ----- tests/metrics/classification/test_inputs.py | 312 ------------------ tests/metrics/classification/test_iou.py | 216 ------------ .../classification/test_stat_scores.py | 255 -------------- tests/metrics/test_remove_1-5_metrics.py | 75 +++++ 20 files changed, 155 insertions(+), 2421 deletions(-) delete mode 100644 tests/metrics/classification/__init__.py delete mode 100644 tests/metrics/classification/inputs.py delete mode 100644 tests/metrics/classification/test_confusion_matrix.py delete mode 100644 tests/metrics/classification/test_f_beta.py delete mode 100644 tests/metrics/classification/test_hamming_distance.py delete mode 100644 tests/metrics/classification/test_inputs.py delete mode 100644 tests/metrics/classification/test_iou.py delete mode 100644 tests/metrics/classification/test_stat_scores.py diff --git a/CHANGELOG.md b/CHANGELOG.md index bd8f5e31770d2..01c7ae193555a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). [#6573](https://github.com/PyTorchLightning/pytorch-lightning/pull/6573), + [#6584](https://github.com/PyTorchLightning/pytorch-lightning/pull/6584), + ) diff --git a/pytorch_lightning/metrics/classification/confusion_matrix.py b/pytorch_lightning/metrics/classification/confusion_matrix.py index 112fb4940e6e2..aacd8dcf3b498 100644 --- a/pytorch_lightning/metrics/classification/confusion_matrix.py +++ b/pytorch_lightning/metrics/classification/confusion_matrix.py @@ -13,64 +13,14 @@ # limitations under the License. from typing import Any, Optional -import torch -from torchmetrics import Metric +from torchmetrics import ConfusionMatrix as _ConfusionMatrix -from pytorch_lightning.metrics.functional.confusion_matrix import _confusion_matrix_compute, _confusion_matrix_update +from pytorch_lightning.utilities.deprecation import deprecated -class ConfusionMatrix(Metric): - """ - Computes the `confusion matrix - `_. Works with binary, - multiclass, and multilabel data. Accepts probabilities from a model output or - integer class values in prediction. Works with multi-dimensional preds and - target. - - Note: - This metric produces a multi-dimensional output, so it can not be directly logged. 
- - Forward accepts - - - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes - - ``target`` (long tensor): ``(N, ...)`` - - If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument - to convert into integer labels. This is the case for binary and multi-label probabilities. - - If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. - - Args: - num_classes: Number of classes in the dataset. - normalize: Normalization mode for confusion matrix. Choose from - - - ``None`` or ``'none'``: no normalization (default) - - ``'true'``: normalization over the targets (most commonly used) - - ``'pred'``: normalization over the predictions - - ``'all'``: normalization over the whole matrix - - threshold: - Threshold value for binary or multi-label probabilites. default: 0.5 - compute_on_step: - Forward only calls ``update()`` and return None if this is set to False. default: True - dist_sync_on_step: - Synchronize metric state across processes at each ``forward()`` - before returning the value at the step. default: False - process_group: - Specify the process group on which synchronization is called. default: None (which selects the entire world) - - Example: - - >>> from pytorch_lightning.metrics import ConfusionMatrix - >>> target = torch.tensor([1, 1, 0, 0]) - >>> preds = torch.tensor([0, 1, 0, 0]) - >>> confmat = ConfusionMatrix(num_classes=2) - >>> confmat(preds, target) - tensor([[2., 0.], - [1., 1.]]) - - """ +class ConfusionMatrix(_ConfusionMatrix): + @deprecated(target=_ConfusionMatrix, ver_deprecate="1.3.0", ver_remove="1.5.0") def __init__( self, num_classes: int, @@ -80,35 +30,9 @@ def __init__( dist_sync_on_step: bool = False, process_group: Optional[Any] = None, ): - - super().__init__( - compute_on_step=compute_on_step, - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - ) - self.num_classes = num_classes - self.normalize = normalize - self.threshold = threshold - - allowed_normalize = ('true', 'pred', 'all', 'none', None) - assert self.normalize in allowed_normalize, \ - f"Argument average needs to one of the following: {allowed_normalize}" - - self.add_state("confmat", default=torch.zeros(num_classes, num_classes), dist_reduce_fx="sum") - - def update(self, preds: torch.Tensor, target: torch.Tensor): - """ - Update state with predictions and targets. - - Args: - preds: Predictions from model - target: Ground truth values """ - confmat = _confusion_matrix_update(preds, target, self.num_classes, self.threshold) - self.confmat += confmat + This implementation refers to :class:`~torchmetrics.ConfusionMatrix`. - def compute(self) -> torch.Tensor: - """ - Computes confusion matrix + .. deprecated:: + Use :class:`~torchmetrics.ConfusionMatrix`. Will be removed in v1.5.0. """ - return _confusion_matrix_compute(self.confmat, self.normalize) diff --git a/pytorch_lightning/metrics/classification/f_beta.py b/pytorch_lightning/metrics/classification/f_beta.py index a46b01a1aa8b7..bac3cc3e99c4e 100644 --- a/pytorch_lightning/metrics/classification/f_beta.py +++ b/pytorch_lightning/metrics/classification/f_beta.py @@ -13,72 +13,15 @@ # limitations under the License. 
from typing import Any, Optional -import torch -from torchmetrics import Metric +from torchmetrics import F1 as _F1 +from torchmetrics import FBeta as _FBeta -from pytorch_lightning.metrics.functional.f_beta import _fbeta_compute, _fbeta_update -from pytorch_lightning.utilities import rank_zero_warn +from pytorch_lightning.utilities.deprecation import deprecated -class FBeta(Metric): - r""" - Computes `F-score `_, specifically: - - .. math:: - F_\beta = (1 + \beta^2) * \frac{\text{precision} * \text{recall}} - {(\beta^2 * \text{precision}) + \text{recall}} - - Where :math:`\beta` is some positive real factor. Works with binary, multiclass, and multilabel data. - Accepts probabilities from a model output or integer class values in prediction. - Works with multi-dimensional preds and target. - - Forward accepts - - - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes - - ``target`` (long tensor): ``(N, ...)`` - - If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument - to convert into integer labels. This is the case for binary and multi-label probabilities. - - If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. - - Args: - num_classes: Number of classes in the dataset. - beta: Beta coefficient in the F measure. - threshold: - Threshold value for binary or multi-label probabilities. default: 0.5 - - average: - - ``'micro'`` computes metric globally - - ``'macro'`` computes metric for each class and uniformly averages them - - ``'weighted'`` computes metric for each class and does a weighted-average, - where each class is weighted by their support (accounts for class imbalance) - - ``'none'`` or ``None`` computes and returns the metric per class - - multilabel: If predictions are from multilabel classification. - compute_on_step: - Forward only calls ``update()`` and return None if this is set to False. default: True - dist_sync_on_step: - Synchronize metric state across processes at each ``forward()`` - before returning the value at the step. default: False - process_group: - Specify the process group on which synchronization is called. default: None (which selects the entire world) - - Raises: - ValueError: - If ``average`` is none of ``"micro"``, ``"macro"``, ``"weighted"``, ``"none"``, ``None``. 
- - Example: - - >>> from pytorch_lightning.metrics import FBeta - >>> target = torch.tensor([0, 1, 2, 0, 1, 2]) - >>> preds = torch.tensor([0, 2, 1, 0, 0, 1]) - >>> f_beta = FBeta(num_classes=3, beta=0.5) - >>> f_beta(preds, target) - tensor(0.3333) - - """ +class FBeta(_FBeta): + @deprecated(target=_FBeta, ver_deprecate="1.3.0", ver_remove="1.5.0") def __init__( self, num_classes: int, @@ -90,103 +33,17 @@ def __init__( dist_sync_on_step: bool = False, process_group: Optional[Any] = None, ): - super().__init__( - compute_on_step=compute_on_step, - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - ) - - self.num_classes = num_classes - self.beta = beta - self.threshold = threshold - self.average = average - self.multilabel = multilabel - - allowed_average = ("micro", "macro", "weighted", "none", None) - if self.average not in allowed_average: - raise ValueError( - 'Argument `average` expected to be one of the following:' - f' {allowed_average} but got {self.average}' - ) - - self.add_state("true_positives", default=torch.zeros(num_classes), dist_reduce_fx="sum") - self.add_state("predicted_positives", default=torch.zeros(num_classes), dist_reduce_fx="sum") - self.add_state("actual_positives", default=torch.zeros(num_classes), dist_reduce_fx="sum") - - def update(self, preds: torch.Tensor, target: torch.Tensor): - """ - Update state with predictions and targets. - - Args: - preds: Predictions from model - target: Ground truth values """ - true_positives, predicted_positives, actual_positives = _fbeta_update( - preds, target, self.num_classes, self.threshold, self.multilabel - ) - - self.true_positives += true_positives - self.predicted_positives += predicted_positives - self.actual_positives += actual_positives + This implementation refers to :class:`~torchmetrics.FBeta`. - def compute(self) -> torch.Tensor: + .. deprecated:: + Use :class:`~torchmetrics.FBeta`. Will be removed in v1.5.0. """ - Computes fbeta over state. - """ - return _fbeta_compute( - self.true_positives, self.predicted_positives, self.actual_positives, self.beta, self.average - ) - - -class F1(FBeta): - """ - Computes F1 metric. F1 metrics correspond to a harmonic mean of the - precision and recall scores. - - Works with binary, multiclass, and multilabel data. - Accepts logits from a model output or integer class values in prediction. - Works with multi-dimensional preds and target. - Forward accepts - - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes - - ``target`` (long tensor): ``(N, ...)`` - - If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument. - This is the case for binary and multi-label logits. - - If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. - - Args: - num_classes: Number of classes in the dataset. - threshold: - Threshold value for binary or multi-label logits. default: 0.5 - - average: - - ``'micro'`` computes metric globally - - ``'macro'`` computes metric for each class and uniformly averages them - - ``'weighted'`` computes metric for each class and does a weighted-average, - where each class is weighted by their support (accounts for class imbalance) - - ``'none'`` or ``None`` computes and returns the metric per class - - multilabel: If predictions are from multilabel classification. - compute_on_step: - Forward only calls ``update()`` and returns None if this is set to False. 
default: True - dist_sync_on_step: - Synchronize metric state across processes at each ``forward()`` - before returning the value at the step. default: False - process_group: - Specify the process group on which synchronization is called. default: None (which selects the entire world) - - Example: - >>> from pytorch_lightning.metrics import F1 - >>> target = torch.tensor([0, 1, 2, 0, 1, 2]) - >>> preds = torch.tensor([0, 2, 1, 0, 0, 1]) - >>> f1 = F1(num_classes=3) - >>> f1(preds, target) - tensor(0.3333) - """ +class F1(_F1): + @deprecated(target=_F1, ver_deprecate="1.3.0", ver_remove="1.5.0") def __init__( self, num_classes: int, @@ -197,16 +54,9 @@ def __init__( dist_sync_on_step: bool = False, process_group: Optional[Any] = None, ): - if multilabel is not False: - rank_zero_warn(f'The `multilabel={multilabel}` parameter is unused and will not have any effect.') + """ + This implementation refers to :class:`~torchmetrics.F1`. - super().__init__( - num_classes=num_classes, - beta=1.0, - threshold=threshold, - average=average, - multilabel=multilabel, - compute_on_step=compute_on_step, - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - ) + .. deprecated:: + Use :class:`~torchmetrics.F1`. Will be removed in v1.5.0. + """ diff --git a/pytorch_lightning/metrics/classification/hamming_distance.py b/pytorch_lightning/metrics/classification/hamming_distance.py index dceb90c0a4ca9..b59c3e1053ab8 100644 --- a/pytorch_lightning/metrics/classification/hamming_distance.py +++ b/pytorch_lightning/metrics/classification/hamming_distance.py @@ -13,59 +13,14 @@ # limitations under the License. from typing import Any, Callable, Optional -import torch -from torchmetrics import Metric +from torchmetrics import HammingDistance as _HammingDistance -from pytorch_lightning.metrics.functional.hamming_distance import _hamming_distance_compute, _hamming_distance_update +from pytorch_lightning.utilities.deprecation import deprecated -class HammingDistance(Metric): - r""" - Computes the average `Hamming distance `_ (also - known as Hamming loss) between targets and predictions: - - .. math:: - \text{Hamming distance} = \frac{1}{N \cdot L}\sum_i^N \sum_l^L 1(y_{il} \neq \hat{y_{il}}) - - Where :math:`y` is a tensor of target values, :math:`\hat{y}` is a tensor of predictions, - and :math:`\bullet_{il}` refers to the :math:`l`-th label of the :math:`i`-th sample of that - tensor. - - This is the same as ``1-accuracy`` for binary data, while for all other types of inputs it - treats each possible label separately - meaning that, for example, multi-class data is - treated as if it were multi-label. - - Args: - threshold: - Threshold probability value for transforming probability predictions to binary - (0 or 1) predictions, in the case of binary or multi-label inputs. - compute_on_step: - Forward only calls ``update()`` and return ``None`` if this is set to ``False``. - dist_sync_on_step: - Synchronize metric state across processes at each ``forward()`` - before returning the value at the step. - process_group: - Specify the process group on which synchronization is called. - default: ``None`` (which selects the entire world) - dist_sync_fn: - Callback that performs the allgather operation on the metric state. When ``None``, DDP - will be used to perform the all gather. - - Raises: - ValueError: - If ``threshold`` is not between ``0`` and ``1``. 
- - Example: - - >>> from pytorch_lightning.metrics import HammingDistance - >>> target = torch.tensor([[0, 1], [1, 1]]) - >>> preds = torch.tensor([[0, 1], [0, 1]]) - >>> hamming_distance = HammingDistance() - >>> hamming_distance(preds, target) - tensor(0.2500) - - """ +class HammingDistance(_HammingDistance): + @deprecated(target=_HammingDistance, ver_deprecate="1.3.0", ver_remove="1.5.0") def __init__( self, threshold: float = 0.5, @@ -74,35 +29,9 @@ def __init__( process_group: Optional[Any] = None, dist_sync_fn: Callable = None, ): - super().__init__( - compute_on_step=compute_on_step, - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - dist_sync_fn=dist_sync_fn, - ) - - self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum") - self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum") - - if not 0 < threshold < 1: - raise ValueError("The `threshold` should lie in the (0,1) interval.") - self.threshold = threshold - - def update(self, preds: torch.Tensor, target: torch.Tensor): """ - Update state with predictions and targets. + This implementation refers to :class:`~torchmetrics.HammingDistance`. - Args: - preds: Predictions from model (probabilities, or labels) - target: Ground truth labels - """ - correct, total = _hamming_distance_update(preds, target, self.threshold) - - self.correct += correct - self.total += total - - def compute(self) -> torch.Tensor: - """ - Computes hamming distance based on inputs passed in to ``update`` previously. + .. deprecated:: + Use :class:`~torchmetrics.HammingDistance`. Will be removed in v1.5.0. """ - return _hamming_distance_compute(self.correct, self.total) diff --git a/pytorch_lightning/metrics/classification/iou.py b/pytorch_lightning/metrics/classification/iou.py index a261b767a8190..d5b5d8eeb47e2 100644 --- a/pytorch_lightning/metrics/classification/iou.py +++ b/pytorch_lightning/metrics/classification/iou.py @@ -13,70 +13,14 @@ # limitations under the License. from typing import Any, Optional -import torch +from torchmetrics import IoU as _IoU -from pytorch_lightning.metrics.classification.confusion_matrix import ConfusionMatrix -from pytorch_lightning.metrics.functional.iou import _iou_from_confmat +from pytorch_lightning.utilities.deprecation import deprecated -class IoU(ConfusionMatrix): - r""" - Computes `Intersection over union, or Jaccard index calculation `_: - - .. math:: J(A,B) = \frac{|A\cap B|}{|A\cup B|} - - Where: :math:`A` and :math:`B` are both tensors of the same size, containing integer class values. - They may be subject to conversion from input data (see description below). Note that it is different from box IoU. - - Works with binary, multiclass and multi-label data. - Accepts probabilities from a model output or integer class values in prediction. - Works with multi-dimensional preds and target. - - Forward accepts - - - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes - - ``target`` (long tensor): ``(N, ...)`` - - If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument - to convert into integer labels. This is the case for binary and multi-label probabilities. - - If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. - - Args: - num_classes: Number of classes in the dataset. - ignore_index: optional int specifying a target class to ignore. 
If given, this class index does not contribute - to the returned score, regardless of reduction method. Has no effect if given an int that is not in the - range [0, num_classes-1]. By default, no index is ignored, and all classes are used. - absent_score: score to use for an individual class, if no instances of the class index were present in - `pred` AND no instances of the class index were present in `target`. For example, if we have 3 classes, - [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be assigned the `absent_score`. - threshold: - Threshold value for binary or multi-label probabilities. - reduction: a method to reduce metric score over labels. - - - ``'elementwise_mean'``: takes the mean (default) - - ``'sum'``: takes the sum - - ``'none'``: no reduction will be applied - - compute_on_step: - Forward only calls ``update()`` and return None if this is set to False. - dist_sync_on_step: - Synchronize metric state across processes at each ``forward()`` - before returning the value at the step. - process_group: - Specify the process group on which synchronization is called. default: None (which selects the entire world) - - Example: - >>> from pytorch_lightning.metrics import IoU - >>> target = torch.randint(0, 2, (10, 25, 25)) - >>> pred = torch.tensor(target) - >>> pred[2:5, 7:13, 9:15] = 1 - pred[2:5, 7:13, 9:15] - >>> iou = IoU(num_classes=2) - >>> iou(pred, target) - tensor(0.9660) - - """ +class IoU(_IoU): + @deprecated(target=_IoU, ver_deprecate="1.3.0", ver_remove="1.5.0") def __init__( self, num_classes: int, @@ -88,20 +32,9 @@ def __init__( dist_sync_on_step: bool = False, process_group: Optional[Any] = None, ): - super().__init__( - num_classes=num_classes, - normalize=None, - threshold=threshold, - compute_on_step=compute_on_step, - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - ) - self.reduction = reduction - self.ignore_index = ignore_index - self.absent_score = absent_score - - def compute(self) -> torch.Tensor: """ - Computes intersection over union (IoU) + This implementation refers to :class:`~torchmetrics.IoU`. + + .. deprecated:: + Use :class:`~torchmetrics.IoU`. Will be removed in v1.5.0. """ - return _iou_from_confmat(self.confmat, self.num_classes, self.ignore_index, self.absent_score, self.reduction) diff --git a/pytorch_lightning/metrics/classification/stat_scores.py b/pytorch_lightning/metrics/classification/stat_scores.py index 672b0f41c6fc5..2c4764477b262 100644 --- a/pytorch_lightning/metrics/classification/stat_scores.py +++ b/pytorch_lightning/metrics/classification/stat_scores.py @@ -11,120 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Callable, Optional, Tuple +from typing import Any, Callable, Optional -import torch -from torchmetrics import Metric +from torchmetrics import StatScores as _StatScores -from pytorch_lightning.metrics.functional.stat_scores import _stat_scores_compute, _stat_scores_update +from pytorch_lightning.utilities.deprecation import deprecated -class StatScores(Metric): - """Computes the number of true positives, false positives, true negatives, false negatives. - Related to `Type I and Type II errors `__ - and the `confusion matrix `__. 
- - The reduction method (how the statistics are aggregated) is controlled by the - ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the - multi-dimensional multi-class case. - - Args: - threshold: - Threshold probability value for transforming probability predictions to binary - (0 or 1) predictions, in the case of binary or multi-label inputs. - - top_k: - Number of highest probability entries for each sample to convert to 1s - relevant - only for inputs with probability predictions. If this parameter is set for multi-label - inputs, it will take precedence over ``threshold``. For (multi-dim) multi-class inputs, - this parameter defaults to 1. - - Should be left unset (``None``) for inputs with label predictions. - - reduce: - Defines the reduction that is applied. Should be one of the following: - - - ``'micro'`` [default]: Counts the statistics by summing over all [sample, class] - combinations (globally). Each statistic is represented by a single integer. - - ``'macro'``: Counts the statistics for each class separately (over all samples). - Each statistic is represented by a ``(C,)`` tensor. Requires ``num_classes`` - to be set. - - ``'samples'``: Counts the statistics for each sample separately (over all classes). - Each statistic is represented by a ``(N, )`` 1d tensor. - - Note that what is considered a sample in the multi-dimensional multi-class case - depends on the value of ``mdmc_reduce``. - - num_classes: - Number of classes. Necessary for (multi-dimensional) multi-class or multi-label data. - - ignore_index: - Specify a class (label) to ignore. If given, this class index does not contribute - to the returned score, regardless of reduction method. If an index is ignored, and - ``reduce='macro'``, the class statistics for the ignored class will all be returned - as ``-1``. - - mdmc_reduce: - Defines how the multi-dimensional multi-class inputs are handeled. Should be - one of the following: - - - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional multi-class. - - - ``'samplewise'``: In this case, the statistics are computed separately for each - sample on the ``N`` axis, and then the outputs are concatenated together. In each - sample the extra axes ``...`` are flattened to become the sub-sample axis, and - statistics for each sample are computed by treating the sub-sample axis as the - ``N`` axis for that sample. - - - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs are - flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they - were ``(N_X, C)``. From here on the ``reduce`` parameter applies as usual. - - is_multiclass: - Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. - - compute_on_step: - Forward only calls ``update()`` and return ``None`` if this is set to ``False``. - dist_sync_on_step: - Synchronize metric state across processes at each ``forward()`` - before returning the value at the step - process_group: - Specify the process group on which synchronization is called. - default: ``None`` (which selects the entire world) - dist_sync_fn: - Callback that performs the allgather operation on the metric state. When ``None``, DDP - will be used to perform the allgather. - - Raises: - ValueError: - If ``threshold`` is not a ``float`` between ``0`` and ``1``. - ValueError: - If ``reduce`` is none of ``"micro"``, ``"macro"`` or ``"samples"``. 
- ValueError: - If ``mdmc_reduce`` is none of ``None``, ``"samplewise"``, ``"global"``. - ValueError: - If ``reduce`` is set to ``"macro"`` and ``num_classes`` is not provided. - ValueError: - If ``num_classes`` is set - and ``ignore_index`` is not in the range ``0`` <= ``ignore_index`` < ``num_classes``. - - Example: - - >>> from pytorch_lightning.metrics.classification import StatScores - >>> preds = torch.tensor([1, 0, 2, 1]) - >>> target = torch.tensor([1, 1, 2, 0]) - >>> stat_scores = StatScores(reduce='macro', num_classes=3) - >>> stat_scores(preds, target) - tensor([[0, 1, 2, 1, 1], - [1, 1, 1, 1, 2], - [1, 0, 3, 0, 1]]) - >>> stat_scores = StatScores(reduce='micro') - >>> stat_scores(preds, target) - tensor([2, 2, 6, 2, 4]) - - """ +class StatScores(_StatScores): + @deprecated(target=_StatScores, ver_deprecate="1.3.0", ver_remove="1.5.0") def __init__( self, threshold: float = 0.5, @@ -139,128 +35,9 @@ def __init__( process_group: Optional[Any] = None, dist_sync_fn: Callable = None, ): - super().__init__( - compute_on_step=compute_on_step, - dist_sync_on_step=dist_sync_on_step, - process_group=process_group, - dist_sync_fn=dist_sync_fn, - ) - - self.reduce = reduce - self.mdmc_reduce = mdmc_reduce - self.num_classes = num_classes - self.threshold = threshold - self.is_multiclass = is_multiclass - self.ignore_index = ignore_index - self.top_k = top_k - - if not 0 < threshold < 1: - raise ValueError(f"The `threshold` should be a float in the (0,1) interval, got {threshold}") - - if reduce not in ["micro", "macro", "samples"]: - raise ValueError(f"The `reduce` {reduce} is not valid.") - - if mdmc_reduce not in [None, "samplewise", "global"]: - raise ValueError(f"The `mdmc_reduce` {mdmc_reduce} is not valid.") - - if reduce == "macro" and (not num_classes or num_classes < 1): - raise ValueError("When you set `reduce` as 'macro', you have to provide the number of classes.") - - if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1): - raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes") - - if mdmc_reduce != "samplewise" and reduce != "samples": - if reduce == "micro": - zeros_shape = [] - elif reduce == "macro": - zeros_shape = (num_classes, ) - default, reduce_fn = lambda: torch.zeros(zeros_shape, dtype=torch.long), "sum" - else: - default, reduce_fn = lambda: [], None - - for s in ("tp", "fp", "tn", "fn"): - self.add_state(s, default=default(), dist_reduce_fx=reduce_fn) - - def update(self, preds: torch.Tensor, target: torch.Tensor): - """ - Update state with predictions and targets. - - Args: - preds: Predictions from model (probabilities or labels) - target: Ground truth values - """ - - tp, fp, tn, fn = _stat_scores_update( - preds, - target, - reduce=self.reduce, - mdmc_reduce=self.mdmc_reduce, - threshold=self.threshold, - num_classes=self.num_classes, - top_k=self.top_k, - is_multiclass=self.is_multiclass, - ignore_index=self.ignore_index, - ) - - # Update states - if self.reduce != "samples" and self.mdmc_reduce != "samplewise": - self.tp += tp - self.fp += fp - self.tn += tn - self.fn += fn - else: - self.tp.append(tp) - self.fp.append(fp) - self.tn.append(tn) - self.fn.append(fn) - - def _get_final_stats(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Performs concatenation on the stat scores if neccesary, - before passing them to a compute function. """ + This implementation refers to :class:`~torchmetrics.StatScores`. 
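+
+        Example (migration sketch; this deprecated class only forwards its
+        arguments to :class:`~torchmetrics.StatScores`, so instantiating the
+        torchmetrics class directly is equivalent):
+
+            >>> import torch
+            >>> from torchmetrics import StatScores
+            >>> preds = torch.tensor([1, 0, 2, 1])
+            >>> target = torch.tensor([1, 1, 2, 0])
+            >>> stat_scores = StatScores(reduce='macro', num_classes=3)
+            >>> stat_scores(preds, target)
+            tensor([[0, 1, 2, 1, 1],
+                    [1, 1, 1, 1, 2],
+                    [1, 0, 3, 0, 1]])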
- if isinstance(self.tp, list): - tp = torch.cat(self.tp) - fp = torch.cat(self.fp) - tn = torch.cat(self.tn) - fn = torch.cat(self.fn) - else: - tp, fp, tn, fn = self.tp, self.fp, self.tn, self.fn - - return tp, fp, tn, fn - - def compute(self) -> torch.Tensor: - """ - Computes the stat scores based on inputs passed in to ``update`` previously. - - Return: - The metric returns a tensor of shape ``(..., 5)``, where the last dimension corresponds - to ``[tp, fp, tn, fn, sup]`` (``sup`` stands for support and equals ``tp + fn``). The - shape depends on the ``reduce`` and ``mdmc_reduce`` (in case of multi-dimensional - multi-class data) parameters: - - - If the data is not multi-dimensional multi-class, then - - - If ``reduce='micro'``, the shape will be ``(5, )`` - - If ``reduce='macro'``, the shape will be ``(C, 5)``, - where ``C`` stands for the number of classes - - If ``reduce='samples'``, the shape will be ``(N, 5)``, where ``N`` stands for - the number of samples - - - If the data is multi-dimensional multi-class and ``mdmc_reduce='global'``, then - - - If ``reduce='micro'``, the shape will be ``(5, )`` - - If ``reduce='macro'``, the shape will be ``(C, 5)`` - - If ``reduce='samples'``, the shape will be ``(N*X, 5)``, where ``X`` stands for - the product of sizes of all "extra" dimensions of the data (i.e. all dimensions - except for ``C`` and ``N``) - - - If the data is multi-dimensional multi-class and ``mdmc_reduce='samplewise'``, then - - - If ``reduce='micro'``, the shape will be ``(N, 5)`` - - If ``reduce='macro'``, the shape will be ``(N, C, 5)`` - - If ``reduce='samples'``, the shape will be ``(N, X, 5)`` - + .. deprecated:: + Use :class:`~torchmetrics.StatScores`. Will be removed in v1.5.0. """ - tp, fp, tn, fn = self._get_final_stats() - return _stat_scores_compute(tp, fp, tn, fn) diff --git a/pytorch_lightning/metrics/functional/confusion_matrix.py b/pytorch_lightning/metrics/functional/confusion_matrix.py index e77fc4224d25e..5cf8818176696 100644 --- a/pytorch_lightning/metrics/functional/confusion_matrix.py +++ b/pytorch_lightning/metrics/functional/confusion_matrix.py @@ -14,45 +14,12 @@ from typing import Optional import torch -from torchmetrics.classification.checks import _input_format_classification -from torchmetrics.utilities.enums import DataType +from torchmetrics.functional import confusion_matrix as _confusion_matrix -from pytorch_lightning.utilities import rank_zero_warn - - -def _confusion_matrix_update( - preds: torch.Tensor, target: torch.Tensor, num_classes: int, threshold: float = 0.5 -) -> torch.Tensor: - preds, target, mode = _input_format_classification(preds, target, threshold) - if mode not in (DataType.BINARY, DataType.MULTILABEL): - preds = preds.argmax(dim=1) - target = target.argmax(dim=1) - unique_mapping = (target.view(-1) * num_classes + preds.view(-1)).to(torch.long) - bins = torch.bincount(unique_mapping, minlength=num_classes**2) - confmat = bins.reshape(num_classes, num_classes) - return confmat - - -def _confusion_matrix_compute(confmat: torch.Tensor, normalize: Optional[str] = None) -> torch.Tensor: - allowed_normalize = ('true', 'pred', 'all', 'none', None) - assert normalize in allowed_normalize, \ - f"Argument average needs to one of the following: {allowed_normalize}" - confmat = confmat.float() - if normalize is not None and normalize != 'none': - if normalize == 'true': - cm = confmat / confmat.sum(axis=1, keepdim=True) - elif normalize == 'pred': - cm = confmat / confmat.sum(axis=0, keepdim=True) - elif normalize == 'all': - cm = 
confmat / confmat.sum() - nan_elements = cm[torch.isnan(cm)].nelement() - if nan_elements != 0: - cm[torch.isnan(cm)] = 0 - rank_zero_warn(f'{nan_elements} nan values found in confusion matrix have been replaced with zeros.') - return cm - return confmat +from pytorch_lightning.utilities.deprecation import deprecated +@deprecated(target=_confusion_matrix, ver_deprecate="1.3.0", ver_remove="1.5.0") def confusion_matrix( preds: torch.Tensor, target: torch.Tensor, @@ -61,38 +28,6 @@ def confusion_matrix( threshold: float = 0.5 ) -> torch.Tensor: """ - Computes the confusion matrix. Works with binary, multiclass, and multilabel data. - Accepts probabilities from a model output or integer class values in prediction. - Works with multi-dimensional preds and target. - - If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument - to convert into integer labels. This is the case for binary and multi-label probabilities. - - If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. - - Args: - preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or - ``(N, C, ...)`` where C is the number of classes, tensor with labels/probabilities - target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground true labels - num_classes: Number of classes in the dataset. - normalize: Normalization mode for confusion matrix. Choose from - - - ``None`` or ``'none'``: no normalization (default) - - ``'true'``: normalization over the targets (most commonly used) - - ``'pred'``: normalization over the predictions - - ``'all'``: normalization over the whole matrix - - threshold: - Threshold value for binary or multi-label probabilities. default: 0.5 - - Example: - - >>> from pytorch_lightning.metrics.functional import confusion_matrix - >>> target = torch.tensor([1, 1, 0, 0]) - >>> preds = torch.tensor([0, 1, 0, 0]) - >>> confusion_matrix(preds, target, num_classes=2) - tensor([[2., 0.], - [1., 1.]]) + .. deprecated:: + Use :func:`torchmetrics.functional.confusion_matrix`. Will be removed in v1.5.0. """ - confmat = _confusion_matrix_update(preds, target, num_classes, threshold) - return _confusion_matrix_compute(confmat, normalize) diff --git a/pytorch_lightning/metrics/functional/f_beta.py b/pytorch_lightning/metrics/functional/f_beta.py index 5be4786297b65..e4d926e0ab8bf 100644 --- a/pytorch_lightning/metrics/functional/f_beta.py +++ b/pytorch_lightning/metrics/functional/f_beta.py @@ -11,46 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Tuple
-
 import torch
-from torchmetrics.utilities import class_reduce
-from torchmetrics.utilities.checks import _input_format_classification_one_hot
-
-
-def _fbeta_update(
-    preds: torch.Tensor,
-    target: torch.Tensor,
-    num_classes: int,
-    threshold: float = 0.5,
-    multilabel: bool = False
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-    preds, target = _input_format_classification_one_hot(num_classes, preds, target, threshold, multilabel)
-    true_positives = torch.sum(preds * target, dim=1)
-    predicted_positives = torch.sum(preds, dim=1)
-    actual_positives = torch.sum(target, dim=1)
-    return true_positives, predicted_positives, actual_positives
-
+from torchmetrics.functional import f1 as _f1
+from torchmetrics.functional import fbeta as _fbeta
 
-def _fbeta_compute(
-    true_positives: torch.Tensor,
-    predicted_positives: torch.Tensor,
-    actual_positives: torch.Tensor,
-    beta: float = 1.0,
-    average: str = "micro"
-) -> torch.Tensor:
-    if average == "micro":
-        precision = true_positives.sum().float() / predicted_positives.sum()
-        recall = true_positives.sum().float() / actual_positives.sum()
-    else:
-        precision = true_positives.float() / predicted_positives
-        recall = true_positives.float() / actual_positives
-
-    num = (1 + beta**2) * precision * recall
-    denom = beta**2 * precision + recall
-    return class_reduce(num, denom, weights=actual_positives, class_reduction=average)
+from pytorch_lightning.utilities.deprecation import deprecated
 
 
+@deprecated(target=_fbeta, ver_deprecate="1.3.0", ver_remove="1.5.0")
 def fbeta(
     preds: torch.Tensor,
     target: torch.Tensor,
@@ -61,49 +29,12 @@ def fbeta(
     multilabel: bool = False
 ) -> torch.Tensor:
     """
-    Computes f_beta metric.
-
-    Works with binary, multiclass, and multilabel data.
-    Accepts probabilities from a model output or integer class values in prediction.
-    Works with multi-dimensional preds and target.
-
-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument
-    to convert into integer labels. This is the case for binary and multi-label probabilities.
-
-    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
-
-    Args:
-        preds: predictions from model (probabilities, or labels)
-        target: ground truth labels
-        num_classes: Number of classes in the dataset.
-        beta: Beta coefficient in the F measure.
-        threshold:
-            Threshold value for binary or multi-label probabilities. default: 0.5
-
-        average:
-            - ``'micro'`` computes metric globally
-            - ``'macro'`` computes metric for each class and uniformly averages them
-            - ``'weighted'`` computes metric for each class and does a weighted-average,
-              where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` or ``None`` computes and returns the metric per class
-
-        multilabel: If predictions are from multilabel classification.
-
-    Example:
-
-        >>> from pytorch_lightning.metrics.functional import fbeta
-        >>> target = torch.tensor([0, 1, 2, 0, 1, 2])
-        >>> preds = torch.tensor([0, 2, 1, 0, 0, 1])
-        >>> fbeta(preds, target, num_classes=3, beta=0.5)
-        tensor(0.3333)
-
+    .. deprecated::
+        Use :func:`torchmetrics.functional.fbeta`. Will be removed in v1.5.0.
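+
+    Example (migration sketch; this deprecated wrapper only forwards its
+    arguments to :func:`torchmetrics.functional.fbeta`, so importing and
+    calling the torchmetrics function directly is equivalent):
+
+        >>> import torch
+        >>> from torchmetrics.functional import fbeta
+        >>> target = torch.tensor([0, 1, 2, 0, 1, 2])
+        >>> preds = torch.tensor([0, 2, 1, 0, 0, 1])
+        >>> fbeta(preds, target, num_classes=3, beta=0.5)
+        tensor(0.3333)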
""" - true_positives, predicted_positives, actual_positives = _fbeta_update( - preds, target, num_classes, threshold, multilabel - ) - return _fbeta_compute(true_positives, predicted_positives, actual_positives, beta, average) +@deprecated(target=_f1, ver_deprecate="1.3.0", ver_remove="1.5.0") def f1( preds: torch.Tensor, target: torch.Tensor, @@ -113,39 +44,6 @@ def f1( multilabel: bool = False ) -> torch.Tensor: """ - Computes F1 metric. F1 metrics correspond to a equally weighted average of the - precision and recall scores. - - Works with binary, multiclass, and multilabel data. - Accepts probabilities from a model output or integer class values in prediction. - Works with multi-dimensional preds and target. - - If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument - to convert into integer labels. This is the case for binary and multi-label probabilities. - - If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``. - - Args: - preds: predictions from model (probabilities, or labels) - target: ground truth labels - num_classes: Number of classes in the dataset. - threshold: - Threshold value for binary or multi-label probabilities. default: 0.5 - - average: - - ``'micro'`` computes metric globally - - ``'macro'`` computes metric for each class and uniformly averages them - - ``'weighted'`` computes metric for each class and does a weighted-average, - where each class is weighted by their support (accounts for class imbalance) - - ``'none'`` or ``None`` computes and returns the metric per class - - multilabel: If predictions are from multilabel classification. - - Example: - >>> from pytorch_lightning.metrics.functional import f1 - >>> target = torch.tensor([0, 1, 2, 0, 1, 2]) - >>> preds = torch.tensor([0, 2, 1, 0, 0, 1]) - >>> f1(preds, target, num_classes=3) - tensor(0.3333) + .. deprecated:: + Use :func:`torchmetrics.functional.f1`. Will be removed in v1.5.0. """ - return fbeta(preds, target, num_classes, 1.0, threshold, average, multilabel) diff --git a/pytorch_lightning/metrics/functional/hamming_distance.py b/pytorch_lightning/metrics/functional/hamming_distance.py index 3254dcbf8badb..ef6bb3277fef2 100644 --- a/pytorch_lightning/metrics/functional/hamming_distance.py +++ b/pytorch_lightning/metrics/functional/hamming_distance.py @@ -11,61 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Tuple, Union - import torch -from torchmetrics.classification.checks import _input_format_classification - - -def _hamming_distance_update( - preds: torch.Tensor, - target: torch.Tensor, - threshold: float = 0.5, -) -> Tuple[torch.Tensor, int]: - preds, target, _ = _input_format_classification(preds, target, threshold=threshold) - - correct = (preds == target).sum() - total = preds.numel() +from torchmetrics.functional import hamming_distance as _hamming_distance - return correct, total - - -def _hamming_distance_compute(correct: torch.Tensor, total: Union[int, torch.Tensor]) -> torch.Tensor: - return 1 - correct.float() / total +from pytorch_lightning.utilities.deprecation import deprecated +@deprecated(target=_hamming_distance, ver_deprecate="1.3.0", ver_remove="1.5.0") def hamming_distance(preds: torch.Tensor, target: torch.Tensor, threshold: float = 0.5) -> torch.Tensor: - r""" - Computes the average `Hamming distance `_ (also - known as Hamming loss) between targets and predictions: - - .. math:: - \text{Hamming distance} = \frac{1}{N \cdot L} \sum_i^N \sum_l^L 1(y_{il} \neq \hat{y}_{il}) - - Where :math:`y` is a tensor of target values, :math:`\hat{y}` is a tensor of predictions, - and :math:`\bullet_{il}` refers to the :math:`l`-th label of the :math:`i`-th sample of that - tensor. - - This is the same as ``1-accuracy`` for binary data, while for all other types of inputs it - treats each possible label separately - meaning that, for example, multi-class data is - treated as if it were multi-label. - - Args: - preds: Predictions from model - target: Ground truth - threshold: - Threshold probability value for transforming probability predictions to binary - (0 or 1) predictions, in the case of binary or multi-label inputs. - - Example: - - >>> from pytorch_lightning.metrics.functional import hamming_distance - >>> target = torch.tensor([[0, 1], [1, 1]]) - >>> preds = torch.tensor([[0, 1], [0, 1]]) - >>> hamming_distance(preds, target) - tensor(0.2500) - """ - - correct, total = _hamming_distance_update(preds, target, threshold) - return _hamming_distance_compute(correct, total) + .. deprecated:: + Use :func:`torchmetrics.functional.hamming_distance`. Will be removed in v1.5.0. + """ diff --git a/pytorch_lightning/metrics/functional/iou.py b/pytorch_lightning/metrics/functional/iou.py index 0f8152d314848..7ae520eb25dee 100644 --- a/pytorch_lightning/metrics/functional/iou.py +++ b/pytorch_lightning/metrics/functional/iou.py @@ -14,35 +14,12 @@ from typing import Optional import torch -from torchmetrics.utilities import reduce -from torchmetrics.utilities.data import get_num_classes +from torchmetrics.functional import iou as _iou -from pytorch_lightning.metrics.functional.confusion_matrix import _confusion_matrix_update - - -def _iou_from_confmat( - confmat: torch.Tensor, - num_classes: int, - ignore_index: Optional[int] = None, - absent_score: float = 0.0, - reduction: str = 'elementwise_mean', -): - intersection = torch.diag(confmat) - union = confmat.sum(0) + confmat.sum(1) - intersection - - # If this class is absent in both target AND pred (union == 0), then use the absent_score for this class. - scores = intersection.float() / union.float() - scores[union == 0] = absent_score - - # Remove the ignored class index from the scores. 
- if ignore_index is not None and ignore_index >= 0 and ignore_index < num_classes: - scores = torch.cat([ - scores[:ignore_index], - scores[ignore_index + 1:], - ]) - return reduce(scores, reduction=reduction) +from pytorch_lightning.utilities.deprecation import deprecated +@deprecated(target=_iou, ver_deprecate="1.3.0", ver_remove="1.5.0") def iou( pred: torch.Tensor, target: torch.Tensor, @@ -52,60 +29,7 @@ def iou( num_classes: Optional[int] = None, reduction: str = 'elementwise_mean', ) -> torch.Tensor: - r""" - Computes `Intersection over union, or Jaccard index calculation `_: - - .. math:: J(A,B) = \frac{|A\cap B|}{|A\cup B|} - - Where: :math:`A` and :math:`B` are both tensors of the same size, - containing integer class values. They may be subject to conversion from - input data (see description below). - - Note that it is different from box IoU. - - If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument - to convert into integer labels. This is the case for binary and multi-label probabilities. - - If pred has an extra dimension as in the case of multi-class scores we - perform an argmax on ``dim=1``. - - Args: - preds: tensor containing predictions from model (probabilities, or labels) with shape ``[N, d1, d2, ...]`` - target: tensor containing ground truth labels with shape ``[N, d1, d2, ...]`` - ignore_index: optional int specifying a target class to ignore. If given, - this class index does not contribute to the returned score, regardless - of reduction method. Has no effect if given an int that is not in the - range [0, num_classes-1], where num_classes is either given or derived - from pred and target. By default, no index is ignored, and all classes are used. - absent_score: score to use for an individual class, if no instances of - the class index were present in `pred` AND no instances of the class - index were present in `target`. For example, if we have 3 classes, - [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be - assigned the `absent_score`. - threshold: - Threshold value for binary or multi-label probabilities. default: 0.5 - num_classes: - Optionally specify the number of classes - reduction: a method to reduce metric score over labels. - - - ``'elementwise_mean'``: takes the mean (default) - - ``'sum'``: takes the sum - - ``'none'``: no reduction will be applied - - Return: - IoU score : Tensor containing single value if reduction is - 'elementwise_mean', or number of classes if reduction is 'none' - - Example: - - >>> from pytorch_lightning.metrics.functional import iou - >>> target = torch.randint(0, 2, (10, 25, 25)) - >>> pred = torch.tensor(target) - >>> pred[2:5, 7:13, 9:15] = 1 - pred[2:5, 7:13, 9:15] - >>> iou(pred, target) - tensor(0.9660) """ - - num_classes = get_num_classes(pred=pred, target=target, num_classes=num_classes) - confmat = _confusion_matrix_update(pred, target, num_classes, threshold) - return _iou_from_confmat(confmat, num_classes, ignore_index, absent_score, reduction) + .. deprecated:: + Use :func:`torchmetrics.functional.iou`. Will be removed in v1.5.0. + """ diff --git a/pytorch_lightning/metrics/functional/stat_scores.py b/pytorch_lightning/metrics/functional/stat_scores.py index fb1849d3805b2..6f234e84d9aab 100644 --- a/pytorch_lightning/metrics/functional/stat_scores.py +++ b/pytorch_lightning/metrics/functional/stat_scores.py @@ -11,130 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional, Tuple +from typing import Optional import torch -from torchmetrics.classification.checks import _input_format_classification +from torchmetrics.functional import stat_scores as _stat_scores - -def _del_column(tensor: torch.Tensor, index: int): - """ Delete the column at index.""" - - return torch.cat([tensor[:, :index], tensor[:, (index + 1):]], 1) - - -def _stat_scores( - preds: torch.Tensor, - target: torch.Tensor, - reduce: str = "micro", -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - """Calculate the number of tp, fp, tn, fn. - - Args: - preds: - An ``(N, C)`` or ``(N, C, X)`` tensor of predictions (0 or 1) - target: - An ``(N, C)`` or ``(N, C, X)`` tensor of true labels (0 or 1) - reduce: - One of ``'micro'``, ``'macro'``, ``'samples'`` - - Return: - Returns a list of 4 tensors; tp, fp, tn, fn. - The shape of the returned tensors depnds on the shape of the inputs - and the ``reduce`` parameter: - - If inputs are of the shape ``(N, C)``, then - - If ``reduce='micro'``, the returned tensors are 1 element tensors - - If ``reduce='macro'``, the returned tensors are ``(C,)`` tensors - - If ``reduce'samples'``, the returned tensors are ``(N,)`` tensors - - If inputs are of the shape ``(N, C, X)``, then - - If ``reduce='micro'``, the returned tensors are ``(N,)`` tensors - - If ``reduce='macro'``, the returned tensors are ``(N,C)`` tensors - - If ``reduce='samples'``, the returned tensors are ``(N,X)`` tensors - """ - if reduce == "micro": - dim = [0, 1] if preds.ndim == 2 else [1, 2] - elif reduce == "macro": - dim = 0 if preds.ndim == 2 else 2 - elif reduce == "samples": - dim = 1 - - true_pred, false_pred = target == preds, target != preds - pos_pred, neg_pred = preds == 1, preds == 0 - - tp = (true_pred * pos_pred).sum(dim=dim) - fp = (false_pred * pos_pred).sum(dim=dim) - - tn = (true_pred * neg_pred).sum(dim=dim) - fn = (false_pred * neg_pred).sum(dim=dim) - - return tp.long(), fp.long(), tn.long(), fn.long() - - -def _stat_scores_update( - preds: torch.Tensor, - target: torch.Tensor, - reduce: str = "micro", - mdmc_reduce: Optional[str] = None, - num_classes: Optional[int] = None, - top_k: Optional[int] = None, - threshold: float = 0.5, - is_multiclass: Optional[bool] = None, - ignore_index: Optional[int] = None, -) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - - preds, target, _ = _input_format_classification( - preds, target, threshold=threshold, num_classes=num_classes, is_multiclass=is_multiclass, top_k=top_k - ) - - if ignore_index is not None and not 0 <= ignore_index < preds.shape[1]: - raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {preds.shape[0]} classes") - - if ignore_index is not None and preds.shape[1] == 1: - raise ValueError("You can not use `ignore_index` with binary data.") - - if preds.ndim == 3: - if not mdmc_reduce: - raise ValueError( - "When your inputs are multi-dimensional multi-class, you have to set the `mdmc_reduce` parameter" - ) - if mdmc_reduce == "global": - preds = torch.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) - target = torch.transpose(target, 1, 2).reshape(-1, target.shape[1]) - - # Delete what is in ignore_index, if applicable (and classes don't matter): - if ignore_index is not None and reduce != "macro": - preds = _del_column(preds, ignore_index) - target = _del_column(target, ignore_index) - - tp, fp, tn, fn = 
_stat_scores(preds, target, reduce=reduce) - - # Take care of ignore_index - if ignore_index is not None and reduce == "macro": - tp[..., ignore_index] = -1 - fp[..., ignore_index] = -1 - tn[..., ignore_index] = -1 - fn[..., ignore_index] = -1 - - return tp, fp, tn, fn - - -def _stat_scores_compute(tp: torch.Tensor, fp: torch.Tensor, tn: torch.Tensor, fn: torch.Tensor) -> torch.Tensor: - - outputs = [ - tp.unsqueeze(-1), - fp.unsqueeze(-1), - tn.unsqueeze(-1), - fn.unsqueeze(-1), - tp.unsqueeze(-1) + fn.unsqueeze(-1), # support - ] - outputs = torch.cat(outputs, -1) - outputs = torch.where(outputs < 0, torch.tensor(-1, device=outputs.device), outputs) - - return outputs +from pytorch_lightning.utilities.deprecation import deprecated +@deprecated(target=_stat_scores, ver_deprecate="1.3.0", ver_remove="1.5.0") def stat_scores( preds: torch.Tensor, target: torch.Tensor, @@ -146,149 +31,7 @@ def stat_scores( is_multiclass: Optional[bool] = None, ignore_index: Optional[int] = None, ) -> torch.Tensor: - """Computes the number of true positives, false positives, true negatives, false negatives. - Related to `Type I and Type II errors `__ - and the `confusion matrix `__. - - The reduction method (how the statistics are aggregated) is controlled by the - ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the - multi-dimensional multi-class case. - - Args: - preds: Predictions from model (probabilities or labels) - target: Ground truth values - threshold: - Threshold probability value for transforming probability predictions to binary - (0 or 1) predictions, in the case of binary or multi-label inputs. - - top_k: - Number of highest probability entries for each sample to convert to 1s - relevant - only for inputs with probability predictions. If this parameter is set for multi-label - inputs, it will take precedence over ``threshold``. For (multi-dim) multi-class inputs, - this parameter defaults to 1. - - Should be left unset (``None``) for inputs with label predictions. - - reduce: - Defines the reduction that is applied. Should be one of the following: - - - ``'micro'`` [default]: Counts the statistics by summing over all [sample, class] - combinations (globally). Each statistic is represented by a single integer. - - ``'macro'``: Counts the statistics for each class separately (over all samples). - Each statistic is represented by a ``(C,)`` tensor. Requires ``num_classes`` - to be set. - - ``'samples'``: Counts the statistics for each sample separately (over all classes). - Each statistic is represented by a ``(N, )`` 1d tensor. - - Note that what is considered a sample in the multi-dimensional multi-class case - depends on the value of ``mdmc_reduce``. - - num_classes: - Number of classes. Necessary for (multi-dimensional) multi-class or multi-label data. - - ignore_index: - Specify a class (label) to ignore. If given, this class index does not contribute - to the returned score, regardless of reduction method. If an index is ignored, and - ``reduce='macro'``, the class statistics for the ignored class will all be returned - as ``-1``. - - mdmc_reduce: - Defines how the multi-dimensional multi-class inputs are handeled. Should be - one of the following: - - - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional multi-class. - - - ``'samplewise'``: In this case, the statistics are computed separately for each - sample on the ``N`` axis, and then the outputs are concatenated together. 
In each - sample the extra axes ``...`` are flattened to become the sub-sample axis, and - statistics for each sample are computed by treating the sub-sample axis as the - ``N`` axis for that sample. - - - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs are - flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they - were ``(N_X, C)``. From here on the ``reduce`` parameter applies as usual. - - is_multiclass: - Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. - - Return: - The metric returns a tensor of shape ``(..., 5)``, where the last dimension corresponds - to ``[tp, fp, tn, fn, sup]`` (``sup`` stands for support and equals ``tp + fn``). The - shape depends on the ``reduce`` and ``mdmc_reduce`` (in case of multi-dimensional - multi-class data) parameters: - - - If the data is not multi-dimensional multi-class, then - - - If ``reduce='micro'``, the shape will be ``(5, )`` - - If ``reduce='macro'``, the shape will be ``(C, 5)``, - where ``C`` stands for the number of classes - - If ``reduce='samples'``, the shape will be ``(N, 5)``, where ``N`` stands for - the number of samples - - - If the data is multi-dimensional multi-class and ``mdmc_reduce='global'``, then - - - If ``reduce='micro'``, the shape will be ``(5, )`` - - If ``reduce='macro'``, the shape will be ``(C, 5)`` - - If ``reduce='samples'``, the shape will be ``(N*X, 5)``, where ``X`` stands for - the product of sizes of all "extra" dimensions of the data (i.e. all dimensions - except for ``C`` and ``N``) - - - If the data is multi-dimensional multi-class and ``mdmc_reduce='samplewise'``, then - - - If ``reduce='micro'``, the shape will be ``(N, 5)`` - - If ``reduce='macro'``, the shape will be ``(N, C, 5)`` - - If ``reduce='samples'``, the shape will be ``(N, X, 5)`` - - Raises: - ValueError: - If ``reduce`` is none of ``"micro"``, ``"macro"`` or ``"samples"``. - ValueError: - If ``mdmc_reduce`` is none of ``None``, ``"samplewise"``, ``"global"``. - ValueError: - If ``reduce`` is set to ``"macro"`` and ``num_classes`` is not provided. - ValueError: - If ``num_classes`` is set - and ``ignore_index`` is not in the range ``[0, num_classes)``. - ValueError: - If ``ignore_index`` is used with ``binary data``. - ValueError: - If inputs are ``multi-dimensional multi-class`` and ``mdmc_reduce`` is not provided. 
-
-    Example:
-
-        >>> from pytorch_lightning.metrics.functional import stat_scores
-        >>> preds = torch.tensor([1, 0, 2, 1])
-        >>> target = torch.tensor([1, 1, 2, 0])
-        >>> stat_scores(preds, target, reduce='macro', num_classes=3)
-        tensor([[0, 1, 2, 1, 1],
-                [1, 1, 1, 1, 2],
-                [1, 0, 3, 0, 1]])
-        >>> stat_scores(preds, target, reduce='micro')
-        tensor([2, 2, 6, 2, 4])
     """
-
-    if reduce not in ["micro", "macro", "samples"]:
-        raise ValueError(f"The `reduce` {reduce} is not valid.")
-
-    if mdmc_reduce not in [None, "samplewise", "global"]:
-        raise ValueError(f"The `mdmc_reduce` {mdmc_reduce} is not valid.")
-
-    if reduce == "macro" and (not num_classes or num_classes < 1):
-        raise ValueError("When you set `reduce` as 'macro', you have to provide the number of classes.")
-
-    if num_classes and ignore_index is not None and (not 0 <= ignore_index < num_classes or num_classes == 1):
-        raise ValueError(f"The `ignore_index` {ignore_index} is not valid for inputs with {num_classes} classes")
-
-    tp, fp, tn, fn = _stat_scores_update(
-        preds,
-        target,
-        reduce=reduce,
-        mdmc_reduce=mdmc_reduce,
-        top_k=top_k,
-        threshold=threshold,
-        num_classes=num_classes,
-        is_multiclass=is_multiclass,
-        ignore_index=ignore_index,
-    )
-    return _stat_scores_compute(tp, fp, tn, fn)
+    .. deprecated::
+        Use :func:`torchmetrics.functional.stat_scores`. Will be removed in v1.5.0.
+    """
diff --git a/tests/metrics/classification/__init__.py b/tests/metrics/classification/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/tests/metrics/classification/inputs.py b/tests/metrics/classification/inputs.py
deleted file mode 100644
index 7f2ac450385fe..0000000000000
--- a/tests/metrics/classification/inputs.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from collections import namedtuple
-
-import torch
-
-from tests.metrics.utils import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES
-
-Input = namedtuple('Input', ["preds", "target"])
-
-_input_binary_prob = Input(
-    preds=torch.rand(NUM_BATCHES, BATCH_SIZE), target=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE))
-)
-
-_input_binary = Input(
-    preds=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE)),
-    target=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE))
-)
-
-_input_multilabel_prob = Input(
-    preds=torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES),
-    target=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES))
-)
-
-_input_multilabel_multidim_prob = Input(
-    preds=torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM),
-    target=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM))
-)
-
-_input_multilabel = Input(
-    preds=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)),
-    target=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES))
-)
-
-_input_multilabel_multidim = Input(
-    preds=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM)),
-    target=torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM))
-)
-
-# Generate a multilabel edge case, where nothing matches (scores are undefined)
-__temp_preds = torch.randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES))
-__temp_target = abs(__temp_preds - 1)
-
-_input_multilabel_no_match = Input(preds=__temp_preds, target=__temp_target)
-
-__mc_prob_preds = torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES)
-__mc_prob_preds = __mc_prob_preds / __mc_prob_preds.sum(dim=2, keepdim=True)
-
-_input_multiclass_prob = Input(
-    preds=__mc_prob_preds,
target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)) -) - -_input_multiclass = Input( - preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)), - target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE)) -) - -__mdmc_prob_preds = torch.rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM) -__mdmc_prob_preds = __mdmc_prob_preds / __mdmc_prob_preds.sum(dim=2, keepdim=True) - -_input_multidim_multiclass_prob = Input( - preds=__mdmc_prob_preds, target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)) -) - -_input_multidim_multiclass = Input( - preds=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)), - target=torch.randint(high=NUM_CLASSES, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM)) -) diff --git a/tests/metrics/classification/test_confusion_matrix.py b/tests/metrics/classification/test_confusion_matrix.py deleted file mode 100644 index 5371044d6d4b0..0000000000000 --- a/tests/metrics/classification/test_confusion_matrix.py +++ /dev/null @@ -1,128 +0,0 @@ -from functools import partial - -import numpy as np -import pytest -import torch -from sklearn.metrics import confusion_matrix as sk_confusion_matrix - -from pytorch_lightning.metrics.classification.confusion_matrix import ConfusionMatrix -from pytorch_lightning.metrics.functional.confusion_matrix import confusion_matrix -from tests.metrics.classification.inputs import _input_binary, _input_binary_prob -from tests.metrics.classification.inputs import _input_multiclass as _input_mcls -from tests.metrics.classification.inputs import _input_multiclass_prob as _input_mcls_prob -from tests.metrics.classification.inputs import _input_multidim_multiclass as _input_mdmc -from tests.metrics.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob -from tests.metrics.classification.inputs import _input_multilabel as _input_mlb -from tests.metrics.classification.inputs import _input_multilabel_prob as _input_mlb_prob -from tests.metrics.utils import MetricTester, NUM_CLASSES, THRESHOLD - -torch.manual_seed(42) - - -def _sk_cm_binary_prob(preds, target, normalize=None): - sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) - sk_target = target.view(-1).numpy() - - return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) - - -def _sk_cm_binary(preds, target, normalize=None): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) - - -def _sk_cm_multilabel_prob(preds, target, normalize=None): - sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) - sk_target = target.view(-1).numpy() - - return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) - - -def _sk_cm_multilabel(preds, target, normalize=None): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) - - -def _sk_cm_multiclass_prob(preds, target, normalize=None): - sk_preds = torch.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) - - -def _sk_cm_multiclass(preds, target, normalize=None): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) - - -def 
_sk_cm_multidim_multiclass_prob(preds, target, normalize=None): - sk_preds = torch.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) - - -def _sk_cm_multidim_multiclass(preds, target, normalize=None): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_confusion_matrix(y_true=sk_target, y_pred=sk_preds, normalize=normalize) - - -@pytest.mark.parametrize("normalize", ['true', 'pred', 'all', None]) -@pytest.mark.parametrize( - "preds, target, sk_metric, num_classes", - [(_input_binary_prob.preds, _input_binary_prob.target, _sk_cm_binary_prob, 2), - (_input_binary.preds, _input_binary.target, _sk_cm_binary, 2), - (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_cm_multilabel_prob, 2), - (_input_mlb.preds, _input_mlb.target, _sk_cm_multilabel, 2), - (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_cm_multiclass_prob, NUM_CLASSES), - (_input_mcls.preds, _input_mcls.target, _sk_cm_multiclass, NUM_CLASSES), - (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_cm_multidim_multiclass_prob, NUM_CLASSES), - (_input_mdmc.preds, _input_mdmc.target, _sk_cm_multidim_multiclass, NUM_CLASSES)] -) -class TestConfusionMatrix(MetricTester): - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - def test_confusion_matrix(self, normalize, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=ConfusionMatrix, - sk_metric=partial(sk_metric, normalize=normalize), - dist_sync_on_step=dist_sync_on_step, - metric_args={ - "num_classes": num_classes, - "threshold": THRESHOLD, - "normalize": normalize - } - ) - - def test_confusion_matrix_functional(self, normalize, preds, target, sk_metric, num_classes): - self.run_functional_metric_test( - preds, - target, - metric_functional=confusion_matrix, - sk_metric=partial(sk_metric, normalize=normalize), - metric_args={ - "num_classes": num_classes, - "threshold": THRESHOLD, - "normalize": normalize - } - ) - - -def test_warning_on_nan(tmpdir): - preds = torch.randint(3, size=(20, )) - target = torch.randint(3, size=(20, )) - - with pytest.warns(UserWarning, match='.* nan values found in confusion matrix have been replaced with zeros.'): - confusion_matrix(preds, target, num_classes=5, normalize='true') diff --git a/tests/metrics/classification/test_f_beta.py b/tests/metrics/classification/test_f_beta.py deleted file mode 100644 index b9458fb6c530c..0000000000000 --- a/tests/metrics/classification/test_f_beta.py +++ /dev/null @@ -1,153 +0,0 @@ -from functools import partial - -import numpy as np -import pytest -import torch -from sklearn.metrics import fbeta_score - -from pytorch_lightning.metrics import F1, FBeta -from pytorch_lightning.metrics.functional import f1, fbeta -from tests.metrics.classification.inputs import _input_binary, _input_binary_prob -from tests.metrics.classification.inputs import _input_multiclass as _input_mcls -from tests.metrics.classification.inputs import _input_multiclass_prob as _input_mcls_prob -from tests.metrics.classification.inputs import _input_multidim_multiclass as _input_mdmc -from tests.metrics.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob -from tests.metrics.classification.inputs import _input_multilabel as _input_mlb -from tests.metrics.classification.inputs import 
_input_multilabel_no_match as _input_mlb_nomatch -from tests.metrics.classification.inputs import _input_multilabel_prob as _mlb_prob_inputs -from tests.metrics.utils import MetricTester, NUM_CLASSES, THRESHOLD - -torch.manual_seed(42) - - -def _sk_fbeta_binary_prob(preds, target, average='micro', beta=1.0): - sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) - sk_target = target.view(-1).numpy() - - return fbeta_score(y_true=sk_target, y_pred=sk_preds, average='binary', beta=beta) - - -def _sk_fbeta_binary(preds, target, average='micro', beta=1.0): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return fbeta_score(y_true=sk_target, y_pred=sk_preds, average='binary', beta=beta) - - -def _sk_fbeta_multilabel_prob(preds, target, average='micro', beta=1.0): - sk_preds = (preds.view(-1, NUM_CLASSES).numpy() >= THRESHOLD).astype(np.uint8) - sk_target = target.view(-1, NUM_CLASSES).numpy() - - return fbeta_score(y_true=sk_target, y_pred=sk_preds, average=average, beta=beta) - - -def _sk_fbeta_multilabel(preds, target, average='micro', beta=1.0): - sk_preds = preds.view(-1, NUM_CLASSES).numpy() - sk_target = target.view(-1, NUM_CLASSES).numpy() - - return fbeta_score(y_true=sk_target, y_pred=sk_preds, average=average, beta=beta) - - -def _sk_fbeta_multiclass_prob(preds, target, average='micro', beta=1.0): - sk_preds = torch.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() - sk_target = target.view(-1).numpy() - - return fbeta_score(y_true=sk_target, y_pred=sk_preds, average=average, beta=beta) - - -def _sk_fbeta_multiclass(preds, target, average='micro', beta=1.0): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return fbeta_score(y_true=sk_target, y_pred=sk_preds, average=average, beta=beta) - - -def _sk_fbeta_multidim_multiclass_prob(preds, target, average='micro', beta=1.0): - sk_preds = torch.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() - sk_target = target.view(-1).numpy() - - return fbeta_score(y_true=sk_target, y_pred=sk_preds, average=average, beta=beta) - - -def _sk_fbeta_multidim_multiclass(preds, target, average='micro', beta=1.0): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return fbeta_score(y_true=sk_target, y_pred=sk_preds, average=average, beta=beta) - - -@pytest.mark.parametrize( - "preds, target, sk_metric, num_classes, multilabel", - [ - (_input_binary_prob.preds, _input_binary_prob.target, _sk_fbeta_binary_prob, 1, False), - (_input_binary.preds, _input_binary.target, _sk_fbeta_binary, 1, False), - (_mlb_prob_inputs.preds, _mlb_prob_inputs.target, _sk_fbeta_multilabel_prob, NUM_CLASSES, True), - (_input_mlb.preds, _input_mlb.target, _sk_fbeta_multilabel, NUM_CLASSES, True), - (_input_mlb_nomatch.preds, _input_mlb_nomatch.target, _sk_fbeta_multilabel, NUM_CLASSES, True), - (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_fbeta_multiclass_prob, NUM_CLASSES, False), - (_input_mcls.preds, _input_mcls.target, _sk_fbeta_multiclass, NUM_CLASSES, False), - (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_fbeta_multidim_multiclass_prob, NUM_CLASSES, False), - (_input_mdmc.preds, _input_mdmc.target, _sk_fbeta_multidim_multiclass, NUM_CLASSES, False), - ], -) -@pytest.mark.parametrize("average", ['micro', 'macro', 'weighted', None]) -@pytest.mark.parametrize("beta", [0.5, 1.0, 2.0]) -class TestFBeta(MetricTester): - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - def test_fbeta(self, 
preds, target, sk_metric, num_classes, multilabel, average, beta, ddp, dist_sync_on_step): - metric_class = F1 if beta == 1.0 else partial(FBeta, beta=beta) - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=metric_class, - sk_metric=partial(sk_metric, average=average, beta=beta), - dist_sync_on_step=dist_sync_on_step, - metric_args={ - "num_classes": num_classes, - "average": average, - "multilabel": multilabel, - "threshold": THRESHOLD, - }, - check_dist_sync_on_step=False, - check_batch=False, - ) - - def test_fbeta_functional(self, preds, target, sk_metric, num_classes, multilabel, average, beta): - metric_functional = f1 if beta == 1.0 else partial(fbeta, beta=beta) - - self.run_functional_metric_test( - preds=preds, - target=target, - metric_functional=metric_functional, - sk_metric=partial(sk_metric, average=average, beta=beta), - metric_args={ - "num_classes": num_classes, - "average": average, - "multilabel": multilabel, - "threshold": THRESHOLD - } - ) - - -@pytest.mark.parametrize(['pred', 'target', 'beta', 'exp_score'], [ - pytest.param([1., 0., 1., 0.], [0., 1., 1., 0.], 0.5, [0.5, 0.5]), - pytest.param([1., 0., 1., 0.], [0., 1., 1., 0.], 1, [0.5, 0.5]), - pytest.param([1., 0., 1., 0.], [0., 1., 1., 0.], 2, [0.5, 0.5]), -]) -def test_fbeta_score(pred, target, beta, exp_score): - score = fbeta(torch.tensor(pred), torch.tensor(target), num_classes=1, beta=beta, average='none') - assert torch.allclose(score, torch.tensor(exp_score)) - - -@pytest.mark.parametrize(['pred', 'target', 'exp_score'], [ - pytest.param([0., 0., 0., 0.], [1., 1., 1., 1.], [0.0, 0.0]), - pytest.param([1., 0., 1., 0.], [0., 1., 1., 0.], [0.5, 0.5]), - pytest.param([1., 0., 1., 0.], [1., 0., 1., 0.], [1.0, 1.0]), -]) -def test_f1_score(pred, target, exp_score): - score = f1(torch.tensor(pred), torch.tensor(target), num_classes=1, average='none') - assert torch.allclose(score, torch.tensor(exp_score)) diff --git a/tests/metrics/classification/test_hamming_distance.py b/tests/metrics/classification/test_hamming_distance.py deleted file mode 100644 index a4db9c7f339b2..0000000000000 --- a/tests/metrics/classification/test_hamming_distance.py +++ /dev/null @@ -1,80 +0,0 @@ -import pytest -import torch -from sklearn.metrics import hamming_loss as sk_hamming_loss -from torchmetrics.classification.checks import _input_format_classification - -from pytorch_lightning.metrics import HammingDistance -from pytorch_lightning.metrics.functional import hamming_distance -from tests.metrics.classification.inputs import _input_binary, _input_binary_prob -from tests.metrics.classification.inputs import _input_multiclass as _input_mcls -from tests.metrics.classification.inputs import _input_multiclass_prob as _input_mcls_prob -from tests.metrics.classification.inputs import _input_multidim_multiclass as _input_mdmc -from tests.metrics.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob -from tests.metrics.classification.inputs import _input_multilabel as _input_mlb -from tests.metrics.classification.inputs import _input_multilabel_multidim as _input_mlmd -from tests.metrics.classification.inputs import _input_multilabel_multidim_prob as _input_mlmd_prob -from tests.metrics.classification.inputs import _input_multilabel_prob as _input_mlb_prob -from tests.metrics.utils import MetricTester, THRESHOLD - -torch.manual_seed(42) - - -def _sk_hamming_loss(preds, target): - sk_preds, sk_target, _ = _input_format_classification(preds, target, threshold=THRESHOLD) - 
sk_preds, sk_target = sk_preds.numpy(), sk_target.numpy() - sk_preds, sk_target = sk_preds.reshape(sk_preds.shape[0], -1), sk_target.reshape(sk_target.shape[0], -1) - - return sk_hamming_loss(y_true=sk_target, y_pred=sk_preds) - - -@pytest.mark.parametrize( - "preds, target", - [ - (_input_binary_prob.preds, _input_binary_prob.target), - (_input_binary.preds, _input_binary.target), - (_input_mlb_prob.preds, _input_mlb_prob.target), - (_input_mlb.preds, _input_mlb.target), - (_input_mcls_prob.preds, _input_mcls_prob.target), - (_input_mcls.preds, _input_mcls.target), - (_input_mdmc_prob.preds, _input_mdmc_prob.target), - (_input_mdmc.preds, _input_mdmc.target), - (_input_mlmd_prob.preds, _input_mlmd_prob.target), - (_input_mlmd.preds, _input_mlmd.target), - ], -) -class TestHammingDistance(MetricTester): - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [False, True]) - def test_hamming_distance_class(self, ddp, dist_sync_on_step, preds, target): - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=HammingDistance, - sk_metric=_sk_hamming_loss, - dist_sync_on_step=dist_sync_on_step, - metric_args={"threshold": THRESHOLD}, - ) - - def test_hamming_distance_fn(self, preds, target): - self.run_functional_metric_test( - preds, - target, - metric_functional=hamming_distance, - sk_metric=_sk_hamming_loss, - metric_args={"threshold": THRESHOLD}, - ) - - -@pytest.mark.parametrize("threshold", [1.5]) -def test_wrong_params(threshold): - preds, target = _input_mcls_prob.preds, _input_mcls_prob.target - - with pytest.raises(ValueError): - ham_dist = HammingDistance(threshold=threshold) - ham_dist(preds, target) - ham_dist.compute() - - with pytest.raises(ValueError): - hamming_distance(preds, target, threshold=threshold) diff --git a/tests/metrics/classification/test_inputs.py b/tests/metrics/classification/test_inputs.py deleted file mode 100644 index f07a9c2821f56..0000000000000 --- a/tests/metrics/classification/test_inputs.py +++ /dev/null @@ -1,312 +0,0 @@ -import pytest -import torch -from torch import rand, randint -from torchmetrics.classification.checks import _input_format_classification -from torchmetrics.utilities.data import select_topk, to_onehot -from torchmetrics.utilities.enums import DataType - -from tests.metrics.classification.inputs import _input_binary as _bin -from tests.metrics.classification.inputs import _input_binary_prob as _bin_prob -from tests.metrics.classification.inputs import _input_multiclass as _mc -from tests.metrics.classification.inputs import _input_multiclass_prob as _mc_prob -from tests.metrics.classification.inputs import _input_multidim_multiclass as _mdmc -from tests.metrics.classification.inputs import _input_multidim_multiclass_prob as _mdmc_prob -from tests.metrics.classification.inputs import _input_multilabel as _ml -from tests.metrics.classification.inputs import _input_multilabel_multidim as _mlmd -from tests.metrics.classification.inputs import _input_multilabel_multidim_prob as _mlmd_prob -from tests.metrics.classification.inputs import _input_multilabel_prob as _ml_prob -from tests.metrics.classification.inputs import Input -from tests.metrics.utils import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES, THRESHOLD - -torch.manual_seed(42) - -# Some additional inputs to test on -_ml_prob_half = Input(_ml_prob.preds.half(), _ml_prob.target) - -_mc_prob_2cls_preds = rand(NUM_BATCHES, BATCH_SIZE, 2) -_mc_prob_2cls_preds /= _mc_prob_2cls_preds.sum(dim=2, keepdim=True) 
-_mc_prob_2cls = Input(_mc_prob_2cls_preds, randint(high=2, size=(NUM_BATCHES, BATCH_SIZE))) - -_mdmc_prob_many_dims_preds = rand(NUM_BATCHES, BATCH_SIZE, NUM_CLASSES, EXTRA_DIM, EXTRA_DIM) -_mdmc_prob_many_dims_preds /= _mdmc_prob_many_dims_preds.sum(dim=2, keepdim=True) -_mdmc_prob_many_dims = Input( - _mdmc_prob_many_dims_preds, - randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM, EXTRA_DIM)), -) - -_mdmc_prob_2cls_preds = rand(NUM_BATCHES, BATCH_SIZE, 2, EXTRA_DIM) -_mdmc_prob_2cls_preds /= _mdmc_prob_2cls_preds.sum(dim=2, keepdim=True) -_mdmc_prob_2cls = Input(_mdmc_prob_2cls_preds, randint(high=2, size=(NUM_BATCHES, BATCH_SIZE, EXTRA_DIM))) - -# Some utils -T = torch.Tensor - - -def _idn(x): - return x - - -def _usq(x): - return x.unsqueeze(-1) - - -def _thrs(x): - return x >= THRESHOLD - - -def _rshp1(x): - return x.reshape(x.shape[0], -1) - - -def _rshp2(x): - return x.reshape(x.shape[0], x.shape[1], -1) - - -def _onehot(x): - return to_onehot(x, NUM_CLASSES) - - -def _onehot2(x): - return to_onehot(x, 2) - - -def _top1(x): - return select_topk(x, 1) - - -def _top2(x): - return select_topk(x, 2) - - -# To avoid ugly black line wrapping -def _ml_preds_tr(x): - return _rshp1(_thrs(x)) - - -def _onehot_rshp1(x): - return _onehot(_rshp1(x)) - - -def _onehot2_rshp1(x): - return _onehot2(_rshp1(x)) - - -def _top1_rshp2(x): - return _top1(_rshp2(x)) - - -def _top2_rshp2(x): - return _top2(_rshp2(x)) - - -def _probs_to_mc_preds_tr(x): - return _onehot2(_thrs(x)) - - -def _mlmd_prob_to_mc_preds_tr(x): - return _onehot2(_rshp1(_thrs(x))) - - -######################## -# Test correct inputs -######################## - - -@pytest.mark.parametrize( - "inputs, num_classes, is_multiclass, top_k, exp_mode, post_preds, post_target", - [ - ############################# - # Test usual expected cases - (_bin, None, False, None, "multi-class", _usq, _usq), - (_bin, 1, False, None, "multi-class", _usq, _usq), - (_bin_prob, None, None, None, "binary", lambda x: _usq(_thrs(x)), _usq), - (_ml_prob, None, None, None, "multi-label", _thrs, _idn), - (_ml, None, False, None, "multi-dim multi-class", _idn, _idn), - (_ml_prob, None, None, None, "multi-label", _ml_preds_tr, _rshp1), - (_ml_prob, None, None, 2, "multi-label", _top2, _rshp1), - (_mlmd, None, False, None, "multi-dim multi-class", _rshp1, _rshp1), - (_mc, NUM_CLASSES, None, None, "multi-class", _onehot, _onehot), - (_mc_prob, None, None, None, "multi-class", _top1, _onehot), - (_mc_prob, None, None, 2, "multi-class", _top2, _onehot), - (_mdmc, NUM_CLASSES, None, None, "multi-dim multi-class", _onehot, _onehot), - (_mdmc_prob, None, None, None, "multi-dim multi-class", _top1_rshp2, _onehot), - (_mdmc_prob, None, None, 2, "multi-dim multi-class", _top2_rshp2, _onehot), - (_mdmc_prob_many_dims, None, None, None, "multi-dim multi-class", _top1_rshp2, _onehot_rshp1), - (_mdmc_prob_many_dims, None, None, 2, "multi-dim multi-class", _top2_rshp2, _onehot_rshp1), - ########################### - # Test some special cases - # Make sure that half precision works, i.e. 
is converted to full precision - (_ml_prob_half, None, None, None, "multi-label", lambda x: _ml_preds_tr(x.float()), _rshp1), - # Binary as multiclass - (_bin, None, None, None, "multi-class", _onehot2, _onehot2), - # Binary probs as multiclass - (_bin_prob, None, True, None, "binary", _probs_to_mc_preds_tr, _onehot2), - # Multilabel as multiclass - (_ml, None, True, None, "multi-dim multi-class", _onehot2, _onehot2), - # Multilabel probs as multiclass - (_ml_prob, None, True, None, "multi-label", _probs_to_mc_preds_tr, _onehot2), - # Multidim multilabel as multiclass - (_mlmd, None, True, None, "multi-dim multi-class", _onehot2_rshp1, _onehot2_rshp1), - # Multidim multilabel probs as multiclass - (_mlmd_prob, None, True, None, "multi-label", _mlmd_prob_to_mc_preds_tr, _onehot2_rshp1), - # Multiclass prob with 2 classes as binary - (_mc_prob_2cls, None, False, None, "multi-class", lambda x: _top1(x)[:, [1]], _usq), - # Multi-dim multi-class with 2 classes as multi-label - (_mdmc_prob_2cls, None, False, None, "multi-dim multi-class", lambda x: _top1(x)[:, 1], _idn), - ], -) -def test_usual_cases(inputs, num_classes, is_multiclass, top_k, exp_mode, post_preds, post_target): - - def __get_data_type_enum(str_exp_mode): - return next(DataType[n] for n in dir(DataType) if DataType[n] == str_exp_mode) - - for exp_mode in (exp_mode, __get_data_type_enum(exp_mode)): - preds_out, target_out, mode = _input_format_classification( - preds=inputs.preds[0], - target=inputs.target[0], - threshold=THRESHOLD, - num_classes=num_classes, - is_multiclass=is_multiclass, - top_k=top_k, - ) - - assert mode == exp_mode - assert torch.equal(preds_out, post_preds(inputs.preds[0]).int()) - assert torch.equal(target_out, post_target(inputs.target[0]).int()) - - # Test that things work when batch_size = 1 - preds_out, target_out, mode = _input_format_classification( - preds=inputs.preds[0][[0], ...], - target=inputs.target[0][[0], ...], - threshold=THRESHOLD, - num_classes=num_classes, - is_multiclass=is_multiclass, - top_k=top_k, - ) - - assert mode == exp_mode - assert torch.equal(preds_out, post_preds(inputs.preds[0][[0], ...]).int()) - assert torch.equal(target_out, post_target(inputs.target[0][[0], ...]).int()) - - -# Test that threshold is correctly applied -def test_threshold(): - target = T([1, 1, 1]).int() - preds_probs = T([0.5 - 1e-5, 0.5, 0.5 + 1e-5]) - - preds_probs_out, _, _ = _input_format_classification(preds_probs, target, threshold=0.5) - - assert torch.equal(torch.tensor([0, 1, 1], dtype=torch.int), preds_probs_out.squeeze().int()) - - -######################################################################## -# Test incorrect inputs -######################################################################## - - -@pytest.mark.parametrize("threshold", [-0.5, 0.0, 1.0, 1.5]) -def test_incorrect_threshold(threshold): - preds, target = rand(size=(7, )), randint(high=2, size=(7, )) - with pytest.raises(ValueError): - _input_format_classification(preds, target, threshold=threshold) - - -@pytest.mark.parametrize( - "preds, target, num_classes, is_multiclass", - [ - # Target not integer - (randint(high=2, size=(7, )), randint(high=2, size=(7, )).float(), None, None), - # Target negative - (randint(high=2, size=(7, )), -randint(high=2, size=(7, )), None, None), - # Preds negative integers - (-randint(high=2, size=(7, )), randint(high=2, size=(7, )), None, None), - # Negative probabilities - (-rand(size=(7, )), randint(high=2, size=(7, )), None, None), - # is_multiclass=False and target > 1 - (rand(size=(7, )), 
randint(low=2, high=4, size=(7, )), None, False), - # is_multiclass=False and preds integers with > 1 - (randint(low=2, high=4, size=(7, )), randint(high=2, size=(7, )), None, False), - # Wrong batch size - (randint(high=2, size=(8, )), randint(high=2, size=(7, )), None, None), - # Completely wrong shape - (randint(high=2, size=(7, )), randint(high=2, size=(7, 4)), None, None), - # Same #dims, different shape - (randint(high=2, size=(7, 3)), randint(high=2, size=(7, 4)), None, None), - # Same shape and preds floats, target not binary - (rand(size=(7, 3)), randint(low=2, high=4, size=(7, 3)), None, None), - # #dims in preds = 1 + #dims in target, C shape not second or last - (rand(size=(7, 3, 4, 3)), randint(high=4, size=(7, 3, 3)), None, None), - # #dims in preds = 1 + #dims in target, preds not float - (randint(high=2, size=(7, 3, 3, 4)), randint(high=4, size=(7, 3, 3)), None, None), - # is_multiclass=False, with C dimension > 2 - (_mc_prob.preds[0], randint(high=2, size=(BATCH_SIZE, )), None, False), - # Probs of multiclass preds do not sum up to 1 - (rand(size=(7, 3, 5)), randint(high=2, size=(7, 5)), None, None), - # Max target larger or equal to C dimension - (_mc_prob.preds[0], randint(low=NUM_CLASSES + 1, high=100, size=(BATCH_SIZE, )), None, None), - # C dimension not equal to num_classes - (_mc_prob.preds[0], _mc_prob.target[0], NUM_CLASSES + 1, None), - # Max target larger than num_classes (with #dim preds = 1 + #dims target) - (_mc_prob.preds[0], randint(low=NUM_CLASSES + 1, high=100, size=(BATCH_SIZE, NUM_CLASSES)), 4, None), - # Max target larger than num_classes (with #dim preds = #dims target) - (randint(high=4, size=(7, 3)), randint(low=5, high=7, size=(7, 3)), 4, None), - # Max preds larger than num_classes (with #dim preds = #dims target) - (randint(low=5, high=7, size=(7, 3)), randint(high=4, size=(7, 3)), 4, None), - # Num_classes=1, but is_multiclass not false - (randint(high=2, size=(7, )), randint(high=2, size=(7, )), 1, None), - # is_multiclass=False, but implied class dimension (for multi-label, from shape) != num_classes - (randint(high=2, size=(7, 3, 3)), randint(high=2, size=(7, 3, 3)), 4, False), - # Multilabel input with implied class dimension != num_classes - (rand(size=(7, 3, 3)), randint(high=2, size=(7, 3, 3)), 4, False), - # Multilabel input with is_multiclass=True, but num_classes != 2 (or None) - (rand(size=(7, 3)), randint(high=2, size=(7, 3)), 4, True), - # Binary input, num_classes > 2 - (rand(size=(7, )), randint(high=2, size=(7, )), 4, None), - # Binary input, num_classes == 2 and is_multiclass not True - (rand(size=(7, )), randint(high=2, size=(7, )), 2, None), - (rand(size=(7, )), randint(high=2, size=(7, )), 2, False), - # Binary input, num_classes == 1 and is_multiclass=True - (rand(size=(7, )), randint(high=2, size=(7, )), 1, True), - ], -) -def test_incorrect_inputs(preds, target, num_classes, is_multiclass): - with pytest.raises(ValueError): - _input_format_classification( - preds=preds, target=target, threshold=THRESHOLD, num_classes=num_classes, is_multiclass=is_multiclass - ) - - -@pytest.mark.parametrize( - "preds, target, num_classes, is_multiclass, top_k", - [ - # Topk set with non (md)mc or ml prob data - (_bin.preds[0], _bin.target[0], None, None, 2), - (_bin_prob.preds[0], _bin_prob.target[0], None, None, 2), - (_mc.preds[0], _mc.target[0], None, None, 2), - (_ml.preds[0], _ml.target[0], None, None, 2), - (_mlmd.preds[0], _mlmd.target[0], None, None, 2), - (_mdmc.preds[0], _mdmc.target[0], None, None, 2), - # top_k = 0 - 
(_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, None, 0), - # top_k = float - (_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, None, 0.123), - # top_k =2 with 2 classes, is_multiclass=False - (_mc_prob_2cls.preds[0], _mc_prob_2cls.target[0], None, False, 2), - # top_k = number of classes (C dimension) - (_mc_prob.preds[0], _mc_prob.target[0], None, None, NUM_CLASSES), - # is_multiclass = True for ml prob inputs, top_k set - (_ml_prob.preds[0], _ml_prob.target[0], None, True, 2), - # top_k = num_classes for ml prob inputs - (_ml_prob.preds[0], _ml_prob.target[0], None, True, NUM_CLASSES), - ], -) -def test_incorrect_inputs_topk(preds, target, num_classes, is_multiclass, top_k): - with pytest.raises(ValueError): - _input_format_classification( - preds=preds, - target=target, - threshold=THRESHOLD, - num_classes=num_classes, - is_multiclass=is_multiclass, - top_k=top_k, - ) diff --git a/tests/metrics/classification/test_iou.py b/tests/metrics/classification/test_iou.py deleted file mode 100644 index 6bb100f68165a..0000000000000 --- a/tests/metrics/classification/test_iou.py +++ /dev/null @@ -1,216 +0,0 @@ -from functools import partial - -import numpy as np -import pytest -import torch -from sklearn.metrics import jaccard_score as sk_jaccard_score - -from pytorch_lightning.metrics.classification.iou import IoU -from pytorch_lightning.metrics.functional.iou import iou -from tests.metrics.classification.inputs import _input_binary, _input_binary_prob -from tests.metrics.classification.inputs import _input_multiclass as _input_mcls -from tests.metrics.classification.inputs import _input_multiclass_prob as _input_mcls_prob -from tests.metrics.classification.inputs import _input_multidim_multiclass as _input_mdmc -from tests.metrics.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob -from tests.metrics.classification.inputs import _input_multilabel as _input_mlb -from tests.metrics.classification.inputs import _input_multilabel_prob as _input_mlb_prob -from tests.metrics.utils import MetricTester, NUM_CLASSES, THRESHOLD - - -def _sk_iou_binary_prob(preds, target, average=None): - sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) - sk_target = target.view(-1).numpy() - - return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) - - -def _sk_iou_binary(preds, target, average=None): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) - - -def _sk_iou_multilabel_prob(preds, target, average=None): - sk_preds = (preds.view(-1).numpy() >= THRESHOLD).astype(np.uint8) - sk_target = target.view(-1).numpy() - - return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) - - -def _sk_iou_multilabel(preds, target, average=None): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) - - -def _sk_iou_multiclass_prob(preds, target, average=None): - sk_preds = torch.argmax(preds, dim=len(preds.shape) - 1).view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) - - -def _sk_iou_multiclass(preds, target, average=None): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) - - -def _sk_iou_multidim_multiclass_prob(preds, target, average=None): - 
sk_preds = torch.argmax(preds, dim=len(preds.shape) - 2).view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) - - -def _sk_iou_multidim_multiclass(preds, target, average=None): - sk_preds = preds.view(-1).numpy() - sk_target = target.view(-1).numpy() - - return sk_jaccard_score(y_true=sk_target, y_pred=sk_preds, average=average) - - -@pytest.mark.parametrize("reduction", ['elementwise_mean', 'none']) -@pytest.mark.parametrize( - "preds, target, sk_metric, num_classes", - [(_input_binary_prob.preds, _input_binary_prob.target, _sk_iou_binary_prob, 2), - (_input_binary.preds, _input_binary.target, _sk_iou_binary, 2), - (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_iou_multilabel_prob, 2), - (_input_mlb.preds, _input_mlb.target, _sk_iou_multilabel, 2), - (_input_mcls_prob.preds, _input_mcls_prob.target, _sk_iou_multiclass_prob, NUM_CLASSES), - (_input_mcls.preds, _input_mcls.target, _sk_iou_multiclass, NUM_CLASSES), - (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_iou_multidim_multiclass_prob, NUM_CLASSES), - (_input_mdmc.preds, _input_mdmc.target, _sk_iou_multidim_multiclass, NUM_CLASSES)] -) -class TestIoU(MetricTester): - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - def test_confusion_matrix(self, reduction, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step): - average = 'macro' if reduction == 'elementwise_mean' else None # convert tags - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=IoU, - sk_metric=partial(sk_metric, average=average), - dist_sync_on_step=dist_sync_on_step, - metric_args={ - "num_classes": num_classes, - "threshold": THRESHOLD, - "reduction": reduction - } - ) - - def test_confusion_matrix_functional(self, reduction, preds, target, sk_metric, num_classes): - average = 'macro' if reduction == 'elementwise_mean' else None # convert tags - self.run_functional_metric_test( - preds, - target, - metric_functional=iou, - sk_metric=partial(sk_metric, average=average), - metric_args={ - "num_classes": num_classes, - "threshold": THRESHOLD, - "reduction": reduction - } - ) - - -@pytest.mark.parametrize(['half_ones', 'reduction', 'ignore_index', 'expected'], [ - pytest.param(False, 'none', None, torch.Tensor([1, 1, 1])), - pytest.param(False, 'elementwise_mean', None, torch.Tensor([1])), - pytest.param(False, 'none', 0, torch.Tensor([1, 1])), - pytest.param(True, 'none', None, torch.Tensor([0.5, 0.5, 0.5])), - pytest.param(True, 'elementwise_mean', None, torch.Tensor([0.5])), - pytest.param(True, 'none', 0, torch.Tensor([0.5, 0.5])), -]) -def test_iou(half_ones, reduction, ignore_index, expected): - pred = (torch.arange(120) % 3).view(-1, 1) - target = (torch.arange(120) % 3).view(-1, 1) - if half_ones: - pred[:60] = 1 - iou_val = iou( - pred=pred, - target=target, - ignore_index=ignore_index, - reduction=reduction, - ) - assert torch.allclose(iou_val, expected, atol=1e-9) - - -# test `absent_score` -@pytest.mark.parametrize( - ['pred', 'target', 'ignore_index', 'absent_score', 'num_classes', 'expected'], - [ - # Note that -1 is used as the absent_score in almost all tests here to distinguish it from the range of valid - # scores the function can return ([0., 1.] range, inclusive). - # 2 classes, class 0 is correct everywhere, class 1 is absent. 
- pytest.param([0], [0], None, -1., 2, [1., -1.]), - pytest.param([0, 0], [0, 0], None, -1., 2, [1., -1.]), - # absent_score not applied if only class 0 is present and it's the only class. - pytest.param([0], [0], None, -1., 1, [1.]), - # 2 classes, class 1 is correct everywhere, class 0 is absent. - pytest.param([1], [1], None, -1., 2, [-1., 1.]), - pytest.param([1, 1], [1, 1], None, -1., 2, [-1., 1.]), - # When 0 index ignored, class 0 does not get a score (not even the absent_score). - pytest.param([1], [1], 0, -1., 2, [1.0]), - # 3 classes. Only 0 and 2 are present, and are perfectly predicted. 1 should get absent_score. - pytest.param([0, 2], [0, 2], None, -1., 3, [1., -1., 1.]), - pytest.param([2, 0], [2, 0], None, -1., 3, [1., -1., 1.]), - # 3 classes. Only 0 and 1 are present, and are perfectly predicted. 2 should get absent_score. - pytest.param([0, 1], [0, 1], None, -1., 3, [1., 1., -1.]), - pytest.param([1, 0], [1, 0], None, -1., 3, [1., 1., -1.]), - # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in pred but not target; should not get absent_score), class - # 2 is absent. - pytest.param([0, 1], [0, 0], None, -1., 3, [0.5, 0., -1.]), - # 3 classes, class 0 is 0.5 IoU, class 1 is 0 IoU (in target but not pred; should not get absent_score), class - # 2 is absent. - pytest.param([0, 0], [0, 1], None, -1., 3, [0.5, 0., -1.]), - # Sanity checks with absent_score of 1.0. - pytest.param([0, 2], [0, 2], None, 1.0, 3, [1., 1., 1.]), - pytest.param([0, 2], [0, 2], 0, 1.0, 3, [1., 1.]), - ] -) -def test_iou_absent_score(pred, target, ignore_index, absent_score, num_classes, expected): - iou_val = iou( - pred=torch.tensor(pred), - target=torch.tensor(target), - ignore_index=ignore_index, - absent_score=absent_score, - num_classes=num_classes, - reduction='none', - ) - assert torch.allclose(iou_val, torch.tensor(expected).to(iou_val)) - - -# example data taken from -# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/tests/test_ranking.py -@pytest.mark.parametrize( - ['pred', 'target', 'ignore_index', 'num_classes', 'reduction', 'expected'], - [ - # Ignoring an index outside of [0, num_classes-1] should have no effect. - pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], None, 3, 'none', [1, 1 / 2, 2 / 3]), - pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], -1, 3, 'none', [1, 1 / 2, 2 / 3]), - pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 255, 3, 'none', [1, 1 / 2, 2 / 3]), - # Ignoring a valid index drops only that index from the result. - pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'none', [1 / 2, 2 / 3]), - pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 1, 3, 'none', [1, 2 / 3]), - pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 2, 3, 'none', [1, 1 / 2]), - # When reducing to mean or sum, the ignored index does not contribute to the output. 
- pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'elementwise_mean', [7 / 12]), - pytest.param([0, 1, 1, 2, 2], [0, 1, 2, 2, 2], 0, 3, 'sum', [7 / 6]), - ] -) -def test_iou_ignore_index(pred, target, ignore_index, num_classes, reduction, expected): - iou_val = iou( - pred=torch.tensor(pred), - target=torch.tensor(target), - ignore_index=ignore_index, - num_classes=num_classes, - reduction=reduction, - ) - assert torch.allclose(iou_val, torch.tensor(expected).to(iou_val)) diff --git a/tests/metrics/classification/test_stat_scores.py b/tests/metrics/classification/test_stat_scores.py deleted file mode 100644 index 6ccb5abed6711..0000000000000 --- a/tests/metrics/classification/test_stat_scores.py +++ /dev/null @@ -1,255 +0,0 @@ -from functools import partial -from typing import Callable, Optional - -import numpy as np -import pytest -import torch -from sklearn.metrics import multilabel_confusion_matrix -from torchmetrics.classification.checks import _input_format_classification - -from pytorch_lightning.metrics import StatScores -from pytorch_lightning.metrics.functional import stat_scores -from tests.metrics.classification.inputs import _input_binary, _input_binary_prob, _input_multiclass -from tests.metrics.classification.inputs import _input_multiclass_prob as _input_mccls_prob -from tests.metrics.classification.inputs import _input_multidim_multiclass as _input_mdmc -from tests.metrics.classification.inputs import _input_multidim_multiclass_prob as _input_mdmc_prob -from tests.metrics.classification.inputs import _input_multilabel as _input_mcls -from tests.metrics.classification.inputs import _input_multilabel_prob as _input_mlb_prob -from tests.metrics.utils import MetricTester, NUM_CLASSES, THRESHOLD - -torch.manual_seed(42) - - -def _sk_stat_scores(preds, target, reduce, num_classes, is_multiclass, ignore_index, top_k, mdmc_reduce=None): - preds, target, _ = _input_format_classification( - preds, target, threshold=THRESHOLD, num_classes=num_classes, is_multiclass=is_multiclass, top_k=top_k - ) - sk_preds, sk_target = preds.numpy(), target.numpy() - - if reduce != "macro" and ignore_index is not None and preds.shape[1] > 1: - sk_preds = np.delete(sk_preds, ignore_index, 1) - sk_target = np.delete(sk_target, ignore_index, 1) - - if preds.shape[1] == 1 and reduce == "samples": - sk_target = sk_target.T - sk_preds = sk_preds.T - - sk_stats = multilabel_confusion_matrix( - sk_target, sk_preds, samplewise=(reduce == "samples") and preds.shape[1] != 1 - ) - - if preds.shape[1] == 1 and reduce != "samples": - sk_stats = sk_stats[[1]].reshape(-1, 4)[:, [3, 1, 0, 2]] - else: - sk_stats = sk_stats.reshape(-1, 4)[:, [3, 1, 0, 2]] - - if reduce == "micro": - sk_stats = sk_stats.sum(axis=0, keepdims=True) - - sk_stats = np.concatenate([sk_stats, sk_stats[:, [3]] + sk_stats[:, [0]]], 1) - - if reduce == "micro": - sk_stats = sk_stats[0] - - if reduce == "macro" and ignore_index is not None and preds.shape[1]: - sk_stats[ignore_index, :] = -1 - - return sk_stats - - -def _sk_stat_scores_mdim_mcls(preds, target, reduce, mdmc_reduce, num_classes, is_multiclass, ignore_index, top_k): - preds, target, _ = _input_format_classification( - preds, target, threshold=THRESHOLD, num_classes=num_classes, is_multiclass=is_multiclass, top_k=top_k - ) - - if mdmc_reduce == "global": - preds = torch.transpose(preds, 1, 2).reshape(-1, preds.shape[1]) - target = torch.transpose(target, 1, 2).reshape(-1, target.shape[1]) - - return _sk_stat_scores(preds, target, reduce, None, False, ignore_index, top_k) - elif 
mdmc_reduce == "samplewise":
-        scores = []
-
-        for i in range(preds.shape[0]):
-            pred_i = preds[i, ...].T
-            target_i = target[i, ...].T
-            scores_i = _sk_stat_scores(pred_i, target_i, reduce, None, False, ignore_index, top_k)
-
-            scores.append(np.expand_dims(scores_i, 0))
-
-        return np.concatenate(scores)
-
-
-@pytest.mark.parametrize(
-    "reduce, mdmc_reduce, num_classes, inputs, ignore_index",
-    [
-        ["unknown", None, None, _input_binary, None],
-        ["micro", "unknown", None, _input_binary, None],
-        ["macro", None, None, _input_binary, None],
-        ["micro", None, None, _input_mdmc_prob, None],
-        ["micro", None, None, _input_binary_prob, 0],
-        ["micro", None, None, _input_mccls_prob, NUM_CLASSES],
-        ["micro", None, NUM_CLASSES, _input_mccls_prob, NUM_CLASSES],
-    ],
-)
-def test_wrong_params(reduce, mdmc_reduce, num_classes, inputs, ignore_index):
-    """Test a combination of parameters that are invalid and should raise an error.
-
-    This includes invalid ``reduce`` and ``mdmc_reduce`` parameter values, not setting
-    ``num_classes`` when ``reduce='macro'``, not setting ``mdmc_reduce`` when inputs
-    are ``multi-dim multi-class``, setting ``ignore_index`` when inputs are binary, as well
-    as setting ``ignore_index`` to a value higher than the number of classes.
-    """
-    with pytest.raises(ValueError):
-        stat_scores(
-            inputs.preds[0], inputs.target[0], reduce, mdmc_reduce, num_classes=num_classes, ignore_index=ignore_index
-        )
-
-    with pytest.raises(ValueError):
-        sts = StatScores(reduce=reduce, mdmc_reduce=mdmc_reduce, num_classes=num_classes, ignore_index=ignore_index)
-        sts(inputs.preds[0], inputs.target[0])
-
-
-def test_wrong_threshold():
-    with pytest.raises(ValueError):
-        StatScores(threshold=1.5)
-
-
-@pytest.mark.parametrize("ignore_index", [None, 0])
-@pytest.mark.parametrize("reduce", ["micro", "macro", "samples"])
-@pytest.mark.parametrize(
-    "preds, target, sk_fn, mdmc_reduce, num_classes, is_multiclass, top_k",
-    [
-        (_input_binary_prob.preds, _input_binary_prob.target, _sk_stat_scores, None, 1, None, None),
-        (_input_binary.preds, _input_binary.target, _sk_stat_scores, None, 1, False, None),
-        (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, None),
-        (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, 2),
-        (_input_mcls.preds, _input_mcls.target, _sk_stat_scores, None, NUM_CLASSES, False, None),
-        (_input_mccls_prob.preds, _input_mccls_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, None),
-        (_input_mccls_prob.preds, _input_mccls_prob.target, _sk_stat_scores, None, NUM_CLASSES, None, 2),
-        (_input_multiclass.preds, _input_multiclass.target, _sk_stat_scores, None, NUM_CLASSES, None, None),
-        (_input_mdmc.preds, _input_mdmc.target, _sk_stat_scores_mdim_mcls, "samplewise", NUM_CLASSES, None, None),
-        (
-            _input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_stat_scores_mdim_mcls, "samplewise", NUM_CLASSES, None,
-            None
-        ),
-        (_input_mdmc.preds, _input_mdmc.target, _sk_stat_scores_mdim_mcls, "global", NUM_CLASSES, None, None),
-        (_input_mdmc_prob.preds, _input_mdmc_prob.target, _sk_stat_scores_mdim_mcls, "global", NUM_CLASSES, None, None),
    ],
-)
-class TestStatScores(MetricTester):
-    # DDP tests temporarily disabled due to hanging issues
-    @pytest.mark.parametrize("ddp", [False])
-    @pytest.mark.parametrize("dist_sync_on_step", [True, False])
-    def test_stat_scores_class(
-        self,
-        ddp: bool,
-        dist_sync_on_step: bool,
-        sk_fn: Callable,
-        preds: torch.Tensor,
-        target: torch.Tensor,
-        reduce: str,
-
mdmc_reduce: Optional[str], - num_classes: Optional[int], - is_multiclass: Optional[bool], - ignore_index: Optional[int], - top_k: Optional[int], - ): - if ignore_index is not None and preds.ndim == 2: - pytest.skip("Skipping ignore_index test with binary inputs.") - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=StatScores, - sk_metric=partial( - sk_fn, - reduce=reduce, - mdmc_reduce=mdmc_reduce, - num_classes=num_classes, - is_multiclass=is_multiclass, - ignore_index=ignore_index, - top_k=top_k, - ), - dist_sync_on_step=dist_sync_on_step, - metric_args={ - "num_classes": num_classes, - "reduce": reduce, - "mdmc_reduce": mdmc_reduce, - "threshold": THRESHOLD, - "is_multiclass": is_multiclass, - "ignore_index": ignore_index, - "top_k": top_k, - }, - check_dist_sync_on_step=True, - check_batch=True, - ) - - def test_stat_scores_fn( - self, - sk_fn: Callable, - preds: torch.Tensor, - target: torch.Tensor, - reduce: str, - mdmc_reduce: Optional[str], - num_classes: Optional[int], - is_multiclass: Optional[bool], - ignore_index: Optional[int], - top_k: Optional[int], - ): - if ignore_index is not None and preds.ndim == 2: - pytest.skip("Skipping ignore_index test with binary inputs.") - - self.run_functional_metric_test( - preds, - target, - metric_functional=stat_scores, - sk_metric=partial( - sk_fn, - reduce=reduce, - mdmc_reduce=mdmc_reduce, - num_classes=num_classes, - is_multiclass=is_multiclass, - ignore_index=ignore_index, - top_k=top_k, - ), - metric_args={ - "num_classes": num_classes, - "reduce": reduce, - "mdmc_reduce": mdmc_reduce, - "threshold": THRESHOLD, - "is_multiclass": is_multiclass, - "ignore_index": ignore_index, - "top_k": top_k, - }, - ) - - -_mc_k_target = torch.tensor([0, 1, 2]) -_mc_k_preds = torch.tensor([[0.35, 0.4, 0.25], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]]) -_ml_k_target = torch.tensor([[0, 1, 0], [1, 1, 0], [0, 0, 0]]) -_ml_k_preds = torch.tensor([[0.9, 0.2, 0.75], [0.1, 0.7, 0.8], [0.6, 0.1, 0.7]]) - - -@pytest.mark.parametrize( - "k, preds, target, reduce, expected", - [ - (1, _mc_k_preds, _mc_k_target, "micro", torch.tensor([2, 1, 5, 1, 3])), - (2, _mc_k_preds, _mc_k_target, "micro", torch.tensor([3, 3, 3, 0, 3])), - (1, _ml_k_preds, _ml_k_target, "micro", torch.tensor([0, 3, 3, 3, 3])), - (2, _ml_k_preds, _ml_k_target, "micro", torch.tensor([1, 5, 1, 2, 3])), - (1, _mc_k_preds, _mc_k_target, "macro", torch.tensor([[0, 1, 1], [0, 1, 0], [2, 1, 2], [1, 0, 0], [1, 1, 1]])), - (2, _mc_k_preds, _mc_k_target, "macro", torch.tensor([[1, 1, 1], [1, 1, 1], [1, 1, 1], [0, 0, 0], [1, 1, 1]])), - (1, _ml_k_preds, _ml_k_target, "macro", torch.tensor([[0, 0, 0], [1, 0, 2], [1, 1, 1], [1, 2, 0], [1, 2, 0]])), - (2, _ml_k_preds, _ml_k_target, "macro", torch.tensor([[0, 1, 0], [2, 0, 3], [0, 1, 0], [1, 1, 0], [1, 2, 0]])), - ], -) -def test_top_k(k: int, preds: torch.Tensor, target: torch.Tensor, reduce: str, expected: torch.Tensor): - """ A simple test to check that top_k works as expected """ - - class_metric = StatScores(top_k=k, reduce=reduce, num_classes=3) - class_metric.update(preds, target) - - assert torch.equal(class_metric.compute(), expected.T) - assert torch.equal(stat_scores(preds, target, top_k=k, reduce=reduce, num_classes=3), expected.T) diff --git a/tests/metrics/test_remove_1-5_metrics.py b/tests/metrics/test_remove_1-5_metrics.py index 41ccfb6da8015..339d07b163632 100644 --- a/tests/metrics/test_remove_1-5_metrics.py +++ b/tests/metrics/test_remove_1-5_metrics.py @@ -21,21 +21,33 @@ AUC, AUROC, AveragePrecision, + 
ConfusionMatrix, + F1, + FBeta, + HammingDistance, + IoU, MetricCollection, Precision, PrecisionRecallCurve, Recall, ROC, + StatScores, ) from pytorch_lightning.metrics.functional import ( auc, auroc, average_precision, + confusion_matrix, + f1, + fbeta, + hamming_distance, + iou, precision, precision_recall, precision_recall_curve, recall, roc, + stat_scores, ) from pytorch_lightning.metrics.functional.accuracy import accuracy from pytorch_lightning.metrics.utils import get_num_classes, select_topk, to_categorical, to_onehot @@ -162,3 +174,66 @@ def test_v1_5_metric_precision_recall(): assert torch.equal(prec, torch.tensor([1., 1., 1., 1.])) assert torch.allclose(rc, torch.tensor([1., 0.6667, 0.3333, 0.]), atol=1e-4) assert torch.equal(thrs, torch.tensor([1, 2, 3])) + + +def test_v1_5_metric_classif_mix(): + ConfusionMatrix.__init__.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + ConfusionMatrix(num_classes=1) + + FBeta.__init__.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + FBeta(num_classes=1) + + F1.__init__.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + F1(num_classes=1) + + HammingDistance.__init__.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + HammingDistance() + + StatScores.__init__.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + StatScores() + + target = torch.tensor([1, 1, 0, 0]) + preds = torch.tensor([0, 1, 0, 0]) + confusion_matrix.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + assert torch.equal(confusion_matrix(preds, target, num_classes=2), torch.tensor([[2., 0.], [1., 1.]])) + + target = torch.tensor([0, 1, 2, 0, 1, 2]) + preds = torch.tensor([0, 2, 1, 0, 0, 1]) + fbeta.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + assert torch.allclose(fbeta(preds, target, num_classes=3, beta=0.5), torch.tensor(0.3333), atol=1e-4) + + f1.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + assert torch.allclose(f1(preds, target, num_classes=3), torch.tensor(0.3333), atol=1e-4) + + target = torch.tensor([[0, 1], [1, 1]]) + preds = torch.tensor([[0, 1], [0, 1]]) + hamming_distance.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + assert hamming_distance(preds, target) == torch.tensor(0.25) + + preds = torch.tensor([1, 0, 2, 1]) + target = torch.tensor([1, 1, 2, 0]) + stat_scores.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + assert torch.equal(stat_scores(preds, target, reduce='micro'), torch.tensor([2, 2, 6, 2, 4])) + + +def test_v1_5_metric_detect(): + IoU.__init__.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + IoU(num_classes=1) + + target = torch.randint(0, 2, (10, 25, 25)) + pred = torch.tensor(target) + pred[2:5, 7:13, 9:15] = 1 - pred[2:5, 7:13, 9:15] + iou.warned = False + with pytest.deprecated_call(match='It will be removed in v1.5.0'): + assert torch.allclose(iou(pred, target), torch.tensor(0.9660), atol=1e-4)
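
Editor's note: every shim in this patch funnels through the `deprecated` decorator from pytorch_lightning.utilities.deprecation, whose implementation is not part of this diff. The sketch below is only a minimal illustration of the forwarding idea, under stated assumptions: the name `_deprecated_sketch` and the exact message format are hypothetical, and the real decorator additionally knows how to apply to `__init__` methods and map their arguments onto the target class, as the class shims above use it. The `warned` attribute mirrors the flag the tests reset via e.g. `stat_scores.warned = False`.

    # Minimal sketch of the forwarding pattern; NOT the actual implementation
    # of pytorch_lightning.utilities.deprecation.deprecated. All names here
    # are illustrative assumptions based only on the behaviour visible above.
    import functools
    import warnings
    from typing import Any, Callable


    def _deprecated_sketch(target: Callable, ver_deprecate: str, ver_remove: str) -> Callable:
        """Warn once, then forward every call to ``target``."""

        def decorator(fn: Callable) -> Callable:

            @functools.wraps(fn)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                if not wrapper.warned:
                    warnings.warn(
                        f"`{fn.__qualname__}` was deprecated in v{ver_deprecate} in favor of"
                        f" `{target.__module__}.{target.__qualname__}`."
                        f" It will be removed in v{ver_remove}.",
                        DeprecationWarning,
                    )
                    wrapper.warned = True
                # Delegate the actual computation to the torchmetrics target;
                # the wrapped shim body is never executed.
                return target(*args, **kwargs)

            wrapper.warned = False
            return wrapper

        return decorator

With such a decorator the call never reaches the wrapped body, which is why the shims in this patch reduce to an empty function carrying only a docstring, and why warning once per wrapped callable keeps repeated metric updates from flooding logs while still letting the tests above assert the deprecation message.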
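
For callers migrating off these aliases, the torchmetrics replacements accept the same arguments. A short usage sketch, reusing the exact values asserted in test_v1_5_metric_classif_mix above and assuming a torchmetrics release contemporary with this patch, where the functional `stat_scores` still takes `reduce=`:

    # Calling torchmetrics directly instead of the deprecated
    # pytorch_lightning.metrics aliases; values mirror the tests above.
    import torch
    from torchmetrics import ConfusionMatrix
    from torchmetrics.functional import stat_scores

    preds = torch.tensor([1, 0, 2, 1])
    target = torch.tensor([1, 1, 2, 0])
    # [tp, fp, tn, fn, support] summed over all classes -> tensor([2, 2, 6, 2, 4])
    print(stat_scores(preds, target, reduce="micro"))

    confmat = ConfusionMatrix(num_classes=2)
    # Matches the expected tensor([[2., 0.], [1., 1.]]) asserted above.
    print(confmat(torch.tensor([0, 1, 0, 0]), torch.tensor([1, 1, 0, 0])))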