Classification metrics overhaul: accuracy metrics (2/n) #4838

Merged
merged 136 commits on Dec 21, 2020
Changes from 134 commits

Commits
136 commits
6959ea0
Add stuff
tadejsv Nov 24, 2020
0679015
Change metrics documentation layout
tadejsv Nov 24, 2020
35627b5
Add stuff
tadejsv Nov 24, 2020
55fdaaf
Change testing utils
tadejsv Nov 24, 2020
35f8320
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Nov 24, 2020
5cbf56a
Replace len(*.shape) with *.ndim
tadejsv Nov 24, 2020
9c33d0b
More descriptive error message for input formatting
tadejsv Nov 24, 2020
6562205
Replace movedim with permute
tadejsv Nov 24, 2020
b97aef2
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Nov 24, 2020
cbbc769
PEP 8 compliance
tadejsv Nov 24, 2020
f45fc81
Division with float
tadejsv Nov 24, 2020
a04a71e
Style changes in error messages
tadejsv Nov 25, 2020
eaac5d7
More error message style improvements
tadejsv Nov 25, 2020
c1108f0
Fix typo in docs
tadejsv Nov 25, 2020
277769b
Add more descriptive variable names in utils
tadejsv Nov 25, 2020
4849298
Change internal var names
tadejsv Nov 25, 2020
22906a4
Merge remote-tracking branch 'upstream/master' into cls_metrics_input…
tadejsv Nov 25, 2020
1034a71
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Nov 25, 2020
02bd636
Break down error checking for inputs into separate functions
tadejsv Nov 25, 2020
f97145b
Remove the (N, ..., C) option in MD-MC
tadejsv Nov 25, 2020
536feaf
Simplify select_topk
tadejsv Nov 25, 2020
4241d7c
Remove detach for inputs
tadejsv Nov 25, 2020
99d3c81
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Nov 25, 2020
86d6c4d
Fix typos
tadejsv Nov 25, 2020
54c98a0
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Nov 25, 2020
bb11677
Merge branch 'master' into cls_metrics_input_formatting
teddykoker Nov 25, 2020
bdc4111
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Nov 25, 2020
cde3997
Update pytorch_lightning/metrics/classification/utils.py
tadejsv Nov 26, 2020
05a54da
Update docs/source/metrics.rst
tadejsv Nov 26, 2020
9a43a5e
Minor error message changes
tadejsv Nov 26, 2020
3f4ad3c
Update pytorch_lightning/metrics/utils.py
tadejsv Nov 26, 2020
a654e6a
Reuse case from validation in formatting
tadejsv Nov 26, 2020
7b2ef2b
Merge branch 'cls_metrics_input_formatting' of github.com:tadejsv/pyt…
tadejsv Nov 26, 2020
16ab8f7
Refactor code in _input_format_classification
tadejsv Nov 27, 2020
558276f
Merge branch 'master' into cls_metrics_input_formatting
tchaton Nov 27, 2020
ecffe18
Small improvements
tadejsv Nov 27, 2020
a907ade
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Nov 27, 2020
725c7dd
PEP 8
tadejsv Nov 27, 2020
41ad0b7
Update pytorch_lightning/metrics/classification/utils.py
tadejsv Nov 27, 2020
ca13e76
Update pytorch_lightning/metrics/classification/utils.py
tadejsv Nov 27, 2020
ede2c7f
Update docs/source/metrics.rst
tadejsv Nov 27, 2020
c6e4de4
Update pytorch_lightning/metrics/classification/utils.py
tadejsv Nov 27, 2020
201d0de
Apply suggestions from code review
tadejsv Nov 27, 2020
f08edbc
Alphabetical reordering of regression metrics
tadejsv Nov 27, 2020
523bae3
Merge branch 'cls_metrics_input_formatting' of github.com:tadejsv/pyt…
tadejsv Nov 27, 2020
db24fae
Merge branch 'master' into cls_metrics_input_formatting
Borda Nov 27, 2020
35e3eff
Change default value of top_k and add error checking
tadejsv Nov 28, 2020
dd6f8ea
Merge branch 'cls_metrics_input_formatting' of github.com:tadejsv/pyt…
tadejsv Nov 28, 2020
c28aadf
Extract basic validation into separate function
tadejsv Nov 28, 2020
4bfc688
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Nov 28, 2020
323285e
Update to new top_k default
tadejsv Nov 28, 2020
0cb0eac
Update desciption of parameters in input formatting
tadejsv Nov 29, 2020
28acf4c
Merge branch 'master' into cls_metrics_input_formatting
tchaton Nov 30, 2020
8e7a85a
Apply suggestions from code review
tadejsv Nov 30, 2020
829155e
Check that probabilities in preds sum to 1 (for MC)
tadejsv Nov 30, 2020
768879d
Fix coverage
tadejsv Nov 30, 2020
e4d88e2
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Dec 1, 2020
eeded45
Split accuracy and hamming loss
tadejsv Dec 1, 2020
b49cfdc
Remove old redundant accuracy
tadejsv Dec 1, 2020
15ef14d
Merge branch 'master' into cls_metrics_input_formatting
teddykoker Dec 2, 2020
3d8f584
Merge branch 'master' into cls_metrics_accuracy
tchaton Dec 3, 2020
1568970
Merge branch 'master' into cls_metrics_input_formatting
tchaton Dec 3, 2020
a9fa730
Merge with master and resolve conflicts
tadejsv Dec 6, 2020
44ad276
Merge branch 'master' into cls_metrics_input_formatting
Borda Dec 6, 2020
96d40c8
Minor changes
tadejsv Dec 6, 2020
cca430a
Merge branch 'cls_metrics_input_formatting' of github.com:tadejsv/pyt…
tadejsv Dec 6, 2020
b0bde16
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Dec 6, 2020
627d99a
Fix imports
tadejsv Dec 6, 2020
de3defb
Improve docstring descriptions
tadejsv Dec 6, 2020
f3c47f9
Fix edge case and simplify testing
tadejsv Dec 6, 2020
a7e91a9
Merge branch 'cls_metrics_input_formatting' into cls_metrics_accuracy
tadejsv Dec 6, 2020
b7ced6e
Fix docs
tadejsv Dec 6, 2020
e91e564
PEP8
tadejsv Dec 6, 2020
798ec03
Reorder imports
tadejsv Dec 6, 2020
7217924
Merge remote-tracking branch 'upstream/master' into cls_metrics_accuracy
tadejsv Dec 7, 2020
a7c143e
Update changelog
tadejsv Dec 7, 2020
531ae33
Update docstring
tadejsv Dec 7, 2020
2eba226
Merge branch 'master' into cls_metrics_accuracy
tadejsv Dec 7, 2020
a66cf31
Update docstring
tadejsv Dec 7, 2020
e93f83e
Merge branch 'cls_metrics_accuracy' of github.com:tadejsv/pytorch-lig…
tadejsv Dec 7, 2020
89b09f8
Reverse formatting changes for tests
tadejsv Dec 7, 2020
e715437
Change parameter order
tadejsv Dec 7, 2020
d5daec8
Remove formatting changes 2/2
tadejsv Dec 7, 2020
c820060
Remove formatting 3/3
tadejsv Dec 7, 2020
b576de0
.
tadejsv Dec 7, 2020
dae341b
Improve description of top_k parameter
tadejsv Dec 7, 2020
b2d2b71
Apply suggestions from code review
Borda Dec 7, 2020
9b2a399
Apply suggestions from code review
tadejsv Dec 7, 2020
0952df2
Remove unneeded assert
tadejsv Dec 7, 2020
c7fe698
Update pytorch_lightning/metrics/functional/accuracy.py
tadejsv Dec 7, 2020
e2bc0ab
Remove unneeded assert
tadejsv Dec 7, 2020
acbd1ca
Merge branch 'cls_metrics_accuracy' of github.com:tadejsv/pytorch-lig…
tadejsv Dec 7, 2020
8801f8a
Explicit checking of parameter values
tadejsv Dec 7, 2020
c32b36e
Apply suggestions from code review
Borda Dec 7, 2020
0314c7d
Apply suggestions from code review
tadejsv Dec 7, 2020
152cadf
Fix top_k checking
tadejsv Dec 7, 2020
022d6a6
PEP8
tadejsv Dec 7, 2020
9efc963
Don't check dist_sync in test
tadejsv Dec 8, 2020
d992f7d
add back check_dist_sync_on_step
tadejsv Dec 8, 2020
a726060
Make sure half-precision inputs are transformed (#5013)
tadejsv Dec 8, 2020
93c5d02
Fix typo
tadejsv Dec 8, 2020
0813055
Rename hamming loss to hamming distance
tadejsv Dec 8, 2020
6bf714b
Fix tests for half precision
tadejsv Dec 8, 2020
d12f1d6
Fix docs underline length
tadejsv Dec 8, 2020
a55cb46
Fix doc undeline length
tadejsv Dec 8, 2020
d75eec3
Merge branch 'master' into cls_metrics_accuracy
justusschock Dec 8, 2020
6b3b057
Replace mdmc_accuracy parameter with subset_accuracy
tadejsv Dec 8, 2020
6f218d4
Merge branch 'cls_metrics_accuracy' of github.com:tadejsv/pytorch-lig…
tadejsv Dec 8, 2020
98cb5f4
Update changelog
tadejsv Dec 8, 2020
72ca3ac
Merge branch 'master' into cls_metrics_accuracy
SkafteNicki Dec 8, 2020
474fbd0
Apply suggestions from code review
tadejsv Dec 8, 2020
03cccc3
Suggestions from code review
tadejsv Dec 8, 2020
de0213e
Fix number in docs
tadejsv Dec 8, 2020
0fbf93c
Update pytorch_lightning/metrics/classification/accuracy.py
rohitgr7 Dec 8, 2020
1b8af65
Replace topk by argsort in select_topk
tadejsv Dec 11, 2020
c2c17f0
Merge branch 'master' into cls_metrics_accuracy
tadejsv Dec 11, 2020
3c4f200
Fix changelog
tadejsv Dec 11, 2020
065c848
Merge remote-tracking branch 'upstream/master' into cls_metrics_accuracy
tadejsv Dec 12, 2020
82d550e
Add test for wrong params
tadejsv Dec 12, 2020
279e4b9
Merge branch 'master' into cls_metrics_accuracy
tadejsv Dec 12, 2020
c4e9aa2
Merge branch 'master' into cls_metrics_accuracy
s-rog Dec 13, 2020
827a544
Merge branch 'master' into cls_metrics_accuracy
tadejsv Dec 14, 2020
eb9cb3c
Add Google Colab badges (#5111)
shacharmirkin Dec 14, 2020
69123af
Fix hanging metrics tests (#5134)
tadejsv Dec 14, 2020
863885a
Merge branch 'master' into cls_metrics_accuracy
tadejsv Dec 14, 2020
811dc00
Merge remote-tracking branch 'upstream/master' into cls_metrics_accuracy
tadejsv Dec 14, 2020
f68acc0
Use torch.topk again as ddp hanging tests fixed in #5134
tadejsv Dec 14, 2020
d47b559
Merge remote-tracking branch 'upstream/release/1.2-dev' into cls_metr…
tadejsv Dec 16, 2020
3bf3c3a
Fix unwanted notebooks change
tadejsv Dec 17, 2020
4412c9c
Merge remote-tracking branch 'upstream/release/1.2-dev' into cls_metr…
tadejsv Dec 17, 2020
a9a4847
Merge remote-tracking branch 'upstream/release/1.2-dev' into cls_metr…
tadejsv Dec 21, 2020
44135c7
Fix too long line in hamming_distance
tadejsv Dec 21, 2020
908e60f
Apply suggestions from code review
Borda Dec 21, 2020
23a997e
Apply suggestions from code review
Borda Dec 21, 2020
b3e458d
protect
Borda Dec 21, 2020
92f5f83
Update CHANGELOG.md
rohitgr7 Dec 21, 2020
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- `Accuracy` metric now generalizes to Top-k accuracy for (multi-dimensional) multi-class inputs using the `top_k` parameter ([#4838](https://github.com/PyTorchLightning/pytorch-lightning/pull/4838))

- `Accuracy` metric now enables the computation of subset accuracy for multi-label or multi-dimensional multi-class inputs with the `subset_accuracy` parameter ([#4838](https://github.com/PyTorchLightning/pytorch-lightning/pull/4838))

- `HammingDistance` metric to compute the hamming distance (loss) ([#4838](https://github.com/PyTorchLightning/pytorch-lightning/pull/4838))

### Changed

@@ -19,6 +24,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Removed



### Fixed


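The first changelog entry above introduces the ``top_k`` parameter for ``Accuracy``. A minimal sketch of how it is meant to be used, mirroring the docstring example that appears further down in this diff (the illustrative tensors are mine; the printed value is the documented expected output, not re-verified here):

import torch
from pytorch_lightning.metrics import Accuracy

# Multi-class probability predictions for 3 samples and 3 classes.
target = torch.tensor([0, 1, 2])
preds = torch.tensor([[0.1, 0.9, 0.0],
                      [0.3, 0.1, 0.6],
                      [0.2, 0.5, 0.3]])

# With top_k=2, a sample counts as correct if the true class is among the
# two highest-probability predictions; here the first and third samples qualify.
accuracy = Accuracy(top_k=2)
print(accuracy(preds, target))  # tensor(0.6667), as in the docstring example below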
14 changes: 12 additions & 2 deletions docs/source/metrics.rst
@@ -292,6 +292,12 @@ FBeta
.. autoclass:: pytorch_lightning.metrics.classification.FBeta
:noindex:

Hamming Distance
~~~~~~~~~~~~~~~~

.. autoclass:: pytorch_lightning.metrics.classification.HammingDistance
:noindex:

Precision
~~~~~~~~~

@@ -323,10 +329,9 @@ Functional Metrics (Classification)
accuracy [func]
~~~~~~~~~~~~~~~

.. autofunction:: pytorch_lightning.metrics.functional.classification.accuracy
.. autofunction:: pytorch_lightning.metrics.functional.accuracy
:noindex:


auc [func]
~~~~~~~~~~

@@ -382,6 +387,11 @@ fbeta [func]
.. autofunction:: pytorch_lightning.metrics.functional.fbeta
:noindex:

hamming_distance [func]
~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: pytorch_lightning.metrics.functional.hamming_distance
:noindex:

iou [func]
~~~~~~~~~~
1 change: 1 addition & 0 deletions pytorch_lightning/metrics/__init__.py
@@ -15,6 +15,7 @@

from pytorch_lightning.metrics.classification import ( # noqa: F401
Accuracy,
HammingDistance,
Precision,
Recall,
ConfusionMatrix,
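With the one-line change above, ``HammingDistance`` is exposed alongside ``Accuracy`` in the top-level metrics namespace. A minimal sketch of the resulting import path (parameter values are illustrative):

from pytorch_lightning.metrics import Accuracy, HammingDistance

acc = Accuracy(top_k=2, subset_accuracy=False)
hd = HammingDistance(threshold=0.5)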
1 change: 1 addition & 0 deletions pytorch_lightning/metrics/classification/__init__.py
@@ -15,6 +15,7 @@
from pytorch_lightning.metrics.classification.average_precision import AveragePrecision # noqa: F401
from pytorch_lightning.metrics.classification.confusion_matrix import ConfusionMatrix # noqa: F401
from pytorch_lightning.metrics.classification.f_beta import FBeta, Fbeta, F1 # noqa: F401
from pytorch_lightning.metrics.classification.hamming_distance import HammingDistance # noqa: F401
from pytorch_lightning.metrics.classification.precision_recall import Precision, Recall # noqa: F401
from pytorch_lightning.metrics.classification.precision_recall_curve import PrecisionRecallCurve # noqa: F401
from pytorch_lightning.metrics.classification.roc import ROC # noqa: F401
90 changes: 66 additions & 24 deletions pytorch_lightning/metrics/classification/accuracy.py
@@ -16,35 +16,57 @@
import torch

from pytorch_lightning.metrics.metric import Metric
from pytorch_lightning.metrics.utils import _input_format_classification
from pytorch_lightning.metrics.functional.accuracy import _accuracy_update, _accuracy_compute


class Accuracy(Metric):
r"""
Computes `Accuracy <https://en.wikipedia.org/wiki/Accuracy_and_precision>`_:

.. math:: \text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y_i})
.. math::
\text{Accuracy} = \frac{1}{N}\sum_i^N 1(y_i = \hat{y}_i)

Where :math:`y` is a tensor of target values, and :math:`\hat{y}` is a
tensor of predictions. Works with binary, multiclass, and multilabel
data. Accepts logits from a model output or integer class values in
prediction. Works with multi-dimensional preds and target.
tensor of predictions.

Forward accepts
For multi-class and multi-dimensional multi-class data with probability predictions, the
parameter ``top_k`` generalizes this metric to a Top-K accuracy metric: for each sample the
top-K highest probability items are considered to find the correct label.

- ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
- ``target`` (long tensor): ``(N, ...)``
For multi-label and multi-dimensional multi-class inputs, this metric computes the "global"
accuracy by default, which counts all labels or sub-samples separately. This can be
changed to subset accuracy (which requires all labels or sub-samples in the sample to
be correctly predicted) by setting ``subset_accuracy=True``.

If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
This is the case for binary and multi-label logits.

If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
Accepts all input types listed in :ref:`metrics:Input types`.

Args:
threshold:
Review thread (Dec 21, 2020):

rohitgr7 (Contributor): Just wondering, should threshold be None by default too? It's not used when we provide pred_labels, and 0.5 is used by default when we have pred_probs.

tadejsv (Contributor, Author): Well, I guess this could be done, but in a way that if pred_probs are passed in, None would default to 0.5. Otherwise this would be a very disruptive breaking change for people used to using accuracy without extra params.

rohitgr7 (Contributor): Yeah, that's what I meant: 0.5 by default when we have pred_probs.

tadejsv (Contributor, Author): Got it, will add this in the next PR.

Threshold value for binary or multi-label logits. default: 0.5
Threshold probability value for transforming probability predictions to binary
`(0,1)` predictions, in the case of binary or multi-label inputs.
top_k:
Number of highest probability predictions considered to find the correct label, relevant
only for (multi-dimensional) multi-class inputs with probability predictions. The
default value (``None``) will be interpreted as 1 for these inputs.

Should be left at default (``None``) for all other types of inputs.
subset_accuracy:
Whether to compute subset accuracy for multi-label and multi-dimensional
multi-class inputs (has no effect for other input types).

For multi-label inputs, if the parameter is set to `True`, then all labels for
each sample must be correctly predicted for the sample to count as correct. If it
is set to `False`, then all labels are counted separately - this is equivalent to
flattening inputs beforehand (i.e. ``preds = preds.flatten()`` and same for ``target``).

For multi-dimensional multi-class inputs, if the parameter is set to `True`, then all
sub-samples (on the extra axis) must be correct for the sample to be counted as correct.
If it is set to `False`, then all sub-samples are counted separately - this is equivalent,
in the case of label predictions, to flattening the inputs beforehand (i.e.
``preds = preds.flatten()`` and same for ``target``). Note that the ``top_k`` parameter
still applies in both cases, if set.
compute_on_step:
Forward only calls ``update()`` and return None if this is set to False. default: True
Forward only calls ``update()`` and return None if this is set to False.
dist_sync_on_step:
Synchronize metric state across processes at each ``forward()``
before returning the value at the step. default: False
@@ -63,10 +85,19 @@ class Accuracy(Metric):
>>> accuracy(preds, target)
tensor(0.5000)

>>> target = torch.tensor([0, 1, 2])
>>> preds = torch.tensor([[0.1, 0.9, 0], [0.3, 0.1, 0.6], [0.2, 0.5, 0.3]])
>>> accuracy = Accuracy(top_k=2)
>>> accuracy(preds, target)
tensor(0.6667)

"""

def __init__(
self,
threshold: float = 0.5,
top_k: Optional[int] = None,
subset_accuracy: bool = False,
compute_on_step: bool = True,
dist_sync_on_step: bool = False,
process_group: Optional[Any] = None,
@@ -82,24 +113,35 @@ def __init__(
self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")

if not 0 <= threshold <= 1:
raise ValueError("The `threshold` should lie in the [0,1] interval.")

if top_k is not None and top_k <= 0:
raise ValueError("The `top_k` should be an integer larger than 1.")

self.threshold = threshold
self.top_k = top_k
self.subset_accuracy = subset_accuracy

def update(self, preds: torch.Tensor, target: torch.Tensor):
"""
Update state with predictions and targets.
Update state with predictions and targets. See :ref:`metrics:Input types` for more information
on input types.

Args:
preds: Predictions from model
target: Ground truth values
preds: Predictions from model (probabilities, or labels)
target: Ground truth labels
"""
preds, target = _input_format_classification(preds, target, self.threshold)
assert preds.shape == target.shape

self.correct += torch.sum(preds == target)
self.total += target.numel()
correct, total = _accuracy_update(
preds, target, threshold=self.threshold, top_k=self.top_k, subset_accuracy=self.subset_accuracy
)

self.correct += correct
self.total += total

def compute(self):
def compute(self) -> torch.Tensor:
"""
Computes accuracy over state.
Computes accuracy based on inputs passed in to ``update`` previously.
"""
return self.correct.float() / self.total
return _accuracy_compute(self.correct, self.total)
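A short sketch of the difference between the default ("global") accuracy and ``subset_accuracy=True`` for multi-label inputs, following the semantics described in the docstring above (the input tensors are illustrative and the printed values are derived from those semantics, not verified outputs):

import torch
from pytorch_lightning.metrics import Accuracy

# Multi-label inputs: 2 samples, 3 labels each, given as 0/1 label predictions.
target = torch.tensor([[0, 1, 1], [1, 0, 1]])
preds = torch.tensor([[0, 1, 1], [1, 1, 1]])

global_acc = Accuracy()                      # each label counted separately: 5 of 6 correct
subset_acc = Accuracy(subset_accuracy=True)  # whole label vector must match: 1 of 2 samples

print(global_acc(preds, target))  # tensor(0.8333)
print(subset_acc(preds, target))  # tensor(0.5000)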
105 changes: 105 additions & 0 deletions pytorch_lightning/metrics/classification/hamming_distance.py
@@ -0,0 +1,105 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Callable, Optional

import torch
from pytorch_lightning.metrics.metric import Metric
from pytorch_lightning.metrics.functional.hamming_distance import _hamming_distance_update, _hamming_distance_compute


class HammingDistance(Metric):
r"""
Computes the average `Hamming distance <https://en.wikipedia.org/wiki/Hamming_distance>`_ (also
known as Hamming loss) between targets and predictions:

.. math::
\text{Hamming distance} = \frac{1}{N \cdot L}\sum_i^N \sum_l^L 1(y_{il} \neq \hat{y_{il}})

Where :math:`y` is a tensor of target values, :math:`\hat{y}` is a tensor of predictions,
and :math:`\bullet_{il}` refers to the :math:`l`-th label of the :math:`i`-th sample of that
tensor.

This is the same as ``1-accuracy`` for binary data, while for all other types of inputs it
treats each possible label separately - meaning that, for example, multi-class data is
treated as if it were multi-label.

Accepts all input types listed in :ref:`metrics:Input types`.

Args:
threshold:
Threshold probability value for transforming probability predictions to binary
`(0,1)` predictions, in the case of binary or multi-label inputs.
compute_on_step:
Forward only calls ``update()`` and return None if this is set to False.
dist_sync_on_step:
Synchronize metric state across processes at each ``forward()``
before returning the value at the step.
process_group:
Specify the process group on which synchronization is called. default: None (which selects the entire world)
dist_sync_fn:
Callback that performs the allgather operation on the metric state. When ``None``, DDP
will be used to perform the all gather.

Example:

>>> from pytorch_lightning.metrics import HammingDistance
>>> target = torch.tensor([[0, 1], [1, 1]])
>>> preds = torch.tensor([[0, 1], [0, 1]])
>>> hamming_distance = HammingDistance()
>>> hamming_distance(preds, target)
tensor(0.2500)

"""

def __init__(
self,
threshold: float = 0.5,
compute_on_step: bool = True,
dist_sync_on_step: bool = False,
process_group: Optional[Any] = None,
dist_sync_fn: Callable = None,
):
super().__init__(
compute_on_step=compute_on_step,
dist_sync_on_step=dist_sync_on_step,
process_group=process_group,
dist_sync_fn=dist_sync_fn,
)

self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")

if not 0 <= threshold <= 1:
raise ValueError("The `threshold` should lie in the [0,1] interval.")
self.threshold = threshold

def update(self, preds: torch.Tensor, target: torch.Tensor):
"""
Update state with predictions and targets. See :ref:`metrics:Input types` for more information
on input types.

Args:
preds: Predictions from model (probabilities, or labels)
target: Ground truth labels
"""
correct, total = _hamming_distance_update(preds, target, self.threshold)

self.correct += correct
self.total += total

def compute(self) -> torch.Tensor:
"""
Computes hamming distance based on inputs passed in to ``update`` previously.
"""
return _hamming_distance_compute(self.correct, self.total)
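As the docstring notes, hamming distance equals ``1 - accuracy`` for binary inputs and counts every label separately otherwise. A minimal sketch of the module and functional forms, reusing the docstring example plus an illustrative binary case:

import torch
from pytorch_lightning.metrics import HammingDistance
from pytorch_lightning.metrics.functional import hamming_distance

# Multi-label example from the docstring: 1 of 4 label entries differs -> 0.25.
target = torch.tensor([[0, 1], [1, 1]])
preds = torch.tensor([[0, 1], [0, 1]])
print(HammingDistance()(preds, target))  # tensor(0.2500)
print(hamming_distance(preds, target))   # functional form, same result

# Binary case: hamming distance is the complement of accuracy.
target_bin = torch.tensor([0, 1, 1, 0])
preds_bin = torch.tensor([0, 1, 0, 0])
print(hamming_distance(preds_bin, target_bin))  # tensor(0.2500), i.e. 1 - 0.75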
5 changes: 5 additions & 0 deletions pytorch_lightning/metrics/classification/helpers.py
@@ -405,6 +405,11 @@ def _input_format_classification(
else:
preds, target = preds.squeeze(), target.squeeze()

# Convert half precision tensors to full precision, as not all ops are supported
# for example, min() is not supported
if preds.dtype == torch.float16:
preds = preds.float()

case = _check_classification_inputs(
preds,
target,
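The snippet above upcasts half-precision predictions before further processing. A rough sketch of the situation it covers (fp16 probability predictions, e.g. from mixed-precision inference; tensors are illustrative and the printed value assumes the documented behaviour):

import torch
from pytorch_lightning.metrics import Accuracy

# Half-precision multi-class probabilities; rows sum to 1, as the input checks require.
preds = torch.tensor([[0.75, 0.25], [0.25, 0.75]], dtype=torch.float16)
target = torch.tensor([0, 0])

# The formatting helper above converts preds to float32 internally, so ops
# without half-precision support (the diff cites min()) do not fail.
accuracy = Accuracy()
print(accuracy(preds, target))  # tensor(0.5000): only the first sample is correct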
3 changes: 2 additions & 1 deletion pytorch_lightning/metrics/functional/__init__.py
@@ -13,7 +13,6 @@
# limitations under the License.
from pytorch_lightning.metrics.functional.average_precision import average_precision # noqa: F401
from pytorch_lightning.metrics.functional.classification import ( # noqa: F401
accuracy,
auc,
auroc,
dice_score,
@@ -32,8 +31,10 @@
)
from pytorch_lightning.metrics.functional.confusion_matrix import confusion_matrix # noqa: F401
# TODO: unify metrics between class and functional, add below
from pytorch_lightning.metrics.functional.accuracy import accuracy # noqa: F401
from pytorch_lightning.metrics.functional.explained_variance import explained_variance # noqa: F401
from pytorch_lightning.metrics.functional.f_beta import fbeta, f1 # noqa: F401
from pytorch_lightning.metrics.functional.hamming_distance import hamming_distance # noqa: F401
from pytorch_lightning.metrics.functional.mean_absolute_error import mean_absolute_error # noqa: F401
from pytorch_lightning.metrics.functional.mean_squared_error import mean_squared_error # noqa: F401
from pytorch_lightning.metrics.functional.mean_squared_log_error import mean_squared_log_error # noqa: F401
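The import reshuffle above moves ``accuracy`` to its new module and adds ``hamming_distance`` to the functional namespace. A minimal sketch of the functional API as exposed after this change (input tensors are illustrative; the printed values follow from the documented semantics rather than from running this PR):

import torch
from pytorch_lightning.metrics.functional import accuracy, hamming_distance

target = torch.tensor([0, 1, 2, 3])
preds = torch.tensor([0, 2, 1, 3])

print(accuracy(preds, target))          # tensor(0.5000): 2 of 4 samples correct
print(hamming_distance(preds, target))  # tensor(0.2500): 4 of 16 one-hot label entries differ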