
New metric: Calinski Harabasz Score (#2036)
* docs

* functional

* module

* tests

* changelog

* try another link

* mypy

* remove broken link

* change image

* use new inputs

* fix

* fix flaky tests

---------

Co-authored-by: Daniel Stancl <46073029+stancld@users.noreply.github.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
3 people authored Sep 4, 2023
1 parent c139a96 commit b10cc2f
Showing 13 changed files with 295 additions and 8 deletions.
10 changes: 7 additions & 3 deletions CHANGELOG.md
@@ -11,13 +11,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Added `MutualInformationScore` metric to cluster package ([#2008](https://github.com/Lightning-AI/torchmetrics/pull/2008)
- Added `MutualInformationScore` metric to cluster package ([#2008](https://github.com/Lightning-AI/torchmetrics/pull/2008))


- Added `RandScore` metric to cluster package ([#2025](https://github.com/Lightning-AI/torchmetrics/pull/2025)
- Added `RandScore` metric to cluster package ([#2025](https://github.com/Lightning-AI/torchmetrics/pull/2025))


- Added `NormalizedMutualInfoScore` metric to cluster package ([#2029](https://github.com/Lightning-AI/torchmetrics/pull/2029)
- Added `CalinskiHarabaszScore` metric to cluster package ([#2036](https://github.com/Lightning-AI/torchmetrics/pull/2036))


- Added `NormalizedMutualInfoScore` metric to cluster package ([#2029](https://github.com/Lightning-AI/torchmetrics/pull/2029))



### Changed
21 changes: 21 additions & 0 deletions docs/source/clustering/calinski_harabasz_score.rst
@@ -0,0 +1,21 @@
.. customcarditem::
    :header: Calinski Harabasz Score
    :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg
    :tags: Clustering

.. include:: ../links.rst

#######################
Calinski Harabasz Score
#######################

Module Interface
________________

.. autoclass:: torchmetrics.clustering.CalinskiHarabaszScore
    :exclude-members: update, compute

Functional Interface
____________________

.. autofunction:: torchmetrics.functional.clustering.calinski_harabasz_score
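
The page above only wires up autodoc directives. As a quick orientation, here is a minimal sketch of the two interfaces it documents, using the same seeded inputs as the docstring examples added elsewhere in this commit; the module interface delegates to the functional one, so both return the same value:

>>> import torch
>>> from torchmetrics.clustering import CalinskiHarabaszScore
>>> from torchmetrics.functional.clustering import calinski_harabasz_score
>>> _ = torch.manual_seed(42)
>>> data = torch.randn(10, 3)          # (N, d) float embeddings
>>> labels = torch.randint(3, (10,))   # (N,) integer cluster assignments
>>> CalinskiHarabaszScore()(data, labels)     # module interface
tensor(3.0053)
>>> calinski_harabasz_score(data, labels)     # functional interface
tensor(3.0053)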
2 changes: 1 addition & 1 deletion docs/source/clustering/mutual_info_score.rst
@@ -1,6 +1,6 @@
.. customcarditem::
:header: Mutual Information Score
:image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg
:image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg
:tags: Clustering

.. include:: ../links.rst
2 changes: 1 addition & 1 deletion docs/source/clustering/normalized_mutual_info_score.rst
@@ -1,6 +1,6 @@
.. customcarditem::
:header: Normalized Mutual Information Score
:image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg
:image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg
:tags: Clustering

.. include:: ../links.rst
2 changes: 1 addition & 1 deletion docs/source/clustering/rand_score.rst
@@ -1,6 +1,6 @@
.. customcarditem::
:header: Rand Score
:image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/clustering.svg
:image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/default.svg
:tags: Clustering

.. include:: ../links.rst
2 changes: 2 additions & 0 deletions src/torchmetrics/clustering/__init__.py
@@ -11,11 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from torchmetrics.clustering.calinski_harabasz_score import CalinskiHarabaszScore
from torchmetrics.clustering.mutual_info_score import MutualInfoScore
from torchmetrics.clustering.normalized_mutual_info_score import NormalizedMutualInfoScore
from torchmetrics.clustering.rand_score import RandScore

__all__ = [
"CalinskiHarabaszScore",
"MutualInfoScore",
"NormalizedMutualInfoScore",
"RandScore",
126 changes: 126 additions & 0 deletions src/torchmetrics/clustering/calinski_harabasz_score.py
@@ -0,0 +1,126 @@
# Copyright The Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, List, Optional, Sequence, Union

from torch import Tensor

from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score
from torchmetrics.metric import Metric
from torchmetrics.utilities.data import dim_zero_cat
from torchmetrics.utilities.imports import _MATPLOTLIB_AVAILABLE
from torchmetrics.utilities.plot import _AX_TYPE, _PLOT_OUT_TYPE

if not _MATPLOTLIB_AVAILABLE:
    __doctest_skip__ = ["CalinskiHarabaszScore.plot"]


class CalinskiHarabaszScore(Metric):
    r"""Compute Calinski Harabasz Score (also known as variance ratio criterion) for clustering algorithms.

    .. math::
        CHS(X, L) = \frac{B(X, L) \cdot (n_\text{samples} - n_\text{labels})}{W(X, L) \cdot (n_\text{labels} - 1)}

    where :math:`B(X, L)` is the between-cluster dispersion, which is the squared distance between the cluster centers
    and the dataset mean, weighted by the size of the clusters, :math:`n_\text{samples}` is the number of samples,
    :math:`n_\text{labels}` is the number of labels, and :math:`W(X, L)` is the within-cluster dispersion, i.e. the
    sum of squared distances between each sample and its closest cluster center.

    This clustering metric is an intrinsic measure because it does not rely on ground truth labels for the evaluation.
    Instead it examines how well the clusters are separated from each other. The score is higher when clusters are
    dense and well separated, which relates to a standard concept of a cluster.

    As input to ``forward`` and ``update`` the metric accepts the following input:

    - ``data`` (:class:`~torch.Tensor`): float tensor with shape ``(N,d)`` with the embedded data. ``d`` is the
      dimensionality of the embedding space.
    - ``labels`` (:class:`~torch.Tensor`): single integer tensor with shape ``(N,)`` with cluster labels

    As output of ``forward`` and ``compute`` the metric returns the following output:

    - ``chs`` (:class:`~torch.Tensor`): A tensor with the Calinski Harabasz Score

    Args:
        kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.

    Example:
        >>> import torch
        >>> from torchmetrics.clustering import CalinskiHarabaszScore
        >>> _ = torch.manual_seed(42)
        >>> data = torch.randn(10, 3)
        >>> labels = torch.randint(3, (10,))
        >>> metric = CalinskiHarabaszScore()
        >>> metric(data, labels)
        tensor(3.0053)

    """
    is_differentiable: bool = True
    higher_is_better: bool = True
    full_state_update: bool = False
    plot_lower_bound: float = 0.0
    data: List[Tensor]
    labels: List[Tensor]

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)

        self.add_state("data", default=[], dist_reduce_fx="cat")
        self.add_state("labels", default=[], dist_reduce_fx="cat")

    def update(self, data: Tensor, labels: Tensor) -> None:
        """Update metric state with new data and labels."""
        self.data.append(data)
        self.labels.append(labels)

    def compute(self) -> Tensor:
        """Compute the Calinski Harabasz Score over all data and labels."""
        return calinski_harabasz_score(dim_zero_cat(self.data), dim_zero_cat(self.labels))

    def plot(self, val: Union[Tensor, Sequence[Tensor], None] = None, ax: Optional[_AX_TYPE] = None) -> _PLOT_OUT_TYPE:
        """Plot a single or multiple values from the metric.

        Args:
            val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
                If no value is provided, will automatically call `metric.compute` and plot that result.
            ax: A matplotlib axis object. If provided will add plot to that axis

        Returns:
            Figure and Axes object

        Raises:
            ModuleNotFoundError:
                If `matplotlib` is not installed

        .. plot::
            :scale: 75

            >>> # Example plotting a single value
            >>> import torch
            >>> from torchmetrics.clustering import CalinskiHarabaszScore
            >>> metric = CalinskiHarabaszScore()
            >>> metric.update(torch.randn(10, 3), torch.randint(0, 2, (10,)))
            >>> fig_, ax_ = metric.plot(metric.compute())

        .. plot::
            :scale: 75

            >>> # Example plotting multiple values
            >>> import torch
            >>> from torchmetrics.clustering import CalinskiHarabaszScore
            >>> metric = CalinskiHarabaszScore()
            >>> for _ in range(10):
            ...     metric.update(torch.randn(10, 3), torch.randint(0, 2, (10,)))
            >>> fig_, ax_ = metric.plot(metric.compute())

        """
        return self._plot(val, ax)
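
Because both states are lists reduced with ``dist_reduce_fx="cat"``, ``compute`` concatenates everything the metric has seen and only then scores it, so accumulating over several batches yields the score of the full dataset rather than an average of per-batch scores. A minimal usage sketch of that accumulation pattern (illustrative shapes and cluster assignments, not part of the commit):

>>> import torch
>>> from torchmetrics.clustering import CalinskiHarabaszScore
>>> metric = CalinskiHarabaszScore()
>>> for _ in range(3):                         # e.g. looping over dataloader batches
...     embeddings = torch.randn(32, 8)        # (N, d) batch of embeddings
...     cluster_ids = torch.randint(4, (32,))  # (N,) labels from some clustering step
...     metric.update(embeddings, cluster_ids)
>>> score = metric.compute()                   # score over all 96 accumulated samples
>>> metric.reset()                             # clear state before the next evaluation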
2 changes: 1 addition & 1 deletion src/torchmetrics/detection/giou.py
@@ -174,7 +174,7 @@ def plot(
... ]
>>> target = lambda : [
... {
... "boxes": torch.tensor([[300.00, 100.00, 315.00, 150.00]]) + torch.randint(-10, 10, (1, 4)),
... "boxes": torch.tensor([[300.00, 100.00, 335.00, 150.00]]) + torch.randint(-10, 10, (1, 4)),
... "labels": torch.tensor([5]),
... }
... ]
2 changes: 2 additions & 0 deletions src/torchmetrics/functional/clustering/__init__.py
@@ -11,11 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score
from torchmetrics.functional.clustering.mutual_info_score import mutual_info_score
from torchmetrics.functional.clustering.normalized_mutual_info_score import normalized_mutual_info_score
from torchmetrics.functional.clustering.rand_score import rand_score

__all__ = [
"calinski_harabasz_score",
"mutual_info_score",
"normalized_mutual_info_score",
"rand_score",
73 changes: 73 additions & 0 deletions src/torchmetrics/functional/clustering/calinski_harabasz_score.py
@@ -0,0 +1,73 @@
# Copyright The Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from torch import Tensor


def _calinski_harabasz_score_validate_input(data: Tensor, labels: Tensor) -> None:
    """Validate that the input data and labels have correct shape and type."""
    if data.ndim != 2:
        raise ValueError(f"Expected 2D data, got {data.ndim}D data instead")
    if not data.is_floating_point():
        raise ValueError(f"Expected floating point data, got {data.dtype} data instead")
    if labels.ndim != 1:
        raise ValueError(f"Expected 1D labels, got {labels.ndim}D labels instead")


def calinski_harabasz_score(data: Tensor, labels: Tensor) -> Tensor:
    """Compute the Calinski Harabasz Score (also known as variance ratio criterion) for clustering algorithms.

    Args:
        data: float tensor with shape ``(N,d)`` with the embedded data.
        labels: single integer tensor with shape ``(N,)`` with cluster labels

    Returns:
        Scalar tensor with the Calinski Harabasz Score

    Example:
        >>> import torch
        >>> from torchmetrics.functional.clustering import calinski_harabasz_score
        >>> _ = torch.manual_seed(42)
        >>> data = torch.randn(10, 3)
        >>> labels = torch.randint(0, 2, (10,))
        >>> calinski_harabasz_score(data, labels)
        tensor(3.4998)

    """
    _calinski_harabasz_score_validate_input(data, labels)

    # convert to zero indexed labels
    unique_labels, labels = torch.unique(labels, return_inverse=True)
    n_labels = len(unique_labels)

    n_samples = data.shape[0]

    if not 1 < n_labels < n_samples:
        raise ValueError(
            "Number of detected clusters must be greater than one and less than the number of samples."
            f" Got {n_labels} clusters and {n_samples} samples."
        )

    mean = data.mean(dim=0)
    between_cluster_dispersion = torch.tensor(0.0, device=data.device)
    within_cluster_dispersion = torch.tensor(0.0, device=data.device)
    for k in range(n_labels):
        cluster_k = data[labels == k, :]
        mean_k = cluster_k.mean(dim=0)
        between_cluster_dispersion += ((mean_k - mean) ** 2).sum() * cluster_k.shape[0]
        within_cluster_dispersion += ((cluster_k - mean_k) ** 2).sum()

    if within_cluster_dispersion == 0:
        return torch.tensor(1.0, device=data.device, dtype=torch.float32)
    return between_cluster_dispersion * (n_samples - n_labels) / (within_cluster_dispersion * (n_labels - 1.0))
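
The unit tests added below use scikit-learn's ``calinski_harabasz_score`` as the reference implementation with ``atol=1e-5``. A hedged local sanity check along the same lines (not part of the commit) could look like:

import torch
from sklearn.metrics import calinski_harabasz_score as sklearn_chs

from torchmetrics.functional.clustering import calinski_harabasz_score

data = torch.randn(50, 4)          # (N, d) float embeddings
labels = torch.randint(3, (50,))   # (N,) integer cluster labels

tm_score = calinski_harabasz_score(data, labels)
sk_score = sklearn_chs(data.numpy(), labels.numpy())

# both implement the same variance-ratio formula, so they should agree
# up to floating point precision
assert abs(tm_score.item() - sk_score) < 1e-4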
56 changes: 56 additions & 0 deletions tests/unittests/clustering/test_calinski_harabasz_score.py
@@ -0,0 +1,56 @@
# Copyright The Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from sklearn.metrics import calinski_harabasz_score as sklearn_calinski_harabasz_score
from torchmetrics.clustering.calinski_harabasz_score import CalinskiHarabaszScore
from torchmetrics.functional.clustering.calinski_harabasz_score import calinski_harabasz_score

from unittests.clustering.inputs import _single_target_intrinsic1, _single_target_intrinsic2
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)


@pytest.mark.parametrize(
    "preds, target",
    [
        (_single_target_intrinsic1.preds, _single_target_intrinsic1.target),
        (_single_target_intrinsic2.preds, _single_target_intrinsic2.target),
    ],
)
class TestCalinskiHarabaszScore(MetricTester):
    """Test class for `CalinskiHarabaszScore` metric."""

    atol = 1e-5

    @pytest.mark.parametrize("ddp", [True, False])
    def test_calinski_harabasz_score(self, preds, target, ddp):
        """Test class implementation of metric."""
        self.run_class_metric_test(
            ddp=ddp,
            preds=preds,
            target=target,
            metric_class=CalinskiHarabaszScore,
            reference_metric=sklearn_calinski_harabasz_score,
        )

    def test_calinski_harabasz_score_functional(self, preds, target):
        """Test functional implementation of metric."""
        self.run_functional_metric_test(
            preds=preds,
            target=target,
            metric_functional=calinski_harabasz_score,
            reference_metric=sklearn_calinski_harabasz_score,
        )
2 changes: 2 additions & 0 deletions tests/unittests/image/test_perceptual_path_length.py
@@ -164,6 +164,7 @@ def num_classes(self):
),
],
)
@skip_on_running_out_of_memory()
def test_raises_error_on_wrong_generator(generator, errortype, match):
"""Test that appropriate errors are raised on wrong generator."""
with pytest.raises(errortype, match=match):
@@ -176,6 +176,7 @@ def test_raises_error_on_wrong_generator(generator, errortype, match):

@pytest.mark.skipif(not _TORCH_FIDELITY_AVAILABLE, reason="test requires torch_fidelity")
@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
@skip_on_running_out_of_memory()
def test_compare():
"""Test against torch_fidelity.
3 changes: 2 additions & 1 deletion tests/unittests/utilities/test_plot.py
@@ -91,7 +91,7 @@
MultilabelROC,
MultilabelSpecificity,
)
from torchmetrics.clustering import MutualInfoScore, NormalizedMutualInfoScore, RandScore
from torchmetrics.clustering import CalinskiHarabaszScore, MutualInfoScore, NormalizedMutualInfoScore, RandScore
from torchmetrics.detection import PanopticQuality
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from torchmetrics.functional.audio import scale_invariant_signal_noise_ratio
@@ -617,6 +617,7 @@
pytest.param(TranslationEditRate, _text_input_3, _text_input_4, id="translation edit rate"),
pytest.param(MutualInfoScore, _nominal_input, _nominal_input, id="mutual info score"),
pytest.param(RandScore, _nominal_input, _nominal_input, id="rand score"),
pytest.param(CalinskiHarabaszScore, lambda: torch.randn(100, 3), _nominal_input, id="calinski harabasz score"),
pytest.param(NormalizedMutualInfoScore, _nominal_input, _nominal_input, id="normalized mutual info score"),
],
)
