diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4d73015dcee..553d69de415 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,8 +29,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Removed

--
-
+- Removed deprecated `BinnedAveragePrecision`, `BinnedPrecisionRecallCurve`, `BinnedRecallAtFixedPrecision` ([#1251](https://github.com/Lightning-AI/metrics/pull/1251))
+- Removed deprecated `LabelRankingAveragePrecision`, `LabelRankingLoss` and `CoverageError` ([#1251](https://github.com/Lightning-AI/metrics/pull/1251))
+- Removed deprecated `KLDivergence` and `AUC` ([#1251](https://github.com/Lightning-AI/metrics/pull/1251))

 ### Fixed

@@ -74,6 +75,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Improved performance of retrieval metrics ([#1242](https://github.com/Lightning-AI/metrics/pull/1242))
 - Changed `SSIM` and `MSSSIM` update to be online to reduce memory usage ([#1231](https://github.com/Lightning-AI/metrics/pull/1231))

+### Deprecated
+
+- Deprecated `BinnedAveragePrecision`, `BinnedPrecisionRecallCurve`, `BinnedRecallAtFixedPrecision` ([#1163](https://github.com/Lightning-AI/metrics/pull/1163))
+  * `BinnedAveragePrecision` -> use `AveragePrecision` with `thresholds` arg
+  * `BinnedPrecisionRecallCurve` -> use `PrecisionRecallCurve` with `thresholds` arg
+  * `BinnedRecallAtFixedPrecision` -> use `RecallAtFixedPrecision` with `thresholds` arg
+- Renamed and refactored `LabelRankingAveragePrecision`, `LabelRankingLoss` and `CoverageError` ([#1167](https://github.com/Lightning-AI/metrics/pull/1167))
+  * `LabelRankingAveragePrecision` -> `MultilabelRankingAveragePrecision`
+  * `LabelRankingLoss` -> `MultilabelRankingLoss`
+  * `CoverageError` -> `MultilabelCoverageError`
+- Deprecated `KLDivergence` and `AUC` from classification package ([#1189](https://github.com/Lightning-AI/metrics/pull/1189))
+  * `KLDivergence` moved to `regression` package
+  * Instead of `AUC` use `torchmetrics.utilities.compute.auc`

 ### Fixed

diff --git a/docs/source/classification/auc.rst b/docs/source/classification/auc.rst
deleted file mode 100644
index 8f95bbd594c..00000000000
--- a/docs/source/classification/auc.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-.. customcarditem::
-   :header: Area Under the Curve (AUC)
-   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg
-   :tags: Classification
-
-###
-AUC
-###
-
-Module Interface
-________________
-
-.. autoclass:: torchmetrics.AUC
-    :noindex:
-
-Functional Interface
-____________________
-
-.. autofunction:: torchmetrics.functional.auc
-    :noindex:
diff --git a/docs/source/classification/binned_average_precision.rst b/docs/source/classification/binned_average_precision.rst
deleted file mode 100644
index 344263052c7..00000000000
--- a/docs/source/classification/binned_average_precision.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-.. customcarditem::
-   :header: Binned Average Precision
-   :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg
-   :tags: Classification
-
-########################
-Binned Average Precision
-########################
-
-Module Interface
-________________
-
-.. autoclass:: torchmetrics.BinnedAveragePrecision
-    :noindex:
diff --git a/docs/source/classification/binned_precision_recall_curve.rst b/docs/source/classification/binned_precision_recall_curve.rst
deleted file mode 100644
index 6c056068aa9..00000000000
--- a/docs/source/classification/binned_precision_recall_curve.rst
+++ /dev/null
@@ -1,14 +0,0 @@
-..
customcarditem:: - :header: Binned Precision Recall Curve - :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg - :tags: Classification - -############################# -Binned Precision Recall Curve -############################# - -Module Interface -________________ - -.. autoclass:: torchmetrics.BinnedPrecisionRecallCurve - :noindex: diff --git a/docs/source/classification/binned_recall_fixed_precision.rst b/docs/source/classification/binned_recall_fixed_precision.rst deleted file mode 100644 index 6f169744ae7..00000000000 --- a/docs/source/classification/binned_recall_fixed_precision.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. customcarditem:: - :header: Binned Recall At Fixed Precision - :image: https://pl-flash-data.s3.amazonaws.com/assets/thumbnails/tabular_classification.svg - :tags: Classification - -################################ -Binned Recall At Fixed Precision -################################ - -Module Interface -________________ - -.. autoclass:: torchmetrics.BinnedRecallAtFixedPrecision - :noindex: diff --git a/docs/source/classification/coverage_error.rst b/docs/source/classification/coverage_error.rst index 16db100c474..29f979ab456 100644 --- a/docs/source/classification/coverage_error.rst +++ b/docs/source/classification/coverage_error.rst @@ -10,17 +10,11 @@ Coverage Error Module Interface ________________ -.. autoclass:: torchmetrics.CoverageError - :noindex: - .. autoclass:: torchmetrics.classification.MultilabelCoverageError :noindex: Functional Interface ____________________ -.. autofunction:: torchmetrics.functional.coverage_error - :noindex: - .. autofunction:: torchmetrics.functional.classification.multilabel_coverage_error :noindex: diff --git a/docs/source/classification/label_ranking_average_precision.rst b/docs/source/classification/label_ranking_average_precision.rst index 32f1b0867b5..1f44bfbbfda 100644 --- a/docs/source/classification/label_ranking_average_precision.rst +++ b/docs/source/classification/label_ranking_average_precision.rst @@ -10,9 +10,6 @@ Label Ranking Average Precision Module Interface ________________ -.. autoclass:: torchmetrics.LabelRankingAveragePrecision - :noindex: - .. autoclass:: torchmetrics.classification.MultilabelRankingAveragePrecision :noindex: @@ -20,8 +17,5 @@ ________________ Functional Interface ____________________ -.. autofunction:: torchmetrics.functional.label_ranking_average_precision - :noindex: - .. autofunction:: torchmetrics.functional.classification.multilabel_ranking_average_precision :noindex: diff --git a/docs/source/classification/label_ranking_loss.rst b/docs/source/classification/label_ranking_loss.rst index 168b2c80ceb..ae7f9567a7d 100644 --- a/docs/source/classification/label_ranking_loss.rst +++ b/docs/source/classification/label_ranking_loss.rst @@ -10,18 +10,11 @@ Label Ranking Loss Module Interface ________________ -.. autoclass:: torchmetrics.LabelRankingLoss - :noindex: - - .. autoclass:: torchmetrics.classification.MultilabelRankingLoss :noindex: Functional Interface ____________________ -.. autofunction:: torchmetrics.functional.label_ranking_loss - :noindex: - .. autofunction:: torchmetrics.functional.classification.multilabel_ranking_loss :noindex: diff --git a/docs/source/index.rst b/docs/source/index.rst index f3bc95a8ae9..1977f03e303 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -132,7 +132,6 @@ Or directly from conda pages/overview pages/implement pages/lightning - pages/classification pages/retrieval .. 
toctree:: diff --git a/docs/source/pages/classification.rst b/docs/source/pages/classification.rst deleted file mode 100644 index 457222b07c0..00000000000 --- a/docs/source/pages/classification.rst +++ /dev/null @@ -1,101 +0,0 @@ -**************************** -Using Classification Metrics -**************************** - -Input types -~~~~~~~~~~~ - -For the purposes of classification metrics, inputs (predictions and targets) are split -into these categories (``N`` stands for the batch size and ``C`` for number of classes): - -.. csv-table:: \*dtype ``binary`` means integers that are either 0 or 1 - :header: "Type", "preds shape", "preds dtype", "target shape", "target dtype" - :widths: 20, 10, 10, 10, 10 - - "Binary", "(N,)", "``float``", "(N,)", "``binary``\*" - "Multi-class", "(N,)", "``int``", "(N,)", "``int``" - "Multi-class with logits or probabilities", "(N, C)", "``float``", "(N,)", "``int``" - "Multi-label", "(N, ...)", "``float``", "(N, ...)", "``binary``\*" - "Multi-dimensional multi-class", "(N, ...)", "``int``", "(N, ...)", "``int``" - "Multi-dimensional multi-class with logits or probabilities", "(N, C, ...)", "``float``", "(N, ...)", "``int``" - -.. note:: - All dimensions of size 1 (except ``N``) are "squeezed out" at the beginning, so - that, for example, a tensor of shape ``(N, 1)`` is treated as ``(N, )``. - -When predictions or targets are integers, it is assumed that class labels start at 0, i.e. -the possible class labels are 0, 1, 2, 3, etc. Below are some examples of different input types - -.. testcode:: - - # Binary inputs - binary_preds = torch.tensor([0.6, 0.1, 0.9]) - binary_target = torch.tensor([1, 0, 2]) - - # Multi-class inputs - mc_preds = torch.tensor([0, 2, 1]) - mc_target = torch.tensor([0, 1, 2]) - - # Multi-class inputs with probabilities - mc_preds_probs = torch.tensor([[0.8, 0.2, 0], [0.1, 0.2, 0.7], [0.3, 0.6, 0.1]]) - mc_target_probs = torch.tensor([0, 1, 2]) - - # Multi-label inputs - ml_preds = torch.tensor([[0.2, 0.8, 0.9], [0.5, 0.6, 0.1], [0.3, 0.1, 0.1]]) - ml_target = torch.tensor([[0, 1, 1], [1, 0, 0], [0, 0, 0]]) - - -Using the multiclass parameter ------------------------------- - -In some cases, you might have inputs which appear to be (multi-dimensional) multi-class -but are actually binary/multi-label - for example, if both predictions and targets are -integer (binary) tensors. Or it could be the other way around, you want to treat -binary/multi-label inputs as 2-class (multi-dimensional) multi-class inputs. - -For these cases, the metrics where this distinction would make a difference, expose the -``multiclass`` argument. Let's see how this is used on the example of -:class:`~torchmetrics.StatScores` metric. - -First, let's consider the case with label predictions with 2 classes, which we want to -treat as binary. - -.. testcode:: - - from torchmetrics.functional import stat_scores - - # These inputs are supposed to be binary, but appear as multi-class - preds = torch.tensor([0, 1, 0]) - target = torch.tensor([1, 1, 0]) - -As you can see below, by default the inputs are treated -as multi-class. We can set ``multiclass=False`` to treat the inputs as binary - -which is the same as converting the predictions to float beforehand. - -.. 
doctest:: - - >>> stat_scores(preds, target, reduce='macro', num_classes=2) - tensor([[1, 1, 1, 0, 1], - [1, 0, 1, 1, 2]]) - >>> stat_scores(preds, target, reduce='macro', num_classes=1, multiclass=False) - tensor([[1, 0, 1, 1, 2]]) - >>> stat_scores(preds.float(), target, reduce='macro', num_classes=1) - tensor([[1, 0, 1, 1, 2]]) - -Next, consider the opposite example: inputs are binary (as predictions are probabilities), -but we would like to treat them as 2-class multi-class, to obtain the metric for both classes. - -.. testcode:: - - preds = torch.tensor([0.2, 0.7, 0.3]) - target = torch.tensor([1, 1, 0]) - -In this case we can set ``multiclass=True``, to treat the inputs as multi-class. - -.. doctest:: - - >>> stat_scores(preds, target, reduce='macro', num_classes=1) - tensor([[1, 0, 1, 1, 2]]) - >>> stat_scores(preds, target, reduce='macro', num_classes=2, multiclass=True) - tensor([[1, 1, 1, 0, 1], - [1, 0, 1, 1, 2]]) diff --git a/src/torchmetrics/__init__.py b/src/torchmetrics/__init__.py index 8aa046a9938..514b7365ab3 100644 --- a/src/torchmetrics/__init__.py +++ b/src/torchmetrics/__init__.py @@ -21,26 +21,19 @@ SignalNoiseRatio, ) from torchmetrics.classification import ( # noqa: E402 - AUC, AUROC, ROC, Accuracy, AveragePrecision, - BinnedAveragePrecision, - BinnedPrecisionRecallCurve, - BinnedRecallAtFixedPrecision, CalibrationError, CohenKappa, ConfusionMatrix, - CoverageError, Dice, F1Score, FBetaScore, HammingDistance, HingeLoss, JaccardIndex, - LabelRankingAveragePrecision, - LabelRankingLoss, MatthewsCorrCoef, Precision, PrecisionRecallCurve, @@ -113,12 +106,8 @@ __all__ = [ "functional", "Accuracy", - "AUC", "AUROC", "AveragePrecision", - "BinnedAveragePrecision", - "BinnedPrecisionRecallCurve", - "BinnedRecallAtFixedPrecision", "BLEUScore", "BootStrapper", "CalibrationError", @@ -130,7 +119,6 @@ "CohenKappa", "ConfusionMatrix", "CosineSimilarity", - "CoverageError", "Dice", "TweedieDevianceScore", "ErrorRelativeGlobalDimensionlessSynthesis", @@ -142,8 +130,6 @@ "HingeLoss", "JaccardIndex", "KLDivergence", - "LabelRankingAveragePrecision", - "LabelRankingLoss", "MatchErrorRate", "MatthewsCorrCoef", "MaxMetric", diff --git a/src/torchmetrics/classification/__init__.py b/src/torchmetrics/classification/__init__.py index 862a6677655..29185666b5c 100644 --- a/src/torchmetrics/classification/__init__.py +++ b/src/torchmetrics/classification/__init__.py @@ -29,9 +29,7 @@ MultilabelStatScores, StatScores, ) - from torchmetrics.classification.accuracy import Accuracy, BinaryAccuracy, MulticlassAccuracy, MultilabelAccuracy -from torchmetrics.classification.auc import AUC from torchmetrics.classification.auroc import AUROC, BinaryAUROC, MulticlassAUROC, MultilabelAUROC from torchmetrics.classification.average_precision import ( AveragePrecision, @@ -39,11 +37,6 @@ MulticlassAveragePrecision, MultilabelAveragePrecision, ) -from torchmetrics.classification.binned_precision_recall import ( - BinnedAveragePrecision, - BinnedPrecisionRecallCurve, - BinnedRecallAtFixedPrecision, -) from torchmetrics.classification.calibration_error import ( BinaryCalibrationError, CalibrationError, @@ -92,9 +85,6 @@ Recall, ) from torchmetrics.classification.ranking import ( - CoverageError, - LabelRankingAveragePrecision, - LabelRankingLoss, MultilabelCoverageError, MultilabelRankingAveragePrecision, MultilabelRankingLoss, diff --git a/src/torchmetrics/classification/accuracy.py b/src/torchmetrics/classification/accuracy.py index 79051ffe3f2..5f564e1d77a 100644 --- 
a/src/torchmetrics/classification/accuracy.py +++ b/src/torchmetrics/classification/accuracy.py @@ -350,8 +350,6 @@ class Accuracy(StatScores): changed to subset accuracy (which requires all labels or sub-samples in the sample to be correctly predicted) by setting ``subset_accuracy=True``. - Accepts all input types listed in :ref:`pages/classification:input types`. - Args: num_classes: Number of classes. Necessary for ``'macro'``, ``'weighted'`` and ``None`` average methods. @@ -387,11 +385,10 @@ class Accuracy(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -409,9 +406,7 @@ class Accuracy(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. subset_accuracy: Whether to compute subset accuracy for multi-label and multi-dimensional @@ -557,9 +552,7 @@ def __init__( self.add_state("total", default=tensor(0), dist_reduce_fx="sum") def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore - """Update state with predictions and targets. See - :ref:`pages/classification:input types` for more information on input - types. + """Update state with predictions and targets. Args: preds: Predictions from model (logits, probabilities, or labels) diff --git a/src/torchmetrics/classification/auc.py b/src/torchmetrics/classification/auc.py deleted file mode 100644 index f1a4ee719e7..00000000000 --- a/src/torchmetrics/classification/auc.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Any, List, Optional - -from torch import Tensor - -from torchmetrics.metric import Metric -from torchmetrics.utilities import rank_zero_warn -from torchmetrics.utilities.compute import _auc_compute, _auc_format_inputs -from torchmetrics.utilities.data import dim_zero_cat - - -class AUC(Metric): - r"""Computes Area Under the Curve (AUC) using the trapezoidal rule. - - Forward accepts two input tensors that should be 1D and have the same number - of elements - - .. note:: - This metric has been deprecated in v0.10 and will be removed in v0.11. - - Args: - reorder: AUC expects its first input to be sorted. 
If this is not the case, - setting this argument to ``True`` will use a stable sorting algorithm to - sort the input in descending order - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - """ - is_differentiable: bool = False - higher_is_better: Optional[bool] = None - full_state_update: bool = False - x: List[Tensor] - y: List[Tensor] - - def __init__( - self, - reorder: bool = False, - **kwargs: Any, - ) -> None: - super().__init__(**kwargs) - rank_zero_warn( - "`torchmetrics.classification.AUC` has been deprecated in v0.10 and will be removed in v0.11." - "A functional version is still available in `torchmetrics.utilities.compute`", - DeprecationWarning, - ) - - self.reorder = reorder - - self.add_state("x", default=[], dist_reduce_fx="cat") - self.add_state("y", default=[], dist_reduce_fx="cat") - - rank_zero_warn( - "Metric `AUC` will save all targets and predictions in buffer." - " For large datasets this may lead to large memory footprint." - ) - - def update(self, preds: Tensor, target: Tensor) -> None: - """Update state with predictions and targets. - - Args: - preds: Predictions from model (probabilities, or labels) - target: Ground truth labels - """ - x, y = _auc_format_inputs(preds, target) - - self.x.append(x) - self.y.append(y) - - def compute(self) -> Tensor: - """Computes AUC based on inputs passed in to ``update`` previously.""" - x = dim_zero_cat(self.x) - y = dim_zero_cat(self.y) - return _auc_compute(x, y, reorder=self.reorder) diff --git a/src/torchmetrics/classification/binned_precision_recall.py b/src/torchmetrics/classification/binned_precision_recall.py deleted file mode 100644 index d7253527ae3..00000000000 --- a/src/torchmetrics/classification/binned_precision_recall.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Any, List, Optional, Tuple, Union - -import torch -from torch import Tensor - -from torchmetrics.functional.classification.average_precision import _average_precision_compute_with_precision_recall -from torchmetrics.metric import Metric -from torchmetrics.utilities.data import METRIC_EPS, to_onehot -from torchmetrics.utilities.prints import rank_zero_warn - - -def _recall_at_precision( - precision: Tensor, - recall: Tensor, - thresholds: Tensor, - min_precision: float, -) -> Tuple[Tensor, Tensor]: - try: - max_recall, _, best_threshold = max( - (r, p, t) for p, r, t in zip(precision, recall, thresholds) if p >= min_precision - ) - - except ValueError: - max_recall = torch.tensor(0.0, device=recall.device, dtype=recall.dtype) - best_threshold = torch.tensor(0) - - if max_recall == 0.0: - best_threshold = torch.tensor(1e6, device=thresholds.device, dtype=thresholds.dtype) - - return max_recall, best_threshold - - -class BinnedPrecisionRecallCurve(Metric): - """Computes precision-recall pairs for different thresholds. Works for both binary and multiclass problems. 
In - the case of multiclass, the values will be calculated based on a one-vs-the-rest approach. - - Computation is performed in constant-memory by computing precision and recall - for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). - - .. warn: - This metric has been deprecated in v0.10 and will be removed in v0.11. - Instead use `PrecisionRecallCurve` metric with the `thresholds` argument set accordingly. - - Forward accepts - - - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor - with probabilities, where C is the number of classes. - - - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels - - Args: - num_classes: integer with number of classes. For binary, set to 1. - thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. - It is used for computation will lead to more detailed curve and accurate estimates, - but will be slower and consume more memory. - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Raises: - ValueError: - If ``thresholds`` is not a ``int``, ``list`` or ``tensor`` - - Example (binary case): - >>> from torchmetrics import BinnedPrecisionRecallCurve - >>> pred = torch.tensor([0, 0.1, 0.8, 0.4]) - >>> target = torch.tensor([0, 1, 1, 0]) - >>> pr_curve = BinnedPrecisionRecallCurve(num_classes=1, thresholds=5) - >>> precision, recall, thresholds = pr_curve(pred, target) - >>> precision - tensor([0.5000, 0.5000, 1.0000, 1.0000, 1.0000, 1.0000]) - >>> recall - tensor([1.0000, 0.5000, 0.5000, 0.5000, 0.0000, 0.0000]) - >>> thresholds - tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000]) - - Example (multiclass case): - >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], - ... [0.05, 0.75, 0.05, 0.05, 0.05], - ... [0.05, 0.05, 0.75, 0.05, 0.05], - ... [0.05, 0.05, 0.05, 0.75, 0.05]]) - >>> target = torch.tensor([0, 1, 3, 2]) - >>> pr_curve = BinnedPrecisionRecallCurve(num_classes=5, thresholds=3) - >>> precision, recall, thresholds = pr_curve(pred, target) - >>> precision - [tensor([0.2500, 1.0000, 1.0000, 1.0000]), - tensor([0.2500, 1.0000, 1.0000, 1.0000]), - tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]), - tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]), - tensor([2.5000e-07, 1.0000e+00, 1.0000e+00, 1.0000e+00])] - >>> recall - [tensor([1.0000, 1.0000, 0.0000, 0.0000]), - tensor([1.0000, 1.0000, 0.0000, 0.0000]), - tensor([1.0000, 0.0000, 0.0000, 0.0000]), - tensor([1.0000, 0.0000, 0.0000, 0.0000]), - tensor([0., 0., 0., 0.])] - >>> thresholds - [tensor([0.0000, 0.5000, 1.0000]), - tensor([0.0000, 0.5000, 1.0000]), - tensor([0.0000, 0.5000, 1.0000]), - tensor([0.0000, 0.5000, 1.0000]), - tensor([0.0000, 0.5000, 1.0000])] - """ - - is_differentiable: bool = False - higher_is_better: Optional[bool] = None - full_state_update: bool = False - TPs: Tensor - FPs: Tensor - FNs: Tensor - - def __init__( - self, - num_classes: int, - thresholds: Union[int, Tensor, List[float]] = 100, - **kwargs: Any, - ) -> None: - rank_zero_warn( - "Metric `BinnedPrecisionRecallCurve` has been deprecated in v0.10 and will be completly removed in v0.11." 
- " Instead, use the refactored version of `PrecisionRecallCurve` by specifying the `thresholds` argument.", - DeprecationWarning, - ) - super().__init__(**kwargs) - - self.num_classes = num_classes - if isinstance(thresholds, int): - self.num_thresholds = thresholds - self.thresholds = torch.linspace(0, 1.0, thresholds) - - elif thresholds is not None: - if not isinstance(thresholds, (list, Tensor)): - raise ValueError("Expected argument `thresholds` to either be an integer, list of floats or a tensor") - self.thresholds = torch.tensor(thresholds) if isinstance(thresholds, list) else thresholds - self.num_thresholds = self.thresholds.numel() - - for name in ("TPs", "FPs", "FNs"): - self.add_state( - name=name, - default=torch.zeros(num_classes, self.num_thresholds, dtype=torch.float32), - dist_reduce_fx="sum", - ) - - def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore - """ - Args - preds: (n_samples, n_classes) tensor - target: (n_samples, n_classes) tensor - """ - # binary case - if len(preds.shape) == len(target.shape) == 1: - preds = preds.reshape(-1, 1) - target = target.reshape(-1, 1) - - if len(preds.shape) == len(target.shape) + 1: - target = to_onehot(target, num_classes=self.num_classes) - - target = target == 1 - # Iterate one threshold at a time to conserve memory - for i in range(self.num_thresholds): - predictions = preds >= self.thresholds[i] - self.TPs[:, i] += (target & predictions).sum(dim=0) - self.FPs[:, i] += ((~target) & predictions).sum(dim=0) - self.FNs[:, i] += (target & (~predictions)).sum(dim=0) - - def compute(self) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[List[Tensor], List[Tensor], List[Tensor]]]: - """Returns float tensor of size n_classes.""" - precisions = (self.TPs + METRIC_EPS) / (self.TPs + self.FPs + METRIC_EPS) - recalls = self.TPs / (self.TPs + self.FNs + METRIC_EPS) - - # Need to guarantee that last precision=1 and recall=0, similar to precision_recall_curve - t_ones = torch.ones(self.num_classes, 1, dtype=precisions.dtype, device=precisions.device) - precisions = torch.cat([precisions, t_ones], dim=1) - t_zeros = torch.zeros(self.num_classes, 1, dtype=recalls.dtype, device=recalls.device) - recalls = torch.cat([recalls, t_zeros], dim=1) - if self.num_classes == 1: - return precisions[0, :], recalls[0, :], self.thresholds - return list(precisions), list(recalls), [self.thresholds for _ in range(self.num_classes)] - - -class BinnedAveragePrecision(BinnedPrecisionRecallCurve): - """Computes the average precision score, which summarises the precision recall curve into one number. Works for - both binary and multiclass problems. In the case of multiclass, the values will be calculated based on a one- - vs-the-rest approach. - - Computation is performed in constant-memory by computing precision and recall - for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). - - .. warn: - This metric has been deprecated in v0.10 and will be removed in v0.11. - Instead use `AveragePrecision` metric with the `thresholds` argument set accordingly. - - Forward accepts - - - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor - with probabilities, where C is the number of classes. - - - ``target`` (long tensor): ``(N, ...)`` with integer labels - - Args: - num_classes: integer with number of classes. Not nessesary to provide for binary problems. - thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. 
- It is used for computation will lead to more detailed curve and accurate estimates, - but will be slower and consume more memory - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Raises: - ValueError: - If ``thresholds`` is not a ``list`` or ``tensor`` - - Example (binary case): - >>> from torchmetrics import BinnedAveragePrecision - >>> pred = torch.tensor([0, 1, 2, 3]) - >>> target = torch.tensor([0, 1, 1, 1]) - >>> average_precision = BinnedAveragePrecision(num_classes=1, thresholds=10) - >>> average_precision(pred, target) - tensor(1.0000) - - Example (multiclass case): - >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], - ... [0.05, 0.75, 0.05, 0.05, 0.05], - ... [0.05, 0.05, 0.75, 0.05, 0.05], - ... [0.05, 0.05, 0.05, 0.75, 0.05]]) - >>> target = torch.tensor([0, 1, 3, 2]) - >>> average_precision = BinnedAveragePrecision(num_classes=5, thresholds=10) - >>> average_precision(pred, target) - [tensor(1.0000), tensor(1.0000), tensor(0.2500), tensor(0.2500), tensor(-0.)] - """ - - def __init__( - self, - num_classes: int, - thresholds: Union[int, Tensor, List[float]] = 100, - **kwargs: Any, - ) -> None: - rank_zero_warn( - "Metric `BinnedAveragePrecision` has been deprecated in v0.10 and will be completly removed in v0.11." - " Instead, use the refactored version of `AveragePrecision` by specifying the `thresholds` argument.", - DeprecationWarning, - ) - super().__init__(num_classes=num_classes, thresholds=thresholds, **kwargs) - - def compute(self) -> Union[List[Tensor], Tensor]: # type: ignore - precisions, recalls, _ = super().compute() - return _average_precision_compute_with_precision_recall(precisions, recalls, self.num_classes, average=None) - - -class BinnedRecallAtFixedPrecision(BinnedPrecisionRecallCurve): - """Computes the higest possible recall value given the minimum precision thresholds provided. - - Computation is performed in constant-memory by computing precision and recall - for ``thresholds`` buckets/thresholds (evenly distributed between 0 and 1). - - .. warn: - This metric has been deprecated in v0.10 and will be removed in v0.11. - Instead use `RecallAtFixedPrecision` metric with the `thresholds` argument set accordingly. - - Forward accepts - - - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor - with probabilities, where C is the number of classes. - - - ``target`` (long tensor): ``(N, ...)`` with integer labels - - Args: - num_classes: integer with number of classes. Provide 1 for binary problems. - min_precision: float value specifying minimum precision threshold. - thresholds: list or tensor with specific thresholds or a number of bins from linear sampling. - It is used for computation will lead to more detailed curve and accurate estimates, - but will be slower and consume more memory - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Raises: - ValueError: - If ``thresholds`` is not a list or tensor - - Example (binary case): - >>> from torchmetrics import BinnedRecallAtFixedPrecision - >>> pred = torch.tensor([0, 0.2, 0.5, 0.8]) - >>> target = torch.tensor([0, 1, 1, 0]) - >>> average_precision = BinnedRecallAtFixedPrecision(num_classes=1, thresholds=10, min_precision=0.5) - >>> average_precision(pred, target) - (tensor(1.0000), tensor(0.1111)) - - Example (multiclass case): - >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05], - ... [0.05, 0.75, 0.05, 0.05, 0.05], - ... [0.05, 0.05, 0.75, 0.05, 0.05], - ... 
[0.05, 0.05, 0.05, 0.75, 0.05]]) - >>> target = torch.tensor([0, 1, 3, 2]) - >>> average_precision = BinnedRecallAtFixedPrecision(num_classes=5, thresholds=10, min_precision=0.5) - >>> average_precision(pred, target) - (tensor([1.0000, 1.0000, 0.0000, 0.0000, 0.0000]), - tensor([6.6667e-01, 6.6667e-01, 1.0000e+06, 1.0000e+06, 1.0000e+06])) - """ - - def __init__( - self, - num_classes: int, - min_precision: float, - thresholds: Union[int, Tensor, List[float]] = 100, - **kwargs: Any, - ) -> None: - rank_zero_warn( - "Metric `BinnedRecallAtFixedPrecision` has been deprecated in v0.10 and will be completly removed in v0.11." - " Instead, use the refactored version of `RecallAtFixedPrecision` by specifying the `thresholds` argument.", - DeprecationWarning, - ) - super().__init__(num_classes=num_classes, thresholds=thresholds, **kwargs) - self.min_precision = min_precision - - def compute(self) -> Tuple[Tensor, Tensor]: # type: ignore - """Returns float tensor of size n_classes.""" - precisions, recalls, thresholds = super().compute() - - if self.num_classes == 1: - return _recall_at_precision(precisions, recalls, thresholds, self.min_precision) - - recalls_at_p = torch.zeros(self.num_classes, device=recalls[0].device, dtype=recalls[0].dtype) - thresholds_at_p = torch.zeros(self.num_classes, device=thresholds[0].device, dtype=thresholds[0].dtype) - for i in range(self.num_classes): - recalls_at_p[i], thresholds_at_p[i] = _recall_at_precision( - precisions[i], recalls[i], thresholds[i], self.min_precision - ) - return recalls_at_p, thresholds_at_p diff --git a/src/torchmetrics/classification/dice.py b/src/torchmetrics/classification/dice.py index 771318851b1..9e025dac564 100644 --- a/src/torchmetrics/classification/dice.py +++ b/src/torchmetrics/classification/dice.py @@ -33,7 +33,7 @@ class Dice(StatScores): The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: num_classes: @@ -69,11 +69,11 @@ class Dice(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -90,9 +90,7 @@ class Dice(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
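
For users of the removed binned metrics, the deprecation warnings in the deleted `binned_precision_recall.py` above point at the refactored metrics with a `thresholds` argument. Below is a minimal migration sketch, assuming the binary task classes `BinaryPrecisionRecallCurve`, `BinaryAveragePrecision` and `BinaryRecallAtFixedPrecision` accept an integer `thresholds` for the same constant-memory binning; the inputs mirror the deleted docstring examples and are illustrative only.

    >>> import torch
    >>> from torchmetrics.classification import (
    ...     BinaryAveragePrecision,
    ...     BinaryPrecisionRecallCurve,
    ...     BinaryRecallAtFixedPrecision,
    ... )
    >>> preds = torch.tensor([0.0, 0.1, 0.8, 0.4])
    >>> target = torch.tensor([0, 1, 1, 0])
    >>> # thresholds=5 evaluates the curve at 5 evenly spaced thresholds (constant memory),
    >>> # roughly matching the old BinnedPrecisionRecallCurve(num_classes=1, thresholds=5);
    >>> # Multiclass*/Multilabel* variants are assumed to exist for the non-binary cases.
    >>> precision, recall, thresholds = BinaryPrecisionRecallCurve(thresholds=5)(preds, target)
    >>> ap_score = BinaryAveragePrecision(thresholds=5)(preds, target)
    >>> max_recall, best_thr = BinaryRecallAtFixedPrecision(min_precision=0.5, thresholds=5)(preds, target)
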
diff --git a/src/torchmetrics/classification/f_beta.py b/src/torchmetrics/classification/f_beta.py index dca2d378170..d968810ea9d 100644 --- a/src/torchmetrics/classification/f_beta.py +++ b/src/torchmetrics/classification/f_beta.py @@ -773,11 +773,10 @@ class FBetaScore(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -795,9 +794,7 @@ class FBetaScore(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. @@ -958,11 +955,10 @@ class F1Score(FBetaScore): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -979,9 +975,7 @@ class F1Score(FBetaScore): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. diff --git a/src/torchmetrics/classification/hamming.py b/src/torchmetrics/classification/hamming.py index cd8bf36e291..ef6df5d9f65 100644 --- a/src/torchmetrics/classification/hamming.py +++ b/src/torchmetrics/classification/hamming.py @@ -340,8 +340,6 @@ class HammingDistance(Metric): treats each possible label separately - meaning that, for example, multi-class data is treated as if it were multi-label. - Accepts all input types listed in :ref:`pages/classification:input types`. - Args: threshold: Threshold for transforming probability or logit predictions to binary ``(0,1)`` predictions, in the case @@ -423,8 +421,6 @@ def __init__( def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore """Update state with predictions and targets. - See :ref:`pages/classification:input types` for more information on input types. 
- Args: preds: Predictions from model (probabilities, logits or labels) target: Ground truth labels diff --git a/src/torchmetrics/classification/kl_divergence.py b/src/torchmetrics/classification/kl_divergence.py deleted file mode 100644 index deaaaa4d303..00000000000 --- a/src/torchmetrics/classification/kl_divergence.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import Any - -from typing_extensions import Literal - -from torchmetrics.regression.kl_divergence import KLDivergence as _KLDivergence -from torchmetrics.utilities.prints import rank_zero_warn - - -class KLDivergence(_KLDivergence): - r"""Computes the `KL divergence`_: - - .. math:: - D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q{x}} - - Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution - over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence - is a non-symetrical metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`. - - Args: - p: data distribution with shape ``[N, d]`` - q: prior or approximate distribution with shape ``[N, d]`` - log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities, - will normalize to make sure the distributes sum to 1. - reduction: - Determines how to reduce over the ``N``/batch dimension: - - - ``'mean'`` [default]: Averages score across samples - - ``'sum'``: Sum score across samples - - ``'none'`` or ``None``: Returns score per sample - - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - .. note:: - This metric have been moved to the regression package in v0.10 and this version will be removed in v0.11. - - Raises: - TypeError: - If ``log_prob`` is not an ``bool``. - ValueError: - If ``reduction`` is not one of ``'mean'``, ``'sum'``, ``'none'`` or ``None``. - - .. note:: - Half precision is only support on GPU for this metric - - Example: - >>> import torch - >>> from torchmetrics.functional import kl_divergence - >>> p = torch.tensor([[0.36, 0.48, 0.16]]) - >>> q = torch.tensor([[1/3, 1/3, 1/3]]) - >>> kl_divergence(p, q) - tensor(0.0853) - """ - - def __init__( - self, - log_prob: bool = False, - reduction: Literal["mean", "sum", "none", None] = "mean", - **kwargs: Any, - ) -> None: - super().__init__(log_prob, reduction, **kwargs) - rank_zero_warn( - "`torchmetrics.classification.KLDivergence` have been moved to `torchmetrics.regression.KLDivergence`" - " from v0.10 and this version will be removed in v0.11. 
Please update import paths.", - DeprecationWarning, - ) diff --git a/src/torchmetrics/classification/precision_recall.py b/src/torchmetrics/classification/precision_recall.py index d05a38f3e99..cb2874d1efa 100644 --- a/src/torchmetrics/classification/precision_recall.py +++ b/src/torchmetrics/classification/precision_recall.py @@ -623,7 +623,7 @@ class Precision(StatScores): The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: num_classes: @@ -657,11 +657,11 @@ class Precision(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -678,9 +678,7 @@ class Precision(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. @@ -813,7 +811,7 @@ class Recall(StatScores): The reduction method (how the recall scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: num_classes: @@ -846,11 +844,10 @@ class Recall(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -868,9 +865,7 @@ class Recall(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
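
The ranking metrics deleted in the next file were renamed rather than dropped (`CoverageError` -> `MultilabelCoverageError`, `LabelRankingAveragePrecision` -> `MultilabelRankingAveragePrecision`, `LabelRankingLoss` -> `MultilabelRankingLoss`), and the classification `KLDivergence` shim removed above now lives only in the regression package. A rough migration sketch follows, assuming the refactored multilabel classes take a `num_labels` argument like the other refactored multilabel metrics; the inputs mirror the deleted docstring examples.

    >>> import torch
    >>> from torchmetrics.classification import (
    ...     MultilabelCoverageError,
    ...     MultilabelRankingAveragePrecision,
    ...     MultilabelRankingLoss,
    ... )
    >>> from torchmetrics.regression import KLDivergence  # moved out of the classification package
    >>> _ = torch.manual_seed(42)
    >>> preds = torch.rand(10, 5)
    >>> target = torch.randint(2, (10, 5))
    >>> coverage = MultilabelCoverageError(num_labels=5)(preds, target)        # was CoverageError()
    >>> lrap = MultilabelRankingAveragePrecision(num_labels=5)(preds, target)  # was LabelRankingAveragePrecision()
    >>> ranking_loss = MultilabelRankingLoss(num_labels=5)(preds, target)      # was LabelRankingLoss()
    >>> p = torch.tensor([[0.36, 0.48, 0.16]])
    >>> q = torch.tensor([[1/3, 1/3, 1/3]])
    >>> kl = KLDivergence()(p, q)
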
diff --git a/src/torchmetrics/classification/ranking.py b/src/torchmetrics/classification/ranking.py index 3b7aba9ba34..863f191fbd1 100644 --- a/src/torchmetrics/classification/ranking.py +++ b/src/torchmetrics/classification/ranking.py @@ -17,12 +17,6 @@ from torch import Tensor from torchmetrics.functional.classification.ranking import ( - _coverage_error_compute, - _coverage_error_update, - _label_ranking_average_precision_compute, - _label_ranking_average_precision_update, - _label_ranking_loss_compute, - _label_ranking_loss_update, _multilabel_confusion_matrix_arg_validation, _multilabel_confusion_matrix_format, _multilabel_coverage_error_update, @@ -243,171 +237,3 @@ def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore def compute(self) -> Tensor: return _ranking_reduce(self.measure, self.total) - - -class CoverageError(Metric): - """Computes multilabel coverage error [1]. The score measure how far we need to go through the ranked scores to - cover all true labels. The best value is equal to the average number of labels in the target tensor per sample. - - Args: - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Example: - >>> from torchmetrics import CoverageError - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> metric = CoverageError() - >>> metric(preds, target) - tensor(3.9000) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. - """ - - higher_is_better: bool = False - is_differentiable: bool = False - full_state_update: bool = False - coverage: Tensor - numel: Tensor - weight: Tensor - - def __init__(self, **kwargs: Any) -> None: - super().__init__(**kwargs) - self.add_state("coverage", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("numel", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("weight", torch.tensor(0.0), dist_reduce_fx="sum") - - def update(self, preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> None: # type: ignore - """ - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - """ - coverage, numel, sample_weight = _coverage_error_update(preds, target, sample_weight) - self.coverage += coverage - self.numel += numel - if sample_weight is not None: - self.weight += sample_weight - - def compute(self) -> Tensor: - """Computes the multilabel coverage error.""" - return _coverage_error_compute(self.coverage, self.numel, self.weight) - - -class LabelRankingAveragePrecision(Metric): - """Computes label ranking average precision score for multilabel data [1]. - - The score is the average over each ground truth label assigned to each sample of the ratio of true vs. - total labels with lower score. Best score is 1. - - Args: - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. 
- - Example: - >>> from torchmetrics import LabelRankingAveragePrecision - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> metric = LabelRankingAveragePrecision() - >>> metric(preds, target) - tensor(0.7744) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. - """ - - score: Tensor - numel: Tensor - sample_weight: Tensor - higher_is_better: bool = True - is_differentiable: bool = False - full_state_update: bool = False - - def __init__(self, **kwargs: Any) -> None: - super().__init__(**kwargs) - self.add_state("score", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("numel", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("sample_weight", torch.tensor(0.0), dist_reduce_fx="sum") - - def update(self, preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> None: # type: ignore - """ - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - """ - score, numel, sample_weight = _label_ranking_average_precision_update(preds, target, sample_weight) - self.score += score - self.numel += numel - if sample_weight is not None: - self.sample_weight += sample_weight - - def compute(self) -> Tensor: - """Computes the label ranking average precision score.""" - return _label_ranking_average_precision_compute(self.score, self.numel, self.sample_weight) - - -class LabelRankingLoss(Metric): - """Computes the label ranking loss for multilabel data [1]. The score is corresponds to the average number of - label pairs that are incorrectly ordered given some predictions weighted by the size of the label set and the - number of labels not in the label set. The best score is 0. - - Args: - kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. - - Example: - >>> from torchmetrics import LabelRankingLoss - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> metric = LabelRankingLoss() - >>> metric(preds, target) - tensor(0.4167) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. - """ - - loss: Tensor - numel: Tensor - sample_weight: Tensor - higher_is_better: bool = False - is_differentiable: bool = False - full_state_update: bool = False - - def __init__(self, **kwargs: Any) -> None: - super().__init__(**kwargs) - self.add_state("loss", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("numel", torch.tensor(0.0), dist_reduce_fx="sum") - self.add_state("sample_weight", torch.tensor(0.0), dist_reduce_fx="sum") - - def update(self, preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> None: # type: ignore - """ - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. 
Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - """ - loss, numel, sample_weight = _label_ranking_loss_update(preds, target, sample_weight) - self.loss += loss - self.numel += numel - if sample_weight is not None: - self.sample_weight += sample_weight - - def compute(self) -> Tensor: - """Computes the label ranking loss.""" - return _label_ranking_loss_compute(self.loss, self.numel, self.sample_weight) diff --git a/src/torchmetrics/classification/specificity.py b/src/torchmetrics/classification/specificity.py index d7966653e70..0e618c5fccb 100644 --- a/src/torchmetrics/classification/specificity.py +++ b/src/torchmetrics/classification/specificity.py @@ -314,7 +314,7 @@ class Specificity(StatScores): The reduction method (how the specificity scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: num_classes: @@ -348,11 +348,10 @@ class Specificity(StatScores): - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -371,9 +370,7 @@ class Specificity(StatScores): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. diff --git a/src/torchmetrics/classification/stat_scores.py b/src/torchmetrics/classification/stat_scores.py index c358ca63262..53d9b414653 100644 --- a/src/torchmetrics/classification/stat_scores.py +++ b/src/torchmetrics/classification/stat_scores.py @@ -503,7 +503,7 @@ class StatScores(Metric): ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the multi-dimensional multi-class case. - Accepts all inputs listed in :ref:`pages/classification:input types`. + Args: threshold: @@ -539,7 +539,7 @@ class StatScores(Metric): mdmc_reduce: Defines how the multi-dimensional multi-class inputs are handeled. Should be one of the following: - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional - multi-class (see :ref:`pages/classification:input types` for the definition of input types). + multi-class. - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then the outputs are concatenated together. 
In each @@ -553,9 +553,7 @@ class StatScores(Metric): multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info. @@ -690,8 +688,6 @@ def __init__( def update(self, preds: Tensor, target: Tensor) -> None: # type: ignore """Update state with predictions and targets. - See :ref:`pages/classification:input types` for more information on input types. - Args: preds: Predictions from model (probabilities, logits or labels) target: Ground truth values diff --git a/src/torchmetrics/functional/__init__.py b/src/torchmetrics/functional/__init__.py index 44245a2d6cd..ca83c1c5f75 100644 --- a/src/torchmetrics/functional/__init__.py +++ b/src/torchmetrics/functional/__init__.py @@ -15,7 +15,6 @@ from torchmetrics.functional.audio.sdr import scale_invariant_signal_distortion_ratio, signal_distortion_ratio from torchmetrics.functional.audio.snr import scale_invariant_signal_noise_ratio, signal_noise_ratio from torchmetrics.functional.classification.accuracy import accuracy -from torchmetrics.functional.classification.auc import auc from torchmetrics.functional.classification.auroc import auroc from torchmetrics.functional.classification.average_precision import average_precision from torchmetrics.functional.classification.calibration_error import calibration_error @@ -29,11 +28,6 @@ from torchmetrics.functional.classification.matthews_corrcoef import matthews_corrcoef from torchmetrics.functional.classification.precision_recall import precision, precision_recall, recall from torchmetrics.functional.classification.precision_recall_curve import precision_recall_curve -from torchmetrics.functional.classification.ranking import ( - coverage_error, - label_ranking_average_precision, - label_ranking_loss, -) from torchmetrics.functional.classification.roc import roc from torchmetrics.functional.classification.specificity import specificity from torchmetrics.functional.classification.stat_scores import stat_scores @@ -96,7 +90,6 @@ __all__ = [ "accuracy", - "auc", "auroc", "average_precision", "bleu_score", @@ -107,7 +100,6 @@ "cohen_kappa", "confusion_matrix", "cosine_similarity", - "coverage_error", "tweedie_deviance_score", "dice_score", "dice", @@ -121,8 +113,6 @@ "image_gradients", "jaccard_index", "kl_divergence", - "label_ranking_average_precision", - "label_ranking_loss", "match_error_rate", "matthews_corrcoef", "mean_absolute_error", diff --git a/src/torchmetrics/functional/classification/__init__.py b/src/torchmetrics/functional/classification/__init__.py index 82932c0d6e3..e772f93aa80 100644 --- a/src/torchmetrics/functional/classification/__init__.py +++ b/src/torchmetrics/functional/classification/__init__.py @@ -17,7 +17,6 @@ multiclass_accuracy, multilabel_accuracy, ) -from torchmetrics.functional.classification.auc import auc # noqa: F401 from torchmetrics.functional.classification.auroc import ( # noqa: F401 auroc, binary_auroc, @@ -99,9 +98,6 @@ precision_recall_curve, ) from torchmetrics.functional.classification.ranking import ( # noqa: F401 - coverage_error, - label_ranking_average_precision, - label_ranking_loss, multilabel_coverage_error, multilabel_ranking_average_precision, multilabel_ranking_loss, diff --git a/src/torchmetrics/functional/classification/accuracy.py 
b/src/torchmetrics/functional/classification/accuracy.py index fa641ba59c7..9fa0ddc64d9 100644 --- a/src/torchmetrics/functional/classification/accuracy.py +++ b/src/torchmetrics/functional/classification/accuracy.py @@ -660,8 +660,6 @@ def accuracy( changed to subset accuracy (which requires all labels or sub-samples in the sample to be correctly predicted) by setting ``subset_accuracy=True``. - Accepts all input types listed in :ref:`pages/classification:input types`. - Args: preds: Predictions from model (probabilities, logits or labels) target: Ground truth labels @@ -693,11 +691,11 @@ def accuracy( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) + are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -715,9 +713,7 @@ def accuracy( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. ignore_index: Integer specifying a target class to ignore. If given, this class index does not contribute to the returned score, regardless of reduction method. If an index is ignored, and ``average=None`` diff --git a/src/torchmetrics/functional/classification/auc.py b/src/torchmetrics/functional/classification/auc.py deleted file mode 100644 index 4d62697c9ac..00000000000 --- a/src/torchmetrics/functional/classification/auc.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import torch -from torch import Tensor - -from torchmetrics.utilities.compute import auc as _auc -from torchmetrics.utilities.prints import rank_zero_warn - - -def auc(x: Tensor, y: Tensor, reorder: bool = False) -> Tensor: - """Computes Area Under the Curve (AUC) using the trapezoidal rule. - - .. note:: - This metric have been moved to `torchmetrics.utilities.compute` in v0.10 this version will be removed in v0.11. - - Args: - x: x-coordinates, must be either increasing or decreasing - y: y-coordinates - reorder: if True, will reorder the arrays to make it either increasing or decreasing - - Return: - Tensor containing AUC score - - Raises: - ValueError: - If both ``x`` and ``y`` tensors are not ``1d``. - ValueError: - If both ``x`` and ``y`` don't have the same numnber of elements. 
- ValueError: - If ``x`` tesnsor is neither increasing nor decreasing. - - Example: - >>> from torchmetrics.functional import auc - >>> x = torch.tensor([0, 1, 2, 3]) - >>> y = torch.tensor([0, 1, 2, 2]) - >>> auc(x, y) - tensor(4.) - >>> auc(x, y, reorder=True) - tensor(4.) - """ - rank_zero_warn( - "`torchmetrics.functional.auc` has been move to `torchmetrics.utilities.compute` in v0.10" - " and will be removed in v0.11.", - DeprecationWarning, - ) - return _auc(x, y, reorder=reorder) diff --git a/src/torchmetrics/functional/classification/dice.py b/src/torchmetrics/functional/classification/dice.py index 602d4a1d1d4..3449c182913 100644 --- a/src/torchmetrics/functional/classification/dice.py +++ b/src/torchmetrics/functional/classification/dice.py @@ -178,7 +178,7 @@ def dice( The reduction method (how the recall scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -213,11 +213,10 @@ def dice( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -240,9 +239,7 @@ def dice( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter diff --git a/src/torchmetrics/functional/classification/f_beta.py b/src/torchmetrics/functional/classification/f_beta.py index 7448b828ca0..9e142741ec5 100644 --- a/src/torchmetrics/functional/classification/f_beta.py +++ b/src/torchmetrics/functional/classification/f_beta.py @@ -822,7 +822,7 @@ def fbeta_score( The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -856,10 +856,9 @@ def fbeta_score( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. 
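The deprecation shim deleted above already pointed callers at `torchmetrics.utilities.compute.auc`; here is a minimal sketch of the replacement call, reusing the example values from the removed docstring (the import path and the `reorder` keyword are taken directly from the deleted shim, everything else is illustrative):

```python
import torch
from torchmetrics.utilities.compute import auc  # new home of the trapezoidal AUC helper

x = torch.tensor([0, 1, 2, 3])
y = torch.tensor([0, 1, 2, 2])

# Same result as the removed torchmetrics.functional.auc: trapezoidal rule over (x, y).
print(auc(x, y))                 # tensor(4.)
print(auc(x, y, reorder=True))   # reorder sorts x first; no effect here, x is already increasing
```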
- ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -880,9 +879,7 @@ def fbeta_score( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter @@ -992,7 +989,7 @@ def f1_score( The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -1026,11 +1023,10 @@ def f1_score( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -1054,9 +1050,7 @@ def f1_score( multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter diff --git a/src/torchmetrics/functional/classification/hamming.py b/src/torchmetrics/functional/classification/hamming.py index 2d0b0ef7f8d..9433777f529 100644 --- a/src/torchmetrics/functional/classification/hamming.py +++ b/src/torchmetrics/functional/classification/hamming.py @@ -462,8 +462,6 @@ def hamming_distance( treats each possible label separately - meaning that, for example, multi-class data is treated as if it were multi-label. - Accepts all input types listed in :ref:`pages/classification:input types`. - Args: preds: Predictions from model (probabilities, logits or labels) target: Ground truth diff --git a/src/torchmetrics/functional/classification/kl_divergence.py b/src/torchmetrics/functional/classification/kl_divergence.py deleted file mode 100644 index 69853be74f4..00000000000 --- a/src/torchmetrics/functional/classification/kl_divergence.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from torch import Tensor -from typing_extensions import Literal - -from torchmetrics.functional.regression.kl_divergence import kl_divergence as _kl_divergence -from torchmetrics.utilities.prints import rank_zero_warn - - -def kl_divergence( - p: Tensor, q: Tensor, log_prob: bool = False, reduction: Literal["mean", "sum", "none", None] = "mean" -) -> Tensor: - r"""Computes `KL divergence`_ - - .. math:: - D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q{x}} - - Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution - over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence - is a non-symetrical metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`. - - .. note:: - This metric have been moved to the regression package in v0.10 and this version will be removed in v0.11. - - Args: - p: data distribution with shape ``[N, d]`` - q: prior or approximate distribution with shape ``[N, d]`` - log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities, - will normalize to make sure the distributes sum to 1 - reduction: - Determines how to reduce over the ``N``/batch dimension: - - - ``'mean'`` [default]: Averages score across samples - - ``'sum'``: Sum score across samples - - ``'none'`` or ``None``: Returns score per sample - - Example: - >>> import torch - >>> p = torch.tensor([[0.36, 0.48, 0.16]]) - >>> q = torch.tensor([[1/3, 1/3, 1/3]]) - >>> kl_divergence(p, q) - tensor(0.0853) - """ - rank_zero_warn( - "`torchmetrics.functional.classification.kl_divergence` have been moved to" - "`torchmetrics.functional.regression.kl_divergence` from v0.10 and this version will be removed in v0.11." - "Please update import paths.", - DeprecationWarning, - ) - return _kl_divergence(p, q, log_prob, reduction) diff --git a/src/torchmetrics/functional/classification/precision_recall.py b/src/torchmetrics/functional/classification/precision_recall.py index 4fa6801b0c5..8052504c9de 100644 --- a/src/torchmetrics/functional/classification/precision_recall.py +++ b/src/torchmetrics/functional/classification/precision_recall.py @@ -738,7 +738,7 @@ def precision( The reduction method (how the precision scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -771,11 +771,10 @@ def precision( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. 
- ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -798,9 +797,7 @@ def precision( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter @@ -969,7 +966,7 @@ def recall( The reduction method (how the recall scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -1003,11 +1000,10 @@ def recall( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -1030,9 +1026,7 @@ def recall( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter @@ -1139,7 +1133,7 @@ def precision_recall( The reduction method (how the recall scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -1172,11 +1166,10 @@ def precision_recall( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. 
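To make the `mdmc_average` wording in these hunks concrete, a small hedged sketch of the two reduction modes on a multi-dimensional multi-class input, assuming the pre-0.11 keyword-based `precision`/`recall` signatures documented here (the toy tensors are illustrative only):

```python
import torch
from torchmetrics.functional import precision, recall

# N=2 samples, each with 4 extra positions (the flattened ``...`` axis).
preds  = torch.tensor([[0, 1, 2, 2], [1, 1, 0, 2]])
target = torch.tensor([[0, 2, 2, 2], [1, 0, 0, 1]])

# 'global': the N and ``...`` axes are flattened into one sample axis before averaging.
p_global = precision(preds, target, average="macro", num_classes=3, mdmc_average="global")
# 'samplewise': each sample is scored over its own ``...`` axis, then scores are averaged.
p_sample = precision(preds, target, average="macro", num_classes=3, mdmc_average="samplewise")
r_global = recall(preds, target, average="macro", num_classes=3, mdmc_average="global")
print(p_global, p_sample, r_global)
```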
@@ -1197,9 +1190,7 @@ def precision_recall( Should be left at default (``None``) for all other types of inputs. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The function returns a tuple with two elements: precision and recall. Their shape diff --git a/src/torchmetrics/functional/classification/ranking.py b/src/torchmetrics/functional/classification/ranking.py index 635d6ac1b8a..51b941c9163 100644 --- a/src/torchmetrics/functional/classification/ranking.py +++ b/src/torchmetrics/functional/classification/ranking.py @@ -256,219 +256,3 @@ def multilabel_ranking_loss( ) loss, n_elements = _multilabel_ranking_loss_update(preds, target) return _ranking_reduce(loss, n_elements) - - -def _check_ranking_input(preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> Tensor: - """Check that ranking input have the correct dimensions.""" - if preds.ndim != 2 or target.ndim != 2: - raise ValueError( - "Expected both predictions and target to matrices of shape `[N,C]`" - f" but got {preds.ndim} and {target.ndim}" - ) - if preds.shape != target.shape: - raise ValueError("Expected both predictions and target to have same shape") - if sample_weight is not None: - if sample_weight.ndim != 1 or sample_weight.shape[0] != preds.shape[0]: - raise ValueError( - "Expected sample weights to be 1 dimensional and have same size" - f" as the first dimension of preds and target but got {sample_weight.shape}" - ) - - -def _coverage_error_update( - preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None -) -> Tuple[Tensor, int, Optional[Tensor]]: - """Accumulate state for coverage error - Args: - preds: tensor with predictions - target: tensor with ground truth labels - sample_weight: optional tensor with weight for each sample - - """ - _check_ranking_input(preds, target, sample_weight) - offset = torch.zeros_like(preds) - offset[target == 0] = preds.min().abs() + 10 # Any number >1 works - preds_mod = preds + offset - preds_min = preds_mod.min(dim=1)[0] - coverage = (preds >= preds_min[:, None]).sum(dim=1).to(torch.float32) - if isinstance(sample_weight, Tensor): - coverage *= sample_weight - sample_weight = sample_weight.sum() - return coverage.sum(), coverage.numel(), sample_weight - - -def _coverage_error_compute(coverage: Tensor, n_elements: int, sample_weight: Optional[Tensor] = None) -> Tensor: - if sample_weight is not None and sample_weight != 0.0: - return coverage / sample_weight - return coverage / n_elements - - -def coverage_error(preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> Tensor: - """Computes multilabel coverage error [1]. The score measure how far we need to go through the ranked scores to - cover all true labels. The best value is equal to the average number of labels in the target tensor per sample. - - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. 
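The ranking import block above keeps only the `multilabel_*` variants, so the `coverage_error` removed here maps onto `multilabel_coverage_error`. A hedged migration sketch follows, assuming the new functional takes a `num_labels` argument and no longer accepts `sample_weight`; it reuses the same seeded data as the original Example kept just below:

```python
import torch
from torchmetrics.functional.classification import multilabel_coverage_error

_ = torch.manual_seed(42)
preds = torch.rand(10, 5)            # probabilities or logits, shape [N, L]
target = torch.randint(2, (10, 5))   # binary labels, shape [N, L]

# Replacement for the removed coverage_error(preds, target); per-sample weighting is gone.
print(multilabel_coverage_error(preds, target, num_labels=5))
```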
- - Example: - >>> from torchmetrics.functional import coverage_error - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> coverage_error(preds, target) - tensor(3.9000) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. - """ - coverage, n_elements, sample_weight = _coverage_error_update(preds, target, sample_weight) - return _coverage_error_compute(coverage, n_elements, sample_weight) - - -def _label_ranking_average_precision_update( - preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None -) -> Tuple[Tensor, int, Optional[Tensor]]: - """Accumulate state for label ranking average precision. - - Args: - preds: tensor with predictions - target: tensor with ground truth labels - sample_weight: optional tensor with weight for each sample - """ - _check_ranking_input(preds, target, sample_weight) - # Invert so that the highest score receives rank 1 - neg_preds = -preds - - score = torch.tensor(0.0, device=neg_preds.device) - n_preds, n_labels = neg_preds.shape - for i in range(n_preds): - relevant = target[i] == 1 - ranking = _rank_data(neg_preds[i][relevant]).float() - if len(ranking) > 0 and len(ranking) < n_labels: - rank = _rank_data(neg_preds[i])[relevant].float() - score_idx = (ranking / rank).mean() - else: - score_idx = 1.0 - - if sample_weight is not None: - score_idx *= sample_weight[i] - - score += score_idx - - return score, n_preds, sample_weight.sum() if isinstance(sample_weight, Tensor) else sample_weight - - -def _label_ranking_average_precision_compute( - score: Tensor, n_elements: int, sample_weight: Optional[Tensor] = None -) -> Tensor: - """Computes the final label ranking average precision score.""" - if sample_weight is not None and sample_weight != 0.0: - return score / sample_weight - return score / n_elements - - -def label_ranking_average_precision(preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> Tensor: - """Computes label ranking average precision score for multilabel data [1]. The score is the average over each - ground truth label assigned to each sample of the ratio of true vs. total labels with lower score. Best score - is 1. - - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - - Example: - >>> from torchmetrics.functional import label_ranking_average_precision - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> label_ranking_average_precision(preds, target) - tensor(0.7744) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. 
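Likewise, the removed `label_ranking_average_precision` has a direct counterpart in the surviving `multilabel_ranking_average_precision`; a minimal sketch under the same `num_labels` assumption:

```python
import torch
from torchmetrics.functional.classification import multilabel_ranking_average_precision

_ = torch.manual_seed(42)
preds = torch.rand(10, 5)            # higher score means higher rank for that label
target = torch.randint(2, (10, 5))   # binary relevance per label

# Best value is 1.0, as for the removed functional.
print(multilabel_ranking_average_precision(preds, target, num_labels=5))
```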
- """ - score, n_elements, sample_weight = _label_ranking_average_precision_update(preds, target, sample_weight) - return _label_ranking_average_precision_compute(score, n_elements, sample_weight) - - -def _label_ranking_loss_update( - preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None -) -> Tuple[Tensor, int, Optional[Tensor]]: - """Accumulate state for label ranking loss. - - Args: - preds: tensor with predictions - target: tensor with ground truth labels - sample_weight: optional tensor with weight for each sample - """ - _check_ranking_input(preds, target, sample_weight) - n_preds, n_labels = preds.shape - relevant = target == 1 - n_relevant = relevant.sum(dim=1) - - # Ignore instances where number of true labels is 0 or n_labels - mask = (n_relevant > 0) & (n_relevant < n_labels) - preds = preds[mask] - relevant = relevant[mask] - n_relevant = n_relevant[mask] - - # Nothing is relevant - if len(preds) == 0: - return torch.tensor(0.0, device=preds.device), 1, sample_weight - - inverse = preds.argsort(dim=1).argsort(dim=1) - per_label_loss = ((n_labels - inverse) * relevant).to(torch.float32) - correction = 0.5 * n_relevant * (n_relevant + 1) - denom = n_relevant * (n_labels - n_relevant) - loss = (per_label_loss.sum(dim=1) - correction) / denom - if isinstance(sample_weight, Tensor): - loss *= sample_weight[mask] - sample_weight = sample_weight.sum() - return loss.sum(), n_preds, sample_weight - - -def _label_ranking_loss_compute(loss: Tensor, n_elements: int, sample_weight: Optional[Tensor] = None) -> Tensor: - """Computes the final label ranking loss.""" - if sample_weight is not None and sample_weight != 0.0: - return loss / sample_weight - return loss / n_elements - - -def label_ranking_loss(preds: Tensor, target: Tensor, sample_weight: Optional[Tensor] = None) -> Tensor: - """Computes the label ranking loss for multilabel data [1]. The score is corresponds to the average number of - label pairs that are incorrectly ordered given some predictions weighted by the size of the label set and the - number of labels not in the label set. The best score is 0. - - Args: - preds: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should either be probabilities of the positive class or corresponding logits - target: tensor of shape ``[N,L]`` where ``N`` is the number of samples and ``L`` is the number - of labels. Should only contain binary labels. - sample_weight: tensor of shape ``N`` where ``N`` is the number of samples. How much each sample - should be weighted in the final score. - - Example: - >>> from torchmetrics.functional import label_ranking_loss - >>> _ = torch.manual_seed(42) - >>> preds = torch.rand(10, 5) - >>> target = torch.randint(2, (10, 5)) - >>> label_ranking_loss(preds, target) - tensor(0.4167) - - References: - [1] Tsoumakas, G., Katakis, I., & Vlahavas, I. (2010). Mining multi-label data. In Data mining and - knowledge discovery handbook (pp. 667-685). Springer US. 
- """ - loss, n_element, sample_weight = _label_ranking_loss_update(preds, target, sample_weight) - return _label_ranking_loss_compute(loss, n_element, sample_weight) diff --git a/src/torchmetrics/functional/classification/specificity.py b/src/torchmetrics/functional/classification/specificity.py index b78757d0be1..d155b1b9dcf 100644 --- a/src/torchmetrics/functional/classification/specificity.py +++ b/src/torchmetrics/functional/classification/specificity.py @@ -437,7 +437,7 @@ def specificity( The reduction method (how the specificity scores are aggregated) is controlled by the ``average`` parameter, and additionally by the ``mdmc_average`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, or labels) @@ -470,11 +470,10 @@ def specificity( - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then averaged over samples. The computation for each sample is done by treating the flattened extra axes ``...`` - (see :ref:`pages/classification:input types`) as the ``N`` dimension within the sample, + as the ``N`` dimension within the sample, and computing the metric for the sample based on that. - ``'global'``: In this case the ``N`` and ``...`` dimensions of the inputs - (see :ref:`pages/classification:input types`) are flattened into a new ``N_X`` sample axis, i.e. the inputs are treated as if they were ``(N_X, C)``. From here on the ``average`` parameter applies as usual. @@ -496,9 +495,7 @@ def specificity( Should be left unset (``None``) for inputs with label predictions. multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. Return: The shape of the returned tensor depends on the ``average`` parameter diff --git a/src/torchmetrics/functional/classification/stat_scores.py b/src/torchmetrics/functional/classification/stat_scores.py index ab41fc6dee8..9909f095b1c 100644 --- a/src/torchmetrics/functional/classification/stat_scores.py +++ b/src/torchmetrics/functional/classification/stat_scores.py @@ -1107,7 +1107,7 @@ def stat_scores( The reduction method (how the statistics are aggregated) is controlled by the ``reduce`` parameter, and additionally by the ``mdmc_reduce`` parameter in the - multi-dimensional multi-class case. Accepts all inputs listed in :ref:`pages/classification:input types`. + multi-dimensional multi-class case. Args: preds: Predictions from model (probabilities, logits or labels) @@ -1147,7 +1147,7 @@ def stat_scores( one of the following: - ``None`` [default]: Should be left unchanged if your data is not multi-dimensional - multi-class (see :ref:`pages/classification:input types` for the definition of input types). + multi-class. - ``'samplewise'``: In this case, the statistics are computed separately for each sample on the ``N`` axis, and then the outputs are concatenated together. In each @@ -1161,9 +1161,7 @@ def stat_scores( multiclass: Used only in certain special cases, where you want to treat inputs as a different type - than what they appear to be. See the parameter's - :ref:`documentation section ` - for a more detailed explanation and examples. + than what they appear to be. 
Return: The metric returns a tensor of shape ``(..., 5)``, where the last dimension corresponds diff --git a/src/torchmetrics/wrappers/multioutput.py b/src/torchmetrics/wrappers/multioutput.py index c51cc0cb02f..ffc3d9ead3c 100644 --- a/src/torchmetrics/wrappers/multioutput.py +++ b/src/torchmetrics/wrappers/multioutput.py @@ -65,17 +65,6 @@ class MultioutputWrapper(Metric): >>> r2score = MultioutputWrapper(R2Score(), 2) >>> r2score(preds, target) [tensor(0.9654), tensor(0.9082)] - >>> # Classification metric where prediction and label tensors have different shapes. - >>> from torchmetrics import BinnedAveragePrecision - >>> target = torch.tensor([[1, 2], [2, 0], [1, 2]]) - >>> preds = torch.tensor([ - ... [[.1, .8], [.8, .05], [.1, .15]], - ... [[.1, .1], [.2, .3], [.7, .6]], - ... [[.002, .4], [.95, .45], [.048, .15]] - ... ]) - >>> binned_avg_precision = MultioutputWrapper(BinnedAveragePrecision(3, thresholds=5), 2) - >>> binned_avg_precision(preds, target) - [[tensor(-0.), tensor(1.0000), tensor(1.0000)], [tensor(0.3333), tensor(-0.), tensor(0.6667)]] """ is_differentiable = False diff --git a/tests/unittests/classification/test_binned_precision_recall.py b/tests/unittests/classification/test_binned_precision_recall.py deleted file mode 100644 index ba909a3e0a2..00000000000 --- a/tests/unittests/classification/test_binned_precision_recall.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright The PyTorch Lightning team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
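With the `BinnedAveragePrecision` doctest dropped from `MultioutputWrapper` above and its test module deleted below, the binned behaviour is obtained by passing `thresholds` to the task-specific average-precision classes. A hedged sketch, assuming `MulticlassAveragePrecision` in torchmetrics 0.10 accepts `num_classes`, `thresholds` and `average`:

```python
import torch
from torchmetrics.classification import MulticlassAveragePrecision

# A fixed threshold grid gives the constant-memory ("binned") evaluation
# previously provided by BinnedAveragePrecision.
ap = MulticlassAveragePrecision(num_classes=3, thresholds=5, average=None)

preds = torch.tensor([[0.1, 0.8, 0.1],
                      [0.6, 0.3, 0.1],
                      [0.2, 0.3, 0.5]])
target = torch.tensor([1, 0, 2])
print(ap(preds, target))   # one average-precision value per class
```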
- -from functools import partial -from typing import Tuple - -import numpy as np -import pytest -import torch -from sklearn.metrics import average_precision_score as _sk_average_precision_score -from sklearn.metrics import precision_recall_curve as _sk_precision_recall_curve -from torch import Tensor - -from torchmetrics.classification.binned_precision_recall import BinnedAveragePrecision, BinnedRecallAtFixedPrecision -from unittests.classification.inputs import _input_binary_prob -from unittests.classification.inputs import _input_binary_prob_plausible as _input_binary_prob_ok -from unittests.classification.inputs import _input_multilabel_prob as _input_mlb_prob -from unittests.classification.inputs import _input_multilabel_prob_plausible as _input_mlb_prob_ok -from unittests.helpers import seed_all -from unittests.helpers.testers import NUM_CLASSES, MetricTester - -seed_all(42) - - -def recall_at_precision_x_multilabel(predictions: Tensor, targets: Tensor, min_precision: float) -> Tuple[float, float]: - precision, recall, thresholds = _sk_precision_recall_curve(targets, predictions) - - try: - tuple_all = [(r, p, t) for p, r, t in zip(precision, recall, thresholds) if p >= min_precision] - max_recall, _, best_threshold = max(tuple_all) - except ValueError: - max_recall, best_threshold = 0, 1e6 - - return float(max_recall), float(best_threshold) - - -def _sk_prec_recall_mclass_prob(predictions, targets, num_classes, min_precision): - max_recalls = torch.zeros(num_classes) - best_thresholds = torch.zeros(num_classes) - - for i in range(num_classes): - max_recalls[i], best_thresholds[i] = recall_at_precision_x_multilabel( - predictions[:, i], targets[:, i], min_precision - ) - return max_recalls, best_thresholds - - -def _sk_prec_recall_binary_prob(predictions, targets, num_classes, min_precision): - return recall_at_precision_x_multilabel(predictions, targets, min_precision) - - -def _sk_avg_prec_multiclass(predictions, targets, num_classes): - # replace nan with 0 - return np.nan_to_num(_sk_average_precision_score(targets, predictions, average=None)) - - -@pytest.mark.parametrize( - "preds, target, sk_metric, num_classes", - [ - (_input_binary_prob.preds, _input_binary_prob.target, _sk_prec_recall_binary_prob, 1), - (_input_binary_prob_ok.preds, _input_binary_prob_ok.target, _sk_prec_recall_binary_prob, 1), - (_input_mlb_prob_ok.preds, _input_mlb_prob_ok.target, _sk_prec_recall_mclass_prob, NUM_CLASSES), - (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_prec_recall_mclass_prob, NUM_CLASSES), - ], -) -class TestBinnedRecallAtPrecision(MetricTester): - atol = 0.02 - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - @pytest.mark.parametrize("min_precision", [0.05, 0.1, 0.3, 0.5, 0.8, 0.95]) - def test_binned_recall_at_precision( - self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, min_precision - ): - # rounding will simulate binning for both implementations - preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=BinnedRecallAtFixedPrecision, - sk_metric=partial(sk_metric, num_classes=num_classes, min_precision=min_precision), - dist_sync_on_step=dist_sync_on_step, - metric_args={ - "num_classes": num_classes, - "min_precision": min_precision, - "thresholds": 101, - }, - ) - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - 
@pytest.mark.parametrize("min_precision", [0.05, 0.1, 0.3, 0.5, 0.8, 0.95]) - def test_binned_recall_at_precision_default_thresholds( - self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, min_precision - ): - # rounding will simulate binning for both implementations - preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=BinnedRecallAtFixedPrecision, - sk_metric=partial(sk_metric, num_classes=num_classes, min_precision=min_precision), - dist_sync_on_step=dist_sync_on_step, - metric_args={ - "num_classes": num_classes, - "min_precision": min_precision, - }, - ) - - -@pytest.mark.parametrize( - "preds, target, sk_metric, num_classes", - [ - (_input_binary_prob.preds, _input_binary_prob.target, _sk_avg_prec_multiclass, 1), - (_input_binary_prob_ok.preds, _input_binary_prob_ok.target, _sk_avg_prec_multiclass, 1), - (_input_mlb_prob_ok.preds, _input_mlb_prob_ok.target, _sk_avg_prec_multiclass, NUM_CLASSES), - (_input_mlb_prob.preds, _input_mlb_prob.target, _sk_avg_prec_multiclass, NUM_CLASSES), - ], -) -class TestBinnedAveragePrecision(MetricTester): - atol = 0.002 - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - @pytest.mark.parametrize("thresholds", (301, torch.linspace(0.0, 1.0, 101))) - def test_binned_average_precision(self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step, thresholds): - # rounding will simulate binning for both implementations - preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=BinnedAveragePrecision, - sk_metric=partial(sk_metric, num_classes=num_classes), - dist_sync_on_step=dist_sync_on_step, - metric_args={"num_classes": num_classes, "thresholds": thresholds}, - ) - - @pytest.mark.parametrize("ddp", [True, False]) - @pytest.mark.parametrize("dist_sync_on_step", [True, False]) - def test_binned_average_precision_default_thresholds( - self, preds, target, sk_metric, num_classes, ddp, dist_sync_on_step - ): - # rounding will simulate binning for both implementations - preds = Tensor(np.round(preds.numpy(), 2)) + 1e-6 - - self.run_class_metric_test( - ddp=ddp, - preds=preds, - target=target, - metric_class=BinnedAveragePrecision, - sk_metric=partial(sk_metric, num_classes=num_classes), - dist_sync_on_step=dist_sync_on_step, - metric_args={"num_classes": num_classes}, - )